您现在的位置是: 网站首页> 学习笔记> 爬虫 爬虫
腾讯滑块
2021-04-11 [滑块验证码] 5040人已围观
import requests
import cv2
from selenium import webdriver
import time
import numpy as np
import pyautogui
import random
from lxml.html import etree
from selenium.common.exceptions import NoSuchWindowException, WebDriverException
class CaptchaOne(object):
    """Crawler for the glidedsky "crawler-captcha-1" level.

    Each page is protected by a Tencent slider captcha.  The class opens the
    page in Firefox (Selenium), downloads the captcha background and slider
    images, estimates the horizontal drag distance with OpenCV template
    matching, performs the drag with real mouse events (pyautogui) and then
    scrapes the numbers shown on the unlocked page.

    Attributes:
        values:  every number collected so far, across all pages.
        headers: placeholder dict, not used by any method of this class.
        cookie:  list of single-entry ``{name: value}`` dicts installed into
                 the browser in ``__init__``.
        drive:   the Selenium Firefox driver.
    """

    # Screen coordinates (in pixels) at which the drag starts.
    # NOTE(review): presumably the on-screen position of the slider knob on
    # the author's display with a maximized Firefox window -- confirm/adjust
    # for your own screen before running.
    SLIDER_START_X = 730
    SLIDER_START_Y = 655

    # Seconds to wait for each captcha image download before giving up.
    REQUEST_TIMEOUT = 10

    # Pool from which set_useragent() picks a random User-Agent string.
    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    )

    def __init__(self):
        """Start Firefox, open the first level page and install the cookies."""
        self.values = []
        self.headers = {
        }
        self.cookie = [
        ]
        self.drive = webdriver.Firefox()
        self.drive.maximize_window()
        self.drive.set_page_load_timeout(12)
        # Load the site once first; otherwise add_cookie() raises an
        # "invalid cookie domain" error.
        self.drive.get('http://www.glidedsky.com/level/web/crawler-captcha-1?page=1')
        # Install the cookies: only the first key/value pair of each dict is used.
        for c in self.cookie:
            name, value = next(iter(c.items()))
            self.drive.add_cookie({
                'name': name,
                'value': value,
            })

    def get_tracks(self, distance):
        """Build a list of per-step displacements that covers ``distance``.

        The motion accelerates uniformly (a = 5) until 3/5 of the distance is
        covered and then decelerates uniformly (a = -6), using

            v = v0 + a*t
            s = v0*t + 1/2*a*t^2

        with a fixed time step of t = 0.5.  Each step's displacement is
        rounded to an int, so the sum of the returned steps can differ
        slightly from ``distance`` (e.g. 101 for ``distance=100``).

        This helper is not called by ``get_page``, which drags with pyautogui
        instead; it is suitable for an ``ActionChains.move_by_offset`` drag.

        :param distance: total distance to move
        :return: list of int displacements, one per time step; empty when
                 ``distance`` is not positive
        """
        # Current velocity (starts at rest).
        v = 0
        # Length of one time step.
        t = 0.5
        # Displacement covered in each time step.
        tracks = []
        # Distance covered so far.
        current = 0
        # Start decelerating once this point is reached.
        mid = distance * 3 / 5
        while current < distance:
            # A smaller acceleration gives smaller steps, i.e. a longer and
            # more detailed track.
            a = 5 if current < mid else -6
            v0 = v
            # Displacement during this time step.
            s = v0 * t + 1 / 2 * a * (t ** 2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t
        return tracks

    def get_page(self, url):
        """Solve the captcha on ``url`` and collect the numbers of that page.

        Retries (with a fresh User-Agent and proxy each time) until the page
        yields at least one number.  The retry is a loop rather than
        recursion, so a long run of failures cannot exhaust the call stack.

        Exceptions raised while (re)configuring the browser -- loading
        ``about:config`` or ``set_useragent`` -- propagate to the caller;
        any exception raised during an attempt itself is printed and the
        attempt is repeated.

        :param url: page URL to crawl
        :return: ``self.values``, the cumulative list of all collected numbers
        """
        while True:
            self.drive.get("about:config")
            self.set_useragent()
            try:
                if self._attempt_page(url):
                    return self.values
            except Exception as e:
                print('Self Error: ', e)

    def _attempt_page(self, url):
        """Run one solve-and-scrape attempt; return True when values were collected."""
        self.drive.get(url)
        self.drive.implicitly_wait(10)
        # Switch into the frame that hosts the captcha.
        self.drive.switch_to.frame('tcaptcha_iframe')
        # find_element('xpath', ...) is used instead of find_element_by_xpath
        # because the latter was removed in Selenium 4; this form works in
        # both Selenium 3 and 4.
        # Background image URL.
        bg_url = self.drive.find_element('xpath', '//*[@id="cdn1"]').get_attribute('src')
        # Slider image URL.
        sl_url = self.drive.find_element('xpath', '//*[@id="cdn2"]').get_attribute('src')
        distance = self.get_distance(bg_url, sl_url)
        if distance['val'] <= 0:
            print(f'获取{url}失败, 原因:', distance['msg'])
            print(f'开始重新获取{url}...')
            return False

        self._drag_slider(distance['val'])
        time.sleep(5)

        try:
            captcha_still_shown = '拖动下方滑块完成拼图' in self.drive.page_source
        except WebDriverException:
            # Reading the frame failed (NoSuchWindowException is a subclass of
            # WebDriverException).  NOTE(review): presumably the captcha frame
            # was discarded because verification succeeded -- the page content
            # is parsed below either way.
            captcha_still_shown = False
        except Exception:
            print(f'获取{url}失败, 原因:滑动验证失败!')
            print(f'开始重新获取{url}...')
            return False

        if captcha_still_shown:
            print(f'获取{url}失败, 原因:滑动验证失败!')
            print(f'开始重新获取{url}...')
            time.sleep(2)
            return False

        # Also reached when the frame is still readable but no longer shows
        # the captcha prompt; parsing here (and retrying on an empty result)
        # avoids silently skipping the page.
        vals = self._collect_values()
        if not vals:
            print(f'获取{url}失败, 原因:未获取到页面内容!')
            print(f'开始重新获取{url}...')
            return False
        self.values.extend(vals)
        print(vals)
        return True

    def _drag_slider(self, distance):
        """Drag the slider ``distance`` pixels to the right with real mouse events."""
        x = self.SLIDER_START_X
        y = self.SLIDER_START_Y
        # Approach the knob with a little random jitter, then press.
        pyautogui.moveTo(x=x + random.randint(-20, 20), y=y + random.randint(-10, 10),
                         duration=random.randint(25, 35) / 100)
        pyautogui.mouseDown()
        try:
            # First waypoint: 60%-90% of the distance.
            y += random.randint(2, 5)
            waypoint_x = x + int(distance * random.randint(12, 18) / 20)
            pyautogui.moveTo(waypoint_x, y, duration=random.randint(25, 35) / 100)
            # Second waypoint: a further 75%-125% of the distance on top of the
            # first one, i.e. beyond the target.
            # NOTE(review): the overshoot looks intentional (human-like
            # correction); kept exactly as in the original.
            y += random.randint(-9, 0)
            waypoint_x += int(distance * random.randint(15, 25) / 20)
            pyautogui.moveTo(waypoint_x, y, duration=random.randint(25, 35) / 100)
            # Settle on the actual target and hold briefly before releasing.
            y += random.randint(0, 8)
            pyautogui.moveTo(x + distance, y, duration=random.randint(25, 35) / 100)
            time.sleep(random.randint(40, 75) / 100)
        finally:
            # Always release the button, even if a move failed half-way.
            pyautogui.mouseUp()

    def _collect_values(self):
        """Switch to the parent frame and return the ints found on the page."""
        self.drive.switch_to.parent_frame()
        res = etree.HTML(self.drive.page_source)
        return [
            int(item.strip())
            for item in res.xpath('//div[@class="card-body"]//div[@class="col-md-1"]/text()')
        ]

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.drive.quit()

    def get_img_from_net(self, bg_url, sl_url):
        """Download the two captcha images.

        Network errors and timeouts propagate as ``requests`` exceptions.

        :param bg_url: URL of the background image containing the gap
        :param sl_url: URL of the slider (puzzle piece) image
        :return: ``(bg_bytes, sl_bytes)`` on success, ``(False, False)`` when
                 either response status is not 200
        """
        bg_res = requests.get(bg_url, timeout=self.REQUEST_TIMEOUT)
        if bg_res.status_code != 200:
            return False, False
        sl_res = requests.get(sl_url, timeout=self.REQUEST_TIMEOUT)
        if sl_res.status_code != 200:
            return False, False
        return bg_res.content, sl_res.content

    def get_distance(self, bg_url, sl_url):
        """Estimate how far the slider has to be dragged to reach the gap.

        :param bg_url: URL of the background image containing the gap
        :param sl_url: URL of the slider (puzzle piece) image
        :return: ``{'val': int, 'msg': str}``.  ``val`` is 0 with the message
                 '获取验证码图片失败!' when an image could not be downloaded or
                 decoded; otherwise ``val`` is the computed offset (callers
                 treat a non-positive offset as a failure) and ``msg`` is the
                 fixed string '未获取到图片'.
        """
        bg_bytes, sl_bytes = self.get_img_from_net(bg_url, sl_url)
        # Either image missing means failure.  (The original test
        # ``not bg_img and sl_img`` was never true for (False, False).)
        if not bg_bytes or not sl_bytes:
            return {'val': 0, 'msg': '获取验证码图片失败!'}

        # Reuse the bytes already downloaded instead of fetching the slider again.
        bg_img = self._decode_half_size(bg_bytes)
        sl_img = self._decode_half_size(sl_bytes)
        if bg_img is None or sl_img is None:
            return {'val': 0, 'msg': '获取验证码图片失败!'}

        # matchTemplate returns a 2-D array of normalized correlation scores,
        # one per candidate top-left position of the slider in the background.
        scores = cv2.matchTemplate(bg_img, sl_img, cv2.TM_CCOEFF_NORMED)
        _min_val, _max_val, min_loc, _max_loc = cv2.minMaxLoc(scores)
        # NOTE(review): the x of the *minimum* score is used, as in the
        # original.  For TM_CCOEFF_NORMED the best positive match is the
        # maximum; presumably the minimum is deliberate because the gap is
        # drawn as a dark shadow -- confirm before changing.
        # +12 compensates for the transparent margin on the left of the slider
        # image; -40 is the slider's initial distance from the left border of
        # the background.  Both values are taken unchanged from the original.
        offset = min_loc[0] + 12 - 40
        return {'val': offset, 'msg': '未获取到图片'}

    @staticmethod
    def _decode_half_size(raw):
        """Decode image bytes as grayscale and scale to 50%; None if undecodable."""
        img = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_GRAYSCALE)
        if img is None:
            return None
        # ndarray.shape is (rows, cols) == (height, width); cv2.resize takes
        # its target size as (width, height).
        height, width = img.shape[:2]
        return cv2.resize(img, (int(width * 0.5), int(height * 0.5)))

    def set_useragent(self):
        """Pick a random User-Agent and a fresh HTTP/SSL proxy for the browser.

        Two scripts are executed in the currently loaded page (``get_page``
        loads ``about:config`` right before calling this): one redefines
        ``navigator.userAgent``, the other writes the Firefox proxy
        preferences through the preferences service.  The proxy address
        (``host:port``) is fetched from the proxy-pool HTTP service.

        NOTE(review): access to ``Components.classes`` presumably requires the
        privileged ``about:config`` context and a Firefox version that still
        exposes it -- confirm against the Firefox version in use.
        """
        js_code = (
            '\nvar customUserAgent = "' + random.choice(self.USER_AGENTS) + '";\n'
            "//修改后的userAgent\n"
            "Object.defineProperty(navigator, 'userAgent', {\n"
            "value: customUserAgent,\n"
            "writable: false\n"
            "});\n"
            "console.log(navigator.userAgent);\n"
        )
        self.drive.execute_script(js_code)

        ip = requests.get('http://nets.tpddns.cn:5010/get/').json()['proxy']
        # Split the "host:port" string once instead of four times.
        parts = ip.split(':')
        host, port = parts[0], parts[1]
        js_base = (
            'var pf = Components.classes["@mozilla.org/preferences-service;1"]'
            '.getService(Components.interfaces.nsIPrefBranch);\n'
            'pf.setIntPref("network.proxy.type", 1);\n'
            'pf.setCharPref("network.proxy.http", "{0}");\n'
            'pf.setIntPref("network.proxy.http_port", {1});\n'
            'pf.setCharPref("network.proxy.ssl", "{2}");\n'
            'pf.setIntPref("network.proxy.ssl_port", {3});'
        )
        self.drive.execute_script(js_base.format(host, port, host, port))
if __name__ == '__main__':
    # Crawl pages 1..1000 of the level, printing the running sum of all
    # collected numbers after each page.
    base_url = 'http://www.glidedsky.com/level/web/crawler-captcha-1?page={}'
    co = CaptchaOne()
    try:
        for page in range(1, 1001):
            print('page: ', page)
            co.get_page(base_url.format(page))
            print('sum: ', sum(co.values))
            # Pause between pages.
            time.sleep(5)
    finally:
        # Always close the browser, even if a page raised or the run was
        # interrupted, so no Firefox process is left behind.
        co.quit()
上一篇:git
下一篇:base64字体存为本地文件
相关文章
文章评论
#2023-06-06 21:57 @ Mattie:
Party Snapѕ Photo Booth OᏟ | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 led гental
#2023-06-07 06:53 @ Windy:
Группа объявлений Ульяновск в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять! Объявления Ульяновска
#2023-06-07 11:39 @ Johnson:
Группа объявлений Нижнего Тагила в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять... Объявления Нижний Тагил
#2023-06-10 15:20 @ Kraig:
примерно на кривляться оформление сильно необходимых документов и доставим авто из ОАЭ под источник Самые Лучшие Микрозаймы Невзирая сверху то, что кредит показывается быстрым а также эффективным средством резолюции финансовых заморочек, жуть шиздец находят решение сверху этот шаг через сложности процедуры евонный оформления. [URL=https://credit-mikrozaim.com]Микрозайм Или Микрозаем[/URL] https://credit-mikrozaim.com/
#2023-06-12 01:53 @ Brigette:
Pɑrty Snaps Photo Booth OC | Phһoto Booth Rental Orange Ϲounty 12911 Dungan Ln, Garden Grove, CA 92840 VOGUE photo boօtһ rentɑl Laɡuna Niguel
#2023-06-12 16:49 @ Helen:
Party Ꮪnaps Photfo Boothh OC | Photo Booth Rental Orаnge County 12911 Dungan Ꮮn, Garden Ꮐroѵe, CA 92840 photo booth rental baby shower
#2023-06-14 09:21 @ toursex:
<a href=https://viagr.cfd>over the counter viagra substitute</a> Recombination efficiency
#2023-06-29 04:07 @ Playelo:
The shaking is usually fast, about 4 to 12 movements per second <a href=https://sildenafi.cfd>how often should you take viagra</a> Doctors and patients are always looking to identify ways to tell if cancer treatments are working
添加评论
点击排行
本栏推荐
标签云
热评文章
- django使用qq邮箱发送邮件
- mysql8设置数据库远程连接
- pip修改下载源为国内源
- win10看不到win7共享的文件夹的解决方法
- SQLyog连接 Mysql 8.0.11 报error no.1251- Client does not support authentic...
- 使用Oracle Net Manager配置Oracle数据库远程访问
- 将anaconda的下载源切换为国内的源
- Python+selenium+firefox设置代理IP
- selenium+firefox+js实现动态设置firefox浏览器代理IP
- scrapy文件下载(高新技术企业认定网)
- Python调用JS代码
- Chrome浏览器的overrides的使用
站点信息
- 建站时间:2021-01-01
- 网站程序:Django 3.1.2
- 文章统计:47篇
- 文章评论:20条
- 统计数据:
#2023-06-05 05:21 @ Bridget:
Pагty Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Gɑrden Grove, CA 92840 best photo booth rentɑls near me