您现在的位置是: 网站首页> 学习笔记> 爬虫 爬虫

腾讯滑块

2021-04-11 [滑块验证码] 7094人已围观

import requests
import cv2
from selenium import webdriver
import time
import numpy as np
import pyautogui
import random
from lxml.html import etree
from selenium.common.exceptions import NoSuchWindowException, WebDriverException


class CaptchaOne(object):
    """Solve the Tencent slider captcha on glidedsky.com and scrape page values.

    A Firefox instance is driven through Selenium.  For every page the gap
    offset is estimated with OpenCV template matching, the slider is dragged
    with pyautogui (real mouse events), and the integers found on the
    unlocked page are accumulated in ``self.values``.
    """

    # Pool of user-agent strings; one is picked at random for each page load.
    _UA_LIST = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    )

    # Coordinates handed to pyautogui as the drag start point.  They are
    # hard-coded for the author's setup; override them on a subclass or
    # instance when the slider handle sits elsewhere.
    SLIDER_X = 730
    SLIDER_Y = 655

    # Timeout (seconds) for the plain HTTP requests made with `requests`.
    HTTP_TIMEOUT = 10

    def __init__(self):
        # Every integer scraped so far, across all pages.
        self.values = []
        # Not read anywhere in this class; kept so existing callers can still set it.
        self.headers = {
        }
        # Session cookies to install, as a list of {name: value} dicts.
        self.cookie = [

        ]
        self.drive = webdriver.Firefox()
        try:
            self.drive.maximize_window()
            self.drive.set_page_load_timeout(12)
            # Load the site once first; otherwise add_cookie rejects the
            # cookies with an invalid-domain error.
            self.drive.get('http://www.glidedsky.com/level/web/crawler-captcha-1?page=1')
            for c in self.cookie:
                # Install every pair of each dict (the original silently
                # dropped everything but the first pair).
                for name, value in c.items():
                    self.drive.add_cookie({'name': name, 'value': value})
        except Exception:
            # Do not leave an orphaned browser behind when setup fails.
            self.drive.quit()
            raise

    def get_tracks(self, distance):
        '''
        Build a movement track: uniform acceleration, then uniform deceleration.

        Kinematics used for each time slice:
            v = v0 + a*t
            s = v0*t + 1/2*a*t^2

        The per-slice displacements are derived from the rounded cumulative
        position, so they always add up to ``round(distance)`` and the slider
        never over- or undershoots because of rounding.

        Not called by get_page in this file (it drags with pyautogui instead).

        :param distance: total distance to move
        :return: list of integer displacements, one per time slice;
                 empty when ``distance`` is not positive
        '''
        # current velocity
        v = 0
        # length of one time slice
        t = 0.5
        tracks = []
        # exact (unrounded) position reached so far
        current = 0
        # integer position already emitted into `tracks`
        moved = 0
        # start decelerating once this position is reached
        mid = distance * 3 / 5
        while current < distance:
            # A smaller acceleration gives smaller steps, i.e. a finer track.
            a = 5 if current < mid else -6
            v0 = v
            # displacement within this time slice
            s = v0 * t + 1 / 2 * a * (t ** 2)
            # clamp so the last slice stops exactly on the target
            current = min(current + s, distance)
            step = round(current) - moved
            moved += step
            tracks.append(step)
            v = v0 + a * t
        return tracks

    def get_page(self, url, max_retries=None):
        '''
        Open ``url``, solve its slider captcha and collect the page's numbers.

        Failed attempts are retried in a loop (the original retried through
        unbounded recursion and could lose the return value).

        :param url: page to scrape
        :param max_retries: maximum number of retries after the first attempt;
                            ``None`` (default) retries until the page succeeds
        :return: ``self.values`` (all numbers collected so far, including this page)
        '''
        attempt = 0
        while True:
            attempt += 1
            try:
                if self._attempt_page(url):
                    return self.values
                retry_note = f'开始重新获取{url}...'
            except Exception as e:
                print('Self Error: ', e)
                retry_note = None
            if max_retries is not None and attempt > max_retries:
                print(f'获取{url}失败, 原因:超过最大重试次数!')
                return self.values
            if retry_note:
                print(retry_note)

    def _attempt_page(self, url):
        '''Run one load/solve/scrape attempt; return True when values were scraped.'''
        self.drive.get("about:config")
        self.set_useragent()
        self.drive.get(url)
        self.drive.implicitly_wait(10)
        # switch into the frame that hosts the captcha
        self.drive.switch_to.frame('tcaptcha_iframe')

        # find_element(by, value) works on Selenium 3 and 4, whereas
        # find_element_by_xpath was removed in Selenium 4.3.
        # background image (with the gap) url
        bg_url = self.drive.find_element('xpath', '//*[@id="cdn1"]').get_attribute('src')
        # slider piece url
        sl_url = self.drive.find_element('xpath', '//*[@id="cdn2"]').get_attribute('src')

        distance = self.get_distance(bg_url, sl_url)
        if distance['val'] <= 0:
            print(f'获取{url}失败, 原因:', distance['msg'])
            return False

        self._drag_slider(distance['val'])
        time.sleep(5)

        try:
            still_prompting = '拖动下方滑块完成拼图' in self.drive.page_source
        except WebDriverException:
            # Also covers NoSuchWindowException (a subclass).  The original
            # treated this error as the signal that the captcha frame is gone
            # and the page can be scraped.
            still_prompting = False
        if still_prompting:
            print(f'获取{url}失败, 原因:滑动验证失败!')
            time.sleep(2)
            return False

        self.drive.switch_to.parent_frame()
        vals = self._parse_values(self.drive.page_source)
        if not vals:
            print(f'获取{url}失败, 原因:未获取到页面内容!')
            return False
        self.values.extend(vals)
        print(vals)
        return True

    def _drag_slider(self, offset):
        '''Drag the slider ``offset`` pixels to the right with real mouse events.'''
        x = self.SLIDER_X
        y = self.SLIDER_Y
        pyautogui.moveTo(x=x + random.randint(-20, 20), y=y + random.randint(-10, 10),
                         duration=random.randint(25, 35) / 100)
        pyautogui.mouseDown()
        try:
            # Two jittered waypoints before settling on the target.
            y += random.randint(2, 5)
            x0 = x + int(offset * random.randint(12, 18) / 20)
            pyautogui.moveTo(x0, y, duration=random.randint(25, 35) / 100)
            y += random.randint(-9, 0)
            # NOTE(review): this waypoint is relative to the previous one, so it
            # overshoots the target before the final move comes back; kept as in
            # the original -- confirm that the overshoot is intended.
            x0 = x0 + int(offset * random.randint(15, 25) / 20)
            pyautogui.moveTo(x0, y, duration=random.randint(25, 35) / 100)
            y += random.randint(0, 8)
            pyautogui.moveTo(x + offset, y, duration=random.randint(25, 35) / 100)
            time.sleep(random.randint(40, 75) / 100)
        finally:
            # Always release the button, even if a move raised.
            pyautogui.mouseUp()

    @staticmethod
    def _parse_values(page_source):
        '''Extract the integers shown in the page's result cells.'''
        res = etree.HTML(page_source)
        texts = res.xpath('//div[@class="card-body"]//div[@class="col-md-1"]/text()')
        # Skip whitespace-only text nodes instead of failing on int('').
        return [int(text) for text in (item.strip() for item in texts) if text]

    def quit(self):
        '''Close the browser and end the WebDriver session.'''
        self.drive.quit()

    def get_img_from_net(self, bg_url, sl_url):
        '''
        Download both captcha images.

        :param bg_url: url of the background image containing the gap
        :param sl_url: url of the slider piece image
        :return: ``(bg_bytes, sl_bytes)``, or ``(False, False)`` when either
                 download fails or does not answer with HTTP 200
        '''
        try:
            bg_res = requests.get(bg_url, timeout=self.HTTP_TIMEOUT)
            if bg_res.status_code != 200:
                return False, False
            sl_res = requests.get(sl_url, timeout=self.HTTP_TIMEOUT)
            if sl_res.status_code != 200:
                return False, False
        except requests.RequestException:
            # Report network errors through the documented failure value.
            return False, False
        return bg_res.content, sl_res.content

    def get_distance(self, bg_url, sl_url):
        '''
        Estimate how far the slider has to travel to reach the gap.

        :param bg_url: url of the background image containing the gap
        :param sl_url: url of the slider piece image
        :return: dict with keys ``val`` and ``msg``.  ``val`` is 0 when the
                 images could not be downloaded or decoded; otherwise it is
                 the computed offset (callers treat ``val <= 0`` as failure
                 and print ``msg`` as the reason).
        '''
        bg_bytes, sl_bytes = self.get_img_from_net(bg_url, sl_url)
        # Fail when EITHER image is missing.  The original test
        # `not bg_img and sl_img` never fired for (False, False).
        if not bg_bytes or not sl_bytes:
            return {'val': 0, 'msg': '获取验证码图片失败!'}

        # Reuse the bytes already downloaded (the original fetched the slider twice).
        bg_img = self._decode_half_size(bg_bytes)
        sl_img = self._decode_half_size(sl_bytes)
        if bg_img is None or sl_img is None:
            return {'val': 0, 'msg': '获取验证码图片失败!'}

        # matchTemplate returns a 2-D float map of match scores, one per
        # candidate top-left position of the slider inside the background.
        scores = cv2.matchTemplate(bg_img, sl_img, cv2.TM_CCOEFF_NORMED)
        # minMaxLoc -> (min_val, max_val, min_loc, max_loc); index 2 is min_loc.
        # NOTE(review): the original takes the x of the MINIMUM score and that
        # is preserved here; presumably the gap is darker than the piece so the
        # best alignment is the most negative correlation -- confirm.
        min_loc = cv2.minMaxLoc(scores)[2]
        # +12: transparent margin on the left of the slider piece (the original
        # comment said 13); -40: gap between the slider's left edge and the
        # background's left edge.
        ret = min_loc[0] + 12 - 40
        return {'val': ret, 'msg': '未获取到图片'}

    @staticmethod
    def _decode_half_size(raw):
        '''Decode image bytes as grayscale and halve both dimensions; None if undecodable.'''
        img = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_GRAYSCALE)
        if img is None:
            return None
        # ndarray.shape is (rows, cols) = (height, width); cv2.resize wants (width, height).
        height, width = img.shape[:2]
        return cv2.resize(img, (int(width * 0.5), int(height * 0.5)))

    def set_useragent(self):
        '''
        Randomise the user agent and point Firefox at a fresh HTTP/SSL proxy.

        Values are handed to the scripts through ``arguments`` instead of being
        spliced into the JavaScript source, so a malformed proxy string from
        the external service cannot inject code.

        :raises ValueError: when the proxy service does not answer with ``host:port``
        '''
        # NOTE(review): this redefines navigator.userAgent on the currently
        # loaded document only; whether it survives the next navigation is not
        # established by this file -- confirm.
        ua_js = '''
            var customUserAgent = arguments[0];

            // overridden userAgent
            Object.defineProperty(navigator, 'userAgent', {
              value: customUserAgent,
              writable: false
            });
            console.log(navigator.userAgent);
        '''
        self.drive.execute_script(ua_js, random.choice(self._UA_LIST))

        proxy = requests.get('http://nets.tpddns.cn:5010/get/',
                             timeout=self.HTTP_TIMEOUT).json()['proxy']
        host, _, port = proxy.rpartition(':')
        if not host or not port.isdigit():
            raise ValueError(f'unexpected proxy address: {proxy!r}')

        proxy_js = '''var pf = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefBranch);

            pf.setIntPref("network.proxy.type", 1);

            pf.setCharPref("network.proxy.http", arguments[0]);

            pf.setIntPref("network.proxy.http_port", arguments[1]);

            pf.setCharPref("network.proxy.ssl", arguments[0]);

            pf.setIntPref("network.proxy.ssl_port", arguments[1]);'''
        self.drive.execute_script(proxy_js, host, int(port))

if __name__ == '__main__':
    # Walk pages 1..1000, printing the running sum of every value scraped.
    base_url = 'http://www.glidedsky.com/level/web/crawler-captcha-1?page={}'
    co = CaptchaOne()
    try:
        for page in range(1, 1001):
            print('page: ', page)
            # get_page accumulates into co.values; its return value is not needed here.
            co.get_page(base_url.format(page))
            print('sum: ', sum(co.values))
        time.sleep(5)
    finally:
        # Close the browser even when a page raises or the run is interrupted.
        co.quit()

上一篇:git

下一篇:base64字体存为本地文件

相关文章

文章评论

#2023-06-05 05:21 @ Bridget:

Pагty Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Gɑrden Grove, CA 92840 best photo booth rentɑls near me

#2023-06-06 21:57 @ Mattie:

Party Snapѕ Photo Booth OᏟ | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 led гental

#2023-06-07 06:53 @ Windy:

Группа объявлений Ульяновск в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять! Объявления Ульяновска

#2023-06-07 11:39 @ Johnson:

Группа объявлений Нижнего Тагила в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять... Объявления Нижний Тагил

#2023-06-10 15:20 @ Kraig:

примерно на кривляться оформление сильно необходимых документов и доставим авто из ОАЭ под источник Самые Лучшие Микрозаймы Невзирая сверху то, что кредит показывается быстрым а также эффективным средством резолюции финансовых заморочек, жуть шиздец находят решение сверху этот шаг через сложности процедуры евонный оформления. [URL=https://credit-mikrozaim.com]Микрозайм Или Микрозаем[/URL] https://credit-mikrozaim.com/

#2023-06-12 01:53 @ Brigette:

Pɑrty Snaps Photo Booth OC | Phһoto Booth Rental Orange Ϲounty 12911 Dungan Ln, Garden Grove, CA 92840 VOGUE photo boօtһ rentɑl Laɡuna Niguel

#2023-06-12 16:49 @ Helen:

Party Ꮪnaps Photfo Boothh OC | Photo Booth Rental Orаnge County 12911 Dungan Ꮮn, Garden Ꮐroѵe, CA 92840 photo booth rental baby shower

#2023-06-14 09:21 @ toursex:

<a href=https://viagr.cfd>over the counter viagra substitute</a> Recombination efficiency

#2023-06-29 04:07 @ Playelo:

The shaking is usually fast, about 4 to 12 movements per second <a href=https://sildenafi.cfd>how often should you take viagra</a> Doctors and patients are always looking to identify ways to tell if cancer treatments are working

#2024-03-25 22:32 @ Doug:

Sabung Ayam Online

#2024-03-31 01:53 @ Jayme:

Situs Sabung Ayam Digmaan

#2024-06-02 08:41 @ nivaawarp:

In this model the most significant NF-κB subunit appears to be NF-κB1, as mice lacking NF-κB2 and c-Rel demonstrated few differences in response compared to WT mice <a href=https://cialis.lat/discover-the-best-prices-for-cialis>brand name cialis online</a>

#2024-06-06 12:21 @ nivaawarp:

<a href=https://cialis.lat/discover-the-best-prices-for-cialis>real cialis no generic</a> Clomid stimulates the production of GnRH GnRH, testosterone, and also prevents the risk of developing gynecomastia male breast growth due to excessive fluid retention

添加评论





本栏推荐

站点信息

  • 建站时间:2021-01-01
  • 网站程序:Django 3.1.2
  • 文章统计:53篇
  • 文章评论:31条
  • 统计数据