Python 之 pytesseract 模块读取知乎验证码案例

import pytesseract
from PIL import Image
import requests
import time

def get_data_request():
    """Download the Zhihu login captcha image and save it to ``./captcha.png``.

    Returns:
        The path of the saved image file, or ``None`` when the HTTP request
        or the file write fails (the error is printed in that case).
    """
    headers = {
        # The original User-Agent string was missing its closing parenthesis.
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
    }
    # The `r` query parameter carries the current time in milliseconds.
    captcha_url = 'https://www.zhihu.com/captcha.gif?r=%d&type=login' % (time.time() * 1000)

    try:
        # A timeout keeps the call from blocking forever on a stalled connection.
        response = requests.get(captcha_url, headers=headers, timeout=10)
        # Do not save an HTTP error page as if it were the captcha image.
        response.raise_for_status()
    except (requests.exceptions.RequestException, ConnectionError) as e:
        # requests raises its own exception hierarchy; the builtin
        # ConnectionError is kept so anything previously caught still is.
        print(e)
        return None

    img_name = "./captcha.png"
    try:
        with open(img_name, "wb") as f:
            f.write(response.content)
    except IOError as e:
        print(e)
        return None
    return img_name

def read_captcha(img_url):
    """Run OCR on an image file and return the recognized text.

    Args:
        img_url: Path of the image file; it is passed to ``Image.open``.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the image.
    """
    # Open the image as a context manager so its underlying file handle is
    # released as soon as OCR has finished.
    with Image.open(img_url) as image:
        return pytesseract.image_to_string(image)


def main():
    """Download the captcha image, run OCR on it and print the result."""
    img = get_data_request()
    if img is None:
        # The download or save failed and get_data_request has already
        # printed the error; there is no image to read.
        return
    read_data = read_captcha(img)
    print(read_data)


# Run the captcha demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

结果如图:

原文地址:https://www.cnblogs.com/yang-2018/p/10952427.html