# Douban captcha page URL
# https://www.douban.com/misc/sorry?original-url=https%3A%2F%2Fwww.douban.com%2Fgroup%2F692739%2Fdiscussion%3Fstart%3D225
from aip import AipOcr
from PIL import Image, ImageFilter # 图片处理
import pytesseract # 识别
# Load the captcha and normalise it to 3-channel RGB. PNG files are frequently
# RGBA or palette-based, in which case a pixel is not an (r, g, b) triple and
# the per-channel threshold test below could not be applied to it.
# The context manager closes the underlying file once the pixels are copied.
# NOTE: the raw image is intentionally not passed to Tesseract here; that
# result was never used and only added a hard dependency on the binary.
with Image.open('img.png') as source:
    im = source.convert('RGB')

# Binarise to strip the background colour: a pixel stays black only when all
# three channels are at or below the threshold; anything brighter turns white.
threshold = 37
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
width, height = im.size
pixels = im.load()  # direct pixel access instead of getpixel/putpixel calls
for x in range(width):
    for y in range(height):
        # max(...) > threshold is equivalent to "r, g or b exceeds threshold".
        pixels[x, y] = WHITE if max(pixels[x, y]) > threshold else BLACK

# Apply a median filter (Pillow's default kernel size) to suppress leftover
# speckle noise, then save the cleaned image for recognition.
im = im.filter(ImageFilter.MedianFilter())
im.save('profit-filter.png')
# Recognise the cleaned image with Baidu AI OCR.
# Replace the placeholders below with your own registered credentials.
APP_ID = ''
API_KEY = ''
SECRET_KEY = ' '
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)  # create the API client

# Read the image bytes inside a context manager so the file handle is closed.
with open("profit-filter.png", "rb") as image_file:
    image_bytes = image_file.read()

res = client.basicGeneral(image_bytes)  # general (basic) text recognition

# A failed request has no "words_result" entry (presumably it carries error
# details instead -- verify against the Baidu API docs). Fail with the full
# response rather than an opaque KeyError.
if "words_result" not in res:
    raise RuntimeError("Baidu OCR request failed: {!r}".format(res))

# Concatenate the text of every recognised line into a single string.
strx = "".join(line["words"] for line in res["words_result"])
print(strx)  # output the recognised text
# =============================================================================================================
# Alternative: recognise with pytesseract instead. Both it and the Baidu AI call above work; in my own tests Baidu AI gave somewhat better results.
# aa = pytesseract.image_to_string(im)
# print(aa)