Python3.4 12306 2015年3月验证码识别

"""Recognise the 12306 login captcha (March 2015 layout) with Python 3.4.

The script downloads one captcha image, crops the text prompt from its top
edge, sends that strip to an online OCR service, sends each picture tile to
an image-lookup service, and prints the tiles whose keywords share at least
one character with the prompt.

Working files are written to the current directory: ``tmp.jpg`` (captcha),
``text.jpg`` (prompt strip) and ``query_temp_img.png`` (tile being queried).

Adapted from https://gist.github.com/Evi1m0/fbbdb1ba7c66cc4e1bb2 for
Python 3.4.
"""
import itertools
import json
import re
import ssl
import urllib.parse
import urllib.request as urllib2

import requests
from PIL import Image

# NOTE(review): this rebinds ssl.create_default_context for the whole process
# to the factory that builds non-verifying contexts.  PEP 476 documents
# ssl._create_default_https_context as the opt-out hook for the standard
# library HTTP clients, so confirm this line has the intended effect before
# relying on it (and keep it out of any code that handles real credentials).
if hasattr(ssl, '_create_unverified_context'):
    ssl.create_default_context = ssl._create_unverified_context

UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"

# Compiled once; captures the quoted payload that follows "keywords:".
_KEYWORDS_RE = re.compile(r"keywords:'(.*?)'")


def imgCut():
    """Crop the prompt strip out of ./tmp.jpg and save it as ./text.jpg."""
    box = (120, 0, 290, 25)  # (left, upper, right, lower) in pixels
    with Image.open('./tmp.jpg') as pic_obj:
        pic_obj.crop(box).save('./text.jpg')


def ocrApi():
    """Upload ./text.jpg to the OCR service and return the recognised text.

    Returns:
        The decoded body of the result document the service points to.
    """
    filename = './text.jpg'
    # NOTE(review): the endpoint is empty in this copy of the script, so the
    # upload cannot succeed until it is filled in.  The Host header below
    # suggests it lived on cn.docs88.com -- confirm against the original.
    upload_pic_url = ""
    filename_tmp = filename.split('/')[-1]
    headers_fake = {
        'Accept': '*/*',  # was misspelled 'ccept'
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Connection': 'keep-alive',
        'Host': 'cn.docs88.com',
        'Origin': '',
        'User-Agent': 'Mozilla/5.0 (KHTML, like Gecko) Chrome/41.0.2272.89',
        'X-Requested-With': 'ShockwaveFlash/17.0.0.134',
    }
    para = {
        'Filename': filename_tmp,
        'sourcename': filename_tmp,
        'sourcelanguage': 'cn',
        'desttype': 'txt',
        'Upload': 'Submit Query',
    }
    # Close the image file as soon as the upload has been sent.
    with open(filename, 'rb') as fp:
        upload_pic = requests.post(
            upload_pic_url,
            data=para,
            files={"Filedata": fp},
            headers=headers_fake,
        )
    # str() of a bytes object is its repr ("b'...'"), so [5:-1] drops the
    # leading b' plus the first three characters of the response and the
    # closing quote.  NOTE(review): the '' prefix looks like a base URL that
    # was lost together with upload_pic_url -- confirm.
    text_result_url = '' + str(upload_pic.content)[5:-1]
    with urllib2.urlopen(text_result_url) as resp:
        return resp.read().decode()


def get_img():
    """Download the captcha to ./tmp.jpg and return it as a PIL image."""
    with urllib2.urlopen(pic_url) as resp:
        raw = resp.read()
    with open('./tmp.jpg', 'wb') as fp:
        fp.write(raw)
    return Image.open('./tmp.jpg')


def get_sub_img(im, x, y):
    """Return the picture tile at column ``x`` (0-3), row ``y`` (0-2).

    Args:
        im: image object providing ``crop((left, top, right, bottom))``.
        x: zero-based column index.
        y: zero-based row index.
    """
    assert 0 <= x <= 3
    assert 0 <= y <= 2
    # Tiles are cropped 67 px square on a 72 px pitch (the original note said
    # width = height = 68), starting 5 px from the left and 41 px from the top.
    left = 5 + (67 + 5) * x
    top = 41 + (67 + 5) * y
    return im.crop((left, top, left + 67, top + 67))


def baidu_stu_lookup(im):
    """Look one tile up via the image-search service and return its keywords.

    The tile is saved to ./query_temp_img.png, uploaded as the request body,
    and the URL returned by the service is then submitted as a query; the
    resulting HTML is parsed by ``baidu_stu_html_extract``.
    """
    # NOTE(review): both URLs below start at '?', i.e. scheme/host/path are
    # missing in this copy of the script -- restore them from the original.
    url = "?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
    im.save("./query_temp_img.png")
    with open("./query_temp_img.png", 'rb') as fp:
        raw = fp.read()
    url = url + str(len(raw))
    req = urllib2.Request(url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        resp_url = resp.read()
    # quote() accepts bytes; use its documented home in urllib.parse.
    url = "?queryImageUrl=" + urllib.parse.quote(resp_url)
    req = urllib2.Request(url, headers={'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        html = resp.read().decode()
    return baidu_stu_html_extract(html)


def baidu_stu_html_extract(html):
    """Extract the keyword list embedded in a result page.

    Args:
        html: page source containing ``keywords:'<escaped JSON list>'``.

    Returns:
        The ``keyword`` values joined with ``|``, or ``'[UNKOWN]'`` when the
        page has no keywords entry, the list is empty, or the payload cannot
        be parsed.
    """
    found = _KEYWORDS_RE.search(html)
    if found is None:
        return '[UNKOWN]'
    # The payload is JavaScript-escaped: \x22 stands for a double quote and
    # backslashes are doubled.
    json_str = found.group(1).replace('\\x22', '"').replace('\\\\', '\\')
    try:
        joined = '|'.join([item['keyword'] for item in json.loads(json_str)])
    except (ValueError, KeyError, TypeError):
        # One malformed tile result should not abort the whole run.
        return '[UNKOWN]'
    return joined if joined else '[UNKOWN]'


def find_matching_tiles(captcha_text, tile_keywords):
    """List the tiles whose keywords share a character with the prompt.

    Args:
        captcha_text: text recognised from the prompt strip.
        tile_keywords: mapping of tile number to its keyword string.

    Returns:
        One ``'<number> — <keywords>'`` string per matching tile, ordered by
        tile number.  Whitespace in the prompt is never used for matching.
    """
    wanted = {ch for ch in captcha_text if not ch.isspace()}
    return [
        '%s — %s' % (number, keywords)
        for number, keywords in sorted(tile_keywords.items())
        if any(ch in keywords for ch in wanted)
    ]


def main():
    """Fetch a captcha, recognise prompt and tiles, print likely answers."""
    im = get_img()
    imgCut()
    captcha_text = ocrApi()
    print(captcha_text)

    dic_list = {}
    # Tiles are numbered 1..8, row by row, over the first two rows.
    grid = itertools.product(range(2), range(4))
    for count, (y, x) in enumerate(grid, 1):
        result = baidu_stu_lookup(get_sub_img(im, x, y))
        dic_list[count] = result
        print((y, x), result)

    if captcha_text.strip():
        print('\n可能的结果是:')
        for line in find_matching_tiles(captcha_text, dic_list):
            print(line)
    else:
        print('False')


if __name__ == '__main__':
    main()

人生谁无少年时,甜苦酸辛各自知。

Python3.4 12306 2015年3月验证码识别

相关文章:

你感兴趣的文章:

标签云: