百度翻译
BaiduTranslate.js(百度翻译所需的js代码)
function a(r) { if (Array.isArray(r)) { for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o]; return t } return Array.from(r) } function n(r, o) { for (var t = 0; t < o.length - 2; t += 3) { var a = o.charAt(t + 2); a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a } return r } function e(r, gtk) { var i = null var window = {} var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g); if (null === o) { var t = r.length; t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10)) } else { for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]); var g = f.length; g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join("")) } var u = void 0 // var l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); // u = null !== i ? i : (i = window[l] || "") || ""; u = null !== i ? i : (i = gtk || "") || ""; for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) { var A = r.charCodeAt(v); 128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128) } for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F); return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m) }
BaiduTranslate.py
import os import random import re import time from lxml import etree from urllib import parse import requests import execjs """ pip install PyExecJS """ """ BaiduTranslate.js function a(r) { if (Array.isArray(r)) { for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o]; return t } return Array.from(r) } function n(r, o) { for (var t = 0; t < o.length - 2; t += 3) { var a = o.charAt(t + 2); a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a } return r } function e(r, gtk) { var i = null var window = {} var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g); if (null === o) { var t = r.length; t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10)) } else { for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]); var g = f.length; g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join("")) } var u = void 0 // var l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); // u = null !== i ? i : (i = window[l] || "") || ""; u = null !== i ? i : (i = gtk || "") || ""; for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) { var A = r.charCodeAt(v); 128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128) } for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F); return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m) } """ def get_random_agent(): agent_list = [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" ] agent = random.choice(agent_list) return agent class Translate(): def get_token(self, query, headers): _ = self params = { 'query': query, 'keyfrom': 'baidu', 'smartresult': 'dict', # 'lang': 'en2zh', 'lang': 'zh2en', } params_str = parse.urlencode(params) u = 'https://fanyi.baidu.com/translate?' + params_str res = requests.get(u, headers=headers).content.decode('utf-8') with open('111.html', 'w', encoding='utf-8') as w: w.write(res) html = etree.HTML(res) common_script = html.xpath('/html/body/script[1]/text()')[0] try: token = re.findall("token: '(.*?)',", common_script)[0] except: token = '9904ca00b5752d4442a203b934b90b2a' try: langs = re.findall("langList:{(.*?)},", common_script.replace('\n', '').replace(' ', ''))[0] langs = langs.replace(' ', '').replace("'", "") langs_list = langs.split(',') lang_dict = {i.split(':')[0]: i.split(':')[1] for i in langs_list} except: lang_dict = {'zh': '中文', 'jp': '日语', 'jpka': '日语假名', 'th': '泰语', 'fra': '法语', 'en': '英语', 'spa': '西班牙语', 'kor': '韩语', 'tr': '土耳其语', 'vie': '越南语', 'ms': '马来语', 'de': '德语', 'ru': '俄语', 'ir': '伊朗语', 'ara': '阿拉伯语', 'est': '爱沙尼亚语', 'be': '白俄罗斯语', 'bul': '保加利亚语', 'hi': '印地语', 'is': '冰岛语', 'pl': '波兰语', 'fa': '波斯语', 'dan': '丹麦语', 'tl': '菲律宾语', 'fin': '芬兰语', 'nl': '荷兰语', 'ca': '加泰罗尼亚语', 'cs': '捷克语', 'hr': '克罗地亚语', 'lv': '拉脱维亚语', 'lt': '立陶宛语', 'rom': '罗马尼亚语', 'af': '南非语', 'no': '挪威语', 'pt_BR': '巴西语', 'pt': '葡萄牙语', 'swe': '瑞典语', 'sr': '塞尔维亚语', 'eo': '世界语', 'sk': '斯洛伐克语', 'slo': '斯洛文尼亚语', 'sw': '斯瓦希里语', 'uk': '乌克兰语', 'iw': '希伯来语', 'el': '希腊语', 'hu': '匈牙利语', 'hy': '亚美尼亚语', 'it': '意大利语', 'id': '印尼语', 'sq': '阿尔巴尼亚语', 'am': '阿姆哈拉语', 'as': '阿萨姆语', 'az': '阿塞拜疆语', 'eu': '巴斯克语', 'bn': '孟加拉语', 'bs': '波斯尼亚语', 'gl': '加利西亚语', 'ka': '格鲁吉亚语', 'gu': '古吉拉特语', 'ha': '豪萨语', 'ig': '伊博语', 'iu': '因纽特语', 'ga': '爱尔兰语', 'zu': '祖鲁语', 'kn': '卡纳达语', 'kk': '哈萨克语', 'ky': '吉尔吉斯语', 'lb': '卢森堡语', 'mk': '马其顿语', 'mt': '马耳他语', 'mi': '毛利语', 'mr': '马拉提语', 'ne': '尼泊尔语', 'or': '奥利亚语', 'pa': '旁遮普语', 'qu': '凯楚亚语', 'tn': '塞茨瓦纳语', 'si': '僧加罗语', 'ta': '泰米尔语', 'tt': '塔塔尔语', 'te': '泰卢固语', 'ur': '乌尔都语', 'uz': '乌兹别克语', 'cy': '威尔士语', 'yo': '约鲁巴语', 'yue': '粤语', 'wyw': '文言文', 'cht': '中文繁体'} gtk_script = html.xpath('/html/body/script[3]/text()')[0] try: gtk = re.findall("window.gtk = '(.*?)';", gtk_script)[0] except: gtk = '' return token, gtk, lang_dict def convert_cookie_str(self, cookie): _ = self cookies = [k + '=' + v for k, v in cookie.items()] c = ';'.join(cookies) return c def get_cookie(self, query): url = 'https://fanyi.baidu.com/langdetect' data = { 'query': query, } cookies = requests.post(url, data).cookies cookie = requests.utils.dict_from_cookiejar(cookies) cookies = self.convert_cookie_str(cookie) return cookies def get_sign(self, query, gtk): _ = self # query = 'NOMINATIONS SENT TO THE SENATE' path = os.path.abspath(os.path.dirname(__file__)) js_path = os.path.join(path, 'BaiduTranslate.js') with open(js_path, 'r', encoding='utf-8') as r: js = r.read() ctx = execjs.compile(js) sign = ctx.call("e", query, gtk) return sign def get_headers(self, query): user_agent = get_random_agent() cookies = self.get_cookie(query) headers = { 'User-Agent': user_agent, 'X-Requested-With': 'XMLHttpRequest', 'Cookie': cookies, } return headers def get_data(self, f, to, query, sign, token): _ = self data = { # "from": "en", # "to": "zh", "from": f, "to": to, "query": query, "transtype": "translang", "simple_means_flag": "3", # "sign": "237507.490738", # "sign": "515140.212853", "sign": sign, "token": token, "domain": "common", } return data def translate(self, query, interpret): f, to = tuple(interpret.split('_')) headers = self.get_headers(query) token, gtk, lang_dict = self.get_token(query, headers) sign = self.get_sign(query, gtk) data = self.get_data(f, to, query, sign, token) url = 'https://fanyi.baidu.com/v2transapi?from={}&to={}'.format(f, to) res = requests.post(url, data, headers=headers) try: dst = res.json().get('trans_result').get('data')[0].get('dst') return dst except: return None def en_to_zh(query): print('英文翻译为中文', query) while True: try: interpret = 'en_zh' dst = Translate().translate(query, interpret) if dst: break else: time.sleep(3) except: time.sleep(3) print('英文翻译为中文,翻译成功', dst) return dst def zh_to_en(query): print('中文翻译为英文', query) while True: try: interpret = 'zh_en' dst = Translate().translate(query, interpret) if dst: break else: time.sleep(3) except: time.sleep(3) print('中文翻译为英文,翻译成功', dst) return dst if __name__ == '__main__': query = 'NOMINATIONS SENT TO THE SENATE' interpret = 'en_zh' dst = Translate().translate(query, interpret) print(dst)