百度翻译


BaiduTranslate.js(百度翻译所需的js代码)

/**
 * Produce a new plain array holding the items of `r`.
 *
 * Real arrays are copied index by index (so holes become explicit
 * `undefined` entries); anything else is handed to `Array.from`.
 *
 * @param {*} r - An array, or any value `Array.from` accepts.
 * @returns {Array} A fresh array; never the same object as `r`.
 */
function a(r) {
    if (!Array.isArray(r)) {
        return Array.from(r);
    }
    const copy = new Array(r.length);
    let idx = 0;
    while (idx < r.length) {
        copy[idx] = r[idx];
        idx += 1;
    }
    return copy;
}

/**
 * Run the integer `r` through a sequence of mixing steps described by `o`.
 *
 * `o` is read three characters at a time:
 *   - 3rd char: shift amount; letters from "a" upward mean charCode - 87
 *     (so "a" is 10), anything else is converted with Number().
 *   - 2nd char: "+" selects an unsigned right shift, anything else a left shift.
 *   - 1st char: "+" adds the shifted value (result truncated by the bitwise
 *     AND), anything else XORs it in.
 * Trailing characters that do not form a full triple are ignored.
 *
 * @param {number} r - Starting value.
 * @param {string} o - Operation string, e.g. "+-a^+6".
 * @returns {number} The mixed value.
 */
function n(r, o) {
    let acc = r;
    for (let pos = 0; pos + 2 < o.length; pos += 3) {
        const amountChar = o.charAt(pos + 2);
        const amount = amountChar >= "a" ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
        const shifted = o.charAt(pos + 1) === "+" ? acc >>> amount : acc << amount;
        if (o.charAt(pos) === "+") {
            acc = (acc + shifted) & 4294967295;
        } else {
            acc = acc ^ shifted;
        }
    }
    return acc;
}

/**
 * Compute the "<p>.<p ^ m>" string for the text `r`, seeded by `gtk`.
 *
 * The accompanying Python client calls this as `e(query, gtk)` and sends the
 * result as the `sign` form field.
 *
 * Steps:
 *   1. If `r` has more than 30 units, condense it to 30: first 10, the 10
 *      around the middle, and the last 10. Surrogate pairs are kept whole
 *      when the text contains any.
 *   2. Split `gtk` on "." into two numbers `m` and `s` (each defaults to 0).
 *   3. Encode the (possibly condensed) text as a UTF-8-style byte sequence.
 *   4. Starting from `m`, add each byte and mix with n(p, "+-a^+6"); finish
 *      with n(p, "+-3^+b+-f"), XOR with `s`, force non-negative, and reduce
 *      modulo 1e6.
 *
 * Relies on the sibling helpers `a` (array copy) and `n` (mixing step).
 *
 * @param {string} r - Text to sign.
 * @param {string} gtk - Seed of the form "<m>.<s>"; falsy values are treated as "".
 * @returns {string} `p + "." + (p ^ m)`.
 */
function e(r, gtk) {
    // `i` is always null here, so the lookup below always falls through to `gtk`.
    var i = null
    // Unused placeholder; only the commented-out lookup below refers to it.
    var window = {}
    // All surrogate pairs in the input, or null when there are none.
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        // No surrogate pairs: condense by UTF-16 code unit.
        var t = r.length;
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
    } else {
        // Build `f`: single code units interleaved with whole surrogate pairs, in original order.
        // Note: the local `e` shadows this function's own name inside the body.
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
        var g = f.length;
        // Same 10 + 10 + 10 condensing as above, but counted in elements of `f`.
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
    }
    var u = void 0
    // Disabled lookup: it built the key "gtk" from char codes 103, 116, 107 and read it from `window`.
    // var l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
    // u = null !== i ? i : (i = window[l] || "") || "";
    // The seed now comes from the `gtk` argument instead.
    u = null !== i ? i : (i = gtk || "") || "";
    // d = seed parts; m and s = numeric seed halves; S = output bytes; c = write index into S.
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        // 1 byte below 128, 2 bytes below 2048, 4 bytes for a valid surrogate pair
        // (which also consumes the next code unit via ++v), otherwise 3 bytes.
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
    }
    // F spells "+-a^+6" and D spells "+-3^+b+-f" (char codes 43 '+', 45 '-', 97 'a', 94 '^', 54 '6', 51 '3', 98 'b', 102 'f').
    // The accumulator p starts at m; every byte is added and then mixed with F.
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
    // Final mix with D, XOR with s, map a negative result to its unsigned 32-bit value, keep the last six decimal digits.
    return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
}
BaiduTranslate.py
import os
import random
import re
import time

from lxml import etree
from urllib import parse

import requests
import execjs

"""
pip install PyExecJS
"""

"""
BaiduTranslate.js
function a(r) {
    if (Array.isArray(r)) {
        for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o];
        return t
    }
    return Array.from(r)
}

function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var a = o.charAt(t + 2);
        a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), a = "+" === o.charAt(t + 1) ? r >>> a : r << a, r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
    }
    return r
}

function e(r, gtk) {
    var i = null
    var window = {}
    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
    } else {
        for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) "" !== e[C] && f.push.apply(f, a(e[C].split(""))), C !== h - 1 && f.push(o[C]);
        var g = f.length;
        g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
    }
    var u = void 0
    // var l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
    // u = null !== i ? i : (i = window[l] || "") || "";
    u = null !== i ? i : (i = gtk || "") || "";
    for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
    }
    for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b], p = n(p, F);
    return p = n(p, D), p ^= s, 0 > p && (p = (2147483647 & p) + 2147483648), p %= 1e6, p.toString() + "." + (p ^ m)
}
"""


def get_random_agent():
    """Return one User-Agent string chosen at random from a fixed pool.

    The pool holds 17 desktop browser identification strings; each call
    draws one of them with ``random.choice``.
    """
    candidates = (
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    )
    return random.choice(candidates)


class Translate():
    """Small client for the Baidu Translate web endpoints on fanyi.baidu.com.

    One translation performs, in order: a POST to ``/langdetect`` to obtain
    cookies, a GET of ``/translate`` to scrape the page token and gtk seed,
    a call into ``BaiduTranslate.js`` to compute the request sign, and a
    POST to ``/v2transapi`` for the result.

    Uses the module-level ``requests``, ``lxml.etree`` and ``execjs`` imports.
    """

    # Seconds allowed for each HTTP request; without a timeout `requests`
    # can wait on an unresponsive server indefinitely.
    REQUEST_TIMEOUT = 10

    # Token used when none can be extracted from the translate page.
    DEFAULT_TOKEN = '9904ca00b5752d4442a203b934b90b2a'

    # Language table used when `langList` cannot be parsed from the page.
    DEFAULT_LANG_DICT = {
        'zh': '中文', 'jp': '日语', 'jpka': '日语假名', 'th': '泰语', 'fra': '法语', 'en': '英语', 'spa': '西班牙语',
        'kor': '韩语', 'tr': '土耳其语', 'vie': '越南语', 'ms': '马来语', 'de': '德语', 'ru': '俄语', 'ir': '伊朗语',
        'ara': '阿拉伯语', 'est': '爱沙尼亚语', 'be': '白俄罗斯语', 'bul': '保加利亚语', 'hi': '印地语', 'is': '冰岛语',
        'pl': '波兰语', 'fa': '波斯语', 'dan': '丹麦语', 'tl': '菲律宾语', 'fin': '芬兰语', 'nl': '荷兰语',
        'ca': '加泰罗尼亚语', 'cs': '捷克语', 'hr': '克罗地亚语', 'lv': '拉脱维亚语', 'lt': '立陶宛语', 'rom': '罗马尼亚语',
        'af': '南非语', 'no': '挪威语', 'pt_BR': '巴西语', 'pt': '葡萄牙语', 'swe': '瑞典语', 'sr': '塞尔维亚语',
        'eo': '世界语', 'sk': '斯洛伐克语', 'slo': '斯洛文尼亚语', 'sw': '斯瓦希里语', 'uk': '乌克兰语', 'iw': '希伯来语',
        'el': '希腊语', 'hu': '匈牙利语', 'hy': '亚美尼亚语', 'it': '意大利语', 'id': '印尼语', 'sq': '阿尔巴尼亚语',
        'am': '阿姆哈拉语', 'as': '阿萨姆语', 'az': '阿塞拜疆语', 'eu': '巴斯克语', 'bn': '孟加拉语', 'bs': '波斯尼亚语',
        'gl': '加利西亚语', 'ka': '格鲁吉亚语', 'gu': '古吉拉特语', 'ha': '豪萨语', 'ig': '伊博语', 'iu': '因纽特语',
        'ga': '爱尔兰语', 'zu': '祖鲁语', 'kn': '卡纳达语', 'kk': '哈萨克语', 'ky': '吉尔吉斯语', 'lb': '卢森堡语',
        'mk': '马其顿语', 'mt': '马耳他语', 'mi': '毛利语', 'mr': '马拉提语', 'ne': '尼泊尔语', 'or': '奥利亚语',
        'pa': '旁遮普语', 'qu': '凯楚亚语', 'tn': '塞茨瓦纳语', 'si': '僧加罗语', 'ta': '泰米尔语', 'tt': '塔塔尔语',
        'te': '泰卢固语', 'ur': '乌尔都语', 'uz': '乌兹别克语', 'cy': '威尔士语', 'yo': '约鲁巴语', 'yue': '粤语',
        'wyw': '文言文', 'cht': '中文繁体',
    }

    @staticmethod
    def _first_text(html, xpath):
        """Return the first node matched by `xpath`, or '' when nothing matches."""
        if html is None:
            return ''
        nodes = html.xpath(xpath)
        return nodes[0] if nodes else ''

    def get_token(self, query, headers):
        """Fetch the translate page and scrape the values needed to sign a request.

        :param query: text to translate; sent as the ``query`` URL parameter.
        :param headers: HTTP headers for the page request (see ``get_headers``).
        :return: ``(token, gtk, lang_dict)``. ``token`` falls back to
            ``DEFAULT_TOKEN``, ``gtk`` to ``''`` and ``lang_dict`` to a copy of
            ``DEFAULT_LANG_DICT`` when the value cannot be found in the page.
        :raises: whatever ``requests.get`` raises, including a timeout after
            ``REQUEST_TIMEOUT`` seconds.
        """
        params = {
            'query': query,
            'keyfrom': 'baidu',
            'smartresult': 'dict',
            'lang': 'zh2en',
        }
        u = 'https://fanyi.baidu.com/translate?' + parse.urlencode(params)
        res = requests.get(u, headers=headers, timeout=self.REQUEST_TIMEOUT).content.decode('utf-8')
        html = etree.HTML(res)

        # A missing <script> yields '' so the fallbacks below apply instead of
        # an IndexError escaping before they are reached.
        common_script = self._first_text(html, '/html/body/script[1]/text()')

        tokens = re.findall("token: '(.*?)',", common_script)
        token = tokens[0] if tokens else self.DEFAULT_TOKEN

        try:
            compact = common_script.replace('\n', '').replace(' ', '')
            langs = re.findall("langList:{(.*?)},", compact)[0].replace("'", "")
            pairs = (item.split(':') for item in langs.split(','))
            lang_dict = {pair[0]: pair[1] for pair in pairs}
        except IndexError:
            # No langList block, or an entry without a ':' separator.
            lang_dict = dict(self.DEFAULT_LANG_DICT)

        gtk_script = self._first_text(html, '/html/body/script[3]/text()')
        gtks = re.findall("window.gtk = '(.*?)';", gtk_script)
        gtk = gtks[0] if gtks else ''

        return token, gtk, lang_dict

    def convert_cookie_str(self, cookie):
        """Serialize a ``{name: value}`` cookie dict as ``name=value;name=value``.

        :param cookie: mapping of cookie names to string values.
        :return: the pairs joined with ``;`` (empty string for an empty mapping).
        """
        return ';'.join(k + '=' + v for k, v in cookie.items())

    def get_cookie(self, query):
        """POST `query` to the ``/langdetect`` endpoint and return the response cookies.

        :param query: text sent as the ``query`` form field.
        :return: the cookies set by the response, formatted by ``convert_cookie_str``.
        :raises: whatever ``requests.post`` raises, including a timeout after
            ``REQUEST_TIMEOUT`` seconds.
        """
        url = 'https://fanyi.baidu.com/langdetect'
        data = {
            'query': query,
        }
        jar = requests.post(url, data=data, timeout=self.REQUEST_TIMEOUT).cookies
        cookie = requests.utils.dict_from_cookiejar(jar)
        return self.convert_cookie_str(cookie)

    def get_sign(self, query, gtk):
        """Compute the request sign by calling ``e(query, gtk)`` in ``BaiduTranslate.js``.

        The script is loaded from the directory containing this Python file.

        :param query: text to translate.
        :param gtk: seed string scraped by ``get_token`` (may be ``''``).
        :return: the value returned by the JavaScript function ``e``.
        """
        path = os.path.abspath(os.path.dirname(__file__))
        js_path = os.path.join(path, 'BaiduTranslate.js')
        with open(js_path, 'r', encoding='utf-8') as js_file:
            js = js_file.read()
        ctx = execjs.compile(js)
        return ctx.call("e", query, gtk)

    def get_headers(self, query):
        """Build request headers: a random User-Agent plus cookies from ``get_cookie``.

        :param query: text to translate; forwarded to ``get_cookie``.
        :return: dict with ``User-Agent``, ``X-Requested-With`` and ``Cookie``.
        """
        return {
            'User-Agent': get_random_agent(),
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': self.get_cookie(query),
        }

    def get_data(self, f, to, query, sign, token):
        """Assemble the form body for the ``/v2transapi`` request.

        :param f: source language code (e.g. ``'en'``).
        :param to: target language code (e.g. ``'zh'``).
        :param query: text to translate.
        :param sign: value produced by ``get_sign``.
        :param token: value produced by ``get_token``.
        :return: dict of form fields.
        """
        return {
            "from": f,
            "to": to,
            "query": query,
            "transtype": "translang",
            "simple_means_flag": "3",
            "sign": sign,
            "token": token,
            "domain": "common",
        }

    def translate(self, query, interpret):
        """Translate `query` and return the first translated segment.

        :param query: text to translate.
        :param interpret: ``'<from>_<to>'`` language pair such as ``'en_zh'``.
            Only the first underscore separates the two codes, so a target
            code that itself contains an underscore (``'en_pt_BR'``) works.
        :return: the ``dst`` string of the first result entry, or ``None``
            when the response is not JSON or lacks that entry.
        :raises ValueError: if `interpret` contains no underscore.
        """
        f, to = interpret.split('_', 1)
        headers = self.get_headers(query)
        token, gtk, _lang_dict = self.get_token(query, headers)
        sign = self.get_sign(query, gtk)
        data = self.get_data(f, to, query, sign, token)

        url = 'https://fanyi.baidu.com/v2transapi?from={}&to={}'.format(f, to)
        res = requests.post(url, data=data, headers=headers, timeout=self.REQUEST_TIMEOUT)
        try:
            return res.json().get('trans_result').get('data')[0].get('dst')
        except (ValueError, AttributeError, TypeError, IndexError, KeyError):
            # Non-JSON body, or a payload without trans_result/data[0].
            return None


def en_to_zh(query):
    """Translate English text to Chinese, retrying until a result is obtained.

    Each attempt uses a fresh ``Translate`` instance with the ``'en_zh'``
    pair. A failed attempt (an exception or an empty result) is followed by
    a 3-second pause and another try; there is no retry limit.

    :param query: English text to translate.
    :return: the translated text.
    """
    print('英文翻译为中文', query)
    interpret = 'en_zh'
    while True:
        try:
            dst = Translate().translate(query, interpret)
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the loop can be stopped.
            dst = None
        if dst:
            break
        time.sleep(3)
    print('英文翻译为中文,翻译成功', dst)
    return dst


def zh_to_en(query):
    """Translate Chinese text to English, retrying until a result is obtained.

    Each attempt uses a fresh ``Translate`` instance with the ``'zh_en'``
    pair. A failed attempt (an exception or an empty result) is followed by
    a 3-second pause and another try; there is no retry limit.

    :param query: Chinese text to translate.
    :return: the translated text.
    """
    print('中文翻译为英文', query)
    interpret = 'zh_en'
    while True:
        try:
            dst = Translate().translate(query, interpret)
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the loop can be stopped.
            dst = None
        if dst:
            break
        time.sleep(3)
    print('中文翻译为英文,翻译成功', dst)
    return dst


if __name__ == '__main__':
    # Smoke test: translate a fixed English phrase to Chinese and print the result.
    print(Translate().translate('NOMINATIONS SENT TO THE SENATE', 'en_zh'))