From db62948401929e7508dd72eb1bdb7a7abfec95c8 Mon Sep 17 00:00:00 2001 From: Vadim <1074746@gmail.com> Date: Thu, 18 Jan 2024 07:58:43 +0200 Subject: [PATCH 1/3] Create Google Translate with multiple API support Implemented Google Translate functionality supporting various Google APIs. New command `translate:google` and respective test cases were added. The translation functionality provides support for numerous languages by mapping respective language codes. --- .eslintrc.json | 1 + src/commands/translate/google.ts | 70 ++++ src/shared/config.ts | 7 + src/shared/engines/google.engine.ts | 508 +++++++++++++++++++++++++ src/shared/entities/google.config.ts | 6 + src/shared/helper.ts | 2 +- src/shared/lang.google.ts | 347 +++++++++++++++++ test/commands/translate/google.test.ts | 12 + 8 files changed, 952 insertions(+), 1 deletion(-) create mode 100644 src/commands/translate/google.ts create mode 100644 src/shared/engines/google.engine.ts create mode 100644 src/shared/entities/google.config.ts create mode 100644 src/shared/lang.google.ts create mode 100644 test/commands/translate/google.test.ts diff --git a/.eslintrc.json b/.eslintrc.json index f011396..21cd880 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -11,6 +11,7 @@ "unicorn/switch-case-braces": "off", "valid-jsdoc": "off", "guard-for-in": "off", + "unicorn/numeric-separators-style": "off", "unicorn/catch-error-name": [ "error", { diff --git a/src/commands/translate/google.ts b/src/commands/translate/google.ts new file mode 100644 index 0000000..e829d00 --- /dev/null +++ b/src/commands/translate/google.ts @@ -0,0 +1,70 @@ +import {Args, Flags} from '@oclif/core' + +import {GoogleEngine} from "../../shared/engines/google.engine.js"; +import {LabelBaseCommand} from "../../shared/label-base.command.js"; +import {TGoogleLangCode, TGoogleLangCodeExtend} from "../../shared/lang.google.js"; + +/** + * node --loader ts-node/esm ./bin/dev translate:google --help + * node --loader ts-node/esm ./bin/dev translate:google Text + * node --loader ts-node/esm ./bin/dev translate:google Text -tuk + */ +export default class TranslateGoogle extends LabelBaseCommand { + static args = { + text: Args.string({description: 'The text to be translated, can\'t be blank. The maximum text length is 1000.', required: true}), + } + + static description = 'A simple and free API for Google Translator.' + + static enableJsonFlag = true; + + static examples = [ + '<%= config.bin %> <%= command.id %>', + '<%= config.bin %> <%= command.id %> Text -tuk', + '<%= config.bin %> <%= command.id %> Text -tuk --json', + ] + + static flags = { + from: Flags.string({char: 'f', default: GoogleEngine.defaultFrom, description: 'The language code of source text.'}), + to: Flags.string({char: 't', default: GoogleEngine.defaultTo, description: 'The language in which the text should be translated.'}), + } + + static hidden = true; + + /** + * source language code. `auto-detect` by default. + */ + private from: TGoogleLangCodeExtend; + /** + * content to be translated + */ + private text: string; + /** + * target language code. `en` by default. + */ + private to: TGoogleLangCode; + + public async run(): Promise | void> { + const {args, flags} = await this.parse(TranslateGoogle) + + await this.readCliConfig() + + this.text = args.text.trim(); + this.from = (flags.from || 'auto-detect') as TGoogleLangCodeExtend; + this.to = (flags.to || 'en') as TGoogleLangCode; + + const be = new GoogleEngine({...this.cliConfig.google}, flags.userAgent || this.cliConfig.userAgent); + + const response = await be.translate({ + from: this.from, + text: this.text, + to: this.to, + }) + + if (this.jsonEnabled()) { + return response; + } + + this.log(response.trans); + } +} diff --git a/src/shared/config.ts b/src/shared/config.ts index 87909ab..df3ba4f 100644 --- a/src/shared/config.ts +++ b/src/shared/config.ts @@ -2,6 +2,7 @@ import {DEFAULT_USER_AGENT} from "./constants.js"; import {IBingConfig} from "./entities/bing.config.js"; import {ITerraConfig} from "./entities/terra.config.js"; import {TEngineTranslation} from "./entities/translation.engine.js"; +import {IGoogleConfig} from "./entities/google.config.js"; export const LANG_CODE_DEFAULT = 'en'; export const LABEL_VALIDATION_DEFAULT = '^[a-z0-9\\.\\-\\_]{3,100}$'; @@ -21,6 +22,10 @@ export interface IConfig { bing?: IBingConfig; engine: TEngineTranslation; engineUseCache?: false; + /** + * Configuration options for Google setting. + */ + google?: IGoogleConfig; /** * Regular expression pattern for validating labels. * @@ -38,6 +43,7 @@ export interface IConfig { * Configuration options for TerraPrint. */ terra?: ITerraConfig; + userAgent: string; } export const CONFIG_DEFAULT: IConfig = { @@ -82,4 +88,5 @@ export const CONFIG_DEFAULT: IConfig = { toLangCode: 'uk', userAgent: DEFAULT_USER_AGENT }, + userAgent: DEFAULT_USER_AGENT } diff --git a/src/shared/engines/google.engine.ts b/src/shared/engines/google.engine.ts new file mode 100644 index 0000000..f971747 --- /dev/null +++ b/src/shared/engines/google.engine.ts @@ -0,0 +1,508 @@ +import {ExtendOptions, Got, got} from "got"; + +import {DEFAULT_USER_AGENT} from "../constants.js"; +import {IGoogleConfig} from "../entities/google.config.js"; +import {BaseEngine, IParamTranslateText} from "../entities/translation.engine.js"; +import { + GOOGLE_DOMAIN, + GOOGLE_LANG_MAP, + TGoogleDomain, + TGoogleLangCode, + TGoogleLangCodeExtend, + TGoogleType +} from "../lang.google.js"; +import {UTIL} from "../util.js"; + +export class GoogleEngine implements BaseEngine { + static readonly defaultFrom: TGoogleLangCodeExtend = 'auto'; + static readonly defaultService: TGoogleDomain = 'translate.google.com'; + static readonly defaultTo: TGoogleLangCode = 'en'; + static readonly MAX_RETRY_COUNT = 2; + + private client: Got; + private readonly cookies: Record = {}; + + private readonly parserType: TGoogleType; + + private reqId: number; + private readonly serviceUrl: TGoogleDomain; + private readonly userAgent: string; + + constructor(private readonly config: IGoogleConfig, userAgent?: string) { + this.userAgent = DEFAULT_USER_AGENT ?? userAgent; + + this.serviceUrl = config?.service || GoogleEngine.defaultService; + this.parserType = config?.type || 'rpc'; + + this.reqId = Number(Date.now().toString().slice(6, 13)); + + this.client = got.extend({ + cache: false, + headers: { + 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', + "referer": `https://${this.serviceUrl}/`, + 'user-agent': this.userAgent + }, + hooks: { + afterResponse: [ + (response) => { + if ('set-cookie' in response.headers) { + response.headers['set-cookie'].forEach((cookie: string) => { + const raw = this.parseRawCookie(cookie); + if (raw) { + this.cookies[raw.name] = raw.value; + } + }) + } + + return response; + } + ], + beforeError: [], + beforeRedirect: [], + beforeRequest: [ + (options) => { + const cookies: string[] = []; + if (cookies.length > 0) { + options.headers['set-cookie'] = cookies; + } + // options.headers.secret = options.context.secret; + } + ], + beforeRetry: [], + init: [] + }, + http2: true, + retry: { + limit: GoogleEngine.MAX_RETRY_COUNT, + methods: ['GET', 'POST'] + } + } as ExtendOptions); + } + + static isSupported(lang: TGoogleLangCodeExtend): boolean { + return lang in GOOGLE_LANG_MAP; + } + + async translate(param: IParamTranslateText) { + if (!GoogleEngine.isSupported(param.to)) { + throw new Error(`Not support lang ${param.to}`); + } + + if (param.from !== 'auto' && !GoogleEngine.isSupported(param.from)) { + throw new Error(`Not support lang ${param.from}`); + } + + if (this.parserType === "api1") { + return this.translateAPI1(param); + } + + if (this.parserType === "api2") { + return this.translateAPI2(param); + } + + if (this.parserType === "api3") { + return this.translateAPI3(param); + } + + if (this.parserType === "api4") { + return this.translateAPI4(param); + } + + if (this.parserType === "rpc") { + return this.translateRPC(param); + } + + throw new Error(`Not support type: ${this.parserType}`); + } + + async translateAPI1(param: IParamTranslateText) { + const {body, statusCode} = await this.client.get('https://translate.googleapis.com/translate_a/single', { + followRedirect: false, + responseType: 'json', + searchParams: new URLSearchParams([ + ['client', 'gtx'], + ['dt', 't'], + ['q', param.text], + ['sl', param.from], + ['tl', param.to], + ]) + }); + + if (statusCode === 0) { + throw new Error('connection server'); + } + + if (statusCode > 400) { + return null; + } + + // [[["Текст","Text",null,null,3,null,null,[[]],[[["2e8de74564aec87fb81cb0340a661858","tea_SouthSlavicA_en2bebsbghrsrsluk_2022q2.md"]]]]],null,"en",null,null,null,1,[],[["en"],null,[1],["en"]]] + let trans: string; + + try { + [[trans,]] = (body as [string[]][]).shift(); + } catch (reason) { + console.log([body]) + throw reason; + } + + return {orig: param.text, trans} + } + + async translateAPI2(param: IParamTranslateText) { + const {body, statusCode} = await this.client.get('https://clients5.google.com/translate_a/t', { + followRedirect: false, + responseType: 'json', + searchParams: new URLSearchParams([ + ['client', 'dict-chrome-ex'], + ['q', param.text], + ['sl', param.from], + ['tl', param.to], + ]) + }); + + if (statusCode === 0) { + throw new Error('connection server'); + } + + if (statusCode > 400) { + return null; + } + + // [ [ 'Текст', 'en' ]] + let trans: string; + let lang: string; + + try { + [[trans, lang,]] = body as [string[]]; + } catch (reason) { + console.log([body]) + throw reason; + } + + return {lang, orig: param.text, trans} + } + + async translateAPI3(param: IParamTranslateText) { + const {body, statusCode} = await this.client.get>('https://translate.googleapis.com/translate_a/single', { + followRedirect: false, + responseType: 'json', + searchParams: new URLSearchParams([ + ['client', 'gtx'], + ['source', 'bubble'], + ['dj', '1'], + ['q', param.text], + ['sl', param.from], + ['tl', param.to], + ['hl', param.to], + ['dt', 't'], + ['dt', 'bd'], + ['dt', 'ex'], + ['dt', 'ld'], + ['dt', 'md'], + ['dt', 'qca'], + ['dt', 'rw'], + ['dt', 'rm'], + ['dt', 'ss'], + ['dt', 'at'], + ]) + }); + + if (statusCode === 0) { + throw new Error('connection server'); + } + + if (statusCode > 400) { + return null; + } + + /// { + // "sentences":[ + // { + // "trans":"Напишіть двом розробникам", + // "orig":"Text two developers", + // "backend":3, + // "model_specification":[ + // { } + // ], + // "translation_engine_debug_info":[ + // { + // "model_tracking":{ + // "checkpoint_md5":"2e8de74564aec87fb81cb0340a661858", + // "launch_doc":"tea_SouthSlavicA_en2bebsbghrsrsluk_2022q2.md" + // } + // } + // ] + // }, + // { + // "translit":"Napyshitʹ dvom rozrobnykam" + // } + // ], + // "src":"en", + // "alternative_translations":[ + // { + // "src_phrase":"Text two developers", + // "alternative":[ + // { + // "word_postproc":"Напишіть двом розробникам", + // "score":0, + // "has_preceding_space":true, + // "attach_to_next_token":false, + // "backends":[ + // 3 + // ], + // "backend_infos":[ + // { + // "backend":3 + // } + // ] + // }, + // { + // "word_postproc":"Надішліть текстове повідомлення двом розробникам", + // "score":0, + // "has_preceding_space":true, + // "attach_to_next_token":false, + // "backends":[ + // 8 + // ] + // } + // ], + // "srcunicodeoffsets":[ + // { + // "begin":0, + // "end":19 + // } + // ], + // "raw_src_segment":"Text two developers", + // "start_pos":0, + // "end_pos":0 + // } + // ], + // "confidence":1, + // "spell":{ }, + // "ld_result":{ + // "srclangs":[ + // "en" + // ], + // "srclangs_confidences":[ + // 1 + // ], + // "extended_srclangs":[ + // "en" + // ] + // } + // } + let trans: string; + let src: string; + + try { + if ('sentences' in body && Array.isArray(body.sentences)) { + [{trans},] = body.sentences; + } + + if ('src' in body) { + src = body.src as string; + } + } catch (reason) { + console.log([body]) + throw reason; + } + + return {lang: src, orig: param.text, trans} + } + + async translateAPI4(param: IParamTranslateText) { + const {body, statusCode} = await this.client.get>('https://translate.googleapis.com/translate_a/single', { + followRedirect: false, + responseType: 'json', + searchParams: new URLSearchParams([ + ['client', 'gtx'], + ['source', 'input'], + ['dj', '1'], + ['q', param.text], + ['sl', param.from], + ['tl', param.to], + ['dt', 't'], + ['dt', 'bd'], + ]) + }); + + if (statusCode === 0) { + throw new Error('connection server'); + } + + if (statusCode > 400) { + return null; + } + + // { + // "sentences":[ + // { + // "trans":"Напишіть двом розробникам", + // "orig":"Text two developers", + // "backend":3, + // "model_specification":[ + // { } + // ], + // "translation_engine_debug_info":[ + // { + // "model_tracking":{ + // "checkpoint_md5":"2e8de74564aec87fb81cb0340a661858", + // "launch_doc":"tea_SouthSlavicA_en2bebsbghrsrsluk_2022q2.md" + // } + // } + // ] + // } + // ], + // "src":"en", + // "confidence":1, + // "spell":{ }, + // "ld_result":{ + // "srclangs":[ + // "en" + // ], + // "srclangs_confidences":[ + // 1 + // ], + // "extended_srclangs":[ + // "en" + // ] + // } + // } + + let trans: string; + let src: string; + + try { + if ('sentences' in body && Array.isArray(body.sentences)) { + [{trans},] = body.sentences; + } + + if ('src' in body) { + src = body.src as string; + } + } catch (reason) { + console.log([body]) + throw reason; + } + + return {lang: src, orig: param.text, trans} + } + + /** + * Google Translates RPC API + */ + async translateRPC(param: IParamTranslateText) { + const text = param.text.replaceAll('"', " "); + // rpcids = 'AVdN8'; + // dataString = 'f.req=' + encodeURI(`[[["${rpcids}","[\\"${text}\\",\\"${param.text}\\",\\"${param.from}\\"]",null,"generic"]]]`); + + const rpcids = 'MkEWBc'; + const dataString = 'f.req=' + encodeURI(`[[["${rpcids}","[[\\"${text}\\",\\"${param.from}\\",\\"${param.to}\\",1],[]]",null,"generic"]]]`); + + if (!GOOGLE_DOMAIN.includes(this.serviceUrl)) { + throw new Error(`Not support domain: ${this.serviceUrl}`); + } + + const {body, statusCode} = await this.client.post(`https://${this.serviceUrl}/_/TranslateWebserverUi/data/batchexecute`, { + body: dataString, + cache: false, + headers: { + Accept: '*/*', + }, + searchParams: new URLSearchParams([ + ['rpcids', rpcids], + ['source-path', '/'], + // ['f.sid', '2583840604490340159'], + ['bl', 'boq_translate-webserver_20240115.08_p0'], + // ['hl', param.to], + ['soc-app', '1'], + ['soc-platform', '1'], + ['soc-device', '1'], + ['_reqid', this.getReqId().toString()], + ['rt', 'c'], + ]) + }); + + if (statusCode === 0) { + throw new Error('connection server'); + } + + if (statusCode > 400) { + return null; + } + + // )]}' + // + // 507 + // [["wrb.fr","MkEWBc","[[null,null,\"en\",[[[0,[[[null,20]],[true]]]],20],[[\"Two.\",null,null,20]],null,[\"Two.\",\"auto\",\"uk\",true]],[[[null,\"Dvoye.\",null,null,null,[[\"Двоє.\",null,null,null,[[\"Двоє.\",[5],[]],[\"Двоє.\",[11],[]]]]],null,null,null,[]]],\"uk\",1,\"en\",[\"Two developer there.\",\"auto\",\"uk\",true]],\"en\"]",null,null,null,"generic"],["di",24],["af.httprm",23,"8141690242435471324",27]] + // 25 + // [["e",4,null,null,595]] + + let transliterate: string; + let trans: string; + let lang: string; + + try { + const lines: string[] = body.split('\n'); + const line = lines.find((ln) => ln.includes(`"${rpcids}"`)); + + const data = JSON.parse(JSON.parse(line).shift()[2]) + + if (Array.isArray(data) && data.length > 0) { + trans = UTIL.getNestedValue(data, '1.0.0.5.0.0') as string; + + transliterate = UTIL.getNestedValue(data, '1.0.0.1') as string; + + if (!param.from || param.from === 'auto') { + lang = UTIL.getNestedValue(data, '2') as string; + if (!lang) { + lang = UTIL.getNestedValue(data, '1.0.0.3') as string; + } + + if (!lang) { + lang = UTIL.getNestedValue(data, '1.3') as string; + } + } else { + lang = param.from; + } + } + } catch (reason) { + console.log([reason]) + return null; + } + + return {lang, orig: param.text, trans, transliterate} + } + + async translateText(param: IParamTranslateText): Promise { + const data = await this.translate(param); + + return data.trans; + } + + private getReqId() { + this.reqId += 100000; + return this.reqId; + } + + private parseRawCookie(raw: string) { + const [name, ...value] = raw.split(';') + .shift() + .split('=') + + if (!name) { + return null; + } + + let sep = ''; + if (value.length > 1) { + sep='='; + } + + return { + name, value: value.join(sep) + }; + } +} diff --git a/src/shared/entities/google.config.ts b/src/shared/entities/google.config.ts new file mode 100644 index 0000000..314cc49 --- /dev/null +++ b/src/shared/entities/google.config.ts @@ -0,0 +1,6 @@ +import {TGoogleDomain, TGoogleType} from "../lang.google.js"; + +export interface IGoogleConfig { + service?: TGoogleDomain; + type?: TGoogleType; +} diff --git a/src/shared/helper.ts b/src/shared/helper.ts index 6e5a730..b6a5595 100644 --- a/src/shared/helper.ts +++ b/src/shared/helper.ts @@ -11,7 +11,7 @@ const readJsonFile = async (pathFile: string): Promise> => { } const writeJsonFile = async (dataJson: NonNullable, pathFile: string, spaces = 2) => { - return writeJson(pathFile, dataJson, { + return writeJson(pathFile, dataJson, { encoding: 'utf8', spaces }); diff --git a/src/shared/lang.google.ts b/src/shared/lang.google.ts new file mode 100644 index 0000000..0637706 --- /dev/null +++ b/src/shared/lang.google.ts @@ -0,0 +1,347 @@ +export const GOOGLE_LANG_MAP = { + 'af': 'Afrikaans', + 'ak': 'Twi', + 'am': 'Amharic', + 'ar': 'Arabic', + 'as': 'Assamese', + 'ay': 'Aymara', + 'az': 'Azerbaijani', + 'be': 'Belarusian', + 'bg': 'Bulgarian', + 'bho': 'Bhojpuri', + 'bm': 'Bambara', + 'bn': 'Bengali', + 'bs': 'Bosnian', + 'ca': 'Catalan', + 'ceb': 'Cebuano', + 'ckb': 'Kurdish (Sorani)', + 'co': 'Corsican', + 'cs': 'Czech', + 'cy': 'Welsh', + 'da': 'Danish', + 'de': 'German', + 'doi': 'Dogri', + 'dv': 'Dhivehi', + 'ee': 'Ewe', + 'el': 'Greek', + 'en': 'English', + 'eo': 'Esperanto', + 'es': 'Spanish', + 'et': 'Estonian', + 'eu': 'Basque', + 'fa': 'Persian', + 'fi': 'Finnish', + 'fr': 'French', + 'fy': 'Frisian', + 'ga': 'Irish', + 'gd': 'Scots Gaelic', + 'gl': 'Galician', + 'gn': 'Guarani', + 'gom': 'Konkani', + 'gu': 'Gujarati', + 'ha': 'Hausa', + 'haw': 'Hawaiian', + 'hi': 'Hindi', + 'hmn': 'Hmong', + 'hr': 'Croatian', + 'ht': 'Haitian Creole', + 'hu': 'Hungarian', + 'hy': 'Armenian', + 'id': 'Indonesian', + 'ig': 'Igbo', + 'ilo': 'Ilocano', + 'is': 'Icelandic', + 'it': 'Italian', + 'iw': 'Hebrew', + 'ja': 'Japanese', + 'jw': 'Javanese', + 'ka': 'Georgian', + 'kk': 'Kazakh', + 'km': 'Khmer', + 'kn': 'Kannada', + 'ko': 'Korean', + 'kri': 'Krio', + 'ku': 'Kurdish (Kurmanji)', + 'ky': 'Kyrgyz', + 'la': 'Latin', + 'lb': 'Luxembourgish', + 'lg': 'Luganda', + 'ln': 'Lingala', + 'lo': 'Lao', + 'lt': 'Lithuanian', + 'lus': 'Mizo', + 'lv': 'Latvian', + 'mai': 'Maithili', + 'mg': 'Malagasy', + 'mi': 'Maori', + 'mk': 'Macedonian', + 'ml': 'Malayalam', + 'mn': 'Mongolian', + 'mni-Mtei': 'Meiteilon (Manipuri)', + 'mr': 'Marathi', + 'ms': 'Malay', + 'mt': 'Maltese', + 'my': 'Myanmar (Burmese)', + 'ne': 'Nepali', + 'nl': 'Dutch', + 'no': 'Norwegian', + 'nso': 'Sepedi', + 'ny': 'Chichewa', + 'om': 'Oromo', + 'or': 'Odia (Oriya)', + 'pa': 'Punjabi', + 'pl': 'Polish', + 'ps': 'Pashto', + 'pt': 'Portuguese', + 'qu': 'Quechua', + 'ro': 'Romanian', + 'ru': 'Russian', + 'rw': 'Kinyarwanda', + 'sa': 'Sanskrit', + 'sd': 'Sindhi', + 'si': 'Sinhala', + 'sk': 'Slovak', + 'sl': 'Slovenian', + 'sm': 'Samoan', + 'sn': 'Shona', + 'so': 'Somali', + 'sq': 'Albanian', + 'sr': 'Serbian', + 'st': 'Sesotho', + 'su': 'Sundanese', + 'sv': 'Swedish', + 'sw': 'Swahili', + 'ta': 'Tamil', + 'te': 'Telugu', + 'tg': 'Tajik', + 'th': 'Thai', + 'ti': 'Tigrinya', + 'tk': 'Turkmen', + 'tl': 'Filipino', + 'tr': 'Turkish', + 'ts': 'Tsonga', + 'tt': 'Tatar', + 'ug': 'Uyghur', + 'uk': 'Ukrainian', + 'ur': 'Urdu', + 'uz': 'Uzbek', + 'vi': 'Vietnamese', + 'xh': 'Xhosa', + 'yi': 'Yiddish', + 'yo': 'Yoruba', + 'zh-CN': 'Chinese (Simplified)', + 'zh-TW': 'Chinese (Traditional)', + 'zu': 'Zulu' +}; + +export const GOOGLE_DOMAIN = [ + "translate.google.ac", + "translate.google.ad", + "translate.google.ae", + "translate.google.al", + "translate.google.am", + "translate.google.as", + "translate.google.at", + "translate.google.az", + "translate.google.ba", + "translate.google.be", + "translate.google.bf", + "translate.google.bg", + "translate.google.bi", + "translate.google.bj", + "translate.google.bs", + "translate.google.bt", + "translate.google.by", + "translate.google.ca", + "translate.google.cat", + "translate.google.cc", + "translate.google.cd", + "translate.google.cf", + "translate.google.cg", + "translate.google.ch", + "translate.google.ci", + "translate.google.cl", + "translate.google.cm", + "translate.google.cn", + "translate.google.co.ao", + "translate.google.co.bw", + "translate.google.co.ck", + "translate.google.co.cr", + "translate.google.co.id", + "translate.google.co.il", + "translate.google.co.in", + "translate.google.co.jp", + "translate.google.co.ke", + "translate.google.co.kr", + "translate.google.co.ls", + "translate.google.co.ma", + "translate.google.co.mz", + "translate.google.co.nz", + "translate.google.co.th", + "translate.google.co.tz", + "translate.google.co.ug", + "translate.google.co.uk", + "translate.google.co.uz", + "translate.google.co.ve", + "translate.google.co.vi", + "translate.google.co.za", + "translate.google.co.zm", + "translate.google.co.zw", + "translate.google.co", + "translate.google.com.af", + "translate.google.com.ag", + "translate.google.com.ai", + "translate.google.com.ar", + "translate.google.com.au", + "translate.google.com.bd", + "translate.google.com.bh", + "translate.google.com.bn", + "translate.google.com.bo", + "translate.google.com.br", + "translate.google.com.bz", + "translate.google.com.co", + "translate.google.com.cu", + "translate.google.com.cy", + "translate.google.com.do", + "translate.google.com.ec", + "translate.google.com.eg", + "translate.google.com.et", + "translate.google.com.fj", + "translate.google.com.gh", + "translate.google.com.gi", + "translate.google.com.gt", + "translate.google.com.hk", + "translate.google.com.jm", + "translate.google.com.kh", + "translate.google.com.kw", + "translate.google.com.lb", + "translate.google.com.lc", + "translate.google.com.ly", + "translate.google.com.mm", + "translate.google.com.mt", + "translate.google.com.mx", + "translate.google.com.my", + "translate.google.com.na", + "translate.google.com.ng", + "translate.google.com.ni", + "translate.google.com.np", + "translate.google.com.om", + "translate.google.com.pa", + "translate.google.com.pe", + "translate.google.com.pg", + "translate.google.com.ph", + "translate.google.com.pk", + "translate.google.com.pr", + "translate.google.com.py", + "translate.google.com.qa", + "translate.google.com.sa", + "translate.google.com.sb", + "translate.google.com.sg", + "translate.google.com.sl", + "translate.google.com.sv", + "translate.google.com.tj", + "translate.google.com.tr", + "translate.google.com.tw", + "translate.google.com.ua", + "translate.google.com.uy", + "translate.google.com.vc", + "translate.google.com.vn", + "translate.google.com", + "translate.google.cv", + "translate.google.cx", + "translate.google.cz", + "translate.google.de", + "translate.google.dj", + "translate.google.dk", + "translate.google.dm", + "translate.google.dz", + "translate.google.ee", + "translate.google.es", + "translate.google.eu", + "translate.google.fi", + "translate.google.fm", + "translate.google.fr", + "translate.google.ga", + "translate.google.ge", + "translate.google.gf", + "translate.google.gg", + "translate.google.gl", + "translate.google.gm", + "translate.google.gp", + "translate.google.gr", + "translate.google.gy", + "translate.google.hn", + "translate.google.hr", + "translate.google.ht", + "translate.google.hu", + "translate.google.ie", + "translate.google.im", + "translate.google.io", + "translate.google.iq", + "translate.google.is", + "translate.google.it", + "translate.google.je", + "translate.google.jo", + "translate.google.kg", + "translate.google.ki", + "translate.google.kz", + "translate.google.la", + "translate.google.li", + "translate.google.lk", + "translate.google.lt", + "translate.google.lu", + "translate.google.lv", + "translate.google.md", + "translate.google.me", + "translate.google.mg", + "translate.google.mk", + "translate.google.ml", + "translate.google.mn", + "translate.google.ms", + "translate.google.mu", + "translate.google.mv", + "translate.google.mw", + "translate.google.ne", + "translate.google.nf", + "translate.google.nl", + "translate.google.no", + "translate.google.nr", + "translate.google.nu", + "translate.google.pl", + "translate.google.pn", + "translate.google.ps", + "translate.google.pt", + "translate.google.ro", + "translate.google.rs", + "translate.google.ru", + "translate.google.rw", + "translate.google.sc", + "translate.google.se", + "translate.google.sh", + "translate.google.si", + "translate.google.sk", + "translate.google.sm", + "translate.google.sn", + "translate.google.so", + "translate.google.sr", + "translate.google.st", + "translate.google.td", + "translate.google.tg", + "translate.google.tk", + "translate.google.tl", + "translate.google.tm", + "translate.google.tn", + "translate.google.to", + "translate.google.tt", + "translate.google.us", + "translate.google.vg", + "translate.google.vu", + "translate.google.ws" +] + +export type TGoogleType = 'api1' | 'api2' | 'api3' | 'api4' | 'rpc'; +export type TGoogleDomain = Lowercase; + +export type TGoogleLangCode = keyof typeof GOOGLE_LANG_MAP; +export type TGoogleLangCodeExtend = 'auto' | keyof typeof GOOGLE_LANG_MAP | string; +export type TGoogleLangCodeName = typeof GOOGLE_LANG_MAP[TGoogleLangCode]; diff --git a/test/commands/translate/google.test.ts b/test/commands/translate/google.test.ts new file mode 100644 index 0000000..5848a35 --- /dev/null +++ b/test/commands/translate/google.test.ts @@ -0,0 +1,12 @@ +import {expect, test} from '@oclif/test' + +const inputText = 'Text'; + +describe('translate:google', () => { + test + .stdout() + .command([`translate:google`, inputText, '--to=en']) + .it(`runs translate:google ${inputText} --to=en`, ctx => { + expect(ctx.stdout).to.contain(inputText) + }) +}) From 785f86df9cc3633d6b6f4135ae0bd9d157ae2080 Mon Sep 17 00:00:00 2001 From: Vadim <1074746@gmail.com> Date: Thu, 18 Jan 2024 08:12:05 +0200 Subject: [PATCH 2/3] Refactor Google Translate engine code Simplified the extraction of translation and source language in Google Translate engine. The prior large block of hardcoded mappings has been replaced with `UTIL.getNestedValue()` to fetch respective keys from the returned translation object. This approach results in cleaner code and eases future updates. --- src/shared/engines/google.engine.ts | 133 ++-------------------------- 1 file changed, 8 insertions(+), 125 deletions(-) diff --git a/src/shared/engines/google.engine.ts b/src/shared/engines/google.engine.ts index f971747..27ae840 100644 --- a/src/shared/engines/google.engine.ts +++ b/src/shared/engines/google.engine.ts @@ -217,92 +217,14 @@ export class GoogleEngine implements BaseEngine { return null; } - /// { - // "sentences":[ - // { - // "trans":"Напишіть двом розробникам", - // "orig":"Text two developers", - // "backend":3, - // "model_specification":[ - // { } - // ], - // "translation_engine_debug_info":[ - // { - // "model_tracking":{ - // "checkpoint_md5":"2e8de74564aec87fb81cb0340a661858", - // "launch_doc":"tea_SouthSlavicA_en2bebsbghrsrsluk_2022q2.md" - // } - // } - // ] - // }, - // { - // "translit":"Napyshitʹ dvom rozrobnykam" - // } - // ], - // "src":"en", - // "alternative_translations":[ - // { - // "src_phrase":"Text two developers", - // "alternative":[ - // { - // "word_postproc":"Напишіть двом розробникам", - // "score":0, - // "has_preceding_space":true, - // "attach_to_next_token":false, - // "backends":[ - // 3 - // ], - // "backend_infos":[ - // { - // "backend":3 - // } - // ] - // }, - // { - // "word_postproc":"Надішліть текстове повідомлення двом розробникам", - // "score":0, - // "has_preceding_space":true, - // "attach_to_next_token":false, - // "backends":[ - // 8 - // ] - // } - // ], - // "srcunicodeoffsets":[ - // { - // "begin":0, - // "end":19 - // } - // ], - // "raw_src_segment":"Text two developers", - // "start_pos":0, - // "end_pos":0 - // } - // ], - // "confidence":1, - // "spell":{ }, - // "ld_result":{ - // "srclangs":[ - // "en" - // ], - // "srclangs_confidences":[ - // 1 - // ], - // "extended_srclangs":[ - // "en" - // ] - // } - // } let trans: string; let src: string; try { - if ('sentences' in body && Array.isArray(body.sentences)) { - [{trans},] = body.sentences; - } - - if ('src' in body) { - src = body.src as string; + trans = UTIL.getNestedValue(body, 'sentences.0.trans') as string; + src = UTIL.getNestedValue(body, 'src') as string; + if (!src) { + src = UTIL.getNestedValue(body, 'ld_result.srclangs.0') as string; } } catch (reason) { console.log([body]) @@ -336,51 +258,14 @@ export class GoogleEngine implements BaseEngine { return null; } - // { - // "sentences":[ - // { - // "trans":"Напишіть двом розробникам", - // "orig":"Text two developers", - // "backend":3, - // "model_specification":[ - // { } - // ], - // "translation_engine_debug_info":[ - // { - // "model_tracking":{ - // "checkpoint_md5":"2e8de74564aec87fb81cb0340a661858", - // "launch_doc":"tea_SouthSlavicA_en2bebsbghrsrsluk_2022q2.md" - // } - // } - // ] - // } - // ], - // "src":"en", - // "confidence":1, - // "spell":{ }, - // "ld_result":{ - // "srclangs":[ - // "en" - // ], - // "srclangs_confidences":[ - // 1 - // ], - // "extended_srclangs":[ - // "en" - // ] - // } - // } - let trans: string; let src: string; try { - if ('sentences' in body && Array.isArray(body.sentences)) { - [{trans},] = body.sentences; - } - - if ('src' in body) { - src = body.src as string; + trans = UTIL.getNestedValue(body, 'sentences.0.trans') as string; + src = UTIL.getNestedValue(body, 'src') as string; + if (!src) { + src = UTIL.getNestedValue(body, 'ld_result.srclangs.0') as string; } } catch (reason) { console.log([body]) @@ -414,9 +299,7 @@ export class GoogleEngine implements BaseEngine { searchParams: new URLSearchParams([ ['rpcids', rpcids], ['source-path', '/'], - // ['f.sid', '2583840604490340159'], ['bl', 'boq_translate-webserver_20240115.08_p0'], - // ['hl', param.to], ['soc-app', '1'], ['soc-platform', '1'], ['soc-device', '1'], From 13742c14eda0c9a656e86e8f9da0bbd1db8d7065 Mon Sep 17 00:00:00 2001 From: Vadim <1074746@gmail.com> Date: Thu, 18 Jan 2024 08:26:25 +0200 Subject: [PATCH 3/3] Add Google translation engine support With this change, GoogleEngine has now been added as a translation engine option in the application. The configuration can now switch to GoogleEngine if 'google' is selected. This added support provides more options and flexibility in the choice of translation engines. The entities, engine, and configuration files reflect this update, and the application version is incremented in package.json to signal this significant addition. --- README.md | 16 ++++++++-------- package.json | 2 +- src/shared/config.ts | 2 +- src/shared/engines/translate.engine.ts | 4 ++++ src/shared/entities/translation.engine.ts | 2 +- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index b9da475..c581152 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ EXAMPLES $ ctv cache --help ``` -_See code: [src/commands/cache.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/cache.ts)_ +_See code: [src/commands/cache.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/cache.ts)_ ## `ctv export:csv [LANGCODE]` @@ -89,7 +89,7 @@ EXAMPLES $ ctv export:csv --eol=lf ``` -_See code: [src/commands/export/csv.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/export/csv.ts)_ +_See code: [src/commands/export/csv.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/export/csv.ts)_ ## `ctv help [COMMANDS]` @@ -133,7 +133,7 @@ EXAMPLES $ ctv init --force ``` -_See code: [src/commands/init.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/init.ts)_ +_See code: [src/commands/init.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/init.ts)_ ## `ctv label [ADD] [DELETE] [GET] [REPLACE] [SYNC]` @@ -154,7 +154,7 @@ DESCRIPTION Represents a label management command. ``` -_See code: [src/commands/label/index.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/label/index.ts)_ +_See code: [src/commands/label/index.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/label/index.ts)_ ## `ctv label:add [LABEL]` @@ -186,7 +186,7 @@ EXAMPLES $ ctv label:add "hello.world" -t "Hello World!" ``` -_See code: [src/commands/label/add.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/label/add.ts)_ +_See code: [src/commands/label/add.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/label/add.ts)_ ## `ctv label:delete LABEL` @@ -211,7 +211,7 @@ EXAMPLES $ ctv label:delete hello.world ``` -_See code: [src/commands/label/delete.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/label/delete.ts)_ +_See code: [src/commands/label/delete.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/label/delete.ts)_ ## `ctv label:replace LABEL` @@ -239,7 +239,7 @@ EXAMPLES $ ctv label:replace hello.world -t="Hello world!!!" -fen ``` -_See code: [src/commands/label/replace.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/label/replace.ts)_ +_See code: [src/commands/label/replace.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/label/replace.ts)_ ## `ctv label:sync` @@ -263,7 +263,7 @@ EXAMPLES $ ctv label:sync "hello.world" -f="en" ``` -_See code: [src/commands/label/sync.ts](https://github.com/4746/transverto/blob/v1.0.0/src/commands/label/sync.ts)_ +_See code: [src/commands/label/sync.ts](https://github.com/4746/transverto/blob/v1.1.0/src/commands/label/sync.ts)_ --- diff --git a/package.json b/package.json index 59c8e39..684314f 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ ], "author": "Vadim", "license": "MIT", - "version": "1.0.1", + "version": "1.1.0", "bugs": "https://github.com/4746/transverto/issues", "homepage": "https://github.com/4746/transverto", "repository": "4746/transverto", diff --git a/src/shared/config.ts b/src/shared/config.ts index df3ba4f..725953d 100644 --- a/src/shared/config.ts +++ b/src/shared/config.ts @@ -1,8 +1,8 @@ import {DEFAULT_USER_AGENT} from "./constants.js"; import {IBingConfig} from "./entities/bing.config.js"; +import {IGoogleConfig} from "./entities/google.config.js"; import {ITerraConfig} from "./entities/terra.config.js"; import {TEngineTranslation} from "./entities/translation.engine.js"; -import {IGoogleConfig} from "./entities/google.config.js"; export const LANG_CODE_DEFAULT = 'en'; export const LABEL_VALIDATION_DEFAULT = '^[a-z0-9\\.\\-\\_]{3,100}$'; diff --git a/src/shared/engines/translate.engine.ts b/src/shared/engines/translate.engine.ts index dced9ca..fd1d60a 100644 --- a/src/shared/engines/translate.engine.ts +++ b/src/shared/engines/translate.engine.ts @@ -6,6 +6,7 @@ import {CTV_CACHE_ENGINE_FILE} from "../constants.js"; import {BaseEngine, IParamTranslateText} from "../entities/translation.engine.js"; import {Helper} from "../helper.js"; import {BingEngine} from "./bing.engine.js"; +import {GoogleEngine} from "./google.engine.js"; import {TerraEngine} from "./terra.engine.js"; let CACHE_TRANSLATE: Record; @@ -21,6 +22,9 @@ export class TranslateEngine implements BaseEngine { case 'bing': this.engine = new BingEngine(config.bing) break; + case 'google': + this.engine = new GoogleEngine(config.google, config.userAgent) + break; case 'terra': this.engine = new TerraEngine(config.terra) break; diff --git a/src/shared/entities/translation.engine.ts b/src/shared/entities/translation.engine.ts index 8c021bf..f700a15 100644 --- a/src/shared/entities/translation.engine.ts +++ b/src/shared/entities/translation.engine.ts @@ -1,7 +1,7 @@ /** * The types of translation engines available. */ -export type TEngineTranslation = 'bing' | 'terra'; +export type TEngineTranslation = 'bing' | 'google' | 'terra'; export interface IParamTranslateText { /**