#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2021-04-14 09:22:11
# Project: nhanda

from pyspider.libs.base_handler import *
import re
import os
import datetime
import pymysql
import hashlib

# image save path
website_url = 'https://www.qdnd.vn/'
DIR_PATH = "image/vietNam_www_qdnd_vn/"
# http://biengioilanhtho.gov.vn/vi/chuyen-muc/thoi-su.html
plate_list = ['chinh-tri', 'quoc-phong-an-ninh', 'quan-su-the-gioi', 'thoi-su-quoc-te', ]
module_list = ['chinh-tri', 'quoc-phong-an-ninh', 'quan-su-the-gioi', 'thoi-su-quoc-te', ]

conn = pymysql.connect(
    host='192.168.1.100',
    user='root',
    password='1qazXSW@',
    db='pyspider',
    charset='utf8',
)
crawl_person = 'dsw'


class Handler(BaseHandler):
    crawl_config = {
        'itag': 'v10'
    }

    def __init__(self):
        self.dir_path = DIR_PATH
        self.tool = Tool()
        self.google = Google()

    @every(minutes=24 * 60)
    def on_start(self):
        # crawl the news sections
        for module in module_list:
            url = website_url + module
            self.crawl(url, callback=self.index_page, validate_cert=False, timeout=300)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        # crawl the news sub-sections
        url = response.url
        print('index_page...', url)
        crawl_url = '.main-menu a[href^="' + url + '"]'
        for each in response.doc(crawl_url).items():
            print(each.attr.href)
            self.crawl(each.attr.href, callback=self.index_page1, validate_cert=False, timeout=300)

    @config(age=10 * 24 * 60 * 60)
    def index_page1(self, response):
        # get the sub-section pagination info and the article links
        url = response.url
        print(url)
        # the "Trang cuối" (last page) link carries the highest page number
        page_selector = '.ex_page a[title^="Trang cuối"]'
        latest_page = response.doc(page_selector).attr.href.split('/')[-1]
        print(latest_page)
        for each in range(int(latest_page)):
            self.crawl(url + '/p1/p/' + str(each + 1), callback=self.get_detail_page,
                       validate_cert=False, timeout=300)

    @config(priority=2)
    def get_detail_page(self, response):
        url = response.url
        filter_url = url.split('/p1/p/')[0]
        print(filter_url)
        for ee in response.doc('.content-list .row a[href^="' + filter_url + '"]').items():
            detail_page_url = ee.attr.href
            print(detail_page_url)
            if len(detail_page_url.split('/p/')) == 1:
                self.crawl(detail_page_url, callback=self.detail_page, validate_cert=False, timeout=300)

    @config(priority=2)
    def detail_page(self, response):
        url = response.url
        title = response.doc('title').text()
        content = response.doc('.post-content [itemprop="articleBody"]').text()
        content_cn = self.google.translate('vi', 'zh-CN', content)
        date = response.doc('.post-subinfo').text()
        abstract = response.doc('.post-summary').text()
        crawl_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # the record key is an MD5 over the URL plus the crawl time
        md5_source = url + crawl_time
        md5 = hashlib.md5(md5_source.encode('utf8')).hexdigest()
        """
        img = response.doc('.imgtelerik')
        img_url = img.attr.src
        if img_url:
            # get the image file suffix
            extension = self.tool.get_extension(img_url)
            # build the image file name
            file_name = extension
            self.crawl(img_url, callback=self.save_img, save={"file_name": file_name}, validate_cert=False)
            file_path = self.dir_path + file_name
        else:
            file_path = ''
        """
        c = conn.cursor()  # get a cursor
        sql = '''
            insert into data (md5,url,title,content,content_cn,time,crawl_time,crawl_person,abstract,source)
            values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        '''
        c.execute(sql, (md5, url, title, content, content_cn, date,
                        crawl_time, crawl_person, abstract, website_url))  # execute the SQL statement
        conn.commit()  # commit the database operation
        c.close()
        # conn.close()  # close the database connection
        return {
            "url": response.url,
            "title": title,
            "content": content,
            "date": date,
            "abstract": abstract,
            "content_cn": content_cn
        }

    # save an image
    def save_img(self, response):
        content = response.content
        file_name = response.save["file_name"]
        file_path = self.dir_path + file_name
        self.tool.save_img(content, file_path)


# utility class
class Tool:
    def __init__(self):
        self.dir = DIR_PATH
        # create the directory if it does not exist
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

    # save an image
    def save_img(self, content, path):
        with open(path, "wb") as f:
            f.write(content)

    # get the file name from a URL, without the query string
    def get_extension(self, url):
        extension = url.split("/")[-1]
        t = re.split(r'\?', extension)[0]
        return t


# Google Translate
import logging
import urllib
import urllib.request
import urllib.parse
import requests
import execjs
import json

logger = logging.getLogger(__file__)
from requests.packages import urllib3
urllib3.disable_warnings()


class Google:
    def __init__(self):
        self.lan_dict = {
            '中文': 'zh-CN',
            '英文': 'en',
            '俄文': 'ru',
            '法文': 'fr',
            '日文': 'ja',
            '韩文': 'ko'
        }
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
        # self.url = 'https://translate.googleapis.com/translate_a/single'
        self.url = 'http://translate.google.cn/translate_a/single'
        self.session = requests.Session()
        self.session.keep_alive = False

    def getTk(self, text):
        return self.get_ctx().call("TL", text)

    def get_ctx(self):
        # the "tk" token algorithm used by the translate endpoint, evaluated via execjs
        ctx = execjs.compile("""
        function TL(a) {
            var k = "";
            var b = 406644;
            var b1 = 3293161072;
            var jd = ".";
            var $b = "+-a^+6";
            var Zb = "+-3^+b+-f";
            for (var e = [], f = 0, g = 0; g < a.length; g++) {
                var m = a.charCodeAt(g);
                128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
            }
            a = b;
            for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
            a = RL(a, Zb);
            a ^= b1 || 0;
            0 > a && (a = (a & 2147483647) + 2147483648);
            a %= 1E6;
            return a.toString() + jd + (a ^ b)
        };
        function RL(a, b) {
            var t = "a";
            var Yb = "+";
            for (var c = 0; c < b.length - 2; c += 3) {
                var d = b.charAt(c + 2), d = d >= t ? d.charCodeAt(0) - 87 : Number(d), d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
                a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
            }
            return a
        }
        """)
        return ctx

    def buildUrl(self, text, tk, sl, tl):
        baseUrl = 'http://translate.google.cn/translate_a/single'
        # baseUrl = 'https://translate.googleapis.com/translate_a/single'
        # client=webapp gives somewhat better translations here; client=t is rather poor
        baseUrl += '?client=gtx&'
        baseUrl += 'sl=auto&'
        baseUrl += 'tl=' + str(tl) + '&'
        baseUrl += 'hl=zh-CN&'
        baseUrl += 'dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&'
        baseUrl += 'ie=UTF-8&oe=UTF-8&'
        baseUrl += 'clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2&'
        baseUrl += 'tk=' + str(tk) + '&'
        content = urllib.parse.quote(text)
        baseUrl += 'q=' + content
        return baseUrl

    def getHtml(self, session, url, headers):
        try:
            html = session.get(url, headers=headers)
            return html.json()
        except Exception:
            return None

    def translate(self, from_lan, to_lan, text):
        tk = self.getTk(text)
        url = self.buildUrl(text, tk, from_lan, to_lan)
        result = self.getHtml(self.session, url, self.headers)
        if result is not None:
            s = ''
            for i in result[0]:
                if i[0] is not None:
                    s += i[0]
            return s
        else:
            logger.info('Google translation failed')
            return None
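# --- a minimal sketch (not part of the original project) of the MySQL `data`
# table that the INSERT statement above assumes. The column names come from the
# INSERT itself; the types, lengths and the choice of primary key below are
# assumptions for illustration only.
CREATE_DATA_TABLE = """
CREATE TABLE IF NOT EXISTS data (
    md5          CHAR(32) PRIMARY KEY,   -- hex digest of url + crawl_time
    url          VARCHAR(512),
    title        VARCHAR(512),
    content      LONGTEXT,               -- original Vietnamese text
    content_cn   LONGTEXT,               -- Chinese translation
    time         VARCHAR(64),            -- publication date as scraped
    crawl_time   DATETIME,
    crawl_person VARCHAR(32),
    abstract     TEXT,
    source       VARCHAR(128)            -- site root URL
) DEFAULT CHARSET = utf8;
"""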
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2021-04-14 09:22:11
# Project: nhanda

from pyspider.libs.base_handler import *
import re
import os
import datetime
import pymysql
import hashlib

# image save path
website_url = 'https://baohaiquanvietnam.vn/'
DIR_PATH = "image/vietNam_baohaiquanvietnam_vn/"
module_list = ['danh-muc?id=1', 'danh-muc?id=6', 'danh-muc?id=9', ]

conn = pymysql.connect(
    host='192.168.1.100',
    user='root',
    password='1qazXSW@',
    db='pyspider',
    charset='utf8',
)
crawl_person = 'dsw'


class Handler(BaseHandler):
    crawl_config = {
        'itag': 'v10'
    }

    def __init__(self):
        self.dir_path = DIR_PATH
        self.tool = Tool()
        self.google = Google()

    @every(minutes=24 * 60)
    def on_start(self):
        # crawl the news sections
        for module in module_list:
            url = website_url + module
            self.crawl(url, callback=self.index_page, validate_cert=False, timeout=300)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        # get the section pagination info and the article links
        url = response.url
        print(url)
        # the next-to-last pagination entry holds the number of the last page
        page_selector = '.pagination li:nth-last-child(2) a'
        latest_page = response.doc(page_selector).text()
        print(latest_page)
        for each in range(int(latest_page)):
            self.crawl(url + '?page=' + str(each + 1), callback=self.get_detail_page,
                       validate_cert=False, timeout=300)

    @config(priority=2)
    def get_detail_page(self, response):
        for ee in response.doc('.row .col-sm-8 a[href^="' + website_url + '"]').items():
            detail_page_url = ee.attr.href
            if len(detail_page_url.split('?page=')) == 1:
                # print('detail_page_url---', detail_page_url)
                self.crawl(detail_page_url, callback=self.detail_page, validate_cert=False, timeout=300)

    @config(priority=2)
    def detail_page(self, response):
        url = response.url
        title = response.doc('title').text()
        content = response.doc('.content_news').text()
        content_cn = self.google.translate('vi', 'zh-CN', content)
        date = response.doc('.breadcrumb li:nth-last-child(3)').text()
        abstract = response.doc('.que_news').text()
        crawl_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # the record key is an MD5 over the URL plus the crawl time
        md5_source = url + crawl_time
        md5 = hashlib.md5(md5_source.encode('utf8')).hexdigest()
        """
        img = response.doc('.imgtelerik')
        img_url = img.attr.src
        if img_url:
            # get the image file suffix
            extension = self.tool.get_extension(img_url)
            # build the image file name
            file_name = extension
            self.crawl(img_url, callback=self.save_img, save={"file_name": file_name}, validate_cert=False)
            file_path = self.dir_path + file_name
        else:
            file_path = ''
        """
        c = conn.cursor()  # get a cursor
        sql = '''
            insert into data (md5,url,title,content,content_cn,time,crawl_time,crawl_person,abstract,source)
            values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        '''
        c.execute(sql, (md5, url, title, content, content_cn, date,
                        crawl_time, crawl_person, abstract, website_url))  # execute the SQL statement
        conn.commit()  # commit the database operation
        c.close()
        # conn.close()  # close the database connection
        return {
            "url": response.url,
            "title": title,
            "content": content,
            "date": date,
            "abstract": abstract,
            "content_cn": content_cn
        }

    # save an image
    def save_img(self, response):
        content = response.content
        file_name = response.save["file_name"]
        file_path = self.dir_path + file_name
        self.tool.save_img(content, file_path)


# utility class
class Tool:
    def __init__(self):
        self.dir = DIR_PATH
        # create the directory if it does not exist
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

    # save an image
    def save_img(self, content, path):
        with open(path, "wb") as f:
            f.write(content)

    # get the file name from a URL, without the query string
    def get_extension(self, url):
        extension = url.split("/")[-1]
        t = re.split(r'\?', extension)[0]
        return t


# Google Translate
import logging
import urllib
import urllib.request
import urllib.parse
import requests
import execjs
import json

logger = logging.getLogger(__file__)
from requests.packages import urllib3
urllib3.disable_warnings()


class Google:
    def __init__(self):
        self.lan_dict = {
            '中文': 'zh-CN',
            '英文': 'en',
            '俄文': 'ru',
            '法文': 'fr',
            '日文': 'ja',
            '韩文': 'ko'
        }
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
        # self.url = 'https://translate.googleapis.com/translate_a/single'
        self.url = 'http://translate.google.cn/translate_a/single'
        self.session = requests.Session()
        self.session.keep_alive = False

    def getTk(self, text):
        return self.get_ctx().call("TL", text)

    def get_ctx(self):
        # the "tk" token algorithm used by the translate endpoint, evaluated via execjs
        ctx = execjs.compile("""
        function TL(a) {
            var k = "";
            var b = 406644;
            var b1 = 3293161072;
            var jd = ".";
            var $b = "+-a^+6";
            var Zb = "+-3^+b+-f";
            for (var e = [], f = 0, g = 0; g < a.length; g++) {
                var m = a.charCodeAt(g);
                128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
            }
            a = b;
            for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
            a = RL(a, Zb);
            a ^= b1 || 0;
            0 > a && (a = (a & 2147483647) + 2147483648);
            a %= 1E6;
            return a.toString() + jd + (a ^ b)
        };
        function RL(a, b) {
            var t = "a";
            var Yb = "+";
            for (var c = 0; c < b.length - 2; c += 3) {
                var d = b.charAt(c + 2), d = d >= t ? d.charCodeAt(0) - 87 : Number(d), d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
                a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
            }
            return a
        }
        """)
        return ctx

    def buildUrl(self, text, tk, sl, tl):
        baseUrl = 'http://translate.google.cn/translate_a/single'
        # baseUrl = 'https://translate.googleapis.com/translate_a/single'
        # client=webapp gives somewhat better translations here; client=t is rather poor
        baseUrl += '?client=gtx&'
        baseUrl += 'sl=auto&'
        baseUrl += 'tl=' + str(tl) + '&'
        baseUrl += 'hl=zh-CN&'
        baseUrl += 'dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&'
        baseUrl += 'ie=UTF-8&oe=UTF-8&'
        baseUrl += 'clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2&'
        baseUrl += 'tk=' + str(tk) + '&'
        content = urllib.parse.quote(text)
        baseUrl += 'q=' + content
        return baseUrl

    def getHtml(self, session, url, headers):
        try:
            html = session.get(url, headers=headers)
            return html.json()
        except Exception:
            return None

    def translate(self, from_lan, to_lan, text):
        tk = self.getTk(text)
        url = self.buildUrl(text, tk, from_lan, to_lan)
        result = self.getHtml(self.session, url, self.headers)
        if result is not None:
            s = ''
            for i in result[0]:
                if i[0] is not None:
                    s += i[0]
            return s
        else:
            logger.info('Google translation failed')
            return None
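# --- hedged usage sketch (not in the original file): how the Google helper above
# can be exercised on its own. The 'vi' -> 'zh-CN' pair mirrors the call made in
# Handler.detail_page(); the sample sentence is made up.
if __name__ == '__main__':
    google = Google()
    print(google.translate('vi', 'zh-CN', 'Xin chào'))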
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2021-04-14 09:22:11
# Project: nhanda

from pyspider.libs.base_handler import *
import re
import os
import json
import datetime
import pymysql
import hashlib

# image save path
website_url = 'http://biengioilanhtho.gov.vn/'
DIR_PATH = "image/vietNam_baohaiquanvietnam_vn/"
module_list = ['vi/chuyen-muc/thoi-su.html',
               'vi/chuyen-muc/bien-gioi-viet-nam-lao.html',
               'vi/chuyen-muc/bien-gioi-dat-lien-viet-nam-trung-quoc.html',
               'vi/chuyen-muc/bien-gioi-dat-lien-viet-nam-campuchia.html',
               'vi/chuyen-muc/bien-gioi-bien.html',
               'vi/chuyen-muc/tu-lieu-lich-su.html', ]

conn = pymysql.connect(
    host='192.168.1.100',
    user='root',
    password='1qazXSW@',
    db='pyspider',
    charset='utf8',
)
crawl_person = 'dsw'


class Handler(BaseHandler):
    crawl_config = {
        'itag': 'v7'
    }

    def __init__(self):
        self.dir_path = DIR_PATH
        self.tool = Tool()
        self.google = Google()

    @every(minutes=24 * 60)
    def on_start(self):
        # crawl the news sections; pass the category slug along in `save`
        for module in module_list:
            url = website_url + module
            self.crawl(url, callback=self.index_page, validate_cert=False, timeout=300,
                       save=os.path.splitext(os.path.basename(module))[0])

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        # get the section pagination info and the article links
        module = response.save
        print(module)
        url = response.url
        print(url)
        # the last page number is not exposed, so walk a fixed number of pages
        latest_page = 5000
        print(latest_page)
        page_url = 'http://biengioilanhtho.gov.vn/ajaxpro/Office.Web.Frontend.NewsListRenderBll,Office.Web.Frontend.ashx'
        for each in range(int(latest_page)):
            data = {"Lang": "vi", "Cat": module, "PageIndex": each}
            # the '?page=N' suffix only makes each task URL unique for pyspider
            self.crawl(page_url + '?page=' + str(each), method='POST', data=json.dumps(data),
                       callback=self.get_detail_page, validate_cert=False, timeout=300,
                       headers={'Content-Type': 'application/json; charset=UTF-8',
                                'X-AjaxPro-Method': 'DrawContent'})

    @config(priority=2)
    def get_detail_page(self, response):
        # the AjaxPro endpoint returns JSON; swap the rendered HTML fragment in as the body
        response.content = response.json['value']['HtmlContent']
        # print(response.doc('a[href^="' + website_url + '"]'))
        for ee in response.doc('a[href^="' + website_url + '"]').items():
            detail_page_url = ee.attr.href
            print('detail_page_url---', detail_page_url)
            if len(detail_page_url.split('?page=')) == 1:
                # print('detail_page_url---', detail_page_url)
                self.crawl(detail_page_url, callback=self.detail_page, validate_cert=False, timeout=300)

    @config(priority=2)
    def detail_page(self, response):
        url = response.url
        title = response.doc('h2').text()
        content = response.doc('.content_detail p').text()
        content_cn = None
        # content_cn = self.google.translate('vi', 'zh-CN', content)
        date = response.doc('.title_detail span').text()
        abstract = response.doc('.content_detail1 p:nth-child(1)').text()
        crawl_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # the record key is an MD5 over the URL plus the crawl time
        md5_source = url + crawl_time
        md5 = hashlib.md5(md5_source.encode('utf8')).hexdigest()
        """
        img = response.doc('.imgtelerik')
        img_url = img.attr.src
        if img_url:
            # get the image file suffix
            extension = self.tool.get_extension(img_url)
            # build the image file name
            file_name = extension
            self.crawl(img_url, callback=self.save_img, save={"file_name": file_name}, validate_cert=False)
            file_path = self.dir_path + file_name
        else:
            file_path = ''
        """
        c = conn.cursor()  # get a cursor
        sql = '''
            insert into data (md5,url,title,content,content_cn,time,crawl_time,crawl_person,abstract,source)
            values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        '''
        c.execute(sql, (md5, url, title, content, content_cn, date,
                        crawl_time, crawl_person, abstract, website_url))  # execute the SQL statement
        conn.commit()  # commit the database operation
        c.close()
        # conn.close()  # close the database connection
        return {
            "url": response.url,
            "title": title,
            "content": content,
            "date": date,
            "abstract": abstract,
            "content_cn": content_cn
        }

    # save an image
    def save_img(self, response):
        content = response.content
        file_name = response.save["file_name"]
        file_path = self.dir_path + file_name
        self.tool.save_img(content, file_path)


# utility class
class Tool:
    def __init__(self):
        self.dir = DIR_PATH
        # create the directory if it does not exist
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

    # save an image
    def save_img(self, content, path):
        with open(path, "wb") as f:
            f.write(content)

    # get the file name from a URL, without the query string
    def get_extension(self, url):
        extension = url.split("/")[-1]
        t = re.split(r'\?', extension)[0]
        return t


# Google Translate
import logging
import urllib
import urllib.request
import urllib.parse
import requests
import execjs

logger = logging.getLogger(__file__)
from requests.packages import urllib3
urllib3.disable_warnings()


class Google:
    def __init__(self):
        self.lan_dict = {
            '中文': 'zh-CN',
            '英文': 'en',
            '俄文': 'ru',
            '法文': 'fr',
            '日文': 'ja',
            '韩文': 'ko'
        }
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
        # self.url = 'https://translate.googleapis.com/translate_a/single'
        self.url = 'http://translate.google.cn/translate_a/single'
        self.session = requests.Session()
        self.session.keep_alive = False

    def getTk(self, text):
        return self.get_ctx().call("TL", text)

    def get_ctx(self):
        # the "tk" token algorithm used by the translate endpoint, evaluated via execjs
        ctx = execjs.compile("""
        function TL(a) {
            var k = "";
            var b = 406644;
            var b1 = 3293161072;
            var jd = ".";
            var $b = "+-a^+6";
            var Zb = "+-3^+b+-f";
            for (var e = [], f = 0, g = 0; g < a.length; g++) {
                var m = a.charCodeAt(g);
                128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023), e[f++] = m >> 18 | 240, e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224, e[f++] = m >> 6 & 63 | 128), e[f++] = m & 63 | 128)
            }
            a = b;
            for (f = 0; f < e.length; f++) a += e[f], a = RL(a, $b);
            a = RL(a, Zb);
            a ^= b1 || 0;
            0 > a && (a = (a & 2147483647) + 2147483648);
            a %= 1E6;
            return a.toString() + jd + (a ^ b)
        };
        function RL(a, b) {
            var t = "a";
            var Yb = "+";
            for (var c = 0; c < b.length - 2; c += 3) {
                var d = b.charAt(c + 2), d = d >= t ? d.charCodeAt(0) - 87 : Number(d), d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
                a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
            }
            return a
        }
        """)
        return ctx

    def buildUrl(self, text, tk, sl, tl):
        baseUrl = 'http://translate.google.cn/translate_a/single'
        # baseUrl = 'https://translate.googleapis.com/translate_a/single'
        # client=webapp gives somewhat better translations here; client=t is rather poor
        baseUrl += '?client=gtx&'
        baseUrl += 'sl=auto&'
        baseUrl += 'tl=' + str(tl) + '&'
        baseUrl += 'hl=zh-CN&'
        baseUrl += 'dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&'
        baseUrl += 'ie=UTF-8&oe=UTF-8&'
        baseUrl += 'clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2&'
        baseUrl += 'tk=' + str(tk) + '&'
        content = urllib.parse.quote(text)
        baseUrl += 'q=' + content
        return baseUrl

    def getHtml(self, session, url, headers):
        try:
            html = session.get(url, headers=headers)
            return html.json()
        except Exception:
            return None

    def translate(self, from_lan, to_lan, text):
        tk = self.getTk(text)
        url = self.buildUrl(text, tk, from_lan, to_lan)
        result = self.getHtml(self.session, url, self.headers)
        if result is not None:
            s = ''
            for i in result[0]:
                if i[0] is not None:
                    s += i[0]
            return s
        else:
            logger.info('Google translation failed')
            return None
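# --- hedged sketch (not in the original file): the AjaxPro pagination request that
# Handler.index_page() issues above, rewritten with plain requests so it can be
# tested outside pyspider. The endpoint, payload fields and the value -> HtmlContent
# response shape are taken from the code above; the helper name and the sample
# category are assumptions.
def fetch_list_page(category, page_index):
    page_url = ('http://biengioilanhtho.gov.vn/ajaxpro/'
                'Office.Web.Frontend.NewsListRenderBll,Office.Web.Frontend.ashx')
    headers = {
        'Content-Type': 'application/json; charset=UTF-8',
        'X-AjaxPro-Method': 'DrawContent',
    }
    payload = {"Lang": "vi", "Cat": category, "PageIndex": page_index}
    resp = requests.post(page_url, data=json.dumps(payload), headers=headers,
                         timeout=300, verify=False)
    # the endpoint wraps the rendered HTML list fragment in {"value": {"HtmlContent": ...}}
    return resp.json()['value']['HtmlContent']

# example: html = fetch_list_page('thoi-su', 0)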