From 6002254ac0ba95aad30ce803687b070889509cfe Mon Sep 17 00:00:00 2001 From: "St.Huang" Date: Sun, 8 Jul 2018 03:21:16 +0800 Subject: [PATCH] Bing supports mp3s and example. --- src/fastwq/service/bing.py | 114 ++++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 28 deletions(-) diff --git a/src/fastwq/service/bing.py b/src/fastwq/service/bing.py index caa26c3..22135e1 100644 --- a/src/fastwq/service/bing.py +++ b/src/fastwq/service/bing.py @@ -1,11 +1,10 @@ #-*- coding:utf-8 -*- import re - -from aqt.utils import showInfo, showText from .base import WebService, export, register, with_styles, parseHtml +bing_download_mp3 = True -@register([u'必应', u'Bing']) +@register([u'Bing', u'Bing']) class Bing(WebService): def __init__(self): @@ -16,54 +15,113 @@ class Bing(WebService): data = self.get_response(u"http://cn.bing.com/dict/search?q={}&mkt=zh-cn".format(word)) soup = parseHtml(data) result = { - 'phonitic_us': '', - 'phonitic_uk': '', - 'participle': '', + 'pronunciation': {'AmE': '', 'BrE': '', 'AmEmp3': '', 'BrEmp3': ''}, 'def': '', + 'sams': '', } - element = soup.find('div', class_='hd_prUS') + #音 + element = soup.find('div', class_='hd_tf_lh') if element: - result['phonitic_us'] = str(element).decode('utf-8') + audios = element.find_all('a') + #美式英标 + tag = element.find('div', class_='hd_pr') + if tag: + result['pronunciation']['AmE'] = str(tag).decode('utf-8') + #美音 + if audios: + tag = audios[0] + audio_url = tag.get('onclick') + if audio_url: + result['pronunciation']['AmEmp3'] = u''.join(re.findall(r'https://.*\.mp3', audio_url)) - element = soup.find('div', class_='hd_pr') - if element: - result['phonitic_uk'] = str(element).decode('utf-8') - - element = soup.find('div', class_='hd_if') - if element: - result['participle'] = str(element).decode('utf-8') + #英式音标 + tag = element.find('div', class_='hd_prUS') + if tag: + result['pronunciation']['BrE'] = str(tag).decode('utf-8') + #英音 + if audios: + tag = audios[1] + audio_url = tag.get('onclick') + if audio_url: + result['pronunciation']['BrEmp3'] = u''.join(re.findall(r'https://.*\.mp3', audio_url)) + #释义 element = soup.find('div', class_='qdef') if element: element = getattr(element, 'ul', '') if element: result['def'] = u''.join([str(content) for content in element.contents]) + #例句 + element = soup.find('div', id='sentenceSeg') + if element: + #英文例句 + tags = element.find_all('div', {"class": 'sen_en'}) + result['sams'] = [{'eng': u''.join(tag.find_all(text=True))} for tag in tags] + #例句翻译 + tags = element.find_all('div', {"class": 'sen_cn'}) + for i, tag in enumerate(tags): + result['sams'][i]['chn'] = u''.join(tag.find_all(text=True)) + return self.cache_this(result) def _get_field(self, key, default=u''): return self.cache_result(key) if self.cached(key) else self._get_content().get(key, default) - @export('AME_PHON', 1) - def fld_phonetic_us(self): - return self._get_field('phonitic_us') - - @export('BRE_PHON', 2) - def fld_phonetic_uk(self): - return self._get_field('phonitic_uk') - - @export([u'词语时态', u'Participle'], 3) - def fld_participle(self): - return self._get_field('participle') - @with_styles(css='.pos{font-weight:bold;margin-right:4px;}', need_wrap_css=True, wrap_class='bing') def _css(self, val): return val + + @export('AME_PHON', 1) + def fld_phonetic_us(self): + seg = self._get_field('pronunciation') + return seg.get('AmE', u'') + + @export('BRE_PHON', 2) + def fld_phonetic_uk(self): + seg = self._get_field('pronunciation') + return seg.get('BrE', u'') + + def _fld_mp3(self, fld): + audio_url = self._get_field('pronunciation')[fld] + if bing_download_mp3 and audio_url: + filename = u''.join(re.findall(r'\w*\.mp3', audio_url)) + if filename and self.download(audio_url, filename): + return self.get_anki_label(u'bing_{0}_{1}'.format(fld, filename), 'audio') + return '' + + @export('AME_PRON', 3) + def fld_mp3_us(self): + return self._fld_mp3('AmEmp3') + + @export('BRE_PRON', 4) + def fld_mp3_uk(self): + return self._fld_mp3('BrEmp3') - @export('DEF', 4) + @export('DEF', 5) def fld_definition(self): val = self._get_field('def') if val == None or val == '': return '' return self._css(val) + + @export('EXAMPLE', 6) + def fld_samples(self): + max_numbers = 10 + segs = self._get_field('sams') + sentences = '' + for i, seg in enumerate(segs): + sentences += u"""
  • +
    {0}.{1}
    +
    {2}
    +
  • """.format(i+1, seg['eng'], seg['chn']) + if i == 9: + break + if sentences: + return u"""
    +
    +
      {0}
    +
    +
    """.format(sentences) + return ''