From 06e297a011fd9d6563ab7e316b28ca3ff173ce1c Mon Sep 17 00:00:00 2001 From: yu7777 Date: Sat, 28 Sep 2019 19:04:37 +1000 Subject: [PATCH] Create oalecd9_mdx.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 牛津高阶英汉双解词典第9例句发音版_V1.0.3c --- addons21/fastwq/service/dict/oalecd9_mdx.py | 122 ++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 addons21/fastwq/service/dict/oalecd9_mdx.py diff --git a/addons21/fastwq/service/dict/oalecd9_mdx.py b/addons21/fastwq/service/dict/oalecd9_mdx.py new file mode 100644 index 0000000..2287fea --- /dev/null +++ b/addons21/fastwq/service/dict/oalecd9_mdx.py @@ -0,0 +1,122 @@ +#-*- coding:utf-8 -*- +import os +import re +import random +from ..base import * +# from BeautifulSoup import BeautifulSoup +# from bs4 import BeautifulSoup + + +VOICE_PATTERN = r'' +VOICE_PATTERN_WQ = r'(.*?)' +MAPPINGS = [ + ['br', [re.compile(VOICE_PATTERN % r'gb')]], + ['us', [re.compile(VOICE_PATTERN % r'us')]] +] +LANG_TO_REGEXPS = {lang: regexps for lang, regexps in MAPPINGS} +DICT_PATH = u"/Users/brian/Documents/牛津高阶英汉双解词典第9例句发音版_V1.0.3c/牛津高阶英汉双解词典(第9版)_V1.0.3c.mdx" # u'E:\\BaiduYunDownload\\mdx\\L6mp3.mdx' + + +@register([u'本地词典-牛津高阶9例句发音', u'牛津高阶9例句发音']) +class oalecd9_mdx(MdxService): + + def __init__(self): + dict_path = DICT_PATH + # if DICT_PATH is a path, stop auto detect + if not dict_path: + from ...service import service_manager, service_pool + for clazz in service_manager.mdx_services: + service = service_pool.get(clazz.__unique__) + title = service.builder._title if service and service.support else u'' + service_pool.put(service) + if title.startswith(u'牛津高阶'): + dict_path = service.dict_path + break + super(oalecd9_mdx, self).__init__(dict_path) + + @property + def title(self): + return getattr(self, '__register_label__', self.unique) + + def _fld_voice(self, html, voice): + """获取发音字段""" + for regexp in LANG_TO_REGEXPS[voice]: + match = regexp.search(html) + if match: + val = '/' + match.group(1) + name = get_hex_name('mdx-'+self.unique.lower(), val, 'mp3') + name = self.save_file(val, name) + if name: + return self.get_anki_label(name, 'audio') + return '' + + @export('BRE_PRON') + def fld_voicebre(self): + return self._fld_voice(self.get_html(), 'br') + + @export('AME_PRON') + def fld_voiceame(self): + return self._fld_voice(self.get_html(), 'us') + + @export('All examples with audios') + def fld_sentence_audio(self): + return self._range_sentence_audio([i for i in range(0, 100)]) + + @export('Random example with audio') + def fld_random_sentence_audio(self): + return self._range_sentence_audio() + + @export('First example with audio') + def fld_first1_sentence_audio(self): + return self._range_sentence_audio([0]) + + @export('First 2 examples with audios') + def fld_first2_sentence_audio(self): + return self._range_sentence_audio([0, 1]) + + def _fld_audio(self, audio): + name = get_hex_name('mdx-'+self.unique.lower(), audio, 'mp3') + name = self.save_file(audio, name) + if name: + return self.get_anki_label(name, 'audio') + return '' + + def _range_sentence_audio(self, range_arr=None): + m = self.get_html() + if m: + soup = parse_html(m) + el_list = soup.findAll('x-g-blk', ) + if el_list: + maps = [] + for element in el_list: + sounds = element.find_all('a') + if sounds: + br_sound = 'None' + us_sound = None + en_text = cn_text = '' + for sound in sounds: + if sound.find('audio-gbs-liju'): + br_sound = sound['href'][7:] + elif sound.find('audio-uss-liju'): + us_sound = sound['href'][7:] + en_text = element.x['wd'] + cn_text = element.chn.contents[1] + if us_sound: # I mainly use us_sound + maps.append([br_sound, us_sound,en_text,cn_text]) + + my_str = '' + range_arr = range_arr if range_arr else [random.randrange(0, len(maps) - 1, 1)] + for i, e in enumerate(maps): + if i in range_arr: + br_sound = e[0] + us_sound = e[1] + en_text = e[2] + cn_text = e[3] + us_mp3 = self._fld_audio(us_sound) + # if br_sound != 'None': + # br_mp3 = self._fld_audio(br_sound) + # please modify the code here to get br_mp3 + # my_str = my_str + br_mp3 + ' ' + en_text + cn_text + '
' + my_str = my_str + us_mp3 + ' ' + en_text + cn_text + '
' + return my_str + return ''