anki-word-query/src/fastwq/service/LDOCE6.py

105 lines
3.5 KiB
Python
Raw Normal View History

2018-07-01 10:55:30 +08:00
#-*- coding:utf-8 -*-
import re
from .base import MdxService, export, register, with_styles, parseHtml
2018-07-01 10:55:30 +08:00
PATH = u'D:\\mdx_server\\mdx\\LDOCE6.mdx'
2018-07-01 10:55:30 +08:00
VOICE_PATTERN = r'<a href="sound://([\w/]+\w*\.mp3)"><img src="img/spkr_%s.png"></a>'
MAPPINGS = [
['br', [re.compile(VOICE_PATTERN % r'r')]],
['us', [re.compile(VOICE_PATTERN % r'b')]]
]
LANG_TO_REGEXPS = {lang: regexps for lang, regexps in MAPPINGS}
2018-07-01 10:55:30 +08:00
@register(u'本地词典-LDOCE6')
class Ldoce6(MdxService):
def __init__(self):
super(Ldoce6, self).__init__(PATH)
2018-07-01 10:55:30 +08:00
@property
def unique(self):
return self.__class__.__name__
@property
def title(self):
return self.__register_label__
@export(u'音标', 1)
def fld_phonetic(self):
html = self.get_html()
m = re.search(r'<span class="pron">(.*?)</span>', html)
if m:
return m.groups()[0]
return ''
2018-07-01 10:55:30 +08:00
def _fld_voice(self, html, voice):
"""获取发音字段"""
from hashlib import sha1
for regexp in LANG_TO_REGEXPS[voice]:
match = regexp.search(html)
if match:
val = '/' + match.group(1)
hex_digest = sha1(
val.encode('utf-8') if isinstance(val, unicode)
else val
).hexdigest().lower()
assert len(hex_digest) == 40, "unexpected output from hash library"
name = '.'.join([
'-'.join([
'mdx', self.unique.lower(), hex_digest[:8], hex_digest[8:16],
hex_digest[16:24], hex_digest[24:32], hex_digest[32:],
]),
'mp3',
])
name = self.save_file(val, name)
if name:
return self.get_anki_label(name, 'audio')
2018-07-01 10:55:30 +08:00
return ''
@export(u'英式发音', 2)
def fld_voicebre(self):
return self._fld_voice(self.get_html(), 'br')
@export(u'美式发音', 3)
2018-07-01 10:55:30 +08:00
def fld_voiceame(self):
return self._fld_voice(self.get_html(), 'us')
2018-07-01 10:55:30 +08:00
@export(u'例句', 4)
2018-07-01 10:55:30 +08:00
def fld_sentence(self):
m = re.findall(r'<span class="example"\s*.*>\s*.*<\/span>', self.get_html())
2018-07-01 10:55:30 +08:00
if m:
soup = parseHtml(m[0])
el_list = soup.findAll('span', {'class':'example'})
if el_list:
maps = [u''.join(str(content).decode('utf-8') for content in element.contents)
for element in el_list]
my_str = ''
for i_str in maps:
i_str = re.sub(r'<a[^>]+?href=\"sound\:.*\.mp3\".*</a>', '', i_str)
i_str = i_str.replace('&nbsp;', '')
my_str = my_str + '<li>' + i_str + '</li>'
return self._css(my_str)
2018-07-01 10:55:30 +08:00
return ''
@export(u'释义', 5)
2018-07-01 10:55:30 +08:00
def fld_definate(self):
m = m = re.findall(r'<span class="def"\s*.*>\s*.*<\/span>', self.get_html())
2018-07-01 10:55:30 +08:00
if m:
soup = parseHtml(m[0])
el_list = soup.findAll('span', {'class':'def'})
if el_list:
maps = [u''.join(str(content).decode('utf-8') for content in element.contents)
for element in el_list]
2018-07-01 10:55:30 +08:00
my_str = ''
for i_str in maps:
my_str = my_str + '<li>' + i_str + '</li>'
return self._css(my_str)
2018-07-01 10:55:30 +08:00
return ''
@with_styles(cssfile='_ldoce6.css')
def _css(self, val):
return val