2018-07-01 10:55:30 +08:00
|
|
|
#-*- coding:utf-8 -*-
|
|
|
|
|
import re
|
2018-07-06 12:29:50 +08:00
|
|
|
from .base import MdxService, export, register, with_styles, parseHtml
|
2018-07-01 10:55:30 +08:00
|
|
|
|
2018-07-06 12:29:50 +08:00
|
|
|
# Filesystem location of the LDOCE6 MDX dictionary that the service opens.
PATH = u'D:\\mdx_server\\mdx\\LDOCE6.mdx'

# Template for the anchor that wraps a pronunciation clip.  Group 1 captures
# the ``.mp3`` path that follows ``sound://``; ``%s`` is replaced with the
# suffix of the speaker icon file name (``img/spkr_<suffix>.png``).
VOICE_PATTERN = r'<a href="sound://([\w/]+\w*\.mp3)"><img src="img/spkr_%s.png"></a>'

# Ordered [language key, list of compiled patterns] pairs: the 'br' key uses
# the icon suffix 'r' and the 'us' key uses the icon suffix 'b'.
MAPPINGS = [
    ['br', [re.compile(VOICE_PATTERN % 'r')]],
    ['us', [re.compile(VOICE_PATTERN % 'b')]],
]

# Same data as MAPPINGS, indexed by language key for direct lookup.
LANG_TO_REGEXPS = dict(MAPPINGS)
|
2018-07-01 10:55:30 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@register(u'本地词典-LDOCE6')
class Ldoce6(MdxService):
    """Field extractor for the LDOCE6 MDX dictionary located at ``PATH``.

    Every exported field is derived from the entry HTML returned by
    ``self.get_html()``.  ``get_html``, ``save_file`` and ``get_anki_label``
    are not defined in this class (presumably inherited from ``MdxService``).
    """

    def __init__(self):
        """Initialise the base service with the dictionary file at ``PATH``."""
        super(Ldoce6, self).__init__(PATH)

    @property
    def unique(self):
        """Identifier of this service: the name of the class."""
        return self.__class__.__name__

    @property
    def title(self):
        """Display label, read from ``__register_label__``.

        NOTE(review): the attribute is presumably attached by ``@register``;
        confirm against ``.base``.
        """
        return self.__register_label__

    @export(u'音标', 1)
    def fld_phonetic(self):
        """Return the content of the first ``<span class="pron">``, or ''."""
        found = re.search(r'<span class="pron">(.*?)</span>', self.get_html())
        return found.group(1) if found else ''

    def _fld_voice(self, html, voice):
        """Return the audio label for the first usable clip of ``voice``.

        Args:
            html: entry HTML to search.
            voice: key of ``LANG_TO_REGEXPS`` ('br' or 'us').

        Returns:
            The value of ``self.get_anki_label(name, 'audio')`` for the first
            pattern whose clip ``self.save_file`` reports a name for, or ''
            when no pattern matches or no file name is returned.

        Raises:
            KeyError: if ``voice`` is not a key of ``LANG_TO_REGEXPS``.
        """
        from hashlib import sha1

        for regexp in LANG_TO_REGEXPS[voice]:
            match = regexp.search(html)
            if not match:
                continue
            val = '/' + match.group(1)
            # sha1 only accepts bytes: encode text, pass byte strings through.
            # On Python 2 this is the same split as ``unicode`` vs ``str``,
            # without depending on the Python-2-only name ``unicode``.
            raw = val if isinstance(val, bytes) else val.encode('utf-8')
            hex_digest = sha1(raw).hexdigest().lower()
            assert len(hex_digest) == 40, "unexpected output from hash library"
            # File name layout: mdx-<service>-<five dash-separated 8-char
            # slices of the digest>.mp3
            chunks = [hex_digest[i:i + 8] for i in range(0, 40, 8)]
            name = '-'.join(['mdx', self.unique.lower()] + chunks) + '.mp3'
            name = self.save_file(val, name)
            if name:
                return self.get_anki_label(name, 'audio')
        return ''

    @export(u'英式发音', 2)
    def fld_voicebre(self):
        """Return the audio label matched by the 'br' patterns, or ''."""
        return self._fld_voice(self.get_html(), 'br')

    @export(u'美式发音', 3)
    def fld_voiceame(self):
        """Return the audio label matched by the 'us' patterns, or ''."""
        return self._fld_voice(self.get_html(), 'us')

    @staticmethod
    def _inner_html(element):
        """Return the children of ``element`` serialised and joined as text."""
        parts = []
        for content in element.contents:
            text = str(content)
            # Python 2's str() yields a byte string here; decode it so the
            # joined result is always text.
            if isinstance(text, bytes):
                text = text.decode('utf-8')
            parts.append(text)
        return u''.join(parts)

    def _span_contents(self, css_class):
        """Return the inner HTML of each ``<span class="css_class">`` found.

        Only the first regex match in the entry HTML is parsed, so spans
        outside that match are not returned.  The result is an empty list
        when nothing matches.
        """
        pattern = r'<span class="%s"\s*.*>\s*.*<\/span>' % css_class
        matches = re.findall(pattern, self.get_html())
        if not matches:
            return []
        soup = parseHtml(matches[0])
        return [self._inner_html(element)
                for element in soup.findAll('span', {'class': css_class})]

    def _as_list_items(self, items):
        """Wrap each item in ``<li>`` and pass the result through ``_css``.

        Returns '' when ``items`` is empty, so ``_css`` is only called when
        there is content.
        """
        if not items:
            return ''
        return self._css(u''.join(u'<li>%s</li>' % item for item in items))

    @export(u'例句', 4)
    def fld_sentence(self):
        """Return the example sentences as ``<li>`` items, or '' if none."""
        cleaned = []
        for sentence in self._span_contents('example'):
            # Drop each pronunciation anchor.  Non-greedy, so text that sits
            # between two anchors in the same example is preserved.
            sentence = re.sub(r'<a[^>]+?href=\"sound\:.*?\.mp3\".*?</a>', '',
                              sentence)
            # Remove non-breaking spaces and surrounding whitespace only;
            # ordinary spaces between words must be kept.
            sentence = sentence.replace(u'&nbsp;', u'').replace(u'\xa0', u'')
            cleaned.append(sentence.strip())
        return self._as_list_items(cleaned)

    @export(u'释义', 5)
    def fld_definate(self):
        """Return the definitions as ``<li>`` items, or '' if none."""
        return self._as_list_items(self._span_contents('def'))

    @with_styles(cssfile='_ldoce6.css')
    def _css(self, val):
        """Return ``val`` unchanged; ``with_styles`` is applied with '_ldoce6.css'."""
        return val
|
|
|
|
|
|