anki-word-query/addons/fastwq/service/dict/oxford_learning.py
2018-07-31 11:53:18 +08:00

277 lines
7.5 KiB
Python

# coding=utf-8
#from warnings import filterwarnings
from ..base import *
from ...libs.bs4 import Tag
@register([u'牛津学习词典', u'Oxford Learner'])
class OxfordLearning(WebService):
    """Word-query service backed by the Oxford Learner's Dictionaries site.

    One page is fetched per word; the phonetics, part of speech, definition
    markup and the two pronunciation URLs extracted from it are stored in a
    single cache entry, from which the exported ``fld_*`` fields are served.

    ``WebService``, ``register``, ``export``, ``with_styles`` and
    ``get_hex_name`` come from the project's ``base`` module.
    """

    def __init__(self):
        super(OxfordLearning, self).__init__()

    def query(self, word):
        """Fetch and parse the dictionary page for *word*.

        The request is attempted up to 10 times when it (or the parsing of
        its result) raises; an empty response is not retried.

        :param word: the word as it should appear in the URL path
        :return: the parsed page, or ``None`` when nothing could be fetched
        :rtype: OxfordLearningDictWord
        """
        qry_url = u'https://www.oxfordlearnersdictionaries.com/definition/english/{}'.format(word)
        for _ in range(10):
            try:
                rsp = self.get_response(qry_url, timeout=15)
                if not rsp:
                    return None
                return OxfordLearningDictWord(rsp.decode('utf-8'))
            except Exception:
                # Best-effort retry.  ``Exception`` (rather than a bare
                # ``except``) lets KeyboardInterrupt/SystemExit propagate
                # instead of being swallowed and retried.
                continue
        return None

    def _get_single_dict(self, single_dict):
        """Return the cached value stored under the key *single_dict*.

        When the key is not cached, or its cached value is falsy, the page is
        queried and every field is (re)written to the cache in one go.

        :param single_dict: one of ``'phonetic'``, ``'pos'``, ``'ee'``,
            ``'s_bre'``, ``'s_ame'``
        """
        if not (self.cached(single_dict) and self.cache_result(single_dict)):
            # Every key starts out empty so a failed lookup still leaves a
            # complete entry behind.
            fields = dict.fromkeys(('phonetic', 'pos', 'ee', 's_bre', 's_ame'), '')
            web_word = self.query(self.quote_word)
            if web_word:
                fields.update({
                    # Unicode template: a byte-string template cannot format
                    # non-ASCII (IPA) text on Python 2.
                    'phonetic': u'{} {}'.format(web_word.wd_phon_bre, web_word.wd_phon_nam),
                    'pos': web_word.wd_pos,
                    'ee': u''.join(web_word.definitions_html),
                    's_bre': web_word.wd_sound_url_bre,
                    's_ame': web_word.wd_sound_url_nam,
                })
            self.cache_this(fields)
        return self.cache_result(single_dict)

    def _get_sound(self, cache_key):
        """Download the audio whose URL is cached under *cache_key*.

        :return: the value of ``get_anki_label`` for the downloaded file, or
            ``''`` when there is no URL or the download fails
        """
        url = self._get_single_dict(cache_key)
        if not url:
            return ''
        filename = get_hex_name(self.unique.lower(), url, 'mp3')
        if self.download(url, filename):
            return self.get_anki_label(filename, 'audio')
        return ''

    @export('PHON')
    def fld_phonetic(self):
        """Phonetic field: British then North American transcription."""
        return self._get_single_dict('phonetic')

    @export([u'词性', u'POS'])
    def fld_pos(self):
        """Part-of-speech field."""
        return self._get_single_dict('pos')

    @export('DEF')
    @with_styles(cssfile='_oxford.css')
    def fld_ee(self):
        """Definition field: the cleaned definition markup of the page."""
        return self._get_single_dict('ee')

    def get_sound_bre(self):
        """Return the British pronunciation audio field, or ``''``."""
        return self._get_sound('s_bre')

    def get_sound_ame(self):
        """Return the North American pronunciation audio field, or ``''``."""
        return self._get_sound('s_ame')

    @export('BRE_PRON')
    def fld_sound_bre(self):
        """British pronunciation field."""
        return self.get_sound_bre()

    @export('AME_PRON')
    def fld_sound_ame(self):
        """North American pronunciation field."""
        return self.get_sound_ame()

    @export([u'英式发音优先', u'British Pronunciation First'])
    def fld_sound_pri(self):
        """British pronunciation if available, otherwise North American."""
        bre = self.get_sound_bre()
        return bre if bre else self.get_sound_ame()
class OxfordLearningDictWord(object):
    """Accessors over the parsed markup of one dictionary entry page.

    The markup is parsed with ``parse_html`` and queried through its
    ``find`` / ``find_all`` interface.  Every ``wd_*`` accessor degrades to
    an empty value when the element it looks for is missing, and an instance
    built from empty markup behaves like a page on which nothing was found.
    """

    # Class names whose elements are dropped from the definition markup.
    _DECOMPOSE_CLASSES = ('xr-gs', 'sound', 'heading', 'topic', 'collapse', 'oxford3000')
    # Attributes stripped from every tag of the definition markup.
    _REMOVED_ATTRS = ('dpsid', 'id', 'psg', 'reg')
    # Errors raised when an expected element, child or attribute is absent.
    _LOOKUP_ERRORS = (AttributeError, IndexError, KeyError, TypeError)

    def __init__(self, markups):
        """
        :param markups: page markup as text; may be empty or ``None``
        """
        self.markups = markups
        # All attributes are always defined so that the accessors below stay
        # usable (and simply come back empty) when there is no markup.
        self.bs = parse_html(markups) if markups else None
        self._defs = []
        self._defs_html = []

    @staticmethod
    def _cls_dic(class_nm):
        """Build the attribute filter matching the class *class_nm*."""
        return {'class': class_nm}

    @staticmethod
    def _find(root, name, class_nm, **kwargs):
        """``root.find`` filtered by class; ``None`` when *root* is ``None``."""
        if root is None:
            return None
        return root.find(name, {'class': class_nm}, **kwargs)

    # region Tags
    @property
    def tag_web_top(self):
        """
        word - class: h
        pos - class: pos
        :rtype: Tag
        """
        return self._find(self.bs, "div", 'webtop-g')

    @property
    def tag_pron(self):
        """
        Container of the pronunciation blocks.
        :rtype: Tag
        """
        return self._find(self.bs, "div", 'pron-gs ei-g')

    @property
    def tag_phon_bre(self):
        """
        Pronunciation block whose ``geo`` attribute is ``br``.
        :rtype: Tag
        """
        return self._find(self.tag_pron, 'span', 'pron-g', geo='br')

    @property
    def tag_phon_nam(self):
        """
        Pronunciation block whose ``geo`` attribute is ``n_am``.
        :rtype: Tag
        """
        return self._find(self.tag_pron, 'span', 'pron-g', geo='n_am')

    # ---- Explains
    @property
    def tag_explain(self):
        """
        First ``span`` with class ``sn-gs`` (source of the definitions).
        :rtype: Tag
        """
        return self._find(self.bs, 'span', 'sn-gs')
    # endregion

    def _phon_text(self, tag_pron_g):
        """Format ``"<prefix> /<phon>/"`` from one pronunciation block.

        Either part is left empty when it cannot be found.  Unicode templates
        are used because a byte-string template cannot format non-ASCII
        (IPA) text on Python 2.
        """
        try:
            # The transcription is read from the fourth child node of the
            # ``phon`` span, which may be a tag or a plain string.
            node = tag_pron_g.find('span', self._cls_dic('phon')).contents[3]
            phon = u'/{}/'.format(node.text if isinstance(node, Tag) else node)
        except self._LOOKUP_ERRORS:
            phon = u''
        try:
            # ``.string`` can be None; never render that as the text "None".
            prefix = tag_pron_g.find('span', self._cls_dic('prefix')).string or u''
        except self._LOOKUP_ERRORS:
            prefix = u''
        return u"{} {}".format(prefix, phon)

    def _sound_url(self, tag_pron_g, button_cls):
        """Return the ``data-src-mp3`` of the audio button, or ``''``."""
        try:
            return tag_pron_g.find('div', self._cls_dic(button_cls))['data-src-mp3']
        except self._LOOKUP_ERRORS:
            return ''

    @property
    def wd_phon_bre(self):
        """
        :return: ``"<prefix> /<phon>/"`` of the ``br`` pronunciation block
        """
        return self._phon_text(self.tag_phon_bre)

    @property
    def wd_pos(self):
        """
        :return: text of the ``pos`` span in the page header, or ``''``
        """
        try:
            return self.tag_web_top.find("span", 'pos').text
        except self._LOOKUP_ERRORS:
            return ''

    @property
    def wd_phon_nam(self):
        """
        :return: ``"<prefix> /<phon>/"`` of the ``n_am`` pronunciation block
        """
        return self._phon_text(self.tag_phon_nam)

    @property
    def wd_sound_url_bre(self):
        """
        :return: mp3 URL of the ``br`` block's audio button, or ``''``
        """
        return self._sound_url(self.tag_phon_bre, 'sound audio_play_button pron-uk icon-audio')

    @property
    def wd_sound_url_nam(self):
        """
        :return: mp3 URL of the ``n_am`` block's audio button, or ``''``
        """
        return self._sound_url(self.tag_phon_nam, 'sound audio_play_button pron-us icon-audio')

    def get_definitions(self):
        """Extract the definitions, remembering a non-empty result.

        Each ``li`` under the cleaned definition element yields one entry;
        when there is no ``li`` the element itself is the single entry.

        :return: ``(texts, html_fragments)``, two parallel lists
        """
        if not self._defs:
            tag_exp = self.tag_explain
            if tag_exp is not None:
                tag_exp = self._clean(tag_exp)
                items = tag_exp.find_all('li') or [tag_exp]
                self._defs = [item.text for item in items]
                # ``prettify()`` already returns text; wrapping it in
                # ``str()`` breaks on non-ASCII content under Python 2.
                self._defs_html = [item.prettify() for item in items]
        return self._defs, self._defs_html

    @property
    def definitions(self):
        """Plain-text definitions (see ``get_definitions``)."""
        return self.get_definitions()[0]

    @property
    def definitions_html(self):
        """Definition markup fragments (see ``get_definitions``)."""
        return self.get_definitions()[1]

    def _clean(self, tg):
        """Strip unwanted elements and attributes from *tg*, in place.

        Descendants carrying one of ``_DECOMPOSE_CLASSES`` are removed, then
        the attributes in ``_REMOVED_ATTRS`` are dropped from *tg* and every
        remaining descendant tag.

        :type tg: Tag
        :return: *tg* itself
        """
        if not tg:
            return tg
        # One search per class over the whole subtree, instead of repeating
        # all searches for every nested tag.
        for _cls in self._DECOMPOSE_CLASSES:
            for _tg in tg.find_all(attrs=self._cls_dic(_cls), recursive=True):
                _tg.decompose()
        for node in [tg] + tg.find_all(True):
            node.attrs = {key: value for key, value in node.attrs.items()
                          if key not in self._REMOVED_ATTRS}
        return tg