# Listing metadata carried over from the source viewer: 277 lines, 7.5 KiB, Python.
# coding=utf-8
 | 
						|
#from warnings import filterwarnings
 | 
						|
 | 
						|
from ..base import *
 | 
						|
from ...libs.bs4 import Tag
 | 
						|
 | 
						|
 | 
						|
@register([u'牛津学习词典', u'Oxford Learner'])
class OxfordLearning(WebService):
    """Dictionary service that scrapes oxfordlearnersdictionaries.com.

    A single page fetch fills five cached values (``phonetic``, ``pos``,
    ``ee``, ``s_bre``, ``s_ame``).  The exported ``fld_*`` methods read
    those values back through :meth:`_get_single_dict`.
    """

    def __init__(self):
        super(OxfordLearning, self).__init__()

    def query(self, word):
        """
        Fetch and parse the dictionary page for ``word``.

        The fetch-and-parse step is attempted up to ten times when it
        raises.  An empty response is not retried.

        :param word: headword inserted verbatim into the page URL
            (NOTE(review): the only caller passes ``self.quote_word``,
            presumably already URL-quoted -- confirm)
        :rtype: OxfordLearningDictWord or None
        :return: the parsed page, or ``None`` when the response was empty
            or every attempt failed
        """
        qry_url = u'https://www.oxfordlearnersdictionaries.com/definition/english/{}'.format(word)

        attempts_left = 10
        while attempts_left:
            try:
                rsp = self.get_response(qry_url, timeout=15)
                if not rsp:
                    # Nothing came back; retrying is pointless.
                    return None
                return OxfordLearningDictWord(rsp.decode('utf-8'))
            except Exception:
                # Best-effort scraping: any fetch/decode/parse failure costs
                # one attempt.  ``Exception`` (rather than a bare except)
                # lets KeyboardInterrupt and SystemExit propagate.
                attempts_left -= 1
        return None

    def _get_single_dict(self, single_dict):
        """
        Return the cached value stored under ``single_dict``.

        When the key is not cached, or its cached value is falsy, the page
        is queried and all five values are cached together.  A failed
        lookup caches empty strings.

        :param single_dict: one of ``'phonetic'``, ``'pos'``, ``'ee'``,
            ``'s_bre'``, ``'s_ame'``
        """
        if self.cached(single_dict) and self.cache_result(single_dict):
            return self.cache_result(single_dict)

        web_word = self.query(self.quote_word)
        if web_word:
            values = {
                'phonetic': '{} {}'.format(web_word.wd_phon_bre, web_word.wd_phon_nam),
                'pos': web_word.wd_pos,
                'ee': ''.join(web_word.definitions_html),
                's_bre': web_word.wd_sound_url_bre,
                's_ame': web_word.wd_sound_url_nam,
            }
        else:
            values = {
                'phonetic': '',
                'pos': '',
                'ee': '',
                's_bre': '',
                's_ame': '',
            }
        self.cache_this(values)
        return self.cache_result(single_dict)

    @export('PHON')
    def fld_phonetic(self):
        """Exported field: British and American phonetic transcriptions."""
        return self._get_single_dict('phonetic')

    @export([u'词性', u'POS'])
    def fld_pos(self):
        """Exported field: part of speech."""
        return self._get_single_dict('pos')

    @export('DEF')
    @with_styles(cssfile='_oxford.css')
    def fld_ee(self):
        """Exported field: definitions as HTML."""
        return self._get_single_dict('ee')

    def _get_sound(self, cache_key):
        """
        Download the audio file whose URL is cached under ``cache_key``.

        :param cache_key: ``'s_bre'`` or ``'s_ame'``
        :return: the value of ``self.get_anki_label(filename, 'audio')``,
            or ``''`` when there is no URL or the download fails
        """
        url = self._get_single_dict(cache_key)
        if not url:
            # No audio on the page: skip building a file name for nothing.
            return ''
        filename = get_hex_name(self.unique.lower(), url, 'mp3')
        if self.download(url, filename):
            return self.get_anki_label(filename, 'audio')
        return ''

    def get_sound_bre(self):
        """Return the label for the British audio, or ``''`` if unavailable."""
        return self._get_sound('s_bre')

    def get_sound_ame(self):
        """Return the label for the American audio, or ``''`` if unavailable."""
        return self._get_sound('s_ame')

    @export('BRE_PRON')
    def fld_sound_bre(self):
        """Exported field: British pronunciation audio."""
        return self.get_sound_bre()

    @export('AME_PRON')
    def fld_sound_ame(self):
        """Exported field: American pronunciation audio."""
        return self.get_sound_ame()

    @export([u'英式发音优先', u'British Pronunciation First'])
    def fld_sound_pri(self):
        """Exported field: British audio, falling back to American audio."""
        bre = self.get_sound_bre()
        return bre if bre else self.get_sound_ame()
 | 
						|
 | 
						|
 | 
						|
class OxfordLearningDictWord(object):
    """Read-only view over one parsed Oxford Learner's Dictionaries page.

    Every ``wd_*`` accessor is best-effort: when the expected markup is
    missing it yields an empty value instead of raising.
    """

    # Descendants carrying one of these classes are removed by ``_clean``.
    _DECOMPOSE_CLASSES = ('xr-gs', 'sound', 'heading', 'topic', 'collapse', 'oxford3000')
    # Attributes stripped from every tag visited by ``_clean``.
    _REMOVED_ATTRS = ('dpsid', 'id', 'psg', 'reg')

    def __init__(self, markups):
        """
        :param markups: HTML text of the entry page; may be empty or ``None``
        """
        # Every attribute is set even for empty markup, so an instance built
        # from nothing can still be queried without AttributeError.
        self.markups = markups
        self.bs = parse_html(markups) if markups else None
        self._defs = []
        self._defs_html = []

    @staticmethod
    def _cls_dic(class_nm):
        """Build the attribute filter that matches ``class_nm``."""
        return {'class': class_nm}

    def _find_in(self, parent, name, class_nm, **attrs):
        """Run ``parent.find`` with a class filter; ``None`` parent gives ``None``."""
        if parent is None:
            return None
        return parent.find(name, self._cls_dic(class_nm), **attrs)

    # region Tags
    @property
    def tag_web_top(self):
        """
        Header block of the entry.

        word - class: h
        pos - class: pos

        :rtype: Tag
        """
        return self._find_in(self.bs, "div", 'webtop-g')

    @property
    def tag_pron(self):
        """
        Pronunciation block of the entry.

        :rtype: Tag
        """
        return self._find_in(self.bs, "div", 'pron-gs ei-g')

    @property
    def tag_phon_bre(self):
        """
        British pronunciation group (``geo="br"``), or ``None`` if absent.

        :rtype: Tag
        """
        return self._find_in(self.tag_pron, 'span', 'pron-g', geo='br')

    @property
    def tag_phon_nam(self):
        """
        North American pronunciation group (``geo="n_am"``), or ``None``.

        :rtype: Tag
        """
        return self._find_in(self.tag_pron, 'span', 'pron-g', geo='n_am')

    # ---- Explains
    @property
    def tag_explain(self):
        """
        Container of the sense/definition list, or ``None`` if absent.

        :rtype: Tag
        """
        return self._find_in(self.bs, 'span', 'sn-gs')

    # endregion

    def _format_phon(self, tag_phon):
        """
        Format the prefix and transcription found inside ``tag_phon``.

        :param tag_phon: a pronunciation group tag, or ``None``
        :return: ``"<prefix> <phon>"``; either part is empty when missing
        """
        try:
            # NOTE(review): index 3 is tied to the page layout of the
            # ``phon`` span -- confirm against the current site markup.
            node = tag_phon.find('span', self._cls_dic('phon')).contents[3]
            phon = '/{}/'.format(node.text if isinstance(node, Tag) else node)
        except Exception:
            phon = ''
        try:
            # ``.string`` is None for a span with mixed children; fall back
            # to '' so the literal text "None" never reaches the output.
            prefix = tag_phon.find('span', self._cls_dic('prefix')).string or ''
        except Exception:
            prefix = ''
        return "{} {}".format(prefix, phon)

    def _sound_url(self, tag_phon, button_cls):
        """
        Read ``data-src-mp3`` from the audio button inside ``tag_phon``.

        :return: the URL, or ``''`` when the tag, button or attribute is missing
        """
        try:
            return tag_phon.find('div', self._cls_dic(button_cls))['data-src-mp3']
        except Exception:
            return ''

    @property
    def wd_phon_bre(self):
        """
        British transcription.

        :return: pre_fix, phon
        """
        return self._format_phon(self.tag_phon_bre)

    @property
    def wd_pos(self):
        """Part of speech shown in the entry header, or ``''``."""
        try:
            return self.tag_web_top.find("span", 'pos').text
        except Exception:
            return ''

    @property
    def wd_phon_nam(self):
        """
        North American transcription.

        :return: pre_fix, phon
        """
        return self._format_phon(self.tag_phon_nam)

    @property
    def wd_sound_url_bre(self):
        """URL of the British audio file, or ``''``."""
        return self._sound_url(self.tag_phon_bre, 'sound audio_play_button pron-uk icon-audio')

    @property
    def wd_sound_url_nam(self):
        """URL of the North American audio file, or ``''``."""
        return self._sound_url(self.tag_phon_nam, 'sound audio_play_button pron-us icon-audio')

    def get_definitions(self):
        """
        Extract the definitions once and memoise them on the instance.

        Each ``<li>`` under the definition container is one definition.
        When there is no ``<li>``, the whole container counts as a single
        definition.

        :return: ``(plain_texts, html_fragments)`` as two parallel lists
        """
        explain = self.tag_explain
        if explain and not self._defs:
            cleaned = self._clean(explain)
            items = cleaned.find_all('li') or [cleaned]
            self._defs = [item.text for item in items]
            self._defs_html = [str(item.prettify()) for item in items]
        return self._defs, self._defs_html

    @property
    def definitions(self):
        """Plain-text definitions."""
        return self.get_definitions()[0]

    @property
    def definitions_html(self):
        """Definitions as prettified HTML fragments."""
        return self.get_definitions()[1]

    def _clean(self, tg):
        """
        Strip unwanted descendants and attributes from ``tg``, in place.

        :type tg: Tag
        :return: ``tg`` itself
        """
        if not tg:
            return tg

        # Descendants are pruned only below tags that carry a class attribute.
        if tg.attrs and 'class' in tg.attrs:
            for unwanted_cls in self._DECOMPOSE_CLASSES:
                for match in tg.find_all(attrs=self._cls_dic(unwanted_cls), recursive=True):
                    match.decompose()

        try:
            tg.attrs = {key: value for key, value in tg.attrs.items()
                        if key not in self._REMOVED_ATTRS}
        except ValueError:
            # Defensive guard kept from the original implementation.
            pass

        # Recurse into element children only; text nodes have nothing to clean.
        for child in tg.children:
            if isinstance(child, Tag):
                self._clean(child)
        return tg
 |