170 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			170 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#-*- coding:utf-8 -*-
 | 
						|
 | 
						|
import os
 | 
						|
import re
 | 
						|
from ..base import *
 | 
						|
from ...libs.bs4 import Tag
 | 
						|
 | 
						|
 | 
						|
longman_download_mp3 = True
 | 
						|
longman_download_img = True
 | 
						|
 | 
						|
 | 
						|
@register([u'朗文', u'Longman'])
 | 
						|
class Longman(WebService):
 | 
						|
 | 
						|
    def __init__(self):
 | 
						|
        super(Longman, self).__init__()
 | 
						|
 | 
						|
    def _get_from_api(self):
 | 
						|
        url = 'https://www.ldoceonline.com/dictionary/{}'.format(self.quote_word)
 | 
						|
        data = self.get_response(url)
 | 
						|
        soup = parse_html(data)
 | 
						|
        # Top Container
 | 
						|
        dictlinks = soup.find_all('span', {'class': 'dictlink'})
 | 
						|
        body_html = ""
 | 
						|
        word_info = {}
 | 
						|
        head_finded = False
 | 
						|
        for dic_link in dictlinks:
 | 
						|
            assert isinstance(dic_link, Tag)
 | 
						|
 | 
						|
            # remove sound tag
 | 
						|
            am_s_tag = dic_link.find('span', title='Play American pronunciation of {}'.format(self.word))
 | 
						|
            br_s_tag = dic_link.find('span', title='Play British pronunciation of {}'.format(self.word))
 | 
						|
            if am_s_tag:
 | 
						|
                word_info['am_mp3'] = am_s_tag.get('data-src-mp3', u'')
 | 
						|
                am_s_tag.decompose()
 | 
						|
            if br_s_tag:
 | 
						|
                word_info['br_mp3'] = br_s_tag.get('data-src-mp3', u'')
 | 
						|
                br_s_tag.decompose()
 | 
						|
 | 
						|
            # remove image
 | 
						|
            image_tag = dic_link.find('img')
 | 
						|
            if image_tag:
 | 
						|
                word_info['image'] = image_tag.get('src', u'')
 | 
						|
                image_tag.decompose()
 | 
						|
 | 
						|
            # Remove related Topics Container
 | 
						|
            related_topic_tag = dic_link.find('div', {'class': "topics_container"})
 | 
						|
            if related_topic_tag:
 | 
						|
                related_topic_tag.decompose()
 | 
						|
 | 
						|
            # Remove Tail
 | 
						|
            tail_tag = dic_link.find("span", {'class': 'Tail'})
 | 
						|
            if tail_tag:
 | 
						|
                tail_tag.decompose()
 | 
						|
 | 
						|
            # Remove SubEntry
 | 
						|
            sub_entries = dic_link.find_all('span', {'class': 'SubEntry'})
 | 
						|
            for sub_entry in sub_entries:
 | 
						|
                sub_entry.decompose()
 | 
						|
 | 
						|
            # word elements
 | 
						|
            head_tag = dic_link.find('span', {'class': "Head"})
 | 
						|
            if head_tag and not head_finded:
 | 
						|
                try:
 | 
						|
                    hyphenation = head_tag.find("span", {'class': 'HYPHENATION'}).string  # Hyphenation
 | 
						|
                except:
 | 
						|
                    hyphenation = u''
 | 
						|
                try:
 | 
						|
                    pron_codes = u''.join(
 | 
						|
                        list(head_tag.find("span", {'class': 'PronCodes'}).strings))  # Hyphenation
 | 
						|
                except:
 | 
						|
                    pron_codes = u''
 | 
						|
                try:
 | 
						|
                    POS = head_tag.find("span", {'class': 'POS'}).string  # Hyphenation
 | 
						|
                except:
 | 
						|
                    POS = u''
 | 
						|
 | 
						|
                try:
 | 
						|
                    Inflections = head_tag.find('span', {'class': 'Inflections'})
 | 
						|
                    if Inflections:
 | 
						|
                        Inflections = str(Inflections)
 | 
						|
                    else:
 | 
						|
                        Inflections = u''
 | 
						|
                except:
 | 
						|
                    Inflections = u''
 | 
						|
 | 
						|
                word_info['phonetic'] = pron_codes
 | 
						|
                word_info['hyphenation'] = hyphenation
 | 
						|
                word_info['pos'] = POS
 | 
						|
                word_info['inflections'] = Inflections
 | 
						|
                head_finded = True
 | 
						|
                #self.cache_this(word_info)
 | 
						|
            if head_tag:
 | 
						|
                head_tag.decompose()
 | 
						|
 | 
						|
            # remove script tag
 | 
						|
            script_tags = dic_link.find_all('script')
 | 
						|
            for t in script_tags:
 | 
						|
                t.decompose()
 | 
						|
 | 
						|
            # remove img tag
 | 
						|
            img_tags = dic_link.find_all('img')
 | 
						|
            for t in img_tags:
 | 
						|
                t.decompose()
 | 
						|
 | 
						|
            # remove example sound tag
 | 
						|
            emp_s_tags = dic_link.find_all('span', {'class': 'speaker exafile fa fa-volume-up'})
 | 
						|
            for t in emp_s_tags:
 | 
						|
                t.decompose()
 | 
						|
 | 
						|
            body_html += str(dic_link)
 | 
						|
 | 
						|
        word_info['ee'] = body_html
 | 
						|
        return self.cache_this(word_info)
 | 
						|
 | 
						|
    @export('PHON')
 | 
						|
    def fld_phonetic(self):
 | 
						|
        return self._get_field('phonetic')
 | 
						|
    
 | 
						|
    def _fld_mp3(self, fld):
 | 
						|
        audio_url = self._get_field(fld)
 | 
						|
        if longman_download_mp3 and audio_url:
 | 
						|
            filename = get_hex_name(self.unique.lower(), audio_url, 'mp3')
 | 
						|
            if os.path.exists(filename) or self.net_download(filename, audio_url):
 | 
						|
                return self.get_anki_label(filename, 'audio')
 | 
						|
        return ''
 | 
						|
 | 
						|
    def _fld_img(self, fld):
 | 
						|
        img_url = self._get_field(fld)
 | 
						|
        if longman_download_img and img_url:
 | 
						|
            # img_url -> https://.../ldoce_XXX.jpg?version=A.B.CC
 | 
						|
            img_url_no_version = re.sub(r'\?version=.*?$', '', img_url)
 | 
						|
            # file extension isn't always jpg
 | 
						|
            file_extension = os.path.splitext(img_url_no_version)[1][1:].strip().lower()
 | 
						|
            filename = get_hex_name(self.unique.lower(), img_url, file_extension)
 | 
						|
            if os.path.exists(filename) or self.net_download(filename, img_url):
 | 
						|
                return self.get_anki_label(filename, 'img')
 | 
						|
        return ''
 | 
						|
 | 
						|
    @export(u'AME_PRON')
 | 
						|
    def fld_mp3_us(self):
 | 
						|
        return self._fld_mp3('am_mp3')
 | 
						|
 | 
						|
    @export(u'BRE_PRON')
 | 
						|
    def fld_mp3_uk(self):
 | 
						|
        return self._fld_mp3('br_mp3')
 | 
						|
 | 
						|
    @export('IMAGE')
 | 
						|
    def fld_image(self):
 | 
						|
        return self._fld_img('image')
 | 
						|
 | 
						|
    @export([u'断字单词', u'Hyphenation'])
 | 
						|
    def fld_hyphenation(self):
 | 
						|
        return self._get_field('hyphenation')
 | 
						|
 | 
						|
    @export([u'词性', u'POS'])
 | 
						|
    def fld_pos(self):
 | 
						|
        return self._get_field('pos')
 | 
						|
 | 
						|
    @export('DEF')
 | 
						|
    @with_styles(cssfile='_longman.css')
 | 
						|
    def fld_ee(self):
 | 
						|
        return self._get_field('ee')
 | 
						|
 | 
						|
    @export([u'变形', u'Inflections'])
 | 
						|
    @with_styles(cssfile='_longman.css')
 | 
						|
    def fld_inflections(self):
 | 
						|
        return self._get_field('inflections')
 |