165 lines
5.4 KiB
Python
165 lines
5.4 KiB
Python
#-*- coding:utf-8 -*-
|
|
|
|
import os
|
|
from ..base import *
|
|
from ...libs.bs4 import Tag
|
|
|
|
|
|
# Module-level switch read by Longman._fld_mp3: when falsy, the audio fields
# return '' without attempting any download.
longman_download_mp3 = True
|
|
# Module-level switch read by Longman._fld_img: when falsy, the image field
# returns '' without attempting any download.
longman_download_img = True
|
|
|
|
|
|
@register([u'朗文', u'Longman'])
class Longman(WebService):
    """Dictionary service that scrapes entries from www.ldoceonline.com.

    ``_get_from_api`` downloads the entry page for the current word, strips
    page furniture from every ``span.dictlink`` element and hands a dict to
    ``cache_this``.  That dict always contains ``ee`` (the cleaned entry
    HTML).  When the page provides them it also contains ``phonetic``,
    ``hyphenation``, ``pos``, ``inflections``, ``am_mp3``, ``br_mp3`` and
    ``image``.  The ``fld_*`` methods read those values via ``_get_field``.
    """

    def __init__(self):
        super(Longman, self).__init__()

    def _get_from_api(self):
        """Fetch and parse the entry page for the current word.

        Returns whatever ``self.cache_this`` returns for the collected
        ``word_info`` dict.
        """
        url = 'https://www.ldoceonline.com/dictionary/{}'.format(self.quote_word)
        data = self.get_response(url)
        soup = parse_html(data)

        word_info = {}
        body_parts = []
        head_found = False
        # Each span.dictlink holds one dictionary entry for the word.
        for dic_link in soup.find_all('span', {'class': 'dictlink'}):
            assert isinstance(dic_link, Tag)

            # The steps below mutate the tree, so their order matters: media
            # and sub-entries are removed before the Head is looked up.
            # NOTE(review): sound/image URLs are overwritten by every entry
            # (last one wins) while the Head fields come from the first entry
            # that has a Head -- confirm this asymmetry is intended.
            self._pop_sound_urls(dic_link, word_info)
            self._pop_image_url(dic_link, word_info)
            self._strip_extras(dic_link)

            head_tag = dic_link.find('span', {'class': "Head"})
            if head_tag is not None:
                if not head_found:
                    word_info.update(self._parse_head(head_tag))
                    head_found = True
                # The Head is exported through separate fields, so it is
                # dropped from the definition body for every entry.
                head_tag.decompose()

            self._strip_leftovers(dic_link)
            body_parts.append(str(dic_link))

        word_info['ee'] = "".join(body_parts)
        return self.cache_this(word_info)

    def _pop_sound_urls(self, dic_link, word_info):
        """Record the headword audio URLs of *dic_link* and remove their tags."""
        sound_titles = (
            ('am_mp3', 'Play American pronunciation of {}'),
            ('br_mp3', 'Play British pronunciation of {}'),
        )
        # Look up both tags before removing either one, as the tree is
        # modified by decompose().
        found = [
            (key, dic_link.find('span', title=title.format(self.word)))
            for key, title in sound_titles
        ]
        for key, tag in found:
            if tag is not None:
                word_info[key] = tag.get('data-src-mp3', u'')
                tag.decompose()

    def _pop_image_url(self, dic_link, word_info):
        """Record the ``src`` of the first ``<img>`` in *dic_link* and remove it."""
        image_tag = dic_link.find('img')
        if image_tag is not None:
            word_info['image'] = image_tag.get('src')
            image_tag.decompose()

    def _strip_extras(self, dic_link):
        """Remove the related-topics box, the Tail and all SubEntry spans."""
        # Only the first match of these two is removed.
        for name, css_class in (('div', "topics_container"), ('span', 'Tail')):
            tag = dic_link.find(name, {'class': css_class})
            if tag is not None:
                tag.decompose()
        for sub_entry in dic_link.find_all('span', {'class': 'SubEntry'}):
            sub_entry.decompose()

    def _strip_leftovers(self, dic_link):
        """Remove scripts, remaining images and example-sentence sound tags."""
        leftovers = (
            ('script', {}),
            ('img', {}),
            ('span', {'class': 'speaker exafile fa fa-volume-up'}),
        )
        for name, attrs in leftovers:
            for tag in dic_link.find_all(name, attrs):
                tag.decompose()

    def _first_string(self, head_tag, css_class):
        """Return the ``.string`` of the first matching span, or ``u''``."""
        tag = head_tag.find("span", {'class': css_class})
        if tag is None:
            return u''
        # ``.string`` can itself be None (e.g. the span has several
        # children); normalise that to an empty string as well.
        return tag.string or u''

    def _parse_head(self, head_tag):
        """Extract the headword details from a ``span.Head`` tag.

        Returns a dict with the keys ``phonetic``, ``hyphenation``, ``pos``
        and ``inflections``; a missing element yields ``u''``.
        """
        pron_tag = head_tag.find("span", {'class': 'PronCodes'})
        # Pronunciation codes are spread over nested tags, so join all strings.
        pron_codes = u''.join(pron_tag.strings) if pron_tag is not None else u''

        inflections_tag = head_tag.find('span', {'class': 'Inflections'})
        # Inflections are kept as markup rather than as plain text.
        inflections = str(inflections_tag) if inflections_tag is not None else u''

        return {
            'phonetic': pron_codes,
            'hyphenation': self._first_string(head_tag, 'HYPHENATION'),
            'pos': self._first_string(head_tag, 'POS'),
            'inflections': inflections,
        }

    @export('PHON')
    def fld_phonetic(self):
        """Return the cached ``phonetic`` value."""
        return self._get_field('phonetic')

    def _fld_media(self, fld, enabled, ext, label):
        """Download the media behind cached field *fld* and return its Anki label.

        *enabled* is the module-level download switch, *ext* the extension
        passed to ``get_hex_name`` and *label* the kind passed to
        ``get_anki_label``.  Returns ``''`` when downloading is disabled, no
        URL is cached, or the file neither exists nor could be downloaded.
        """
        media_url = self._get_field(fld)
        if not (enabled and media_url):
            return ''
        filename = get_hex_name(self.unique.lower(), media_url, ext)
        # Skip the download when the file already exists on disk.
        if os.path.exists(filename) or self.net_download(filename, media_url):
            return self.get_anki_label(filename, label)
        return ''

    def _fld_mp3(self, fld):
        """Return the Anki audio label for the URL cached under *fld*, or ``''``."""
        return self._fld_media(fld, longman_download_mp3, 'mp3', 'audio')

    def _fld_img(self, fld):
        """Return the Anki image label for the URL cached under *fld*, or ``''``."""
        return self._fld_media(fld, longman_download_img, 'jpg', 'img')

    @export(u'AME_PRON')
    def fld_mp3_us(self):
        """Return the American pronunciation audio field."""
        return self._fld_mp3('am_mp3')

    @export(u'BRE_PRON')
    def fld_mp3_uk(self):
        """Return the British pronunciation audio field."""
        return self._fld_mp3('br_mp3')

    @export('IMAGE')
    def fld_image(self):
        """Return the entry image field."""
        return self._fld_img('image')

    @export([u'断字单词', u'Hyphenation'])
    def fld_hyphenation(self):
        """Return the cached ``hyphenation`` value."""
        return self._get_field('hyphenation')

    @export([u'词性', u'POS'])
    def fld_pos(self):
        """Return the cached ``pos`` (part of speech) value."""
        return self._get_field('pos')

    @export('DEF')
    @with_styles(cssfile='_longman.css')
    def fld_ee(self):
        """Return the cleaned entry HTML cached under ``ee``."""
        return self._get_field('ee')

    @export([u'变形', u'Inflections'])
    @with_styles(cssfile='_longman.css')
    def fld_inflections(self):
        """Return the cached ``inflections`` markup."""
        return self._get_field('inflections')
|