Merge pull request #140 from JavanZhu/master

New Feature and Bugs fix
This commit is contained in:
sthoo 2019-04-12 23:01:00 +08:00 committed by GitHub
commit f0056700d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,7 +1,9 @@
#-*- coding:utf-8 -*- # -*- coding:utf-8 -*-
import os import os
import re import re
from bs4 import Tag
from ..base import * from ..base import *
cambridge_url_base = u'https://dictionary.cambridge.org/' cambridge_url_base = u'https://dictionary.cambridge.org/'
@ -52,29 +54,71 @@ class Cambridge(WebService):
if snd: if snd:
result['pronunciation'][pn+'mp3'] = cambridge_url_base + snd.get('data-src-mp3') result['pronunciation'][pn+'mp3'] = cambridge_url_base + snd.get('data-src-mp3')
header_found = True header_found = True
# 词性
pg = element.find('span', class_='posgram ico-bg')
# 义 # 义
senses = element.find_all('div', class_='sense-block') if 'english-chinese-simplified' in self._get_url():
senses = element.find_all('div', id=re.compile("english-chinese-simplified*"))
elif 'english-chinese-traditional' in self._get_url():
senses = element.find_all('div', id=re.compile("english-chinese-traditional*"))
else:
senses = element.find_all('div', id=re.compile("cald4*"))
# proficiency之类的词语
if not senses:
senses = element.find_all('div', id=re.compile("cbed*"))
# shoplift之类的词语
if not senses:
senses = element.find_all('div', id=re.compile("cacd*"))
# 词性
span_posgram = element.find('span', class_='posgram ico-bg')
pos_gram = (span_posgram.get_text() if span_posgram else '')
if senses: if senses:
for sense in senses: for sense in senses:
dbs = sense.find_all('div', class_='def-block pad-indent') # 像ambivalent之类词语含有ambivalence解释词性不同
if dbs: runon_title = None
if sense['class'][0] == 'runon':
runon_pos = sense.find('span', class_='pos')
runon_gram = sense.find('span', class_='gram')
if runon_pos is not None:
pos_gram = runon_pos.get_text() + (runon_gram.get_text() if runon_gram else '')
h3_rt = sense.find('h3', class_='runon-title')
runon_title = (h3_rt.get_text() if h3_rt else None)
sense_body = sense.find('div', class_=re.compile("sense-body|runon-body pad-indent"))
if sense_body:
l = result['def_list'] l = result['def_list']
for db in dbs: for block in sense_body:
i = sense.find('span', class_='def-info') if isinstance(block, Tag) is not True:
d = db.find('b', class_='def') continue
tran = db.find('span', class_='trans')
examps = db.find_all('div', class_='examp emphasized') phrase = None
block_type = block['class'][0]
if block_type == 'def-block':
pass
elif block_type == 'phrase-block':
phrase_header = block.find('span', class_='phrase-head')
phrase = phrase_header.get_text() if phrase_header else None
pass
elif block_type == 'runon-body':
pass
else:
continue
span_df = block.find('span', class_='def-info')
def_info = (span_df.get_text().replace('', '') if span_df else '')
d = block.find('b', class_='def')
tran = block.find('span', class_='trans')
examps = block.find_all('div', class_='examp emphasized')
l.append( l.append(
u'<li>{0}{1}{2} {3}{4}</li>'.format( u'<li>{0}{1}{2}{3}{4} {5}{6}</li>'.format(
'<span class="epp-xref">{0}</span>'.format(pg.get_text() if pg else ''), '<span class="epp-xref">{0}</span>'.format(pos_gram) if pos_gram != '' else '',
'<span class="epp-xref">{0}</span>'.format(runon_title) if runon_title else '',
'<span class="epp-xref">{0}</span>'.format(phrase) if phrase else '',
'<span class="epp-xref">{0}</span>'.format(def_info) if def_info.strip() != '' else '',
'<b class="def">{0}</b>'.format(d.get_text()) if d else u'',
u'<span class="epp-xref">{0}</span>'.format(i.get_text()) if i else u'', '<span class="trans">{0}</span>'.format(tran.get_text()) if tran else '',
u'<b class="def">{0}</b>'.format(d.get_text()) if d else u'',
u'<span class="trans">{0}</span>'.format(tran.get_text()) if tran else u'',
u''.join( u''.join(
u'<div class="examp">{0}</div>'.format(e.get_text()) if e else u'' u'<div class="examp">{0}</div>'.format(e.get_text()) if e else u''
for e in examps for e in examps