Add detail definitions to baidu_chinese. Fix #9

This commit is contained in:
St.Huang 2018-07-08 12:32:26 +08:00
parent f73111fc7c
commit 6be229e325

View File

@ -10,14 +10,14 @@ class Baidu_Chinese(WebService):
def __init__(self): def __init__(self):
super(Baidu_Chinese, self).__init__() super(Baidu_Chinese, self).__init__()
def _get_content(self, lang='eng'): def _get_content(self):
url = u"https://hanyu.baidu.com/s?wd={word}".format(word=self.word) url = u"https://hanyu.baidu.com/s?wd={}&ptype=zici#basicmean".format(self.word)
html = self.get_response(url, timeout=10) html = self.get_response(url, timeout=10)
soup = parseHtml(html) soup = parseHtml(html)
result = { result = {
'pinyin': '', 'pinyin': '',
'basicmean': '', 'basicmean': '',
'syn_ant': '', 'detailmean': '',
'fanyi': '', 'fanyi': '',
'audio_url': '', 'audio_url': '',
} }
@ -30,14 +30,22 @@ class Baidu_Chinese(WebService):
result['pinyin'] = u' '.join(x.get_text() for x in tag) result['pinyin'] = u' '.join(x.get_text() for x in tag)
if tag: if tag:
tag = element.find('a') tag = element.find('a')
result['audio_url'] = tag.get('url') if tag:
result['audio_url'] = tag.get('url')
#基本释义 #基本释义
element = soup.find('div', id='basicmean-wrapper') element = soup.find('div', id='basicmean-wrapper')
if element: if element:
tag = element.find_all('p') tag = element.find_all('dl')
if tag: if tag:
result['basicmean'] = u'<br>'.join(x.get_text().strip() for x in tag) result['basicmean'] = u''.join(str(x) for x in tag)
#详细释义
element = soup.find('div', id='detailmean-wrapper')
if element:
tag = element.find_all('dl')
if tag:
result['detailmean'] = u''.join(str(x) for x in tag)
#英文翻译 #英文翻译
element = soup.find('div', id='fanyi-wrapper') element = soup.find('div', id='fanyi-wrapper')
@ -88,6 +96,10 @@ class Baidu_Chinese(WebService):
def fld_basic(self): def fld_basic(self):
return self._get_field('basicmean') return self._get_field('basicmean')
# Exported field accessor for the detailed definitions.
# Registered through @export with the labels u'详细释义' / u'Detail Definitions'
# and the number 4 (presumably the field's display position -- TODO confirm
# against the export decorator).
# Returns whatever self._get_field('detailmean') yields; 'detailmean' is the
# result key that _get_content fills from the 'detailmean-wrapper' div.
@export([u'详细释义', u'Detail Definitions'], 4)
def fld_detail(self):
return self._get_field('detailmean')
@export([u'英文翻译', u'Translation[En]'], 5) @export([u'英文翻译', u'Translation[En]'], 5)
def fld_fanyi(self): def fld_fanyi(self):
return self._get_field('fanyi') return self._get_field('fanyi')