From 6be229e325140f2fe3684b47af48dfb694e8a168 Mon Sep 17 00:00:00 2001 From: "St.Huang" Date: Sun, 8 Jul 2018 12:32:26 +0800 Subject: [PATCH] add detail definitions to baidu_chinese.fix #9 --- src/fastwq/service/baidu_chinese.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/fastwq/service/baidu_chinese.py b/src/fastwq/service/baidu_chinese.py index 5d918c8..76e8975 100644 --- a/src/fastwq/service/baidu_chinese.py +++ b/src/fastwq/service/baidu_chinese.py @@ -10,14 +10,14 @@ class Baidu_Chinese(WebService): def __init__(self): super(Baidu_Chinese, self).__init__() - def _get_content(self, lang='eng'): - url = u"https://hanyu.baidu.com/s?wd={word}".format(word=self.word) + def _get_content(self): + url = u"https://hanyu.baidu.com/s?wd={}&ptype=zici#basicmean".format(self.word) html = self.get_response(url, timeout=10) soup = parseHtml(html) result = { 'pinyin': '', 'basicmean': '', - 'syn_ant': '', + 'detailmean': '', 'fanyi': '', 'audio_url': '', } @@ -30,14 +30,22 @@ class Baidu_Chinese(WebService): result['pinyin'] = u' '.join(x.get_text() for x in tag) if tag: tag = element.find('a') - result['audio_url'] = tag.get('url') + if tag: + result['audio_url'] = tag.get('url') #基本释义 element = soup.find('div', id='basicmean-wrapper') if element: - tag = element.find_all('p') + tag = element.find_all('dl') if tag: - result['basicmean'] = u'
'.join(x.get_text().strip() for x in tag) + result['basicmean'] = u''.join(str(x) for x in tag) + + #详细释义 + element = soup.find('div', id='detailmean-wrapper') + if element: + tag = element.find_all('dl') + if tag: + result['detailmean'] = u''.join(str(x) for x in tag) #英文翻译 element = soup.find('div', id='fanyi-wrapper') @@ -88,6 +96,10 @@ class Baidu_Chinese(WebService): def fld_basic(self): return self._get_field('basicmean') + @export([u'详细释义', u'Detail Definitions'], 4) + def fld_detail(self): + return self._get_field('detailmean') + @export([u'英文翻译', u'Translation[En]'], 5) def fld_fanyi(self): return self._get_field('fanyi')