diff --git a/src/fastwq/service/baidu_chinese.py b/src/fastwq/service/baidu_chinese.py
new file mode 100644
index 0000000..5d918c8
--- /dev/null
+++ b/src/fastwq/service/baidu_chinese.py
@@ -0,0 +1,93 @@
+#-*- coding:utf-8 -*-
+from hashlib import sha1
+from .base import WebService, export, register, with_styles, parseHtml
+
+# Module-level switch read by Baidu_Chinese.fld_pron: when true (and an audio
+# URL was found) the pronunciation MP3 is downloaded; otherwise fld_pron
+# returns an empty string.
+baidu_download_mp3 = True
+
+@register([u'百度汉语', u'Baidu Chinese'])
+class Baidu_Chinese(WebService):
+    """
+    Query service that scrapes https://hanyu.baidu.com/s?wd=<word>.
+
+    A single page is fetched per word and parsed into pinyin, basic
+    definitions, an English translation and a pronunciation audio URL.
+    Each exported field reads its value from that parsed result.
+    """
+
+    def __init__(self):
+        super(Baidu_Chinese, self).__init__()
+
+    def _get_content(self, lang='eng'):
+        """
+        Fetch and parse the result page for the current word.
+
+        `lang` is accepted for signature compatibility; it is not read
+        here. The parsed dict is handed to `cache_this` and whatever
+        that returns is returned to the caller, which queries it with
+        `.get(key, default)`.
+        """
+        url = u"https://hanyu.baidu.com/s?wd={word}".format(word=self.word)
+        html = self.get_response(url, timeout=10)
+        soup = parseHtml(html)
+        # Every key starts out empty so a missing page section simply
+        # yields an empty field. 'syn_ant' is reserved but never filled.
+        result = {
+            'pinyin': '',
+            'basicmean': '',
+            'syn_ant': '',
+            'fanyi': '',
+            'audio_url': '',
+        }
+
+        # Pinyin and pronunciation audio
+        element = soup.find('div', id='pinyin')
+        if element:
+            tags = element.find_all('b')
+            if tags:
+                result['pinyin'] = u' '.join(x.get_text() for x in tags)
+            # The audio link is looked up independently of the <b> tags
+            # and may be absent, so guard it before reading its attribute.
+            link = element.find('a')
+            if link:
+                result['audio_url'] = link.get('url') or ''
+
+        # Basic definitions
+        element = soup.find('div', id='basicmean-wrapper')
+        if element:
+            tags = element.find_all('p')
+            if tags:
+                # One entry per line; the separator is written as an
+                # escape so the literal stays on a single source line.
+                result['basicmean'] = u'\n'.join(
+                    x.get_text().strip() for x in tags
+                )
+
+        # English translation
+        element = soup.find('div', id='fanyi-wrapper')
+        if element:
+            tags = element.find_all('dt')
+            if tags:
+                result['fanyi'] = u'\n'.join(
+                    x.get_text().strip() for x in tags
+                )
+
+        return self.cache_this(result)
+
+    def _get_field(self, key, default=u''):
+        """
+        Return the cached value for `key` when one exists; otherwise
+        fetch the page and read `key` (or `default`) from the result.
+        """
+        if self.cached(key):
+            return self.cache_result(key)
+        return self._get_content().get(key, default)
+
+    @export([u'拼音', u'Phoneticize'], 1)
+    def fld_pinyin(self):
+        """Return the pinyin field."""
+        return self._get_field('pinyin')
+
+    @export('PRON', 2)
+    def fld_pron(self):
+        """
+        Download the pronunciation MP3 and return its Anki audio label.
+
+        Returns an empty string when downloading is switched off via
+        `baidu_download_mp3`, when no audio URL was found, or when the
+        download (or label creation) fails.
+        """
+        audio_url = self._get_field('audio_url')
+        if not (baidu_download_mp3 and audio_url):
+            return ''
+
+        # File name: "<unique>-<sha1 of the word in 8-char groups>.mp3".
+        hex_digest = sha1(
+            self.word.encode('utf-8') if isinstance(self.word, unicode)
+            else self.word
+        ).hexdigest().lower()
+        assert len(hex_digest) == 40, "unexpected output from hash library"
+        filename = '.'.join([
+            '-'.join([
+                self.unique.lower(),
+                hex_digest[:8], hex_digest[8:16],
+                hex_digest[16:24], hex_digest[24:32], hex_digest[32:],
+            ]),
+            'mp3',
+        ])
+
+        try:
+            self.net_download(
+                filename,
+                audio_url,
+                require=dict(mime='audio/mp3', size=512),
+            )
+            return self.get_anki_label(filename, 'audio')
+        except Exception:
+            # Audio is best effort: any failure leaves the field empty,
+            # but KeyboardInterrupt/SystemExit are no longer swallowed.
+            return ''
+
+    @export([u'基本释义', u'Basic Definitions'], 3)
+    def fld_basic(self):
+        """Return the basic-definitions field."""
+        return self._get_field('basicmean')
+
+    @export([u'英文翻译', u'Translation[En]'], 5)
+    def fld_fanyi(self):
+        """Return the English-translation field."""
+        return self._get_field('fanyi')
diff --git a/src/fastwq/service/base.py b/src/fastwq/service/base.py
index 7d5bd9d..50df902 100644
--- a/src/fastwq/service/base.py
+++ b/src/fastwq/service/base.py
@@ -256,6 +256,139 @@ class WebService(Service):
except Exception as e:
pass
+ class TinyDownloadError(ValueError):
+     """Raised when a downloaded payload is smaller than the required size."""
+
+ def net_stream(self, targets, require=None, method='GET',
+                awesome_ua=False, add_padding=False,
+                custom_quoter=None, custom_headers=None):
+     """
+     Returns the raw payload string from the specified target(s).
+     If multiple targets are specified, their resulting payloads are
+     glued together.
+
+     Each "target" is a bare URL string or a tuple containing an
+     address and a dict for what to tack onto the query string.
+
+     Finally, a require dict may be passed to enforce a Content-Type
+     using key 'mime' and/or a minimum payload size using key 'size'.
+     If using multiple targets, these requirements apply to each
+     response.
+
+     The underlying library here already understands how to search
+     the environment for proxy settings (e.g. HTTP_PROXY), so we do
+     not need to do anything extra for that.
+
+     If add_padding is True, then some additional null padding will
+     be added onto the stream returned. This is helpful for some web
+     services that sometimes return MP3s that `mplayer` clips early.
+
+     Other parameters:
+
+     - method: 'GET' (params go onto the query string) or 'POST'
+       (params are sent as the request body).
+     - awesome_ua: accepted for compatibility; not read by this method.
+     - custom_quoter: optional dict mapping a parameter name to the
+       callable used to quote that parameter's value instead of the
+       default `quote`.
+     - custom_headers: optional dict merged over the default headers.
+
+     Raises IOError when there is no response, ValueError for a
+     non-200 status or a Content-Type mismatch, and TinyDownloadError
+     when a payload is shorter than require['size'].
+     """
+     DEFAULT_UA = 'Mozilla/5.0'
+     DEFAULT_TIMEOUT = 3
+
+     PADDING = '\0' * 2**11
+
+     assert method in ['GET', 'POST'], "method must be GET or POST"
+     from urllib2 import urlopen, Request, quote
+
+     # Normalise every target to a (url, encoded-params-or-None) pair.
+     targets = targets if isinstance(targets, list) else [targets]
+     targets = [
+         (target, None) if isinstance(target, basestring)
+         else (
+             target[0],
+             '&'.join(
+                 '='.join([
+                     key,
+                     (
+                         custom_quoter[key] if (custom_quoter and
+                                                key in custom_quoter)
+                         else quote
+                     )(
+                         val.encode('utf-8') if isinstance(val, unicode)
+                         else val if isinstance(val, str)
+                         else str(val),
+                         safe='',
+                     ),
+                 ])
+                 for key, val in target[1].items()
+             ),
+         )
+         for target in targets
+     ]
+
+     require = require or {}
+
+     payloads = []
+
+     for number, (url, params) in enumerate(targets, 1):
+         desc = "web request" if len(targets) == 1 \
+             else "web request (%d of %d)" % (number, len(targets))
+
+         headers = {'User-Agent': DEFAULT_UA}
+         if custom_headers:
+             headers.update(custom_headers)
+
+         response = urlopen(
+             Request(
+                 url=('?'.join([url, params]) if params and method == 'GET'
+                      else url),
+                 headers=headers,
+             ),
+             data=params if params and method == 'POST' else None,
+             timeout=DEFAULT_TIMEOUT,
+         )
+
+         if not response:
+             raise IOError("No response for %s" % desc)
+
+         # Release the connection on every exit path, including the
+         # status/Content-Type failures and a read() that raises.
+         try:
+             if response.getcode() != 200:
+                 value_error = ValueError(
+                     "Got %d status for %s" %
+                     (response.getcode(), desc)
+                 )
+                 try:
+                     # Attach the body for diagnostics when readable.
+                     value_error.payload = response.read()
+                 except StandardError:
+                     pass
+                 raise value_error
+
+             if 'mime' in require:
+                 got_mime = response.info().gettype()
+                 # Treat experimental "/x-" subtypes as their plain form.
+                 if require['mime'] != format(got_mime).replace('/x-', '/'):
+                     value_error = ValueError(
+                         "Request got %s Content-Type for %s; wanted %s" %
+                         (got_mime, desc, require['mime'])
+                     )
+                     value_error.got_mime = got_mime
+                     value_error.wanted_mime = require['mime']
+                     raise value_error
+
+             payload = response.read()
+         finally:
+             response.close()
+
+         if 'size' in require and len(payload) < require['size']:
+             raise self.TinyDownloadError(
+                 "Request got %d-byte stream for %s; wanted %d+ bytes" %
+                 (len(payload), desc, require['size'])
+             )
+
+         payloads.append(payload)
+
+     if add_padding:
+         payloads.append(PADDING)
+     return ''.join(payloads)
+
+ def net_download(self, path, *args, **kwargs):
+     """
+     Fetch the given target(s) and save the combined payload at `path`.
+
+     All remaining positional and keyword arguments are passed straight
+     through to net_stream(); see that method for the accepted options.
+     The file is opened in binary write mode, so existing content at
+     `path` is replaced.
+     """
+
+     stream = self.net_stream(*args, **kwargs)
+     with open(path, 'wb') as sink:
+         sink.write(stream)
+
class LocalService(Service):
"""