From 0e0d8ce3ec6f2dd30279fd54b71d06f8b1fb0d3e Mon Sep 17 00:00:00 2001 From: "St.Huang" Date: Sat, 28 Jul 2018 10:25:19 +0800 Subject: [PATCH] rewrite minidict service. --- src/fastwq/service/dict/minidict.py | 319 ++++------------------------ 1 file changed, 41 insertions(+), 278 deletions(-) diff --git a/src/fastwq/service/dict/minidict.py b/src/fastwq/service/dict/minidict.py index 1b29021..e42208c 100644 --- a/src/fastwq/service/dict/minidict.py +++ b/src/fastwq/service/dict/minidict.py @@ -1,10 +1,6 @@ #-*- coding:utf-8 -*- -import json -import re -import urllib -from aqt.utils import showInfo, showText -from ..base import WebService, export, register, with_styles +from ..base import WebService, export, register, with_styles, parse_html @register(u'海词迷你词典') @@ -12,292 +8,59 @@ class MiniDict(WebService): def __init__(self): super(MiniDict, self).__init__() - self.encoder = Encoder() - def _get_content(self, token): - expressions, sentences, variations = [''] * 3 + def _get_content(self): + data = self.get_response(u"http://apii.dict.cn/mini.php?q={}".format(self.word)) + soup = parse_html(data) + result = { + 'expressions': u'', + 'sentences': u'', + 'variations': u'', + 'phonetic': u'', + } - encoded_word = self.encoder.go(self.word, token) - data = urllib.urlencode( - {'q': self.word, 's': 2, 't': encoded_word}) - result = self.get_response( - "http://dict.cn/ajax/dictcontent.php", data=data) - if result: - expressions = json.loads(result).get("e", "") - sentences = json.loads(result).get("s", "") - variations = json.loads(result).get("t", "") - sentences = re.sub('/imgs/audio\.gif', "", sentences) - self.cache_this( - {'expressions': expressions, 'sentences': sentences, 'variations': variations}) - return expressions, sentences, variations + # 音标 + tag = soup.find('span', class_='p') + if tag: + result['phonetic'] = str(tag.get_text()).encode('utf-8') + tag.decompose() + + # 基本释义 + tag = soup.find('div', id='e') + if tag: + result['expressions'] = str(tag).encode('utf-8') + tag.decompose() - def _get_token_phonetic(self): - result = self.get_response( - "http://dict.cn/mini.php?q={}".format(self.word)) + # 例句与用法 + tag = soup.find('div', id='s') + if tag: + result['sentences'] = str(tag).encode('utf-8') + tag.decompose() - page_token, phonetic = '', '' - mt = re.search('', result) - if mt: - page_token = mt.groups()[0] - mp = re.search("(.*?)", result) - if mp: - phonetic = mp.groups()[0] - self.cache_this({'phonetic': phonetic}) - return page_token, phonetic + # 词形变化 + tag = soup.find('div', id='t') + if tag: + result['variations'] = str(tag).encode('utf-8') + tag.decompose() + + return self.cache_this(result) + + def _get_field(self, key, default=u''): + return self.cache_result(key) if self.cached(key) else self._get_content().get(key, default) @export(u'音标') - # @with_css('''''') def fld_phonetic(self): - return self.cache_result('phonetic') if self.cached('phonetic') else self._get_token_phonetic()[1] + return self._get_field('phonetic') @export(u'基本释义') def fld_explains(self): - if self.cached('expressions'): - return self.cache_result('expressions') - # showInfo(u'%s 基本释义cached:\n %s' % (self.word, res)) - else: - page_token, phonetic = self._get_token_phonetic() - return self._get_content(page_token)[0] - # showInfo(u'%s 基本释义获取:\n %s' % (self.word, res)) + return self._get_field('expressions') @export(u'例句与用法') + @with_styles(css='em {color:#cc0066;font-style:normal;}', need_wrap_css=True, wrap_class='minidict') def fld_sentences(self): - if self.cached('sentences'): - return self.cache_result('sentences') - # showInfo(u'%s 基本释义cached:\n %s' % (self.word, res)) - else: - page_token, phonetic = self._get_token_phonetic() - return self._get_content(page_token)[1] - # showInfo(u'%s 基本释义获取:\n %s' % (self.word, res)) + return self._get_field('sentences') @export(u'词形变化') def fld_variations(self): - if self.cached('variations'): - return self.cache_result('variations') - # showInfo(u'%s 基本释义cached:\n %s' % (self.word, res)) - else: - page_token, phonetic = self._get_token_phonetic() - return self._get_content(page_token)[2] - # showInfo(u'%s 基本释义获取:\n %s' % (self.word, res)) - - -class Encoder(object): - - def __init__(self): - pass - - def rshift(self, val, n): - s = val >> n if val >= 0 else (val + 0x100000000) >> n - return s - - def lshift(self, val, n): - return self.toSigned32(val << n) - - def toSigned32(self, n): - n = n & 0xffffffff - return (n ^ 0x80000000) - 0x80000000 - - def RotateLeft(self, a, b): - rshift = self.rshift(a, 32 - b) - lshift = self.lshift(a, b) - s = lshift | rshift - return s - - def AddUnsigned(self, a, b): - lX8 = a & 0x80000000 - lY8 = b & 0x80000000 - c = a & 0x40000000 - lY4 = b & 0x40000000 - lResult = (a & 0x3FFFFFFF) + (b & 0x3FFFFFFF) - if c & lY4: - return self.toSigned32(lResult ^ 0x80000000 ^ lX8 ^ lY8) - if c | lY4: - if lResult & 0x40000000: - return self.toSigned32(lResult ^ 0xC0000000 ^ lX8 ^ lY8) - else: - return self.toSigned32(lResult ^ 0x40000000 ^ lX8 ^ lY8) - else: - return self.toSigned32(lResult ^ lX8 ^ lY8) - - def F(self, x, y, z): - return (x & y) | ((~x) & z) - - def G(self, x, y, z): - return (x & z) | (y & (~z)) - - def H(self, x, y, z): - return (x ^ y ^ z) - - def I(self, x, y, z): - return (y ^ (x | (~z))) - - def FF(self, a, b, c, d, x, s, e): - a = self.AddUnsigned(a, self.AddUnsigned( - self.AddUnsigned(self.F(b, c, d), x), e)) - return self.toSigned32(self.AddUnsigned(self.RotateLeft(a, s), b)) - - def GG(self, a, b, c, d, x, s, e): - a = self.AddUnsigned(a, self.AddUnsigned( - self.AddUnsigned(self.G(b, c, d), x), e)) - return self.toSigned32(self.AddUnsigned(self.RotateLeft(a, s), b)) - - def HH(self, a, b, c, d, x, s, e): - a = self.AddUnsigned(a, self.AddUnsigned( - self.AddUnsigned(self.H(b, c, d), x), e)) - return self.toSigned32(self.AddUnsigned(self.RotateLeft(a, s), b)) - - def II(self, a, b, c, d, x, s, e): - a = self.AddUnsigned(a, self.AddUnsigned( - self.AddUnsigned(self.I(b, c, d), x), e)) - return self.toSigned32(self.AddUnsigned(self.RotateLeft(a, s), b)) - - def ConvertToWordArray(self, a): - c = len(a) - d = c + 8 - e = (d - (d % 64)) / 64 - f = (e + 1) * 16 - g = [0] * f # notice - h = 0 - i = 0 - while i < c: - b = (i - (i % 4)) / 4 - h = (i % 4) * 8 - g[b] = (g[b] | self.lshift(ord(a[i]), h)) - i += 1 - b = (i - (i % 4)) / 4 - h = (i % 4) * 8 - g[b] = g[b] | self.lshift(0x80, h) - g[f - 2] = self.lshift(c, 3) - g[f - 1] = self.rshift(c, 29) - return g - - def WordToHex(self, a): - b = "" - for lCount in range(0, 4): - lByte = self.rshift(a, lCount * 8) & 255 - WordToHexValue_temp = "0" + hex(lByte)[2:-1] - b = b + \ - WordToHexValue_temp[ - len(WordToHexValue_temp) - 2:len(WordToHexValue_temp)] - return b - - def Utf8Encode(self, a): - a = a.replace('\r\n', "\n") - b = "" - for n in range(len(a)): - c = ord(a[n]) - if (c < 128): - b += unichr(c) - elif ((c > 127) and (c < 2048)): - b += unichr((c >> 6) | 192) - b += unichr((c & 63) | 128) - else: - b += unichr((c >> 12) | 224) - b += unichr(((c >> 6) & 63) | 128) - b += unichr((c & 63) | 128) - b += ''.join(map(unichr, [100, 105, 99, 116, 99, 110])) - if self.pagetoken: - b += self.pagetoken - return b - - def go(self, word, pagetoken=None): - self.word = word - self.pagetoken = pagetoken - l = 7 - S12 = 12 - S13 = 17 - S14 = 22 - m = 5 - S22 = 9 - S23 = 14 - S24 = 20 - o = 4 - S32 = 11 - S33 = 16 - S34 = 23 - p = 6 - S42 = 10 - S43 = 15 - S44 = 21 - j = self.Utf8Encode(self.word) - x = self.ConvertToWordArray(j) - a = 0x67452301 - b = 0xEFCDAB89 - c = 0x98BADCFE - d = 0x10325476 - for k in range(0, len(x), 16): - AA = a - BB = b - CC = c - DD = d - a = self.FF(a, b, c, d, x[k + 0], l, 0xD76AA478) - d = self.FF(d, a, b, c, x[k + 1], S12, 0xE8C7B756) - c = self.FF(c, d, a, b, x[k + 2], S13, 0x242070DB) - b = self.FF(b, c, d, a, x[k + 3], S14, 0xC1BDCEEE) - a = self.FF(a, b, c, d, x[k + 4], l, 0xF57C0FAF) - d = self.FF(d, a, b, c, x[k + 5], S12, 0x4787C62A) - c = self.FF(c, d, a, b, x[k + 6], S13, 0xA8304613) - b = self.FF(b, c, d, a, x[k + 7], S14, 0xFD469501) - a = self.FF(a, b, c, d, x[k + 8], l, 0x698098D8) - d = self.FF(d, a, b, c, x[k + 9], S12, 0x8B44F7AF) - c = self.FF(c, d, a, b, x[k + 10], S13, 0xFFFF5BB1) - b = self.FF(b, c, d, a, x[k + 11], S14, 0x895CD7BE) - a = self.FF(a, b, c, d, x[k + 12], l, 0x6B901122) - d = self.FF(d, a, b, c, x[k + 13], S12, 0xFD987193) - c = self.FF(c, d, a, b, x[k + 14], S13, 0xA679438E) - b = self.FF(b, c, d, a, x[k + 15], S14, 0x49B40821) - a = self.GG(a, b, c, d, x[k + 1], m, 0xF61E2562) - d = self.GG(d, a, b, c, x[k + 6], S22, 0xC040B340) - c = self.GG(c, d, a, b, x[k + 11], S23, 0x265E5A51) - b = self.GG(b, c, d, a, x[k + 0], S24, 0xE9B6C7AA) - a = self.GG(a, b, c, d, x[k + 5], m, 0xD62F105D) - d = self.GG(d, a, b, c, x[k + 10], S22, 0x2441453) - c = self.GG(c, d, a, b, x[k + 15], S23, 0xD8A1E681) - b = self.GG(b, c, d, a, x[k + 4], S24, 0xE7D3FBC8) - a = self.GG(a, b, c, d, x[k + 9], m, 0x21E1CDE6) - d = self.GG(d, a, b, c, x[k + 14], S22, 0xC33707D6) - c = self.GG(c, d, a, b, x[k + 3], S23, 0xF4D50D87) - b = self.GG(b, c, d, a, x[k + 8], S24, 0x455A14ED) - a = self.GG(a, b, c, d, x[k + 13], m, 0xA9E3E905) - d = self.GG(d, a, b, c, x[k + 2], S22, 0xFCEFA3F8) - c = self.GG(c, d, a, b, x[k + 7], S23, 0x676F02D9) - b = self.GG(b, c, d, a, x[k + 12], S24, 0x8D2A4C8A) - a = self.HH(a, b, c, d, x[k + 5], o, 0xFFFA3942) - d = self.HH(d, a, b, c, x[k + 8], S32, 0x8771F681) - c = self.HH(c, d, a, b, x[k + 11], S33, 0x6D9D6122) - b = self.HH(b, c, d, a, x[k + 14], S34, 0xFDE5380C) - a = self.HH(a, b, c, d, x[k + 1], o, 0xA4BEEA44) - d = self.HH(d, a, b, c, x[k + 4], S32, 0x4BDECFA9) - c = self.HH(c, d, a, b, x[k + 7], S33, 0xF6BB4B60) - b = self.HH(b, c, d, a, x[k + 10], S34, 0xBEBFBC70) - a = self.HH(a, b, c, d, x[k + 13], o, 0x289B7EC6) - d = self.HH(d, a, b, c, x[k + 0], S32, 0xEAA127FA) - c = self.HH(c, d, a, b, x[k + 3], S33, 0xD4EF3085) - b = self.HH(b, c, d, a, x[k + 6], S34, 0x4881D05) - a = self.HH(a, b, c, d, x[k + 9], o, 0xD9D4D039) - d = self.HH(d, a, b, c, x[k + 12], S32, 0xE6DB99E5) - c = self.HH(c, d, a, b, x[k + 15], S33, 0x1FA27CF8) - b = self.HH(b, c, d, a, x[k + 2], S34, 0xC4AC5665) - a = self.II(a, b, c, d, x[k + 0], p, 0xF4292244) - d = self.II(d, a, b, c, x[k + 7], S42, 0x432AFF97) - c = self.II(c, d, a, b, x[k + 14], S43, 0xAB9423A7) - b = self.II(b, c, d, a, x[k + 5], S44, 0xFC93A039) - a = self.II(a, b, c, d, x[k + 12], p, 0x655B59C3) - d = self.II(d, a, b, c, x[k + 3], S42, 0x8F0CCC92) - c = self.II(c, d, a, b, x[k + 10], S43, 0xFFEFF47D) - b = self.II(b, c, d, a, x[k + 1], S44, 0x85845DD1) - a = self.II(a, b, c, d, x[k + 8], p, 0x6FA87E4F) - d = self.II(d, a, b, c, x[k + 15], S42, 0xFE2CE6E0) - c = self.II(c, d, a, b, x[k + 6], S43, 0xA3014314) - b = self.II(b, c, d, a, x[k + 13], S44, 0x4E0811A1) - a = self.II(a, b, c, d, x[k + 4], p, 0xF7537E82) - d = self.II(d, a, b, c, x[k + 11], S42, 0xBD3AF235) - c = self.II(c, d, a, b, x[k + 2], S43, 0x2AD7D2BB) - b = self.II(b, c, d, a, x[k + 9], S44, 0xEB86D391) - a = self.AddUnsigned(a, AA) - b = self.AddUnsigned(b, BB) - c = self.AddUnsigned(c, CC) - d = self.AddUnsigned(d, DD) - q = self.WordToHex(a) + self.WordToHex(b) + \ - self.WordToHex(c) + self.WordToHex(d) - return q.lower() + return self._get_field('variations')