# -*- coding: utf-8 -*-
#
# Copyright © 2016–2017 Liang Feng <finalion@gmail.com>
#
# Support: Report an issue at https://github.com/finalion/WordQuery/issues
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version; http://www.gnu.org/copyleft/gpl.html.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
# import ntpath
import re
import urllib
import urllib2
import urlparse
from collections import defaultdict

from aqt.utils import showInfo, showText

from ..base import QueryResult, WebService, export, register, with_styles


@register('MDX_SERVER')
class RemoteMdx(WebService):

    def __init__(self):
        super(RemoteMdx, self).__init__()
        # media files already downloaded, keyed by the dictionary server URL
        self.cache = defaultdict(set)

    def active(self, dict_path, word):
        self.word = word
        # normalize the server address so it always ends with a single slash
        self.url = dict_path if dict_path.endswith('/') else dict_path + '/'
        try:
            req = urllib2.urlopen(self.url + word)
            result, js = self.adapt_to_anki(req.read())
            return QueryResult(result=result, js=js)
        except:
            return QueryResult.default()
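
    # Note: the word is pasted into the URL verbatim, so it is assumed to be
    # plain ASCII without spaces. A minimal sketch of quoting it first
    # (assuming the word is a unicode string) could look like:
    #
    #     quoted = urllib.quote(word.encode('utf-8'))
    #     req = urllib2.urlopen(self.url + quoted)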

    def download_media_files(self, data):
        # only fetch files not yet downloaded from this server
        diff = data.difference(self.cache[self.url])
        self.cache[self.url].update(diff)
        errors, styles = list(), list()
        for each in diff:
            basename = os.path.basename(each.replace('\\', os.path.sep))
            # the leading underscore tells Anki's media check to keep the
            # file even though no note references it directly
            saved_basename = '_' + basename
            abs_url = urlparse.urljoin(self.url, each)
            if basename.endswith('.css') or basename.endswith('.js'):
                styles.append(saved_basename)
            if not os.path.exists(saved_basename):
                try:
                    urllib.urlretrieve(abs_url, saved_basename)
                except:
                    errors.append(each)
        return errors, styles
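
    # For example, a reference like "images/pron.png" served from
    # http://localhost:8000/ would be fetched from
    # http://localhost:8000/images/pron.png (via urlparse.urljoin) and saved
    # as "_pron.png" in the current working directory, which Anki keeps set
    # to the collection.media folder while a collection is open. The host
    # and path here are only illustrative.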

    def adapt_to_anki(self, html):
        """
        1. convert the media paths to the actual filenames used in Anki's
           collection media folder.
        2. collect the js code (stripping it from the html is currently
           commented out).
        3. import css, to make sure the css file can be synced. TO VALIDATE!
        """
        media_files_set = set()
        mcss = re.findall(r'href="(\S+?\.css)"', html)
        media_files_set.update(set(mcss))
        mjs = re.findall(r'src="([\w\./]\S+?\.js)"', html)
        media_files_set.update(set(mjs))
        msrc = re.findall(r'<img.*?src="([\w\./]\S+?)".*?>', html)
        media_files_set.update(set(msrc))
        # rewrite every media reference to its underscore-prefixed local name
        for each in media_files_set:
            html = html.replace(each, '_' + each.split('/')[-1])
        errors, styles = self.download_media_files(media_files_set)
        html = u'<br>'.join([u"<style>@import url('%s');</style>" % style
                             for style in styles if style.endswith('.css')]) + html
        js = re.findall(r'<script.*?>.*?</script>', html, re.DOTALL)
        # for each in js:
        #     html = html.replace(each, '')
        # showText(html)
        return unicode(html), u'\n'.join(js)
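
# A rough usage sketch (illustrative only; WordQuery's service framework
# normally performs these calls after an mdx-server address is configured in
# the dictionary settings):
#
#     service = RemoteMdx()
#     res = service.active('http://127.0.0.1:8000', 'rabbit')
#     # res is a QueryResult built from the adapted html and the extracted
#     # <script> blocks, or QueryResult.default() on failure
#
# The address above is a placeholder for whatever mdx-server instance is
# actually serving the dictionary.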