anki-word-query/src/fastwq/service/dict/remotemdx.py

90 lines
3.4 KiB
Python
Raw Normal View History

2018-07-01 10:55:30 +08:00
#-*- coding:utf-8 -*-
#
# Copyright © 20162017 Liang Feng <finalion@gmail.com>
#
# Support: Report an issue at https://github.com/finalion/WordQuery/issues
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version; http://www.gnu.org/copyleft/gpl.html.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
# import ntpath
import re
import urllib
import urllib2
import urlparse
from collections import defaultdict
from aqt.utils import showInfo, showText
2018-07-14 21:21:31 +08:00
from ..base import QueryResult, WebService, export, register, with_styles
2018-07-01 10:55:30 +08:00
2018-07-09 19:25:08 +08:00
@register('MDX_SERVER')
2018-07-01 10:55:30 +08:00
class RemoteMdx(WebService):
def __init__(self):
super(RemoteMdx, self).__init__()
self.cache = defaultdict(set)
def active(self, dict_path, word):
self.word = word
self.url = dict_path + \
'/' if not dict_path.endswith('/') else dict_path
try:
req = urllib2.urlopen(self.url + word)
result, js = self.adapt_to_anki(req.read())
return QueryResult(result=result, js=js)
except:
return QueryResult.default()
def download_media_files(self, data):
diff = data.difference(self.cache[self.url])
self.cache[self.url].update(diff)
errors, styles = list(), list()
for each in diff:
basename = os.path.basename(each.replace('\\', os.path.sep))
saved_basename = '_' + basename
abs_url = urlparse.urljoin(self.url, each)
if basename.endswith('.css') or basename.endswith('.js'):
styles.append(saved_basename)
if not os.path.exists(saved_basename):
try:
urllib.urlretrieve(abs_url, saved_basename)
except:
errors.append(each)
return errors, styles
def adapt_to_anki(self, html):
"""
1. convert the media path to actual path in anki's collection media folder.
2. remove the js codes
3. import css, to make sure the css file can be synced. TO VALIDATE!
"""
media_files_set = set()
mcss = re.findall(r'href="(\S+?\.css)"', html)
media_files_set.update(set(mcss))
mjs = re.findall(r'src="([\w\./]\S+?\.js)"', html)
media_files_set.update(set(mjs))
msrc = re.findall(r'<img.*?src="([\w\./]\S+?)".*?>', html)
media_files_set.update(set(msrc))
for each in media_files_set:
html = html.replace(each, '_' + each.split('/')[-1])
errors, styles = self.download_media_files(media_files_set)
html = u'<br>'.join([u"<style>@import url('%s');</style>".format(style)
for style in styles if style.endswith('.css')]) + html
js = re.findall(r'<script.*?>.*?</script>', html, re.DOTALL)
# for each in js:
# html = html.replace(each, '')
# showText(html)
return unicode(html), u'\n'.join(js)