88 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			88 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#-*- coding:utf-8 -*-
 | 
						|
#
 | 
						|
# Copyright (C) 2018 Liang Feng <finalion@gmail.com>
 | 
						|
#
 | 
						|
# Support: Report an issue at https://github.com/finalion/WordQuery/issues
 | 
						|
#
 | 
						|
# This program is free software: you can redistribute it and/or modify
 | 
						|
# it under the terms of the GNU General Public License as published by
 | 
						|
# the Free Software Foundation, either version 3 of the License, or
 | 
						|
# any later version; http://www.gnu.org/copyleft/gpl.html.
 | 
						|
#
 | 
						|
# This program is distributed in the hope that it will be useful,
 | 
						|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 | 
						|
# GNU General Public License for more details.
 | 
						|
#
 | 
						|
# You should have received a copy of the GNU General Public License
 | 
						|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
						|
 | 
						|
import os
 | 
						|
# import ntpath
 | 
						|
import re
 | 
						|
import urllib
 | 
						|
from collections import defaultdict
 | 
						|
 | 
						|
from aqt.utils import showInfo, showText
 | 
						|
from ..base import QueryResult, WebService, export, register, with_styles
 | 
						|
 | 
						|
 | 
						|
@register('MDX_SERVER')
class RemoteMdx(WebService):
    """Dictionary service that looks words up on a remote MDX server.

    The server's HTML answer is rewritten so that the stylesheets, scripts
    and images it references point at locally saved copies whose names are
    prefixed with ``_``.

    NOTE(review): ``get_response`` and ``download`` are not defined in this
    class; they are presumably provided by ``WebService`` - confirm.
    """

    def __init__(self):
        super(RemoteMdx, self).__init__()
        # Base URL -> set of media paths already handed to
        # download_media_files() for that URL.
        self.cache = defaultdict(set)

    @staticmethod
    def _local_media_name(path):
        """Return the ``_``-prefixed local file name for a media *path*.

        Both ``/`` and ``\\`` are treated as separators so that the name
        written into the HTML always matches the name the file is saved as.
        """
        return '_' + path.replace('\\', '/').rsplit('/', 1)[-1]

    def active(self, dict_path, word):
        """Query *word* on the server at *dict_path*.

        Args:
            dict_path: base URL of the MDX server; a trailing ``/`` is
                appended when missing.
            word: the word to look up; it is appended to the base URL.

        Returns:
            A ``QueryResult`` built from the adapted HTML and the collected
            scripts, or ``QueryResult.default()`` when anything goes wrong.
        """
        self.word = word
        self.url = dict_path if dict_path.endswith('/') else dict_path + '/'
        try:
            html = self.get_response(self.url + word)
            result, js = self.adapt_to_anki(html)
            return QueryResult(result=result, js=js)
        except Exception:
            # Best effort: any failure (network, parsing, ...) yields the
            # default result instead of propagating to the caller.
            return QueryResult.default()

    def download_media_files(self, data):
        """Download the media files in *data* not yet seen for ``self.url``.

        Args:
            data: set of media paths as they appear in the server's HTML.

        Returns:
            A tuple ``(errors, styles)``: ``errors`` lists the paths whose
            download raised, ``styles`` lists the local names of the newly
            seen ``.css`` / ``.js`` files.
        """
        # ``import urllib`` alone does not load the ``urllib.parse``
        # submodule, so import what is needed explicitly.
        from urllib.parse import urljoin

        diff = data.difference(self.cache[self.url])
        # Paths are recorded before downloading, so a path whose download
        # fails is not attempted again for this URL.
        self.cache[self.url].update(diff)
        errors, styles = [], []
        for each in diff:
            saved_basename = self._local_media_name(each)
            abs_url = urljoin(self.url, each)
            if saved_basename.endswith(('.css', '.js')):
                styles.append(saved_basename)
            # Relative path: resolved against the current working directory.
            if not os.path.exists(saved_basename):
                try:
                    self.download(abs_url, saved_basename)
                except Exception:
                    errors.append(each)
        return errors, styles

    def adapt_to_anki(self, html):
        """Rewrite *html* so that its media references use local file names.

        1. Collect the stylesheet, script and image paths referenced by
           *html* and replace each with its ``_``-prefixed local name.
        2. Download the files that have not been seen yet.
        3. Prepend an ``@import`` style tag for every newly seen stylesheet
           (intended to make sure the css file can be synced. TO VALIDATE!).
        4. Collect the ``<script>`` elements; they are returned separately
           and are NOT removed from the HTML.

        Returns:
            A tuple ``(html, js)`` where ``js`` is the newline-joined text of
            all ``<script>`` elements found in the adapted HTML.
        """
        media_files_set = set()
        media_files_set.update(re.findall(r'href="(\S+?\.css)"', html))
        media_files_set.update(re.findall(r'src="([\w\./]\S+?\.js)"', html))
        media_files_set.update(
            re.findall(r'<img.*?src="([\w\./]\S+?)".*?>', html))
        for each in media_files_set:
            # Same naming rule as download_media_files(), so the rewritten
            # reference matches the saved file.
            html = html.replace(each, self._local_media_name(each))
        _errors, styles = self.download_media_files(media_files_set)
        imports = [u"<style>@import url('{}');</style>".format(style)
                   for style in styles if style.endswith('.css')]
        html = u'<br>'.join(imports) + html
        js = re.findall(r'<script.*?>.*?</script>', html, re.DOTALL)
        return str(html), u'\n'.join(js)
 |