diff --git a/addons/fastwq/service/dict/LDOCE6.py b/addons/fastwq/service/dict/LDOCE6.py index 8117a9c..3b8a437 100644 --- a/addons/fastwq/service/dict/LDOCE6.py +++ b/addons/fastwq/service/dict/LDOCE6.py @@ -1,4 +1,5 @@ #-*- coding:utf-8 -*- +import os import re from ..base import * @@ -62,6 +63,24 @@ class Ldoce6(MdxService): def fld_voiceame(self): return self._fld_voice(self.get_html(), 'us') + def _fld_image(self, img): + val = '/' + img + # file extension isn't always jpg + file_extension = os.path.splitext(img)[1][1:].strip().lower() + name = get_hex_name('mdx-'+self.unique.lower(), val, file_extension) + name = self.save_file(val, name) + if name: + return self.get_anki_label(name, 'img') + return '' + + @export('IMAGE') + def fld_image(self): + html = self.get_html() + m = re.search(r'', html) + if m: + return self._fld_image(m.groups()[0]) + return '' + @export('EXAMPLE') def fld_sentence(self): m = re.findall(r'\s*.*<\/span>', self.get_html()) @@ -73,12 +92,37 @@ class Ldoce6(MdxService): for element in el_list] my_str = '' for i_str in maps: - i_str = re.sub(r']+?href=\"sound\:.*\.mp3\".*', '', i_str) - i_str = i_str.replace(' ', '') + i_str = re.sub(r']+?href=\"sound\:.*\.mp3\".*', '', i_str).strip() my_str = my_str + '
  • ' + i_str + '
  • ' return self._css(my_str) return '' + def _fld_audio(self, audio): + name = get_hex_name('mdx-'+self.unique.lower(), audio, 'mp3') + name = self.save_file(audio, name) + if name: + return self.get_anki_label(name, 'audio') + return '' + + @export(u'Examples with audios') + def fld_sentence_audio(self): + m = re.findall(r'\s*.*<\/span>', self.get_html()) + if m: + soup = parse_html(m[0]) + el_list = soup.findAll('span', {'class':'example'}) + if el_list: + maps = [u''.join(str(content).decode('utf-8') for content in element.contents) + for element in el_list] + my_str = '' + for i_str in maps: + sound = re.search(r']+?href=\"sound\:\/(.*?\.mp3)\".*', i_str) + if sound: + mp3 = self._fld_audio(sound.groups()[0]) + i_str = re.sub(r']+?href=\"sound\:.*\.mp3\".*', '', i_str).strip() + my_str = my_str + '
  • ' + i_str + ' ' + mp3 + '
  • ' + return self._css(my_str) + return '' + @export('DEF') def fld_definate(self): m = m = re.findall(r'\s*.*<\/span>', self.get_html()) diff --git a/addons21/fastwq/service/dict/LDOCE6.py b/addons21/fastwq/service/dict/LDOCE6.py index 2493d09..39fde8b 100644 --- a/addons21/fastwq/service/dict/LDOCE6.py +++ b/addons21/fastwq/service/dict/LDOCE6.py @@ -1,4 +1,5 @@ #-*- coding:utf-8 -*- +import os import re from ..base import * @@ -62,6 +63,24 @@ class Ldoce6(MdxService): def fld_voiceame(self): return self._fld_voice(self.get_html(), 'us') + def _fld_image(self, img): + val = '/' + img + # file extension isn't always jpg + file_extension = os.path.splitext(img)[1][1:].strip().lower() + name = get_hex_name('mdx-'+self.unique.lower(), val, file_extension) + name = self.save_file(val, name) + if name: + return self.get_anki_label(name, 'img') + return '' + + @export('IMAGE') + def fld_image(self): + html = self.get_html() + m = re.search(r'', html) + if m: + return self._fld_image(m.groups()[0]) + return '' + @export('EXAMPLE') def fld_sentence(self): m = re.findall(r'\s*.*<\/span>', self.get_html()) @@ -73,12 +92,37 @@ class Ldoce6(MdxService): for element in el_list] my_str = '' for i_str in maps: - i_str = re.sub(r']+?href=\"sound\:.*\.mp3\".*', '', i_str) - i_str = i_str.replace(' ', '') + i_str = re.sub(r']+?href=\"sound\:.*\.mp3\".*', '', i_str).strip() my_str = my_str + '
  • ' + i_str + '
  • ' return self._css(my_str) return '' + def _fld_audio(self, audio): + name = get_hex_name('mdx-'+self.unique.lower(), audio, 'mp3') + name = self.save_file(audio, name) + if name: + return self.get_anki_label(name, 'audio') + return '' + + @export(u'Examples with audios') + def fld_sentence_audio(self): + m = re.findall(r'\s*.*<\/span>', self.get_html()) + if m: + soup = parse_html(m[0]) + el_list = soup.findAll('span', {'class':'example'}) + if el_list: + maps = [u''.join(str(content).decode('utf-8') for content in element.contents) + for element in el_list] + my_str = '' + for i_str in maps: + sound = re.search(r']+?href=\"sound\:\/(.*?\.mp3)\".*', i_str) + if sound: + mp3 = self._fld_audio(sound.groups()[0]) + i_str = re.sub(r']+?href=\"sound\:.*\.mp3\".*', '', i_str).strip() + my_str = my_str + '
  • ' + i_str + ' ' + mp3 + '
  • ' + return self._css(my_str) + return '' + @export('DEF') def fld_definate(self): m = m = re.findall(r'\s*.*<\/span>', self.get_html())