2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#-*- coding:utf-8 -*-
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import re
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-30 17:42:14 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from ..base import *
							 | 
						
					
						
							
								
									
										
										
										
											2018-08-01 15:36:20 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from ...context import config
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								VOICE_PATTERN = r'<a href="sound://([\w/]+\w*\.mp3)"><img src="img/spkr_%s.png"></a>'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								MAPPINGS = [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ['br', [re.compile(VOICE_PATTERN % r'r')]],
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ['us', [re.compile(VOICE_PATTERN % r'b')]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								LANG_TO_REGEXPS = {lang: regexps for lang, regexps in MAPPINGS}
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 20:56:20 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								@register([u'本地词典-LDOCE6', u'MDX-LDOCE6'])
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class Ldoce6(MdxService):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-05 12:32:38 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self):
							 | 
						
					
						
							
								
									
										
										
										
											2018-08-01 15:36:20 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        super(Ldoce6, self).__init__(config.LDOCE6_PATH)
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    @property
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def title(self):
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-14 01:18:03 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return getattr(self, '__register_label__', self.unique)
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    @export('PHON')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def fld_phonetic(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        html = self.get_html()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        m = re.search(r'<span class="pron">(.*?)</span>', html)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if m:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return m.groups()[0]
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return ''
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def _fld_voice(self, html, voice):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """获取发音字段"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for regexp in LANG_TO_REGEXPS[voice]:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            match = regexp.search(html)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if match:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                val = '/' + match.group(1)
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-30 17:42:14 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                name = get_hex_name('mdx-'+self.unique.lower(), val, 'mp3')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                name = self.save_file(val, name)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if name:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    return self.get_anki_label(name, 'audio')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    @export('BRE_PRON')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def fld_voicebre(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self._fld_voice(self.get_html(), 'br')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    @export('AME_PRON')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def fld_voiceame(self):
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return self._fld_voice(self.get_html(), 'us')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    @export('EXAMPLE')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def fld_sentence(self):
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        m = re.findall(r'<span class="example"\s*.*>\s*.*<\/span>', self.get_html())
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if m:
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            soup = parse_html(m[0])
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            el_list = soup.findAll('span', {'class':'example'})
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if el_list:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                maps = [u''.join(str(content).decode('utf-8') for content in element.contents) 
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                    for element in el_list]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            my_str = ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            for i_str in maps:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                i_str = re.sub(r'<a[^>]+?href=\"sound\:.*\.mp3\".*</a>', '', i_str)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                i_str = i_str.replace(' ', '')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                my_str = my_str + '<li>' + i_str + '</li>'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return self._css(my_str)
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    @export('DEF')
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def fld_definate(self):
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        m = m = re.findall(r'<span class="def"\s*.*>\s*.*<\/span>', self.get_html())
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if m:
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-13 22:08:35 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            soup = parse_html(m[0])
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            el_list = soup.findAll('span', {'class':'def'})
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if el_list:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                maps = [u''.join(str(content).decode('utf-8') for content in element.contents) 
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                    for element in el_list]
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            my_str = ''
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            for i_str in maps:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                my_str = my_str + '<li>' + i_str + '</li>'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return self._css(my_str)
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-01 10:55:30 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return ''
							 | 
						
					
						
							
								
									
										
										
										
											2018-07-06 12:29:50 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    @with_styles(cssfile='_ldoce6.css')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _css(self, val):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return val
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    
							 |