Download images and thumbnails from Oxford Learner

This commit is contained in:
Patrizia 2018-08-19 14:49:54 +02:00
parent de1851fc6d
commit 85a22ecd29
2 changed files with 96 additions and 0 deletions

View File

@ -37,6 +37,8 @@ class OxfordLearning(WebService):
{ {
'phonetic': '{} {}'.format(web_word.wd_phon_bre, web_word.wd_phon_nam), 'phonetic': '{} {}'.format(web_word.wd_phon_bre, web_word.wd_phon_nam),
'pos': web_word.wd_pos, 'pos': web_word.wd_pos,
'img_full': web_word.wd_image_full_url,
'img_thumb': web_word.wd_image_thumb_url,
'ee': ''.join(web_word.definitions_html), 'ee': ''.join(web_word.definitions_html),
's_bre': web_word.wd_sound_url_bre, 's_bre': web_word.wd_sound_url_bre,
's_ame': web_word.wd_sound_url_nam, 's_ame': web_word.wd_sound_url_nam,
@ -47,6 +49,8 @@ class OxfordLearning(WebService):
{ {
'phonetic': '', 'phonetic': '',
'pos': '', 'pos': '',
'img_full': '',
'img_thumb': '',
'ee': '', 'ee': '',
's_bre': '', 's_bre': '',
's_ame': '', 's_ame': '',
@ -69,6 +73,20 @@ class OxfordLearning(WebService):
# 'ee') + "</div>" if "<li>" not in self._get_single_dict('ee') else self._get_single_dict('ee') # 'ee') + "</div>" if "<li>" not in self._get_single_dict('ee') else self._get_single_dict('ee')
return self._get_single_dict('ee') return self._get_single_dict('ee')
def get_image_full(self):
    """
    Download the full-size image and return its Anki label.

    Reads the 'img_full' entry through ``_get_single_dict`` and derives a
    local file name with ``get_hex_name`` (always using the 'jpg'
    extension).

    :return: the result of ``get_anki_label(filename, 'img')`` when the
        URL is non-empty and ``download`` reports success, otherwise ''
    """
    image_url = self._get_single_dict('img_full')
    # The file name is derived before the URL is checked, as in the
    # sibling sound getters.
    local_name = get_hex_name(self.unique.lower(), image_url, 'jpg')
    if not image_url:
        return ''
    if not self.download(image_url, local_name):
        return ''
    return self.get_anki_label(local_name, 'img')
def get_image_thumb(self):
    """
    Download the thumbnail image and return its Anki label.

    Reads the 'img_thumb' entry through ``_get_single_dict`` and derives a
    local file name with ``get_hex_name`` (always using the 'jpg'
    extension).

    :return: the result of ``get_anki_label(filename, 'img')`` when the
        URL is non-empty and ``download`` reports success, otherwise ''
    """
    thumb_url = self._get_single_dict('img_thumb')
    # The file name is derived before the URL is checked, as in the
    # sibling sound getters.
    local_name = get_hex_name(self.unique.lower(), thumb_url, 'jpg')
    if not thumb_url:
        return ''
    if not self.download(thumb_url, local_name):
        return ''
    return self.get_anki_label(local_name, 'img')
def get_sound_bre(self): def get_sound_bre(self):
url = self._get_single_dict('s_bre') url = self._get_single_dict('s_bre')
filename = get_hex_name(self.unique.lower(), url, 'mp3') filename = get_hex_name(self.unique.lower(), url, 'mp3')
@ -83,6 +101,14 @@ class OxfordLearning(WebService):
return self.get_anki_label(filename, 'audio') return self.get_anki_label(filename, 'audio')
return '' return ''
@export('IMAGE')
def fld_image_full(self):
    """Field exported as 'IMAGE'; its value comes from ``get_image_full``."""
    image_label = self.get_image_full()
    return image_label
@export(u'Thumbnails')
def fld_image_thumb(self):
    """Field exported as 'Thumbnails'; its value comes from ``get_image_thumb``."""
    thumb_label = self.get_image_thumb()
    return thumb_label
@export('BRE_PRON') @export('BRE_PRON')
def fld_sound_bre(self): def fld_sound_bre(self):
return self.get_sound_bre() return self.get_sound_bre()
@ -123,6 +149,14 @@ class OxfordLearningDictWord:
""" """
return self.bs.find("div", self._cls_dic('webtop-g')) return self.bs.find("div", self._cls_dic('webtop-g'))
@property
def tag_img(self):
    """
    Search ``self.bs`` for an ``a`` element using the filter built by
    ``_cls_dic('topic')`` and return whatever ``find`` yields.

    :rtype: Tag
    """
    # Bind the finder first so attribute access and filter construction
    # happen in the same order as a direct nested call.
    find_in_page = self.bs.find
    return find_in_page('a', self._cls_dic('topic'))
@property @property
def tag_pron(self): def tag_pron(self):
""" """
@ -205,6 +239,20 @@ class OxfordLearningDictWord:
phon phon
) )
@property
def wd_image_full_url(self):
    """
    URL of the full-size image, taken from the ``href`` of ``tag_img``.

    :return: ``self.tag_img['href']``, or '' when it cannot be read
        (for example when ``tag_img`` is not subscriptable or has no
        'href' entry)
    """
    try:
        return self.tag_img['href']
    except Exception:
        # Best-effort lookup: any ordinary failure means "no image".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return ''
@property
def wd_image_thumb_url(self):
    """
    URL of the thumbnail image nested inside ``tag_img``.

    Searches ``tag_img`` for an ``img`` element using the filter built by
    ``_cls_dic('thumb')`` and reads its ``src``.

    :return: the ``src`` value, or '' when any step of the lookup fails
        (for example when ``tag_img`` or the inner search result is not
        usable)
    """
    try:
        return self.tag_img.find('img', self._cls_dic('thumb'))['src']
    except Exception:
        # Best-effort lookup: any ordinary failure means "no thumbnail".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return ''
@property @property
def wd_sound_url_bre(self): def wd_sound_url_bre(self):
try: try:

View File

@ -43,6 +43,8 @@ class OxfordLearning(WebService):
{ {
'phonetic': '{} {}'.format(web_word.wd_phon_bre, web_word.wd_phon_nam), 'phonetic': '{} {}'.format(web_word.wd_phon_bre, web_word.wd_phon_nam),
'pos': web_word.wd_pos, 'pos': web_word.wd_pos,
'img_full': web_word.wd_image_full_url,
'img_thumb': web_word.wd_image_thumb_url,
'ee': ''.join(web_word.definitions_html), 'ee': ''.join(web_word.definitions_html),
's_bre': web_word.wd_sound_url_bre, 's_bre': web_word.wd_sound_url_bre,
's_ame': web_word.wd_sound_url_nam, 's_ame': web_word.wd_sound_url_nam,
@ -53,6 +55,8 @@ class OxfordLearning(WebService):
{ {
'phonetic': '', 'phonetic': '',
'pos': '', 'pos': '',
'img_full': '',
'img_thumb': '',
'ee': '', 'ee': '',
's_bre': '', 's_bre': '',
's_ame': '', 's_ame': '',
@ -75,6 +79,20 @@ class OxfordLearning(WebService):
# 'ee') + "</div>" if "<li>" not in self._get_single_dict('ee') else self._get_single_dict('ee') # 'ee') + "</div>" if "<li>" not in self._get_single_dict('ee') else self._get_single_dict('ee')
return self._get_single_dict('ee') return self._get_single_dict('ee')
def get_image_full(self):
    """
    Download the full-size image and return its Anki label.

    Reads the 'img_full' entry through ``_get_single_dict`` and derives a
    local file name with ``get_hex_name`` (always using the 'jpg'
    extension).

    :return: the result of ``get_anki_label(filename, 'img')`` when the
        URL is non-empty and ``download`` reports success, otherwise ''
    """
    image_url = self._get_single_dict('img_full')
    # The file name is derived before the URL is checked, as in the
    # sibling sound getters.
    local_name = get_hex_name(self.unique.lower(), image_url, 'jpg')
    if not image_url:
        return ''
    if not self.download(image_url, local_name):
        return ''
    return self.get_anki_label(local_name, 'img')
def get_image_thumb(self):
    """
    Download the thumbnail image and return its Anki label.

    Reads the 'img_thumb' entry through ``_get_single_dict`` and derives a
    local file name with ``get_hex_name`` (always using the 'jpg'
    extension).

    :return: the result of ``get_anki_label(filename, 'img')`` when the
        URL is non-empty and ``download`` reports success, otherwise ''
    """
    thumb_url = self._get_single_dict('img_thumb')
    # The file name is derived before the URL is checked, as in the
    # sibling sound getters.
    local_name = get_hex_name(self.unique.lower(), thumb_url, 'jpg')
    if not thumb_url:
        return ''
    if not self.download(thumb_url, local_name):
        return ''
    return self.get_anki_label(local_name, 'img')
def get_sound_bre(self): def get_sound_bre(self):
url = self._get_single_dict('s_bre') url = self._get_single_dict('s_bre')
filename = get_hex_name(self.unique.lower(), url, 'mp3') filename = get_hex_name(self.unique.lower(), url, 'mp3')
@ -89,6 +107,14 @@ class OxfordLearning(WebService):
return self.get_anki_label(filename, 'audio') return self.get_anki_label(filename, 'audio')
return '' return ''
@export('IMAGE')
def fld_image_full(self):
    """Field exported as 'IMAGE'; its value comes from ``get_image_full``."""
    image_label = self.get_image_full()
    return image_label
@export(u'Thumbnails')
def fld_image_thumb(self):
    """Field exported as 'Thumbnails'; its value comes from ``get_image_thumb``."""
    thumb_label = self.get_image_thumb()
    return thumb_label
@export('BRE_PRON') @export('BRE_PRON')
def fld_sound_bre(self): def fld_sound_bre(self):
return self.get_sound_bre() return self.get_sound_bre()
@ -129,6 +155,14 @@ class OxfordLearningDictWord:
""" """
return self.bs.find("div", self._cls_dic('webtop-g')) return self.bs.find("div", self._cls_dic('webtop-g'))
@property
def tag_img(self):
    """
    Search ``self.bs`` for an ``a`` element using the filter built by
    ``_cls_dic('topic')`` and return whatever ``find`` yields.

    :rtype: Tag
    """
    # Bind the finder first so attribute access and filter construction
    # happen in the same order as a direct nested call.
    find_in_page = self.bs.find
    return find_in_page('a', self._cls_dic('topic'))
@property @property
def tag_pron(self): def tag_pron(self):
""" """
@ -211,6 +245,20 @@ class OxfordLearningDictWord:
phon phon
) )
@property
def wd_image_full_url(self):
    """
    URL of the full-size image, taken from the ``href`` of ``tag_img``.

    :return: ``self.tag_img['href']``, or '' when it cannot be read
        (for example when ``tag_img`` is not subscriptable or has no
        'href' entry)
    """
    try:
        return self.tag_img['href']
    except Exception:
        # Best-effort lookup: any ordinary failure means "no image".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return ''
@property
def wd_image_thumb_url(self):
    """
    URL of the thumbnail image nested inside ``tag_img``.

    Searches ``tag_img`` for an ``img`` element using the filter built by
    ``_cls_dic('thumb')`` and reads its ``src``.

    :return: the ``src`` value, or '' when any step of the lookup fails
        (for example when ``tag_img`` or the inner search result is not
        usable)
    """
    try:
        return self.tag_img.find('img', self._cls_dic('thumb'))['src']
    except Exception:
        # Best-effort lookup: any ordinary failure means "no thumbnail".
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return ''
@property @property
def wd_sound_url_bre(self): def wd_sound_url_bre(self):
try: try: