Merge pull request #37 from patzzz/img-longman-fix

Fix file extension for images downloaded from Longman
This commit is contained in:
sthoo 2018-08-19 21:03:18 +08:00 committed by GitHub
commit 70d2a02547
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 4 deletions

View File

@ -1,6 +1,7 @@
#-*- coding:utf-8 -*-
import os
import re
from ..base import *
from ...libs.bs4 import Tag
@ -40,7 +41,7 @@ class Longman(WebService):
# remove image
image_tag = dic_link.find('img')
if image_tag:
word_info['image'] = image_tag.get('src')
word_info['image'] = image_tag.get('src', u'')
image_tag.decompose()
# Remove related Topics Container
@ -128,7 +129,11 @@ class Longman(WebService):
def _fld_img(self, fld):
    """Return the Anki label for the image whose URL is stored in field *fld*.

    The image is fetched with ``self.net_download`` unless a file with the
    computed name already exists. An empty string is returned when
    ``longman_download_img`` is falsy, when the field holds no URL, or when
    the download does not succeed.
    """
    img_url = self._get_field(fld)
    if not (longman_download_img and img_url):
        return ''
    # img_url -> https://.../ldoce_XXX.jpg?version=A.B.CC
    # Strip any query string or fragment (not only "?version=...") so the
    # extension is derived from the path part of the URL alone.
    img_path = img_url.split('?', 1)[0].split('#', 1)[0]
    # file extension isn't always jpg; fall back to jpg if the URL has none
    file_extension = os.path.splitext(img_path)[1][1:].strip().lower() or 'jpg'
    filename = get_hex_name(self.unique.lower(), img_url, file_extension)
    if os.path.exists(filename) or self.net_download(filename, img_url):
        return self.get_anki_label(filename, 'img')
    return ''

View File

@ -1,6 +1,7 @@
#-*- coding:utf-8 -*-
import os
import re
from bs4 import Tag
from ..base import *
@ -40,7 +41,7 @@ class Longman(WebService):
# remove image
image_tag = dic_link.find('img')
if image_tag:
word_info['image'] = image_tag.get('src')
word_info['image'] = image_tag.get('src', u'')
image_tag.decompose()
# Remove related Topics Container
@ -128,7 +129,11 @@ class Longman(WebService):
def _fld_img(self, fld):
    """Return the Anki label for the image whose URL is stored in field *fld*.

    The image is fetched with ``self.net_download`` unless a file with the
    computed name already exists. An empty string is returned when
    ``longman_download_img`` is falsy, when the field holds no URL, or when
    the download does not succeed.
    """
    img_url = self._get_field(fld)
    if not (longman_download_img and img_url):
        return ''
    # img_url -> https://.../ldoce_XXX.jpg?version=A.B.CC
    # Strip any query string or fragment (not only "?version=...") so the
    # extension is derived from the path part of the URL alone.
    img_path = img_url.split('?', 1)[0].split('#', 1)[0]
    # file extension isn't always jpg; fall back to jpg if the URL has none
    file_extension = os.path.splitext(img_path)[1][1:].strip().lower() or 'jpg'
    filename = get_hex_name(self.unique.lower(), img_url, file_extension)
    if os.path.exists(filename) or self.net_download(filename, img_url):
        return self.get_anki_label(filename, 'img')
    return ''