Merge pull request #37 from patzzz/img-longman-fix
Fix file extension for images downloaded from Longman
This commit is contained in:
		
						commit
						70d2a02547
					
				@ -1,6 +1,7 @@
 | 
				
			|||||||
#-*- coding:utf-8 -*-
 | 
					#-*- coding:utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from ..base import *
 | 
					from ..base import *
 | 
				
			||||||
from ...libs.bs4 import Tag
 | 
					from ...libs.bs4 import Tag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -40,7 +41,7 @@ class Longman(WebService):
 | 
				
			|||||||
            # remove image
 | 
					            # remove image
 | 
				
			||||||
            image_tag = dic_link.find('img')
 | 
					            image_tag = dic_link.find('img')
 | 
				
			||||||
            if image_tag:
 | 
					            if image_tag:
 | 
				
			||||||
                word_info['image'] = image_tag.get('src')
 | 
					                word_info['image'] = image_tag.get('src', u'')
 | 
				
			||||||
                image_tag.decompose()
 | 
					                image_tag.decompose()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # Remove related Topics Container
 | 
					            # Remove related Topics Container
 | 
				
			||||||
@ -128,7 +129,11 @@ class Longman(WebService):
 | 
				
			|||||||
    def _fld_img(self, fld):
 | 
					    def _fld_img(self, fld):
 | 
				
			||||||
        img_url = self._get_field(fld)
 | 
					        img_url = self._get_field(fld)
 | 
				
			||||||
        if longman_download_img and img_url:
 | 
					        if longman_download_img and img_url:
 | 
				
			||||||
            filename = get_hex_name(self.unique.lower(), img_url, 'jpg')
 | 
					            # img_url -> https://.../ldoce_XXX.jpg?version=A.B.CC
 | 
				
			||||||
 | 
					            img_url_no_version = re.sub(r'\?version=.*?$', '', img_url)
 | 
				
			||||||
 | 
					            # file extension isn't always jpg
 | 
				
			||||||
 | 
					            file_extension = os.path.splitext(img_url_no_version)[1][1:].strip().lower()
 | 
				
			||||||
 | 
					            filename = get_hex_name(self.unique.lower(), img_url, file_extension)
 | 
				
			||||||
            if os.path.exists(filename) or self.net_download(filename, img_url):
 | 
					            if os.path.exists(filename) or self.net_download(filename, img_url):
 | 
				
			||||||
                return self.get_anki_label(filename, 'img')
 | 
					                return self.get_anki_label(filename, 'img')
 | 
				
			||||||
        return ''
 | 
					        return ''
 | 
				
			||||||
 | 
				
			|||||||
@ -1,6 +1,7 @@
 | 
				
			|||||||
#-*- coding:utf-8 -*-
 | 
					#-*- coding:utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from bs4 import Tag
 | 
					from bs4 import Tag
 | 
				
			||||||
from ..base import *
 | 
					from ..base import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -40,7 +41,7 @@ class Longman(WebService):
 | 
				
			|||||||
            # remove image
 | 
					            # remove image
 | 
				
			||||||
            image_tag = dic_link.find('img')
 | 
					            image_tag = dic_link.find('img')
 | 
				
			||||||
            if image_tag:
 | 
					            if image_tag:
 | 
				
			||||||
                word_info['image'] = image_tag.get('src')
 | 
					                word_info['image'] = image_tag.get('src', u'')
 | 
				
			||||||
                image_tag.decompose()
 | 
					                image_tag.decompose()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # Remove related Topics Container
 | 
					            # Remove related Topics Container
 | 
				
			||||||
@ -128,7 +129,11 @@ class Longman(WebService):
 | 
				
			|||||||
    def _fld_img(self, fld):
 | 
					    def _fld_img(self, fld):
 | 
				
			||||||
        img_url = self._get_field(fld)
 | 
					        img_url = self._get_field(fld)
 | 
				
			||||||
        if longman_download_img and img_url:
 | 
					        if longman_download_img and img_url:
 | 
				
			||||||
            filename = get_hex_name(self.unique.lower(), img_url, 'jpg')
 | 
					            # img_url -> https://.../ldoce_XXX.jpg?version=A.B.CC
 | 
				
			||||||
 | 
					            img_url_no_version = re.sub(r'\?version=.*?$', '', img_url)
 | 
				
			||||||
 | 
					            # file extension isn't always jpg
 | 
				
			||||||
 | 
					            file_extension = os.path.splitext(img_url_no_version)[1][1:].strip().lower()
 | 
				
			||||||
 | 
					            filename = get_hex_name(self.unique.lower(), img_url, file_extension)
 | 
				
			||||||
            if os.path.exists(filename) or self.net_download(filename, img_url):
 | 
					            if os.path.exists(filename) or self.net_download(filename, img_url):
 | 
				
			||||||
                return self.get_anki_label(filename, 'img')
 | 
					                return self.get_anki_label(filename, 'img')
 | 
				
			||||||
        return ''
 | 
					        return ''
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user