diff --git a/addons/fastwq/service/dict/cambridge.py b/addons/fastwq/service/dict/cambridge.py index ad00ad7..079cc2b 100644 --- a/addons/fastwq/service/dict/cambridge.py +++ b/addons/fastwq/service/dict/cambridge.py @@ -22,47 +22,55 @@ class Cambridge(WebService): 'def': '' } - #页 - element = soup.find('div', class_='entry-body__el clrd js-share-holder') + #english + element = soup.find('div', class_='link') if element: - #音 - header = element.find('div', class_='pos-header') - if header: - tags = header.find_all('span', class_='pron-info') - if tags: - for tag in tags: - r = tag.find('span', class_='region') - reg = str(r.get_text()).decode('utf-8') if r else u'' - pn = 'AmE' if reg=='us' else 'BrE' - p = tag.find('span', class_='pron') - result['pronunciation'][pn] = str(p.get_text()).decode('utf-8') if p else u'' - snd = tag.find('span', class_='circle circle-btn sound audio_play_button') - if snd: - result['pronunciation'][pn+'mp3'] = cambridge_url_base + snd.get('data-src-mp3') - #义 - body = element.find('div', class_='pos-body') - if body: - tags = body.find_all('div', class_='def-block pad-indent') - if tags: - l = [] - for tag in tags: - i = tag.find('span', class_='def-info') - d = tag.find('b', class_='def') - e = tag.find('div', class_='examp emphasized') - l.append( - u'
  • {0}\ - {1}\ -
    {2}
  • '.format( - str(i.get_text()).decode('utf-8') if i else u'', - str(d.get_text()).decode('utf-8') if d else u'', - str(e.get_text()).decode('utf-8') if e else u'' - ) - ) - result['def'] = u'' - img = body.find('img', class_='lightboxLink') - if img: - result['image'] = cambridge_url_base + img.get('data-image') - result['thumb'] = cambridge_url_base + img.get('src') + #页 + elements = element.find_all('div', class_='entry-body__el clrd js-share-holder') + header_found = False + for element in elements: + if element: + #音 + if not header_found: + header = element.find('div', class_='pos-header') + if header: + tags = header.find_all('span', class_='pron-info') + if tags: + for tag in tags: + r = tag.find('span', class_='region') + reg = str(r.get_text()).decode('utf-8') if r else u'' + pn = 'AmE' if reg=='us' else 'BrE' + p = tag.find('span', class_='pron') + result['pronunciation'][pn] = str(p.get_text()).decode('utf-8') if p else u'' + snd = tag.find('span', class_='circle circle-btn sound audio_play_button') + if snd: + result['pronunciation'][pn+'mp3'] = cambridge_url_base + snd.get('data-src-mp3') + header_found = True + #义 + body = element.find('div', class_='pos-body') + if body: + tags = body.find_all('div', class_='def-block pad-indent') + if tags: + l = result['def_list'] + for tag in tags: + i = tag.find('span', class_='def-info') + d = tag.find('b', class_='def') + es = tag.find_all('div', class_='examp emphasized') + l.append( + u'
  • {0}{1}{2}
  • '.format( + u'{0}'.format(str(i.get_text()).decode('utf-8')) if i else u'', + u'{0}'.format(str(d.get_text()).decode('utf-8')) if d else u'', + u''.join( + u'
    {0}
    '.format(str(e.get_text()).decode('utf-8')) if e else u'' + for e in es + ) + ) + ) + result['def'] = u'' + img = body.find('img', class_='lightboxLink') + if img: + result['image'] = cambridge_url_base + img.get('data-image') + result['thumb'] = cambridge_url_base + img.get('src') return self.cache_this(result) diff --git a/addons21/fastwq/service/dict/cambridge.py b/addons21/fastwq/service/dict/cambridge.py index ad460df..ae9feb5 100644 --- a/addons21/fastwq/service/dict/cambridge.py +++ b/addons21/fastwq/service/dict/cambridge.py @@ -19,50 +19,59 @@ class Cambridge(WebService): 'pronunciation': {'AmE': '', 'BrE': '', 'AmEmp3': '', 'BrEmp3': ''}, 'image': '', 'thumb': '', - 'def': '' + 'def': '', + 'def_list': [] } - #页 - element = soup.find('div', class_='entry-body__el clrd js-share-holder') + #english + element = soup.find('div', class_='link') if element: - #音 - header = element.find('div', class_='pos-header') - if header: - tags = header.find_all('span', class_='pron-info') - if tags: - for tag in tags: - r = tag.find('span', class_='region') - reg = r.get_text() if r else u'' - pn = 'AmE' if reg=='us' else 'BrE' - p = tag.find('span', class_='pron') - result['pronunciation'][pn] = p.get_text() if p else u'' - snd = tag.find('span', class_='circle circle-btn sound audio_play_button') - if snd: - result['pronunciation'][pn+'mp3'] = cambridge_url_base + snd.get('data-src-mp3') - #义 - body = element.find('div', class_='pos-body') - if body: - tags = body.find_all('div', class_='def-block pad-indent') - if tags: - l = [] - for tag in tags: - i = tag.find('span', class_='def-info') - d = tag.find('b', class_='def') - e = tag.find('div', class_='examp emphasized') - l.append( - u'
  • {0}\ - {1}\ -
    {2}
  • '.format( - i.get_text() if i else u'', - d.get_text() if d else u'', - e.get_text() if e else u'' - ) - ) - result['def'] = u'' - img = body.find('img', class_='lightboxLink') - if img: - result['image'] = cambridge_url_base + img.get('data-image') - result['thumb'] = cambridge_url_base + img.get('src') + #页 + elements = element.find_all('div', class_='entry-body__el clrd js-share-holder') + header_found = False + for element in elements: + if element: + #音 + if not header_found: + header = element.find('div', class_='pos-header') + if header: + tags = header.find_all('span', class_='pron-info') + if tags: + for tag in tags: + r = tag.find('span', class_='region') + reg = r.get_text() if r else u'' + pn = 'AmE' if reg=='us' else 'BrE' + p = tag.find('span', class_='pron') + result['pronunciation'][pn] = p.get_text() if p else u'' + snd = tag.find('span', class_='circle circle-btn sound audio_play_button') + if snd: + result['pronunciation'][pn+'mp3'] = cambridge_url_base + snd.get('data-src-mp3') + header_found = True + #义 + body = element.find('div', class_='pos-body') + if body: + tags = body.find_all('div', class_='def-block pad-indent') + if tags: + l = result['def_list'] + for tag in tags: + i = tag.find('span', class_='def-info') + d = tag.find('b', class_='def') + es = tag.find_all('div', class_='examp emphasized') + l.append( + u'
  • {0}{1}{2}
  • '.format( + u'{0}'.format(i.get_text()) if i else u'', + u'{0}'.format(d.get_text()) if d else u'', + u''.join( + u'
    {0}
    '.format(e.get_text()) if e else u'' + for e in es + ) + ) + ) + result['def'] = u'' + img = body.find('img', class_='lightboxLink') + if img: + result['image'] = cambridge_url_base + img.get('data-image') + result['thumb'] = cambridge_url_base + img.get('src') return self.cache_this(result)