This commit is contained in:
St.Huang 2018-07-26 22:04:04 +08:00
parent ca41b686e6
commit 920718ef7b
2 changed files with 53 additions and 65 deletions

View File

@ -1,11 +1,12 @@
#-*- coding:utf-8 -*- #-*- coding:utf-8 -*-
from struct import unpack
from warnings import warn
import gzip import gzip
import hashlib import hashlib
import os
import re import re
import warnings import warnings
import os from struct import unpack
import six
class _StarDictIfo(object): class _StarDictIfo(object):
@ -49,10 +50,11 @@ class _StarDictIfo(object):
def __init__(self, dict_prefix, container): def __init__(self, dict_prefix, container):
ifo_filename = '%s.ifo' % dict_prefix ifo_filename = '%s.ifo' % dict_prefix
try: try:
_file = open(ifo_filename) _file = open(ifo_filename)
except IOError: except Exception as e:
raise Exception('.ifo file does not exists') raise Exception('ifo file opening error: "{}"'.format(e))
_file.readline() _file.readline()
@ -131,8 +133,7 @@ class _StarDictIdx(object):
try: try:
file = open_file(idx_filename, idx_filename_gz) file = open_file(idx_filename, idx_filename_gz)
except Exception as e: except Exception as e:
# warn(e.message) raise Exception('idx file opening error: "{}"'.format(e))
raise Exception('.idx file does not exists')
self._file = file.read() self._file = file.read()
@ -158,10 +159,12 @@ class _StarDictIdx(object):
""" unpack parsed records """ """ unpack parsed records """
for matched_record in matched_records: for matched_record in matched_records:
c = matched_record.find(b'\x00') + 1 c = matched_record.find(b'\x00')
if c == 0:
continue
record_tuple = unpack( record_tuple = unpack(
'!%sc%sL' % (c, idx_offset_format), matched_record) '!%sc%sL' % (c + 1, idx_offset_format), matched_record)
word, cords = record_tuple[:c - 1], record_tuple[c:] word, cords = record_tuple[:c], record_tuple[c + 1:]
self._idx[b''.join(word)] = cords self._idx[b''.join(word)] = cords
def __getitem__(self, word): def __getitem__(self, word):
@ -197,14 +200,14 @@ class _StarDictIdx(object):
if not self._container.in_memory: if not self._container.in_memory:
warnings.warn( warnings.warn(
'Iter dict items with in_memory=False may cause serious performance problem') 'Iter dict items with in_memory=False may cause serious performance problem')
for key in self._idx.iterkeys(): for key in six.iterkeys(self._idx):
yield key.decode('utf-8') yield key.decode('utf-8')
def keys(self): def keys(self):
""" """
returns keys returns keys
""" """
if sys.version_info[0] == 3: if six.PY3:
return self.iterkeys() return self.iterkeys()
if not self._container.in_memory: if not self._container.in_memory:
@ -360,18 +363,16 @@ class _StarDictDict(object):
dict_filename = '%s.dict' % dict_prefix dict_filename = '%s.dict' % dict_prefix
dict_filename_dz = '%s.dz' % dict_filename dict_filename_dz = '%s.dz' % dict_filename
try:
f = open_file(dict_filename, dict_filename_dz)
except Exception as e:
raise Exception('dict file opening error: "{}"'.format(e))
if in_memory: if in_memory:
try: self._file = f.read()
f = open_file(dict_filename, dict_filename_dz) f.close()
self._file = f.read()
f.close()
except:
raise Exception('.dict file does not exists')
else: else:
try: self._file = f
self._file = open_file(dict_filename, dict_filename_dz)
except:
raise Exception('.dict file does not exists')
def __getitem__(self, word): def __getitem__(self, word):
""" """
@ -414,8 +415,8 @@ class Dictionary(dict):
but changes are not stored anywhere and available in runtime only. but changes are not stored anywhere and available in runtime only.
We assume in this documentation that "x" or "y" is instances of the We assume in this documentation that "x" or "y" is instances of the
StarDictDict class and "x.{ifo,idx{,.gz},dict{,.dz),syn}" or StarDictDict class and "x.{ifo,idx{,.gz},dict{,.dz},syn}" or
"y.{ifo,idx{,.gz},dict{,.dz),syn}" is files of the corresponding stardict "y.{ifo,idx{,.gz},dict{,.dz},syn}" is files of the corresponding stardict
dictionaries. dictionaries.
@ -424,9 +425,9 @@ class Dictionary(dict):
""" """
def __init__(self, filename, in_memory=False): def __init__(self, filename_prefix, in_memory=False):
""" """
filename: path to dictionary files, '.ifo' file filename_prefix: path to dictionary files without files extensions
initializes new StarDictDict instance from stardict dictionary files initializes new StarDictDict instance from stardict dictionary files
provided by filename_prefix provided by filename_prefix
@ -434,40 +435,22 @@ class Dictionary(dict):
self.in_memory = in_memory self.in_memory = in_memory
self.filename_prefix = os.path.splitext(filename)[0] # reading somedict.ifo
self.ifo = _StarDictIfo(dict_prefix=filename_prefix, container=self)
# reading somedict.idx or somedict.idx.gz
self.idx = _StarDictIdx(dict_prefix=filename_prefix, container=self)
# reading somedict.dict or somedict.dict.dz
self.dict = _StarDictDict(
dict_prefix=filename_prefix, container=self, in_memory=in_memory)
# reading somedict.syn (optional)
self.syn = _StarDictSyn(dict_prefix=filename_prefix, container=self)
# initializing cache # initializing cache
self._dict_cache = {} self._dict_cache = {}
self.ifo = None
self.idx = None
def get_header(self):
# reading somedict.ifo
self.ifo = _StarDictIfo(
dict_prefix=self.filename_prefix, container=self)
def check_build(self):
if not self.ifo:
self.get_header()
if not self.idx:
# reading somedict.idx or somedict.idx.gz
self.idx = _StarDictIdx(
dict_prefix=self.filename_prefix, container=self)
# reading somedict.dict or somedict.dict.dz
self.dict = _StarDictDict(
dict_prefix=self.filename_prefix, container=self, in_memory=self.in_memory)
# reading somedict.syn (optional)
self.syn = _StarDictSyn(
dict_prefix=self.filename_prefix, container=self)
@staticmethod
def get_filename_prefix(path):
return os.path.splitext(path)[0]
def __cmp__(self, y): def __cmp__(self, y):
""" """
raises NotImplemented exception raises NotImplemented exception
@ -662,12 +645,16 @@ def open_file(regular, gz):
Open regular file if it exists, gz file otherwise. Open regular file if it exists, gz file otherwise.
If no file exists, raise ValueError. If no file exists, raise ValueError.
""" """
try: if os.path.exists(regular):
return open(regular, 'rb') try:
except IOError as e: return open(regular, 'rb')
# warn(e.message) except Exception as e:
raise Exception('regular file opening error: "{}"'.format(e))
if os.path.exists(gz):
try: try:
return gzip.open(gz, 'rb') return gzip.open(gz, 'rb')
except IOError: except Exception as e:
# warn(e.message) raise Exception('gz file opening error: "{}"'.format(e))
raise ValueError('Neither regular nor gz file exists')
raise ValueError('Neither regular nor gz file exists')

View File

@ -680,12 +680,13 @@ class StardictService(LocalService):
super(StardictService, self).__init__(dict_path) super(StardictService, self).__init__(dict_path)
self.query_interval = 0.05 self.query_interval = 0.05
if StardictService.check(self.dict_path): if StardictService.check(self.dict_path):
dict_path = dict_path[:-4]
self.builder = self._get_builer( self.builder = self._get_builer(
dict_path, dict_path,
service_wrap(StardictBuilder, dict_path, in_memory=False) service_wrap(StardictBuilder, dict_path, in_memory=False)
) )
if self.builder: #if self.builder:
self.builder.get_header() # self.builder.get_header()
@staticmethod @staticmethod
def check(dict_path): def check(dict_path):