diff --git a/addons21/fastwq/libs/mdict/mdict_query.py b/addons21/fastwq/libs/mdict/mdict_query.py index 2ec0442..6dbbce8 100644 --- a/addons21/fastwq/libs/mdict/mdict_query.py +++ b/addons21/fastwq/libs/mdict/mdict_query.py @@ -10,10 +10,10 @@ import zlib from io import BytesIO from struct import pack, unpack -import chardet - from .readmdict import MDD, MDX +# import chardet + # LZO compression is used for engine version < 2.0 try: import lzo @@ -30,7 +30,14 @@ version = '1.1' class IndexBuilder(object): #todo: enable history - def __init__(self, fname, encoding = "", passcode = None, force_rebuild = False, enable_history = False, sql_index = True, check = False): + def __init__(self, + fname, + encoding="", + passcode=None, + force_rebuild=False, + enable_history=False, + sql_index=True, + check=False): self._mdx_file = fname self._mdd_file = "" self._encoding = '' @@ -41,8 +48,8 @@ class IndexBuilder(object): self._sql_index = sql_index self._check = check _filename, _file_extension = os.path.splitext(fname) - assert(_file_extension == '.mdx') - assert(os.path.isfile(fname)) + assert (_file_extension == '.mdx') + assert (os.path.isfile(fname)) self._mdx_db = _filename + ".mdx.db" # make index anyway if force_rebuild: @@ -72,10 +79,12 @@ class IndexBuilder(object): self._make_mdd_index(self._mdd_db) print("mdd.db rebuilt!") return None - cursor = conn.execute("SELECT * FROM META WHERE key = \"encoding\"") + cursor = conn.execute( + "SELECT * FROM META WHERE key = \"encoding\"") for cc in cursor: self._encoding = cc[1] - cursor = conn.execute("SELECT * FROM META WHERE key = \"stylesheet\"") + cursor = conn.execute( + "SELECT * FROM META WHERE key = \"stylesheet\"") for cc in cursor: self._stylesheet = json.loads(cc[1]) @@ -83,7 +92,8 @@ class IndexBuilder(object): for cc in cursor: self._title = cc[1] - cursor = conn.execute("SELECT * FROM META WHERE key = \"description\"") + cursor = conn.execute( + "SELECT * FROM META WHERE key = \"description\"") for cc in cursor: self._description = cc[1] @@ -108,14 +118,13 @@ class IndexBuilder(object): if not os.path.isfile(self._mdd_db): self._make_mdd_index(self._mdd_db) pass - def _replace_stylesheet(self, txt): # substitute stylesheet definition encoding = 'utf-8' if isinstance(txt, bytes): - encode_type = chardet.detect(txt) - encoding = encode_type['encoding'] + # encode_type = chardet.detect(txt) + # encoding = encode_type['encoding'] txt = txt.decode(encoding) txt_list = re.split('`\d+`', txt) txt_tag = re.findall('`\d+`', txt) @@ -134,12 +143,11 @@ class IndexBuilder(object): os.remove(db_name) mdx = MDX(self._mdx_file) self._mdx_db = db_name - returned_index = mdx.get_index(check_block = self._check) + returned_index = mdx.get_index(check_block=self._check) index_list = returned_index['index_dict_list'] conn = sqlite3.connect(db_name) c = conn.cursor() - c.execute( - ''' CREATE TABLE MDX_INDEX + c.execute(''' CREATE TABLE MDX_INDEX (key_text text not null, file_pos integer, compressed_size integer, @@ -148,53 +156,39 @@ class IndexBuilder(object): record_start integer, record_end integer, offset integer - )''' - ) + )''') - tuple_list = [ - (item['key_text'], - item['file_pos'], - item['compressed_size'], - item['decompressed_size'], - item['record_block_type'], - item['record_start'], - item['record_end'], - item['offset'] - ) - for item in index_list - ] + tuple_list = [(item['key_text'], item['file_pos'], + item['compressed_size'], item['decompressed_size'], + item['record_block_type'], item['record_start'], + item['record_end'], item['offset']) + for item in index_list] c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)', tuple_list) # build the metadata table meta = returned_index['meta'] - c.execute( - '''CREATE TABLE META + c.execute('''CREATE TABLE META (key text, value text )''') #for k,v in meta: # c.execute( - # 'INSERT INTO META VALUES (?,?)', + # 'INSERT INTO META VALUES (?,?)', # (k, v) # ) - - c.executemany( - 'INSERT INTO META VALUES (?,?)', - [('encoding', meta['encoding']), - ('stylesheet', meta['stylesheet']), - ('title', meta['title']), - ('description', meta['description']), - ('version', version) - ] - ) - + + c.executemany('INSERT INTO META VALUES (?,?)', + [('encoding', meta['encoding']), + ('stylesheet', meta['stylesheet']), + ('title', meta['title']), + ('description', meta['description']), + ('version', version)]) + if self._sql_index: - c.execute( - ''' + c.execute(''' CREATE INDEX key_index ON MDX_INDEX (key_text) - ''' - ) + ''') conn.commit() conn.close() @@ -204,17 +198,15 @@ class IndexBuilder(object): self._title = meta['title'] self._description = meta['description'] - def _make_mdd_index(self, db_name): if os.path.exists(db_name): os.remove(db_name) mdd = MDD(self._mdd_file) self._mdd_db = db_name - index_list = mdd.get_index(check_block = self._check) + index_list = mdd.get_index(check_block=self._check) conn = sqlite3.connect(db_name) c = conn.cursor() - c.execute( - ''' CREATE TABLE MDX_INDEX + c.execute(''' CREATE TABLE MDX_INDEX (key_text text not null unique, file_pos integer, compressed_size integer, @@ -223,29 +215,19 @@ class IndexBuilder(object): record_start integer, record_end integer, offset integer - )''' - ) + )''') - tuple_list = [ - (item['key_text'], - item['file_pos'], - item['compressed_size'], - item['decompressed_size'], - item['record_block_type'], - item['record_start'], - item['record_end'], - item['offset'] - ) - for item in index_list - ] + tuple_list = [(item['key_text'], item['file_pos'], + item['compressed_size'], item['decompressed_size'], + item['record_block_type'], item['record_start'], + item['record_end'], item['offset']) + for item in index_list] c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)', tuple_list) if self._sql_index: - c.execute( - ''' + c.execute(''' CREATE UNIQUE INDEX key_index ON MDX_INDEX (key_text) - ''' - ) + ''') conn.commit() conn.close() @@ -266,32 +248,40 @@ class IndexBuilder(object): print("LZO compression is not supported") # decompress header = b'\xf0' + pack('>I', index['decompressed_size']) - _record_block = lzo.decompress(record_block_compressed[8:], initSize = decompressed_size, blockSize=1308672) - # zlib compression + _record_block = lzo.decompress( + record_block_compressed[8:], + initSize=decompressed_size, + blockSize=1308672) + # zlib compression elif record_block_type == 2: # decompress _record_block = zlib.decompress(record_block_compressed[8:]) - data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']] + data = _record_block[index['record_start'] - + index['offset']:index['record_end'] - + index['offset']] return data def get_mdx_by_index(self, fmdx, index): - data = self.get_data_by_index(fmdx,index) - record = data.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8') + data = self.get_data_by_index(fmdx, index) + record = data.decode( + self._encoding, errors='ignore').strip(u'\x00').encode('utf-8') if self._stylesheet: record = self._replace_stylesheet(record) record = record.decode('utf-8') return record def get_mdd_by_index(self, fmdx, index): - return self.get_data_by_index(fmdx,index) + return self.get_data_by_index(fmdx, index) @staticmethod - def lookup_indexes(db,keyword,ignorecase=None): + def lookup_indexes(db, keyword, ignorecase=None): indexes = [] if ignorecase: - sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(keyword) + sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format( + keyword) else: - sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(keyword) + sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format( + keyword) with sqlite3.connect(db) as conn: cursor = conn.execute(sql) for result in cursor: @@ -306,29 +296,31 @@ class IndexBuilder(object): indexes.append(index) return indexes - def mdx_lookup(self, keyword,ignorecase=None): + def mdx_lookup(self, keyword, ignorecase=None): lookup_result_list = [] - indexes = self.lookup_indexes(self._mdx_db,keyword,ignorecase) - with open(self._mdx_file,'rb') as mdx_file: + indexes = self.lookup_indexes(self._mdx_db, keyword, ignorecase) + with open(self._mdx_file, 'rb') as mdx_file: for index in indexes: - lookup_result_list.append(self.get_mdx_by_index(mdx_file, index)) + lookup_result_list.append( + self.get_mdx_by_index(mdx_file, index)) return lookup_result_list - def mdd_lookup(self, keyword,ignorecase=None): + def mdd_lookup(self, keyword, ignorecase=None): lookup_result_list = [] - indexes = self.lookup_indexes(self._mdd_db,keyword,ignorecase) - with open(self._mdd_file,'rb') as mdd_file: + indexes = self.lookup_indexes(self._mdd_db, keyword, ignorecase) + with open(self._mdd_file, 'rb') as mdd_file: for index in indexes: - lookup_result_list.append(self.get_mdd_by_index(mdd_file, index)) + lookup_result_list.append( + self.get_mdd_by_index(mdd_file, index)) return lookup_result_list @staticmethod - def get_keys(db,query = ''): + def get_keys(db, query=''): if not db: return [] if query: if '*' in query: - query = query.replace('*','%') + query = query.replace('*', '%') else: query = query + '%' sql = 'SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"' @@ -339,12 +331,11 @@ class IndexBuilder(object): keys = [item[0] for item in cursor] return keys - def get_mdd_keys(self, query = ''): - return self.get_keys(self._mdd_db,query) - - def get_mdx_keys(self, query = ''): - return self.get_keys(self._mdx_db,query) + def get_mdd_keys(self, query=''): + return self.get_keys(self._mdd_db, query) + def get_mdx_keys(self, query=''): + return self.get_keys(self._mdx_db, query) # mdx_builder = IndexBuilder("oald.mdx") @@ -353,5 +344,5 @@ class IndexBuilder(object): # keys1 = mdx_builder.get_mdx_keys('abstrac') # keys2 = mdx_builder.get_mdx_keys('*tion') # for key in keys2: - # text = mdx_builder.mdx_lookup(key)[0] +# text = mdx_builder.mdx_lookup(key)[0] # pass