From 6bffbdd3ce3cc44fcd187caf692a7a0281cb08c7 Mon Sep 17 00:00:00 2001 From: "St.Huang" Date: Wed, 11 Jul 2018 07:51:43 +0800 Subject: [PATCH] update mdict lib. --- src/fastwq/libs/mdict/mdict_query.py | 146 +++++++++++---------------- 1 file changed, 60 insertions(+), 86 deletions(-) diff --git a/src/fastwq/libs/mdict/mdict_query.py b/src/fastwq/libs/mdict/mdict_query.py index afbb302..5af7007 100644 --- a/src/fastwq/libs/mdict/mdict_query.py +++ b/src/fastwq/libs/mdict/mdict_query.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# version: python 3.5 from readmdict import MDX, MDD @@ -243,7 +242,8 @@ class IndexBuilder(object): conn.commit() conn.close() - def get_mdx_by_index(self, fmdx, index): + @staticmethod + def get_data_by_index(fmdx, index): fmdx.seek(index['file_pos']) record_block_compressed = fmdx.read(index['compressed_size']) record_block_type = record_block_compressed[:4] @@ -263,105 +263,79 @@ class IndexBuilder(object): elif record_block_type == 2: # decompress _record_block = zlib.decompress(record_block_compressed[8:]) - record = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']] - record = record = record.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8') + data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']] + return data + + def get_mdx_by_index(self, fmdx, index): + data = self.get_data_by_index(fmdx,index) + record = data.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8') if self._stylesheet: record = self._replace_stylesheet(record) record = record.decode('utf-8') return record def get_mdd_by_index(self, fmdx, index): - fmdx.seek(index['file_pos']) - record_block_compressed = fmdx.read(index['compressed_size']) - record_block_type = record_block_compressed[:4] - record_block_type = index['record_block_type'] - decompressed_size = index['decompressed_size'] - #adler32 = unpack('>I', record_block_compressed[4:8])[0] - if record_block_type == 0: - _record_block = record_block_compressed[8:] - # lzo compression - elif record_block_type == 1: - if lzo is None: - print("LZO compression is not supported") - # decompress - header = b'\xf0' + pack('>I', index['decompressed_size']) - _record_block = lzo.decompress(record_block_compressed[8:], initSize = decompressed_size, blockSize=1308672) - # zlib compression - elif record_block_type == 2: - # decompress - _record_block = zlib.decompress(record_block_compressed[8:]) - data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']] - return data + return self.get_data_by_index(fmdx,index) - def mdx_lookup(self, keyword): - conn = sqlite3.connect(self._mdx_db) - cursor = conn.execute("SELECT * FROM MDX_INDEX WHERE key_text = " + "\"" + keyword + "\"") + @staticmethod + def lookup_indexes(db,keyword,ignorecase=None): + indexes = [] + if ignorecase: + sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(keyword) + else: + sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(keyword) + with sqlite3.connect(db) as conn: + cursor = conn.execute(sql) + for result in cursor: + index = {} + index['file_pos'] = result[1] + index['compressed_size'] = result[2] + index['decompressed_size'] = result[3] + index['record_block_type'] = result[4] + index['record_start'] = result[5] + index['record_end'] = result[6] + index['offset'] = result[7] + indexes.append(index) + return indexes + + def mdx_lookup(self, keyword,ignorecase=None): lookup_result_list = [] - mdx_file = open(self._mdx_file,'rb') - for result in cursor: - index = {} - index['file_pos'] = result[1] - index['compressed_size'] = result[2] - index['decompressed_size'] = result[3] - index['record_block_type'] = result[4] - index['record_start'] = result[5] - index['record_end'] = result[6] - index['offset'] = result[7] - lookup_result_list.append(self.get_mdx_by_index(mdx_file, index)) - conn.close() - mdx_file.close() + indexes = self.lookup_indexes(self._mdx_db,keyword,ignorecase) + with open(self._mdx_file,'rb') as mdx_file: + for index in indexes: + lookup_result_list.append(self.get_mdx_by_index(mdx_file, index)) return lookup_result_list - - def mdd_lookup(self, keyword): - conn = sqlite3.connect(self._mdd_db) - cursor = conn.execute("SELECT * FROM MDX_INDEX WHERE key_text = " + "\"" + keyword + "\"") + + def mdd_lookup(self, keyword,ignorecase=None): lookup_result_list = [] - mdd_file = open(self._mdd_file,'rb') - for result in cursor: - index = {} - index['file_pos'] = result[1] - index['compressed_size'] = result[2] - index['decompressed_size'] = result[3] - index['record_block_type'] = result[4] - index['record_start'] = result[5] - index['record_end'] = result[6] - index['offset'] = result[7] - lookup_result_list.append(self.get_mdd_by_index(mdd_file, index)) - mdd_file.close() - conn.close() + indexes = self.lookup_indexes(self._mdd_db,keyword,ignorecase) + with open(self._mdd_file,'rb') as mdd_file: + for index in indexes: + lookup_result_list.append(self.get_mdd_by_index(mdd_file, index)) return lookup_result_list + @staticmethod + def get_keys(db,query = ''): + if not db: + return [] + if query: + if '*' in query: + query = query.replace('*','%') + else: + query = query + '%' + sql = 'SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"' + else: + sql = 'SELECT key_text FROM MDX_INDEX' + with sqlite3.connect(db) as conn: + cursor = conn.execute(sql) + keys = [item[0] for item in cursor] + return keys + def get_mdd_keys(self, query = ''): - if not self._mdd_db: - return [] - conn = sqlite3.connect(self._mdd_db) - if query: - if '*' in query: - query = query.replace('*','%') - else: - query = query + '%' - cursor = conn.execute('SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"') - keys = [item[0] for item in cursor] - else: - cursor = conn.execute('SELECT key_text FROM MDX_INDEX') - keys = [item[0] for item in cursor] - conn.close() - return keys + return self.get_keys(self._mdd_db,query) def get_mdx_keys(self, query = ''): - conn = sqlite3.connect(self._mdx_db) - if query: - if '*' in query: - query = query.replace('*','%') - else: - query = query + '%' - cursor = conn.execute('SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"') - keys = [item[0] for item in cursor] - else: - cursor = conn.execute('SELECT key_text FROM MDX_INDEX') - keys = [item[0] for item in cursor] - conn.close() - return keys + return self.get_keys(self._mdx_db,query)