update mdict lib.
This commit is contained in:
parent
99601197c1
commit
6bffbdd3ce
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# version: python 3.5
|
|
||||||
|
|
||||||
|
|
||||||
from readmdict import MDX, MDD
|
from readmdict import MDX, MDD
|
||||||
@ -243,34 +242,8 @@ class IndexBuilder(object):
|
|||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
def get_mdx_by_index(self, fmdx, index):
|
@staticmethod
|
||||||
fmdx.seek(index['file_pos'])
|
def get_data_by_index(fmdx, index):
|
||||||
record_block_compressed = fmdx.read(index['compressed_size'])
|
|
||||||
record_block_type = record_block_compressed[:4]
|
|
||||||
record_block_type = index['record_block_type']
|
|
||||||
decompressed_size = index['decompressed_size']
|
|
||||||
#adler32 = unpack('>I', record_block_compressed[4:8])[0]
|
|
||||||
if record_block_type == 0:
|
|
||||||
_record_block = record_block_compressed[8:]
|
|
||||||
# lzo compression
|
|
||||||
elif record_block_type == 1:
|
|
||||||
if lzo is None:
|
|
||||||
print("LZO compression is not supported")
|
|
||||||
# decompress
|
|
||||||
header = b'\xf0' + pack('>I', index['decompressed_size'])
|
|
||||||
_record_block = lzo.decompress(record_block_compressed[8:], initSize = decompressed_size, blockSize=1308672)
|
|
||||||
# zlib compression
|
|
||||||
elif record_block_type == 2:
|
|
||||||
# decompress
|
|
||||||
_record_block = zlib.decompress(record_block_compressed[8:])
|
|
||||||
record = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']]
|
|
||||||
record = record = record.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
|
|
||||||
if self._stylesheet:
|
|
||||||
record = self._replace_stylesheet(record)
|
|
||||||
record = record.decode('utf-8')
|
|
||||||
return record
|
|
||||||
|
|
||||||
def get_mdd_by_index(self, fmdx, index):
|
|
||||||
fmdx.seek(index['file_pos'])
|
fmdx.seek(index['file_pos'])
|
||||||
record_block_compressed = fmdx.read(index['compressed_size'])
|
record_block_compressed = fmdx.read(index['compressed_size'])
|
||||||
record_block_type = record_block_compressed[:4]
|
record_block_type = record_block_compressed[:4]
|
||||||
@ -293,11 +266,26 @@ class IndexBuilder(object):
|
|||||||
data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']]
|
data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']]
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def mdx_lookup(self, keyword):
|
def get_mdx_by_index(self, fmdx, index):
|
||||||
conn = sqlite3.connect(self._mdx_db)
|
data = self.get_data_by_index(fmdx,index)
|
||||||
cursor = conn.execute("SELECT * FROM MDX_INDEX WHERE key_text = " + "\"" + keyword + "\"")
|
record = data.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
|
||||||
lookup_result_list = []
|
if self._stylesheet:
|
||||||
mdx_file = open(self._mdx_file,'rb')
|
record = self._replace_stylesheet(record)
|
||||||
|
record = record.decode('utf-8')
|
||||||
|
return record
|
||||||
|
|
||||||
|
def get_mdd_by_index(self, fmdx, index):
|
||||||
|
return self.get_data_by_index(fmdx,index)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def lookup_indexes(db,keyword,ignorecase=None):
|
||||||
|
indexes = []
|
||||||
|
if ignorecase:
|
||||||
|
sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(keyword)
|
||||||
|
else:
|
||||||
|
sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(keyword)
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
cursor = conn.execute(sql)
|
||||||
for result in cursor:
|
for result in cursor:
|
||||||
index = {}
|
index = {}
|
||||||
index['file_pos'] = result[1]
|
index['file_pos'] = result[1]
|
||||||
@ -307,61 +295,47 @@ class IndexBuilder(object):
|
|||||||
index['record_start'] = result[5]
|
index['record_start'] = result[5]
|
||||||
index['record_end'] = result[6]
|
index['record_end'] = result[6]
|
||||||
index['offset'] = result[7]
|
index['offset'] = result[7]
|
||||||
|
indexes.append(index)
|
||||||
|
return indexes
|
||||||
|
|
||||||
|
def mdx_lookup(self, keyword,ignorecase=None):
|
||||||
|
lookup_result_list = []
|
||||||
|
indexes = self.lookup_indexes(self._mdx_db,keyword,ignorecase)
|
||||||
|
with open(self._mdx_file,'rb') as mdx_file:
|
||||||
|
for index in indexes:
|
||||||
lookup_result_list.append(self.get_mdx_by_index(mdx_file, index))
|
lookup_result_list.append(self.get_mdx_by_index(mdx_file, index))
|
||||||
conn.close()
|
|
||||||
mdx_file.close()
|
|
||||||
return lookup_result_list
|
return lookup_result_list
|
||||||
|
|
||||||
def mdd_lookup(self, keyword):
|
def mdd_lookup(self, keyword,ignorecase=None):
|
||||||
conn = sqlite3.connect(self._mdd_db)
|
|
||||||
cursor = conn.execute("SELECT * FROM MDX_INDEX WHERE key_text = " + "\"" + keyword + "\"")
|
|
||||||
lookup_result_list = []
|
lookup_result_list = []
|
||||||
mdd_file = open(self._mdd_file,'rb')
|
indexes = self.lookup_indexes(self._mdd_db,keyword,ignorecase)
|
||||||
for result in cursor:
|
with open(self._mdd_file,'rb') as mdd_file:
|
||||||
index = {}
|
for index in indexes:
|
||||||
index['file_pos'] = result[1]
|
|
||||||
index['compressed_size'] = result[2]
|
|
||||||
index['decompressed_size'] = result[3]
|
|
||||||
index['record_block_type'] = result[4]
|
|
||||||
index['record_start'] = result[5]
|
|
||||||
index['record_end'] = result[6]
|
|
||||||
index['offset'] = result[7]
|
|
||||||
lookup_result_list.append(self.get_mdd_by_index(mdd_file, index))
|
lookup_result_list.append(self.get_mdd_by_index(mdd_file, index))
|
||||||
mdd_file.close()
|
|
||||||
conn.close()
|
|
||||||
return lookup_result_list
|
return lookup_result_list
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_keys(db,query = ''):
|
||||||
|
if not db:
|
||||||
|
return []
|
||||||
|
if query:
|
||||||
|
if '*' in query:
|
||||||
|
query = query.replace('*','%')
|
||||||
|
else:
|
||||||
|
query = query + '%'
|
||||||
|
sql = 'SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"'
|
||||||
|
else:
|
||||||
|
sql = 'SELECT key_text FROM MDX_INDEX'
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
cursor = conn.execute(sql)
|
||||||
|
keys = [item[0] for item in cursor]
|
||||||
|
return keys
|
||||||
|
|
||||||
def get_mdd_keys(self, query = ''):
|
def get_mdd_keys(self, query = ''):
|
||||||
if not self._mdd_db:
|
return self.get_keys(self._mdd_db,query)
|
||||||
return []
|
|
||||||
conn = sqlite3.connect(self._mdd_db)
|
|
||||||
if query:
|
|
||||||
if '*' in query:
|
|
||||||
query = query.replace('*','%')
|
|
||||||
else:
|
|
||||||
query = query + '%'
|
|
||||||
cursor = conn.execute('SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"')
|
|
||||||
keys = [item[0] for item in cursor]
|
|
||||||
else:
|
|
||||||
cursor = conn.execute('SELECT key_text FROM MDX_INDEX')
|
|
||||||
keys = [item[0] for item in cursor]
|
|
||||||
conn.close()
|
|
||||||
return keys
|
|
||||||
|
|
||||||
def get_mdx_keys(self, query = ''):
|
def get_mdx_keys(self, query = ''):
|
||||||
conn = sqlite3.connect(self._mdx_db)
|
return self.get_keys(self._mdx_db,query)
|
||||||
if query:
|
|
||||||
if '*' in query:
|
|
||||||
query = query.replace('*','%')
|
|
||||||
else:
|
|
||||||
query = query + '%'
|
|
||||||
cursor = conn.execute('SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"')
|
|
||||||
keys = [item[0] for item in cursor]
|
|
||||||
else:
|
|
||||||
cursor = conn.execute('SELECT key_text FROM MDX_INDEX')
|
|
||||||
keys = [item[0] for item in cursor]
|
|
||||||
conn.close()
|
|
||||||
return keys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user