bug fixes

This commit is contained in:
sthoo 2019-03-09 23:06:55 +08:00
parent 27dd65c635
commit 0e502406d1

View File

@ -10,10 +10,10 @@ import zlib
from io import BytesIO
from struct import pack, unpack
import chardet
from .readmdict import MDD, MDX
# import chardet
# LZO compression is used for engine version < 2.0
try:
import lzo
@ -30,7 +30,14 @@ version = '1.1'
class IndexBuilder(object):
#todo: enable history
def __init__(self, fname, encoding = "", passcode = None, force_rebuild = False, enable_history = False, sql_index = True, check = False):
def __init__(self,
fname,
encoding="",
passcode=None,
force_rebuild=False,
enable_history=False,
sql_index=True,
check=False):
self._mdx_file = fname
self._mdd_file = ""
self._encoding = ''
@ -41,8 +48,8 @@ class IndexBuilder(object):
self._sql_index = sql_index
self._check = check
_filename, _file_extension = os.path.splitext(fname)
assert(_file_extension == '.mdx')
assert(os.path.isfile(fname))
assert (_file_extension == '.mdx')
assert (os.path.isfile(fname))
self._mdx_db = _filename + ".mdx.db"
# make index anyway
if force_rebuild:
@ -72,10 +79,12 @@ class IndexBuilder(object):
self._make_mdd_index(self._mdd_db)
print("mdd.db rebuilt!")
return None
cursor = conn.execute("SELECT * FROM META WHERE key = \"encoding\"")
cursor = conn.execute(
"SELECT * FROM META WHERE key = \"encoding\"")
for cc in cursor:
self._encoding = cc[1]
cursor = conn.execute("SELECT * FROM META WHERE key = \"stylesheet\"")
cursor = conn.execute(
"SELECT * FROM META WHERE key = \"stylesheet\"")
for cc in cursor:
self._stylesheet = json.loads(cc[1])
@ -83,7 +92,8 @@ class IndexBuilder(object):
for cc in cursor:
self._title = cc[1]
cursor = conn.execute("SELECT * FROM META WHERE key = \"description\"")
cursor = conn.execute(
"SELECT * FROM META WHERE key = \"description\"")
for cc in cursor:
self._description = cc[1]
@ -109,13 +119,12 @@ class IndexBuilder(object):
self._make_mdd_index(self._mdd_db)
pass
def _replace_stylesheet(self, txt):
# substitute stylesheet definition
encoding = 'utf-8'
if isinstance(txt, bytes):
encode_type = chardet.detect(txt)
encoding = encode_type['encoding']
# encode_type = chardet.detect(txt)
# encoding = encode_type['encoding']
txt = txt.decode(encoding)
txt_list = re.split('`\d+`', txt)
txt_tag = re.findall('`\d+`', txt)
@ -134,12 +143,11 @@ class IndexBuilder(object):
os.remove(db_name)
mdx = MDX(self._mdx_file)
self._mdx_db = db_name
returned_index = mdx.get_index(check_block = self._check)
returned_index = mdx.get_index(check_block=self._check)
index_list = returned_index['index_dict_list']
conn = sqlite3.connect(db_name)
c = conn.cursor()
c.execute(
''' CREATE TABLE MDX_INDEX
c.execute(''' CREATE TABLE MDX_INDEX
(key_text text not null,
file_pos integer,
compressed_size integer,
@ -148,27 +156,18 @@ class IndexBuilder(object):
record_start integer,
record_end integer,
offset integer
)'''
)
)''')
tuple_list = [
(item['key_text'],
item['file_pos'],
item['compressed_size'],
item['decompressed_size'],
item['record_block_type'],
item['record_start'],
item['record_end'],
item['offset']
)
for item in index_list
]
tuple_list = [(item['key_text'], item['file_pos'],
item['compressed_size'], item['decompressed_size'],
item['record_block_type'], item['record_start'],
item['record_end'], item['offset'])
for item in index_list]
c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)',
tuple_list)
# build the metadata table
meta = returned_index['meta']
c.execute(
'''CREATE TABLE META
c.execute('''CREATE TABLE META
(key text,
value text
)''')
@ -179,22 +178,17 @@ class IndexBuilder(object):
# (k, v)
# )
c.executemany(
'INSERT INTO META VALUES (?,?)',
[('encoding', meta['encoding']),
('stylesheet', meta['stylesheet']),
('title', meta['title']),
('description', meta['description']),
('version', version)
]
)
c.executemany('INSERT INTO META VALUES (?,?)',
[('encoding', meta['encoding']),
('stylesheet', meta['stylesheet']),
('title', meta['title']),
('description', meta['description']),
('version', version)])
if self._sql_index:
c.execute(
'''
c.execute('''
CREATE INDEX key_index ON MDX_INDEX (key_text)
'''
)
''')
conn.commit()
conn.close()
@ -204,17 +198,15 @@ class IndexBuilder(object):
self._title = meta['title']
self._description = meta['description']
def _make_mdd_index(self, db_name):
if os.path.exists(db_name):
os.remove(db_name)
mdd = MDD(self._mdd_file)
self._mdd_db = db_name
index_list = mdd.get_index(check_block = self._check)
index_list = mdd.get_index(check_block=self._check)
conn = sqlite3.connect(db_name)
c = conn.cursor()
c.execute(
''' CREATE TABLE MDX_INDEX
c.execute(''' CREATE TABLE MDX_INDEX
(key_text text not null unique,
file_pos integer,
compressed_size integer,
@ -223,29 +215,19 @@ class IndexBuilder(object):
record_start integer,
record_end integer,
offset integer
)'''
)
)''')
tuple_list = [
(item['key_text'],
item['file_pos'],
item['compressed_size'],
item['decompressed_size'],
item['record_block_type'],
item['record_start'],
item['record_end'],
item['offset']
)
for item in index_list
]
tuple_list = [(item['key_text'], item['file_pos'],
item['compressed_size'], item['decompressed_size'],
item['record_block_type'], item['record_start'],
item['record_end'], item['offset'])
for item in index_list]
c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)',
tuple_list)
if self._sql_index:
c.execute(
'''
c.execute('''
CREATE UNIQUE INDEX key_index ON MDX_INDEX (key_text)
'''
)
''')
conn.commit()
conn.close()
@ -266,32 +248,40 @@ class IndexBuilder(object):
print("LZO compression is not supported")
# decompress
header = b'\xf0' + pack('>I', index['decompressed_size'])
_record_block = lzo.decompress(record_block_compressed[8:], initSize = decompressed_size, blockSize=1308672)
# zlib compression
_record_block = lzo.decompress(
record_block_compressed[8:],
initSize=decompressed_size,
blockSize=1308672)
# zlib compression
elif record_block_type == 2:
# decompress
_record_block = zlib.decompress(record_block_compressed[8:])
data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']]
data = _record_block[index['record_start'] -
index['offset']:index['record_end'] -
index['offset']]
return data
def get_mdx_by_index(self, fmdx, index):
data = self.get_data_by_index(fmdx,index)
record = data.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
data = self.get_data_by_index(fmdx, index)
record = data.decode(
self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
if self._stylesheet:
record = self._replace_stylesheet(record)
record = record.decode('utf-8')
return record
def get_mdd_by_index(self, fmdx, index):
return self.get_data_by_index(fmdx,index)
return self.get_data_by_index(fmdx, index)
@staticmethod
def lookup_indexes(db,keyword,ignorecase=None):
def lookup_indexes(db, keyword, ignorecase=None):
indexes = []
if ignorecase:
sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(keyword)
sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(
keyword)
else:
sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(keyword)
sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(
keyword)
with sqlite3.connect(db) as conn:
cursor = conn.execute(sql)
for result in cursor:
@ -306,29 +296,31 @@ class IndexBuilder(object):
indexes.append(index)
return indexes
def mdx_lookup(self, keyword,ignorecase=None):
def mdx_lookup(self, keyword, ignorecase=None):
lookup_result_list = []
indexes = self.lookup_indexes(self._mdx_db,keyword,ignorecase)
with open(self._mdx_file,'rb') as mdx_file:
indexes = self.lookup_indexes(self._mdx_db, keyword, ignorecase)
with open(self._mdx_file, 'rb') as mdx_file:
for index in indexes:
lookup_result_list.append(self.get_mdx_by_index(mdx_file, index))
lookup_result_list.append(
self.get_mdx_by_index(mdx_file, index))
return lookup_result_list
def mdd_lookup(self, keyword,ignorecase=None):
def mdd_lookup(self, keyword, ignorecase=None):
lookup_result_list = []
indexes = self.lookup_indexes(self._mdd_db,keyword,ignorecase)
with open(self._mdd_file,'rb') as mdd_file:
indexes = self.lookup_indexes(self._mdd_db, keyword, ignorecase)
with open(self._mdd_file, 'rb') as mdd_file:
for index in indexes:
lookup_result_list.append(self.get_mdd_by_index(mdd_file, index))
lookup_result_list.append(
self.get_mdd_by_index(mdd_file, index))
return lookup_result_list
@staticmethod
def get_keys(db,query = ''):
def get_keys(db, query=''):
if not db:
return []
if query:
if '*' in query:
query = query.replace('*','%')
query = query.replace('*', '%')
else:
query = query + '%'
sql = 'SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"'
@ -339,12 +331,11 @@ class IndexBuilder(object):
keys = [item[0] for item in cursor]
return keys
def get_mdd_keys(self, query = ''):
return self.get_keys(self._mdd_db,query)
def get_mdx_keys(self, query = ''):
return self.get_keys(self._mdx_db,query)
def get_mdd_keys(self, query=''):
return self.get_keys(self._mdd_db, query)
def get_mdx_keys(self, query=''):
return self.get_keys(self._mdx_db, query)
# mdx_builder = IndexBuilder("oald.mdx")
@ -353,5 +344,5 @@ class IndexBuilder(object):
# keys1 = mdx_builder.get_mdx_keys('abstrac')
# keys2 = mdx_builder.get_mdx_keys('*tion')
# for key in keys2:
# text = mdx_builder.mdx_lookup(key)[0]
# text = mdx_builder.mdx_lookup(key)[0]
# pass