bug fixes
This commit is contained in:
parent
27dd65c635
commit
0e502406d1
@ -10,10 +10,10 @@ import zlib
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
|
|
||||||
import chardet
|
|
||||||
|
|
||||||
from .readmdict import MDD, MDX
|
from .readmdict import MDD, MDX
|
||||||
|
|
||||||
|
# import chardet
|
||||||
|
|
||||||
# LZO compression is used for engine version < 2.0
|
# LZO compression is used for engine version < 2.0
|
||||||
try:
|
try:
|
||||||
import lzo
|
import lzo
|
||||||
@ -30,7 +30,14 @@ version = '1.1'
|
|||||||
|
|
||||||
class IndexBuilder(object):
|
class IndexBuilder(object):
|
||||||
#todo: enable history
|
#todo: enable history
|
||||||
def __init__(self, fname, encoding = "", passcode = None, force_rebuild = False, enable_history = False, sql_index = True, check = False):
|
def __init__(self,
|
||||||
|
fname,
|
||||||
|
encoding="",
|
||||||
|
passcode=None,
|
||||||
|
force_rebuild=False,
|
||||||
|
enable_history=False,
|
||||||
|
sql_index=True,
|
||||||
|
check=False):
|
||||||
self._mdx_file = fname
|
self._mdx_file = fname
|
||||||
self._mdd_file = ""
|
self._mdd_file = ""
|
||||||
self._encoding = ''
|
self._encoding = ''
|
||||||
@ -41,8 +48,8 @@ class IndexBuilder(object):
|
|||||||
self._sql_index = sql_index
|
self._sql_index = sql_index
|
||||||
self._check = check
|
self._check = check
|
||||||
_filename, _file_extension = os.path.splitext(fname)
|
_filename, _file_extension = os.path.splitext(fname)
|
||||||
assert(_file_extension == '.mdx')
|
assert (_file_extension == '.mdx')
|
||||||
assert(os.path.isfile(fname))
|
assert (os.path.isfile(fname))
|
||||||
self._mdx_db = _filename + ".mdx.db"
|
self._mdx_db = _filename + ".mdx.db"
|
||||||
# make index anyway
|
# make index anyway
|
||||||
if force_rebuild:
|
if force_rebuild:
|
||||||
@ -72,10 +79,12 @@ class IndexBuilder(object):
|
|||||||
self._make_mdd_index(self._mdd_db)
|
self._make_mdd_index(self._mdd_db)
|
||||||
print("mdd.db rebuilt!")
|
print("mdd.db rebuilt!")
|
||||||
return None
|
return None
|
||||||
cursor = conn.execute("SELECT * FROM META WHERE key = \"encoding\"")
|
cursor = conn.execute(
|
||||||
|
"SELECT * FROM META WHERE key = \"encoding\"")
|
||||||
for cc in cursor:
|
for cc in cursor:
|
||||||
self._encoding = cc[1]
|
self._encoding = cc[1]
|
||||||
cursor = conn.execute("SELECT * FROM META WHERE key = \"stylesheet\"")
|
cursor = conn.execute(
|
||||||
|
"SELECT * FROM META WHERE key = \"stylesheet\"")
|
||||||
for cc in cursor:
|
for cc in cursor:
|
||||||
self._stylesheet = json.loads(cc[1])
|
self._stylesheet = json.loads(cc[1])
|
||||||
|
|
||||||
@ -83,7 +92,8 @@ class IndexBuilder(object):
|
|||||||
for cc in cursor:
|
for cc in cursor:
|
||||||
self._title = cc[1]
|
self._title = cc[1]
|
||||||
|
|
||||||
cursor = conn.execute("SELECT * FROM META WHERE key = \"description\"")
|
cursor = conn.execute(
|
||||||
|
"SELECT * FROM META WHERE key = \"description\"")
|
||||||
for cc in cursor:
|
for cc in cursor:
|
||||||
self._description = cc[1]
|
self._description = cc[1]
|
||||||
|
|
||||||
@ -108,14 +118,13 @@ class IndexBuilder(object):
|
|||||||
if not os.path.isfile(self._mdd_db):
|
if not os.path.isfile(self._mdd_db):
|
||||||
self._make_mdd_index(self._mdd_db)
|
self._make_mdd_index(self._mdd_db)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _replace_stylesheet(self, txt):
|
def _replace_stylesheet(self, txt):
|
||||||
# substitute stylesheet definition
|
# substitute stylesheet definition
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
if isinstance(txt, bytes):
|
if isinstance(txt, bytes):
|
||||||
encode_type = chardet.detect(txt)
|
# encode_type = chardet.detect(txt)
|
||||||
encoding = encode_type['encoding']
|
# encoding = encode_type['encoding']
|
||||||
txt = txt.decode(encoding)
|
txt = txt.decode(encoding)
|
||||||
txt_list = re.split('`\d+`', txt)
|
txt_list = re.split('`\d+`', txt)
|
||||||
txt_tag = re.findall('`\d+`', txt)
|
txt_tag = re.findall('`\d+`', txt)
|
||||||
@ -134,12 +143,11 @@ class IndexBuilder(object):
|
|||||||
os.remove(db_name)
|
os.remove(db_name)
|
||||||
mdx = MDX(self._mdx_file)
|
mdx = MDX(self._mdx_file)
|
||||||
self._mdx_db = db_name
|
self._mdx_db = db_name
|
||||||
returned_index = mdx.get_index(check_block = self._check)
|
returned_index = mdx.get_index(check_block=self._check)
|
||||||
index_list = returned_index['index_dict_list']
|
index_list = returned_index['index_dict_list']
|
||||||
conn = sqlite3.connect(db_name)
|
conn = sqlite3.connect(db_name)
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
c.execute(
|
c.execute(''' CREATE TABLE MDX_INDEX
|
||||||
''' CREATE TABLE MDX_INDEX
|
|
||||||
(key_text text not null,
|
(key_text text not null,
|
||||||
file_pos integer,
|
file_pos integer,
|
||||||
compressed_size integer,
|
compressed_size integer,
|
||||||
@ -148,53 +156,39 @@ class IndexBuilder(object):
|
|||||||
record_start integer,
|
record_start integer,
|
||||||
record_end integer,
|
record_end integer,
|
||||||
offset integer
|
offset integer
|
||||||
)'''
|
)''')
|
||||||
)
|
|
||||||
|
|
||||||
tuple_list = [
|
tuple_list = [(item['key_text'], item['file_pos'],
|
||||||
(item['key_text'],
|
item['compressed_size'], item['decompressed_size'],
|
||||||
item['file_pos'],
|
item['record_block_type'], item['record_start'],
|
||||||
item['compressed_size'],
|
item['record_end'], item['offset'])
|
||||||
item['decompressed_size'],
|
for item in index_list]
|
||||||
item['record_block_type'],
|
|
||||||
item['record_start'],
|
|
||||||
item['record_end'],
|
|
||||||
item['offset']
|
|
||||||
)
|
|
||||||
for item in index_list
|
|
||||||
]
|
|
||||||
c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)',
|
c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)',
|
||||||
tuple_list)
|
tuple_list)
|
||||||
# build the metadata table
|
# build the metadata table
|
||||||
meta = returned_index['meta']
|
meta = returned_index['meta']
|
||||||
c.execute(
|
c.execute('''CREATE TABLE META
|
||||||
'''CREATE TABLE META
|
|
||||||
(key text,
|
(key text,
|
||||||
value text
|
value text
|
||||||
)''')
|
)''')
|
||||||
|
|
||||||
#for k,v in meta:
|
#for k,v in meta:
|
||||||
# c.execute(
|
# c.execute(
|
||||||
# 'INSERT INTO META VALUES (?,?)',
|
# 'INSERT INTO META VALUES (?,?)',
|
||||||
# (k, v)
|
# (k, v)
|
||||||
# )
|
# )
|
||||||
|
|
||||||
c.executemany(
|
c.executemany('INSERT INTO META VALUES (?,?)',
|
||||||
'INSERT INTO META VALUES (?,?)',
|
[('encoding', meta['encoding']),
|
||||||
[('encoding', meta['encoding']),
|
('stylesheet', meta['stylesheet']),
|
||||||
('stylesheet', meta['stylesheet']),
|
('title', meta['title']),
|
||||||
('title', meta['title']),
|
('description', meta['description']),
|
||||||
('description', meta['description']),
|
('version', version)])
|
||||||
('version', version)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
if self._sql_index:
|
if self._sql_index:
|
||||||
c.execute(
|
c.execute('''
|
||||||
'''
|
|
||||||
CREATE INDEX key_index ON MDX_INDEX (key_text)
|
CREATE INDEX key_index ON MDX_INDEX (key_text)
|
||||||
'''
|
''')
|
||||||
)
|
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
@ -204,17 +198,15 @@ class IndexBuilder(object):
|
|||||||
self._title = meta['title']
|
self._title = meta['title']
|
||||||
self._description = meta['description']
|
self._description = meta['description']
|
||||||
|
|
||||||
|
|
||||||
def _make_mdd_index(self, db_name):
|
def _make_mdd_index(self, db_name):
|
||||||
if os.path.exists(db_name):
|
if os.path.exists(db_name):
|
||||||
os.remove(db_name)
|
os.remove(db_name)
|
||||||
mdd = MDD(self._mdd_file)
|
mdd = MDD(self._mdd_file)
|
||||||
self._mdd_db = db_name
|
self._mdd_db = db_name
|
||||||
index_list = mdd.get_index(check_block = self._check)
|
index_list = mdd.get_index(check_block=self._check)
|
||||||
conn = sqlite3.connect(db_name)
|
conn = sqlite3.connect(db_name)
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
c.execute(
|
c.execute(''' CREATE TABLE MDX_INDEX
|
||||||
''' CREATE TABLE MDX_INDEX
|
|
||||||
(key_text text not null unique,
|
(key_text text not null unique,
|
||||||
file_pos integer,
|
file_pos integer,
|
||||||
compressed_size integer,
|
compressed_size integer,
|
||||||
@ -223,29 +215,19 @@ class IndexBuilder(object):
|
|||||||
record_start integer,
|
record_start integer,
|
||||||
record_end integer,
|
record_end integer,
|
||||||
offset integer
|
offset integer
|
||||||
)'''
|
)''')
|
||||||
)
|
|
||||||
|
|
||||||
tuple_list = [
|
tuple_list = [(item['key_text'], item['file_pos'],
|
||||||
(item['key_text'],
|
item['compressed_size'], item['decompressed_size'],
|
||||||
item['file_pos'],
|
item['record_block_type'], item['record_start'],
|
||||||
item['compressed_size'],
|
item['record_end'], item['offset'])
|
||||||
item['decompressed_size'],
|
for item in index_list]
|
||||||
item['record_block_type'],
|
|
||||||
item['record_start'],
|
|
||||||
item['record_end'],
|
|
||||||
item['offset']
|
|
||||||
)
|
|
||||||
for item in index_list
|
|
||||||
]
|
|
||||||
c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)',
|
c.executemany('INSERT INTO MDX_INDEX VALUES (?,?,?,?,?,?,?,?)',
|
||||||
tuple_list)
|
tuple_list)
|
||||||
if self._sql_index:
|
if self._sql_index:
|
||||||
c.execute(
|
c.execute('''
|
||||||
'''
|
|
||||||
CREATE UNIQUE INDEX key_index ON MDX_INDEX (key_text)
|
CREATE UNIQUE INDEX key_index ON MDX_INDEX (key_text)
|
||||||
'''
|
''')
|
||||||
)
|
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
@ -266,32 +248,40 @@ class IndexBuilder(object):
|
|||||||
print("LZO compression is not supported")
|
print("LZO compression is not supported")
|
||||||
# decompress
|
# decompress
|
||||||
header = b'\xf0' + pack('>I', index['decompressed_size'])
|
header = b'\xf0' + pack('>I', index['decompressed_size'])
|
||||||
_record_block = lzo.decompress(record_block_compressed[8:], initSize = decompressed_size, blockSize=1308672)
|
_record_block = lzo.decompress(
|
||||||
# zlib compression
|
record_block_compressed[8:],
|
||||||
|
initSize=decompressed_size,
|
||||||
|
blockSize=1308672)
|
||||||
|
# zlib compression
|
||||||
elif record_block_type == 2:
|
elif record_block_type == 2:
|
||||||
# decompress
|
# decompress
|
||||||
_record_block = zlib.decompress(record_block_compressed[8:])
|
_record_block = zlib.decompress(record_block_compressed[8:])
|
||||||
data = _record_block[index['record_start'] - index['offset']:index['record_end'] - index['offset']]
|
data = _record_block[index['record_start'] -
|
||||||
|
index['offset']:index['record_end'] -
|
||||||
|
index['offset']]
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_mdx_by_index(self, fmdx, index):
|
def get_mdx_by_index(self, fmdx, index):
|
||||||
data = self.get_data_by_index(fmdx,index)
|
data = self.get_data_by_index(fmdx, index)
|
||||||
record = data.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
|
record = data.decode(
|
||||||
|
self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
|
||||||
if self._stylesheet:
|
if self._stylesheet:
|
||||||
record = self._replace_stylesheet(record)
|
record = self._replace_stylesheet(record)
|
||||||
record = record.decode('utf-8')
|
record = record.decode('utf-8')
|
||||||
return record
|
return record
|
||||||
|
|
||||||
def get_mdd_by_index(self, fmdx, index):
|
def get_mdd_by_index(self, fmdx, index):
|
||||||
return self.get_data_by_index(fmdx,index)
|
return self.get_data_by_index(fmdx, index)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def lookup_indexes(db,keyword,ignorecase=None):
|
def lookup_indexes(db, keyword, ignorecase=None):
|
||||||
indexes = []
|
indexes = []
|
||||||
if ignorecase:
|
if ignorecase:
|
||||||
sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(keyword)
|
sql = 'SELECT * FROM MDX_INDEX WHERE lower(key_text) = lower("{}")'.format(
|
||||||
|
keyword)
|
||||||
else:
|
else:
|
||||||
sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(keyword)
|
sql = 'SELECT * FROM MDX_INDEX WHERE key_text = "{}"'.format(
|
||||||
|
keyword)
|
||||||
with sqlite3.connect(db) as conn:
|
with sqlite3.connect(db) as conn:
|
||||||
cursor = conn.execute(sql)
|
cursor = conn.execute(sql)
|
||||||
for result in cursor:
|
for result in cursor:
|
||||||
@ -306,29 +296,31 @@ class IndexBuilder(object):
|
|||||||
indexes.append(index)
|
indexes.append(index)
|
||||||
return indexes
|
return indexes
|
||||||
|
|
||||||
def mdx_lookup(self, keyword,ignorecase=None):
|
def mdx_lookup(self, keyword, ignorecase=None):
|
||||||
lookup_result_list = []
|
lookup_result_list = []
|
||||||
indexes = self.lookup_indexes(self._mdx_db,keyword,ignorecase)
|
indexes = self.lookup_indexes(self._mdx_db, keyword, ignorecase)
|
||||||
with open(self._mdx_file,'rb') as mdx_file:
|
with open(self._mdx_file, 'rb') as mdx_file:
|
||||||
for index in indexes:
|
for index in indexes:
|
||||||
lookup_result_list.append(self.get_mdx_by_index(mdx_file, index))
|
lookup_result_list.append(
|
||||||
|
self.get_mdx_by_index(mdx_file, index))
|
||||||
return lookup_result_list
|
return lookup_result_list
|
||||||
|
|
||||||
def mdd_lookup(self, keyword,ignorecase=None):
|
def mdd_lookup(self, keyword, ignorecase=None):
|
||||||
lookup_result_list = []
|
lookup_result_list = []
|
||||||
indexes = self.lookup_indexes(self._mdd_db,keyword,ignorecase)
|
indexes = self.lookup_indexes(self._mdd_db, keyword, ignorecase)
|
||||||
with open(self._mdd_file,'rb') as mdd_file:
|
with open(self._mdd_file, 'rb') as mdd_file:
|
||||||
for index in indexes:
|
for index in indexes:
|
||||||
lookup_result_list.append(self.get_mdd_by_index(mdd_file, index))
|
lookup_result_list.append(
|
||||||
|
self.get_mdd_by_index(mdd_file, index))
|
||||||
return lookup_result_list
|
return lookup_result_list
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_keys(db,query = ''):
|
def get_keys(db, query=''):
|
||||||
if not db:
|
if not db:
|
||||||
return []
|
return []
|
||||||
if query:
|
if query:
|
||||||
if '*' in query:
|
if '*' in query:
|
||||||
query = query.replace('*','%')
|
query = query.replace('*', '%')
|
||||||
else:
|
else:
|
||||||
query = query + '%'
|
query = query + '%'
|
||||||
sql = 'SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"'
|
sql = 'SELECT key_text FROM MDX_INDEX WHERE key_text LIKE \"' + query + '\"'
|
||||||
@ -339,12 +331,11 @@ class IndexBuilder(object):
|
|||||||
keys = [item[0] for item in cursor]
|
keys = [item[0] for item in cursor]
|
||||||
return keys
|
return keys
|
||||||
|
|
||||||
def get_mdd_keys(self, query = ''):
|
def get_mdd_keys(self, query=''):
|
||||||
return self.get_keys(self._mdd_db,query)
|
return self.get_keys(self._mdd_db, query)
|
||||||
|
|
||||||
def get_mdx_keys(self, query = ''):
|
|
||||||
return self.get_keys(self._mdx_db,query)
|
|
||||||
|
|
||||||
|
def get_mdx_keys(self, query=''):
|
||||||
|
return self.get_keys(self._mdx_db, query)
|
||||||
|
|
||||||
|
|
||||||
# mdx_builder = IndexBuilder("oald.mdx")
|
# mdx_builder = IndexBuilder("oald.mdx")
|
||||||
@ -353,5 +344,5 @@ class IndexBuilder(object):
|
|||||||
# keys1 = mdx_builder.get_mdx_keys('abstrac')
|
# keys1 = mdx_builder.get_mdx_keys('abstrac')
|
||||||
# keys2 = mdx_builder.get_mdx_keys('*tion')
|
# keys2 = mdx_builder.get_mdx_keys('*tion')
|
||||||
# for key in keys2:
|
# for key in keys2:
|
||||||
# text = mdx_builder.mdx_lookup(key)[0]
|
# text = mdx_builder.mdx_lookup(key)[0]
|
||||||
# pass
|
# pass
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user