From 847ca060709085a60fbe292d89f9f85afe2a5560 Mon Sep 17 00:00:00 2001 From: "St.Huang" Date: Wed, 29 Aug 2018 22:44:52 +0800 Subject: [PATCH] fix #46 --- README.md | 1 + addons/FastWQ.py | 3 + .../fastwq/libs/snowballstemmer/__init__.py | 27 + addons/fastwq/libs/snowballstemmer/among.py | 15 + .../libs/snowballstemmer/basestemmer.py | 351 ++++++ .../libs/snowballstemmer/english_stemmer.py | 1115 +++++++++++++++++ addons/fastwq/query/common.py | 32 +- addons21/fastwq/__init__.py | 3 + .../fastwq/libs/snowballstemmer/__init__.py | 27 + addons21/fastwq/libs/snowballstemmer/among.py | 15 + .../libs/snowballstemmer/basestemmer.py | 351 ++++++ .../libs/snowballstemmer/english_stemmer.py | 1115 +++++++++++++++++ addons21/fastwq/query/common.py | 32 +- 13 files changed, 3069 insertions(+), 18 deletions(-) create mode 100644 addons/fastwq/libs/snowballstemmer/__init__.py create mode 100644 addons/fastwq/libs/snowballstemmer/among.py create mode 100644 addons/fastwq/libs/snowballstemmer/basestemmer.py create mode 100644 addons/fastwq/libs/snowballstemmer/english_stemmer.py create mode 100644 addons21/fastwq/libs/snowballstemmer/__init__.py create mode 100644 addons21/fastwq/libs/snowballstemmer/among.py create mode 100644 addons21/fastwq/libs/snowballstemmer/basestemmer.py create mode 100644 addons21/fastwq/libs/snowballstemmer/english_stemmer.py diff --git a/README.md b/README.md index 8444732..86950d1 100644 --- a/README.md +++ b/README.md @@ -74,3 +74,4 @@ It forks from [WordQuery](https://github.com/finalion/WordQuery), added **multi- - [pystardict](https://github.com/lig/pystardict) - [WordQuery](https://github.com/finalion/WordQuery) - [AnkiHub](https://github.com/dayjaby/AnkiHub) + - [snowball_py](https://github.com/shibukawa/snowball_py) diff --git a/addons/FastWQ.py b/addons/FastWQ.py index 51c329f..5e9a558 100644 --- a/addons/FastWQ.py +++ b/addons/FastWQ.py @@ -17,9 +17,12 @@ # You should have received a copy of the GNU General Public License # along with this 
class BaseStemmer(object):
    '''
    Runtime support for Snowball-generated stemmers.

    The instance holds the word being stemmed (``current``) together with a
    set of integer positions into it:

    * ``cursor``          - current scan position
    * ``limit``           - forward scanning stops here
    * ``limit_backward``  - backward scanning stops here
    * ``bra`` / ``ket``   - start / end of the slice that ``slice_from``,
                            ``slice_del`` and ``slice_to`` operate on

    This class never defines ``_stem``; ``_stem_word`` calls ``self._stem()``,
    so a subclass has to provide it.
    '''

    def __init__(self):
        self.set_current("")
        # Upper bound on the number of cached words before the cache is pruned.
        self.maxCacheSize = 10000
        # word -> [stem, value of _counter when the entry was last used]
        self._cache = {}
        # Incremented once per _stem_word call; acts as a logical clock.
        self._counter = 0

    def set_current(self, value):
        '''
        Load ``value`` as the string to work on and reset every position:
        cursor and bra to 0, limit and ket to ``len(value)``,
        limit_backward to 0.
        '''
        self.current = value
        self.cursor = 0
        self.limit = len(self.current)
        self.limit_backward = 0
        self.bra = self.cursor
        self.ket = self.limit

    def get_current(self):
        '''
        Return the string currently held in ``self.current``.
        '''
        return self.current

    def copy_from(self, other):
        '''
        Copy the working string and all positions from ``other``.
        The cache attributes are not copied.
        '''
        self.current = other.current
        self.cursor = other.cursor
        self.limit = other.limit
        self.limit_backward = other.limit_backward
        self.bra = other.bra
        self.ket = other.ket

    def in_grouping(self, s, min, max):
        '''
        If the character at the cursor belongs to the group, step the cursor
        forward by one and return True; otherwise leave it and return False.

        ``s`` is a bitmap indexed by ``ord(ch) - min``: byte ``n >> 3``,
        bit ``n & 7``. Characters outside ``min..max`` are never in the group.
        '''
        if self.cursor >= self.limit:
            return False
        ch = ord(self.current[self.cursor])
        if ch > max or ch < min:
            return False
        ch -= min
        if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
            return False
        self.cursor += 1
        return True

    def in_grouping_b(self, s, min, max):
        '''
        Backward variant of ``in_grouping``: tests the character just before
        the cursor and, on success, steps the cursor back by one.
        '''
        if self.cursor <= self.limit_backward:
            return False
        ch = ord(self.current[self.cursor - 1])
        if ch > max or ch < min:
            return False
        ch -= min
        if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
            return False
        self.cursor -= 1
        return True

    def out_grouping(self, s, min, max):
        '''
        If the character at the cursor is NOT in the group described by the
        bitmap ``s`` (see ``in_grouping``), step forward by one and return
        True; otherwise leave the cursor and return False. Returns False at
        the limit.
        '''
        if self.cursor >= self.limit:
            return False
        ch = ord(self.current[self.cursor])
        # Anything outside min..max cannot be in the bitmap at all.
        if ch > max or ch < min:
            self.cursor += 1
            return True
        ch -= min
        if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
            self.cursor += 1
            return True
        return False

    def out_grouping_b(self, s, min, max):
        '''
        Backward variant of ``out_grouping``: tests the character just before
        the cursor and, on success, steps the cursor back by one.
        '''
        if self.cursor <= self.limit_backward:
            return False
        ch = ord(self.current[self.cursor - 1])
        if ch > max or ch < min:
            self.cursor -= 1
            return True
        ch -= min
        if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
            self.cursor -= 1
            return True
        return False

    def in_range(self, min, max):
        '''
        Step forward over the character at the cursor if its code point lies
        in ``min..max`` (inclusive). Returns whether the cursor moved.
        '''
        if self.cursor >= self.limit:
            return False
        ch = ord(self.current[self.cursor])
        if ch > max or ch < min:
            return False
        self.cursor += 1
        return True

    def in_range_b(self, min, max):
        '''
        Backward variant of ``in_range`` (character before the cursor).
        '''
        if self.cursor <= self.limit_backward:
            return False
        ch = ord(self.current[self.cursor - 1])
        if ch > max or ch < min:
            return False
        self.cursor -= 1
        return True

    def out_range(self, min, max):
        '''
        Step forward over the character at the cursor if its code point lies
        outside ``min..max``. Returns whether the cursor moved.
        '''
        if self.cursor >= self.limit:
            return False
        ch = ord(self.current[self.cursor])
        if not (ch > max or ch < min):
            return False
        self.cursor += 1
        return True

    def out_range_b(self, min, max):
        '''
        Backward variant of ``out_range`` (character before the cursor).
        '''
        if self.cursor <= self.limit_backward:
            return False
        ch = ord(self.current[self.cursor - 1])
        if not (ch > max or ch < min):
            return False
        self.cursor -= 1
        return True

    def eq_s(self, s_size, s):
        '''
        If the ``s_size`` characters starting at the cursor equal ``s``,
        advance the cursor past them and return True; otherwise return False
        without moving.
        '''
        if self.limit - self.cursor < s_size:
            return False
        if self.current[self.cursor:self.cursor + s_size] != s:
            return False
        self.cursor += s_size
        return True

    def eq_s_b(self, s_size, s):
        '''
        Backward variant of ``eq_s``: compares the ``s_size`` characters that
        end at the cursor and, on a match, moves the cursor back over them.
        '''
        if self.cursor - self.limit_backward < s_size:
            return False
        if self.current[self.cursor - s_size:self.cursor] != s:
            return False
        self.cursor -= s_size
        return True

    def eq_v(self, s):
        '''
        ``eq_s`` with the length taken from ``s`` itself.
        '''
        return self.eq_s(len(s), s)

    def eq_v_b(self, s):
        '''
        ``eq_s_b`` with the length taken from ``s`` itself.
        '''
        return self.eq_s_b(len(s), s)

    def find_among(self, v, v_size):
        '''
        Match the text at the cursor against the table ``v`` (a sequence of
        ``Among`` entries, ``v_size`` of them), scanning forward.

        On success the cursor is placed just after the matched string and the
        entry's ``result`` is returned. If an entry names a ``method``, that
        method is called and must return a true value for the entry to count;
        otherwise the search falls back to the entry at ``substring_i``.
        Returns 0 when nothing matches.

        NOTE(review): the bisection below presumably relies on ``v`` being
        sorted by search string - confirm against the table generator.
        '''
        i = 0
        j = v_size

        c = self.cursor
        l = self.limit

        # Number of leading characters already known to match v[i] / v[j].
        common_i = 0
        common_j = 0

        first_key_inspected = False

        # Phase 1: bisect to the candidate entry i.
        while True:
            k = i + ((j - i) >> 1)
            diff = 0
            common = min(common_i, common_j) # both bounds agree on this many characters
            w = v[k]
            for i2 in range(common, w.s_size):
                if c + common == l:
                    # Ran out of input: treat the input as smaller than w.s.
                    diff = -1
                    break
                diff = ord(self.current[c + common]) - ord(w.s[i2])
                if diff != 0:
                    break
                common += 1
            if diff < 0:
                j = k
                common_j = common
            else:
                i = k
                common_i = common
            if j - i <= 1:
                if i > 0:
                    break # v[i] has already been compared
                if j == i:
                    break # only one item in v
                # i is still 0 and v[0] may not have been compared yet, so
                # go round exactly once more to get it inspected.
                if first_key_inspected:
                    break
                first_key_inspected = True
        # Phase 2: walk from the candidate down its substring chain until an
        # entry is fully matched (and its optional method accepts).
        while True:
            w = v[i]
            if common_i >= w.s_size:
                self.cursor = c + w.s_size
                if w.method is None:
                    return w.result
                method = getattr(self, w.method)
                res = method()
                # The method may have moved the cursor; put it back.
                self.cursor = c + w.s_size
                if res:
                    return w.result
            i = w.substring_i
            if i < 0:
                return 0
        return -1 # not reachable

    def find_among_b(self, v, v_size):
        '''
        Backward variant of ``find_among``: strings are compared from their
        last character towards their first against the text that ends at the
        cursor, and on success the cursor is placed just before the match.
        Returns the matched entry's ``result``, or 0 when nothing matches.
        '''
        i = 0
        j = v_size

        c = self.cursor
        lb = self.limit_backward;

        common_i = 0
        common_j = 0

        first_key_inspected = False

        while True:
            k = i + ((j - i) >> 1)
            diff = 0
            common = min(common_i, common_j)
            w = v[k]
            # Walk w.s right-to-left, skipping the part already known to match.
            for i2 in range(w.s_size - 1 - common, -1, -1):
                if c - common == lb:
                    diff = -1
                    break
                diff = ord(self.current[c - 1 - common]) - ord(w.s[i2])
                if diff != 0:
                    break
                common += 1
            if diff < 0:
                j = k
                common_j = common
            else:
                i = k
                common_i = common
            if j - i <= 1:
                if i > 0:
                    break
                if j == i:
                    break
                if first_key_inspected:
                    break
                first_key_inspected = True
        while True:
            w = v[i]
            if common_i >= w.s_size:
                self.cursor = c - w.s_size
                if w.method is None:
                    return w.result
                method = getattr(self, w.method)
                res = method()
                self.cursor = c - w.s_size
                if res:
                    return w.result
            i = w.substring_i
            if i < 0:
                return 0
        return -1 # not reachable

    def replace_s(self, c_bra, c_ket, s):
        '''
        Replace ``self.current[c_bra:c_ket]`` with ``s``.

        ``limit`` is shifted by the change in length. A cursor at or after
        ``c_ket`` is shifted by the same amount; a cursor strictly inside the
        replaced span is moved to ``c_bra``.

        Returns the change in length (``len(s) - (c_ket - c_bra)``).
        '''
        adjustment = len(s) - (c_ket - c_bra)
        self.current = self.current[0:c_bra] + s + self.current[c_ket:]
        self.limit += adjustment
        if self.cursor >= c_ket:
            self.cursor += adjustment
        elif self.cursor > c_bra:
            self.cursor = c_bra
        return adjustment

    def slice_check(self):
        '''
        Return True when ``0 <= bra <= ket <= limit <= len(current)``.
        '''
        if self.bra < 0 or self.bra > self.ket or self.ket > self.limit or self.limit > len(self.current):
            return False
        return True

    def slice_from(self, s):
        '''
        Replace the ``bra..ket`` slice with ``s``.

        Returns True if the slice bounds were valid and the replacement was
        made, False otherwise (nothing is changed in that case).
        '''
        result = False
        if self.slice_check():
            self.replace_s(self.bra, self.ket, s)
            result = True
        return result

    def slice_del(self):
        '''
        Delete the ``bra..ket`` slice; same return value as ``slice_from``.
        '''
        return self.slice_from("")

    def insert(self, c_bra, c_ket, s):
        '''
        Replace ``self.current[c_bra:c_ket]`` with ``s`` and shift ``bra`` /
        ``ket`` by the change in length when they lie at or after ``c_bra``.
        '''
        adjustment = self.replace_s(c_bra, c_ket, s)
        if c_bra <= self.bra:
            self.bra += adjustment
        if c_bra <= self.ket:
            self.ket += adjustment

    def slice_to(self, s):
        '''
        Return the text of the ``bra..ket`` slice, or '' when the slice
        bounds are invalid. The ``s`` argument is not used.
        '''
        result = ''
        if self.slice_check():
            result = self.current[self.bra:self.ket]
        return result

    def assign_to(self, s):
        '''
        Return ``self.current`` up to ``limit``. The ``s`` argument is not
        used.
        '''
        return self.current[0:self.limit]

    def _stem_word(self, word):
        '''
        Return the stem of ``word``, using and updating the cache.

        A cache miss runs ``self._stem()`` (supplied by the subclass) on the
        word; a hit only refreshes the entry's last-used stamp.
        '''
        cache = self._cache.get(word)
        if cache is None:
            self.set_current(word)
            self._stem()
            result = self.get_current()
            self._cache[word] = [result, self._counter]
        else:
            cache[1] = self._counter
            result = cache[0]
        self._counter += 1
        return result

    def _clear_cache(self):
        '''
        Drop the least recently used entries so that the cache shrinks to
        80% of ``maxCacheSize``.
        '''
        removecount = int(len(self._cache) - self.maxCacheSize * 8 / 10)
        # Sort by last-used stamp, oldest first.
        oldcaches = sorted(self._cache.items(), key=lambda cache: cache[1][1])[0:removecount]
        for key, value in oldcaches:
            del self._cache[key]

    def stemWord(self, word):
        '''
        Return the stem of a single word, pruning the cache afterwards if it
        has grown past ``maxCacheSize``.
        '''
        result = self._stem_word(word)
        if len(self._cache) > self.maxCacheSize:
            self._clear_cache()
        return result

    def stemWords(self, words):
        '''
        Return a list with the stem of every word in ``words``; the cache is
        pruned once, after the whole batch.
        '''
        result = [self._stem_word(word) for word in words]
        if len(self._cache) > self.maxCacheSize:
            self._clear_cache()
        return result
def copy_from(self, other):
    '''
    Copy the complete stemmer state from ``other``.

    Copies the three English-specific fields (``B_Y_found``, ``I_p1``,
    ``I_p2``) and then lets the base class copy the working string and
    its positions.
    '''
    self.B_Y_found = other.B_Y_found
    self.I_p2 = other.I_p2
    self.I_p1 = other.I_p1
    # A bare ``super`` is the builtin type, which has no ``copy_from``
    # attribute, so ``super.copy_from(other)`` raised AttributeError.
    # The explicit two-argument form works on both Python 2 and 3.
    super(EnglishStemmer, self).copy_from(other)
self.B_Y_found = False + # do, line 27 + v_1 = self.cursor + try: + # (, line 27 + # [, line 27 + self.bra = self.cursor + # literal, line 27 + if not self.eq_s(1, u"'"): + raise lab0() + # ], line 27 + self.ket = self.cursor + # delete, line 27 + if not self.slice_del(): + return False + + except lab0: pass + self.cursor = v_1 + # do, line 28 + v_2 = self.cursor + try: + # (, line 28 + # [, line 28 + self.bra = self.cursor + # literal, line 28 + if not self.eq_s(1, u"y"): + raise lab1() + # ], line 28 + self.ket = self.cursor + # <-, line 28 + if not self.slice_from(u"Y"): + return False + # set Y_found, line 28 + self.B_Y_found = True + except lab1: pass + self.cursor = v_2 + # do, line 29 + v_3 = self.cursor + try: + # repeat, line 29 + try: + while True: + try: + v_4 = self.cursor + try: + # (, line 29 + # goto, line 29 + try: + while True: + v_5 = self.cursor + try: + # (, line 29 + if not self.in_grouping(EnglishStemmer.g_v, 97, 121): + raise lab7() + # [, line 29 + self.bra = self.cursor + # literal, line 29 + if not self.eq_s(1, u"y"): + raise lab7() + # ], line 29 + self.ket = self.cursor + self.cursor = v_5 + raise lab6() + except lab7: pass + self.cursor = v_5 + if self.cursor >= self.limit: + raise lab5() + self.cursor += 1 + except lab6: pass + # <-, line 29 + if not self.slice_from(u"Y"): + return False + # set Y_found, line 29 + self.B_Y_found = True + raise lab4() + except lab5: pass + self.cursor = v_4 + raise lab3() + except lab4: pass + except lab3: pass + except lab2: pass + self.cursor = v_3 + return True + + def r_mark_regions(self): + # (, line 32 + self.I_p1 = self.limit; + self.I_p2 = self.limit; + # do, line 35 + v_1 = self.cursor + try: + # (, line 35 + # or, line 41 + try: + v_2 = self.cursor + try: + # among, line 36 + if self.find_among(EnglishStemmer.a_0, 3) == 0: + raise lab2() + raise lab1() + except lab2: pass + self.cursor = v_2 + # (, line 41 + # gopast, line 41 + try: + while True: + try: + if not 
def r_shortv(self):
    '''
    Backward test for the "short vowel" pattern ending at the cursor.

    Succeeds when, reading backwards from the cursor, either
      * a character outside g_v_WXY, then one inside g_v, then one
        outside g_v are found, or
      * a character outside g_v, then one inside g_v are found and that
        leaves the cursor at limit_backward.

    Decoding the bitmaps: g_v is {a, e, i, o, u, y}; g_v_WXY additionally
    holds w, x and Y. The cursor is left wherever the successful branch
    stopped.
    '''
    distance_from_end = self.limit - self.cursor

    # First alternative: three characters, e.g. consonant-vowel-consonant.
    if (self.out_grouping_b(EnglishStemmer.g_v_WXY, 89, 121)
            and self.in_grouping_b(EnglishStemmer.g_v, 97, 121)
            and self.out_grouping_b(EnglishStemmer.g_v, 97, 121)):
        return True

    # Second alternative: rewind and try the two-character form that must
    # reach the start of the word.
    self.cursor = self.limit - distance_from_end
    if not self.out_grouping_b(EnglishStemmer.g_v, 97, 121):
        return False
    if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121):
        return False
    if self.cursor > self.limit_backward:
        return False
    return True
self.limit - self.cursor + try: + # (, line 59 + # [, line 60 + self.ket = self.cursor + # substring, line 60 + among_var = self.find_among_b(EnglishStemmer.a_1, 3) + if among_var == 0: + self.cursor = self.limit - v_1 + raise lab0() + # ], line 60 + self.bra = self.cursor + if among_var == 0: + self.cursor = self.limit - v_1 + raise lab0() + elif among_var == 1: + # (, line 62 + # delete, line 62 + if not self.slice_del(): + return False + + except lab0: pass + # [, line 65 + self.ket = self.cursor + # substring, line 65 + among_var = self.find_among_b(EnglishStemmer.a_2, 6) + if among_var == 0: + return False + # ], line 65 + self.bra = self.cursor + if among_var == 0: + return False + elif among_var == 1: + # (, line 66 + # <-, line 66 + if not self.slice_from(u"ss"): + return False + elif among_var == 2: + # (, line 68 + # or, line 68 + try: + v_2 = self.limit - self.cursor + try: + # (, line 68 + # hop, line 68 + c = self.cursor - 2 + if self.limit_backward > c or c > self.limit: + raise lab2() + self.cursor = c + # <-, line 68 + if not self.slice_from(u"i"): + return False + raise lab1() + except lab2: pass + self.cursor = self.limit - v_2 + # <-, line 68 + if not self.slice_from(u"ie"): + return False + except lab1: pass + elif among_var == 3: + # (, line 69 + # next, line 69 + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + # gopast, line 69 + try: + while True: + try: + if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121): + raise lab4() + raise lab3() + except lab4: pass + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + except lab3: pass + # delete, line 69 + if not self.slice_del(): + return False + + return True + + def r_Step_1b(self): + # (, line 74 + # [, line 75 + self.ket = self.cursor + # substring, line 75 + among_var = self.find_among_b(EnglishStemmer.a_4, 6) + if among_var == 0: + return False + # ], line 75 + self.bra = self.cursor + if among_var == 0: + return False + elif among_var == 
1: + # (, line 77 + # call R1, line 77 + if not self.r_R1(): + return False + # <-, line 77 + if not self.slice_from(u"ee"): + return False + elif among_var == 2: + # (, line 79 + # test, line 80 + v_1 = self.limit - self.cursor + # gopast, line 80 + try: + while True: + try: + if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121): + raise lab1() + raise lab0() + except lab1: pass + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + except lab0: pass + self.cursor = self.limit - v_1 + # delete, line 80 + if not self.slice_del(): + return False + + # test, line 81 + v_3 = self.limit - self.cursor + # substring, line 81 + among_var = self.find_among_b(EnglishStemmer.a_3, 13) + if among_var == 0: + return False + self.cursor = self.limit - v_3 + if among_var == 0: + return False + elif among_var == 1: + # (, line 83 + # <+, line 83 + c = self.cursor + self.insert(self.cursor, self.cursor, u"e") + self.cursor = c + elif among_var == 2: + # (, line 86 + # [, line 86 + self.ket = self.cursor + # next, line 86 + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + # ], line 86 + self.bra = self.cursor + # delete, line 86 + if not self.slice_del(): + return False + + elif among_var == 3: + # (, line 87 + # atmark, line 87 + if self.cursor != self.I_p1: + return False + # test, line 87 + v_4 = self.limit - self.cursor + # call shortv, line 87 + if not self.r_shortv(): + return False + self.cursor = self.limit - v_4 + # <+, line 87 + c = self.cursor + self.insert(self.cursor, self.cursor, u"e") + self.cursor = c + return True + + def r_Step_1c(self): + # (, line 93 + # [, line 94 + self.ket = self.cursor + # or, line 94 + try: + v_1 = self.limit - self.cursor + try: + # literal, line 94 + if not self.eq_s_b(1, u"y"): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = self.limit - v_1 + # literal, line 94 + if not self.eq_s_b(1, u"Y"): + return False + except lab0: pass + # ], line 94 + self.bra = self.cursor + if 
not self.out_grouping_b(EnglishStemmer.g_v, 97, 121): + return False + # not, line 95 + v_2 = self.limit - self.cursor + try: + # atlimit, line 95 + if self.cursor > self.limit_backward: + raise lab2() + return False + except lab2: pass + self.cursor = self.limit - v_2 + # <-, line 96 + if not self.slice_from(u"i"): + return False + return True + + def r_Step_2(self): + # (, line 99 + # [, line 100 + self.ket = self.cursor + # substring, line 100 + among_var = self.find_among_b(EnglishStemmer.a_5, 24) + if among_var == 0: + return False + # ], line 100 + self.bra = self.cursor + # call R1, line 100 + if not self.r_R1(): + return False + if among_var == 0: + return False + elif among_var == 1: + # (, line 101 + # <-, line 101 + if not self.slice_from(u"tion"): + return False + elif among_var == 2: + # (, line 102 + # <-, line 102 + if not self.slice_from(u"ence"): + return False + elif among_var == 3: + # (, line 103 + # <-, line 103 + if not self.slice_from(u"ance"): + return False + elif among_var == 4: + # (, line 104 + # <-, line 104 + if not self.slice_from(u"able"): + return False + elif among_var == 5: + # (, line 105 + # <-, line 105 + if not self.slice_from(u"ent"): + return False + elif among_var == 6: + # (, line 107 + # <-, line 107 + if not self.slice_from(u"ize"): + return False + elif among_var == 7: + # (, line 109 + # <-, line 109 + if not self.slice_from(u"ate"): + return False + elif among_var == 8: + # (, line 111 + # <-, line 111 + if not self.slice_from(u"al"): + return False + elif among_var == 9: + # (, line 112 + # <-, line 112 + if not self.slice_from(u"ful"): + return False + elif among_var == 10: + # (, line 114 + # <-, line 114 + if not self.slice_from(u"ous"): + return False + elif among_var == 11: + # (, line 116 + # <-, line 116 + if not self.slice_from(u"ive"): + return False + elif among_var == 12: + # (, line 118 + # <-, line 118 + if not self.slice_from(u"ble"): + return False + elif among_var == 13: + # (, line 119 + # literal, 
def r_Step_3(self):
    '''
    Apply the suffix rules of table a_6 to the text ending at the cursor.

    The longest suffix from a_6 is located backwards; it must lie in R1.
    Depending on the table result the suffix is rewritten:
      1 -> "tion", 2 -> "ate", 3 -> "al", 4 -> "ic",
      5 -> deleted, 6 -> deleted only when it also lies in R2.

    Returns False when no suffix matches, a region test fails or the
    slice operation is rejected; True otherwise.
    '''
    self.ket = self.cursor
    action = self.find_among_b(EnglishStemmer.a_6, 9)
    if action == 0:
        return False
    self.bra = self.cursor
    if not self.r_R1():
        return False

    rewrites = {1: u"tion", 2: u"ate", 3: u"al", 4: u"ic"}
    if action in rewrites:
        if not self.slice_from(rewrites[action]):
            return False
    elif action == 5:
        if not self.slice_del():
            return False
    elif action == 6:
        # This suffix is only removed when it is inside R2 as well.
        if not self.r_R2():
            return False
        if not self.slice_del():
            return False
    return True
def r_Step_5(self):
    '''
    Remove a final "e" or "l" (table a_8) when the region rules allow it.

    Result 1 (suffix "e"): deleted if it lies in R2, or if it lies in R1
    and is not preceded by the pattern accepted by r_shortv.
    Result 2 (suffix "l"): deleted if it lies in R2 and is itself
    preceded by another "l".

    Returns False when nothing matches or a condition fails, True
    otherwise.
    '''
    self.ket = self.cursor
    action = self.find_among_b(EnglishStemmer.a_8, 2)
    if action == 0:
        return False
    self.bra = self.cursor

    if action == 1:
        before_r2 = self.limit - self.cursor
        if not self.r_R2():
            # Not in R2: rewind and fall back to "in R1 and not shortv".
            self.cursor = self.limit - before_r2
            if not self.r_R1():
                return False
            before_shortv = self.limit - self.cursor
            if self.r_shortv():
                return False
            self.cursor = self.limit - before_shortv
        if not self.slice_del():
            return False
    elif action == 2:
        if not self.r_R2():
            return False
        if not self.eq_s_b(1, u"l"):
            return False
        if not self.slice_del():
            return False
    return True
0: + return False + # ], line 170 + self.ket = self.cursor + # atlimit, line 170 + if self.cursor < self.limit: + return False + if among_var == 0: + return False + elif among_var == 1: + # (, line 174 + # <-, line 174 + if not self.slice_from(u"ski"): + return False + elif among_var == 2: + # (, line 175 + # <-, line 175 + if not self.slice_from(u"sky"): + return False + elif among_var == 3: + # (, line 176 + # <-, line 176 + if not self.slice_from(u"die"): + return False + elif among_var == 4: + # (, line 177 + # <-, line 177 + if not self.slice_from(u"lie"): + return False + elif among_var == 5: + # (, line 178 + # <-, line 178 + if not self.slice_from(u"tie"): + return False + elif among_var == 6: + # (, line 182 + # <-, line 182 + if not self.slice_from(u"idl"): + return False + elif among_var == 7: + # (, line 183 + # <-, line 183 + if not self.slice_from(u"gentl"): + return False + elif among_var == 8: + # (, line 184 + # <-, line 184 + if not self.slice_from(u"ugli"): + return False + elif among_var == 9: + # (, line 185 + # <-, line 185 + if not self.slice_from(u"earli"): + return False + elif among_var == 10: + # (, line 186 + # <-, line 186 + if not self.slice_from(u"onli"): + return False + elif among_var == 11: + # (, line 187 + # <-, line 187 + if not self.slice_from(u"singl"): + return False + return True + + def r_postlude(self): + # (, line 203 + # Boolean test Y_found, line 203 + if not self.B_Y_found: + return False + # repeat, line 203 + try: + while True: + try: + v_1 = self.cursor + try: + # (, line 203 + # goto, line 203 + try: + while True: + v_2 = self.cursor + try: + # (, line 203 + # [, line 203 + self.bra = self.cursor + # literal, line 203 + if not self.eq_s(1, u"Y"): + raise lab4() + # ], line 203 + self.ket = self.cursor + self.cursor = v_2 + raise lab3() + except lab4: pass + self.cursor = v_2 + if self.cursor >= self.limit: + raise lab2() + self.cursor += 1 + except lab3: pass + # <-, line 203 + if not self.slice_from(u"y"): + 
return False + raise lab1() + except lab2: pass + self.cursor = v_1 + raise lab0() + except lab1: pass + except lab0: pass + return True + + def _stem(self): + # (, line 205 + # or, line 207 + try: + v_1 = self.cursor + try: + # call exception1, line 207 + if not self.r_exception1(): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = v_1 + try: + # not, line 208 + v_2 = self.cursor + try: + # hop, line 208 + c = self.cursor + 3 + if 0 > c or c > self.limit: + raise lab3() + self.cursor = c + raise lab2() + except lab3: pass + self.cursor = v_2 + raise lab0() + except lab2: pass + self.cursor = v_1 + # (, line 208 + # do, line 209 + v_3 = self.cursor + try: + # call prelude, line 209 + if not self.r_prelude(): + raise lab4() + except lab4: pass + self.cursor = v_3 + # do, line 210 + v_4 = self.cursor + try: + # call mark_regions, line 210 + if not self.r_mark_regions(): + raise lab5() + except lab5: pass + self.cursor = v_4 + # backwards, line 211 + self.limit_backward = self.cursor + self.cursor = self.limit + # (, line 211 + # do, line 213 + v_5 = self.limit - self.cursor + try: + # call Step_1a, line 213 + if not self.r_Step_1a(): + raise lab6() + except lab6: pass + self.cursor = self.limit - v_5 + # or, line 215 + try: + v_6 = self.limit - self.cursor + try: + # call exception2, line 215 + if not self.r_exception2(): + raise lab8() + raise lab7() + except lab8: pass + self.cursor = self.limit - v_6 + # (, line 215 + # do, line 217 + v_7 = self.limit - self.cursor + try: + # call Step_1b, line 217 + if not self.r_Step_1b(): + raise lab9() + except lab9: pass + self.cursor = self.limit - v_7 + # do, line 218 + v_8 = self.limit - self.cursor + try: + # call Step_1c, line 218 + if not self.r_Step_1c(): + raise lab10() + except lab10: pass + self.cursor = self.limit - v_8 + # do, line 220 + v_9 = self.limit - self.cursor + try: + # call Step_2, line 220 + if not self.r_Step_2(): + raise lab11() + except lab11: pass + self.cursor = self.limit - v_9 + # 
do, line 221 + v_10 = self.limit - self.cursor + try: + # call Step_3, line 221 + if not self.r_Step_3(): + raise lab12() + except lab12: pass + self.cursor = self.limit - v_10 + # do, line 222 + v_11 = self.limit - self.cursor + try: + # call Step_4, line 222 + if not self.r_Step_4(): + raise lab13() + except lab13: pass + self.cursor = self.limit - v_11 + # do, line 224 + v_12 = self.limit - self.cursor + try: + # call Step_5, line 224 + if not self.r_Step_5(): + raise lab14() + except lab14: pass + self.cursor = self.limit - v_12 + except lab7: pass + self.cursor = self.limit_backward + # do, line 227 + v_13 = self.cursor + try: + # call postlude, line 227 + if not self.r_postlude(): + raise lab15() + except lab15: pass + self.cursor = v_13 + except lab0: pass + return True + + def equals(self, o): + return isinstance(o, EnglishStemmer) + + def hashCode(self): + return hash("EnglishStemmer") +class lab0(BaseException): pass +class lab1(BaseException): pass +class lab2(BaseException): pass +class lab3(BaseException): pass +class lab4(BaseException): pass +class lab5(BaseException): pass +class lab6(BaseException): pass +class lab7(BaseException): pass +class lab8(BaseException): pass +class lab9(BaseException): pass +class lab10(BaseException): pass +class lab11(BaseException): pass +class lab12(BaseException): pass +class lab13(BaseException): pass +class lab14(BaseException): pass +class lab15(BaseException): pass diff --git a/addons/fastwq/query/common.py b/addons/fastwq/query/common.py index 59264bf..ec21b28 100644 --- a/addons/fastwq/query/common.py +++ b/addons/fastwq/query/common.py @@ -33,6 +33,7 @@ from ..context import config from ..service import service_pool, QueryResult, copy_static_file from ..service.base import LocalService from ..utils import wrap_css +from ..libs.snowballstemmer import stemmer __all__ = [ @@ -263,14 +264,27 @@ def query_flds(note, fileds=None): def cloze_deletion(text, term): '''create cloze deletion text''' + text = 
text.replace('’', '\'') result = text - words = re.finditer(r"\b" + re.escape(term) + r"\b", text, flags=re.IGNORECASE) - words = [m.start() for m in words][::-1] - index = 1 - for word in words: - if not text[word - 1].isalnum() or text[word + len(term)].isalnum(): - if not "{{" in text[word:word + len(term)] or "}}" in text[word:word + len(term)]: - result = result[:word + len(term)] + "}}" + result[word + len(term):] - result = result[:word] + "{{c" + str(index) + "::" + result[word:] - #index += 1 + offset = 0 + term = _stemmer.stemWord(term).lower() + + terms = re.finditer(r"\b[\w'-]*\b", text) + tags = re.finditer(r"<[^>]+>", text) + for m in terms: + s = m.start() + e = m.end() + f = False + for tag in tags: + if s >= tag.start() and e <= tag.end(): + f = True + break + if f: + continue + word = text[s:e] + if _stemmer.stemWord(word).lower() == term: + result = result[:s+offset] + "{{c1::" + word + "}}" + result[e+offset:] + offset += 8 return result + +_stemmer = stemmer('english') diff --git a/addons21/fastwq/__init__.py b/addons21/fastwq/__init__.py index f909626..738b626 100644 --- a/addons21/fastwq/__init__.py +++ b/addons21/fastwq/__init__.py @@ -17,9 +17,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+import sys from anki.hooks import addHook from anki.utils import isMac +sys.dont_write_bytecode = True + ############## other config here ################## shortcut = ('Ctrl+Alt' if isMac else 'Ctrl') + '+Q' ################################################### diff --git a/addons21/fastwq/libs/snowballstemmer/__init__.py b/addons21/fastwq/libs/snowballstemmer/__init__.py new file mode 100644 index 0000000..d297d9f --- /dev/null +++ b/addons21/fastwq/libs/snowballstemmer/__init__.py @@ -0,0 +1,27 @@ +__all__ = ('language', 'stemmer') + +from .english_stemmer import EnglishStemmer + +language = { + 'english': EnglishStemmer, +} + +try: + import Stemmer + cext_available = True +except ImportError: + cext_available = False + +def algorithms(): + if cext_available: + return Stemmer.language() + else: + return list(language.keys()) + +def stemmer(lang): + if cext_available: + return Stemmer.Stemmer(lang) + if lang.lower() in language: + return language[lang.lower()]() + else: + raise KeyError("Stemming algorithm '%s' not found" % lang) diff --git a/addons21/fastwq/libs/snowballstemmer/among.py b/addons21/fastwq/libs/snowballstemmer/among.py new file mode 100644 index 0000000..5a99ad2 --- /dev/null +++ b/addons21/fastwq/libs/snowballstemmer/among.py @@ -0,0 +1,15 @@ + +class Among(object): + def __init__(self, s, substring_i, result, method=None): + """ + @ivar s_size search string size + @ivar s search string + @ivar substring index to longest matching substring + @ivar result of the lookup + @ivar method method to use if substring matches + """ + self.s_size = len(s) + self.s = s + self.substring_i = substring_i + self.result = result + self.method = method diff --git a/addons21/fastwq/libs/snowballstemmer/basestemmer.py b/addons21/fastwq/libs/snowballstemmer/basestemmer.py new file mode 100644 index 0000000..d7ed09b --- /dev/null +++ b/addons21/fastwq/libs/snowballstemmer/basestemmer.py @@ -0,0 +1,351 @@ +class BaseStemmer(object): + def __init__(self): + 
self.set_current("") + self.maxCacheSize = 10000 + self._cache = {} + self._counter = 0 + + def set_current(self, value): + ''' + Set the self.current string. + ''' + self.current = value + self.cursor = 0 + self.limit = len(self.current) + self.limit_backward = 0 + self.bra = self.cursor + self.ket = self.limit + + def get_current(self): + ''' + Get the self.current string. + ''' + return self.current + + def copy_from(self, other): + self.current = other.current + self.cursor = other.cursor + self.limit = other.limit + self.limit_backward = other.limit_backward + self.bra = other.bra + self.ket = other.ket + + def in_grouping(self, s, min, max): + if self.cursor >= self.limit: + return False + ch = ord(self.current[self.cursor]) + if ch > max or ch < min: + return False + ch -= min + if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0: + return False + self.cursor += 1 + return True + + def in_grouping_b(self, s, min, max): + if self.cursor <= self.limit_backward: + return False + ch = ord(self.current[self.cursor - 1]) + if ch > max or ch < min: + return False + ch -= min + if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0: + return False + self.cursor -= 1 + return True + + def out_grouping(self, s, min, max): + if self.cursor >= self.limit: + return False + ch = ord(self.current[self.cursor]) + if ch > max or ch < min: + self.cursor += 1 + return True + ch -= min + if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0: + self.cursor += 1 + return True + return False + + def out_grouping_b(self, s, min, max): + if self.cursor <= self.limit_backward: + return False + ch = ord(self.current[self.cursor - 1]) + if ch > max or ch < min: + self.cursor -= 1 + return True + ch -= min + if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0: + self.cursor -= 1 + return True + return False + + def in_range(self, min, max): + if self.cursor >= self.limit: + return False + ch = ord(self.current[self.cursor]) + if ch > max or ch < min: + return False + self.cursor += 1 + return True + + def in_range_b(self, min, 
max): + if self.cursor <= self.limit_backward: + return False + ch = ord(self.current[self.cursor - 1]) + if ch > max or ch < min: + return False + self.cursor -= 1 + return True + + def out_range(self, min, max): + if self.cursor >= self.limit: + return False + ch = ord(self.current[self.cursor]) + if not (ch > max or ch < min): + return False + self.cursor += 1 + return True + + def out_range_b(self, min, max): + if self.cursor <= self.limit_backward: + return False + ch = ord(self.current[self.cursor - 1]) + if not (ch > max or ch < min): + return False + self.cursor -= 1 + return True + + def eq_s(self, s_size, s): + if self.limit - self.cursor < s_size: + return False + if self.current[self.cursor:self.cursor + s_size] != s: + return False + self.cursor += s_size + return True + + def eq_s_b(self, s_size, s): + if self.cursor - self.limit_backward < s_size: + return False + if self.current[self.cursor - s_size:self.cursor] != s: + return False + self.cursor -= s_size + return True + + def eq_v(self, s): + return self.eq_s(len(s), s) + + def eq_v_b(self, s): + return self.eq_s_b(len(s), s) + + def find_among(self, v, v_size): + i = 0 + j = v_size + + c = self.cursor + l = self.limit + + common_i = 0 + common_j = 0 + + first_key_inspected = False + + while True: + k = i + ((j - i) >> 1) + diff = 0 + common = min(common_i, common_j) # smalle + w = v[k] + for i2 in range(common, w.s_size): + if c + common == l: + diff = -1 + break + diff = ord(self.current[c + common]) - ord(w.s[i2]) + if diff != 0: + break + common += 1 + if diff < 0: + j = k + common_j = common + else: + i = k + common_i = common + if j - i <= 1: + if i > 0: + break # v->s has been inspected + if j == i: + break # only one item in v + # - but now we need to go round once more to get + # v->s inspected. self looks messy, but is actually + # the optimal approach. 
+ if first_key_inspected: + break + first_key_inspected = True + while True: + w = v[i] + if common_i >= w.s_size: + self.cursor = c + w.s_size + if w.method is None: + return w.result + method = getattr(self, w.method) + res = method() + self.cursor = c + w.s_size + if res: + return w.result + i = w.substring_i + if i < 0: + return 0 + return -1 # not reachable + + def find_among_b(self, v, v_size): + ''' + find_among_b is for backwards processing. Same comments apply + ''' + i = 0 + j = v_size + + c = self.cursor + lb = self.limit_backward; + + common_i = 0 + common_j = 0 + + first_key_inspected = False + + while True: + k = i + ((j - i) >> 1) + diff = 0 + common = min(common_i, common_j) + w = v[k] + for i2 in range(w.s_size - 1 - common, -1, -1): + if c - common == lb: + diff = -1 + break + diff = ord(self.current[c - 1 - common]) - ord(w.s[i2]) + if diff != 0: + break + common += 1 + if diff < 0: + j = k + common_j = common + else: + i = k + common_i = common + if j - i <= 1: + if i > 0: + break + if j == i: + break + if first_key_inspected: + break + first_key_inspected = True + while True: + w = v[i] + if common_i >= w.s_size: + self.cursor = c - w.s_size + if w.method is None: + return w.result + method = getattr(self, w.method) + res = method() + self.cursor = c - w.s_size + if res: + return w.result + i = w.substring_i + if i < 0: + return 0 + return -1 # not reachable + + def replace_s(self, c_bra, c_ket, s): + ''' + to replace chars between c_bra and c_ket in self.current by the + chars in s. 
+ + @type c_bra int + @type c_ket int + @type s: string + ''' + adjustment = len(s) - (c_ket - c_bra) + self.current = self.current[0:c_bra] + s + self.current[c_ket:] + self.limit += adjustment + if self.cursor >= c_ket: + self.cursor += adjustment + elif self.cursor > c_bra: + self.cursor = c_bra + return adjustment + + def slice_check(self): + if self.bra < 0 or self.bra > self.ket or self.ket > self.limit or self.limit > len(self.current): + return False + return True + + def slice_from(self, s): + ''' + @type s string + ''' + result = False + if self.slice_check(): + self.replace_s(self.bra, self.ket, s) + result = True + return result + + def slice_del(self): + return self.slice_from("") + + def insert(self, c_bra, c_ket, s): + ''' + @type c_bra int + @type c_ket int + @type s: string + ''' + adjustment = self.replace_s(c_bra, c_ket, s) + if c_bra <= self.bra: + self.bra += adjustment + if c_bra <= self.ket: + self.ket += adjustment + + def slice_to(self, s): + ''' + Copy the slice into the supplied StringBuffer + + @type s: string + ''' + result = '' + if self.slice_check(): + result = self.current[self.bra:self.ket] + return result + + def assign_to(self, s): + ''' + @type s: string + ''' + return self.current[0:self.limit] + + def _stem_word(self, word): + cache = self._cache.get(word) + if cache is None: + self.set_current(word) + self._stem() + result = self.get_current() + self._cache[word] = [result, self._counter] + else: + cache[1] = self._counter + result = cache[0] + self._counter += 1 + return result + + def _clear_cache(self): + removecount = int(len(self._cache) - self.maxCacheSize * 8 / 10) + oldcaches = sorted(self._cache.items(), key=lambda cache: cache[1][1])[0:removecount] + for key, value in oldcaches: + del self._cache[key] + + def stemWord(self, word): + result = self._stem_word(word) + if len(self._cache) > self.maxCacheSize: + self._clear_cache() + return result + + def stemWords(self, words): + result = [self._stem_word(word) for word 
in words] + if len(self._cache) > self.maxCacheSize: + self._clear_cache() + return result diff --git a/addons21/fastwq/libs/snowballstemmer/english_stemmer.py b/addons21/fastwq/libs/snowballstemmer/english_stemmer.py new file mode 100644 index 0000000..dccbc4b --- /dev/null +++ b/addons21/fastwq/libs/snowballstemmer/english_stemmer.py @@ -0,0 +1,1115 @@ +# self file was generated automatically by the Snowball to Python interpreter + +from .basestemmer import BaseStemmer +from .among import Among + + +class EnglishStemmer(BaseStemmer): + ''' + self class was automatically generated by a Snowball to Python interpreter + It implements the stemming algorithm defined by a snowball script. + ''' + serialVersionUID = 1 + + a_0 = [ + Among(u"arsen", -1, -1), + Among(u"commun", -1, -1), + Among(u"gener", -1, -1) + ] + + a_1 = [ + Among(u"'", -1, 1), + Among(u"'s'", 0, 1), + Among(u"'s", -1, 1) + ] + + a_2 = [ + Among(u"ied", -1, 2), + Among(u"s", -1, 3), + Among(u"ies", 1, 2), + Among(u"sses", 1, 1), + Among(u"ss", 1, -1), + Among(u"us", 1, -1) + ] + + a_3 = [ + Among(u"", -1, 3), + Among(u"bb", 0, 2), + Among(u"dd", 0, 2), + Among(u"ff", 0, 2), + Among(u"gg", 0, 2), + Among(u"bl", 0, 1), + Among(u"mm", 0, 2), + Among(u"nn", 0, 2), + Among(u"pp", 0, 2), + Among(u"rr", 0, 2), + Among(u"at", 0, 1), + Among(u"tt", 0, 2), + Among(u"iz", 0, 1) + ] + + a_4 = [ + Among(u"ed", -1, 2), + Among(u"eed", 0, 1), + Among(u"ing", -1, 2), + Among(u"edly", -1, 2), + Among(u"eedly", 3, 1), + Among(u"ingly", -1, 2) + ] + + a_5 = [ + Among(u"anci", -1, 3), + Among(u"enci", -1, 2), + Among(u"ogi", -1, 13), + Among(u"li", -1, 16), + Among(u"bli", 3, 12), + Among(u"abli", 4, 4), + Among(u"alli", 3, 8), + Among(u"fulli", 3, 14), + Among(u"lessli", 3, 15), + Among(u"ousli", 3, 10), + Among(u"entli", 3, 5), + Among(u"aliti", -1, 8), + Among(u"biliti", -1, 12), + Among(u"iviti", -1, 11), + Among(u"tional", -1, 1), + Among(u"ational", 14, 7), + Among(u"alism", -1, 8), + Among(u"ation", -1, 7), + 
Among(u"ization", 17, 6), + Among(u"izer", -1, 6), + Among(u"ator", -1, 7), + Among(u"iveness", -1, 11), + Among(u"fulness", -1, 9), + Among(u"ousness", -1, 10) + ] + + a_6 = [ + Among(u"icate", -1, 4), + Among(u"ative", -1, 6), + Among(u"alize", -1, 3), + Among(u"iciti", -1, 4), + Among(u"ical", -1, 4), + Among(u"tional", -1, 1), + Among(u"ational", 5, 2), + Among(u"ful", -1, 5), + Among(u"ness", -1, 5) + ] + + a_7 = [ + Among(u"ic", -1, 1), + Among(u"ance", -1, 1), + Among(u"ence", -1, 1), + Among(u"able", -1, 1), + Among(u"ible", -1, 1), + Among(u"ate", -1, 1), + Among(u"ive", -1, 1), + Among(u"ize", -1, 1), + Among(u"iti", -1, 1), + Among(u"al", -1, 1), + Among(u"ism", -1, 1), + Among(u"ion", -1, 2), + Among(u"er", -1, 1), + Among(u"ous", -1, 1), + Among(u"ant", -1, 1), + Among(u"ent", -1, 1), + Among(u"ment", 15, 1), + Among(u"ement", 16, 1) + ] + + a_8 = [ + Among(u"e", -1, 1), + Among(u"l", -1, 2) + ] + + a_9 = [ + Among(u"succeed", -1, -1), + Among(u"proceed", -1, -1), + Among(u"exceed", -1, -1), + Among(u"canning", -1, -1), + Among(u"inning", -1, -1), + Among(u"earring", -1, -1), + Among(u"herring", -1, -1), + Among(u"outing", -1, -1) + ] + + a_10 = [ + Among(u"andes", -1, -1), + Among(u"atlas", -1, -1), + Among(u"bias", -1, -1), + Among(u"cosmos", -1, -1), + Among(u"dying", -1, 3), + Among(u"early", -1, 9), + Among(u"gently", -1, 7), + Among(u"howe", -1, -1), + Among(u"idly", -1, 6), + Among(u"lying", -1, 4), + Among(u"news", -1, -1), + Among(u"only", -1, 10), + Among(u"singly", -1, 11), + Among(u"skies", -1, 2), + Among(u"skis", -1, 1), + Among(u"sky", -1, -1), + Among(u"tying", -1, 5), + Among(u"ugly", -1, 8) + ] + + g_v = [17, 65, 16, 1] + + g_v_WXY = [1, 17, 65, 208, 1] + + g_valid_LI = [55, 141, 2] + + B_Y_found = False + I_p2 = 0 + I_p1 = 0 + + def copy_from(self, other): + self.B_Y_found = other.B_Y_found + self.I_p2 = other.I_p2 + self.I_p1 = other.I_p1 + super.copy_from(other) + + + def r_prelude(self): + # (, line 25 + # unset Y_found, line 26 + 
self.B_Y_found = False + # do, line 27 + v_1 = self.cursor + try: + # (, line 27 + # [, line 27 + self.bra = self.cursor + # literal, line 27 + if not self.eq_s(1, u"'"): + raise lab0() + # ], line 27 + self.ket = self.cursor + # delete, line 27 + if not self.slice_del(): + return False + + except lab0: pass + self.cursor = v_1 + # do, line 28 + v_2 = self.cursor + try: + # (, line 28 + # [, line 28 + self.bra = self.cursor + # literal, line 28 + if not self.eq_s(1, u"y"): + raise lab1() + # ], line 28 + self.ket = self.cursor + # <-, line 28 + if not self.slice_from(u"Y"): + return False + # set Y_found, line 28 + self.B_Y_found = True + except lab1: pass + self.cursor = v_2 + # do, line 29 + v_3 = self.cursor + try: + # repeat, line 29 + try: + while True: + try: + v_4 = self.cursor + try: + # (, line 29 + # goto, line 29 + try: + while True: + v_5 = self.cursor + try: + # (, line 29 + if not self.in_grouping(EnglishStemmer.g_v, 97, 121): + raise lab7() + # [, line 29 + self.bra = self.cursor + # literal, line 29 + if not self.eq_s(1, u"y"): + raise lab7() + # ], line 29 + self.ket = self.cursor + self.cursor = v_5 + raise lab6() + except lab7: pass + self.cursor = v_5 + if self.cursor >= self.limit: + raise lab5() + self.cursor += 1 + except lab6: pass + # <-, line 29 + if not self.slice_from(u"Y"): + return False + # set Y_found, line 29 + self.B_Y_found = True + raise lab4() + except lab5: pass + self.cursor = v_4 + raise lab3() + except lab4: pass + except lab3: pass + except lab2: pass + self.cursor = v_3 + return True + + def r_mark_regions(self): + # (, line 32 + self.I_p1 = self.limit; + self.I_p2 = self.limit; + # do, line 35 + v_1 = self.cursor + try: + # (, line 35 + # or, line 41 + try: + v_2 = self.cursor + try: + # among, line 36 + if self.find_among(EnglishStemmer.a_0, 3) == 0: + raise lab2() + raise lab1() + except lab2: pass + self.cursor = v_2 + # (, line 41 + # gopast, line 41 + try: + while True: + try: + if not 
self.in_grouping(EnglishStemmer.g_v, 97, 121): + raise lab4() + raise lab3() + except lab4: pass + if self.cursor >= self.limit: + raise lab0() + self.cursor += 1 + except lab3: pass + # gopast, line 41 + try: + while True: + try: + if not self.out_grouping(EnglishStemmer.g_v, 97, 121): + raise lab6() + raise lab5() + except lab6: pass + if self.cursor >= self.limit: + raise lab0() + self.cursor += 1 + except lab5: pass + except lab1: pass + # setmark p1, line 42 + self.I_p1 = self.cursor + # gopast, line 43 + try: + while True: + try: + if not self.in_grouping(EnglishStemmer.g_v, 97, 121): + raise lab8() + raise lab7() + except lab8: pass + if self.cursor >= self.limit: + raise lab0() + self.cursor += 1 + except lab7: pass + # gopast, line 43 + try: + while True: + try: + if not self.out_grouping(EnglishStemmer.g_v, 97, 121): + raise lab10() + raise lab9() + except lab10: pass + if self.cursor >= self.limit: + raise lab0() + self.cursor += 1 + except lab9: pass + # setmark p2, line 43 + self.I_p2 = self.cursor + except lab0: pass + self.cursor = v_1 + return True + + def r_shortv(self): + # (, line 49 + # or, line 51 + try: + v_1 = self.limit - self.cursor + try: + # (, line 50 + if not self.out_grouping_b(EnglishStemmer.g_v_WXY, 89, 121): + raise lab1() + if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121): + raise lab1() + if not self.out_grouping_b(EnglishStemmer.g_v, 97, 121): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = self.limit - v_1 + # (, line 52 + if not self.out_grouping_b(EnglishStemmer.g_v, 97, 121): + return False + if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121): + return False + # atlimit, line 52 + if self.cursor > self.limit_backward: + return False + except lab0: pass + return True + + def r_R1(self): + if not self.I_p1 <= self.cursor: + return False + return True + + def r_R2(self): + if not self.I_p2 <= self.cursor: + return False + return True + + def r_Step_1a(self): + # (, line 58 + # try, line 59 + v_1 = 
self.limit - self.cursor + try: + # (, line 59 + # [, line 60 + self.ket = self.cursor + # substring, line 60 + among_var = self.find_among_b(EnglishStemmer.a_1, 3) + if among_var == 0: + self.cursor = self.limit - v_1 + raise lab0() + # ], line 60 + self.bra = self.cursor + if among_var == 0: + self.cursor = self.limit - v_1 + raise lab0() + elif among_var == 1: + # (, line 62 + # delete, line 62 + if not self.slice_del(): + return False + + except lab0: pass + # [, line 65 + self.ket = self.cursor + # substring, line 65 + among_var = self.find_among_b(EnglishStemmer.a_2, 6) + if among_var == 0: + return False + # ], line 65 + self.bra = self.cursor + if among_var == 0: + return False + elif among_var == 1: + # (, line 66 + # <-, line 66 + if not self.slice_from(u"ss"): + return False + elif among_var == 2: + # (, line 68 + # or, line 68 + try: + v_2 = self.limit - self.cursor + try: + # (, line 68 + # hop, line 68 + c = self.cursor - 2 + if self.limit_backward > c or c > self.limit: + raise lab2() + self.cursor = c + # <-, line 68 + if not self.slice_from(u"i"): + return False + raise lab1() + except lab2: pass + self.cursor = self.limit - v_2 + # <-, line 68 + if not self.slice_from(u"ie"): + return False + except lab1: pass + elif among_var == 3: + # (, line 69 + # next, line 69 + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + # gopast, line 69 + try: + while True: + try: + if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121): + raise lab4() + raise lab3() + except lab4: pass + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + except lab3: pass + # delete, line 69 + if not self.slice_del(): + return False + + return True + + def r_Step_1b(self): + # (, line 74 + # [, line 75 + self.ket = self.cursor + # substring, line 75 + among_var = self.find_among_b(EnglishStemmer.a_4, 6) + if among_var == 0: + return False + # ], line 75 + self.bra = self.cursor + if among_var == 0: + return False + elif among_var == 
1: + # (, line 77 + # call R1, line 77 + if not self.r_R1(): + return False + # <-, line 77 + if not self.slice_from(u"ee"): + return False + elif among_var == 2: + # (, line 79 + # test, line 80 + v_1 = self.limit - self.cursor + # gopast, line 80 + try: + while True: + try: + if not self.in_grouping_b(EnglishStemmer.g_v, 97, 121): + raise lab1() + raise lab0() + except lab1: pass + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + except lab0: pass + self.cursor = self.limit - v_1 + # delete, line 80 + if not self.slice_del(): + return False + + # test, line 81 + v_3 = self.limit - self.cursor + # substring, line 81 + among_var = self.find_among_b(EnglishStemmer.a_3, 13) + if among_var == 0: + return False + self.cursor = self.limit - v_3 + if among_var == 0: + return False + elif among_var == 1: + # (, line 83 + # <+, line 83 + c = self.cursor + self.insert(self.cursor, self.cursor, u"e") + self.cursor = c + elif among_var == 2: + # (, line 86 + # [, line 86 + self.ket = self.cursor + # next, line 86 + if self.cursor <= self.limit_backward: + return False + self.cursor -= 1 + # ], line 86 + self.bra = self.cursor + # delete, line 86 + if not self.slice_del(): + return False + + elif among_var == 3: + # (, line 87 + # atmark, line 87 + if self.cursor != self.I_p1: + return False + # test, line 87 + v_4 = self.limit - self.cursor + # call shortv, line 87 + if not self.r_shortv(): + return False + self.cursor = self.limit - v_4 + # <+, line 87 + c = self.cursor + self.insert(self.cursor, self.cursor, u"e") + self.cursor = c + return True + + def r_Step_1c(self): + # (, line 93 + # [, line 94 + self.ket = self.cursor + # or, line 94 + try: + v_1 = self.limit - self.cursor + try: + # literal, line 94 + if not self.eq_s_b(1, u"y"): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = self.limit - v_1 + # literal, line 94 + if not self.eq_s_b(1, u"Y"): + return False + except lab0: pass + # ], line 94 + self.bra = self.cursor + if 
not self.out_grouping_b(EnglishStemmer.g_v, 97, 121): + return False + # not, line 95 + v_2 = self.limit - self.cursor + try: + # atlimit, line 95 + if self.cursor > self.limit_backward: + raise lab2() + return False + except lab2: pass + self.cursor = self.limit - v_2 + # <-, line 96 + if not self.slice_from(u"i"): + return False + return True + + def r_Step_2(self): + # (, line 99 + # [, line 100 + self.ket = self.cursor + # substring, line 100 + among_var = self.find_among_b(EnglishStemmer.a_5, 24) + if among_var == 0: + return False + # ], line 100 + self.bra = self.cursor + # call R1, line 100 + if not self.r_R1(): + return False + if among_var == 0: + return False + elif among_var == 1: + # (, line 101 + # <-, line 101 + if not self.slice_from(u"tion"): + return False + elif among_var == 2: + # (, line 102 + # <-, line 102 + if not self.slice_from(u"ence"): + return False + elif among_var == 3: + # (, line 103 + # <-, line 103 + if not self.slice_from(u"ance"): + return False + elif among_var == 4: + # (, line 104 + # <-, line 104 + if not self.slice_from(u"able"): + return False + elif among_var == 5: + # (, line 105 + # <-, line 105 + if not self.slice_from(u"ent"): + return False + elif among_var == 6: + # (, line 107 + # <-, line 107 + if not self.slice_from(u"ize"): + return False + elif among_var == 7: + # (, line 109 + # <-, line 109 + if not self.slice_from(u"ate"): + return False + elif among_var == 8: + # (, line 111 + # <-, line 111 + if not self.slice_from(u"al"): + return False + elif among_var == 9: + # (, line 112 + # <-, line 112 + if not self.slice_from(u"ful"): + return False + elif among_var == 10: + # (, line 114 + # <-, line 114 + if not self.slice_from(u"ous"): + return False + elif among_var == 11: + # (, line 116 + # <-, line 116 + if not self.slice_from(u"ive"): + return False + elif among_var == 12: + # (, line 118 + # <-, line 118 + if not self.slice_from(u"ble"): + return False + elif among_var == 13: + # (, line 119 + # literal, 
line 119 + if not self.eq_s_b(1, u"l"): + return False + # <-, line 119 + if not self.slice_from(u"og"): + return False + elif among_var == 14: + # (, line 120 + # <-, line 120 + if not self.slice_from(u"ful"): + return False + elif among_var == 15: + # (, line 121 + # <-, line 121 + if not self.slice_from(u"less"): + return False + elif among_var == 16: + # (, line 122 + if not self.in_grouping_b(EnglishStemmer.g_valid_LI, 99, 116): + return False + # delete, line 122 + if not self.slice_del(): + return False + + return True + + def r_Step_3(self): + # (, line 126 + # [, line 127 + self.ket = self.cursor + # substring, line 127 + among_var = self.find_among_b(EnglishStemmer.a_6, 9) + if among_var == 0: + return False + # ], line 127 + self.bra = self.cursor + # call R1, line 127 + if not self.r_R1(): + return False + if among_var == 0: + return False + elif among_var == 1: + # (, line 128 + # <-, line 128 + if not self.slice_from(u"tion"): + return False + elif among_var == 2: + # (, line 129 + # <-, line 129 + if not self.slice_from(u"ate"): + return False + elif among_var == 3: + # (, line 130 + # <-, line 130 + if not self.slice_from(u"al"): + return False + elif among_var == 4: + # (, line 132 + # <-, line 132 + if not self.slice_from(u"ic"): + return False + elif among_var == 5: + # (, line 134 + # delete, line 134 + if not self.slice_del(): + return False + + elif among_var == 6: + # (, line 136 + # call R2, line 136 + if not self.r_R2(): + return False + # delete, line 136 + if not self.slice_del(): + return False + + return True + + def r_Step_4(self): + # (, line 140 + # [, line 141 + self.ket = self.cursor + # substring, line 141 + among_var = self.find_among_b(EnglishStemmer.a_7, 18) + if among_var == 0: + return False + # ], line 141 + self.bra = self.cursor + # call R2, line 141 + if not self.r_R2(): + return False + if among_var == 0: + return False + elif among_var == 1: + # (, line 144 + # delete, line 144 + if not self.slice_del(): + return False 
+ + elif among_var == 2: + # (, line 145 + # or, line 145 + try: + v_1 = self.limit - self.cursor + try: + # literal, line 145 + if not self.eq_s_b(1, u"s"): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = self.limit - v_1 + # literal, line 145 + if not self.eq_s_b(1, u"t"): + return False + except lab0: pass + # delete, line 145 + if not self.slice_del(): + return False + + return True + + def r_Step_5(self): + # (, line 149 + # [, line 150 + self.ket = self.cursor + # substring, line 150 + among_var = self.find_among_b(EnglishStemmer.a_8, 2) + if among_var == 0: + return False + # ], line 150 + self.bra = self.cursor + if among_var == 0: + return False + elif among_var == 1: + # (, line 151 + # or, line 151 + try: + v_1 = self.limit - self.cursor + try: + # call R2, line 151 + if not self.r_R2(): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = self.limit - v_1 + # (, line 151 + # call R1, line 151 + if not self.r_R1(): + return False + # not, line 151 + v_2 = self.limit - self.cursor + try: + # call shortv, line 151 + if not self.r_shortv(): + raise lab2() + return False + except lab2: pass + self.cursor = self.limit - v_2 + except lab0: pass + # delete, line 151 + if not self.slice_del(): + return False + + elif among_var == 2: + # (, line 152 + # call R2, line 152 + if not self.r_R2(): + return False + # literal, line 152 + if not self.eq_s_b(1, u"l"): + return False + # delete, line 152 + if not self.slice_del(): + return False + + return True + + def r_exception2(self): + # (, line 156 + # [, line 158 + self.ket = self.cursor + # substring, line 158 + if self.find_among_b(EnglishStemmer.a_9, 8) == 0: + return False + # ], line 158 + self.bra = self.cursor + # atlimit, line 158 + if self.cursor > self.limit_backward: + return False + return True + + def r_exception1(self): + # (, line 168 + # [, line 170 + self.bra = self.cursor + # substring, line 170 + among_var = self.find_among(EnglishStemmer.a_10, 18) + if among_var == 
0: + return False + # ], line 170 + self.ket = self.cursor + # atlimit, line 170 + if self.cursor < self.limit: + return False + if among_var == 0: + return False + elif among_var == 1: + # (, line 174 + # <-, line 174 + if not self.slice_from(u"ski"): + return False + elif among_var == 2: + # (, line 175 + # <-, line 175 + if not self.slice_from(u"sky"): + return False + elif among_var == 3: + # (, line 176 + # <-, line 176 + if not self.slice_from(u"die"): + return False + elif among_var == 4: + # (, line 177 + # <-, line 177 + if not self.slice_from(u"lie"): + return False + elif among_var == 5: + # (, line 178 + # <-, line 178 + if not self.slice_from(u"tie"): + return False + elif among_var == 6: + # (, line 182 + # <-, line 182 + if not self.slice_from(u"idl"): + return False + elif among_var == 7: + # (, line 183 + # <-, line 183 + if not self.slice_from(u"gentl"): + return False + elif among_var == 8: + # (, line 184 + # <-, line 184 + if not self.slice_from(u"ugli"): + return False + elif among_var == 9: + # (, line 185 + # <-, line 185 + if not self.slice_from(u"earli"): + return False + elif among_var == 10: + # (, line 186 + # <-, line 186 + if not self.slice_from(u"onli"): + return False + elif among_var == 11: + # (, line 187 + # <-, line 187 + if not self.slice_from(u"singl"): + return False + return True + + def r_postlude(self): + # (, line 203 + # Boolean test Y_found, line 203 + if not self.B_Y_found: + return False + # repeat, line 203 + try: + while True: + try: + v_1 = self.cursor + try: + # (, line 203 + # goto, line 203 + try: + while True: + v_2 = self.cursor + try: + # (, line 203 + # [, line 203 + self.bra = self.cursor + # literal, line 203 + if not self.eq_s(1, u"Y"): + raise lab4() + # ], line 203 + self.ket = self.cursor + self.cursor = v_2 + raise lab3() + except lab4: pass + self.cursor = v_2 + if self.cursor >= self.limit: + raise lab2() + self.cursor += 1 + except lab3: pass + # <-, line 203 + if not self.slice_from(u"y"): + 
return False + raise lab1() + except lab2: pass + self.cursor = v_1 + raise lab0() + except lab1: pass + except lab0: pass + return True + + def _stem(self): + # (, line 205 + # or, line 207 + try: + v_1 = self.cursor + try: + # call exception1, line 207 + if not self.r_exception1(): + raise lab1() + raise lab0() + except lab1: pass + self.cursor = v_1 + try: + # not, line 208 + v_2 = self.cursor + try: + # hop, line 208 + c = self.cursor + 3 + if 0 > c or c > self.limit: + raise lab3() + self.cursor = c + raise lab2() + except lab3: pass + self.cursor = v_2 + raise lab0() + except lab2: pass + self.cursor = v_1 + # (, line 208 + # do, line 209 + v_3 = self.cursor + try: + # call prelude, line 209 + if not self.r_prelude(): + raise lab4() + except lab4: pass + self.cursor = v_3 + # do, line 210 + v_4 = self.cursor + try: + # call mark_regions, line 210 + if not self.r_mark_regions(): + raise lab5() + except lab5: pass + self.cursor = v_4 + # backwards, line 211 + self.limit_backward = self.cursor + self.cursor = self.limit + # (, line 211 + # do, line 213 + v_5 = self.limit - self.cursor + try: + # call Step_1a, line 213 + if not self.r_Step_1a(): + raise lab6() + except lab6: pass + self.cursor = self.limit - v_5 + # or, line 215 + try: + v_6 = self.limit - self.cursor + try: + # call exception2, line 215 + if not self.r_exception2(): + raise lab8() + raise lab7() + except lab8: pass + self.cursor = self.limit - v_6 + # (, line 215 + # do, line 217 + v_7 = self.limit - self.cursor + try: + # call Step_1b, line 217 + if not self.r_Step_1b(): + raise lab9() + except lab9: pass + self.cursor = self.limit - v_7 + # do, line 218 + v_8 = self.limit - self.cursor + try: + # call Step_1c, line 218 + if not self.r_Step_1c(): + raise lab10() + except lab10: pass + self.cursor = self.limit - v_8 + # do, line 220 + v_9 = self.limit - self.cursor + try: + # call Step_2, line 220 + if not self.r_Step_2(): + raise lab11() + except lab11: pass + self.cursor = self.limit - v_9 + # 
def cloze_deletion(text, term):
    '''Wrap every word of ``text`` that shares a stem with ``term`` in a cloze.

    Typographic apostrophes are first replaced by ASCII ones, and that
    normalised text is what gets returned.  Each word whose lower-cased stem
    equals the lower-cased stem of ``term`` becomes ``{{c1::word}}``; all
    occurrences use the same cloze index.  Words lying inside an HTML tag
    (``<...>``) are never wrapped.

    :param text: field content, may contain HTML markup
    :param term: the word to hide
    :return: the normalised text with cloze markers inserted; unchanged when
             ``term`` stems to the empty string
    '''
    text = text.replace('’', '\'')
    target = _stemmer.stemWord(term).lower()
    if not target:
        # An empty stem would compare equal to the empty matches the word
        # pattern produces and scatter "{{c1::}}" across the text.
        return text

    # Collect the tag spans up front: re.finditer returns a single-pass
    # iterator, so re-scanning it for every word would only see the tags
    # once and miss them for all later words.
    tag_spans = [m.span() for m in re.finditer(r"<[^>]+>", text)]

    pieces = []
    copied_to = 0
    for m in re.finditer(r"\b[\w'-]*\b", text):
        start, end = m.span()
        if start == end:
            # The pattern also matches the empty string at word boundaries.
            continue
        if any(t_start <= start and end <= t_end
               for t_start, t_end in tag_spans):
            # Tag names and attribute values are markup, not card text.
            continue
        word = m.group()
        if _stemmer.stemWord(word).lower() == target:
            pieces.append(text[copied_to:start])
            pieces.append("{{c1::" + word + "}}")
            copied_to = end
    pieces.append(text[copied_to:])
    return "".join(pieces)


# Shared stemmer instance used by cloze_deletion.
_stemmer = stemmer('english')