fix #46

2018-08-29 22:44:52 +08:00 · 2018-08-29 22:44:52 +08:00 · 847ca06070
commit 847ca06070
parent 7817b677c1
13 changed files with 3069 additions and 18 deletions
--- a/README.md
+++ b/README.md
@ -74,3 +74,4 @@ It forks from [WordQuery](https://github.com/finalion/WordQuery), added **multi-
  - [pystardict](https://github.com/lig/pystardict)
  - [WordQuery](https://github.com/finalion/WordQuery)
  - [AnkiHub](https://github.com/dayjaby/AnkiHub)
+  - [snowball_py](https://github.com/shibukawa/snowball_py)
--- a/addons/FastWQ.py
+++ b/addons/FastWQ.py
@ -17,9 +17,12 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

+import sys
 from anki.hooks import addHook
 from anki.utils import isMac

+# Stop Python from writing .pyc files for modules imported after this point
+# (modules imported above this line are not affected).
+sys.dont_write_bytecode = True
+
 ############## other config here ##################
 shortcut = ('Ctrl+Alt' if isMac else 'Ctrl') + '+Q'
 ###################################################
--- a/addons/fastwq/libs/snowballstemmer/init.py
+++ b/addons/fastwq/libs/snowballstemmer/init.py
@ -0,0 +1,27 @@
+__all__ = ('language', 'stemmer')
+
+from .english_stemmer import EnglishStemmer
+
+# Pure-Python stemmer classes bundled with this package, keyed by
+# lower-case language name (stemmer() lower-cases its argument for lookup).
+language = {
+    'english': EnglishStemmer,
+}
+
+# Use the optional "Stemmer" module when it can be imported (presumably the
+# PyStemmer C extension -- confirm); otherwise algorithms()/stemmer() fall
+# back to the bundled classes in `language`.
+try:
+    import Stemmer
+    cext_available = True
+except ImportError:
+    cext_available = False
+
+def algorithms():
+    """
+    Return the names of the stemming algorithms that stemmer() can build.
+
+    When the optional ``Stemmer`` extension was imported, its own list is
+    returned; otherwise the keys of the bundled ``language`` table.
+    """
+    if cext_available:
+        # PyStemmer publishes its algorithm list as Stemmer.algorithms();
+        # the module has no `language` attribute, so the previous call
+        # raised AttributeError whenever the extension was installed.
+        return Stemmer.algorithms()
+    return list(language.keys())
+
+def stemmer(lang):
+    """
+    Build a stemmer object for the language named ``lang``.
+
+    The ``Stemmer`` extension is used whenever it was imported; otherwise
+    ``lang`` is lower-cased and looked up in the bundled ``language`` table.
+
+    Raises KeyError when the bundled table has no entry for ``lang``.
+    """
+    if cext_available:
+        return Stemmer.Stemmer(lang)
+    key = lang.lower()
+    if key not in language:
+        raise KeyError("Stemming algorithm '%s' not found" % lang)
+    return language[key]()
--- a/addons/fastwq/libs/snowballstemmer/among.py
+++ b/addons/fastwq/libs/snowballstemmer/among.py
@ -0,0 +1,15 @@
+
+class Among(object):
+    """
+    Plain record describing one candidate string of a lookup table.
+
+    Attributes:
+        s: the search string.
+        s_size: length of ``s``, computed once at construction.
+        substring_i: index of the longest matching substring entry.
+        result: value reported when this entry is the match.
+        method: optional method to use if the string matches.
+    """
+    def __init__(self, s, substring_i, result, method=None):
+        """Store the entry fields; ``s_size`` is derived from ``s``."""
+        self.s = s
+        self.s_size = len(s)
+        self.substring_i = substring_i
+        self.result = result
+        self.method = method
--- a/addons/fastwq/libs/snowballstemmer/basestemmer.py
+++ b/addons/fastwq/libs/snowballstemmer/basestemmer.py
@ -0,0 +1,351 @@
+class BaseStemmer(object):
+    '''
+    Shared runtime for the stemmer classes: holds the string being
+    processed together with cursor / limit / slice positions, the matching
+    and editing primitives that operate on them, and a cache of results.
+
+    _stem() is called by _stem_word() but is not defined here, so concrete
+    stemmers are expected to provide it.
+    '''
+    def __init__(self):
+        self.set_current("")
+        # Size threshold above which stemWord()/stemWords() trim the cache.
+        self.maxCacheSize = 10000
+        # Maps word -> [stem, value of self._counter at last use].
+        self._cache = {}
+        # Incremented on every _stem_word() call; used as a recency stamp.
+        self._counter = 0
+
+    def set_current(self, value):
+        '''
+        Set the self.current string and reset cursor, limits and the
+        bra/ket slice markers so that they span the whole string.
+        '''
+        self.current = value
+        self.cursor = 0
+        self.limit = len(self.current)
+        self.limit_backward = 0
+        self.bra = self.cursor
+        self.ket = self.limit
+
+    def get_current(self):
+        '''
+        Get the self.current string.
+        '''
+        return self.current
+
+    def copy_from(self, other):
+        '''
+        Copy the string and all position markers from another stemmer.
+        The cache and counter are not copied.
+        '''
+        self.current          = other.current
+        self.cursor           = other.cursor
+        self.limit            = other.limit
+        self.limit_backward   = other.limit_backward
+        self.bra              = other.bra
+        self.ket              = other.ket
+
+    def in_grouping(self, s, min, max):
+        '''
+        Forward test: if the character at the cursor has a code point in
+        [min, max] and its bit is set in the bitmap s, advance the cursor
+        by one and return True. Otherwise return False without moving.
+
+        s is indexed as s[(ch - min) >> 3], bit (ch - min) & 7.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if ch > max or ch < min:
+            return False
+        ch -= min
+        if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
+            return False
+        self.cursor += 1
+        return True
+
+    def in_grouping_b(self, s, min, max):
+        '''
+        Backward variant of in_grouping: tests the character just before
+        the cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if ch > max or ch < min:
+            return False
+        ch -= min
+        if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
+            return False
+        self.cursor -= 1
+        return True
+
+    def out_grouping(self, s, min, max):
+        '''
+        Forward test: advance the cursor by one and return True when the
+        character at the cursor is outside [min, max] or its bit is not
+        set in the bitmap s. Otherwise return False without moving.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if ch > max or ch < min:
+            self.cursor += 1
+            return True
+        ch -= min
+        if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
+            self.cursor += 1
+            return True
+        return False
+
+    def out_grouping_b(self, s, min, max):
+        '''
+        Backward variant of out_grouping: tests the character just before
+        the cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if ch > max or ch < min:
+            self.cursor -= 1
+            return True
+        ch -= min
+        if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
+            self.cursor -= 1
+            return True
+        return False
+
+    def in_range(self, min, max):
+        '''
+        Advance the cursor by one and return True when the character at
+        the cursor has a code point in [min, max]; otherwise return False.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if ch > max or ch < min:
+            return False
+        self.cursor += 1
+        return True
+
+    def in_range_b(self, min, max):
+        '''
+        Backward variant of in_range: tests the character just before the
+        cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if ch > max or ch < min:
+            return False
+        self.cursor -= 1
+        return True
+
+    def out_range(self, min, max):
+        '''
+        Advance the cursor by one and return True when the character at
+        the cursor has a code point outside [min, max]; otherwise return
+        False.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if not (ch > max or ch < min):
+            return False
+        self.cursor += 1
+        return True
+
+    def out_range_b(self, min, max):
+        '''
+        Backward variant of out_range: tests the character just before the
+        cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if not (ch > max or ch < min):
+            return False
+        self.cursor -= 1
+        return True
+
+    def eq_s(self, s_size, s):
+        '''
+        Return True and advance the cursor by s_size when the s_size
+        characters starting at the cursor equal s; otherwise return False.
+        '''
+        if self.limit - self.cursor < s_size:
+            return False
+        if self.current[self.cursor:self.cursor + s_size] != s:
+            return False
+        self.cursor += s_size
+        return True
+
+    def eq_s_b(self, s_size, s):
+        '''
+        Return True and move the cursor back by s_size when the s_size
+        characters ending at the cursor equal s; otherwise return False.
+        '''
+        if self.cursor - self.limit_backward < s_size:
+            return False
+        if self.current[self.cursor - s_size:self.cursor] != s:
+            return False
+        self.cursor -= s_size
+        return True
+
+    def eq_v(self, s):
+        '''
+        eq_s with the size taken from len(s).
+        '''
+        return self.eq_s(len(s), s)
+
+    def eq_v_b(self, s):
+        '''
+        eq_s_b with the size taken from len(s).
+        '''
+        return self.eq_s_b(len(s), s)
+
+    def find_among(self, v, v_size):
+        '''
+        Look for an entry of the table v whose string matches the text
+        starting at the cursor. Entries must expose s, s_size,
+        substring_i, result and method. A binary search over v[0:v_size]
+        picks a candidate, then substring_i links are followed to shorter
+        candidates.
+
+        When an entry matches, the cursor is placed just after the matched
+        string and the entry's result is returned. An entry with a method
+        only counts when calling that method on self returns a true value.
+        Returns 0 when no entry matches.
+
+        NOTE(review): the binary search presumably relies on v being
+        sorted by s -- confirm against the generated tables.
+        '''
+        i = 0
+        j = v_size
+
+        c = self.cursor
+        l = self.limit
+
+        # Number of leading characters already known to match at the
+        # lower (i) and upper (j) ends of the search interval.
+        common_i = 0
+        common_j = 0
+
+        first_key_inspected = False
+
+        while True:
+            k = i + ((j - i) >> 1)
+            diff = 0
+            common = min(common_i, common_j) # smaller
+            w = v[k]
+            for i2 in range(common, w.s_size):
+                if c + common == l:
+                    # Ran out of input before the end of w.s.
+                    diff = -1
+                    break
+                diff = ord(self.current[c + common]) - ord(w.s[i2])
+                if diff != 0:
+                    break
+                common += 1
+            if diff < 0:
+                j = k
+                common_j = common
+            else:
+                i = k
+                common_i = common
+            if j - i <= 1:
+                if i > 0:
+                    break # v->s has been inspected
+                if j == i:
+                    break # only one item in v
+                # - but now we need to go round once more to get
+                # v->s inspected. This looks messy, but is actually
+                # the optimal approach.
+                if first_key_inspected:
+                    break
+                first_key_inspected = True
+        while True:
+            w = v[i]
+            if common_i >= w.s_size:
+                self.cursor = c + w.s_size
+                if w.method is None:
+                    return w.result
+                method = getattr(self, w.method)
+                res = method()
+                # Restore the cursor in case the method moved it.
+                self.cursor = c + w.s_size
+                if res:
+                    return w.result
+            i = w.substring_i
+            if i < 0:
+                return 0
+        return -1 # not reachable
+
+    def find_among_b(self, v, v_size):
+        '''
+        find_among_b is for backwards processing. Same comments apply:
+        entry strings are compared from their last character against the
+        text ending at the cursor, and on a match the cursor is moved back
+        to the start of the matched string.
+        '''
+        i = 0
+        j = v_size
+
+        c = self.cursor
+        lb = self.limit_backward;
+
+        common_i = 0
+        common_j = 0
+
+        first_key_inspected = False
+
+        while True:
+            k = i + ((j - i) >> 1)
+            diff = 0
+            common = min(common_i, common_j)
+            w = v[k]
+            for i2 in range(w.s_size - 1 - common, -1, -1):
+                if c - common == lb:
+                    # Ran out of input before the start of w.s.
+                    diff = -1
+                    break
+                diff = ord(self.current[c - 1 - common]) - ord(w.s[i2])
+                if diff != 0:
+                    break
+                common += 1
+            if diff < 0:
+                j = k
+                common_j = common
+            else:
+                i = k
+                common_i = common
+            if j - i <= 1:
+                if i > 0:
+                    break
+                if j == i:
+                    break
+                if first_key_inspected:
+                    break
+                first_key_inspected = True
+        while True:
+            w = v[i]
+            if common_i >= w.s_size:
+                self.cursor = c - w.s_size
+                if w.method is None:
+                    return w.result
+                method = getattr(self, w.method)
+                res = method()
+                # Restore the cursor in case the method moved it.
+                self.cursor = c - w.s_size
+                if res:
+                    return w.result
+            i = w.substring_i
+            if i < 0:
+                return 0
+        return -1 # not reachable
+
+    def replace_s(self, c_bra, c_ket, s):
+        '''
+        to replace chars between c_bra and c_ket in self.current by the
+        chars in s. self.limit is shifted by the change in length, and the
+        cursor is shifted too when it lies at or after c_ket, or pulled
+        back to c_bra when it lies inside the replaced span.
+
+        Returns the change in length (len(s) minus the replaced length).
+
+        @type c_bra int
+        @type c_ket int
+        @type s: string
+        '''
+        adjustment = len(s) - (c_ket - c_bra)
+        self.current = self.current[0:c_bra] + s + self.current[c_ket:]
+        self.limit += adjustment
+        if self.cursor >= c_ket:
+            self.cursor += adjustment
+        elif self.cursor > c_bra:
+            self.cursor = c_bra
+        return adjustment
+
+    def slice_check(self):
+        '''
+        Return True when 0 <= bra <= ket <= limit <= len(current).
+        '''
+        if self.bra < 0 or self.bra > self.ket or self.ket > self.limit or self.limit > len(self.current):
+            return False
+        return True
+
+    def slice_from(self, s):
+        '''
+        Replace the current slice [bra:ket] with s. Returns True when the
+        replacement was made, False when slice_check() failed.
+
+        @type s string
+        '''
+        result = False
+        if self.slice_check():
+            self.replace_s(self.bra, self.ket, s)
+            result = True
+        return result
+
+    def slice_del(self):
+        '''
+        Delete the current slice [bra:ket]; same return value as slice_from.
+        '''
+        return self.slice_from("")
+
+    def insert(self, c_bra, c_ket, s):
+        '''
+        Replace chars between c_bra and c_ket by s (see replace_s) and
+        shift bra / ket by the length change when c_bra does not lie
+        after them.
+
+        @type c_bra int
+        @type c_ket int
+        @type s: string
+        '''
+        adjustment = self.replace_s(c_bra, c_ket, s)
+        if c_bra <= self.bra:
+            self.bra += adjustment
+        if c_bra <= self.ket:
+            self.ket += adjustment
+
+    def slice_to(self, s):
+        '''
+        Return the current slice self.current[bra:ket], or '' when
+        slice_check() fails. The argument s is not used; callers receive
+        the slice through the return value.
+
+        @type s: string
+        '''
+        result = ''
+        if self.slice_check():
+            result = self.current[self.bra:self.ket]
+        return result
+
+    def assign_to(self, s):
+        '''
+        Return self.current up to self.limit. The argument s is not used.
+
+        @type s: string
+        '''
+        return self.current[0:self.limit]
+
+    def _stem_word(self, word):
+        '''
+        Return the stem of word, using the cache when possible. On a miss
+        the word is loaded with set_current(), self._stem() is run and the
+        resulting self.current is cached. Either way the entry's recency
+        stamp is set to the current counter, which is then incremented.
+        '''
+        cache = self._cache.get(word)
+        if cache is None:
+            self.set_current(word)
+            self._stem()
+            result = self.get_current()
+            self._cache[word] = [result, self._counter]
+        else:
+            cache[1] = self._counter
+            result = cache[0]
+        self._counter += 1
+        return result
+
+    def _clear_cache(self):
+        '''
+        Drop the least recently used cache entries until the cache holds
+        8/10 of maxCacheSize entries.
+        '''
+        removecount = int(len(self._cache) - self.maxCacheSize * 8 / 10)
+        # Sort by recency stamp (second element of the cached value), oldest first.
+        oldcaches = sorted(self._cache.items(), key=lambda cache: cache[1][1])[0:removecount]
+        for key, value in oldcaches:
+            del self._cache[key]
+
+    def stemWord(self, word):
+        '''
+        Return the stem of a single word; trims the cache afterwards when
+        it has grown beyond maxCacheSize.
+        '''
+        result = self._stem_word(word)
+        if len(self._cache) > self.maxCacheSize:
+            self._clear_cache()
+        return result
+
+    def stemWords(self, words):
+        '''
+        Return a list with the stem of every word in words; the cache is
+        trimmed once, after the whole batch, when it exceeds maxCacheSize.
+        '''
+        result = [self._stem_word(word) for word in words]
+        if len(self._cache) > self.maxCacheSize:
+            self._clear_cache()
+        return result
--- a/addons/fastwq/libs/snowballstemmer/english_stemmer.py
+++ b/addons/fastwq/libs/snowballstemmer/english_stemmer.py
--- a/addons/fastwq/query/common.py
+++ b/addons/fastwq/query/common.py
@ -33,6 +33,7 @@ from ..context import config
 from ..service import service_pool, QueryResult, copy_static_file
 from ..service.base import LocalService
 from ..utils import wrap_css
+from ..libs.snowballstemmer import stemmer


 __all__ = [
@ -263,14 +264,27 @@ def query_flds(note, fileds=None):

 def cloze_deletion(text, term):
    '''create cloze deletion text'''
+    # Wraps every word of `text` whose stem equals the stem of `term`
+    # (compared case-insensitively) in "{{c1::...}}" and returns the new
+    # string. Words lying inside an HTML tag ("<...>") are left alone.
+    # Typographic apostrophes are normalised to "'" first, so the returned
+    # text contains the normalised form.
+    # NOTE(review): if this module runs under Python 2 without
+    # unicode_literals, the non-ASCII literal below is a byte string --
+    # confirm it cannot meet a unicode `text`.
+    text = text.replace('’', '\'')
    result = text
-    words = re.finditer(r"\b" + re.escape(term) + r"\b", text, flags=re.IGNORECASE)
-    words = [m.start() for m in words][::-1]
-    index = 1
-    for word in words:
-        if not text[word - 1].isalnum() or text[word + len(term)].isalnum():
-            if not "{{" in text[word:word + len(term)] or "}}" in text[word:word + len(term)]:
-                result = result[:word + len(term)] + "}}" + result[word + len(term):]
-                result = result[:word] + "{{c" + str(index) + "::" + result[word:]
-                #index += 1
+    offset = 0
+    term = _stemmer.stemWord(term).lower()
+    if not term:
+        # Nothing to look for; avoid matching on an empty stem.
+        return result
+
+    # Collect the tag spans once. re.finditer returns a one-shot iterator,
+    # so re-iterating it for every word would only see the tags that were
+    # not consumed by earlier words and let later words inside tags through.
+    tag_spans = [(t.start(), t.end()) for t in re.finditer(r"<[^>]+>", text)]
+    # "+" instead of "*": empty matches carry no word and are skipped.
+    for m in re.finditer(r"\b[\w'-]+\b", text):
+        s, e = m.span()
+        if any(s >= ts and e <= te for ts, te in tag_spans):
+            continue
+        word = m.group()
+        if _stemmer.stemWord(word).lower() == term:
+            cloze = "{{c1::" + word + "}}"
+            # `offset` maps positions in `text` to positions in `result`,
+            # which grows with every inserted cloze marker.
+            result = result[:s + offset] + cloze + result[e + offset:]
+            offset += len(cloze) - len(word)
    return result
+
+# English stemmer shared by cloze_deletion(); built once when the module is imported.
+_stemmer = stemmer('english')
--- a/addons21/fastwq/init.py
+++ b/addons21/fastwq/init.py
@ -17,9 +17,12 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

+import sys
 from anki.hooks import addHook
 from anki.utils import isMac

+# Stop Python from writing .pyc files for modules imported after this point
+# (modules imported above this line are not affected).
+sys.dont_write_bytecode = True
+
 ############## other config here ##################
 shortcut = ('Ctrl+Alt' if isMac else 'Ctrl') + '+Q'
 ###################################################
--- a/addons21/fastwq/libs/snowballstemmer/init.py
+++ b/addons21/fastwq/libs/snowballstemmer/init.py
@ -0,0 +1,27 @@
+__all__ = ('language', 'stemmer')
+
+from .english_stemmer import EnglishStemmer
+
+# Pure-Python stemmer classes bundled with this package, keyed by
+# lower-case language name (stemmer() lower-cases its argument for lookup).
+language = {
+    'english': EnglishStemmer,
+}
+
+# Use the optional "Stemmer" module when it can be imported (presumably the
+# PyStemmer C extension -- confirm); otherwise algorithms()/stemmer() fall
+# back to the bundled classes in `language`.
+try:
+    import Stemmer
+    cext_available = True
+except ImportError:
+    cext_available = False
+
+def algorithms():
+    """
+    Return the names of the stemming algorithms that stemmer() can build.
+
+    When the optional ``Stemmer`` extension was imported, its own list is
+    returned; otherwise the keys of the bundled ``language`` table.
+    """
+    if cext_available:
+        # PyStemmer publishes its algorithm list as Stemmer.algorithms();
+        # the module has no `language` attribute, so the previous call
+        # raised AttributeError whenever the extension was installed.
+        return Stemmer.algorithms()
+    return list(language.keys())
+
+def stemmer(lang):
+    """
+    Build a stemmer object for the language named ``lang``.
+
+    The ``Stemmer`` extension is used whenever it was imported; otherwise
+    ``lang`` is lower-cased and looked up in the bundled ``language`` table.
+
+    Raises KeyError when the bundled table has no entry for ``lang``.
+    """
+    if cext_available:
+        return Stemmer.Stemmer(lang)
+    key = lang.lower()
+    if key not in language:
+        raise KeyError("Stemming algorithm '%s' not found" % lang)
+    return language[key]()
--- a/addons21/fastwq/libs/snowballstemmer/among.py
+++ b/addons21/fastwq/libs/snowballstemmer/among.py
@ -0,0 +1,15 @@
+
+class Among(object):
+    """
+    Plain record describing one candidate string of a lookup table.
+
+    Attributes:
+        s: the search string.
+        s_size: length of ``s``, computed once at construction.
+        substring_i: index of the longest matching substring entry.
+        result: value reported when this entry is the match.
+        method: optional method to use if the string matches.
+    """
+    def __init__(self, s, substring_i, result, method=None):
+        """Store the entry fields; ``s_size`` is derived from ``s``."""
+        self.s = s
+        self.s_size = len(s)
+        self.substring_i = substring_i
+        self.result = result
+        self.method = method
--- a/addons21/fastwq/libs/snowballstemmer/basestemmer.py
+++ b/addons21/fastwq/libs/snowballstemmer/basestemmer.py
@ -0,0 +1,351 @@
+class BaseStemmer(object):
+    '''
+    Shared runtime for the stemmer classes: holds the string being
+    processed together with cursor / limit / slice positions, the matching
+    and editing primitives that operate on them, and a cache of results.
+
+    _stem() is called by _stem_word() but is not defined here, so concrete
+    stemmers are expected to provide it.
+    '''
+    def __init__(self):
+        self.set_current("")
+        # Size threshold above which stemWord()/stemWords() trim the cache.
+        self.maxCacheSize = 10000
+        # Maps word -> [stem, value of self._counter at last use].
+        self._cache = {}
+        # Incremented on every _stem_word() call; used as a recency stamp.
+        self._counter = 0
+
+    def set_current(self, value):
+        '''
+        Set the self.current string and reset cursor, limits and the
+        bra/ket slice markers so that they span the whole string.
+        '''
+        self.current = value
+        self.cursor = 0
+        self.limit = len(self.current)
+        self.limit_backward = 0
+        self.bra = self.cursor
+        self.ket = self.limit
+
+    def get_current(self):
+        '''
+        Get the self.current string.
+        '''
+        return self.current
+
+    def copy_from(self, other):
+        '''
+        Copy the string and all position markers from another stemmer.
+        The cache and counter are not copied.
+        '''
+        self.current          = other.current
+        self.cursor           = other.cursor
+        self.limit            = other.limit
+        self.limit_backward   = other.limit_backward
+        self.bra              = other.bra
+        self.ket              = other.ket
+
+    def in_grouping(self, s, min, max):
+        '''
+        Forward test: if the character at the cursor has a code point in
+        [min, max] and its bit is set in the bitmap s, advance the cursor
+        by one and return True. Otherwise return False without moving.
+
+        s is indexed as s[(ch - min) >> 3], bit (ch - min) & 7.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if ch > max or ch < min:
+            return False
+        ch -= min
+        if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
+            return False
+        self.cursor += 1
+        return True
+
+    def in_grouping_b(self, s, min, max):
+        '''
+        Backward variant of in_grouping: tests the character just before
+        the cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if ch > max or ch < min:
+            return False
+        ch -= min
+        if (s[ch >> 3] & (0x1 << (ch & 0x7))) == 0:
+            return False
+        self.cursor -= 1
+        return True
+
+    def out_grouping(self, s, min, max):
+        '''
+        Forward test: advance the cursor by one and return True when the
+        character at the cursor is outside [min, max] or its bit is not
+        set in the bitmap s. Otherwise return False without moving.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if ch > max or ch < min:
+            self.cursor += 1
+            return True
+        ch -= min
+        if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
+            self.cursor += 1
+            return True
+        return False
+
+    def out_grouping_b(self, s, min, max):
+        '''
+        Backward variant of out_grouping: tests the character just before
+        the cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if ch > max or ch < min:
+            self.cursor -= 1
+            return True
+        ch -= min
+        if (s[ch >> 3] & (0X1 << (ch & 0x7))) == 0:
+            self.cursor -= 1
+            return True
+        return False
+
+    def in_range(self, min, max):
+        '''
+        Advance the cursor by one and return True when the character at
+        the cursor has a code point in [min, max]; otherwise return False.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if ch > max or ch < min:
+            return False
+        self.cursor += 1
+        return True
+
+    def in_range_b(self, min, max):
+        '''
+        Backward variant of in_range: tests the character just before the
+        cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if ch > max or ch < min:
+            return False
+        self.cursor -= 1
+        return True
+
+    def out_range(self, min, max):
+        '''
+        Advance the cursor by one and return True when the character at
+        the cursor has a code point outside [min, max]; otherwise return
+        False.
+        '''
+        if self.cursor >= self.limit:
+            return False
+        ch = ord(self.current[self.cursor])
+        if not (ch > max or ch < min):
+            return False
+        self.cursor += 1
+        return True
+
+    def out_range_b(self, min, max):
+        '''
+        Backward variant of out_range: tests the character just before the
+        cursor and moves the cursor back by one on success.
+        '''
+        if self.cursor <= self.limit_backward:
+            return False
+        ch = ord(self.current[self.cursor - 1])
+        if not (ch > max or ch < min):
+            return False
+        self.cursor -= 1
+        return True
+
+    def eq_s(self, s_size, s):
+        '''
+        Return True and advance the cursor by s_size when the s_size
+        characters starting at the cursor equal s; otherwise return False.
+        '''
+        if self.limit - self.cursor < s_size:
+            return False
+        if self.current[self.cursor:self.cursor + s_size] != s:
+            return False
+        self.cursor += s_size
+        return True
+
+    def eq_s_b(self, s_size, s):
+        '''
+        Return True and move the cursor back by s_size when the s_size
+        characters ending at the cursor equal s; otherwise return False.
+        '''
+        if self.cursor - self.limit_backward < s_size:
+            return False
+        if self.current[self.cursor - s_size:self.cursor] != s:
+            return False
+        self.cursor -= s_size
+        return True
+
+    def eq_v(self, s):
+        '''
+        eq_s with the size taken from len(s).
+        '''
+        return self.eq_s(len(s), s)
+
+    def eq_v_b(self, s):
+        '''
+        eq_s_b with the size taken from len(s).
+        '''
+        return self.eq_s_b(len(s), s)
+
+    def find_among(self, v, v_size):
+        '''
+        Look for an entry of the table v whose string matches the text
+        starting at the cursor. Entries must expose s, s_size,
+        substring_i, result and method. A binary search over v[0:v_size]
+        picks a candidate, then substring_i links are followed to shorter
+        candidates.
+
+        When an entry matches, the cursor is placed just after the matched
+        string and the entry's result is returned. An entry with a method
+        only counts when calling that method on self returns a true value.
+        Returns 0 when no entry matches.
+
+        NOTE(review): the binary search presumably relies on v being
+        sorted by s -- confirm against the generated tables.
+        '''
+        i = 0
+        j = v_size
+
+        c = self.cursor
+        l = self.limit
+
+        # Number of leading characters already known to match at the
+        # lower (i) and upper (j) ends of the search interval.
+        common_i = 0
+        common_j = 0
+
+        first_key_inspected = False
+
+        while True:
+            k = i + ((j - i) >> 1)
+            diff = 0
+            common = min(common_i, common_j) # smaller
+            w = v[k]
+            for i2 in range(common, w.s_size):
+                if c + common == l:
+                    # Ran out of input before the end of w.s.
+                    diff = -1
+                    break
+                diff = ord(self.current[c + common]) - ord(w.s[i2])
+                if diff != 0:
+                    break
+                common += 1
+            if diff < 0:
+                j = k
+                common_j = common
+            else:
+                i = k
+                common_i = common
+            if j - i <= 1:
+                if i > 0:
+                    break # v->s has been inspected
+                if j == i:
+                    break # only one item in v
+                # - but now we need to go round once more to get
+                # v->s inspected. This looks messy, but is actually
+                # the optimal approach.
+                if first_key_inspected:
+                    break
+                first_key_inspected = True
+        while True:
+            w = v[i]
+            if common_i >= w.s_size:
+                self.cursor = c + w.s_size
+                if w.method is None:
+                    return w.result
+                method = getattr(self, w.method)
+                res = method()
+                # Restore the cursor in case the method moved it.
+                self.cursor = c + w.s_size
+                if res:
+                    return w.result
+            i = w.substring_i
+            if i < 0:
+                return 0
+        return -1 # not reachable
+
+    def find_among_b(self, v, v_size):
+        '''
+        find_among_b is for backwards processing. Same comments apply:
+        entry strings are compared from their last character against the
+        text ending at the cursor, and on a match the cursor is moved back
+        to the start of the matched string.
+        '''
+        i = 0
+        j = v_size
+
+        c = self.cursor
+        lb = self.limit_backward;
+
+        common_i = 0
+        common_j = 0
+
+        first_key_inspected = False
+
+        while True:
+            k = i + ((j - i) >> 1)
+            diff = 0
+            common = min(common_i, common_j)
+            w = v[k]
+            for i2 in range(w.s_size - 1 - common, -1, -1):
+                if c - common == lb:
+                    # Ran out of input before the start of w.s.
+                    diff = -1
+                    break
+                diff = ord(self.current[c - 1 - common]) - ord(w.s[i2])
+                if diff != 0:
+                    break
+                common += 1
+            if diff < 0:
+                j = k
+                common_j = common
+            else:
+                i = k
+                common_i = common
+            if j - i <= 1:
+                if i > 0:
+                    break
+                if j == i:
+                    break
+                if first_key_inspected:
+                    break
+                first_key_inspected = True
+        while True:
+            w = v[i]
+            if common_i >= w.s_size:
+                self.cursor = c - w.s_size
+                if w.method is None:
+                    return w.result
+                method = getattr(self, w.method)
+                res = method()
+                # Restore the cursor in case the method moved it.
+                self.cursor = c - w.s_size
+                if res:
+                    return w.result
+            i = w.substring_i
+            if i < 0:
+                return 0
+        return -1 # not reachable
+
+    def replace_s(self, c_bra, c_ket, s):
+        '''
+        to replace chars between c_bra and c_ket in self.current by the
+        chars in s. self.limit is shifted by the change in length, and the
+        cursor is shifted too when it lies at or after c_ket, or pulled
+        back to c_bra when it lies inside the replaced span.
+
+        Returns the change in length (len(s) minus the replaced length).
+
+        @type c_bra int
+        @type c_ket int
+        @type s: string
+        '''
+        adjustment = len(s) - (c_ket - c_bra)
+        self.current = self.current[0:c_bra] + s + self.current[c_ket:]
+        self.limit += adjustment
+        if self.cursor >= c_ket:
+            self.cursor += adjustment
+        elif self.cursor > c_bra:
+            self.cursor = c_bra
+        return adjustment
+
+    def slice_check(self):
+        '''
+        Return True when 0 <= bra <= ket <= limit <= len(current).
+        '''
+        if self.bra < 0 or self.bra > self.ket or self.ket > self.limit or self.limit > len(self.current):
+            return False
+        return True
+
+    def slice_from(self, s):
+        '''
+        Replace the current slice [bra:ket] with s. Returns True when the
+        replacement was made, False when slice_check() failed.
+
+        @type s string
+        '''
+        result = False
+        if self.slice_check():
+            self.replace_s(self.bra, self.ket, s)
+            result = True
+        return result
+
+    def slice_del(self):
+        '''
+        Delete the current slice [bra:ket]; same return value as slice_from.
+        '''
+        return self.slice_from("")
+
+    def insert(self, c_bra, c_ket, s):
+        '''
+        Replace chars between c_bra and c_ket by s (see replace_s) and
+        shift bra / ket by the length change when c_bra does not lie
+        after them.
+
+        @type c_bra int
+        @type c_ket int
+        @type s: string
+        '''
+        adjustment = self.replace_s(c_bra, c_ket, s)
+        if c_bra <= self.bra:
+            self.bra += adjustment
+        if c_bra <= self.ket:
+            self.ket += adjustment
+
+    def slice_to(self, s):
+        '''
+        Return the current slice self.current[bra:ket], or '' when
+        slice_check() fails. The argument s is not used; callers receive
+        the slice through the return value.
+
+        @type s: string
+        '''
+        result = ''
+        if self.slice_check():
+            result = self.current[self.bra:self.ket]
+        return result
+
+    def assign_to(self, s):
+        '''
+        Return self.current up to self.limit. The argument s is not used.
+
+        @type s: string
+        '''
+        return self.current[0:self.limit]
+
+    def _stem_word(self, word):
+        '''
+        Return the stem of word, using the cache when possible. On a miss
+        the word is loaded with set_current(), self._stem() is run and the
+        resulting self.current is cached. Either way the entry's recency
+        stamp is set to the current counter, which is then incremented.
+        '''
+        cache = self._cache.get(word)
+        if cache is None:
+            self.set_current(word)
+            self._stem()
+            result = self.get_current()
+            self._cache[word] = [result, self._counter]
+        else:
+            cache[1] = self._counter
+            result = cache[0]
+        self._counter += 1
+        return result
+
+    def _clear_cache(self):
+        '''
+        Drop the least recently used cache entries until the cache holds
+        8/10 of maxCacheSize entries.
+        '''
+        removecount = int(len(self._cache) - self.maxCacheSize * 8 / 10)
+        # Sort by recency stamp (second element of the cached value), oldest first.
+        oldcaches = sorted(self._cache.items(), key=lambda cache: cache[1][1])[0:removecount]
+        for key, value in oldcaches:
+            del self._cache[key]
+
+    def stemWord(self, word):
+        '''
+        Return the stem of a single word; trims the cache afterwards when
+        it has grown beyond maxCacheSize.
+        '''
+        result = self._stem_word(word)
+        if len(self._cache) > self.maxCacheSize:
+            self._clear_cache()
+        return result
+
+    def stemWords(self, words):
+        '''
+        Return a list with the stem of every word in words; the cache is
+        trimmed once, after the whole batch, when it exceeds maxCacheSize.
+        '''
+        result = [self._stem_word(word) for word in words]
+        if len(self._cache) > self.maxCacheSize:
+            self._clear_cache()
+        return result
--- a/addons21/fastwq/libs/snowballstemmer/english_stemmer.py
+++ b/addons21/fastwq/libs/snowballstemmer/english_stemmer.py
--- a/addons21/fastwq/query/common.py
+++ b/addons21/fastwq/query/common.py
@ -33,6 +33,7 @@ from ..context import config
 from ..service import service_pool, QueryResult, copy_static_file
 from ..service.base import LocalService
 from ..utils import wrap_css
+from ..libs.snowballstemmer import stemmer


 __all__ = [
@ -264,14 +265,27 @@ def query_flds(note, fileds=None):

 def cloze_deletion(text, term):
    '''create cloze deletion text'''
+    # Wraps every word of `text` whose stem equals the stem of `term`
+    # (compared case-insensitively) in "{{c1::...}}" and returns the new
+    # string. Words lying inside an HTML tag ("<...>") are left alone.
+    # Typographic apostrophes are normalised to "'" first, so the returned
+    # text contains the normalised form.
+    text = text.replace('’', '\'')
    result = text
-    words = re.finditer(r"\b" + re.escape(term) + r"\b", text, flags=re.IGNORECASE)
-    words = [m.start() for m in words][::-1]
-    index = 1
-    for word in words:
-        if not text[word - 1].isalnum() or text[word + len(term)].isalnum():
-            if not "{{" in text[word:word + len(term)] or "}}" in text[word:word + len(term)]:
-                result = result[:word + len(term)] + "}}" + result[word + len(term):]
-                result = result[:word] + "{{c" + str(index) + "::" + result[word:]
-                #index += 1
+    offset = 0
+    term = _stemmer.stemWord(term).lower()
+    if not term:
+        # Nothing to look for; avoid matching on an empty stem.
+        return result
+
+    # Collect the tag spans once. re.finditer returns a one-shot iterator,
+    # so re-iterating it for every word would only see the tags that were
+    # not consumed by earlier words and let later words inside tags through.
+    tag_spans = [(t.start(), t.end()) for t in re.finditer(r"<[^>]+>", text)]
+    # "+" instead of "*": empty matches carry no word and are skipped.
+    for m in re.finditer(r"\b[\w'-]+\b", text):
+        s, e = m.span()
+        if any(s >= ts and e <= te for ts, te in tag_spans):
+            continue
+        word = m.group()
+        if _stemmer.stemWord(word).lower() == term:
+            cloze = "{{c1::" + word + "}}"
+            # `offset` maps positions in `text` to positions in `result`,
+            # which grows with every inserted cloze marker.
+            result = result[:s + offset] + cloze + result[e + offset:]
+            offset += len(cloze) - len(word)
    return result
+
+# English stemmer shared by cloze_deletion(); built once when the module is imported.
+_stemmer = stemmer('english')