From a7638eeac0006f2baafd1ccadea3fee9fad42038 Mon Sep 17 00:00:00 2001 From: "Vriezen, E.C. (Emma)" <e.vriezen@student.ru.nl> Date: Mon, 16 Dec 2019 21:38:19 +0100 Subject: [PATCH] Fixed all of the wrong classifications, by making sure that shadow words are never matched (as mistakes) to source words that appear after the next correctly matched words. --- umbra/mistake_finder.py | 11 ++++++++--- umbra/words.py | 27 ++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py index 31b9e1ee..a1c59e30 100644 --- a/umbra/mistake_finder.py +++ b/umbra/mistake_finder.py @@ -30,8 +30,6 @@ class MistakeFinder: # Loop over the shadow: for index, word in enumerate(self._shadow): if not word.correct and word.mistake is None: - if word.word == "geen": - print("boo") self._determine_mistake(index, word) # Loop over the source: for word in self._source: @@ -131,8 +129,15 @@ class MistakeFinder: src_index = 0 else: src_index = self._source.index(self._shadow[last_shd_index].source) + next_shd_index = self._shadow.find_next_matched_shadow(index) + if next_shd_index < 0: + src_end_index = len(self._source) - 1 + else: + src_end_index = self._source.index( + self._shadow[next_shd_index].source) while self._source[src_index].get_difference(shd_word) > 0 and\ - not form_mistake and src_index < len(self._source) - 1: + not form_mistake and src_index < len(self._source) and\ + src_index <= src_end_index: src_word = self._source[src_index] form_mistake = self.form_related(src_word, shd_word) if form_mistake: diff --git a/umbra/words.py b/umbra/words.py index 6629ddb4..345d7229 100644 --- a/umbra/words.py +++ b/umbra/words.py @@ -248,6 +248,27 @@ class Sentence(list): return index return -1 + def find_next_matched_shadow(self, index): + """Find the index of the next shadowed word. + + Args: + index: the index after which should be sought for a shadow word + that is matched with a source word. 
+ + Returns: + next_index: the index of the next matched shadow. This is -1 + if there is no matched shadow after or on the specified index, or + if the index was out of bounds. + """ + if 0 <= index < len(self): + while index < len(self): + if self[index].has_source(): + return index + index += 1 + else: + index = -1 + return index + def find_previous_anchor(self, index): """Find the index of the last anchor before specified index. @@ -264,7 +285,9 @@ class Sentence(list): if self[index].is_anchor(): return index index -= 1 - return -1 + else: + index = -1 + return index def find_last_matched_shadow(self, index): """Find the index of the last shadowed word. @@ -283,6 +306,8 @@ class Sentence(list): if self[index].has_source(): return index index -= 1 + else: + index = -1 return index def __str__(self): -- GitLab