From a7638eeac0006f2baafd1ccadea3fee9fad42038 Mon Sep 17 00:00:00 2001
From: "Vriezen, E.C. (Emma)" <e.vriezen@student.ru.nl>
Date: Mon, 16 Dec 2019 21:38:19 +0100
Subject: [PATCH] Fixed all of the wrong classifications, by making sure that
 shadow words are never matched (as mistakes) to source words that appear
 after the next correctly matched words.

---
 umbra/mistake_finder.py | 11 ++++++++---
 umbra/words.py          | 27 ++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py
index 31b9e1ee..a1c59e30 100644
--- a/umbra/mistake_finder.py
+++ b/umbra/mistake_finder.py
@@ -30,8 +30,6 @@ class MistakeFinder:
         # Loop over the shadow:
         for index, word in enumerate(self._shadow):
             if not word.correct and word.mistake is None:
-                if word.word == "geen":
-                    print("boo")
                 self._determine_mistake(index, word)
         # Loop over the source:
         for word in self._source:
@@ -131,8 +129,15 @@ class MistakeFinder:
             src_index = 0
         else:
             src_index = self._source.index(self._shadow[last_shd_index].source)
+        next_shd_index = self._shadow.find_next_matched_shadow(index)
+        if next_shd_index < 0:
+            src_end_index = len(self._source) - 1
+        else:
+            src_end_index = self._source.index(
+                self._shadow[next_shd_index].source)
         while self._source[src_index].get_difference(shd_word) > 0 and\
-                not form_mistake and src_index < len(self._source) - 1:
+                not form_mistake and src_index < len(self._source) and\
+                src_index <= src_end_index:
             src_word = self._source[src_index]
             form_mistake = self.form_related(src_word, shd_word)
             if form_mistake:
diff --git a/umbra/words.py b/umbra/words.py
index 6629ddb4..345d7229 100644
--- a/umbra/words.py
+++ b/umbra/words.py
@@ -248,6 +248,27 @@ class Sentence(list):
                 return index
         return -1
 
+    def find_next_matched_shadow(self, index):
+        """Find the index of the next shadowed word.
+
+        Args:
+            index: the index from which (inclusive) should be sought for a
+            shadow word that is matched with a source word.
+
+        Returns:
+            next_index: the index of the next matched shadow. This is -1
+            if there is no matched shadow after or on the specified index, or
+            if the index was out of bounds.
+        """
+        if 0 <= index < len(self):
+            while index < len(self):
+                if self[index].has_source():
+                    return index
+                index += 1
+        # Either the index was out of bounds or the forward scan ran off the
+        # end of the sentence without a match (index == len(self)).
+        return -1
+
     def find_previous_anchor(self, index):
         """Find the index of the last anchor before specified index.
 
@@ -264,7 +285,9 @@ class Sentence(list):
                 if self[index].is_anchor():
                     return index
                 index -= 1
-        return -1
+        else:
+            index = -1
+        return index
 
     def find_last_matched_shadow(self, index):
         """Find the index of the last shadowed word.
@@ -283,6 +306,8 @@ class Sentence(list):
                 if self[index].has_source():
                     return index
                 index -= 1
+        else:
+            index = -1
         return index
     
     def __str__(self):
-- 
GitLab