Ghost User · Alfen, T. van (Tanja) · be0dc76f
--- a/umbra/mistake_finder.py

+ 83

− 19
+++ b/umbra/mistake_finder.py

+ 83

− 19
 @@ -16,34 +16,91 @@ class MistakeFinder:
        # Loop over the shadow:
        for index, word in enumerate(self._shadow):
            if not word.correct and word.mistake is None:
-                self._determine_mistake(index, word)
+                self._determine_mistake(word, index)
        # Loop over the source:
        for word in self._source:
            if not word.shadowed and word.mistake is None:
                # If not yet marked as mistake, then it is skipped:
                word.mistake = Mistake.SKIPPED
+
+        # Debug output for the mistake checks implemented so far:
        for word in self._shadow:
-            if word.mistake == Mistake.REPETITION:
+            if word.mistake == Mistake.REPETITION or word.mistake\
+                    == Mistake.SEMANTIC:
                print(str(word) + " " + str(word.mistake))

-    def _determine_mistake(self, index, word):
+    def _determine_mistake(self, word, index):
+        """Stepwise check for the type of mistake.
+
+        Args:
+            word: Word instance for which mistake type should be determined.
+            index: index of word in the shadow.
+        """
        if not self._check_repetition(word, index):  # If not a repetition,
-            # if not self._check_semantic_mistake(word):  # semantic,
-                # if not self._check_phonentic_mistake(word):  # or phonetic,
-                    # ... then shadow word is random:
-                    word.mistake = Mistake.RANDOM
+            # if not self._check_form_mistake(word):  # form,
+                if not self._check_semantic_mistake(word, index):  # semantic,
+                    # if not self._check_phonetic_mistake(word):  # phonetic;
+                        # ... then shadow word is random:
+                            word.mistake = Mistake.RANDOM
+
+    def _check_semantic_mistake(self, shd_word, index):
+        """Checks if shd_word is a semantic mistake or not. If yes, it flags
+        shd_word and the source word it belongs to as semantic mistakes.
+
+        Args:
+            shd_word: the Word which is checked for semantic mistake.
+            index: index of shd_word in shadow.
+        Returns:
+            semantic_mistake: True if shd_word is a semantic mistake, False
+            if not.
+        """
+        semantic_mistake = False
+        shd_anchor_index = self._shadow.find_previous_anchor(index)
+        if shd_anchor_index < 0:  # If there is no anchor found,
+            src_index = 0  # then just start at the beginning.
+        else:
+            src_index = self._source.index(
+                self._shadow[shd_anchor_index].anchor)
+        while self._source[src_index].get_difference(shd_word) > 0 and\
+                not semantic_mistake:
+            src_word = self._source[src_index]
+            if self._semantically_related(src_word, shd_word):
+                shd_word.mistake = Mistake.SEMANTIC
+                src_word.mistake = Mistake.SEMANTIC
+                semantic_mistake = True
+            src_index += 1
+        return semantic_mistake

    def _check_repetition(self, word, index):
+        """Checks whether word is a repetition mistake. If so, it flags word
+        as a repetition mistake.
+
+        Args:
+            word: the Word which is checked for repetition mistake.
+            index: index of word in shadow.
+        Returns:
+            found: True if word is a repetition mistake, False otherwise.
+        """
        assert 0 <= index < len(self._shadow)
-        found = self._check_pre_repetition(word, index)
-        if not found:
+        found = self._check_post_repetition(word, index)
+        if not found:  # Check if word is the start of the next word.
            if index < len(self._shadow)-1 and self._shadow[index+1].word.find(
                    word.word) == 0:
                word.mistake = Mistake.REPETITION
-                return True
+                found = True
        return found

-    def _check_pre_repetition(self, word, index):
+    def _check_post_repetition(self, word, index):
+        """Checks if word is the end of a chain of repetitions. Flags the word
+        and the possible rest of the chain as repetition mistakes, if a
+        repetition is found.
+
+        Args:
+            word: the Word which is checked for repetition mistake.
+            index: index of word in shadow.
+        Returns:
+            chain: True if word is a repetition mistake, False otherwise.
+        """
        found_before = False
        stop = False
        diff = 0
 @@ -66,20 +123,27 @@ class MistakeFinder:
        if chain:
            for j in range(index, index+diff+1):
                self._shadow[j].mistake = Mistake.REPETITION
-        else:
-            return False
-        return True
+        return chain

+    def _semantically_related(self, src_word, shd_word):
+        """Checks if src_word and shd_word are semantically related. If yes, it
+        flags both as semantic mistakes.

-    def _check_semantic_mistake(self, shd_word):
+        Args:
+            src_word: SourceWord instance
+            shd_word: ShadowWord instance
+        Returns:
+            related: True if src_word and shd_word are semantically related,
+            False if not.
+        """
        shd_string = shd_word.word
-        src_word = shd_word.source
        src_string = src_word.word
-        if self._seman_checker.semantically_related(src_string, shd_string):
+        related = self._seman_checker.semantically_related(src_string,
+                                                           shd_string)
+        if related:
            shd_word.mistake = Mistake.SEMANTIC
            src_word.mistake = Mistake.SEMANTIC
-            return True
-        return False
+        return related

    def _check_phonetic_mistake(self, shd_word):
        pass