Increased modularity in MistakeFinder, changed Statistics accordingly and made MistakeDefinitions.txt more readable.

Increased modularity in MistakeFinder, changed Statistics accordingly and made MistakeDefinitions.txt more readable.

Increased modularity in MistakeFinder, changed Statistics accordingly and made MistakeDefinitions.txt more readable.
Increased modularity in MistakeFinder, changed Statistics accordingly and made MistakeDefinitions.txt more readable.
d3c6d677 · Vriezen, E.C. (Emma) · 44b9307f · d3c6d677 · d3c6d677 · d3c6d677
Commit d3c6d677 authored 5 years ago by Vriezen, E.C. (Emma)
--- a/documentation/MistakeDefinitions.txt
+++ b/documentation/MistakeDefinitions.txt
 Stuttering/repetition mistakes:

- 2 or more times the same word in a row in the shadow, while it only appears once in the source. Example: 'en' and 'en' in file 7 (2nd 'en' is repetition)
- incorrectly shadowed word before a correctly shadowed word is the beginning of the correctly shadowed word. Example: 'toe' and 'toestuurt' in file 7 ('toe' is repetition)
- incorrectly shadowed word after a correctly shadowed word is the end of the correctly shadowed word. Example: 'bestaat' and 'staat' in file 9 ('staat' is repetition)
- Same as two above, but with more than 1 word. Example 'je', 'dat', 'je' and 'dat' in file 17 (latter 'je' and 'dat' are repetitions)
\ No newline at end of file
+- 2 or more times the same word in a row in the shadow, while it only appears
+once in the source. Example: 'en' and 'en' in file 7 (2nd 'en' is repetition)
+- incorrectly shadowed word before a correctly shadowed word is the beginning
+of the correctly shadowed word. Example: 'toe' and 'toestuurt' in file 7
+('toe' is repetition)
+- incorrectly shadowed word after a correctly shadowed word is the end of the
+correctly shadowed word. Example: 'bestaat' and 'staat' in file 9
+('staat' is repetition)
+- Same as two above, but with more than 1 word. Example 'je', 'dat', 'je' and
+'dat' in file 17 (latter 'je' and 'dat' are repetitions)
\ No newline at end of file
--- a/umbra/mistake_finder.py
+++ b/umbra/mistake_finder.py
@@ -5,24 +5,33 @@ from mistake_enum import Mistake
 class MistakeFinder:
    """Finds the mistakes in already aligned lists of Words"""

-    @staticmethod
+    def __init__(self):
+        self.source = None
+        self.shadow = None
+
    def start(self, source, shadow):
        """Find all the mistakes and classify them."""
+        self.source = source
+        self.shadow = shadow
        # Loop over the shadow:
-        for index, word in enumerate(shadow):
-            if not self._check_repetition(word, index):  # If not a repetition,
-                if not self._check_semantic_mistake(word):  # semantic,
-                    if not self._check_phonentic_mistake(word):  # or phonetic,
-                        # ... then shadow word is random:
-                        word.mistake = Mistake.RANDOM
+        for index, word in enumerate(self.shadow):
+            if not word.correct:
+                self._determine_mistake(index, word)
        # Loop over the source:
        for word in source:
            if not word.shadowed and word.mistake is None:
                # If not yet marked as mistake, then it is skipped:
                word.mistake = Mistake.SKIPPED

-    # def _check_repetition(self, word, index):
-    #
+    def _determine_mistake(self, index, word):
+        if not self._check_repetition(word, index):  # If not a repetition,
+            if not self._check_semantic_mistake(word):  # semantic,
+                if not self._check_phonentic_mistake(word):  # or phonetic,
+                    # ... then shadow word is random:
+                    word.mistake = Mistake.RANDOM
+
+    def _check_repetition(self, word, index):
+        raise

    # TODO: WordNet integration. Function below should be usable already!
    # def _check_semantic_mistake(self, shd_word):

--- a/umbra/statistics.py
+++ b/umbra/statistics.py
@@ -53,7 +53,8 @@ class Statistics:
                print(f'source: {s_word.source} shadow: {s_word}')
        correctness = self._strategy.correctly_shadowed(source)
        # TODO: Make the mistake finding work with the statement below
-        # MistakeFinder.start(source_align_em, shadow_align_em)
+        # finder = MistakeFinder()
+        # finder.start(source_align_em, shadow_align_em)
        
        return source_align, shadow_align, correctness