diff --git a/umbra/anchor_algorithm.py b/umbra/anchor_algorithm.py
index 38d97004628da033591eef01c1793a167b1ade19..fb2417f503c2981a6c609fa73eaa040ea1f17f90 100644
--- a/umbra/anchor_algorithm.py
+++ b/umbra/anchor_algorithm.py
@@ -39,8 +39,8 @@ class AnchorAlgorithm(AlignmentStrategy):
                 if src_last_anchor < src_index:
                     shd_index = self._shadow.index(word.anchor)
                     self._search_between_anchors(src_last_anchor,
-                        shd_last_anchor, src_index,
-                        shd_index)
+                                                 shd_last_anchor, src_index,
+                                                 shd_index)
                 shd_last_anchor = self._shadow.index(word.anchor) + 1
                 src_last_anchor = src_index + 1
 
diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa8a7540b58a34e47d16ea0ead3e579810680e59
--- /dev/null
+++ b/umbra/mistake_finder.py
@@ -0,0 +1,51 @@
+from mistake_enum import Mistake
+
+
+# TODO: Finish this skeleton.
+# NOTE: the _check_* helpers below are still commented out, so start()
+# raises AttributeError for any non-empty shadow until they exist.
+class MistakeFinder:
+    """Finds the mistakes in already aligned lists of Words"""
+
+    def start(self, source, shadow):
+        """Find all the mistakes and classify them.
+
+        The ``mistake`` attribute of the words is set in place; nothing is
+        returned.
+
+        Args:
+            source: the aligned words of the source file
+            shadow: the aligned words of the shadow file
+        """
+        # Loop over the shadow:
+        for index, word in enumerate(shadow):
+            # The checks run in this order and stop at the first match.
+            if (self._check_repetition(word, index)
+                    or self._check_semantic_mistake(word)
+                    or self._check_phonetic_mistake(word)):
+                continue
+            # Not a repetition, semantic or phonetic mistake, so the
+            # shadow word is random:
+            word.mistake = Mistake.RANDOM
+        # Loop over the source:
+        for word in source:
+            if not word.shadowed and word.mistake is None:
+                # If not yet marked as mistake, then it is skipped:
+                word.mistake = Mistake.SKIPPED
+
+    # def _check_repetition(self, word, index):
+    #
+
+    # TODO: WordNet integration. Function below should be usable already!
+    # NOTE(review): `model` is not imported in this file - confirm its origin.
+    # def _check_semantic_mistake(self, shd_word):
+    #     shd_string = shd_word.word
+    #     src_string = shd_word.source.word
+    #     if model.seman_check.semantically_related(src_string, shd_string):
+    #         shd_word.mistake = Mistake.SEMANTIC
+    #         shd_word.source.mistake = Mistake.SEMANTIC
+    #         return True
+    #     return False
+
+    # def _check_phonetic_mistake(self, shd_word):
+    #
diff --git a/umbra/statistics.py b/umbra/statistics.py
index b124b9f7bb516d14eaeb8e5564898bfb760392a5..4560d54fc9567acfab147daef35a3ea960bf7fd9 100644
--- a/umbra/statistics.py
+++ b/umbra/statistics.py
@@ -1,6 +1,7 @@
 from saa_algorithm import SaaAlgorithm
 from saa_Romeo import SaaRomeo
 from anchor_algorithm import AnchorAlgorithm
+from mistake_finder import MistakeFinder
 
 
 class Statistics:
@@ -27,7 +28,7 @@ class Statistics:
             shadow: the words in the shadow file
         """
 
-        #Alignment 0
+        # Alignment 0
         print('Romeo')
         self._strategy = SaaRomeo()
         source_align, shadow_align = self._strategy.align(source, shadow)
@@ -51,6 +52,8 @@ class Statistics:
             if s_word.has_source():
                 print(f'source: {s_word.source} shadow: {s_word}')
         correctness = self._strategy.correctly_shadowed(source)
-        
+        # TODO: Make the mistake finding work with the statement below
+        # MistakeFinder().start(source_align_em, shadow_align_em)
+
         return source_align, shadow_align, correctness
 