Skip to content
Snippets Groups Projects
Commit 9a1156c1 authored by Alfen, T. van (Tanja)'s avatar Alfen, T. van (Tanja)
Browse files

Merge branch 'SC-113/implement-repetition-mistakes'

parents 7def4f12 67653870
No related branches found
No related tags found
1 merge request: !46 SC-113/implement repetition mistakes
...@@ -43,6 +43,13 @@ class AnchorAlgorithm(AlignmentStrategy): ...@@ -43,6 +43,13 @@ class AnchorAlgorithm(AlignmentStrategy):
shd_index) shd_index)
shd_last_anchor = self._shadow.index(word.anchor) + 1 shd_last_anchor = self._shadow.index(word.anchor) + 1
src_last_anchor = src_index + 1 src_last_anchor = src_index + 1
# If the last word is not an anchor, then still search for matches:
elif src_index == len(self._source)-1:
src_index += 1
shd_index = len(self._shadow)
self._search_between_anchors(src_last_anchor,
shd_last_anchor, src_index,
shd_index)
def _search_between_anchors(self, src_start, shd_start, src_end, shd_end): def _search_between_anchors(self, src_start, shd_start, src_end, shd_end):
"""Search for shadowed words between two anchored pairs of words. """Search for shadowed words between two anchored pairs of words.
...@@ -74,7 +81,7 @@ class AnchorAlgorithm(AlignmentStrategy): ...@@ -74,7 +81,7 @@ class AnchorAlgorithm(AlignmentStrategy):
competing_over = None competing_over = None
for shadow_index in range(shd_start, shd_end): for shadow_index in range(shd_start, shd_end):
shd_word = self._shadow[shadow_index] shd_word = self._shadow[shadow_index]
if src_word.__eq__(shd_word) and not found: if src_word.word == shd_word.word and not found:
if 0.05 < src_word.get_difference(shd_word) < 3.0: if 0.05 < src_word.get_difference(shd_word) < 3.0:
if not shd_word.correct: if not shd_word.correct:
found = True found = True
......
...@@ -5,24 +5,71 @@ from mistake_enum import Mistake ...@@ -5,24 +5,71 @@ from mistake_enum import Mistake
class MistakeFinder: class MistakeFinder:
"""Finds the mistakes in already aligned lists of Words""" """Finds the mistakes in already aligned lists of Words"""
@staticmethod def __init__(self):
self._source = None
self._shadow = None
def start(self, source, shadow): def start(self, source, shadow):
"""Find all the mistakes and classify them.""" """Find all the mistakes and classify them."""
self._source = source
self._shadow = shadow
# Loop over the shadow: # Loop over the shadow:
for index, word in enumerate(shadow): for index, word in enumerate(self._shadow):
if not self._check_repetition(word, index): # If not a repetition, if not word.correct and word.mistake is None:
if not self._check_semantic_mistake(word): # semantic, self._determine_mistake(index, word)
if not self._check_phonentic_mistake(word): # or phonetic,
# ... then shadow word is random:
word.mistake = Mistake.RANDOM
# Loop over the source: # Loop over the source:
for word in source: for word in self._source:
if not word.shadowed and word.mistake is None: if not word.shadowed and word.mistake is None:
# If not yet marked as mistake, then it is skipped: # If not yet marked as mistake, then it is skipped:
word.mistake = Mistake.SKIPPED word.mistake = Mistake.SKIPPED
for word in self._shadow:
if word.mistake == Mistake.REPETITION:
print(str(word) + " " + str(word.mistake))
def _determine_mistake(self, index, word):
    """Label a shadow word as a repetition or, failing that, as random.

    Args:
        index: position of the word in the shadow list
        word: the shadow word to classify
    """
    # TODO: the semantic and phonetic checks (_check_semantic_mistake,
    # _check_phonentic_mistake) are not part of this chain yet.
    if self._check_repetition(word, index):
        return
    # Not a repetition, so the shadow word is random:
    word.mistake = Mistake.RANDOM
def _check_repetition(self, word, index): def _check_repetition(self, word, index):
pass assert 0 <= index < len(self._shadow)
found = self._check_pre_repetition(word, index)
if not found:
if index < len(self._shadow)-1 and self._shadow[index+1].word.find(
word.word) == 0:
word.mistake = Mistake.REPETITION
return True
return found
def _check_pre_repetition(self, word, index):
found_before = False
stop = False
diff = 0
i = index
while not found_before and not stop and i > 0:
i -= 1
stop = self._shadow[i].is_anchor()
found_at = self._shadow[i].word.find(word.word)
if found_at >= 0 and found_at == len(self._shadow[i].word)-\
len(word.word):
found_before = True
chain = found_before
while chain and index-i > 1:
diff += 1
i += 1
chain = self._shadow[i].word == self._shadow[index+diff].word
if chain:
for j in range(index, index+diff+1):
self._shadow[j].mistake = Mistake.REPETITION
else:
return False
return True
# TODO: WordNet integration. Function below should be usable already! # TODO: WordNet integration. Function below should be usable already!
def _check_semantic_mistake(self, shd_word): def _check_semantic_mistake(self, shd_word):
......
from saa_algorithm import SaaAlgorithm from saa_algorithm import SaaAlgorithm
import copy
from saa_Romeo import SaaRomeo from saa_Romeo import SaaRomeo
from anchor_algorithm import AnchorAlgorithm from anchor_algorithm import AnchorAlgorithm
from mistake_finder import MistakeFinder from mistake_finder import MistakeFinder
...@@ -27,6 +28,9 @@ class Statistics: ...@@ -27,6 +28,9 @@ class Statistics:
source: the words in the source file source: the words in the source file
shadow: the words in the shadow file shadow: the words in the shadow file
""" """
# Make a deepcopy such that the testing is equal for both strategies:
source_em = copy.deepcopy(source)
shadow_em = copy.deepcopy(shadow)
# Alignment 0 # Alignment 0
print('Romeo') print('Romeo')
...@@ -37,23 +41,20 @@ class Statistics: ...@@ -37,23 +41,20 @@ class Statistics:
print(f'source: {s_word.source} shadow: {s_word}') print(f'source: {s_word.source} shadow: {s_word}')
correctness = self._strategy.correctly_shadowed(source) correctness = self._strategy.correctly_shadowed(source)
# Reset the is_shadowed property # Alignment 1 (Thijs) (yet to be implemented)
for word in source:
word.shadowed = False
# The other strategies have as of yet not been adapted
# Alignment 1 (Thijs)
# Alignment 2 # Alignment 2
print('\n Emma') print('\n Emma')
self._strategy = AnchorAlgorithm() self._strategy = AnchorAlgorithm()
source_align_em, shadow_align_em = self._strategy.align(source, shadow) source_align_em, shadow_align_em = self._strategy.align(source_em,
shadow_em)
for s_word in shadow_align_em: for s_word in shadow_align_em:
if s_word.has_source(): if s_word.has_source():
print(f'source: {s_word.source} shadow: {s_word}') print(f'source: {s_word.source} shadow: {s_word}')
correctness = self._strategy.correctly_shadowed(source) correctness = self._strategy.correctly_shadowed(source_em)
# TODO: Make the mistake finding work with the statement below # TODO: Make the mistake finding work with the statement below
# MistakeFinder.start(source_align_em, shadow_align_em) finder = MistakeFinder()
finder.start(source_align_em, shadow_align_em)
return source_align, shadow_align, correctness
return source_align_em, shadow_align_em, correctness
...@@ -92,7 +92,11 @@ class Word: ...@@ -92,7 +92,11 @@ class Word:
Args: Args:
anchor: Word instance to anchor to anchor: Word instance to anchor to
""" """
self._anchor = anchor self._set_anchor(anchor)
def _set_anchor(self, anchor):
    """Anchor setter. Has to be overridden in the subclass.

    Args:
        anchor: Word instance to anchor to

    Raises:
        NotImplementedError: always, because this version stores nothing.
    """
    raise NotImplementedError
def get_difference(self, other): def get_difference(self, other):
"""Get the difference between the onset of this word and the other. """Get the difference between the onset of this word and the other.
...@@ -162,7 +166,11 @@ class ShadowWord(Word): ...@@ -162,7 +166,11 @@ class ShadowWord(Word):
""" """
assert self._correct is False assert self._correct is False
assert mistake != Mistake.SKIPPED assert mistake != Mistake.SKIPPED
self._mistake(mistake) self._mistake = mistake
def _set_anchor(self, anchor):
    """Store the anchor and record the same word as this word's source.

    Args:
        anchor: Word instance to anchor to
    """
    self._anchor = anchor
    # The anchor doubles as the source of this shadow word.
    self._source = anchor
class SourceWord(Word): class SourceWord(Word):
...@@ -195,7 +203,10 @@ class SourceWord(Word): ...@@ -195,7 +203,10 @@ class SourceWord(Word):
mistake: Mistake Enum mistake: Mistake Enum
""" """
assert mistake != Mistake.RANDOM assert mistake != Mistake.RANDOM
self._mistake(mistake) self._mistake = mistake
def _set_anchor(self, anchor):
    """Store the word this source word is anchored to.

    Args:
        anchor: Word instance to anchor to
    """
    self._anchor = anchor
class Sentence(list): class Sentence(list):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment