diff --git a/umbra/form_checker.py b/umbra/form_checker.py
new file mode 100644
index 0000000000000000000000000000000000000000..eaa896193a5b3f582fb1d9ebfc6eb227aa31de59
--- /dev/null
+++ b/umbra/form_checker.py
@@ -0,0 +1,13 @@
+class FormChecker:
+    """Decide whether a shadow word is another form of a source word.
+
+    Placeholder: no form analysis is implemented yet.
+    """
+
+    def form_related(self, source_word, shadow_word):
+        """Return True if shadow_word is another form of source_word.
+
+        Currently always returns False because no form analysis is
+        implemented yet.
+        """
+        return False
diff --git a/umbra/mistake_enum.py b/umbra/mistake_enum.py
index cbad0cff910dcb01f4d6a2fd329bc3e735d13094..fbe711589f230eba61aafd4098175694e155df8f 100644
--- a/umbra/mistake_enum.py
+++ b/umbra/mistake_enum.py
@@ -16,3 +16,4 @@ class Mistake(Enum):
     SEMANTIC = "semantic"
     SKIPPED = "skipped"  # For source words that are not shadowed
     RANDOM = "random"  # For shadow words that do not reflect a source word
+    FORM = "form"  # For verbs that are shadowed in another form
diff --git a/umbra/needleman_wunsch.py b/umbra/needleman_wunsch.py
index a44adbd6788d20eb4854071689c7019b90ca881b..baab1aeaeb99ab5f115738d951a8ff8c36d87d52 100644
--- a/umbra/needleman_wunsch.py
+++ b/umbra/needleman_wunsch.py
@@ -16,6 +16,7 @@ class NeedlemanWunsch(AlignmentStrategy):
         self._gap_sc = -1
         self._seman_match = 2
         self._repetition = 0
+        self._form_match = 2
         self._pointers = ['diag', 'up', 'left']
         self._source = None
         self._shadow = None
@@ -23,7 +24,11 @@ class NeedlemanWunsch(AlignmentStrategy):
         self._seman_checker = seman_checker
+        # NOTE(review): nothing visible in this patch assigns a form checker;
+        # TODO: inject a FormChecker through the constructor.
+        self._form_checker = None
 
     def alignment_options(self, match=None, mismatch=None,
-                          gap_sc=None, seman_match=None, repetition=None):
+                          gap_sc=None, seman_match=None, repetition=None,
+                          form_match=None):
         """ Set the scores that are allocated whilst aligning.
         Can be changed one at a time or more at once.
 
@@ -44,6 +49,12 @@ class NeedlemanWunsch(AlignmentStrategy):
             self._gap_sc = gap_sc
         if seman_match:
             self._seman_match = seman_match
+        # Compare against None so that a score of 0 (the default
+        # repetition score) can still be set explicitly.
+        if repetition is not None:
+            self._repetition = repetition
+        if form_match is not None:
+            self._form_match = form_match
 
     def align(self, source, shadow):
         """ This is the main function of finding alignments.
@@ -93,6 +104,11 @@ class NeedlemanWunsch(AlignmentStrategy):
             for j in range(1, n+1):
                 if self._source[j - 1] == self._shadow[i - 1]:
                     value = self._match
+                elif (self._form_checker is not None
+                      and self._form_checker.form_related(
+                          self._source[j-1].word,
+                          self._shadow[i-1].word)):
+                    value = self._form_match
                 elif self._seman_checker.semantically_related(
                         self._source[j-1].word, self._shadow[i-1].word):
                     value = self._seman_match
@@ -174,4 +190,9 @@ class NeedlemanWunsch(AlignmentStrategy):
             source.shadowed = True
             source.mistake = Mistake.SEMANTIC
             shadow.mistake = Mistake.SEMANTIC
+        elif (self._form_checker is not None
+              and self._form_checker.form_related(source.word, shadow.word)):
+            source.shadowed = True
+            source.mistake = Mistake.FORM
+            shadow.mistake = Mistake.FORM
         return source, shadow