diff --git a/umbra/dutch_mmetaphone.py b/umbra/dutch_mmetaphone.py index 6a3d1cbadbebb5d8afb577dd7d1c4cbb600e170b..caa5dc53e3cb0efb03a3305f02881481853460c0 100644 --- a/umbra/dutch_mmetaphone.py +++ b/umbra/dutch_mmetaphone.py @@ -213,8 +213,6 @@ class DutchPhonetics: source = source[:amb_pos] + "+" + source[amb_pos + 1:] amb_pos = source.find(amb) - print(source, representations) - return source, representations @staticmethod diff --git a/umbra/filereader.py b/umbra/filereader.py index 31a534be239d3d0434f3b34066eda4794ac73622..930d56c2c5aa7aa6bebcc9b050aedc55e047511a 100644 --- a/umbra/filereader.py +++ b/umbra/filereader.py @@ -111,15 +111,6 @@ class FileWriter(ABC): def write(self): pass - def write_per_part(self): - """ - Write result per participant. - """ - for pn, pr in self.data: - results, stats = _format_results(pn, pr) - self.path = _participant_path(pn) - self.write() - @abstractmethod def _participant_path(self, number): pass diff --git a/umbra/form_checker.py b/umbra/form_checker.py index 50d1e0fb5bdec571bdaf9fbdbf3e8709254246af..e5e823d7e75f8081e54c9fd78607f37df7385028 100644 --- a/umbra/form_checker.py +++ b/umbra/form_checker.py @@ -8,25 +8,38 @@ class FormChecker: """ Constructor """ - self.initialize_frames() - self._prefixes = ['','ge','be','ver','on','ont'] - self._affixes = ['','en','t','te','ten','de','den','s',"'s"] + self._initialize_frames() + self._prefixes = ['', 'ge', 'be', 'ver', 'on', 'ont'] + self._affixes = ['', 'en', 't', 'te', 'ten', 'de', 'den', 's', "'s"] + self._verb_prefixes = ['ge', 'ver', 'be', 'ont', 'her', 'mis'] + self._verb_affixes = ['den', 'ten', 'de', 'te', 'en', 't', 'd', 'n', + ''] + self._consonants = ['b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', + 's', 't', 'z', 'c', 'h', 'j', 'q', 'v', 'w', 'x'] + self._vowels = ['a', 'e', 'o', 'u'] - def initialize_frames(self): + def _initialize_frames(self): """ Help the constructor by initializing the frames for irregular verbs. """ - self._irr_verbs_frame = pd.read_csv(ut.get_path('Irregular verbs.csv')) - self._irr_verbs_vte = self._irr_verbs_frame.copy() - self._irr_verbs_vtm = self._irr_verbs_frame.copy() - self._irr_verbs_vdw = self._irr_verbs_frame.copy() + self._irr_verbs_frame = pd.read_csv(ut.get_path( + 'resources/irregular_verbs.csv')) + self._irr_verbs_vte = self._create_frame('verleden tijd enkelvoud') + self._irr_verbs_vtm = self._create_frame('verleden tijd meervoud') + self._irr_verbs_vdw = self._create_frame('voltooid deelwoord') self._irr_verbs_frame = self._irr_verbs_frame.values - self._irr_verbs_vte = self._irr_verbs_vte.sort_values( - 'verleden tijd enkelvoud').values - self._irr_verbs_vtm = self._irr_verbs_vtm.sort_values( - 'verleden tijd meervoud').values - self._irr_verbs_vdw = self._irr_verbs_vdw.sort_values( - 'voltooid deelwoord').values + + def _create_frame(self, key): + """ + Create a new dataframe based on the key value given as parameter + + Args: + key: Key value for which column to sort the dataframe on. + + Returns: + A new dataframe sorted on the column corresponding to the key + """ + return self._irr_verbs_frame.copy().sort_values(key).values def form_related(self, source_word, shadow_word): """ @@ -39,10 +52,11 @@ class FormChecker: Returns: A boolean value indicating whether the arguments are form-related """ - return self.fix_related(source_word,shadow_word) or \ - self.irr_verb_related(source_word,shadow_word) + return self._fix_related(source_word, shadow_word) \ + or self._irr_verb_related(source_word, shadow_word) \ + or self._reg_verb_related(source_word, shadow_word) - def irr_verb_related(self, source_word, shadow_word): + def _irr_verb_related(self, source_word, shadow_word): """ Check whether the arguments are different versions of the same irregular verb. @@ -55,21 +69,13 @@ class FormChecker: A boolean value indicating whether the arguments are different versions of the same irregular verb. """ - if self.irr_verb_helper(self._irr_verbs_frame, 0, - source_word,shadow_word): - return True - elif self.irr_verb_helper(self._irr_verbs_vte,1 - , source_word,shadow_word): - return True - elif self.irr_verb_helper(self._irr_verbs_vtm,2 - , source_word,shadow_word): - return True - elif self.irr_verb_helper(self._irr_verbs_vdw,3 - , source_word,shadow_word): - return True + for key in range(4): + if self._irr_verb_helper(self._irr_verbs_frame, key, source_word, + shadow_word): + return True return False - def irr_verb_helper(self, frame, key, source_word, shadow_word): + def _irr_verb_helper(self, frame, key, source_word, shadow_word): """ Search in one frame to see whether the shadow word and source word are in the same row. @@ -86,14 +92,129 @@ class FormChecker: """ i = bisect_left(frame[key], source_word) - if i != len(frame[key]) and shadow_word in frame[i]: + return i != len(frame[key]) and shadow_word in frame[i] + + def _reg_verb_related(self, source_word, shadow_word): + """ + Check whether two words are different conjugations of the same verb + + Args: + source_word: lemma of type string, corresponding to a source word + shadow_word: lemma of type string, corresponding to a shadow word + + Returns: + A boolean value indicating whether the two input words are + different conjugations of the same verb. + """ + return self._reg_verb_helper(source_word, shadow_word) or \ + self._reg_verb_helper(shadow_word, source_word) + + def _reg_verb_helper(self, word1, word2): + """ + Help the _reg_verb_related function, this function is one-directional + + Args: + word1: lemma of type string + word2: lemma of type string + + Returns: + A boolean value indicating whether word1 and word2 are different + conjugations of the same verb. + """ + stem = self._get_stem(word1) + vdw1, vdw2 = self._create_vdws(stem) + if vdw1 == word2 or vdw2 == word2: return True + for affix in self._verb_affixes: + if stem+affix == word2: + return True return False - def reg_verb_related(self, source_word, shadow_word): - pass + def _create_vdws(self, stem): + """ + Creates the perfect time form of the verb based on the stem + + Args: + stem: the stem of a word as determined by other functions + + Returns: + The two possible perfect time forms based on the stem + + """ + word = 'ge' + stem + for v_prefix in self._verb_prefixes: + if stem[:len(v_prefix)] == v_prefix and len(stem) > \ + len(v_prefix) + 1: + word = word[2:] + word_1 = word+'d' + word_2 = word+'t' + return word_1, word_2 + + def _get_stem(self, word): + """ + Find the stem of the word given as input + + Args: + word: Lemma of type String + + Returns: + The stem of the word given as input + """ + stem = '' + for affix in self._verb_affixes: + length = len(affix) + if (word[-length:] == affix or affix == '') \ + and not self._is_vdw(word): + if affix == '' or len(word) == length: + stem = word + else: + stem = word[:-length] + stem = self._create_single_stem(stem, affix) + return stem + elif word[-length:] == affix: + stem = word[2:-length] + return stem + return stem + + def _is_vdw(self, word): + """ + Check whether the input word is in perfect time form + + Args: + word: Lemma of type string + + Returns: + A boolean value indicating whether the input word is in perfect + time form. + """ + return word[:2] == 'ge' and (word[-1] == 't' or word[-1] == 'd') + + def _create_single_stem(self, stem, affix): + """ + Find the single-person stem based on the stem and affix + + Args: + stem: The stem of a word, of type String + affix: The affix which was found, which was used in creating the + stem - def fix_related(self,source_word,shadow_word): + Returns: + The single-person stem of the stem given as input, based on the + stem itself and the affix + """ + if affix == 'en' and len(stem)>2 and stem != affix: + if stem[-1] in self._consonants and stem[-1] == stem[-2]: + stem = stem[:-1] + elif stem[-1] in self._consonants and stem[-2] in self._vowels \ + and stem[-3] in self._consonants: + stem = stem[:-1] + stem[-2:] + if stem[-1] == 'z': + return stem[:-1] + 's' + elif stem[-1] == 'v': + return stem[:-1] + 'f' + return stem + + def _fix_related(self, source_word, shadow_word): """ Check whether the arguments are related in terms of their prefixes and affixes. @@ -106,10 +227,10 @@ class FormChecker: A boolean value indicating whether the arguments are related in terms of their prefixes and affixes. """ - return self.prefix_related(source_word,shadow_word) or \ - self.affix_related(source_word,shadow_word) + return self._prefix_related(source_word, shadow_word) or \ + self._affix_related(source_word, shadow_word) - def prefix_related(self, source_word, shadow_word): + def _prefix_related(self, source_word, shadow_word): """ Check whether the arguments are related in terms of their prefixes @@ -130,7 +251,7 @@ class FormChecker: return True return False - def affix_related(self, source_word, shadow_word): + def _affix_related(self, source_word, shadow_word): """ Check whether the arguments are related in terms of their prefixes and postfixes. @@ -150,4 +271,4 @@ class FormChecker: for aff2 in self._affixes: if shadow_rest + aff2 == source_word: return True - return False \ No newline at end of file + return False diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py index 4c59796aa5691b732ee2fd6e2b4f045c20e0a0bf..8587e1bd14972ab7c37faeb5314a5f9c08b158f5 100644 --- a/umbra/mistake_finder.py +++ b/umbra/mistake_finder.py @@ -1,10 +1,11 @@ from mistake_enum import Mistake +from dutch_mmetaphone import DutchPhonetics class MistakeFinder: """Finds the mistakes in already aligned lists of Words""" - def __init__(self, seman_checker): + def __init__(self, seman_checker, form_checker): """ Constructor @@ -12,6 +13,7 @@ class MistakeFinder: seman_checker: Instance of the SemanticChecker class """ self._seman_checker = seman_checker + self._form_checker = form_checker self._source = None self._shadow = None @@ -28,7 +30,7 @@ class MistakeFinder: # Loop over the shadow: for index, word in enumerate(self._shadow): if not word.correct and word.mistake is None: - self._determine_mistake(index,word) + self._determine_mistake(index, word) # Loop over the source: for word in self._source: if not word.shadowed and word.mistake is None: @@ -38,7 +40,8 @@ class MistakeFinder: # PRINT FOR TEST OF SO FAR IMPLEMENTED MISTAKE CHECKS: for word in self._shadow: if word.mistake == Mistake.REPETITION or word.mistake\ - == Mistake.SEMANTIC: + == Mistake.SEMANTIC or word.mistake == Mistake.FORM or\ + word.mistake == Mistake.PHONETIC: print(str(word) + " " + str(word.mistake)) def _determine_mistake(self, index, word): @@ -50,12 +53,44 @@ class MistakeFinder: word: Instance of the Words class """ - if not self._check_repetition(word, index): # If not a repetition, - # if not self._check_form_mistake(word): # form, - if not self._check_semantic_mistake(word,index): # semantic, - # if not self._check_phonentic_mistake(word): # phonetic; + # Check for all the types of mistakes, and if it is none of them ... + if not self._check_repetition(word, index): + if not self._check_form_mistake(word, index): + if not self._check_semantic_mistake(word, index): + if not self._check_phonetic_mistake(word, index): # ... then shadow word is random: - word.mistake = Mistake.RANDOM + word.mistake = Mistake.RANDOM + + def _check_phonetic_mistake(self, shd_word, index): + """Checks if shd_word is a phonetic mistake or not. If yes, it flags + shd_word and the source word it belongs to as phonetic mistakes. + + Args: + shd_word: the Word which is checked for semantic mistake. + index: index of shd_word in shadow. + Returns: + phonetic_mistake: True if shd_word is a phonetic mistake, False + if not. + """ + phonetic_mistake = False + shd_anchor_index = self._shadow.find_previous_anchor(index) + if shd_anchor_index < 0: # If there is no anchor found, + src_index = 0 # then just start at the beginning. + else: + src_index = self._source.index( + self._shadow[shd_anchor_index].anchor) + while self._source[src_index].get_difference(shd_word) > 0 and \ + not phonetic_mistake and src_index < ( + len(self._source) - 1): # Crude fix, but removes error + src_word = self._source[src_index] + phonetic_mistake = self.phonetically_related(src_word, shd_word) + if phonetic_mistake: + shd_word.mistake = Mistake.PHONETIC + shd_word.source = src_word + if not src_word.shadowed and src_word.mistake is None: + src_word.mistake = Mistake.PHONETIC + src_index += 1 + return phonetic_mistake def _check_semantic_mistake(self, shd_word, index): """Checks if shd_word is a semantic mistake or not. If yes, it flags @@ -76,15 +111,38 @@ class MistakeFinder: src_index = self._source.index( self._shadow[shd_anchor_index].anchor) while self._source[src_index].get_difference(shd_word) > 0 and\ - not semantic_mistake and src_index < (len(self._source)-1): # Crude fix, but removes error + not semantic_mistake and src_index < (len(self._source)-1): src_word = self._source[src_index] - if self.semantically_related(src_word, shd_word): + semantic_mistake = self.semantically_related(src_word, shd_word) + if semantic_mistake: shd_word.mistake = Mistake.SEMANTIC - src_word.mistake = Mistake.SEMANTIC - semantic_mistake = True + shd_word.source = src_word + if not src_word.shadowed and src_word.mistake is None: + src_word.mistake = Mistake.SEMANTIC src_index += 1 return semantic_mistake + def _check_form_mistake(self, shd_word, index): + if shd_word.word == "te": + print("te") + form_mistake = False + last_shd_index = self._shadow.find_last_matched_shadow(index) + if last_shd_index < 0: + src_index = 0 + else: + src_index = self._source.index(self._shadow[last_shd_index].source) + while self._source[src_index].get_difference(shd_word) > 0 and\ + not form_mistake and src_index < len(self._source) - 1: + src_word = self._source[src_index] + form_mistake = self.form_related(src_word, shd_word) + if form_mistake: + shd_word.mistake = Mistake.FORM + shd_word.source = src_word + if not src_word.shadowed and src_word.mistake is None: + src_word.mistake = Mistake.FORM + src_index += 1 + return form_mistake + def _check_repetition(self, word, index): """ Check whether a word can be seen as a repetition mistake @@ -100,8 +158,8 @@ class MistakeFinder: assert 0 <= index < len(self._shadow) found = self._check_pre_repetition(word, index) if not found: # Check if word is the start of the next word. - if index < len(self._shadow)-1 and self._shadow[index+1].word.find( - word.word) == 0: + if index < len(self._shadow)-1 and self._shadow[index+1].word.\ + startswith(word.word): word.mistake = Mistake.REPETITION found = True return found @@ -125,9 +183,7 @@ class MistakeFinder: while not found_before and not stop and i > 0: i -= 1 stop = self._shadow[i].is_anchor() - found_at = self._shadow[i].word.find(word.word) - if found_at >= 0 and found_at == len(self._shadow[i].word)-\ - len(word.word): + if self._shadow[i].word.endswith(word.word): found_before = True chain = found_before @@ -142,8 +198,7 @@ class MistakeFinder: return chain def semantically_related(self, src_word, shd_word): - """Checks if src_word and shd_word are semantically related. If yes, it - flags both as semantic mistakes. + """Checks if src_word and shd_word are semantically related. Args: src_word: SourceWord instance @@ -155,10 +210,40 @@ class MistakeFinder: shd_string = shd_word.word src_string = src_word.word related = self._seman_checker.semantically_related(src_string, - shd_string) - if related: - shd_word.mistake = Mistake.SEMANTIC - src_word.mistake = Mistake.SEMANTIC + shd_string)\ + and shd_string != src_string + return related + + def phonetically_related(self, src_word, shd_word): + """Checks if src_word and shd_word are phonetically related. + + Args: + src_word: SourceWord instance + shd_word: ShadowWord instance + Returns: + related: True if src_word and shd_word are phonetically related, + False if not. + """ + shd_string = shd_word.word + src_string = src_word.word + related = DutchPhonetics.compare(src_string, shd_string)\ + and shd_string != src_string + return related + + def form_related(self, src_word, shd_word): + """Checks if src_word and shd_word are related in form. + + Args: + src_word: SourceWord instance + shd_word: ShadowWord instance + Returns: + related: True if src_word and shd_word are related in form, + False if not. + """ + shd_string = shd_word.word + src_string = src_word.word + related = self._form_checker.form_related(src_string, shd_string) and\ + shd_string != src_string return related def print_for_nw(self, source, shadow): @@ -171,19 +256,5 @@ class MistakeFinder: self._source = source self._shadow = shadow for word in self._shadow: - if word.mistake == Mistake.REPETITION or word.mistake\ - == Mistake.SEMANTIC or word.mistake == Mistake.FORM: - print(str(word) + " " + str(word.mistake)) - - def _check_phonetic_mistake(self, shd_word): - """ - Check whether a word can be seen as a phonetic mistake - - Args: - shd_word: An instance of the Words class - - Returns: - A boolean value indicating whether the word can be seen as a - phonetic mistake. - """ - pass + if not word.correct and word.mistake != Mistake.RANDOM: + print(str(word) + " " + str(word.mistake)) \ No newline at end of file diff --git a/umbra/model.py b/umbra/model.py index c87e19116f3ae0213a3dbb560e76594e426450e8..b35120477749acd2b62de020eb35bc5f448eaaf9 100644 --- a/umbra/model.py +++ b/umbra/model.py @@ -2,6 +2,7 @@ from statistics import Statistics from shadow_task import ShadowTask import pandas as pd + class Model: """Internal data representation and processing.""" def __init__(self): diff --git a/umbra/needleman_wunsch.py b/umbra/needleman_wunsch.py index b7a333fef14736ced43d71fe42ea36c19d7f7b2a..ddc27a4953fdb9863792b62e5b41b19888fa9f5d 100644 --- a/umbra/needleman_wunsch.py +++ b/umbra/needleman_wunsch.py @@ -3,6 +3,7 @@ from alignment_strategy import AlignmentStrategy from words import Sentence import numpy as np from mistake_enum import Mistake +from dutch_mmetaphone import DutchPhonetics class NeedlemanWunsch(AlignmentStrategy): @@ -15,6 +16,7 @@ class NeedlemanWunsch(AlignmentStrategy): self._mismatch = -2 self._gap_sc = -1 self._seman_match = 2 + self._phon_match = 2 self._repetition = 0 self._form_match = 2 self._pointers = ['diag', 'up', 'left'] @@ -36,6 +38,8 @@ class NeedlemanWunsch(AlignmentStrategy): gap_sc: the score that is allocated for a gap seman_match: the score that is allocated when two words align by virtue of semantic equivalence + phon_match: the score that is allocated when two words sound the + same. repetition: the score that is allocated when a shadow word is a stuttering form_match: the score that is allocated when two words align by @@ -49,6 +53,8 @@ class NeedlemanWunsch(AlignmentStrategy): self._gap_sc = gap_sc if seman_match: self._seman_match = seman_match + if phon_match: + self._phon_match = phon_match if repetition: self._repetition = repetition if form_match: @@ -98,15 +104,20 @@ class NeedlemanWunsch(AlignmentStrategy): n = len(self._source) m = len(self._shadow) for i in range(1, m+1): + shadow_word = self._shadow[i - 1] for j in range(1, n+1): - if self._source[j - 1] == self._shadow[i - 1]: + source_word = self._source[j - 1] + if source_word == shadow_word: value = self._match elif self._form_checker.form_related(self._source[j-1].word, self._shadow[i-1].word): value = self._form_match elif self._seman_checker.semantically_related( - self._source[j-1].word, self._shadow[i-1].word): + source_word.word, shadow_word.word): value = self._seman_match + elif DutchPhonetics.compare(source_word.word, + shadow_word.word): + value = self._phon_match else: value = self._mismatch match_value = self._matrix[i-1, j-1]['value'] + value @@ -157,6 +168,8 @@ class NeedlemanWunsch(AlignmentStrategy): self._shadow[i - 1].mistake = Mistake.RANDOM if self._check_repetition(i - 1): self._shadow[i - 1].mistake = Mistake.REPETITION + else: + self._shadow[i - 1].mistake = Mistake.RANDOM i -= 1 alignment_source.reverse() @@ -199,4 +212,10 @@ class NeedlemanWunsch(AlignmentStrategy): source.shadow = shadow source.mistake = Mistake.FORM shadow.mistake = Mistake.FORM + elif DutchPhonetics.compare(source.word, shadow.word): + source.shadowed = True + source.mistake = Mistake.PHONETIC + shadow.mistake = Mistake.PHONETIC + else: + shadow.mistake = Mistake.RANDOM return source, shadow diff --git a/umbra/Irregular verbs.csv b/umbra/resources/irregular_verbs.csv similarity index 100% rename from umbra/Irregular verbs.csv rename to umbra/resources/irregular_verbs.csv diff --git a/umbra/semantic_checker.py b/umbra/semantic_checker.py index e4b0c47a95e587ff72b7bcf9d10543e2b9157234..8e7fd970475754b88b55c0df769b411a19f71990 100644 --- a/umbra/semantic_checker.py +++ b/umbra/semantic_checker.py @@ -6,17 +6,19 @@ from utils import Utils as ut class SemanticChecker: - def __init__(self): + def __init__(self, parser=""): """ Constructor Args: parser: object of the Wn_grid_parser class """ - self._path = "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz" - self._parser = Wn_grid_parser(ut.get_path(self._path)) - # "./umbra/OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz") # Ubuntu path, temporary 'fix' - self._parser.load_synonyms_dicts() + path = "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz" + if parser != "": + self._parser = parser + else: + self._parser = Wn_grid_parser(ut.get_path(path)) + self._parser.load_synonyms_dicts() self._lemma2synsets = getattr(self._parser,'lemma2synsets') self._reltypes = getattr(self._parser,'reltypes') self._syn_ids = getattr(self._parser,'syn_ids') diff --git a/umbra/statistics.py b/umbra/statistics.py index 2c3814d027740b6baa7276ed15fe695c68925f9a..92ac8111b6021065a96a4ce5decba39caa8b5e72 100644 --- a/umbra/statistics.py +++ b/umbra/statistics.py @@ -4,6 +4,7 @@ from anchor_algorithm import AnchorAlgorithm from mistake_finder import MistakeFinder from semantic_checker import SemanticChecker from form_checker import FormChecker +from dutch_mmetaphone import DutchPhonetics from mistake_counter import MistakeCounter from utils import Utils as ut import time @@ -20,11 +21,13 @@ class Statistics: # currently the case, so the two operate seperately in this class. # _parser and _seman_checker should be moved to MistakeFinder when # possible - self.path = ut.get_path("OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz") - + self.path = ut.get_path( + "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz") self._seman_checker = SemanticChecker() self._form_checker = FormChecker() - self._mistake_finder = MistakeFinder(self._seman_checker) + self._phon_checker = DutchPhonetics() + self._mistake_finder = MistakeFinder(self._seman_checker, + self._form_checker) self._mistake_counter = MistakeCounter() @property @@ -58,11 +61,13 @@ class Statistics: results = 'Needleman-Wunsch' discrete_start_time = time.time() self._strategy = NeedlemanWunsch(self._seman_checker, - self._form_checker) + self._form_checker, + self._phon_checker) source_align, shadow_align = self._strategy.align(source, shadow) correctness = self._strategy.correctly_shadowed(source) discrete_time = time.time() - discrete_start_time results += f'taken time:{discrete_time}\n' + self._mistake_finder.print_for_nw(source_align, shadow_align) # Alignment 2 @@ -89,4 +94,5 @@ class Statistics: source_word.get_difference (source_word.shadow)]) trial.delays = delays_per_word + return correctness diff --git a/umbra/tests/context.py b/umbra/tests/context.py index a0421618d039700ca0aa954ed28e88f801068e0f..0f971d8e79cd21b41ecd4cede9bae631bb292cb3 100644 --- a/umbra/tests/context.py +++ b/umbra/tests/context.py @@ -1,6 +1,19 @@ -import os -import sys +class XTest(pyfakefs.fake_filesystem_unittest.TestCase): -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../umbra'))) + @classmethod + def setUpClass(cls): + print("setup class") -from controller import Controller + def setUp(self): + self.setUpPyfakefs() + print("setup") + + def tearDown(self): + print("teardown controller test") + + @classmethod + def tearDownClass(cls): + print("teardown class") + + +print("hello world"); diff --git a/umbra/tests/test_controller.py b/umbra/tests/test_controller.py index 8db4575b31a564b06dcc933a1a9417b2afe70aad..6ffb780cc8701af1d965d5b2b00ff20f1839aace 100644 --- a/umbra/tests/test_controller.py +++ b/umbra/tests/test_controller.py @@ -1,64 +1,161 @@ import unittest import unittest.mock as mock from ..controller import Controller +import pyfakefs -class ControllerTest(unittest.TestCase): +class ControllerTest(pyfakefs.fake_filesystem_unittest.TestCase): @classmethod def setUpClass(cls): print("setup class") def setUp(self): + self.setUpPyfakefs() print("setup") def test_import(self): assert 0 == 0 - def test_set_path(self): + def makeMVC(self): view = mock.Mock() model = mock.Mock() controller = Controller(view, model) + return model, view, controller + + + def test_set_path_shadow(self): + model, view, controller = self.makeMVC() controller._set_path(["foo"], "shadow") self.assertEqual(controller._shadow_files, ["foo"]) + + def test_set_path_source(self): + model, view, controller = self.makeMVC() controller._set_path(["bar"], "source") self.assertEqual(controller._source_files, ["bar"]) + + def test_set_path_empty(self): + model, view, controller = self.makeMVC() controller._set_path([], "source") self.assertEqual(controller._source_files, []) + def test_no_filepath(self): + model, view, controller = self.makeMVC() + controller._set_path([], "") + self.assertEqual(controller._source_files, []) + self.assertEqual(controller._shadow_files, []) + def test_compare_files_view(self): - view = mock.Mock() - model = mock.Mock() - controller = Controller(view, model) + model, view, controller = self.makeMVC() controller._source_files = ["foo"] controller._compare_files() view.update_message.assert_called_with("no shadow") controller._shadow_files = ["bar"] controller._compare_files() - calls = [mock.call("no shadow"), mock.call("files ok"), mock.call("comparison complete")] view.update_message.assert_has_calls(calls) calls_but = [mock.call("save", "normal"), mock.call("compare", "disabled")] view.button_status.assert_has_calls(calls_but) - def test_read_folder(self): - #test case for no csv both types - view = mock.Mock() - model = mock.Mock() + def test_read_folder_source(self): + model, view, controller = self.makeMVC() model.get_multi_data.return_value = {} fr = mock.Mock() fr.read.return_value = "foo" - controller = Controller(view, model) controller._filereader = fr - controller._shadow_files = ["foo"] - controller._read_folder("source") - controller._source_files = ["bar"] - controller._read_folder("shadow") - controller._source_files = ["00.T.csv"] + controller._source_files = ["00.T.csv"] #filename is barely legal but ok controller._read_folder("source") model.get_multi_data.assert_called_with("source") controller._filereader.read.assert_has_calls([mock.call("00.T.csv", "source")]) + def test_read_folder_none(self): + model, view, controller = self.makeMVC() + controller._shadow_files = ["foo"] + controller._read_folder("shadow") # Nothing should happen there + controller._source_files = ["bar"] # because no .csv in filepath + controller._read_folder("source") + controller._read_folder("") # try the empty + + def test_read_folder_shadow(self): + model, view, controller = self.makeMVC() + model.get_multi_data.return_value = {} + fr = mock.Mock() + fr.read.return_value = "foo" + controller._filereader = fr + controller._shadow_files = ["1_AO0.TextGrid.csv"] #filename is barely legal but ok + controller._read_folder("shadow") + model.get_multi_data.assert_called_with("shadow") + controller._filereader.read.assert_has_calls([mock.call("1_AO0.TextGrid.csv", "shadow")]) + + def test_save_results(self): + model, view, controller = self.makeMVC() + model.analysis_complete.return_value = False + controller._save_results() + view.update_message.assert_has_calls([mock.call("no comparison")]) + view.ask_save_location.return_value = "baz/bar/yit" + model.analysis_complete.return_value = True + fw = mock.Mock() + controller._filewriter = fw + controller._save_results() + view.ask_save_location.assert_called() + fw.write_multiple.assert_called_with("baz/bar/yit", model.multi_results) + view.update_message.assert_called_with("saved") + + def test_select_folder(self): + model, view, controller = self.makeMVC() + self.fs.create_file('~/foo/bar/baz/file1.csv') + self.fs.create_file('~/foo/bar/baz/file2.csv') + view.dir_dialog.return_value = "~/foo/bar/baz" + controller._select_folder() + assert controller.shadow_files == ['~/foo/bar/baz/file1.csv', '~/foo/bar/baz/file2.csv'] + view.update_files.assert_called_with(["~/foo/bar/baz/file1.csv", "~/foo/bar/baz/file2.csv"], "shadow") + view.button_status.assert_called_with("select shadow", "disabled") + + def test_select_source_folder(self): + model, view, controller = self.makeMVC() + self.fs.create_file('~/foo/bar/baz/file1.csv') + self.fs.create_file('~/foo/bar/baz/file2.csv') + view.dir_dialog.return_value = "~/foo/bar/baz" + controller._select_folder("source") + assert controller.source_files == ['~/foo/bar/baz/file1.csv', '~/foo/bar/baz/file2.csv'] + view.update_files.assert_called_with(["~/foo/bar/baz/file1.csv", "~/foo/bar/baz/file2.csv"], "source") + view.button_status.assert_called_with("select source", "disabled") + + def test_select_no_file(self): + model, view, controller = self.makeMVC() + self.fs.create_file('~/foo/bar/baz/file1.csv') + self.fs.create_file('~/foo/bar/baz/file2.csv') + view.dir_dialog.return_value = "" + controller._select_folder() + assert controller.source_files == [] + view.update_files.assert_not_called() + view.button_status.assert_not_called() + view.update_message.assert_called_with("no_file") + + def test_delete_files_one(self): + model, view, controller = self.makeMVC() + view.selected.return_value = "foo" + controller._source_files = ["foo","bar","baz"] + controller._delete_files("source") + assert controller._source_files == ["bar","baz"] + view.button_status.assert_called_with("select_folder source","normal") + + def test_delete_files_all(self): + model, view, controller = self.makeMVC() + view.selected.return_value = "foo" + controller._source_files = ["foo","bar","baz"] + controller._delete_files("source", True) + assert controller._source_files == [] + view.button_status.assert_called_with("select source","normal") + + def test_delete_files_anomalous(self): + model, view, controller = self.makeMVC() + view.selected.return_value = "foo" + controller._source_files = [] + controller._delete_files("source", True) + assert controller._source_files == [] + view.button_status.assert_called_with("select source","normal") + def tearDown(self): print("teardown controller test") diff --git a/umbra/tests/test_dwn.py b/umbra/tests/test_dwn.py new file mode 100644 index 0000000000000000000000000000000000000000..435f40bf52e2c6d79ae6bf2485309111bdfc6d16 --- /dev/null +++ b/umbra/tests/test_dwn.py @@ -0,0 +1,57 @@ +from ..OpenDutchWordnet.wn_grid_parser import Wn_grid_parser +from ..semantic_checker import SemanticChecker +from ..utils import Utils as ut +import unittest + +class DWNTest(unittest.TestCase): + + @classmethod + def setUpClass(self): + instance = Wn_grid_parser("./OpenDutchWordnet/resources/odwn/" \ + "odwn_orbn_gwg-LMF_1.3.xml.gz") + instance.load_synonyms_dicts() + self.sc = SemanticChecker(instance) + print("setup class dwn") + + def setUp(self): + print("setup") + + def test_hypernym(self): + self.assertTrue(self.sc.semantically_related('hond', 'carnivoor')) + + def test_hyponym(self): + self.assertTrue(self.sc.semantically_related('carnivoor', 'hond')) + + def test_known(self): + self.assertFalse(self.sc.semantically_related('water', 'hond')) + + def test_synonym(self): + self.assertTrue(self.sc.semantically_related('huis', 'woning')) + + def test_first_unknown(self): + self.assertFalse(self.sc.semantically_related('huis', 'foo')) + + def test_shared_hypernym(self): + self.assertTrue(self.sc.semantically_related('vork', 'mes')) + + def test_second_unknown(self): + self.assertFalse(self.sc.semantically_related('foo', 'carnivoor')) + + def test_both_unknown(self): + self.assertFalse(self.sc.semantically_related('baz', 'foo')) + + def test_article(self): + self.assertFalse(self.sc.semantically_related('de', 'het')) + + def test_numeral(self): + self.assertFalse(self.sc.semantically_related('een', 'ander')) + + def test_foreign(self): + self.assertFalse(self.sc.semantically_related('dog', 'pet')) + + def tearDown(self): + print("teardown controller test") + + @classmethod + def tearDownClass(cls): + print("teardown class") diff --git a/umbra/tests/test_view.py b/umbra/tests/test_view.py new file mode 100644 index 0000000000000000000000000000000000000000..c35339df6701490c9fc8c119bf8b4b130cfb657c --- /dev/null +++ b/umbra/tests/test_view.py @@ -0,0 +1,24 @@ +import unittest +import unittest.mock as mock +from ..view import View +import pyfakefs + + +class ControllerTest(pyfakefs.fake_filesystem_unittest.TestCase): + + @classmethod + def setUpClass(cls): + print("setup class") + + def setUp(self): + print("setup") + + def test_button_creation(self): + assert 0 == 0 + + def tearDown(self): + print("teaeenrdown controller test") + + @classmethod + def tearDownClass(cls): + print("teardown class") diff --git a/umbra/utils.py b/umbra/utils.py index c53b3583b77481d3a5e8dc15217cfc56690045fe..2d228e35b4812ae9f3aeba627ed08b54e5e16850 100644 --- a/umbra/utils.py +++ b/umbra/utils.py @@ -10,6 +10,15 @@ class Utils: return "./umbra/"+path return path + @staticmethod + def set_icon(window, path): + """Set icon, depending on OS. Constitutes a 'fix' for TkInter + on linux not being able to take .ico icons. + """ + if sys.platform.startswith('linux'): + return + window.iconbitmap(path) + @staticmethod def time_to_int(*times): """Helper function that turns a sequence of time strings into a list of diff --git a/umbra/view.py b/umbra/view.py index ebd3b73f01ffb22f6c68118fb436854385ce54f5..732655ee97118c0712e7cfc48f98feb3d7ca20f6 100644 --- a/umbra/view.py +++ b/umbra/view.py @@ -52,18 +52,9 @@ class View: # Message self._create_label('message', self._frame, "", 5, 1) - self._spit = False # Window Icon - self._window.iconbitmap("./resources/logo.ico") - - @property - def spit(self): - return self._spit - - @spit.setter - def spit(self, boolean): - self._spit = boolean + ut.set_icon(self._window, ut.get_path("resources/logo.ico")) def display(self): """Start main loop, displaying GUI elements.""" @@ -181,10 +172,7 @@ class View: def ask_save_location(self): """Ask user for location to save file.""" - path = filedialog.asksaveasfilename(title="Save file", - parent=self._window, - filetypes=((".txt files", "*.txt"), - ("all files", "*.*"))) + path = filedialog.askdirectory() if path == "": self.update_message('not_saved') return path @@ -388,7 +376,7 @@ class SplashView(tk.Toplevel): tk.Toplevel.__init__(self, parent) self.grab_set() self.title("Umbra") - self.iconbitmap(ut.get_path("resources/logo.ico")) + ut.set_icon(self, ut.get_path("resources/logo.ico")) im_path = Image.open(ut.get_path("resources/splash.png")) ph = ImageTk.PhotoImage(im_path) load_img = tk.Label(self, image=ph) diff --git a/umbra/words.py b/umbra/words.py index 5208f9733a8c28769290ea6e08f64574716ad0bb..2801d8b87326a6b532fc2957115ec3c3c6837e7c 100644 --- a/umbra/words.py +++ b/umbra/words.py @@ -16,6 +16,7 @@ class Word: return 1 def __eq__(self, word): + # TODO: move self._onset < word.onset out of __eq__ return self._word == word.word and self._onset < word.onset @property @@ -256,6 +257,25 @@ class Sentence(list): return index index -= 1 return -1 + + def find_last_matched_shadow(self, index): + """Find the index of the last shadowed word. + + Args: + index: the index before which should be sought for a shadow word + that is matched with a source word. + + Returns: + last_index: the index of the previous matched shadow. This is -1 + if there is no matches shadow before or on the specified index, or + if the index was out of bounds. + """ + if 0 <= index < len(self): + while index >= 0: + if self[index].has_source(): + return index + index -= 1 + return index def __str__(self): return ' '.join([word.word for word in self])