diff --git a/umbra/controller.py b/umbra/controller.py index 9d957cd7f7c1f5c2a523d0af09d9c6ee5070ee4e..649a9317be054035d072051049bee11ffab03f48 100644 --- a/umbra/controller.py +++ b/umbra/controller.py @@ -1,7 +1,8 @@ from filereader import CSVReader -from filereader import CSVWriter +from filereader import CSVWriter, TxtWriter from utils import Utils as ut import os +import numpy as np class Controller: @@ -27,7 +28,7 @@ class Controller: return self._source_files @source_files.setter - def shadow_files(self, val): + def source_files(self, val): self._source_files = val @property @@ -97,7 +98,7 @@ class Controller: """ files = getattr(self, '_{}_files'.format(type)) selection = self._view.select_files(type) - # TODO: Check if not already present + # TODO: Check if not already present. R: "make it a set"? files.extend(selection) self._view.update_files(files, type) if files: @@ -120,9 +121,11 @@ class Controller: files.remove(s) self._view.button_status("select {}".format(type), "normal") else: - selection = [file for file in files if selection.lower() in file.lower()][0] + selection = [file for file in files if selection.lower() in + file.lower()][0] files.remove(selection) - self._view.button_status("select_folder {}".format(type), "normal") + self._view.button_status("select_folder {}".format(type), + "normal") self._view.update_files(files, type) def _compare_files(self): @@ -137,22 +140,20 @@ class Controller: self._view.update_message('files ok') self._view.button_status("save", "normal") - self._read_folder("shadow") - self._read_folder("source") + self._read_folder() self._model.compare() if self._model.analysis_complete: self._view.update_message('comparison complete') self._view.button_status("compare", "disabled") - def _save_results(self): + def _save_results(self): # TODO: This function will not work as of now, something for later. """Save analysis results to file. If there is multiple results, save those. """ if not self._model.analysis_complete(): self._view.update_message('no comparison') else: - if self._model.multi_results: path = self._view.ask_save_location() results = self._model.multi_results @@ -162,43 +163,22 @@ class Controller: and not self._model.analysis_results: self._view.update_message('nonexistent') - """ - def _read_files(self, path, type): - Read data from file paths and save to model. - - Args: - type (str): Role of file ('source' or 'shadow') - - data = self._filereader.read(path, type) - if type == "source": - self._model.data_source = data - self._model.id = ut.id_regex(path) - else: - self._model.data_shadow = data - """ - - def _read_folder(self, type): + def _read_folder(self): """ - Read in multiple source files. - Extensible to consider "type". Now assume shadow - files. + Read in multiple files. """ - if type == "shadow": - for file_path in self._shadow_files: - if "AO" not in file_path or ".csv" not in file_path: - # Very dirty - pass # TODO: raise error - else: - dict = self._model.get_multi_data(type) - key = ut.shadow_regex(file_path) - data = self._filereader.read(file_path, type) - ut.add_to_dict(key, data, dict, alternative=True) - elif type == "source": - for file_path in self._source_files: - if ".csv" not in file_path: - pass - else: - dict = self._model.get_multi_data(type) - key = ut.id_regex(file_path) - data = self._filereader.read(file_path, type) - ut.add_to_dict(key, data, dict, alternative=True) + non_matched_shadows = np.array([]) + non_matched_shadows = np.append(non_matched_shadows, self._shadow_files) + for file_path in self._source_files: + video = ut.id_regex(file_path) + source_data = self._filereader.read(file_path, "source") + shadows = np.array([]) + for shadow_candidate in non_matched_shadows: + participant, task = ut.shadow_regex(shadow_candidate, video) + if task is not None: + shadow_data = self._filereader.read(shadow_candidate, + "shadow") + self._model.add_task(participant, video, task, source_data, + shadow_data) + np.append(shadows, shadow_candidate) + non_matched_shadows = np.delete(non_matched_shadows, shadows) diff --git a/umbra/mistake_counter.py b/umbra/mistake_counter.py index 3fdbceec8470029be0b1b2ecc0357940e929a9e7..30441c339fb0b4e299126838196991ca8e4963bf 100644 --- a/umbra/mistake_counter.py +++ b/umbra/mistake_counter.py @@ -33,12 +33,6 @@ class MistakeCounter: skipped = self.count_mistakes(source, Mistake.SKIPPED) random = self.count_mistakes(shadow, Mistake.RANDOM) form = self.count_mistakes(source, Mistake.FORM) - print("nr of repetition mistakes:", repetition) - print("nr of phonetic mistakes:", phonetic) - print("nr of semantic mistakes:", semantic) - print("nr of skipped words in source:", skipped) - print("nr of random words in shadow:", random) - print("nr of form mistakes:", form) return repetition, phonetic, semantic, skipped, random, form def count_mistakes(self, sentence, mistake): @@ -78,5 +72,10 @@ class MistakeCounter: self.analyse_mistakes(source, shadow) mistakes = repetition+phonetic+semantic+skipped+random+form accuracy = (len(source)-mistakes)/len(source) - print("The accuracy of the shadowing task is", accuracy) - return accuracy \ No newline at end of file + return ({'accuracy' : accuracy, + '#mistakes': mistakes, + '#phonetic': phonetic, + '#repetition': repetition, + '#semantic': semantic, + '#skipped': skipped, + '#random': random}) diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py index 90ac984de2a947b19cd92e25189221cb30062f74..8587e1bd14972ab7c37faeb5314a5f9c08b158f5 100644 --- a/umbra/mistake_finder.py +++ b/umbra/mistake_finder.py @@ -257,4 +257,4 @@ class MistakeFinder: self._shadow = shadow for word in self._shadow: if not word.correct and word.mistake != Mistake.RANDOM: - print(str(word) + " " + str(word.mistake)) + print(str(word) + " " + str(word.mistake)) \ No newline at end of file diff --git a/umbra/model.py b/umbra/model.py index 46fe906652857c146efdb9801d1abfc831fa859a..4953bb8ee1e4930cf3517d0a7027b6a912e7c5b5 100644 --- a/umbra/model.py +++ b/umbra/model.py @@ -1,136 +1,105 @@ from statistics import Statistics -from utils import Utils as ut +from shadow_task import ShadowTask +import pandas as pd class Model: """Internal data representation and processing.""" def __init__(self): self._stats = Statistics(None) - self._data_source = None - self._multi_data_shadow = {} # dict because fast and indexing - self._multi_data_source = {} - self._data_shadow = None - self._analysis_results = {} - self._multi_results = {} - self._save_pref = None - self._id = "" # this is a string + self._analysed = False + self._shadow_tasks = [] @property - def multi_results(self): - return self._multi_results + def shadow_tasks(self): + """Getter for the shadow tasks. - @multi_results.setter - def multi_results(self, results): - self._multi_results = results - - @property - def analysis_results(self): - return self._analysis_results - - @analysis_results.setter - def analysis_results(self, results): - self._analysis_results = results - - @property - def data_shadow(self): - return self._data_shadow - - @data_shadow.setter - def data_shadow(self, data): - self._data_shadow = data - - @property - def multi_data_shadow(self): - return self._multi_data_shadow - - @multi_data_shadow.setter - def multi_data_shadow(self, data): - self._multi_data_shadow = data - - @property - def id(self): - return self._id - - @id.setter # TODO: DO we still use this? As far as I have seen, we do not. Also, change name of id here. - def id(self, id): - self._id = id - - @property - def save_pref(self): - return self._save_pref - - @save_pref.setter - def save_pref(self, save_pref_code): - self._save_pref = save_pref_code - - @property - def data_source(self): - return self._data_source - - @data_source.setter - def data_source(self, data): - self._data_source = data - - @property - def multi_data_source(self): - return self._multi_data_source - - @multi_data_source.setter - def multi_data_source(self, data): - self._multi_data_source = data + Returns: + the shadow tasks + """ + return self._shadow_tasks - def has_source(self): - """Check whether self._data_source has a value.""" - return self._data_source is not None \ - or self._multi_data_source is not None + @shadow_tasks.setter + def shadow_tasks(self, tasks): + """Set the shadow tasks with the given tasks. - def has_shadow(self): - """Check whether self._data_shadow has a value.""" - return self._data_shadow is not None \ - or self._multi_data_shadow is not None + Args: + tasks: the shadow tasks there are + """ + self._shadow_tasks = tasks def analysis_complete(self): """Check whether self._analysis_results has a value.""" - return self._analysis_results or self._multi_results + return self._analysed - def get_multi_data(self, data_type): #TODO: Why do we have this as well when you can also just get multi_data_source itself? - """Get data according to type - - Args: - data_type: the type of data to get (either source or shadow) + def compare(self): + """Run the analyses""" + for trial in self._shadow_tasks: + self._stats.analyze(trial) + self._analysed = True + + def add_task(self, pnr, vnr, condition, source, shadow): + """ Add a task specifying all the necessary information + + Args: + pnr: the participant number + vnr: the video number + condition: the task of the shadowing trial + source: the source file + shadow: the shadow file """ - if data_type == "source": - return self._multi_data_source - else: - return self._multi_data_shadow + self._shadow_tasks.append( + ShadowTask(pnr, vnr, condition, source, shadow)) - def compare(self): - """"Run the analyses and saves the results.""" - if self.has_source() and self.has_shadow(): - if self._multi_data_shadow and not self._multi_data_source: - for key, data in self._multi_data_shadow.items(): - if self._id is ut.filter_key(key): - result = self._stats.analyze(self._data_source, data) - ut.add_to_dict(key+"_res", result, self._multi_results) - else: - pass - elif self._multi_data_shadow and self._multi_data_source: - - for key, source in self._multi_data_source.items(): - for key2, shadow in self._multi_data_shadow.items(): - if key == ut.filter_key(key2): - - result = self._stats.analyze(source, shadow) - ut.add_to_dict(key+"_res", - result, self._multi_results) - else: - pass - - elif self._data_source and self._data_shadow: - self._analysis_results = \ - self._stats.analyze(self._data_source, self._data_shadow) - - else: - print("cannot do this yet") - else: - pass + def create_delay_frame(self): + """Create the frame for displaying delays. + + Returns: + The dataframe with delays + """ + delay_frame = ({'participant': [], + 'assignment': [], + 'video': [], + 'words': [], + 'delays': []}) + for trial in self._shadow_tasks: + for result in trial.delays: + delay_frame['participant'].append(result[0]) + delay_frame['assignment'].append(result[1]) + delay_frame['video'].append(result[2]) + delay_frame['words'].append(result[3]) + delay_frame['delays'].append(result[4]) + + return pd.DataFrame(data=delay_frame) + + def create_mistake_frame(self): + """ Create the frame for the output + + Returns: + The filled data frame + """ + d = ({'participant': [], + 'assignment': [], + 'video': [], + 'accuracy': [], + '#mistakes': [], + '#phonetic': [], + '#repetition': [], + '#form': [], + '#semantic': [], + '#skipped': [], + '#random': []}) + for trial in self._shadow_tasks: + d['participant'].append(trial.participant) + d['assignment'].append(trial.condition) + d['video'].append(trial.video) + d['accuracy'].append(trial.results['accuracy']) + d['#mistakes'].append(trial.results['#mistakes']) + d['#phonetic'].append(trial.results['#phonetic']) + d['#repetition'].append(trial.results['#repetition']) + d['#form'].append(trial.results['#form']) + d['#semantic'].append(trial.results['#semantic']) + d['#skipped'].append(trial.results['#skipped']) + d['#random'].append(trial.results['#random']) + + return pd.DataFrame(data=d) diff --git a/umbra/needleman_wunsch.py b/umbra/needleman_wunsch.py index 22928809ac7b09f38d6b99b0af96acae3221925d..149f133348ae21f29a048b6fec35177b886cb6a6 100644 --- a/umbra/needleman_wunsch.py +++ b/umbra/needleman_wunsch.py @@ -3,14 +3,13 @@ from alignment_strategy import AlignmentStrategy from words import Sentence import numpy as np from mistake_enum import Mistake -from dutch_mmetaphone import DutchPhonetics class NeedlemanWunsch(AlignmentStrategy): """Class that aligns the words of a shadow and source file according to the Needleman-Wunsch algorithm""" - def __init__(self, seman_checker, form_checker): + def __init__(self, seman_checker, form_checker, phon_checker): super().__init__() self._match = 4 self._mismatch = -2 @@ -25,10 +24,11 @@ class NeedlemanWunsch(AlignmentStrategy): self._matrix = None self._seman_checker = seman_checker self._form_checker = form_checker + self._phon_checker = phon_checker def alignment_options(self, match=None, mismatch=None, - gap_sc=None, seman_match=None, repetition=None, - form_match=None): + gap_sc=None, seman_match=None, phon_match=None, + repetition=None, form_match=None): """ Set the scores that are allocated whilst aligning. Can be changed one at a time or more at once. @@ -42,6 +42,8 @@ class NeedlemanWunsch(AlignmentStrategy): same. repetition: the score that is allocated when a shadow word is a stuttering + form_match: the score that is allocated when two words align by + virtue of form equivalence. """ if match: self._match = match @@ -113,8 +115,8 @@ class NeedlemanWunsch(AlignmentStrategy): elif self._seman_checker.semantically_related( source_word.word, shadow_word.word): value = self._seman_match - elif DutchPhonetics.compare(source_word.word, - shadow_word.word): + elif self._phon_checker.compare(source_word.word, + shadow_word.word): value = self._phon_match else: value = self._mismatch @@ -163,10 +165,9 @@ class NeedlemanWunsch(AlignmentStrategy): elif self._matrix[i][j]['pointer'] == 'up': alignment_source.append(Gap()) alignment_shadow.append(self._shadow[i - 1]) + self._shadow[i - 1].mistake = Mistake.RANDOM if self._check_repetition(i - 1): self._shadow[i - 1].mistake = Mistake.REPETITION - else: - self._shadow[i - 1].mistake = Mistake.RANDOM i -= 1 alignment_source.reverse() @@ -197,17 +198,19 @@ class NeedlemanWunsch(AlignmentStrategy): if source == shadow: source.shadowed = True shadow.correct = True - shadow.source = source + source.shadow = shadow elif self._seman_checker.semantically_related(source.word, shadow.word): source.shadowed = True source.mistake = Mistake.SEMANTIC shadow.mistake = Mistake.SEMANTIC + source.shadow = shadow elif self._form_checker.form_related(source.word, shadow.word): source.shadowed = True + source.shadow = shadow source.mistake = Mistake.FORM shadow.mistake = Mistake.FORM - elif DutchPhonetics.compare(source.word, shadow.word): + elif self._phon_checker.compare(source.word, shadow.word): source.shadowed = True source.mistake = Mistake.PHONETIC shadow.mistake = Mistake.PHONETIC diff --git a/umbra/shadow_task.py b/umbra/shadow_task.py new file mode 100644 index 0000000000000000000000000000000000000000..892df0773880d63284c3eda04a6dbb9f94d91791 --- /dev/null +++ b/umbra/shadow_task.py @@ -0,0 +1,54 @@ + +class ShadowTask: + """The task class that wraps all necessary data""" + + def __init__(self, participant, video, condition, source, shadow): + + self._participant = participant + self._video = video + self._condition = condition + self._source = source + self._shadow = shadow + self._results = None + self._delays = None + + @property + def participant(self): + return self._participant + + @property + def video(self): + return self._video + + @property + def condition(self): + return self._condition + + @property + def source(self): + return self._source + + @property + def shadow(self): + return self._shadow + + @property + def results(self): + return self._results + + @results.setter + def results(self, results): + self._results = results + + @property + def delays(self): + return self._delays + + @delays.setter + def delays(self, delays): + self._delays = delays + + def __str__(self): + return "participant: %s video: %s condition: %s " \ + "result: %s" % (self._participant, self._video, + self._condition, self._results) \ No newline at end of file diff --git a/umbra/statistics.py b/umbra/statistics.py index ef135a88089c90a9a9760404ae00ca7bd0e6c92e..1fe76449d452f96b5aa9d9a14f278515b7bd587d 100644 --- a/umbra/statistics.py +++ b/umbra/statistics.py @@ -4,6 +4,7 @@ from anchor_algorithm import AnchorAlgorithm from mistake_finder import MistakeFinder from semantic_checker import SemanticChecker from form_checker import FormChecker +from dutch_mmetaphone import DutchPhonetics from mistake_counter import MistakeCounter from utils import Utils as ut import time @@ -24,6 +25,7 @@ class Statistics: "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz") self._seman_checker = SemanticChecker() self._form_checker = FormChecker() + self._phon_checker = DutchPhonetics() self._mistake_finder = MistakeFinder(self._seman_checker, self._form_checker) self._mistake_counter = MistakeCounter() @@ -36,15 +38,19 @@ class Statistics: def strategy(self, strategy): self._strategy = strategy - def analyze(self, source, shadow): + def analyze(self, trial): """Perform the necessary analyses. Currently uses all strategies for testing purposes. Should only use the specified strategy upon release. Args: - source: the words in the source file - shadow: the words in the shadow file + trial: the trial to analyze + + Returns: + correctness """ + source = trial.source + shadow = trial.shadow # Make a deepcopy such that the testing is equal for both strategies: source_em = copy.deepcopy(source) @@ -55,18 +61,14 @@ class Statistics: results = 'Needleman-Wunsch' discrete_start_time = time.time() self._strategy = NeedlemanWunsch(self._seman_checker, - self._form_checker) + self._form_checker, + self._phon_checker) source_align, shadow_align = self._strategy.align(source, shadow) - for s_word in shadow_align: - if s_word.has_source(): - results += f'source: {s_word.source} shadow: {s_word}\n' correctness = self._strategy.correctly_shadowed(source) discrete_time = time.time() - discrete_start_time results += f'taken time:{discrete_time}\n' self._mistake_finder.print_for_nw(source_align, shadow_align) - self._mistake_finder.start(source_align, shadow_align) - self._mistake_counter.calculate_accuracy(source_align, shadow_align) # Alignment 2 print('\n Anchor-algorithm') @@ -75,14 +77,24 @@ class Statistics: self._strategy = AnchorAlgorithm() source_align_em, shadow_align_em = self._strategy.align(source_em, shadow_em) - for s_word in shadow_align_em: - if s_word.has_source(): - results += f'source: {s_word.source} shadow: {s_word}\n' correctness = self._strategy.correctly_shadowed(source_em) self._mistake_finder.start(source_align_em, shadow_align_em) discrete_time = time.time() - discrete_start_time results += f'taken time:{discrete_time}\n' - self._mistake_counter.\ - calculate_accuracy(source_align_em, shadow_align_em) - return source_align_em, shadow_align_em, correctness + # Now the results are from the Anchor algorithm analysis. + trial.results = self._mistake_counter.calculate_accuracy(source_em, + shadow_em) + + #IMPORTANT: This will not work on source_em because the matching is + # the other way around. + delays_per_word = [] + for source_word in source: + if source_word.has_shadow(): + delays_per_word.append([trial.participant, trial.condition, + trial.video, source_word.word, + source_word.get_difference + (source_word.shadow)]) + trial.delays = delays_per_word + + return correctness diff --git a/umbra/tests/test_controller.py b/umbra/tests/test_controller.py index 6ffb780cc8701af1d965d5b2b00ff20f1839aace..4eadfb4b2bde157fd269e5c49a896274b19ab74d 100644 --- a/umbra/tests/test_controller.py +++ b/umbra/tests/test_controller.py @@ -57,35 +57,36 @@ class ControllerTest(pyfakefs.fake_filesystem_unittest.TestCase): calls_but = [mock.call("save", "normal"), mock.call("compare", "disabled")] view.button_status.assert_has_calls(calls_but) - def test_read_folder_source(self): - model, view, controller = self.makeMVC() - model.get_multi_data.return_value = {} - fr = mock.Mock() - fr.read.return_value = "foo" - controller._filereader = fr - controller._source_files = ["00.T.csv"] #filename is barely legal but ok - controller._read_folder("source") - model.get_multi_data.assert_called_with("source") - controller._filereader.read.assert_has_calls([mock.call("00.T.csv", "source")]) - - def test_read_folder_none(self): - model, view, controller = self.makeMVC() - controller._shadow_files = ["foo"] - controller._read_folder("shadow") # Nothing should happen there - controller._source_files = ["bar"] # because no .csv in filepath - controller._read_folder("source") - controller._read_folder("") # try the empty - - def test_read_folder_shadow(self): - model, view, controller = self.makeMVC() - model.get_multi_data.return_value = {} - fr = mock.Mock() - fr.read.return_value = "foo" - controller._filereader = fr - controller._shadow_files = ["1_AO0.TextGrid.csv"] #filename is barely legal but ok - controller._read_folder("shadow") - model.get_multi_data.assert_called_with("shadow") - controller._filereader.read.assert_has_calls([mock.call("1_AO0.TextGrid.csv", "shadow")]) + # TODO: Rewrite tests below to mirror new _read_folder() structure + # def test_read_folder_source(self): + # model, view, controller = self.makeMVC() + # model.get_multi_data.return_value = {} + # fr = mock.Mock() + # fr.read.return_value = "foo" + # controller._filereader = fr + # controller._source_files = ["00.T.csv"] #filename is barely legal but ok + # controller._read_folder("source") + # model.get_multi_data.assert_called_with("source") + # controller._filereader.read.assert_has_calls([mock.call("00.T.csv", "source")]) + # + # def test_read_folder_none(self): + # model, view, controller = self.makeMVC() + # controller._shadow_files = ["foo"] + # controller._read_folder("shadow") # Nothing should happen there + # controller._source_files = ["bar"] # because no .csv in filepath + # controller._read_folder("source") + # controller._read_folder("") # try the empty + # + # def test_read_folder_shadow(self): + # model, view, controller = self.makeMVC() + # model.get_multi_data.return_value = {} + # fr = mock.Mock() + # fr.read.return_value = "foo" + # controller._filereader = fr + # controller._shadow_files = ["1_AO0.TextGrid.csv"] #filename is barely legal but ok + # controller._read_folder("shadow") + # model.get_multi_data.assert_called_with("shadow") + # controller._filereader.read.assert_has_calls([mock.call("1_AO0.TextGrid.csv", "shadow")]) def test_save_results(self): model, view, controller = self.makeMVC() diff --git a/umbra/utils.py b/umbra/utils.py index 995f9b58a297ca6e1a2fedc1f665107f6e737a35..93dc8eadbe0459a488e2652ee273bc94a02682a7 100644 --- a/umbra/utils.py +++ b/umbra/utils.py @@ -4,6 +4,7 @@ import sys class Utils: + @staticmethod def get_path(path): if sys.platform.startswith('linux'): return "./umbra/"+path @@ -23,8 +24,11 @@ class Utils: """Helper function that turns a sequence of time strings into a list of integers. - :param times: sequence of strings in hh:mm:ss.sss format. - :return: list of times in milliseconds. + Args: + times: sequence of strings in hh:mm:ss.sss format. + + Returns: + list of times in milliseconds. """ output = [] for time in times: @@ -51,31 +55,44 @@ class Utils: h, m = divmod(m, 60) return "%02i:%02i:%02i.%03i" % (h, m, s, ms) - def shadow_regex(path): - match = re.search("\d_AO[0-9]+", path) - if match: - key = match.group() - if ".csv" in path: - key = key+"csv" - return key + @staticmethod + def shadow_regex(path, video): + """Find the reduced path based on the video number + + Args: + path: the shadow path to check + video: the video number - def filter_key(string): - match = re.search("AO[0-9]+", string) + Returns: + participant: the participant number + task: the task for the participant + """ + match = re.search(r"(\d+)_(\D+){video}+\.T", path) + task = None + participant = None if match: - if len(match.group()[2:]) > 2: - return match.group()[2:4] - return match.group()[2:] + participant = match.group(1) + task = match.group(2) + return participant, task + @staticmethod def id_regex(path): - match = re.search("\d+\.T", path) - # Make sure that not different number accidentally - id = match.group() - if len(id) > 3 and ".csv" in path: - return id[:2] - elif len(id) <= 3 and ".csv" in path: - return id[0] + """Find reduced path for the source file by extracting its id + Args: + path: the path to reduce + Returns: + the found id for the file + """ + match = re.search(r"\d+\.T", path) + if match: + identifier = match.group() + else: + identifier = "" # match is None when no such path found + return identifier[:-2] + + @staticmethod def add_to_dict(key, value, dict, alternative=False): """Checks if key already exists in a dictionary before adding new key-value pair diff --git a/umbra/words.py b/umbra/words.py index f2dc49ee80c514b3a3bbb0dec94a4d04a9b934f0..2801d8b87326a6b532fc2957115ec3c3c6837e7c 100644 --- a/umbra/words.py +++ b/umbra/words.py @@ -99,14 +99,6 @@ class Word: """Anchor setter. Has to be overridden in the subclass.""" raise NotImplementedError - def get_difference(self, other): - """Get the difference between the onset of this word and the other. - - Args: - other: the other Word instance - """ - return other.onset - self._onset - class ShadowWord(Word): def __init__(self, word, onset, offset): @@ -178,6 +170,7 @@ class SourceWord(Word): def __init__(self, word, onset, offset): super().__init__(word, onset, offset) self._shadowed = False + self._shadow = None @property def shadowed(self): @@ -209,6 +202,39 @@ class SourceWord(Word): def _set_anchor(self, anchor): self._anchor = anchor + @property + def shadow(self): + """Getter for Shadow attribute + + Return: + The shadow word. + """ + return self._shadow + + @shadow.setter + def shadow(self, shadow): + """Setter for Source attribute + + Args: + shadow: the source word, presumably of type Word. + """ + self._shadow = shadow + + def has_shadow(self): + """Check whether this word has a shadow word that it is matched with + + Returns: + shadow: True if this word is matched, False otherwise. + """ + return self._shadow is not None + + def get_difference(self, other): + """Get the difference between the onset of this word and the other. + + Args: + other: the other Word instance + """ + return other.onset - self._onset class Sentence(list): def __init__(self, words):