diff --git a/umbra/controller.py b/umbra/controller.py index 9f696e975e70e767bde4c2e8dea0b18a090d7f3e..dd629561dfae346178fd2d7c72cf7abcd702d1d8 100644 --- a/umbra/controller.py +++ b/umbra/controller.py @@ -48,7 +48,7 @@ class Controller: elif action == 'save_txt': self._filewriter = TxtWriter() elif action == 'select_folder': - self._select_folder() + self._select_folder(type[0]) elif action == 'rm_all': self._delete_files(type[0], True) @@ -59,11 +59,16 @@ class Controller: folder_path = self._view.dir_dialog() file_names = os.listdir(folder_path) file_paths = [folder_path + "/" + x for x in file_names] - self._shadow_files = file_paths + self._set_path(file_paths, type) self._view.update_files(file_paths, type) if file_paths: self._view.button_status("select {}".format(type), "disabled") # TODO: Call 'buttons' by functionality, not object (type) + def _set_path(self, paths, type): + if type == 'source': + self._source_files = paths + else: + self._shadow_files = paths def _select_files(self, type): """Select files and add them to files list corresponding to type. @@ -76,8 +81,9 @@ class Controller: # TODO: Check if not already present files.extend(selection) self._view.update_files(files, type) - if type == "shadow" and files: - self._view.button_status("select_folder", "disabled") + if files: + self._view.button_status("select_folder {}".format(type), + "disabled") def _delete_files(self, type, all=False): # TODO: Not yet implemented fully """Remove selected file from files list corresponding to type. @@ -97,7 +103,7 @@ class Controller: else: selection = [file for file in files if selection.lower() in file.lower()][0] files.remove(selection) - self._view.button_status("select_folder".format(type), "normal") + self._view.button_status("select_folder {}".format(type), "normal") self._view.update_files(files, type) def _compare_files(self): @@ -115,10 +121,14 @@ class Controller: self._read_folder("shadow") else: self._read_files(self._shadow_files[0], "shadow") # TODO: Path is temp fix - - self._read_files(self._source_files[0], "source") + if len(self._source_files) > 1: + self._read_folder("source") + else: + self._read_files(self._source_files[0], "source") self._model.compare() - self._view.update_message('comparison complete') + if self._model.analysis_complete: + self._view.update_message('comparison complete') + self._view.button_status("compare", "disabled") def _save_results(self): """Save analysis results to file. If there is multiple results, @@ -167,8 +177,13 @@ class Controller: if "AO" not in file_path or ".csv" not in file_path: # Very dirty pass # TODO: raise error - else: - dict = self._model.multi_data_shadow + elif type == "shadow": + dict = self._model.get_multi_data(type) key = ut.shadow_regex(file_path) data = self._filereader.read(file_path, type) ut.add_to_dict(key, data, dict, alternative=True) + else: + dict = self._model.get_multi_data(type) + key = ut.id_regex(file_path) + data = self._filereader.read(file_path, type) + ut.add_to_dict(key, data, dict, alternative=True) diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py index 40fe64a7b0c9e9ddadf0af2c0e345df88b6a66f4..fb18835008b2d022fd28bbe0c43d7a7c055435c7 100644 --- a/umbra/mistake_finder.py +++ b/umbra/mistake_finder.py @@ -134,6 +134,7 @@ class MistakeFinder: while chain and index-i > 1: diff += 1 i += 1 + print(i, index) chain = self._shadow[i].word == self._shadow[index+diff].word if chain: @@ -193,4 +194,3 @@ class MistakeFinder: phonetic mistake. """ pass - diff --git a/umbra/model.py b/umbra/model.py index 045f16701779caf4ae2d6d1205f6d2b368877c4b..7f67dd85c393eb1811e76c9a87b3dc58be3a68a7 100644 --- a/umbra/model.py +++ b/umbra/model.py @@ -7,8 +7,9 @@ class Model: self._stats = Statistics(None) self._data_source = None self._multi_data_shadow = {} # dict because fast and indexing + self._multi_data_source = {} self._data_shadow = None - self._analysis_results = None + self._analysis_results = {} self._multi_results = {} self._save_pref = None self._id = "" # this is a string @@ -77,6 +78,14 @@ class Model: def multi_data_shadow(self, data): self._multi_data_shadow = data + @property + def multi_data_source(self): + return self._multi_data_source + + @multi_data_source.setter + def multi_data_source(self, data): + self._multi_data_source = data + @property def data_shadow(self): return self._data_shadow @@ -91,7 +100,8 @@ class Model: def has_source(self): """Check whether self._data_source has a value.""" - return self._data_source is not None + return self._data_source is not None \ + or self._multi_data_source is not None def has_shadow(self): """Check whether self._data_shadow has a value.""" @@ -100,23 +110,44 @@ class Model: def analysis_complete(self): """Check whether self._analysis_results has a value.""" - return self._analysis_results is not None \ - or self._multi_results is not None + print(self.analysis_results, self.multi_results) + return self._analysis_results or self._multi_results + + def get_multi_data(self, type): + if type == "source": + return self._multi_data_source + else: + return self._multi_data_shadow def compare(self): """"Run the analyses and saves the results.""" if self.has_source() and self.has_shadow(): - if self._multi_data_shadow: + if self._multi_data_shadow and not self._multi_data_source: for key, data in self._multi_data_shadow.items(): - if self._id is ut.filter_key(key): result = self._stats.analyze(self._data_source, data) ut.add_to_dict(key+"_res", result, self._multi_results) else: pass - else: - + elif self._multi_data_shadow and self._multi_data_source: + for key, source in self._multi_data_source.items(): + for key2, shadow in self._multi_data_shadow.items(): + #TODO: Problem: what if filename does not comply + if key in key2: + print("result of", key, key2) + print("*"*100) + result = self._stats.analyze(source, shadow) + + ut.add_to_dict(key+"_res", + result, self._multi_results) + else: + pass + + elif self._data_source and self._data_shadow: self._analysis_results = \ self._stats.analyze(self._data_source, self._data_shadow) + + else: + print("cannot do this yet") else: pass diff --git a/umbra/saa_Romeo.py b/umbra/saa_Romeo.py index 96162e6adcfc78c642ee32871519d34eeac8d004..4d462c7ff8bbf9df6bf94fa72af0427375e4df27 100644 --- a/umbra/saa_Romeo.py +++ b/umbra/saa_Romeo.py @@ -115,7 +115,7 @@ class SaaRomeo(AlignmentStrategy): if matrix[i][j]['pointer'] == 'diag': alignment_source.append(self._source[j - 1]) alignment_shadow.append(self._shadow[i - 1]) - new_source,new_shadow = self._equals_checker( + new_source, new_shadow = self._equals_checker( self._source[j-1],self._shadow[i-1]) self._source[j-1] = new_source self._shadow[i-1] = new_shadow @@ -153,5 +153,4 @@ class SaaRomeo(AlignmentStrategy): source.shadowed = True source.mistake = Mistake.SEMANTIC shadow.mistake = Mistake.SEMANTIC - return source,shadow - + return source, shadow diff --git a/umbra/semantic_checker.py b/umbra/semantic_checker.py index d5c11376386d924434a23ed7d58612779717a86e..b7df79222a2dc8b7868d9ed43808bd14f70337f0 100644 --- a/umbra/semantic_checker.py +++ b/umbra/semantic_checker.py @@ -2,7 +2,7 @@ from OpenDutchWordnet.le import Le from OpenDutchWordnet.synset import Synset from bisect import bisect_left from OpenDutchWordnet.wn_grid_parser import Wn_grid_parser - +from utils import Utils as ut class SemanticChecker(): @@ -13,8 +13,8 @@ class SemanticChecker(): Args: parser: object of the Wn_grid_parser class """ - self._parser = Wn_grid_parser( - "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz") + self._path = "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz" + self._parser = Wn_grid_parser(ut.get_path(self._path)) # "./umbra/OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz") # Ubuntu path, temporary 'fix' self._parser.load_synonyms_dicts() self._lemma2synsets = getattr(self._parser,'lemma2synsets') @@ -212,4 +212,4 @@ class SemanticChecker(): target_synsets = self._find_synsets(target_ids) all_relations.update(target_synsets) return self._hypernym_helper(hypernyms,all_relations,depth, - current_depth+1) \ No newline at end of file + current_depth+1) diff --git a/umbra/statistics.py b/umbra/statistics.py index 68cb5a8574af050301dd3697fab7c95d83656020..ab5116e04f3e018f5c5460f9b19247c5c4083b0a 100644 --- a/umbra/statistics.py +++ b/umbra/statistics.py @@ -49,7 +49,7 @@ class Statistics: print('Romeo') self._strategy = SaaRomeo(self._seman_checker) source_align, shadow_align = self._strategy.align(source, shadow) - for s_word in shadow_align: + for s_word in shadow_align: if s_word.has_source(): print(f'source: {s_word.source} shadow: {s_word}') correctness = self._strategy.correctly_shadowed(source) diff --git a/umbra/utils.py b/umbra/utils.py index 14506edbaa1d184277bfc773ec6d49f552ab81b5..59e6f1d619f7d525817d8a7f002bde9e2da9b2d0 100644 --- a/umbra/utils.py +++ b/umbra/utils.py @@ -1,6 +1,7 @@ import re import sys + class Utils: def get_path(path): @@ -57,10 +58,13 @@ class Utils: return match.group()[2:] def id_regex(path): - match = re.search("\d\.TextGrid", path) + match = re.search("\d+\.T", path) # Make sure that not different number accidentally - if match: - return match.group()[0] + id = match.group() + if len(id) > 3 and ".csv" in path: + return id[:2] + elif len(id) <= 3 and ".csv" in path: + return id[0] def add_to_dict(key, value, dict, alternative=False): @@ -77,11 +81,9 @@ class Utils: dict[key] = value else: if alternative: - cnt = 0 + cnt = 1 for k, v in dict.items(): - if key is k: + if key is k or k in key: cnt += 1 key += "({})".format(str(cnt)) dict[key] = value - raise ValueError("Key '{}' already exists in {}." - .format(key, dict)) diff --git a/umbra/view.py b/umbra/view.py index 1f178d1a8baf0eb62bb00d64f56d4efafeff4058..d3df7b7d631348b60bfe1e175b9745f3accf877a 100644 --- a/umbra/view.py +++ b/umbra/view.py @@ -38,7 +38,7 @@ class View: # Compare and save buttons self._create_button('compare', self._frame, 'Compare', 4, 1) self._create_button('save', self._frame, 'Save result', 4, 2) - self.button_status("save","disabled") + self.button_status("save", "disabled") # Message self._create_label('message', self._frame, "", 5, 1) @@ -143,7 +143,8 @@ class View: 'delete shadow', 'compare', 'save', - 'select_folder', + 'select_folder source', + 'select_folder shadow', 'rm_all source', 'rm_all shadow', ] @@ -200,8 +201,8 @@ class View: 'Delete all {} files'.format(type), 8, column) - if type == 'shadow': - self._create_button('select_folder', frame, + + self._create_button('select_folder {}'.format(type), frame, 'Select {} folder'.format(type), 6, column)