diff --git a/umbra/controller.py b/umbra/controller.py
index 9d957cd7f7c1f5c2a523d0af09d9c6ee5070ee4e..649a9317be054035d072051049bee11ffab03f48 100644
--- a/umbra/controller.py
+++ b/umbra/controller.py
@@ -1,7 +1,8 @@
 from filereader import CSVReader
-from filereader import CSVWriter
+from filereader import CSVWriter, TxtWriter
 from utils import Utils as ut
 import os
+import numpy as np
 
 
 class Controller:
@@ -27,7 +28,7 @@ class Controller:
         return self._source_files
 
     @source_files.setter
-    def shadow_files(self, val):
+    def source_files(self, val):
         self._source_files = val
 
     @property
@@ -97,7 +98,7 @@ class Controller:
         """
         files = getattr(self, '_{}_files'.format(type))
         selection = self._view.select_files(type)
-        # TODO: Check if not already present
+        # TODO: Check if not already present. R: "make it a set"?
         files.extend(selection)
         self._view.update_files(files, type)
         if files:
@@ -120,9 +121,11 @@ class Controller:
                     files.remove(s)
                 self._view.button_status("select {}".format(type), "normal")
             else:
-                selection = [file for file in files if selection.lower() in file.lower()][0]
+                selection = [file for file in files if selection.lower() in
+                             file.lower()][0]
                 files.remove(selection)
-                self._view.button_status("select_folder {}".format(type), "normal")
+                self._view.button_status("select_folder {}".format(type),
+                                         "normal")
             self._view.update_files(files, type)
 
     def _compare_files(self):
@@ -137,22 +140,20 @@ class Controller:
             self._view.update_message('files ok')
             self._view.button_status("save", "normal")
 
-            self._read_folder("shadow")
-            self._read_folder("source")
+            self._read_folder()
 
             self._model.compare()
             if self._model.analysis_complete:
                 self._view.update_message('comparison complete')
                 self._view.button_status("compare", "disabled")
 
-    def _save_results(self):
+    def _save_results(self):  # TODO: This function will not work as of now, something for later.
         """Save analysis results to file. If there is multiple results,
         save those.
         """
         if not self._model.analysis_complete():
             self._view.update_message('no comparison')
         else:
-
             if self._model.multi_results:
                 path = self._view.ask_save_location()
                 results = self._model.multi_results
@@ -162,43 +163,22 @@ class Controller:
                     and not self._model.analysis_results:
                 self._view.update_message('nonexistent')
 
-    """
-        def _read_files(self, path, type):
-            Read data from file paths and save to model.
-
-            Args:
-                type (str): Role of file ('source' or 'shadow')
-
-            data = self._filereader.read(path, type)
-            if type == "source":
-                self._model.data_source = data
-                self._model.id = ut.id_regex(path)
-            else:
-                self._model.data_shadow = data
-    """
-
-    def _read_folder(self, type):
+    def _read_folder(self):
         """
-        Read in multiple source files.
-        Extensible to consider "type". Now assume shadow
-        files.
+        Read each source file and add a task per matching shadow file.
         """
-        if type == "shadow":
-            for file_path in self._shadow_files:
-                if "AO" not in file_path or ".csv" not in file_path:
-                    # Very dirty
-                    pass  # TODO: raise error
-                else:
-                    dict = self._model.get_multi_data(type)
-                    key = ut.shadow_regex(file_path)
-                    data = self._filereader.read(file_path, type)
-                    ut.add_to_dict(key, data, dict, alternative=True)
-        elif type == "source":
-            for file_path in self._source_files:
-                if ".csv" not in file_path:
-                    pass
-                else:
-                    dict = self._model.get_multi_data(type)
-                    key = ut.id_regex(file_path)
-                    data = self._filereader.read(file_path, type)
-                    ut.add_to_dict(key, data, dict, alternative=True)
+        unmatched = list(self._shadow_files)  # shadows not paired yet
+        for file_path in self._source_files:
+            video = ut.id_regex(file_path)
+            source_data = self._filereader.read(file_path, "source")
+            still_unmatched = []
+            for shadow_candidate in unmatched:
+                participant, task = ut.shadow_regex(shadow_candidate, video)
+                if task is None:
+                    still_unmatched.append(shadow_candidate)
+                    continue
+                shadow_data = self._filereader.read(shadow_candidate, "shadow")
+                self._model.add_task(participant, video, task, source_data,
+                                     shadow_data)
+            # Paired shadows are dropped so later sources skip them.
+            unmatched = still_unmatched
diff --git a/umbra/mistake_counter.py b/umbra/mistake_counter.py
index 3fdbceec8470029be0b1b2ecc0357940e929a9e7..30441c339fb0b4e299126838196991ca8e4963bf 100644
--- a/umbra/mistake_counter.py
+++ b/umbra/mistake_counter.py
@@ -33,12 +33,6 @@ class MistakeCounter:
         skipped = self.count_mistakes(source, Mistake.SKIPPED)
         random = self.count_mistakes(shadow, Mistake.RANDOM)
         form = self.count_mistakes(source, Mistake.FORM)
-        print("nr of repetition mistakes:", repetition)
-        print("nr of phonetic mistakes:", phonetic)
-        print("nr of semantic mistakes:", semantic)
-        print("nr of skipped words in source:", skipped)
-        print("nr of random words in shadow:", random)
-        print("nr of form mistakes:", form)
         return repetition, phonetic, semantic, skipped, random, form
 
     def count_mistakes(self, sentence, mistake):
@@ -78,5 +72,12 @@ class MistakeCounter:
             self.analyse_mistakes(source, shadow)
         mistakes = repetition+phonetic+semantic+skipped+random+form
         accuracy = (len(source)-mistakes)/len(source)
-        print("The accuracy of the shadowing task is", accuracy)
-        return accuracy
\ No newline at end of file
+        # '#form' is read by Model.create_mistake_frame, so it must be present.
+        return {'accuracy': accuracy,
+                '#mistakes': mistakes,
+                '#phonetic': phonetic,
+                '#repetition': repetition,
+                '#form': form,
+                '#semantic': semantic,
+                '#skipped': skipped,
+                '#random': random}
diff --git a/umbra/mistake_finder.py b/umbra/mistake_finder.py
index 90ac984de2a947b19cd92e25189221cb30062f74..8587e1bd14972ab7c37faeb5314a5f9c08b158f5 100644
--- a/umbra/mistake_finder.py
+++ b/umbra/mistake_finder.py
@@ -257,4 +257,4 @@ class MistakeFinder:
         self._shadow = shadow
         for word in self._shadow:
             if not word.correct and word.mistake != Mistake.RANDOM:
-                print(str(word) + " " + str(word.mistake))
+                print(str(word) + " " + str(word.mistake))
\ No newline at end of file
diff --git a/umbra/model.py b/umbra/model.py
index 46fe906652857c146efdb9801d1abfc831fa859a..4953bb8ee1e4930cf3517d0a7027b6a912e7c5b5 100644
--- a/umbra/model.py
+++ b/umbra/model.py
@@ -1,136 +1,105 @@
 from statistics import Statistics
-from utils import Utils as ut
+from shadow_task import ShadowTask
+import pandas as pd
 
 
 class Model:
     """Internal data representation and processing."""
     def __init__(self):
         self._stats = Statistics(None)
-        self._data_source = None
-        self._multi_data_shadow = {}  # dict because fast and indexing
-        self._multi_data_source = {}
-        self._data_shadow = None
-        self._analysis_results = {}
-        self._multi_results = {}
-        self._save_pref = None
-        self._id = "" # this is a string
+        self._analysed = False
+        self._shadow_tasks = []
 
     @property
-    def multi_results(self):
-        return self._multi_results
+    def shadow_tasks(self):
+        """Getter for the shadow tasks.
 
-    @multi_results.setter
-    def multi_results(self, results):
-        self._multi_results = results
-
-    @property
-    def analysis_results(self):
-        return self._analysis_results
-
-    @analysis_results.setter
-    def analysis_results(self, results):
-        self._analysis_results = results
-
-    @property
-    def data_shadow(self):
-        return self._data_shadow
-
-    @data_shadow.setter
-    def data_shadow(self, data):
-        self._data_shadow = data
-
-    @property
-    def multi_data_shadow(self):
-        return self._multi_data_shadow
-
-    @multi_data_shadow.setter
-    def multi_data_shadow(self, data):
-        self._multi_data_shadow = data
-
-    @property
-    def id(self):
-        return self._id
-
-    @id.setter # TODO: DO we still use this? As far as I have seen, we do not. Also, change name of id here.
-    def id(self, id):
-        self._id = id
-
-    @property
-    def save_pref(self):
-        return self._save_pref
-
-    @save_pref.setter
-    def save_pref(self, save_pref_code):
-        self._save_pref = save_pref_code
-
-    @property
-    def data_source(self):
-        return self._data_source
-
-    @data_source.setter
-    def data_source(self, data):
-        self._data_source = data
-
-    @property
-    def multi_data_source(self):
-        return self._multi_data_source
-
-    @multi_data_source.setter
-    def multi_data_source(self, data):
-        self._multi_data_source = data
+        Returns:
+             the shadow tasks
+        """
+        return self._shadow_tasks
 
-    def has_source(self):
-        """Check whether self._data_source has a value."""
-        return self._data_source is not None \
-            or self._multi_data_source is not None
+    @shadow_tasks.setter
+    def shadow_tasks(self, tasks):
+        """Set the shadow tasks with the given tasks.
 
-    def has_shadow(self):
-        """Check whether self._data_shadow has a value."""
-        return self._data_shadow is not None  \
-            or self._multi_data_shadow is not None
+        Args:
+            tasks: the shadow tasks there are
+        """
+        self._shadow_tasks = tasks
 
     def analysis_complete(self):
         """Check whether self._analysis_results has a value."""
-        return self._analysis_results or self._multi_results
+        return self._analysed
 
-    def get_multi_data(self, data_type): #TODO: Why do we have this as well when you can also just get multi_data_source itself?
-        """Get data according to type
-
-        Args:
-            data_type: the type of data to get (either source or shadow)
+    def compare(self):
+        """Analyze every shadow task and mark the model as analysed."""
+        for trial in self._shadow_tasks:
+            self._stats.analyze(trial)
+        self._analysed = True
+
+    def add_task(self, pnr, vnr, condition, source, shadow):
+        """ Add a task specifying all the necessary information
+
+            Args:
+                pnr: the participant number
+                vnr: the video number
+                condition: the task of the shadowing trial
+                source: the source file
+                shadow: the shadow file
         """
-        if data_type == "source":
-            return self._multi_data_source
-        else:
-            return self._multi_data_shadow
+        self._shadow_tasks.append(
+            ShadowTask(pnr, vnr, condition, source, shadow))
 
-    def compare(self):
-        """"Run the analyses and saves the results."""
-        if self.has_source() and self.has_shadow():
-            if self._multi_data_shadow and not self._multi_data_source:
-                for key, data in self._multi_data_shadow.items():
-                    if self._id is ut.filter_key(key):
-                        result = self._stats.analyze(self._data_source, data)
-                        ut.add_to_dict(key+"_res", result, self._multi_results)
-                    else:
-                        pass
-            elif self._multi_data_shadow and self._multi_data_source:
-
-                for key, source in self._multi_data_source.items():
-                    for key2, shadow in self._multi_data_shadow.items():
-                        if key == ut.filter_key(key2):
-
-                            result = self._stats.analyze(source, shadow)
-                            ut.add_to_dict(key+"_res",
-                                                result, self._multi_results)
-                        else:
-                            pass
-
-            elif self._data_source and self._data_shadow:
-                self._analysis_results = \
-                    self._stats.analyze(self._data_source, self._data_shadow)
-
-            else:
-                print("cannot do this yet")
-        else:
-            pass
+    def create_delay_frame(self):
+        """Create the frame for displaying delays.
+
+        Each entry of a trial's ``delays`` supplies, by position, the
+        participant, assignment, video, word and delay of one row.
+
+        Returns:
+             The dataframe with delays
+        """
+        columns = ('participant', 'assignment', 'video', 'words', 'delays')
+        delay_frame = {column: [] for column in columns}
+        for trial in self._shadow_tasks:
+            # delays is None until the trial has been analysed; such trials
+            # contribute no rows instead of raising a TypeError.
+            for result in trial.delays or []:
+                for index, column in enumerate(columns):
+                    delay_frame[column].append(result[index])
+
+        return pd.DataFrame(data=delay_frame)
+
+
+    def create_mistake_frame(self):
+        """Create the frame for the output.
+
+        Each analysed trial contributes one row: its participant, condition
+        and video followed by the values stored in its ``results`` mapping.
+        Trials whose results are still None (not analysed) are skipped
+        rather than raising a TypeError on the subscript.
+
+        Returns:
+             The filled data frame
+        """
+        result_keys = ('accuracy',
+                       '#mistakes',
+                       '#phonetic',
+                       '#repetition',
+                       '#form',
+                       '#semantic',
+                       '#skipped',
+                       '#random')
+        d = {'participant': [], 'assignment': [], 'video': []}
+        d.update((key, []) for key in result_keys)
+        for trial in self._shadow_tasks:
+            if trial.results is None:
+                continue
+            d['participant'].append(trial.participant)
+            d['assignment'].append(trial.condition)
+            d['video'].append(trial.video)
+            for key in result_keys:
+                d[key].append(trial.results[key])
+
+        return pd.DataFrame(data=d)
diff --git a/umbra/needleman_wunsch.py b/umbra/needleman_wunsch.py
index 22928809ac7b09f38d6b99b0af96acae3221925d..149f133348ae21f29a048b6fec35177b886cb6a6 100644
--- a/umbra/needleman_wunsch.py
+++ b/umbra/needleman_wunsch.py
@@ -3,14 +3,13 @@ from alignment_strategy import AlignmentStrategy
 from words import Sentence
 import numpy as np
 from mistake_enum import Mistake
-from dutch_mmetaphone import DutchPhonetics
 
 
 class NeedlemanWunsch(AlignmentStrategy):
     """Class that aligns the words of a shadow and source file according
     to the Needleman-Wunsch algorithm"""
 
-    def __init__(self, seman_checker, form_checker):
+    def __init__(self, seman_checker, form_checker, phon_checker):
         super().__init__()
         self._match = 4
         self._mismatch = -2
@@ -25,10 +24,11 @@ class NeedlemanWunsch(AlignmentStrategy):
         self._matrix = None
         self._seman_checker = seman_checker
         self._form_checker = form_checker
+        self._phon_checker = phon_checker
 
     def alignment_options(self, match=None, mismatch=None,
-                          gap_sc=None, seman_match=None, repetition=None,
-                          form_match=None):
+                          gap_sc=None, seman_match=None, phon_match=None,
+                          repetition=None, form_match=None):
         """ Set the scores that are allocated whilst aligning. Can be changed
             one at a time or more at once.
 
@@ -42,6 +42,8 @@ class NeedlemanWunsch(AlignmentStrategy):
              same.
              repetition: the score that is allocated when a shadow word is
              a stuttering
+             form_match: the score that is allocated when two words align by
+             virtue of form equivalence.
         """
         if match:
             self._match = match
@@ -113,8 +115,8 @@ class NeedlemanWunsch(AlignmentStrategy):
                 elif self._seman_checker.semantically_related(
                         source_word.word, shadow_word.word):
                     value = self._seman_match
-                elif DutchPhonetics.compare(source_word.word,
-                                            shadow_word.word):
+                elif self._phon_checker.compare(source_word.word,
+                                                shadow_word.word):
                     value = self._phon_match
                 else:
                     value = self._mismatch
@@ -163,10 +165,9 @@ class NeedlemanWunsch(AlignmentStrategy):
             elif self._matrix[i][j]['pointer'] == 'up':
                 alignment_source.append(Gap())
                 alignment_shadow.append(self._shadow[i - 1])
+                self._shadow[i - 1].mistake = Mistake.RANDOM
                 if self._check_repetition(i - 1):
                     self._shadow[i - 1].mistake = Mistake.REPETITION
-                else:
-                    self._shadow[i - 1].mistake = Mistake.RANDOM
                 i -= 1
 
         alignment_source.reverse()
@@ -197,17 +198,19 @@ class NeedlemanWunsch(AlignmentStrategy):
         if source == shadow:
             source.shadowed = True
             shadow.correct = True
-            shadow.source = source
+            source.shadow = shadow
         elif self._seman_checker.semantically_related(source.word,
                                                       shadow.word):
             source.shadowed = True
             source.mistake = Mistake.SEMANTIC
             shadow.mistake = Mistake.SEMANTIC
+            source.shadow = shadow
         elif self._form_checker.form_related(source.word, shadow.word):
             source.shadowed = True
+            source.shadow = shadow
             source.mistake = Mistake.FORM
             shadow.mistake = Mistake.FORM
-        elif DutchPhonetics.compare(source.word, shadow.word):
+        elif self._phon_checker.compare(source.word, shadow.word):
             source.shadowed = True
             source.mistake = Mistake.PHONETIC
             shadow.mistake = Mistake.PHONETIC
diff --git a/umbra/shadow_task.py b/umbra/shadow_task.py
new file mode 100644
index 0000000000000000000000000000000000000000..892df0773880d63284c3eda04a6dbb9f94d91791
--- /dev/null
+++ b/umbra/shadow_task.py
@@ -0,0 +1,54 @@
+
+class ShadowTask:
+    """One shadowing trial: its identifiers, its data and its outcomes."""
+
+    def __init__(self, participant, video, condition, source, shadow):
+        """Store the trial data; results and delays start as None."""
+        self._participant = participant
+        self._video = video
+        self._condition = condition
+        self._source = source
+        self._shadow = shadow
+        self._results = None
+        self._delays = None
+
+    @property
+    def participant(self):
+        return self._participant
+
+    @property
+    def video(self):
+        return self._video
+
+    @property
+    def condition(self):
+        return self._condition
+
+    @property
+    def source(self):
+        return self._source
+
+    @property
+    def shadow(self):
+        return self._shadow
+
+    @property
+    def results(self):
+        return self._results
+
+    @results.setter
+    def results(self, results):
+        self._results = results
+
+    @property
+    def delays(self):
+        return self._delays
+
+    @delays.setter
+    def delays(self, delays):
+        self._delays = delays
+
+    def __str__(self):
+        return "participant: %s video: %s condition: %s " \
+           "result: %s" % (self._participant, self._video,
+                           self._condition, self._results)
\ No newline at end of file
diff --git a/umbra/statistics.py b/umbra/statistics.py
index ef135a88089c90a9a9760404ae00ca7bd0e6c92e..1fe76449d452f96b5aa9d9a14f278515b7bd587d 100644
--- a/umbra/statistics.py
+++ b/umbra/statistics.py
@@ -4,6 +4,7 @@ from anchor_algorithm import AnchorAlgorithm
 from mistake_finder import MistakeFinder
 from semantic_checker import SemanticChecker
 from form_checker import FormChecker
+from dutch_mmetaphone import DutchPhonetics
 from mistake_counter import MistakeCounter
 from utils import Utils as ut
 import time
@@ -24,6 +25,7 @@ class Statistics:
             "OpenDutchWordnet/resources/odwn/odwn_orbn_gwg-LMF_1.3.xml.gz")
         self._seman_checker = SemanticChecker()
         self._form_checker = FormChecker()
+        self._phon_checker = DutchPhonetics()
         self._mistake_finder = MistakeFinder(self._seman_checker,
                                              self._form_checker)
         self._mistake_counter = MistakeCounter()
@@ -36,15 +38,19 @@ class Statistics:
     def strategy(self, strategy):
         self._strategy = strategy
 
-    def analyze(self, source, shadow):
+    def analyze(self, trial):
         """Perform the necessary analyses.
         Currently uses all strategies for testing purposes.
         Should only use the specified strategy upon release.
 
         Args:
-            source: the words in the source file
-            shadow: the words in the shadow file
+            trial: the trial to analyze
+
+        Returns:
+            correctness
         """
+        source = trial.source
+        shadow = trial.shadow
 
         # Make a deepcopy such that the testing is equal for both strategies:
         source_em = copy.deepcopy(source)
@@ -55,18 +61,14 @@ class Statistics:
         results = 'Needleman-Wunsch'
         discrete_start_time = time.time()
         self._strategy = NeedlemanWunsch(self._seman_checker,
-                                         self._form_checker)
+                                         self._form_checker,
+                                         self._phon_checker)
         source_align, shadow_align = self._strategy.align(source, shadow)
-        for s_word in shadow_align:
-            if s_word.has_source():
-                results += f'source: {s_word.source} shadow: {s_word}\n'
         correctness = self._strategy.correctly_shadowed(source)
         discrete_time = time.time() - discrete_start_time
         results += f'taken time:{discrete_time}\n'
 
         self._mistake_finder.print_for_nw(source_align, shadow_align)
-        self._mistake_finder.start(source_align, shadow_align)
-        self._mistake_counter.calculate_accuracy(source_align, shadow_align)
 
         # Alignment 2
         print('\n Anchor-algorithm')
@@ -75,14 +77,24 @@ class Statistics:
         self._strategy = AnchorAlgorithm()
         source_align_em, shadow_align_em = self._strategy.align(source_em,
                                                                 shadow_em)
-        for s_word in shadow_align_em:
-            if s_word.has_source():
-                results += f'source: {s_word.source} shadow: {s_word}\n'
         correctness = self._strategy.correctly_shadowed(source_em)
         self._mistake_finder.start(source_align_em, shadow_align_em)
         discrete_time = time.time() - discrete_start_time
         results += f'taken time:{discrete_time}\n'
-        self._mistake_counter.\
-        calculate_accuracy(source_align_em, shadow_align_em)
 
-        return source_align_em, shadow_align_em, correctness
+        # Now the results are from the Anchor algorithm analysis.
+        trial.results = self._mistake_counter.calculate_accuracy(source_em,
+                                                                 shadow_em)
+
+        #IMPORTANT: This will not work on source_em because the matching is
+        # the other way around.
+        delays_per_word = []
+        for source_word in source:
+            if source_word.has_shadow():
+                delays_per_word.append([trial.participant, trial.condition,
+                                       trial.video, source_word.word,
+                                        source_word.get_difference
+                                        (source_word.shadow)])
+        trial.delays = delays_per_word
+
+        return correctness
diff --git a/umbra/tests/test_controller.py b/umbra/tests/test_controller.py
index 6ffb780cc8701af1d965d5b2b00ff20f1839aace..4eadfb4b2bde157fd269e5c49a896274b19ab74d 100644
--- a/umbra/tests/test_controller.py
+++ b/umbra/tests/test_controller.py
@@ -57,35 +57,36 @@ class ControllerTest(pyfakefs.fake_filesystem_unittest.TestCase):
         calls_but = [mock.call("save", "normal"), mock.call("compare", "disabled")]
         view.button_status.assert_has_calls(calls_but)
 
-    def test_read_folder_source(self):
-        model, view, controller = self.makeMVC()
-        model.get_multi_data.return_value = {}
-        fr = mock.Mock()
-        fr.read.return_value = "foo"
-        controller._filereader = fr
-        controller._source_files = ["00.T.csv"] #filename is barely legal but ok
-        controller._read_folder("source")
-        model.get_multi_data.assert_called_with("source")
-        controller._filereader.read.assert_has_calls([mock.call("00.T.csv", "source")])
-
-    def test_read_folder_none(self):
-        model, view, controller = self.makeMVC()
-        controller._shadow_files = ["foo"]
-        controller._read_folder("shadow")     # Nothing should happen there
-        controller._source_files = ["bar"]    # because no .csv in filepath
-        controller._read_folder("source")
-        controller._read_folder("")           # try the empty
-
-    def test_read_folder_shadow(self):
-        model, view, controller = self.makeMVC()
-        model.get_multi_data.return_value = {}
-        fr = mock.Mock()
-        fr.read.return_value = "foo"
-        controller._filereader = fr
-        controller._shadow_files = ["1_AO0.TextGrid.csv"] #filename is barely legal but ok
-        controller._read_folder("shadow")
-        model.get_multi_data.assert_called_with("shadow")
-        controller._filereader.read.assert_has_calls([mock.call("1_AO0.TextGrid.csv", "shadow")])
+    # TODO: Rewrite tests below to mirror new _read_folder() structure
+    # def test_read_folder_source(self):
+    #     model, view, controller = self.makeMVC()
+    #     model.get_multi_data.return_value = {}
+    #     fr = mock.Mock()
+    #     fr.read.return_value = "foo"
+    #     controller._filereader = fr
+    #     controller._source_files = ["00.T.csv"] #filename is barely legal but ok
+    #     controller._read_folder("source")
+    #     model.get_multi_data.assert_called_with("source")
+    #     controller._filereader.read.assert_has_calls([mock.call("00.T.csv", "source")])
+    #
+    # def test_read_folder_none(self):
+    #     model, view, controller = self.makeMVC()
+    #     controller._shadow_files = ["foo"]
+    #     controller._read_folder("shadow")     # Nothing should happen there
+    #     controller._source_files = ["bar"]    # because no .csv in filepath
+    #     controller._read_folder("source")
+    #     controller._read_folder("")           # try the empty
+    #
+    # def test_read_folder_shadow(self):
+    #     model, view, controller = self.makeMVC()
+    #     model.get_multi_data.return_value = {}
+    #     fr = mock.Mock()
+    #     fr.read.return_value = "foo"
+    #     controller._filereader = fr
+    #     controller._shadow_files = ["1_AO0.TextGrid.csv"] #filename is barely legal but ok
+    #     controller._read_folder("shadow")
+    #     model.get_multi_data.assert_called_with("shadow")
+    #     controller._filereader.read.assert_has_calls([mock.call("1_AO0.TextGrid.csv", "shadow")])
 
     def test_save_results(self):
         model, view, controller = self.makeMVC()
diff --git a/umbra/utils.py b/umbra/utils.py
index 995f9b58a297ca6e1a2fedc1f665107f6e737a35..93dc8eadbe0459a488e2652ee273bc94a02682a7 100644
--- a/umbra/utils.py
+++ b/umbra/utils.py
@@ -4,6 +4,7 @@ import sys
 
 class Utils:
 
+    @staticmethod
     def get_path(path):
         if sys.platform.startswith('linux'):
             return "./umbra/"+path
@@ -23,8 +24,11 @@ class Utils:
         """Helper function that turns a sequence of time strings into a list of
         integers.
 
-        :param times: sequence of strings in hh:mm:ss.sss format.
-        :return: list of times in milliseconds.
+        Args:
+            times: sequence of strings in hh:mm:ss.sss format.
+
+        Returns:
+            list of times in milliseconds.
         """
         output = []
         for time in times:
@@ -51,31 +55,44 @@ class Utils:
         h, m = divmod(m, 60)
         return "%02i:%02i:%02i.%03i" % (h, m, s, ms)
 
-    def shadow_regex(path):
-        match = re.search("\d_AO[0-9]+", path)
-        if match:
-            key = match.group()
-        if ".csv" in path:
-            key = key+"csv"
-        return key
+    @staticmethod
+    def shadow_regex(path, video):
+        """Extract participant and task from a shadow path for one video.
+
+        Args:
+            path: the shadow path to check
+            video: the video number the path must contain before ".T"
 
-    def filter_key(string):
-        match = re.search("AO[0-9]+", string)
+        Returns:
+            participant: the participant number, None when nothing matches
+            task: the task for the participant, None when nothing matches
+        """
+        match = re.search(rf"(\d+)_(\D+){re.escape(str(video))}\.T", path)
+        task = None
+        participant = None
         if match:
-            if len(match.group()[2:]) > 2:
-                return match.group()[2:4]
-            return match.group()[2:]
+            participant = match.group(1)
+            task = match.group(2)
+        return participant, task
 
+    @staticmethod
     def id_regex(path):
-        match = re.search("\d+\.T", path)
-        # Make sure that not different number accidentally
-        id = match.group()
-        if len(id) > 3 and ".csv" in path:
-            return id[:2]
-        elif len(id) <= 3 and ".csv" in path:
-            return id[0]
+        """Extract the numeric id that precedes ".T" in a source file path.
 
+        Args:
+            path: the source file path to search
 
+        Returns:
+            the digits found before ".T", or "" when the path has no match
+        """
+        match = re.search(r"\d+\.T", path)
+        if match:
+            identifier = match.group()
+        else:
+            identifier = ""  # re.search returned None: nothing to strip
+        return identifier[:-2]
+
+    @staticmethod
     def add_to_dict(key, value, dict, alternative=False):
         """Checks if key already exists in a dictionary
         before adding new key-value pair
diff --git a/umbra/words.py b/umbra/words.py
index f2dc49ee80c514b3a3bbb0dec94a4d04a9b934f0..2801d8b87326a6b532fc2957115ec3c3c6837e7c 100644
--- a/umbra/words.py
+++ b/umbra/words.py
@@ -99,14 +99,6 @@ class Word:
         """Anchor setter. Has to be overridden in the subclass."""
         raise NotImplementedError
 
-    def get_difference(self, other):
-        """Get the difference between the onset of this word and the other.
-
-        Args:
-            other: the other Word instance
-        """
-        return other.onset - self._onset
-
 
 class ShadowWord(Word):
     def __init__(self, word, onset, offset):
@@ -178,6 +170,7 @@ class SourceWord(Word):
     def __init__(self, word, onset, offset):
         super().__init__(word, onset, offset)
         self._shadowed = False
+        self._shadow = None
 
     @property
     def shadowed(self):
@@ -209,6 +202,39 @@ class SourceWord(Word):
     def _set_anchor(self, anchor):
         self._anchor = anchor
 
+    @property
+    def shadow(self):
+        """Getter for the shadow attribute.
+
+        Returns:
+            The shadow word stored on this word, or None if none was set.
+        """
+        return self._shadow
+
+    @shadow.setter
+    def shadow(self, shadow):
+        """Setter for the shadow attribute.
+
+        Args:
+            shadow: the shadow word to store on this source word.
+        """
+        self._shadow = shadow
+
+    def has_shadow(self):
+        """Check whether a shadow word has been stored on this word.
+
+        Returns:
+            True if the shadow attribute is not None, False otherwise.
+        """
+        return self._shadow is not None
+
+    def get_difference(self, other):
+        """Return the onset of the other word minus the onset of this word.
+
+        Args:
+            other: the other word; only its ``onset`` attribute is read.
+        """
+        return other.onset - self._onset
 
 class Sentence(list):
     def __init__(self, words):