Changed such that words are now paired, and removed the need to keep passing...

Words are now paired during traceback: each matched shadow word records its source word. The source and shadow sentences are also stored on the instance, so these read-only arguments no longer have to be passed from method to method.

Changed such that words are now paired, and removed the need to keep passing...
Words are now paired during traceback: each matched shadow word records its source word. The source and shadow sentences are also stored on the instance, so these read-only arguments no longer have to be passed from method to method.
c8c78a72 · Haak, R. (Romeo) · 69a5d61c · c8c78a72
Commit c8c78a72 authored 5 years ago by Haak, R. (Romeo)
--- a/umbra/saa_Romeo.py
+++ b/umbra/saa_Romeo.py
@@ -14,6 +14,8 @@ class SaaRomeo(AlignmentStrategy):
        self._mismatch = -2
        self._gap_sc = -1
        self._pointers = ['diag', 'up', 'left']
+        self._source = None
+        self._shadow = None

    def align(self, source, shadow):
        """ This is the main function of finding alignments.
@@ -29,28 +31,25 @@ class SaaRomeo(AlignmentStrategy):
             alignment_shadow: The found alignment for the shadow file
        """
        discrete_start_time = time.time()
-        matrix = self._initialize_matrix(source, shadow)
-        matrix = self._fill_matrix(matrix, source, shadow)
-        alignment_source, alignment_shadow = self._traceback(matrix,
-                                                             source, shadow)
+        self._source = source
+        self._shadow = shadow
+        matrix = self._initialize_matrix()
+        matrix = self._fill_matrix(matrix)
+        alignment_source, alignment_shadow = self._traceback(matrix)
        discrete_time = time.time() - discrete_start_time
        print(f'taken time:{discrete_time}')
-        return alignment_source, alignment_shadow
+        return self._source, self._shadow

-    def _initialize_matrix(self, source, shadow):
+    def _initialize_matrix(self):
        """The matrix is initialized according to the Needleman-Wunsch algorithm

-        Args:
-            source: Sentence of Word instances of the source file
-            shadow: Sentence of Word instances of the shadow file
-
        Returns:
            matrix: A matrix containing the values and pointers, the latter
            indicating what shift in the matrix we take (up, up-left or left).
            The matrix that will be returned will have the default values
        """
-        n = len(source)
-        m = len(shadow)
+        n = len(self._source)
+        m = len(self._shadow)
        matrix = np.array([[{'value': self._gap_sc*y, 'pointer': 'up'}
                            if x == 0 else
                            {'value': self._gap_sc*x, 'pointer': 'left'}
@@ -60,26 +59,24 @@ class SaaRomeo(AlignmentStrategy):
                            for y in range(m + 1)])
        return matrix

-    def _fill_matrix(self, matrix, source, shadow):
+    def _fill_matrix(self, matrix):
        """The matrix is filled according to the Needleman-Wunsch algorithm

        Args:
            matrix: A matrix containing the values and pointers, the latter
            indicating what shift in the matrix we take (up, up-left or left).
            Now the matrix still has its default values
-            source: Sentence of Word instances of the source file
-            shadow: Sentence of Word instances of the shadow file

        Returns:
            matrix: A matrix containing the values and pointers, the latter
            indicating what shift in the matrix we take (up, up-left or left).
            Now the matrix's values and pointers are updated
        """
-        n = len(source)
-        m = len(shadow)
+        n = len(self._source)
+        m = len(self._shadow)
        for i in range(1, m+1):
            for j in range(1, n+1):
-                if shadow[i-1].__eq__(source[j-1]):
+                if self._shadow[i-1].__eq__(self._source[j-1]):
                    value = self._match
                else:
                    value = self._mismatch
@@ -92,55 +89,51 @@ class SaaRomeo(AlignmentStrategy):
                    self._pointers[np.argmax([match_value, delete, insert])]
        return matrix

-    def _traceback(self, matrix, source, shadow):
+    def _traceback(self, matrix):
        """Traces back to top left to print the found alignment.

        Args:
            matrix: A matrix containing the values and pointers, the latter
            indicating what shift in the matrix we take (up, up-left or left)
-            source: Sentence of Word instances of the source file
-            shadow: Sentence of Word instances of the shadow file

        Returns:
            alignment_source: The alignment of source words
            alignment_shadow: The alignment of shadow words
        """
-        j = len(source)
-        i = len(shadow)
+        j = len(self._source)
+        i = len(self._shadow)
        alignment_source = []
        alignment_shadow = []
        while i > 0 or j > 0:
            if matrix[i][j]['pointer'] == 'diag':
-                alignment_source.append(source[j - 1])
-                alignment_shadow.append(shadow[i - 1])
+                alignment_source.append(self._source[j - 1])
+                alignment_shadow.append(self._shadow[i - 1])
+                if self._source[j-1].__eq__(self._shadow[i-1]):
+                    self._source[j-1].shadowed = True
+                    self._shadow[i-1].source = self._source[j-1]
                i -= 1
                j -= 1
            elif matrix[i][j]['pointer'] == 'left':
-                alignment_source.append(source[j - 1])
+                alignment_source.append(self._source[j - 1])
                alignment_shadow.append(Gap())
                j -= 1
            elif matrix[i][j]['pointer'] == 'up':
                alignment_source.append(Gap())
-                alignment_shadow.append(shadow[i - 1])
+                alignment_shadow.append(self._shadow[i - 1])
                i -= 1

        #Finish tracing back to top-left
        while j > 0:
-            alignment_source.append(source[j - 1])
+            alignment_source.append(self._source[j - 1])
            alignment_shadow.append(Gap())
            j -= 1

        while i > 0:
            alignment_source.append(Gap())
-            alignment_shadow.append(shadow[i - 1])
+            alignment_shadow.append(self._shadow[i - 1])
            i -= 1

        alignment_source.reverse()
        alignment_shadow.reverse()

-        for source_word, shadow_word in zip(alignment_source, alignment_shadow):
-            if type(source_word) is not Gap and type(shadow_word) is not Gap:
-                if source_word == shadow_word:
-                    source_word.shadowed = True
-
        return Sentence(alignment_source), Sentence(alignment_shadow)