diff --git a/umbra/saa_Romeo.py b/umbra/saa_Romeo.py index 6b7d87478cfe7ed0e0f037793946a4ef57900fc4..1ff17c9abdf38cb3c0c9336fd4a354fbe40bb847 100644 --- a/umbra/saa_Romeo.py +++ b/umbra/saa_Romeo.py @@ -14,6 +14,8 @@ class SaaRomeo(AlignmentStrategy): self._mismatch = -2 self._gap_sc = -1 self._pointers = ['diag', 'up', 'left'] + self._source = None + self._shadow = None def align(self, source, shadow): """ This is the main function of finding alignments. @@ -29,28 +31,25 @@ class SaaRomeo(AlignmentStrategy): alignment_shadow: The found alignment for the shadow file """ discrete_start_time = time.time() - matrix = self._initialize_matrix(source, shadow) - matrix = self._fill_matrix(matrix, source, shadow) - alignment_source, alignment_shadow = self._traceback(matrix, - source, shadow) + self._source = source + self._shadow = shadow + matrix = self._initialize_matrix() + matrix = self._fill_matrix(matrix) + alignment_source, alignment_shadow = self._traceback(matrix) discrete_time = time.time() - discrete_start_time print(f'taken time:{discrete_time}') - return alignment_source, alignment_shadow + return self._source, self._shadow - def _initialize_matrix(self, source, shadow): + def _initialize_matrix(self): """The matrix is initialized according to the Needleman-Wunsch algorithm - Args: - source: Sentence of Word instances of the source file - shadow: Sentence of Word instances of the shadow file - Returns: matrix: A matrix containing the values and pointers, the latter indicating what shift in the matrix we take (up, up-left or left). The matrix that will be returned will have the default values """ - n = len(source) - m = len(shadow) + n = len(self._source) + m = len(self._shadow) matrix = np.array([[{'value': self._gap_sc*y, 'pointer': 'up'} if x == 0 else {'value': self._gap_sc*x, 'pointer': 'left'} @@ -60,26 +59,24 @@ class SaaRomeo(AlignmentStrategy): for y in range(m + 1)]) return matrix - def _fill_matrix(self, matrix, source, shadow): + def _fill_matrix(self, matrix): """The matrix is filled according to the Needleman-Wunsch algorithm Args: matrix: A matrix containing the values and pointers, the latter indicating what shift in the matrix we take (up, up-left or left). Now the matrix still has its default values - source: Sentence of Word instances of the source file - shadow: Sentence of Word instances of the shadow file Returns: matrix: A matrix containing the values and pointers, the latter indicating what shift in the matrix we take (up, up-left or left). Now the matrix's values and pointers are updated """ - n = len(source) - m = len(shadow) + n = len(self._source) + m = len(self._shadow) for i in range(1, m+1): for j in range(1, n+1): - if shadow[i-1].__eq__(source[j-1]): + if self._shadow[i-1].__eq__(self._source[j-1]): value = self._match else: value = self._mismatch @@ -92,55 +89,51 @@ class SaaRomeo(AlignmentStrategy): self._pointers[np.argmax([match_value, delete, insert])] return matrix - def _traceback(self, matrix, source, shadow): + def _traceback(self, matrix): """Traces back to top left to print the found alignment. Args: matrix: A matrix containing the values and pointers, the latter indicating what shift in the matrix we take (up, up-left or left) - source: Sentence of Word instances of the source file - shadow: Sentence of Word instances of the shadow file Returns: alignment_source: The alignment of source words alignment_shadow: The alignment of shadow words """ - j = len(source) - i = len(shadow) + j = len(self._source) + i = len(self._shadow) alignment_source = [] alignment_shadow = [] while i > 0 or j > 0: if matrix[i][j]['pointer'] == 'diag': - alignment_source.append(source[j - 1]) - alignment_shadow.append(shadow[i - 1]) + alignment_source.append(self._source[j - 1]) + alignment_shadow.append(self._shadow[i - 1]) + if self._source[j-1].__eq__(self._shadow[i-1]): + self._source[j-1].shadowed = True + self._shadow[i-1].source = self._source[j-1] i -= 1 j -= 1 elif matrix[i][j]['pointer'] == 'left': - alignment_source.append(source[j - 1]) + alignment_source.append(self._source[j - 1]) alignment_shadow.append(Gap()) j -= 1 elif matrix[i][j]['pointer'] == 'up': alignment_source.append(Gap()) - alignment_shadow.append(shadow[i - 1]) + alignment_shadow.append(self._shadow[i - 1]) i -= 1 #Finish tracing back to top-left while j > 0: - alignment_source.append(source[j - 1]) + alignment_source.append(self._source[j - 1]) alignment_shadow.append(Gap()) j -= 1 while i > 0: alignment_source.append(Gap()) - alignment_shadow.append(shadow[i - 1]) + alignment_shadow.append(self._shadow[i - 1]) i -= 1 alignment_source.reverse() alignment_shadow.reverse() - for source_word, shadow_word in zip(alignment_source, alignment_shadow): - if type(source_word) is not Gap and type(shadow_word) is not Gap: - if source_word == shadow_word: - source_word.shadowed = True - return Sentence(alignment_source), Sentence(alignment_shadow)