Skip to content
Snippets Groups Projects
Commit 936d5b33 authored by Alfen, T. van (Tanja)
Browse files

Removed leftover file that remained after merge at commit 53ca7ffd

parent 53ca7ffd
No related branches found
Tags sprint3
No related merge requests found
from gap import Gap
from alignment_strategy import AlignmentStrategy
from words import Sentence
import numpy as np
import time
from mistake_enum import Mistake
class SaaRomeo(AlignmentStrategy):
    """Align the words of a source and a shadow sentence.

    The alignment follows the Needleman-Wunsch scheme: a score matrix is
    initialised with gap penalties, filled cell by cell, and then traced
    back from the bottom-right corner towards the top-left corner. While
    tracing back, word pairs that lie on a diagonal step are annotated in
    place (see ``_equals_checker``).
    """

    def __init__(self, seman_checker):
        """Store the scoring scheme and the semantic checker.

        Args:
            seman_checker: Object exposing ``semantically_related(a, b)``;
                its result decides whether two unequal words receive the
                semantic-match score instead of the mismatch score.
        """
        self._match = 4          # score for two equal words
        self._mismatch = -2      # score for two unrelated words
        self._gap_sc = -1        # score for aligning a word with a gap
        self._seman_match = 2    # score for semantically related words
        # Order matters: it matches the candidate order used in
        # _fill_matrix (diagonal, up, left).
        self._pointers = ['diag', 'up', 'left']
        self._source = None
        self._shadow = None
        self._seman_checker = seman_checker

    def align(self, source, shadow):
        """Run the full alignment and annotate the words in place.

        Initialises the matrix, fills it and performs the traceback. The
        elapsed time is printed to stdout.

        Args:
            source: Sentence of Word instances of the source file
            shadow: Sentence of Word instances of the shadow file

        Returns:
            The ``source`` and ``shadow`` sentences that were passed in,
            after the traceback has annotated their words. The gap-padded
            alignment sentences built by ``_traceback`` are not returned.
        """
        # perf_counter is the clock intended for measuring intervals; the
        # wall clock may be adjusted while the alignment is running.
        start = time.perf_counter()
        self._source = source
        self._shadow = shadow
        matrix = self._initialize_matrix()
        matrix = self._fill_matrix(matrix)
        # Called for its side effects on the words; the returned alignment
        # sentences are intentionally not used here.
        self._traceback(matrix)
        discrete_time = time.perf_counter() - start
        print(f'taken time:{discrete_time}')
        return self._source, self._shadow

    def _initialize_matrix(self):
        """Build the matrix with its default values.

        Rows correspond to shadow positions and columns to source
        positions, each with one extra leading row/column for the empty
        prefix.

        Returns:
            matrix: A 2-D array of dicts with the keys 'value' and
                'pointer'. The first column holds cumulative gap scores
                with pointer 'up', the first row holds cumulative gap
                scores with pointer 'left', and every other cell starts at
                value 0 with pointer 'diag'.
        """
        n = len(self._source)
        m = len(self._shadow)

        def default_cell(x, y):
            # A fresh dict per cell: the cells are mutated individually
            # when the matrix is filled.
            if x == 0:
                return {'value': self._gap_sc * y, 'pointer': 'up'}
            if y == 0:
                return {'value': self._gap_sc * x, 'pointer': 'left'}
            return {'value': 0, 'pointer': 'diag'}

        rows = [[default_cell(x, y) for x in range(n + 1)]
                for y in range(m + 1)]
        return np.array(rows, dtype=object)

    def _fill_matrix(self, matrix):
        """Fill the matrix with the best score and pointer per cell.

        Args:
            matrix: The matrix as produced by ``_initialize_matrix``.

        Returns:
            matrix: The same matrix, with the 'value' and 'pointer' of
                every cell outside the first row and column updated.
        """
        n = len(self._source)
        m = len(self._shadow)
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                source_word = self._source[j - 1]
                shadow_word = self._shadow[i - 1]
                if source_word == shadow_word:
                    value = self._match
                elif self._seman_checker.semantically_related(
                        source_word.word, shadow_word.word):
                    value = self._seman_match
                else:
                    value = self._mismatch
                # Candidate order corresponds to self._pointers:
                # diagonal, up, left.
                candidates = [
                    matrix[i - 1, j - 1]['value'] + value,
                    matrix[i - 1, j]['value'] + self._gap_sc,
                    matrix[i, j - 1]['value'] + self._gap_sc,
                ]
                # argmax returns the first maximum, so ties prefer the
                # diagonal, then up, then left.
                best = int(np.argmax(candidates))
                cell = matrix[i, j]
                cell['value'] = candidates[best]
                cell['pointer'] = self._pointers[best]
        return matrix

    def _traceback(self, matrix):
        """Follow the pointers back to the top left and build the alignment.

        On every diagonal step the word pair is passed through
        ``_equals_checker`` and written back into the stored sentences.

        Args:
            matrix: The filled matrix as produced by ``_fill_matrix``.

        Returns:
            alignment_source: Sentence of source words, with Gap instances
                where a shadow word has no source counterpart
            alignment_shadow: Sentence of shadow words, with Gap instances
                where a source word has no shadow counterpart

        Raises:
            ValueError: If a cell holds a pointer other than 'diag',
                'left' or 'up'.
        """
        j = len(self._source)
        i = len(self._shadow)
        alignment_source = []
        alignment_shadow = []
        while i > 0 or j > 0:
            pointer = matrix[i][j]['pointer']
            if pointer == 'diag':
                alignment_source.append(self._source[j - 1])
                alignment_shadow.append(self._shadow[i - 1])
                new_source, new_shadow = self._equals_checker(
                    self._source[j - 1], self._shadow[i - 1])
                self._source[j - 1] = new_source
                self._shadow[i - 1] = new_shadow
                i -= 1
                j -= 1
            elif pointer == 'left':
                alignment_source.append(self._source[j - 1])
                alignment_shadow.append(Gap())
                j -= 1
            elif pointer == 'up':
                alignment_source.append(Gap())
                alignment_shadow.append(self._shadow[i - 1])
                i -= 1
            else:
                # Without this branch an unexpected pointer would leave
                # i and j unchanged and the loop would never terminate.
                raise ValueError(
                    f'unknown pointer {pointer!r} at cell ({i}, {j})')
        # The lists were built from the end of the sentences backwards.
        alignment_source.reverse()
        alignment_shadow.reverse()
        return Sentence(alignment_source), Sentence(alignment_shadow)

    def _equals_checker(self, source, shadow):
        """Annotate a diagonally aligned word pair.

        Helper for ``_traceback``. Equal words mark the source word as
        shadowed and link the shadow word to it. Semantically related
        words mark the source word as shadowed and flag both words with
        ``Mistake.SEMANTIC``. Any other pair is left untouched.

        Args:
            source: The source word
            shadow: The shadow word

        Returns:
            The (possibly updated) source word and shadow word.
        """
        if source == shadow:
            source.shadowed = True
            shadow.source = source
        elif self._seman_checker.semantically_related(
                source.word, shadow.word):
            # NOTE(review): unlike the equal case, shadow.source is not
            # set here - confirm whether that is intended.
            source.shadowed = True
            source.mistake = Mistake.SEMANTIC
            shadow.mistake = Mistake.SEMANTIC
        return source, shadow
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment