Skip to content
Snippets Groups Projects

Sc 75 #bulk load

Merged Nijsen, T.J.P. (Thomas) requested to merge SC-75-#BulkLoad into master
+ 36
0
""""This file contains my second idea for a solution concept to our problem.
It is based on kernel methods as opposed to my last idea."""
import numpy as np
import re
# Input sequences: two token lists to be compared by the kernel-based algorithm.
# S1 is treated as the reference ("original") sequence; S2 is a variant of it.
S1 = ["This", "is", "an", "example", "sequence", "This", "is", "the", "original"]
S2 = ["This", "is", "another", "example", "sequence", "This", "is", "not", "the", "original"]
# Algorithm constants
# 170 ms expressed in seconds; presumably the average onset latency of the
# experiment, used to normalize timing penalties in in_time() — TODO confirm.
avr_latency = 170 * 10**(-3)
# Algorithm functions
def in_time(w1, w2, threshold):
    """Decide whether word ``w2`` occurred close enough in time to ``w1``.

    The absolute onset gap is square-rooted (so large gaps are not
    penalized excessively) and normalized by the average-latency constant;
    the pair counts as "in time" when that penalty stays below
    ``threshold``.  Assumes both arguments carry an ``onset`` attribute —
    TODO confirm against the caller's word type.
    """
    # Absolute difference: order of the two words must not matter.
    onset_gap = np.abs(w1.onset - w2.onset)
    scaled_penalty = np.sqrt(onset_gap) / avr_latency
    return scaled_penalty < threshold
def phi(w1, w2):
    """Feature map for the string kernel: count how often ``w1`` occurs
    as a (non-overlapping) literal substring of ``w2``.

    ``w1`` is escaped before being handed to the regex engine so it is
    matched as a literal substring.  Previously it was used as a raw
    regex pattern, so any word containing metacharacters ('.', '?',
    '(' …) would miscount or raise ``re.error``.

    Returns an int count (0 when there is no occurrence).
    """
    # Counting matches lazily avoids building a throwaway list of starts.
    return sum(1 for _ in re.finditer(re.escape(w1), w2))
def similarity(w1, w2, seq1, threshold):
    """Kernel-based decision for two words against a reference sequence.

    Computes k(w1, w2) = sum over s in ``seq1`` of ws * phi(s, w1) *
    phi(s, w2), using the feature mapping ``phi``, and returns whether
    the kernel value falls below ``threshold``.
    """
    weight = 1  # hold all feature weights constant for now
    kernel_value = sum(
        weight * phi(token, w1) * phi(token, w2) for token in seq1
    )
    return kernel_value < threshold
def alg(data1, data2, threshold1, threshold2):
Loading