
Sc 75 #bulk load

Merged Nijsen, T.J.P. (Thomas) requested to merge SC-75-#BulkLoad into master
@@ -6,17 +6,23 @@ import re
 # Input sequences
 S1 = ["This", "is", "an", "example", "sequence", "This", "is", "the", "original"]
 onsets_1 = [0.0, 10.0, 15.0, 20.0, 35.0, 50.0, 60.0, 65.0, 70.0, 80.0]
+data_1 = [S1, onsets_1]
 S2 = ["This", "is", "another", "example", "sequence", "This", "is", "not", "the", "original"]
 onsets_2 = [5.0, 16.0, 20.0, 26.0, 39.0, 46.0, 52.0, 55.0, 60.0, 65.0, 72.0, 76.0, 83.0]
+data_2 = [S2, onsets_2]
 # Algorithm constants
-avr_latency = 170 * 10**(-3)
+avr_latency = 15
 # Algorithm functions
-def in_time(w1, w2, threshold):
-    t1, t2 = w1.onset, w2.onset  # Get onsets for both words
+def in_time(w1_onset, w2_onset, threshold):
+    t1, t2 = w1_onset, w2_onset  # Get onsets for both words
     delta = np.abs(t1 - t2)  # Get the difference; use abs since similar words can occur both before and after
     penalty = np.sqrt(delta) / avr_latency  # Square root so the penalty does not grow too large & normalize in terms of the experiment
+    print("W1 onset: ", w1_onset, " W2 onset: ", w2_onset, " Penalty: ", penalty)
     return penalty < threshold  # Return the decision of whether word 2 was in time of word 1
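As a quick sanity check on the updated constant, a rough, illustrative calculation only (it assumes the sample onsets above, the new avr_latency = 15, and the 0.5 default threshold used by alg below):

import numpy as np

# Illustrative only: penalty for the second words of S1 and S2
# (onsets 10.0 and 16.0) under the updated avr_latency = 15.
avr_latency = 15
delta = np.abs(10.0 - 16.0)              # 6.0
penalty = np.sqrt(delta) / avr_latency   # ~0.163
print(penalty < 0.5)                     # True -> word 2 counts as "in time" of word 1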
@@ -31,6 +37,29 @@ def similarity(w1, w2, seq1, threshold):
     for s in seq1:  # Calculate the kernel function value using the feature mapping phi
         k += ws * phi(s, w1) * phi(s, w2)
-    return k < threshold  # Return decision
+    print("W1: ", w1, " W2: ", w2, " k: ", k)
+    return k >= threshold  # Return decision
+
+def alg(data1, data2, threshold1=0.5, threshold2=0.5):
+    words1, words2 = data1[0], data2[0]
+    onsets1, onsets2 = data1[1], data2[1]
+    shadowed = np.full(len(words1), False)
+    for idx1, w in enumerate(words1):
+        for idx2, v in enumerate(words2):
+            if not shadowed[idx1] and in_time(onsets1[idx1], onsets2[idx2], threshold1):
+                shadowed[idx1] = similarity(w, v, words1, threshold2)
+        print("Word: ", w, " was shadowed: ", shadowed[idx1])
+
+# Test 1
+alg(data_1, data_2, threshold2=1)
def alg(data1, data2, threshold1, threshold2):
""""In general the results seem to be promising. The in_time function does act like a sieve for words.
But probably results are very muddled due to making up the data by myself, so testing on some real
data would provide some good additional insight. Overall the calculation is fast and seems to only
require tuning.
The kernel function as well seems promising, I think it should be reworked somewhat to truly make it
good (I copied an internet example code to save some time), but the idea seems to work. But again some
tuning seems to be required. But this reduces the problem purely to finding a good kernel function and
making that work. """
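One way to picture the kernel idea: phi and ws are defined elsewhere in the file and are not part of this diff, so the sketch below is only an assumption (phi as an exact-match indicator with a fixed weight ws), not the repository's actual feature mapping. Under that assumption, k becomes a weighted count of words in seq1 that match both w1 and w2:

# Hypothetical sketch -- not the phi/ws used in this repository.
S1 = ["This", "is", "an", "example", "sequence", "This", "is", "the", "original"]
ws = 1.0  # assumed fixed weight

def phi(s, w):
    # Assumed feature: 1.0 when the sequence word s matches w exactly (case-insensitive)
    return 1.0 if s.lower() == w.lower() else 0.0

def similarity(w1, w2, seq1, threshold):
    k = 0.0
    for s in seq1:  # same kernel sum as in the diff above
        k += ws * phi(s, w1) * phi(s, w2)
    return k >= threshold

print(similarity("This", "This", S1, 1))  # True: "This" appears twice in S1, so k = 2.0
print(similarity("This", "not", S1, 1))   # False: no word in S1 matches both, so k = 0.0

With a richer phi (for example one that is non-zero for related but non-identical words), the same k >= threshold decision would also accept near-matches, which is what the note above about reworking the kernel points toward.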