
Sc 75 #bulk load

Merged Nijsen, T.J.P. (Thomas) requested to merge SC-75-#BulkLoad into master
@@ -6,17 +6,23 @@ import re
 # Input sequences
 S1 = ["This", "is", "an", "example", "sequence", "This", "is", "the", "original"]
 onsets_1 = [0.0, 10.0, 15.0, 20.0, 35.0, 50.0, 60.0, 65.0, 70.0, 80.0]
+data_1 = [S1, onsets_1]
 S2 = ["This", "is", "another", "example", "sequence", "This", "is", "not", "the", "original"]
 onsets_2 = [5.0, 16.0, 20.0, 26.0, 39.0, 46.0, 52.0, 55.0, 60.0, 65.0, 72.0, 76.0, 83.0]
+data_2 = [S2, onsets_2]
 # Algorithm constants
-avr_latency = 170 * 10**(-3)
+avr_latency = 15
 # Algorithm functions
-def in_time(w1, w2, threshold):
-    t1, t2 = w1.onset, w2.onset  # Get onsets for both words
+def in_time(w1_onset, w2_onset, threshold):
+    t1, t2 = w1_onset, w2_onset  # Get onsets for both words
     delta = np.abs(t1 - t2)  # Get the difference; use abs since similar words can occur both before and after
     penalty = np.sqrt(delta) / avr_latency  # Square root so the penalty does not grow too large & normalize in terms of the experiment
+    print("W1 onset: ", w1_onset, " W2 onset: ", w2_onset, " Penalty: ", penalty)
     return penalty < threshold  # Return the decision of whether word 2 was in time of word 1
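As a quick sanity check on the updated constant, a rough, illustrative calculation only (it assumes the sample onsets above, the new avr_latency = 15, and the 0.5 default threshold used by alg below):

import numpy as np

# Illustrative only: penalty for the second words of S1 and S2
# (onsets 10.0 and 16.0) under the updated avr_latency = 15.
avr_latency = 15
delta = np.abs(10.0 - 16.0)              # 6.0
penalty = np.sqrt(delta) / avr_latency   # ~0.163
print(penalty < 0.5)                     # True -> word 2 counts as "in time" of word 1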
@@ -31,6 +37,29 @@ def similarity(w1, w2, seq1, threshold):
     for s in seq1:  # Calculate the kernel function value using the feature mapping phi
         k += ws * phi(s, w1) * phi(s, w2)
-    return k < threshold  # Return decision
+    print("W1: ", w1, " W2: ", w2, " k: ", k)
+    return k >= threshold  # Return decision
+
+def alg(data1, data2, threshold1=0.5, threshold2=0.5):
+    words1, words2 = data1[0], data2[0]
+    onsets1, onsets2 = data1[1], data2[1]
+    shadowed = np.full(len(words1), False)
+    for idx1, w in enumerate(words1):
+        for idx2, v in enumerate(words2):
+            if not shadowed[idx1] and in_time(onsets1[idx1], onsets2[idx2], threshold1):
+                shadowed[idx1] = similarity(w, v, words1, threshold2)
+        print("Word: ", w, " was shadowed: ", shadowed[idx1])
+
+# Test 1
+alg(data_1, data_2, threshold2=1)
def alg(data1, data2, threshold1, threshold2):
""""In general the results seem to be promising. The in_time function does act like a sieve for words.
But probably results are very muddled due to making up the data by myself, so testing on some real
data would provide some good additional insight. Overall the calculation is fast and seems to only
require tuning.
The kernel function as well seems promising, I think it should be reworked somewhat to truly make it
good (I copied an internet example code to save some time), but the idea seems to work. But again some
tuning seems to be required. But this reduces the problem purely to finding a good kernel function and
making that work. """
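One way to picture the kernel idea: phi and ws are defined elsewhere in the file and are not part of this diff, so the sketch below is only an assumption (phi as an exact-match indicator with a fixed weight ws), not the repository's actual feature mapping. Under that assumption, k becomes a weighted count of words in seq1 that match both w1 and w2:

# Hypothetical sketch -- not the phi/ws used in this repository.
S1 = ["This", "is", "an", "example", "sequence", "This", "is", "the", "original"]
ws = 1.0  # assumed fixed weight

def phi(s, w):
    # Assumed feature: 1.0 when the sequence word s matches w exactly (case-insensitive)
    return 1.0 if s.lower() == w.lower() else 0.0

def similarity(w1, w2, seq1, threshold):
    k = 0.0
    for s in seq1:  # same kernel sum as in the diff above
        k += ws * phi(s, w1) * phi(s, w2)
    return k >= threshold

print(similarity("This", "This", S1, 1))  # True: "This" appears twice in S1, so k = 2.0
print(similarity("This", "not", S1, 1))   # False: no word in S1 matches both, so k = 0.0

With a richer phi (for example one that is non-zero for related but non-identical words), the same k >= threshold decision would also accept near-matches, which is what the note above about reworking the kernel points toward.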