From ded59dab4e19e7d080fd6675ae61070133a58d13 Mon Sep 17 00:00:00 2001
From: "Nijsen, T" <s1006955@ru.nl>
Date: Wed, 4 Dec 2019 19:07:09 +0100
Subject: [PATCH] Removed superfluous exceptions. Prepared compare function for
 integration.

---
 umbra/dutch_mmetaphone.py | 61 +++++----------------------------------
 1 file changed, 7 insertions(+), 54 deletions(-)

diff --git a/umbra/dutch_mmetaphone.py b/umbra/dutch_mmetaphone.py
index 68e6e222..6a3d1cba 100644
--- a/umbra/dutch_mmetaphone.py
+++ b/umbra/dutch_mmetaphone.py
@@ -12,37 +12,12 @@ import copy
 
 class DutchPhonetics:
     @staticmethod
-    def compare_with_shadow(S_original, S_shadow):
-        """"Compare the phonetic representations of the original sentence and
-        the associated shadowed words.
-
-        Args: S_original is a list of SourceWord objects in which _source
-        is the associated shadowed word. (I was unclear whether this is how
-        the attribute is used. But can be changed later.)
-
-        Assumption: source attribute is always a Word object
-        Furthermore Words need a fon_correct attribute.
-        """
-        # Check 1:1 word matches
-        for original in S_original:
-            original.fon_correct = DutchPhonetics.compare_words(original.word, original.source.word)
-
-        # Check 1:2 word matches
-        for original in S_original:
-            if not original.fon_correct:
-                shadow_onset = original.source.onset
-                sh_idx = -1
-                for idx, shadow in S_shadow:
-                    onset = shadow.onset
-                    if shadow_onset == onset and idx+1 < len(S_shadow):
-                        sh_idx = idx + 1
-                shadow_combined = original.source.word + str(S_shadow[sh_idx])
-                original.fon_correct = DutchPhonetics.compare_words(original.word, shadow_combined)
-
-    @staticmethod
-    def compare_words(word1, word2):
+    def compare(word1, word2):
         """"Compare the phonetic representations of 2 source strings and see
-        if one of them matches. """
+        if one of them matches.
+
+        Args: word1: string
+              word2: string"""
         word1_representations, word2_representations = DutchPhonetics.mmetaphone(word1), \
                                                        DutchPhonetics.mmetaphone(word2)
         return any(w1_rep in word2_representations for w1_rep in word1_representations)
@@ -65,7 +40,7 @@ class DutchPhonetics:
     def derden(source, representations):
         """"Recognise 3 letter patterns in the source and convert them to their
         phonetic representation. """
-        uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>', 'ZIJ': 'zy>', 'JIJ': 'jy>', 'WIJ': 'wy>'}
+        uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>'}
         uz_len = 3
         new_rep = copy.deepcopy(source)  # New representation without ambiguities yet
 
@@ -111,7 +86,7 @@ class DutchPhonetics:
 
         dubbel = {'UU': 'y', 'EU': '2:', 'OE': 'u', 'UE': 'u:', 'IJ': 'e>i', 'EI': 'e>i',
                   'UI': '9y', 'OU': 'v>u', 'AU': 'v>u', 'SJ': 's>', 'NG': 'n>', 'DT': 't',
-                  'TH': 't', 'ZE': 'zy>', 'JE': 'jy>', 'WE': 'wy>'}
+                  'TH': 't'}
 
         for d in dubbel:
             d_pos = source.find(d)
@@ -260,25 +235,3 @@ class DutchPhonetics:
                 fon_rep = klank + additie
                 tot_len = spel_len + additie_len
                 representations[rep_idx] = rep[:spelling_pos] + fon_rep + rep[spelling_pos + tot_len:]
-
-
-# Test examples
-#DutchPhonetics.mmetaphone("Detail")
-#DutchPhonetics.mmetaphone("Haai")
-#DutchPhonetics.mmetaphone("Leeuw")
-#DutchPhonetics.mmetaphone("Lach")
-#DutchPhonetics.mmetaphone("Lag")
-#DutchPhonetics.mmetaphone('Jazz')  # leenwoord, representatie kan niet 100% worden opgevangen tenzij als special case
-#DutchPhonetics.mmetaphone("Handvat")  # 'dv' does not become a 'tf' sound yet
-#DutchPhonetics.mmetaphone("Hand")
-#DutchPhonetics.mmetaphone("Weggelopen")
-# 'gg' does not become a 'xg>' sound yet and because there is only a single medeklinker after, the second 'e' becomes
-# an 'e:' sound, should be more like a 'y' sound but question is whether 'ge' is a pattern often enough such that it
-# can be used a special case of 2 letters.
-#DutchPhonetics.mmetaphone("Pet")
-#DutchPhonetics.mmetaphone("Petten")
-#DutchPhonetics.mmetaphone("Peter")
-#DutchPhonetics.mmetaphone("Feeën")
-#DutchPhonetics.mmetaphone("Schakelen")
-#DutchPhonetics.mmetaphone("Ik")
-#DutchPhonetics.mmetaphone("Radioprogramma")
-- 
GitLab