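Removed superfluous pronunciation exceptions (the ZIJ/JIJ/WIJ and ZE/JE/WE patterns). Prepared compare function for integration: renamed compare_words to compare and removed compare_with_shadow.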

ded59dab · Nijsen, T · cd6f0f00 · ded59dab
Commit ded59dab authored 5 years ago by Nijsen, T
--- a/umbra/dutch_mmetaphone.py
+++ b/umbra/dutch_mmetaphone.py
@@ -12,37 +12,12 @@ import copy

 class DutchPhonetics:
    @staticmethod
-    def compare_with_shadow(S_original, S_shadow):
-        """"Compare the phonetic representations of the original sentence and
-        the associated shadowed words.
-
-        Args: S_original is a list of SourceWord objects in which _source
-        is the associated shadowed word. (I was unclear whether this is how
-        the attribute is used. But can be changed later.)
-
-        Assumption: source attribute is always a Word object
-        Furthermore Words need a fon_correct attribute.
-        """
-        # Check 1:1 word matches
-        for original in S_original:
-            original.fon_correct = DutchPhonetics.compare_words(original.word, original.source.word)
-
-        # Check 1:2 word matches
-        for original in S_original:
-            if not original.fon_correct:
-                shadow_onset = original.source.onset
-                sh_idx = -1
-                for idx, shadow in S_shadow:
-                    onset = shadow.onset
-                    if shadow_onset == onset and idx+1 < len(S_shadow):
-                        sh_idx = idx + 1
-                shadow_combined = original.source.word + str(S_shadow[sh_idx])
-                original.fon_correct = DutchPhonetics.compare_words(original.word, shadow_combined)
-
-    @staticmethod
-    def compare_words(word1, word2):
+    def compare(word1, word2):
        """"Compare the phonetic representations of 2 source strings and see
-        if one of them matches. """
+        if any representation of word1 is also one of word2 (returns a bool).
+
+        Args: word1: string
+              word2: string"""
        word1_representations, word2_representations = DutchPhonetics.mmetaphone(word1), \
                                                       DutchPhonetics.mmetaphone(word2)
        return any(w1_rep in word2_representations for w1_rep in word1_representations)
@@ -65,7 +40,7 @@ class DutchPhonetics:
    def derden(source, representations):
        """"Recognise 3 letter patterns in the source and convert them to their
        phonetic representation. """
-        uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>', 'ZIJ': 'zy>', 'JIJ': 'jy>', 'WIJ': 'wy>'}
+        uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>'}
        uz_len = 3
        new_rep = copy.deepcopy(source)  # New representation without ambiguities yet

@@ -111,7 +86,7 @@ class DutchPhonetics:

        dubbel = {'UU': 'y', 'EU': '2:', 'OE': 'u', 'UE': 'u:', 'IJ': 'e>i', 'EI': 'e>i',
                  'UI': '9y', 'OU': 'v>u', 'AU': 'v>u', 'SJ': 's>', 'NG': 'n>', 'DT': 't',
-                  'TH': 't', 'ZE': 'zy>', 'JE': 'jy>', 'WE': 'wy>'}
+                  'TH': 't'}

        for d in dubbel:
            d_pos = source.find(d)
@@ -260,25 +235,3 @@ class DutchPhonetics:
                fon_rep = klank + additie
                tot_len = spel_len + additie_len
                representations[rep_idx] = rep[:spelling_pos] + fon_rep + rep[spelling_pos + tot_len:]
-
-
-# Test examples
-#DutchPhonetics.mmetaphone("Detail")
-#DutchPhonetics.mmetaphone("Haai")
-#DutchPhonetics.mmetaphone("Leeuw")
-#DutchPhonetics.mmetaphone("Lach")
-#DutchPhonetics.mmetaphone("Lag")
-#DutchPhonetics.mmetaphone('Jazz')  # leenwoord, representatie kan niet 100% worden opgevangen tenzij als special case
-#DutchPhonetics.mmetaphone("Handvat")  # 'dv' does not become a 'tf' sound yet
-#DutchPhonetics.mmetaphone("Hand")
-#DutchPhonetics.mmetaphone("Weggelopen")
-# 'gg' does not become a 'xg>' sound yet and because there is only a single medeklinker after, the second 'e' becomes
-# an 'e:' sound, should be more like a 'y' sound but question is whether 'ge' is a pattern often enough such that it
-# can be used a special case of 2 letters.
-#DutchPhonetics.mmetaphone("Pet")
-#DutchPhonetics.mmetaphone("Petten")
-#DutchPhonetics.mmetaphone("Peter")
-#DutchPhonetics.mmetaphone("Feeën")
-#DutchPhonetics.mmetaphone("Schakelen")
-#DutchPhonetics.mmetaphone("Ik")
-#DutchPhonetics.mmetaphone("Radioprogramma")