From ded59dab4e19e7d080fd6675ae61070133a58d13 Mon Sep 17 00:00:00 2001 From: "Nijsen, T" <s1006955@ru.nl> Date: Wed, 4 Dec 2019 19:07:09 +0100 Subject: [PATCH] Removed superfluous exceptions. Prepared compare function for integration. --- umbra/dutch_mmetaphone.py | 61 +++++---------------------------------- 1 file changed, 7 insertions(+), 54 deletions(-) diff --git a/umbra/dutch_mmetaphone.py b/umbra/dutch_mmetaphone.py index 68e6e222..6a3d1cba 100644 --- a/umbra/dutch_mmetaphone.py +++ b/umbra/dutch_mmetaphone.py @@ -12,37 +12,12 @@ import copy class DutchPhonetics: @staticmethod - def compare_with_shadow(S_original, S_shadow): - """"Compare the phonetic representations of the original sentence and - the associated shadowed words. - - Args: S_original is a list of SourceWord objects in which _source - is the associated shadowed word. (I was unclear whether this is how - the attribute is used. But can be changed later.) - - Assumption: source attribute is always a Word object - Furthermore Words need a fon_correct attribute. - """ - # Check 1:1 word matches - for original in S_original: - original.fon_correct = DutchPhonetics.compare_words(original.word, original.source.word) - - # Check 1:2 word matches - for original in S_original: - if not original.fon_correct: - shadow_onset = original.source.onset - sh_idx = -1 - for idx, shadow in S_shadow: - onset = shadow.onset - if shadow_onset == onset and idx+1 < len(S_shadow): - sh_idx = idx + 1 - shadow_combined = original.source.word + str(S_shadow[sh_idx]) - original.fon_correct = DutchPhonetics.compare_words(original.word, shadow_combined) - - @staticmethod - def compare_words(word1, word2): + def compare(word1, word2): """"Compare the phonetic representations of 2 source strings and see - if one of them matches. """ + if one of them matches. + + Args: word1 : string + word2: string""" word1_representations, word2_representations = DutchPhonetics.mmetaphone(word1), \ DutchPhonetics.mmetaphone(word2) return any(w1_rep in word2_representations for w1_rep in word1_representations) @@ -65,7 +40,7 @@ class DutchPhonetics: def derden(source, representations): """"Recognise 3 letter patterns in the source and convert them to their phonetic representation. """ - uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>', 'ZIJ': 'zy>', 'JIJ': 'jy>', 'WIJ': 'wy>'} + uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>'} uz_len = 3 new_rep = copy.deepcopy(source) # New representation without ambiguities yet @@ -111,7 +86,7 @@ class DutchPhonetics: dubbel = {'UU': 'y', 'EU': '2:', 'OE': 'u', 'UE': 'u:', 'IJ': 'e>i', 'EI': 'e>i', 'UI': '9y', 'OU': 'v>u', 'AU': 'v>u', 'SJ': 's>', 'NG': 'n>', 'DT': 't', - 'TH': 't', 'ZE': 'zy>', 'JE': 'jy>', 'WE': 'wy>'} + 'TH': 't'} for d in dubbel: d_pos = source.find(d) @@ -260,25 +235,3 @@ class DutchPhonetics: fon_rep = klank + additie tot_len = spel_len + additie_len representations[rep_idx] = rep[:spelling_pos] + fon_rep + rep[spelling_pos + tot_len:] - - -# Test examples -#DutchPhonetics.mmetaphone("Detail") -#DutchPhonetics.mmetaphone("Haai") -#DutchPhonetics.mmetaphone("Leeuw") -#DutchPhonetics.mmetaphone("Lach") -#DutchPhonetics.mmetaphone("Lag") -#DutchPhonetics.mmetaphone('Jazz') # leenwoord, representatie kan niet 100% worden opgevangen tenzij als special case -#DutchPhonetics.mmetaphone("Handvat") # 'dv' does not become a 'tf' sound yet -#DutchPhonetics.mmetaphone("Hand") -#DutchPhonetics.mmetaphone("Weggelopen") -# 'gg' does not become a 'xg>' sound yet and because there is only a single medeklinker after, the second 'e' becomes -# an 'e:' sound, should be more like a 'y' sound but question is whether 'ge' is a pattern often enough such that it -# can be used a special case of 2 letters. -#DutchPhonetics.mmetaphone("Pet") -#DutchPhonetics.mmetaphone("Petten") -#DutchPhonetics.mmetaphone("Peter") -#DutchPhonetics.mmetaphone("Feeën") -#DutchPhonetics.mmetaphone("Schakelen") -#DutchPhonetics.mmetaphone("Ik") -#DutchPhonetics.mmetaphone("Radioprogramma") -- GitLab