Skip to content
Snippets Groups Projects
Commit ded59dab authored by Nijsen, T's avatar Nijsen, T
Browse files

Removed superfluous exceptions. Prepared compare function for integration.

parent cd6f0f00
No related branches found
No related tags found
1 merge request!60Sc 111/dmetaphone
......@@ -12,37 +12,12 @@ import copy
class DutchPhonetics:
@staticmethod
def compare_with_shadow(S_original, S_shadow):
    """Flag each original word whose phonetics match its shadowed word(s).

    Sets the ``fon_correct`` attribute on every element of ``S_original``
    in place; nothing is returned.

    Args:
        S_original: list of SourceWord objects exposing ``word`` and
            ``source``, where ``source`` is the associated shadowed word
            and exposes ``word`` and ``onset``. (The original author
            noted it was unclear whether ``source`` is meant to be used
            this way; this can be changed later.)
        S_shadow: sequence of shadowed words, each exposing ``onset``.

    Assumption: the ``source`` attribute is always a Word object, and
    Words accept a ``fon_correct`` attribute.
    """
    # Check 1:1 word matches.
    for original in S_original:
        original.fon_correct = DutchPhonetics.compare_words(
            original.word, original.source.word)

    # Check 1:2 word matches: the original word may have been shadowed as
    # two consecutive words, so retry with the shadow word plus its successor.
    for original in S_original:
        if original.fon_correct:
            continue

        shadow_onset = original.source.onset
        next_idx = None
        # enumerate() supplies the position; iterating S_shadow directly
        # would try to unpack each shadow object into (idx, shadow).
        for idx, shadow in enumerate(S_shadow):
            if shadow.onset == shadow_onset and idx + 1 < len(S_shadow):
                next_idx = idx + 1

        if next_idx is None:
            # No shadow word follows the matched one (or nothing matched):
            # keep the 1:1 verdict rather than combining with an unrelated
            # word via a negative index.
            continue

        # NOTE(review): str() on the shadow object is kept from the original;
        # presumably `.word` was intended -- confirm against the Word class.
        shadow_combined = original.source.word + str(S_shadow[next_idx])
        original.fon_correct = DutchPhonetics.compare_words(
            original.word, shadow_combined)
@staticmethod
def compare_words(word1, word2):
def compare(word1, word2):
""""Compare the phonetic representations of 2 source strings and see
if one of them matches. """
if one of them matches.
Args: word1 : string
word2: string"""
word1_representations, word2_representations = DutchPhonetics.mmetaphone(word1), \
DutchPhonetics.mmetaphone(word2)
return any(w1_rep in word2_representations for w1_rep in word1_representations)
......@@ -65,7 +40,7 @@ class DutchPhonetics:
def derden(source, representations):
""""Recognise 3 letter patterns in the source and convert them to their
phonetic representation. """
uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>', 'ZIJ': 'zy>', 'JIJ': 'jy>', 'WIJ': 'wy>'}
uitzonderingen = {'AIL': 'a>i', 'TSJ': 'ts>'}
uz_len = 3
new_rep = copy.deepcopy(source) # New representation without ambiguities yet
......@@ -111,7 +86,7 @@ class DutchPhonetics:
dubbel = {'UU': 'y', 'EU': '2:', 'OE': 'u', 'UE': 'u:', 'IJ': 'e>i', 'EI': 'e>i',
'UI': '9y', 'OU': 'v>u', 'AU': 'v>u', 'SJ': 's>', 'NG': 'n>', 'DT': 't',
'TH': 't', 'ZE': 'zy>', 'JE': 'jy>', 'WE': 'wy>'}
'TH': 't'}
for d in dubbel:
d_pos = source.find(d)
......@@ -260,25 +235,3 @@ class DutchPhonetics:
fon_rep = klank + additie
tot_len = spel_len + additie_len
representations[rep_idx] = rep[:spelling_pos] + fon_rep + rep[spelling_pos + tot_len:]
# Test examples
#DutchPhonetics.mmetaphone("Detail")
#DutchPhonetics.mmetaphone("Haai")
#DutchPhonetics.mmetaphone("Leeuw")
#DutchPhonetics.mmetaphone("Lach")
#DutchPhonetics.mmetaphone("Lag")
#DutchPhonetics.mmetaphone('Jazz') # loanword; its representation cannot be captured 100% unless handled as a special case
#DutchPhonetics.mmetaphone("Handvat") # 'dv' does not become a 'tf' sound yet
#DutchPhonetics.mmetaphone("Hand")
#DutchPhonetics.mmetaphone("Weggelopen")
# 'gg' does not become a 'xg>' sound yet, and because only a single consonant ("medeklinker") follows, the second
# 'e' becomes an 'e:' sound. It should be more like a 'y' sound, but the question is whether 'ge' occurs as a pattern
# often enough that it can be handled as a special case of 2 letters.
#DutchPhonetics.mmetaphone("Pet")
#DutchPhonetics.mmetaphone("Petten")
#DutchPhonetics.mmetaphone("Peter")
#DutchPhonetics.mmetaphone("Feeën")
#DutchPhonetics.mmetaphone("Schakelen")
#DutchPhonetics.mmetaphone("Ik")
#DutchPhonetics.mmetaphone("Radioprogramma")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment