Skip to content
Snippets Groups Projects
Commit 11fa4707 authored by Hees, L.J.H. (Laurens)'s avatar Hees, L.J.H. (Laurens)
Browse files

improved dutch_mmetaphone docstring

parent 55356a69
No related branches found
No related tags found
1 merge request: !132 "Sc 187/cleanup and docstrings"
......@@ -6,7 +6,7 @@ import nltk
class DutchPhonetics:
"""A class for translating the spelling of a word into a phonetic
representation and analyzing these phonetic representations."""
representation and analyzing such phonetic representations."""
def __init__(self):
self.unvoiced = {'P': 'p', 'T': 't', 'K': 'k', 'F': 'f', 'S': 's'}
......@@ -23,9 +23,10 @@ class DutchPhonetics:
self.double_diphthong = {'AA': 'a:', 'EE': 'e:', 'IE': 'i',
'OO': 'o:', 'OE': 'u'}
self.double = {'UU': 'y', 'EU': '2:', 'OE': 'u', 'UE': 'u:', 'IJ': 'e>i',
'EI': 'e>i', 'UI': '9y', 'OU': 'v>u', 'AU': 'v>u',
'SJ': 's>', 'NG': 'n>', 'DT': 't', 'TH': 't'}
self.double = {'UU': 'y', 'EU': '2:', 'OE': 'u', 'UE': 'u:',
'IJ': 'e>i', 'EI': 'e>i', 'UI': '9y', 'OU': 'v>u',
'AU': 'v>u', 'SJ': 's>', 'NG': 'n>', 'DT': 't',
'TH': 't'}
self.ambiguous = {'CH': ('x', 'g>', 's>')}
self.triples = {'AAI': 'a:i', 'OOI': 'o:i', 'OEI': 'u:i'}
self.quadruples = {'EEUW': 'e:u', 'IEUW': 'i:u'}
......@@ -41,15 +42,15 @@ class DutchPhonetics:
self.ambiguous = {'J': ('z>', 'j', 'dz>'), 'X': ('ks', 'z')}
def compare(self, word1, word2):
"""Compare the phonetic representations of 2 source strings and see
if one of them matches.
"""Compare the phonetic representations of two strings and see if
one of them matches.
Args:
word1 (str): a word
word2 (str): another word
word1: a word String
word2: another word String
Returns:
similar (bool): indicating whether the phonetic representations
of word1 and word2 match
similar: indicates whether the phonetic representations of word1
and word2 match
"""
word1 = PhonRep(word1)
word2 = PhonRep(word2)
......@@ -60,13 +61,13 @@ class DutchPhonetics:
return similar
def phonetically_similar(self, word1, word2):
"""Checks if two words are phonetically similar or not.
"""Check if two words are phonetically similar.
Args:
word1: string
word2: string
word1: String
word2: String
Returns:
Bool: True if similar, False otherwise
True if similar, False otherwise
"""
word1_phon = PhonRep(word1)
word2_phon = PhonRep(word2)
......@@ -77,21 +78,21 @@ class DutchPhonetics:
for comb in combinations:
rep_word1 = comb[0]
rep_word2 = comb[1]
return self.checklengths(rep_word1, rep_word2, word1, word2)
return self.check_lengths(rep_word1, rep_word2, word1, word2)
return False
@staticmethod
def checklengths(rep_word1, rep_word2, word1, word2):
"""Check lengths of Words and their phonetic representations.
def check_lengths(rep_word1, rep_word2, word1, word2):
"""Check lengths of words and their phonetic representations.
Args:
rep_word1: First Word's phonetic representation
rep_word2: Second Word's phonetic representation
word1: First Word
word2: Second Word
rep_word1: First word's phonetic representation
rep_word2: Second word's phonetic representation
word1: First word String
word2: Second word String
Returns:
Boolean: denoting whether the edit distance of the
Boolean denoting whether the edit distance of the
phonetic representations is smaller than the lengths
of these phonetic representations.
Furthermore it denotes whether this distance is smaller
......@@ -107,11 +108,10 @@ class DutchPhonetics:
and len(word2) > 3)
def mmetaphone(self, word):
"""Controlling method. Take unconverted phonetic representation and
convert to a correct phonetic representation.
"""Convert an unconverted word to a correct phonetic representation.
Args:
word (PhonRep): word that is be converted
word: String of word that is to be converted
"""
self.triple_letters(word)
self.double_letters(word)
......@@ -121,15 +121,16 @@ class DutchPhonetics:
return word
def triple_letters(self, word):
    """Convert the exceptional three-letter spellings found in a word.

    The fixed table below is handed to ``self.filter`` together with a
    pattern length of 3; the actual replacement happens there.

    Args:
        word: The word object, forwarded unchanged to ``self.filter``.
    """
    exceptions = {'AIL': 'a>i', 'TSJ': 'ts>'}
    self.filter(word, exceptions, 3)
def double_letters(self, word):
"""Recognise 2 letter patterns in the source and convert them to their
phonetic representation. """
"""Recognise 2 letter patterns in a word and convert them to their
phonetic representations.
"""
self.filter(word, self.triples, 3)
self.filter(word, self.quadruples, 4)
self.filter(word, self.double_diphthong, 2)
......@@ -140,7 +141,7 @@ class DutchPhonetics:
"""Deal with single letters.
Args:
word (PhonRep): PhonRep that is to be altered.
word: Word that is to be converted
"""
for l_idx, letter in enumerate(word.source):
analyzed = self.analyze(letter, l_idx, word)
......@@ -148,27 +149,26 @@ class DutchPhonetics:
self.filter(word, analyzed, 1)
def special_cases(self, word):
    """Apply the single-character special-case tables to a word.

    Every table is passed to ``self.filter`` with a pattern length of 1,
    in the fixed order below. The ambiguous table is the only one passed
    with ``ambiguous=True``. According to the original note, not all
    diacritics are represented in these tables.

    Args:
        word: The word object, forwarded unchanged to ``self.filter``.
    """
    self.filter(word, self.ambiguous, 1, ambiguous=True)
    self.filter(word, self.aigu, 1)
    self.filter(word, self.grave_circondakje, 1)
    self.filter(word, self.trema, 1)  # More common in Dutch
    self.filter(word, self.overig, 1)  # Less common in Dutch
    self.filter(word, self.umlaut, 1)
def filter(self, word, symbols, length, ambiguous=False):
"""Filter letters from word and adapt phonetic representation accordingly.
"""Filter letters from word and adapt phonetic representation
accordingly.
Args:
word(PhonRep): PhonRep object of word
symbols (dict): dictionary in which letter, sound pairs are stored
length (int): length of symbols in symbols
Kwargs:
ambiguous (bool): indicating whether the sounds are 'ambiguous' and
whether the representation should be updated
accordingly, False by default
word: Input word
symbols: Dictionary in which letter, sound pairs are stored
length: Length of symbols in symbols
ambiguous: Indicating whether the sounds are ambiguous and
whether the representation should be updated accordingly
"""
for item in symbols:
item_pos = word.source.find(item)
......@@ -188,13 +188,13 @@ class DutchPhonetics:
representation, based on a single letter.
Args:
letter (chr): a letter
word (str): a word
l_idx: index of letter in word
letter: A letter char
word: A word String
l_idx: Index of letter in word
Returns:
(dict): a dictionary containing phonetic representations of the
category letter falls in.
A dictionary containing phonetic representations of the category
the letter falls in.
"""
unvoiced = self.unvoiced
voiced = self.voiced
......@@ -203,18 +203,18 @@ class DutchPhonetics:
vowels = self.vowels
long_vowels = self.long_vowels
if (letter in unvoiced and l_idx + 1 < len(word)
and word.source[l_idx + 1] in voiced and letter in revoicing):
and word.source[l_idx + 1] in voiced and letter in revoicing):
return dict({letter: revoicing[letter]})
if (letter in voiced and l_idx + 1 < len(word) and
word.source[l_idx + 1] in unvoiced and letter in devoicing):
word.source[l_idx + 1] in unvoiced and letter in devoicing):
return dict({letter: devoicing[letter]})
if letter in unvoiced:
return dict({letter: unvoiced[letter]})
if (letter in voiced and l_idx == len(word.source) - 1
and letter in devoicing):
and letter in devoicing):
return dict({letter: devoicing[letter]})
if letter in voiced:
......@@ -237,62 +237,41 @@ class DutchPhonetics:
class PhonRep:
"""A class for storing the possible phonetic representations
of words."""
of words.
"""
def __init__(self, source):
self._source = source.upper()
self._representation = [self.source]
def __len__(self):
"""Returns the length of the source word."""
"""Return the length of the source word."""
return len(self._source)
@property
def source(self):
"""Returns the spelling representation of the source word."""
return self._source
@source.setter
def source(self, src):
"""Sets the source/spelling representation of the phonetic
representations object.
Args:
src: String object denoting the source/spelling
representation of a word.
"""
self._source = src
@property
def representation(self):
"""Phonetic representation of word. It is a list,
a word can have multiple phonetic representations.
"""
return self._representation
@representation.setter
def representation(self, rep):
"""Sets the list of phonetic representations in the
phonetic representation object.
Args:
rep: list of String objects, denoting the phonetic
representations of a word.
"""
self._representation = rep
def update(self, letter, sound, ambiguous=False):
def update(self, letter, sound):
"""Update the phonetic representation of the word.
Args:
letter (str): normal representation of letter
sound (str): phonetic representation of letter to be added to (one of the)
representation
Keyword args:
ambiguous (bool): whether sound is ambiguous, False by default
letter: Normal representation of letter
sound: Phonetic representation of letter to be added to the
representation of the word
"""
for rep_idx, rep in enumerate(self.representation):
spel_len = len(letter)
spelling_pos = rep.find(letter)
......@@ -305,7 +284,6 @@ class PhonRep:
def update_ambiguous(self, letter, sounds):
    """Update the representation once for every candidate sound.

    Args:
        letter: Spelling that is passed on to ``self.update``.
        sounds: Iterable of candidate sounds for ``letter``;
            ``self.update`` is called once per element, in iteration
            order.
    """
    for amb_sound in sounds:
        self.update(letter, amb_sound)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment