DeveelDB  20151217
A complete SQL database system, primarily developed for the .NET/Mono frameworks
Public Member Functions | Private Member Functions | Private Attributes | List of all members
Deveel.Data.Text.Metaphone Class Reference

Public Member Functions

 Metaphone ()
 
string Compute (string s)
 

Private Member Functions

bool IsLastChar (int wdsz, int n)
 
bool RegionMatch (StringBuilder sb, int index, String test)
 
bool IsVowel (StringBuilder sb, int index)
 
bool IsPreviousChar (StringBuilder sb, int index, char c)
 
bool IsNextChar (StringBuilder sb, int index, char c)
 

Private Attributes

string vowels = "AEIOU"
 
string frontv = "EIY"
 
string varson = "CSPTG"
 
int maxCodeLen = 4
 

Detailed Description

Definition at line 21 of file Metaphone.cs.

Constructor & Destructor Documentation

Deveel.Data.Text.Metaphone.Metaphone ( )
inline

Definition at line 23 of file Metaphone.cs.

23  { // intentionally empty: the private fields are initialised inline
24  }

Member Function Documentation

string Deveel.Data.Text.Metaphone.Compute ( string  s)
inline

Definition at line 71 of file Metaphone.cs.

71  { // builds the Metaphone code for s, at most maxCodeLen characters long
72  if (string.IsNullOrEmpty(s)) // null or empty input yields an empty code
73  return "";
74 
75  // a single character is its own code (upper-cased)
76  if (s.Length == 1)
77  return s.ToUpper();
78 
79  char[] inwd = s.ToUpper().ToCharArray() ; // NOTE(review): parameterless ToUpper(); confirm culture-sensitive casing is acceptable here
80 
81  StringBuilder local = new StringBuilder(40); // working copy of the word, with initial-letter exceptions applied
82  StringBuilder code = new StringBuilder(10) ; // resulting phonetic code
83  // handle the exceptions that apply to the first two characters
84  switch(inwd[0]) {
85  case 'K' :
86  case 'G' :
87  case 'P' : /* KN, GN, PN: drop the first letter and start at the N */
88  if (inwd[1] == 'N') {
89  local.Append(inwd, 1, inwd.Length - 1);
90  } else {
91  local.Append(inwd);
92  }
93  break;
94  case 'A': /* AE: drop the leading A and start at the E */
95  if (inwd[1] == 'E') {
96  local.Append(inwd, 1, inwd.Length - 1);
97  } else {
98  local.Append(inwd);
99  }
100  break;
101  case 'W' : /* WR and WH are special-cased */
102  if (inwd[1] == 'R') { // WR -> R (the W is dropped)
103  local.Append(inwd, 1, inwd.Length - 1);
104  break ;
105  }
106  if (inwd[1] == 'H') {
107  local.Append(inwd, 1, inwd.Length - 1);
108  local[0] = 'W'; // WH -> W: the buffer starts at the H, which is overwritten with W
109  } else {
110  local.Append(inwd);
111  }
112  break;
113  case 'X' : /* an initial X becomes S */
114  inwd[0] = 'S';
115  local.Append(inwd);
116  break ;
117  default :
118  local.Append(inwd);
119  break;
120  } // local now holds the working string with the initial letters fixed up
121 
122  int wdsz = local.Length;
123  int n = 0 ;
124 
125  while ((code.Length < this.maxCodeLen) &&
126  (n < wdsz) ) { // stop once the code reaches maxCodeLen or the word is consumed
127  char symb = local[n];
128  // skip a letter that repeats the previous one, except for C
129  if ((symb != 'C') && (IsPreviousChar( local, n, symb )) ) {
130  n++ ;
131  } else { // not a duplicate
132  switch(symb) {
133  case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
134  if (n == 0) {
135  code.Append(symb);
136  }
137  break ; // a vowel is kept only when it is the leading character
138  case 'B' :
139  if (IsPreviousChar(local, n, 'M') &&
140  IsLastChar(wdsz, n) ) { // B is silent if the word ends in MB
141  break;
142  }
143  code.Append(symb);
144  break;
145  case 'C' : // lots of C special cases, tested in priority order
146  /* discard if SCI, SCE or SCY */
147  if (IsPreviousChar(local, n, 'S') &&
148  !IsLastChar(wdsz, n) &&
149  (this.frontv.IndexOf(local[n + 1]) >= 0)) {
150  break;
151  }
152  if (RegionMatch(local, n, "CIA")) { // "CIA" -> X
153  code.Append('X');
154  break;
155  }
156  if (!IsLastChar(wdsz, n) &&
157  (this.frontv.IndexOf(local[n + 1]) >= 0)) {
158  code.Append('S');
159  break; // CI, CE, CY -> S
160  }
161  if (IsPreviousChar(local, n, 'S') &&
162  IsNextChar(local, n, 'H') ) { // SCH -> SK; only the K is emitted here
163  code.Append('K') ;
164  break ;
165  }
166  if (IsNextChar(local, n, 'H')) { // detect CH
167  if ((n == 0) &&
168  (wdsz >= 3) &&
169  IsVowel(local,2) ) { // initial CH followed by a vowel -> K. NOTE(review): the original comment said "CH consonant", but the code tests IsVowel - confirm intent
170  code.Append('K');
171  } else {
172  code.Append('X'); // every other CH -> X
173  }
174  } else {
175  code.Append('K');
176  }
177  break ;
178  case 'D' :
179  if (!IsLastChar(wdsz, n + 1) &&
180  IsNextChar(local, n, 'G') &&
181  (this.frontv.IndexOf(local[n + 2]) >= 0)) { // DGE DGI DGY -> J
182  code.Append('J'); n += 2 ; // together with the n++ below this steps over the G and the front vowel
183  } else {
184  code.Append('T');
185  }
186  break ;
187  case 'G' : // GH is silent at the end of the word or before a non-vowel
188  if (IsLastChar(wdsz, n + 1) &&
189  IsNextChar(local, n, 'H')) {
190  break;
191  }
192  if (!IsLastChar(wdsz, n + 1) &&
193  IsNextChar(local,n,'H') &&
194  !IsVowel(local,n+2)) {
195  break;
196  }
197  if ((n > 0) &&
198  (RegionMatch(local, n, "GN") ||
199  RegionMatch(local, n, "GNED") ) ) { // NOTE(review): the "GNED" test is redundant, "GN" already matches whenever "GNED" does
200  break; // silent G
201  }
202  var hard = false ; // NOTE(review): the duplicate-letter check above already skips a G preceded by G, so hard appears to be always false here - confirm
203  if (IsPreviousChar(local, n, 'G')) {
204  hard = true ;
205  } else {
206  hard = false ;
207  }
208  if (!IsLastChar(wdsz, n) &&
209  (this.frontv.IndexOf(local[n + 1]) >= 0) &&
210  (!hard)) {
211  code.Append('J'); // G before E, I or Y -> J
212  } else {
213  code.Append('K');
214  }
215  break ;
216  case 'H':
217  if (IsLastChar(wdsz, n)) {
218  break ; // terminal H is silent
219  }
220  if ((n > 0) &&
221  (this.varson.IndexOf(local[n - 1]) >= 0)) { // H directly after C, S, P, T or G is silent
222  break;
223  }
224  if (IsVowel(local,n+1)) {
225  code.Append('H'); // H followed by a vowel is kept
226  }
227  break;
228  case 'F':
229  case 'J' :
230  case 'L' :
231  case 'M':
232  case 'N' :
233  case 'R' :
234  code.Append(symb); // these letters map to themselves
235  break;
236  case 'K' :
237  if (n > 0) { // not initial
238  if (!IsPreviousChar(local, n, 'C')) { // K directly after C is dropped
239  code.Append(symb);
240  }
241  } else {
242  code.Append(symb); // initial K
243  }
244  break ;
245  case 'P' :
246  if (IsNextChar(local,n,'H')) {
247  // PH -> F
248  code.Append('F');
249  } else {
250  code.Append(symb);
251  }
252  break ;
253  case 'Q' :
254  code.Append('K');
255  break;
256  case 'S' :
257  if (RegionMatch(local,n,"SH") ||
258  RegionMatch(local,n,"SIO") ||
259  RegionMatch(local,n,"SIA")) { // SH, SIO, SIA -> X
260  code.Append('X');
261  } else {
262  code.Append('S');
263  }
264  break;
265  case 'T' :
266  if (RegionMatch(local,n,"TIA") ||
267  RegionMatch(local,n,"TIO")) { // TIA, TIO -> X
268  code.Append('X');
269  break;
270  }
271  if (RegionMatch(local,n,"TCH")) {
272  // T is silent in "TCH"
273  break;
274  }
275  // the digit 0 stands in for TH (it resembles a theta)
276  if (RegionMatch(local,n,"TH")) {
277  code.Append('0');
278  } else {
279  code.Append('T');
280  }
281  break ;
282  case 'V' :
283  code.Append('F');
284  break ;
285  case 'W' : case 'Y' : // silent unless followed by a vowel
286  if (!IsLastChar(wdsz,n) &&
287  IsVowel(local,n+1)) {
288  code.Append(symb);
289  }
290  break ;
291  case 'X' :
292  code.Append('K'); // X -> KS, which can push the code one past the limit
293  code.Append('S');
294  break ;
295  case 'Z' :
296  code.Append('S');
297  break ;
298  } // end switch
299  n++ ;
300  } // end else from the duplicate-letter check
301  if (code.Length > this.maxCodeLen) { // trim back to the limit after a two-character append
302  code.Length = maxCodeLen;
303  }
304  }
305  return code.ToString();
306  }
bool RegionMatch(StringBuilder sb, int index, String test)
Definition: Metaphone.cs:39
bool IsVowel(StringBuilder sb, int index)
Definition: Metaphone.cs:49
bool IsPreviousChar(StringBuilder sb, int index, char c)
Definition: Metaphone.cs:53
bool IsNextChar(StringBuilder sb, int index, char c)
Definition: Metaphone.cs:61
bool IsLastChar(int wdsz, int n)
Definition: Metaphone.cs:35
bool Deveel.Data.Text.Metaphone.IsLastChar ( int  wdsz,
int  n 
)
inline private

Definition at line 35 of file Metaphone.cs.

35  {
36  return n + 1 == wdsz; // true when n is the final index of a word of length wdsz
37  }
bool Deveel.Data.Text.Metaphone.IsNextChar ( StringBuilder  sb,
int  index,
char  c 
)
inline private

Definition at line 61 of file Metaphone.cs.

61  {
62  bool matches = false; // stays false when index has no following character
63  if (index >= 0 && index < sb.Length - 1) { // a next character exists only before the last index
64  matches = sb[index + 1] == c;
65  }
66  return matches;
67  }
bool Deveel.Data.Text.Metaphone.IsPreviousChar ( StringBuilder  sb,
int  index,
char  c 
)
inline private

Definition at line 53 of file Metaphone.cs.

53  {
54  bool matches = false; // stays false for index 0 or an index past the end
55  if( index > 0 && index < sb.Length) { // a previous character exists only from index 1 onwards
56  matches = sb[index - 1] == c;
57  }
58  return matches;
59  }
bool Deveel.Data.Text.Metaphone.IsVowel ( StringBuilder  sb,
int  index 
)
inline private

Definition at line 49 of file Metaphone.cs.

49  {
50  return (this.vowels.IndexOf(sb[index]) >= 0); // true when sb[index] is one of the characters in vowels; index is not range-checked here
51  }
bool Deveel.Data.Text.Metaphone.RegionMatch ( StringBuilder  sb,
int  index,
String  test 
)
inline private

Definition at line 39 of file Metaphone.cs.

39  {
40  bool matches = false; // stays false when test would not fit inside sb starting at index
41  if( index >= 0 &&
42  (index + test.Length - 1) < sb.Length) { // the last compared position must lie inside sb
43  string substring = sb.ToString(index, test.Length);
44  matches = substring.Equals(test); // exact comparison of the extracted region against test
45  }
46  return matches;
47  }

Member Data Documentation

string Deveel.Data.Text.Metaphone.frontv = "EIY"
private

Definition at line 29 of file Metaphone.cs.

int Deveel.Data.Text.Metaphone.maxCodeLen = 4
private

Definition at line 31 of file Metaphone.cs.

string Deveel.Data.Text.Metaphone.varson = "CSPTG"
private

Definition at line 30 of file Metaphone.cs.

string Deveel.Data.Text.Metaphone.vowels = "AEIOU"
private

Definition at line 28 of file Metaphone.cs.


The documentation for this class was generated from the following file: