DeveelDB  20151217
A complete SQL database system, primarily developed for the .NET/Mono frameworks
PatternSearch.cs
Go to the documentation of this file.
1 //
2 // Copyright 2010-2015 Deveel
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 using System;
18 using System.Text;
19 
20 namespace Deveel.Data.Text {
/// <summary>
/// Static helpers that match strings against SQL <c>LIKE</c>-style patterns,
/// where <c>%</c> stands for zero or more characters and <c>_</c> stands for
/// exactly one character.
/// </summary>
/// <remarks>
/// All comparisons are ordinal (character by character), so the literal parts
/// of a pattern are matched the same way as the single-character checks.
/// </remarks>
public static class PatternSearch {
    // Statics for the tokens.
    private const char ZeroOrMoreChars = '%';
    private const char OneChar = '_';

    /// <summary>
    /// Returns true if the given character is a wild card (unknown).
    /// </summary>
    /// <param name="ch">The character to test.</param>
    /// <returns>
    /// <c>true</c> if <paramref name="ch"/> is <c>%</c> or <c>_</c>, otherwise <c>false</c>.
    /// </returns>
    public static bool IsWildCard(char ch) {
        return (ch == OneChar || ch == ZeroOrMoreChars);
    }

    /// <summary>
    /// Matches a whole pattern against a whole string.
    /// </summary>
    /// <param name="pattern">The pattern, possibly containing wild cards and escapes.</param>
    /// <param name="str">The string to test.</param>
    /// <param name="escapeChar">
    /// The character that makes the following pattern character a literal.
    /// </param>
    /// <returns>
    /// <c>true</c> if the entire <paramref name="str"/> matches <paramref name="pattern"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// If <paramref name="pattern"/> or <paramref name="str"/> is <c>null</c>.
    /// </exception>
    public static bool FullPatternMatch(string pattern, string str, char escapeChar) {
        if (pattern == null) {
            throw new ArgumentNullException("pattern");
        }
        if (str == null) {
            throw new ArgumentNullException("str");
        }

        // Split the pattern into the literal prefix (escapes resolved) and the
        // remainder that starts at the first unescaped wild card.
        var prefix = new StringBuilder();
        string remainder = "";
        bool escaped = false;
        for (int i = 0; i < pattern.Length; ++i) {
            char c = pattern[i];
            if (escaped) {
                escaped = false;
                prefix.Append(c);
            } else if (c == escapeChar) {
                escaped = true;
            } else if (IsWildCard(c)) {
                remainder = pattern.Substring(i);
                break;
            } else {
                prefix.Append(c);
            }
        }

        string literal = prefix.ToString();

        // Ordinal comparison guarantees that a successful StartsWith implies
        // literal.Length <= str.Length, so the Substring below cannot throw.
        if (!str.StartsWith(literal, StringComparison.Ordinal)) {
            return false;
        }

        string tail = str.Substring(literal.Length);

        // With no wild card in the pattern the string must equal the prefix.
        return remainder.Length > 0
            ? PatternMatch(remainder, tail, escapeChar)
            : tail.Length == 0;
    }

    /// <summary>
    /// This is the pattern match recursive method. It recurses on each wild
    /// card found in the pattern.
    /// </summary>
    /// <param name="pattern">
    /// The pattern to match. It is expected to start with a wild card: a first
    /// character other than <c>_</c> is handled as if it were <c>%</c>.
    /// </param>
    /// <param name="expression">The text to match against the pattern.</param>
    /// <param name="escapeChar">
    /// The character that makes the following pattern character a literal.
    /// An escape character at the very end of the pattern is ignored.
    /// </param>
    /// <returns>
    /// <c>true</c> if the whole <paramref name="expression"/> matches the pattern.
    /// An empty pattern matches only an empty expression.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// If <paramref name="pattern"/> or <paramref name="expression"/> is <c>null</c>.
    /// </exception>
    public static bool PatternMatch(string pattern, string expression, char escapeChar) {
        if (pattern == null) {
            throw new ArgumentNullException("pattern");
        }
        if (expression == null) {
            throw new ArgumentNullException("expression");
        }

        if (pattern.Length == 0) {
            return expression.Length == 0;
        }

        if (pattern[0] == OneChar) {
            return MatchAfterOneChar(pattern, expression, escapeChar);
        }

        return MatchAfterZeroOrMore(pattern, expression, escapeChar);
    }

    // Handles a pattern whose first character is the single-character wild card.
    private static bool MatchAfterOneChar(string pattern, string expression, char escapeChar) {
        // The leading '_' must consume exactly one character.
        if (expression.Length == 0) {
            return false;
        }

        // The pattern and the expression are tracked with separate indexes
        // because an escape character advances only the pattern.
        int p = 1;
        int e = 1;
        bool escaped = false;

        while (p < pattern.Length) {
            char c = pattern[p];
            if (!escaped && c == escapeChar) {
                escaped = true;
                ++p;
            } else if (escaped || !IsWildCard(c)) {
                escaped = false;
                // A literal must be present at the current expression position.
                if (e >= expression.Length || expression[e] != c) {
                    return false;
                }
                ++p;
                ++e;
            } else {
                // Found a wild card, so recurse on it with what is left.
                return PatternMatch(pattern.Substring(p), expression.Substring(e), escapeChar);
            }
        }

        // The pattern is exhausted: it matches only if the expression is too.
        return e == expression.Length;
    }

    // Handles a pattern whose first character stands for zero or more characters.
    private static bool MatchAfterZeroOrMore(string pattern, string expression, char escapeChar) {
        // A lone '%' matches any expression.
        if (pattern.Length == 1) {
            return true;
        }

        // Collect the literal characters (escapes resolved) that follow the
        // wild card, up to the next unescaped wild card or the pattern's end.
        var literal = new StringBuilder();
        int p = 1;
        bool escaped = false;
        while (p < pattern.Length) {
            char c = pattern[p];
            if (!escaped && c == escapeChar) {
                escaped = true;
                ++p;
            } else if (escaped || !IsWildCard(c)) {
                escaped = false;
                literal.Append(c);
                ++p;
            } else {
                break;
            }
        }

        string findString = literal.ToString();

        // If the pattern ends here, e.g. "%er", the expression only has to
        // end with the literal.
        if (p >= pattern.Length) {
            return expression.EndsWith(findString, StringComparison.Ordinal);
        }

        // Otherwise another wild card follows. The rest of the pattern starts
        // at 'p', which also accounts for any escape characters consumed above.
        string remainder = pattern.Substring(p);
        int index = expression.IndexOf(findString, StringComparison.Ordinal);

        while (index != -1) {
            if (PatternMatch(remainder, expression.Substring(index + findString.Length), escapeChar)) {
                return true;
            }

            // An empty literal can be "found" at the very end of the expression;
            // searching past that point would make IndexOf throw.
            if (index >= expression.Length) {
                break;
            }

            index = expression.IndexOf(findString, index + 1, StringComparison.Ordinal);
        }

        return false;
    }
}
253 }
static bool PatternMatch(string pattern, string expression, char escapeChar)
This is the pattern match recursive method.
static bool IsWildCard(char ch)
Returns true if the given character is a wild card (unknown).
This is a static class that performs the operations to do a pattern search on a given column of a tab...
static bool FullPatternMatch(string pattern, string str, char escapeChar)
Matches a pattern against a string and returns true if it matches or false otherwise.