DeveelDB  20151217
complete SQL database system, primarly developed for .NET/Mono frameworks
ColumnIndex.cs
Go to the documentation of this file.
1 //
2 // Copyright 2010-2015 Deveel
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 using System;
18 using System.Collections.Generic;
19 using System.Linq;
20 
21 using Deveel.Data.Sql;
22 using Deveel.Data.Sql.Tables;
23 
24 namespace Deveel.Data.Index {
25  public abstract class ColumnIndex : IDisposable {
26  private static readonly BlockIndex<int> EmptyList;
27  private static readonly BlockIndex<int> OneList;
28 
29  protected ColumnIndex(ITable table, int columnOffset) {
30  ColumnOffset = columnOffset;
31  Table = table;
32  }
33 
35  Dispose(false);
36  }
37 
38  static ColumnIndex() {
39  EmptyList = new BlockIndex<int>();
40  EmptyList.IsReadOnly = true;
41  OneList = new BlockIndex<int>();
42  OneList.Add(0);
43  OneList.IsReadOnly = true;
44  }
45 
46  public ITable Table { get; private set; }
47 
48  public int ColumnOffset { get; private set; }
49 
50  public virtual bool IsReadOnly {
51  get { return false; }
52  }
53 
54  public virtual bool HandlesTextSearch {
55  get { return false; }
56  }
57 
58  public abstract string IndexType { get; }
59 
60  protected DataObject GetValue(long row) {
61  return Table.GetValue(row, ColumnOffset);
62  }
63 
64  public abstract ColumnIndex Copy(ITable table, bool readOnly);
65 
66  public void Dispose() {
67  Dispose(true);
68  GC.SuppressFinalize(this);
69  }
70 
71  protected virtual void Dispose(bool disposing) {
72  }
73 
74  public abstract void Insert(int rowNumber);
75 
76  public abstract void Remove(int rowNumber);
77 
78  public IIndex<int> Order(IEnumerable<int> rows) {
79  var rowSet = rows.ToList();
80 
81  // The length of the set to order
82  int rowSetLength = rowSet.Count;
83 
84  // Trivial cases where sorting is not required:
85  // NOTE: We use readOnly objects to save some memory.
86  if (rowSetLength == 0)
87  return EmptyList;
88  if (rowSetLength == 1)
89  return OneList;
90 
91  // This will be 'row set' sorted by its entry lookup. This must only
92  // contain indices to rowSet entries.
93  var newSet = new BlockIndex<int>();
94 
95  if (rowSetLength <= 250000) {
96  // If the subset is less than or equal to 250,000 elements, we generate
97  // an array in memory that contains all values in the set and we sort
98  // it. This requires use of memory from the heap but is faster than
99  // the no heap use method.
100  var subsetList = new List<DataObject>(rowSetLength);
101  foreach (long row in rowSet) {
102  subsetList.Add(GetValue(row));
103  }
104 
105  // The comparator we use to sort
106  var comparer = new SubsetIndexComparer(subsetList.ToArray());
107 
108  // Fill new_set with the set { 0, 1, 2, .... , row_set_length }
109  for (int i = 0; i < rowSetLength; ++i) {
110  var cell = subsetList[i];
111  newSet.InsertSort(cell, i, comparer);
112  }
113 
114  } else {
115  // This is the no additional heap use method to sorting the sub-set.
116 
117  // The comparator we use to sort
118  var comparer = new IndexComparer(this, rowSet);
119 
120  // Fill new_set with the set { 0, 1, 2, .... , row_set_length }
121  for (int i = 0; i < rowSetLength; ++i) {
122  var cell = GetValue(rowSet[i]);
123  newSet.InsertSort(cell, i, comparer);
124  }
125  }
126 
127  return newSet;
128  }
129 
130  public IEnumerable<int> SelectRange(IndexRange range) {
131  return SelectRange(new[] {range});
132  }
133 
134  public abstract IEnumerable<int> SelectRange(IndexRange[] ranges);
135 
136  public virtual IEnumerable<int> SelectAll() {
137  return SelectRange(new IndexRange(
140  }
141 
142  public virtual IEnumerable<int> SelectFirst() {
143  // NOTE: This will find NULL at start which is probably wrong. The
144  // first value should be the first non null value.
145  return SelectRange(new IndexRange(
148  }
149 
150  public IEnumerable<int> SelectNotFirst() {
151  // NOTE: This will find NULL at start which is probably wrong. The
152  // first value should be the first non null value.
153  return SelectRange(new IndexRange(
154  RangeFieldOffset.AfterLastValue, IndexRange.FirstInSet,
156  }
157 
158  public IEnumerable<int> SelectLast() {
159  return SelectRange(new IndexRange(
162  }
163 
164  public IEnumerable<int> SelectNotLast() {
165  return SelectRange(new IndexRange(
167  RangeFieldOffset.BeforeFirstValue, IndexRange.LastInSet));
168  }
169 
174  public IEnumerable<int> SelectAllNonNull() {
175  return SelectRange(new IndexRange(
176  RangeFieldOffset.AfterLastValue, DataObject.Null(),
178  }
179 
180  public IEnumerable<int> SelectEqual(DataObject ob) {
181  if (ob.IsNull)
182  return new List<int>(0);
183 
184  return SelectRange(new IndexRange(
185  RangeFieldOffset.FirstValue, ob,
186  RangeFieldOffset.LastValue, ob));
187  }
188 
189  public IEnumerable<int> SelectNotEqual(DataObject ob) {
190  if (ob.IsNull) {
191  return new List<int>(0);
192  }
193  return SelectRange(new IndexRange[]
194  {
195  new IndexRange(
196  RangeFieldOffset.AfterLastValue, DataObject.Null(),
197  RangeFieldOffset.BeforeFirstValue, ob)
198  , new IndexRange(
199  RangeFieldOffset.AfterLastValue, ob,
201  });
202  }
203 
204  public IEnumerable<int> SelectGreater(DataObject ob) {
205  if (ob.IsNull)
206  return new List<int>(0);
207 
208  return SelectRange(new IndexRange(
209  RangeFieldOffset.AfterLastValue, ob,
211  }
212 
213  public IEnumerable<int> SelectLess(DataObject ob) {
214  if (ob.IsNull)
215  return new List<int>(0);
216 
217  return SelectRange(new IndexRange(
218  RangeFieldOffset.AfterLastValue, DataObject.Null(),
219  RangeFieldOffset.BeforeFirstValue, ob));
220  }
221 
222  public IEnumerable<int> SelectGreaterOrEqual(DataObject ob) {
223  if (ob.IsNull)
224  return new List<int>(0);
225 
226  return SelectRange(new IndexRange(
227  RangeFieldOffset.FirstValue, ob,
229  }
230 
231  public IEnumerable<int> SelectLessOrEqual(DataObject ob) {
232  if (ob.IsNull)
233  return new List<int>(0);
234 
235  return SelectRange(new IndexRange(
236  RangeFieldOffset.AfterLastValue, DataObject.Null(),
237  RangeFieldOffset.LastValue, ob));
238  }
239 
240  public IEnumerable<int> SelectBetween(DataObject ob1, DataObject ob2) {
241  if (ob1.IsNull || ob2.IsNull)
242  return new List<int>(0);
243 
244  return SelectRange(new IndexRange(
245  RangeFieldOffset.FirstValue, ob1,
246  RangeFieldOffset.BeforeFirstValue, ob2));
247  }
248 
249  public IEnumerable<int> SelectLike(DataObject value) {
250  if (value.IsNull)
251  return new List<int>(0);
252 
253  if (!HandlesTextSearch)
254  return SelectEqual(value);
255 
256  return SearchText(value);
257  }
258 
259  protected virtual IEnumerable<int> SearchText(DataObject value) {
260  throw new NotSupportedException("The column index does not support text search.");
261  }
262 
263  public virtual ColumnIndex GetSubset(ITable subsetTable, int subsetColumn) {
264  if (subsetTable == null)
265  throw new ArgumentNullException("subsetTable");
266 
267  // Resolve table rows in this table scheme domain.
268  List<int> rowSet = new List<int>(subsetTable.RowCount);
269  var e = subsetTable.GetEnumerator();
270  while (e.MoveNext()) {
271  rowSet.Add(e.Current.RowId.RowNumber);
272  }
273 
274  var rows = subsetTable.ResolveRows(subsetColumn, rowSet, Table);
275 
276  // Generates an IIndex which contains indices into 'rowSet' in
277  // sorted order.
278  var newSet = Order(rows);
279 
280  // Our 'new_set' should be the same size as 'rowSet'
281  if (newSet.Count != rowSet.Count) {
282  throw new Exception("Internal sort error in finding sub-set.");
283  }
284 
285  return CreateSubset(subsetTable, subsetColumn, newSet);
286  }
287 
288  protected virtual ColumnIndex CreateSubset(ITable table, int column, IEnumerable<int> rows) {
289  var index = new InsertSearchIndex(table, column, rows);
290  index.RecordUid = false;
291  return index;
292  }
293 
294  #region IndexComparer
295 
296  private class IndexComparer : IIndexComparer<int> {
297  private readonly ColumnIndex scheme;
298  private readonly IEnumerable<int> rowSet;
299 
300  public IndexComparer(ColumnIndex scheme, IEnumerable<int> rowSet) {
301  this.scheme = scheme;
302  this.rowSet = rowSet;
303  }
304 
305  public int CompareValue(int index, DataObject val) {
306  var cell = scheme.GetValue(rowSet.ElementAt((int)index));
307  return cell.CompareTo(val);
308  }
309 
310  public int Compare(int index1, int index2) {
311  throw new NotSupportedException("Shouldn't be called!");
312  }
313  }
314 
315  #endregion
316 
317  #region SubsetIndexComparer
318 
319  private class SubsetIndexComparer : IIndexComparer<int> {
320  private readonly DataObject[] subsetList;
321 
322  public SubsetIndexComparer(DataObject[] subsetList) {
323  this.subsetList = subsetList;
324  }
325 
326  public int CompareValue(int index, DataObject val) {
327  var cell = subsetList[index];
328  return cell.CompareTo(val);
329  }
330 
331  public int Compare(int index1, int index2) {
332  throw new NotSupportedException("Shouldn't be called!");
333  }
334  }
335 
336  #endregion
337  }
338 }
IEnumerable< int > SelectEqual(DataObject ob)
Definition: ColumnIndex.cs:180
virtual IEnumerable< int > SelectFirst()
Definition: ColumnIndex.cs:142
readonly IEnumerable< int > rowSet
Definition: ColumnIndex.cs:298
abstract DataObject GetValue(long rowNumber, int columnOffset)
Gets a single cell within the table that is located at the given column offset and row...
int CompareValue(int index, DataObject val)
Definition: ColumnIndex.cs:305
bool IsNull
Gets a value that indicates if this object is materialized as null.
Definition: DataObject.cs:91
Defines the contract to access the data contained into a table of a database.
Definition: ITable.cs:40
IndexComparer(ColumnIndex scheme, IEnumerable< int > rowSet)
Definition: ColumnIndex.cs:300
IEnumerable< int > SelectGreaterOrEqual(DataObject ob)
Definition: ColumnIndex.cs:222
IEnumerable< int > SelectLast()
Definition: ColumnIndex.cs:158
Describes the range of values to select from an index.
Definition: IndexRange.cs:38
A comparer that is used within IIndex to compares two values which are indices to data that is bei...
static DataObject Null(SqlType type)
Definition: DataObject.cs:630
IEnumerable< int > SelectNotFirst()
Definition: ColumnIndex.cs:150
static readonly DataObject FirstInSet
Definition: IndexRange.cs:42
IIndex< int > Order(IEnumerable< int > rows)
Definition: ColumnIndex.cs:78
virtual IEnumerable< int > SearchText(DataObject value)
Definition: ColumnIndex.cs:259
IEnumerable< int > SelectNotLast()
Definition: ColumnIndex.cs:164
int CompareValue(int index, DataObject val)
Definition: ColumnIndex.cs:326
Represents a dynamic object that encapsulates a defined SqlType and a compatible constant ISqlObject ...
Definition: DataObject.cs:35
DataObject GetValue(long row)
Definition: ColumnIndex.cs:60
IEnumerable< int > SelectRange(IndexRange range)
Definition: ColumnIndex.cs:130
int RowCount
Gets the total number of rows in the table.
Definition: ITable.cs:52
static readonly BlockIndex< int > OneList
Definition: ColumnIndex.cs:27
IEnumerable< int > SelectGreater(DataObject ob)
Definition: ColumnIndex.cs:204
IEnumerable< int > SelectBetween(DataObject ob1, DataObject ob2)
Definition: ColumnIndex.cs:240
static readonly BlockIndex< int > EmptyList
Definition: ColumnIndex.cs:26
IEnumerable< int > SelectLess(DataObject ob)
Definition: ColumnIndex.cs:213
int Compare(int index1, int index2)
Definition: ColumnIndex.cs:310
ColumnIndex(ITable table, int columnOffset)
Definition: ColumnIndex.cs:29
static readonly DataObject LastInSet
Definition: IndexRange.cs:47
IEnumerable< int > SelectNotEqual(DataObject ob)
Definition: ColumnIndex.cs:189
virtual ColumnIndex CreateSubset(ITable table, int column, IEnumerable< int > rows)
Definition: ColumnIndex.cs:288
IEnumerable< int > SelectLessOrEqual(DataObject ob)
Definition: ColumnIndex.cs:231
RangeFieldOffset
The absolute offset of a field in a range of a selection.
IEnumerable< int > SelectLike(DataObject value)
Definition: ColumnIndex.cs:249
virtual IEnumerable< int > SelectAll()
Definition: ColumnIndex.cs:136
virtual void Dispose(bool disposing)
Definition: ColumnIndex.cs:71
virtual ColumnIndex GetSubset(ITable subsetTable, int subsetColumn)
Definition: ColumnIndex.cs:263
IEnumerable< int > SelectAllNonNull()
Definition: ColumnIndex.cs:174