2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 * HashList - super-quick list manager
24 * A HASHLIST effectively implements an in-memory database: it maintains
25 * the objects you specify in a managed list, placing hashtables across
26 * whatever key fields you select. The practical upshot is that it's a
27 * thread-safe, convenient resizable-array manager that provides fast
30 * There are many tidbits about using HashLists mentioned in the examples
31 * below--some are pretty important, so read through them carefully.
33 * EXAMPLES ___________________________________________________________________
35 * // Some boring example dataset--three OBJECT structures
37 * typedef struct OBJECT {
39 * TCHAR szText[ 256 ];
42 * static OBJECT aObjects[] = {
43 * { 275, "First Object" },
44 * { 473, "Second Object" },
45 * { 32, "Third Object" },
48 * // When adding data to the list, note that it's not necessary to create
49 * // keys at any particular time (or at all). Objects already in the list
50 * // when you add a new key will automatically get indexed, just as will any
51 * // new objects that you add later. Remember that you can't track NULL
52 * // objects in the list--hl.Add(0) is illegal. An object can be added in
53 * // the list any number of times.
56 * hl.Add (&aObjects[0]);
57 * hl.CreateKey ("MyKey", MyKey_Compare, MyKey_HashObject, MyKey_HashData);
58 * hl.Add (&aObjects[1]);
59 * hl.Add (&aObjects[2]);
61 * // Walk the hashlist. There are a few things worthy of note here
62 * // besides the enumeration technique itself: calling FindFirst() returns
63 * // a pointer to an *allocated* ENUMERATION structure. The structure
64 * // is automatically freed whenever a FindNext() or FindPrevious() is
65 * // about to return NULL--so loops like the one below don't have to
66 * // do anything to free the ENUMERATION object. Since a hashlist holds
67 * // a critical section while any ENUMERATION object exists for it,
68 * // it's important to remember that if you do not walk the list to its end
69 * // you must explicitly free the enumeration object yourself.
71 * for (LPENUM pEnum = hl.FindFirst(); pEnum; pEnum = pEnum->FindNext())
73 * OBJECT *pObj = (OBJECT*)(pEnum->GetObject());
74 * printf ("%s", pObj->szText);
77 * // Find the first object with 473 as its dwKey element. Note that since
78 * // we're not walking the whole list, we have to free the ENUMERATION object
81 * HASHLISTKEY pKey = hl.FindKey ("MyKey");
82 * DWORD dwKeyFind = 473;
85 * if ((pEnum = pKey->FindFirst (&dwKeyFind)) != NULL)
87 * OBJECT *pObj = (OBJECT*)(pEnum->GetObject());
88 * printf ("%s", pObj->szText);
92 * // A shorter way to get just the first object with 473 as a dwKey element.
93 * // Since it doesn't return an ENUMERATION object, you don't have to free
96 * OBJECT *pObj = (OBJECT*)pKey->GetFirstObject (&dwKeyFind);
98 * // Find all objects with 473 as a dwKey element. Since we're walking the
99 * // whole list, the ENUMERATION object will be deleted automatically.
101 * for (LPENUM pEnum = pKey->FindFirst (&dwKeyFind); pEnum; pEnum = pEnum->FindNext())
103 * OBJECT *pObj = (OBJECT*)(pEnum->GetObject());
104 * printf ("%s", pObj->szText);
107 * // HashLists also provide fast (i.e., effectively constant-time) testing
108 * // to see if an object is already in the list; that allows use of the
109 * // AddUnique() item, which ensures duplicates won't occur.
112 * hl.AddUnique (&aObjects[0]);
113 * hl.AddUnique (&aObjects[0]); // succeeds but doesn't add item
114 * hl.Remove (&aObjects[0]); // list is now empty
117 * HASHLISTKEY pKey = hl.CreateKey ("IntKey", IntKey_Compare, IntKey_HashObject, IntKey_HashData);
121 * hl.AddUnique (&a, pKey);
122 * hl.AddUnique (&b, pKey);
123 * hl.AddUnique (&c, pKey);
124 * // The list now contains two items: {&a} and {&b}.
125 * // Because {&c} was not unique over key {pKey}, it was not added.
127 * // Remove the first object with a dwKey of 473 from the list. Since we're
128 * // starting an enumeration but not walking it to its end, we'll have
129 * // to free the ENUMERATION object explicitly.
132 * if ((pEnum = pKey->FindFirst (&dwKeyFind)) != NULL)
134 * hl.Remove (pEnum->GetObject());
138 * // Remove all objects in the list--technique one. This technique emphasises
139 * // an useful bit of data: if you remove the object that your ENUMERATION
140 * // object represents, you can still use that enumeration object to continue
141 * // the enumeration! For the fun of it, we'll show erasing the list both
142 * // forwards and backwards.
144 * for (LPENUM pEnum = hl.FindFirst(); pEnum; pEnum = pEnum->FindNext()) {
145 * hl.Remove (pEnum->GetObject());
148 * for (LPENUM pEnum = hl.FindLast(); pEnum; pEnum = pEnum->FindPrevious()) {
149 * hl.Remove (pEnum->GetObject());
152 * // Remove all objects--technique two. This technique is a little messier:
153 * // it finds the first object in the list, removes it, and re-starts the
154 * // enumeration. Since an enumeration is started but not walked to its end,
155 * // the ENUMERATION object must be freed explicitly in each iteration.
158 * while ((pEnum = hl.FindFirst()) != NULL) {
159 * hl.Remove (pEnum->GetObject());
163 * // Remove all objects--technique three. This is essentially the same
164 * // as technique two (find the first object, remove it, repeat), except
165 * // that it avoids using an ENUMERATION object so there's nothing to free.
168 * while ((pObject = hl.GetFirstObject()) != NULL)
169 * hl.Remove (pObject);
171 * // If you change something in an object that you think would affect one or
172 * // more of the list's keys, you should call tell the HashList so it
173 * // can update itself. An important note: if you're enumerating all items,
174 * // you can call Update() for any item without affecting the enumeration;
175 * // however, if you're enumerating along a key when you call Update(),
176 * // you'll need to stop the enumeration.
178 * aObjects[2].dwKey = 182;
179 * hl.Update (&aObjects[2]); // (same as hl.Remove(...) + hl.Add(...))
181 * // It's important to remember that the HashList only knows about your
182 * // objects as generic pointers--it doesn't free them if you remove
183 * // them from the list.
185 * OBJECT *pObject = new OBJECT;
187 * hl.Remove (pObject);
190 * // Another point about freeing objects that you have in the list:
191 * // it's safe to delete the object even before you remove it from the
192 * // list, but if you do so, make sure you remove the object immediately.
193 * // Also, to be thread-safe, you should surround the section with
194 * // Enter()/Leave(); otherwise, another thread may wake up and try to
195 * // work with the list between when you free the object and when you
196 * // remove it from the list.
198 * OBJECT *pObject = new OBJECT;
202 * hl.Remove (pObject);
205 * // Each key requires that you supply three callback functions:
206 * // one to extract the key from an Object and hash it, one to
207 * // just hash a key directly, and one to check an object to see
208 * // if it matches the specified key exactly. A HASHVALUE is really
209 * // just a DWORD--don't worry about the range (since the hashlist
210 * // will automatically modulo the return by the size of its table).
212 * HASHVALUE CALLBACK MyKey_HashObject (LPHASHLISTKEY pKey, PVOID pObject) {
213 * return MyKey_HashData (pKey, &((OBJECT*)pObject)->dwKey);
216 * HASHVALUE CALLBACK MyKey_HashData (LPHASHLISTKEY pKey, PVOID pData) {
217 * return (HASHVALUE)*(DWORD*)pData;
220 * BOOL CALLBACK MyKey_Compare (LPHASHLISTKEY pKey, PVOID pObject, PVOID pData) {
221 * return (((OBJECT*)pObject)->dwKey == *(DWORD*)pData;
226 #define cTABLESIZE_DEFAULT 1000
228 typedef class EXPORTED EXPANDARRAY EXPANDARRAY, *LPEXPANDARRAY;
229 typedef class EXPORTED HASHLIST HASHLIST, *LPHASHLIST;
230 typedef class EXPORTED HASHLISTKEY HASHLISTKEY, *LPHASHLISTKEY;
231 typedef class EXPORTED ENUMERATION ENUMERATION, ENUM, *LPENUMERATION, *LPENUM;
233 typedef UINT_PTR HASHVALUE;
234 typedef BOOL (CALLBACK * LPHASHFUNC_COMPAREOBJECTDATA)(LPHASHLISTKEY pKey, PVOID pObject, PVOID pData);
235 typedef HASHVALUE (CALLBACK * LPHASHFUNC_HASHOBJECT)(LPHASHLISTKEY pKey, PVOID pObject);
236 typedef HASHVALUE (CALLBACK * LPHASHFUNC_HASHDATA)(LPHASHLISTKEY pKey, PVOID pData);
238 typedef struct HASHLISTENTRY
244 } HASHLISTENTRY, *LPHASHLISTENTRY;
246 typedef struct HASHLISTKEYDEBUGINFO
248 size_t cBuckets; // number of buckets in the hashtable currently
249 size_t *aBuckets; // number of items in each bucket in the hashtable
250 WORD perEffective; // percent effectiveness of hashtable (100%=even distrib)
251 } HASHLISTKEYDEBUGINFO, *LPHASHLISTKEYDEBUGINFO;
255 * EXPANDARRAY CLASS __________________________________________________________
261 PVOID aElements; // = EXPANDARRAYHEAP.aElements + 4;
262 // Followed by allocated space for aElements
263 } EXPANDARRAYHEAP, *LPEXPANDARRAYHEAP;
265 class EXPORTED EXPANDARRAY
268 EXPANDARRAY (size_t cbElement, size_t cElementsPerHeap = 0);
271 PVOID GetAt (size_t iElement);
272 PVOID SetAt (size_t iElement, PVOID pData = NULL);
276 size_t m_cElementsPerHeap;
279 LPEXPANDARRAYHEAP *m_aHeaps;
284 * HASHLIST CLASS _____________________________________________________________
288 class EXPORTED HASHLIST
290 friend class HASHLISTKEY;
291 friend class ENUMERATION;
297 // A HashList is basically just a resizable array of objects.
298 // As such, the class exposes methods for adding objects to,
299 // and removing objects from, the list.
301 BOOL Add (PVOID pObjectToAdd);
302 void Remove (PVOID pObjectToRemove);
303 BOOL Update (PVOID pObjectToUpdate); // like Remove() followed by Add()
305 // Calling AddUnique() instead of Add() to add an item ensures
306 // that the specified item will only be added if it does not
307 // already exist in the list. If a key is also specified during the
308 // AddUnique() call, then the test will be performed over a specific
309 // key--detecting duplicate items by checking the key's indexed fields,
310 // rather than just comparing item's addresses.
312 BOOL AddUnique (PVOID pObjectToAdd, LPHASHLISTKEY pKeyUnique = NULL);
313 BOOL fIsInList (PVOID pObject);
315 // Each HashList can use one or more "keys" for looking up items;
316 // each key represents with an internally-maintained hash table
317 // and corresponding hashing functions (which you supply when you
318 // create a key). Keys can be added or removed at any time.
320 LPHASHLISTKEY CreateKey (LPCTSTR pszKeyName,
321 LPHASHFUNC_COMPAREOBJECTDATA fnCompareObjectData,
322 LPHASHFUNC_HASHOBJECT fnHashObject,
323 LPHASHFUNC_HASHDATA fnHashData,
324 size_t cInitialTableSize = cTABLESIZE_DEFAULT);
325 LPHASHLISTKEY FindKey (LPCTSTR pszKeyName);
326 void RemoveKey (LPHASHLISTKEY pKey);
328 // A list isn't much good without methods for enumeration.
329 // A HashList provides enumeration in the form of a list
330 // rather than an array (to prevent problems with holes when
331 // items are removed). You can walk the list in either direction.
332 // An important note: a HashList does not guarantee the order
333 // of enumeration will match the order of insertion (and, in fact,
334 // it probably won't).
336 // Alternately, you can initiate the walk from a HASHLISTKEY;
337 // doing so allows you to quickly enumerate only items which match a
338 // particular value on that key, improving search performance.
340 LPENUM FindFirst (void);
341 LPENUM FindLast (void);
342 PVOID GetFirstObject (void);
343 PVOID GetLastObject (void);
345 // It's also possible to quickly determine the number of objects
346 // in the list, without enumerating them all.
348 size_t GetCount (void);
350 // All HASHLIST routines are thread-safe, but you can wrap compound
351 // operations using the same critical section as the HASHLIST uses
352 // internally. You can also specify an alternate critical section
353 // for the HASHLIST to use instead.
357 void SetCriticalSection (CRITICAL_SECTION *pcs); // NULL = use internal
364 LPEXPANDARRAY m_aObjects;
366 size_t m_cObjectsMax;
368 LPHASHLISTKEY *m_apKeys;
371 LPHASHLISTKEY m_pKeyIndex;
372 static BOOL CALLBACK KeyIndex_CompareObjectData (LPHASHLISTKEY pKey, PVOID pObject, PVOID pData);
373 static HASHVALUE CALLBACK KeyIndex_HashObject (LPHASHLISTKEY pKey, PVOID pObject);
374 static HASHVALUE CALLBACK KeyIndex_HashData (LPHASHLISTKEY pKey, PVOID pData);
376 CRITICAL_SECTION m_cs;
377 CRITICAL_SECTION *m_pcs;
382 * HASHLISTKEY CLASS _____________________________________________________________
386 class EXPORTED HASHLISTKEY
388 friend class HASHLIST;
389 friend class ENUMERATION;
393 // Since every HASHLISTKEY is a member of exactly one HASHLIST,
394 // it contains a pointer back to that HASHLIST. You can get that
395 // pointer, to determine which list the key is used in.
397 LPHASHLIST GetHashList (void);
399 // Each HASHLISTKEY contains an internal (hidden) hashtable,
400 // and pointers to the functions which you supplied to hash objects
401 // for inclusion in that table. For convenience, it exposes methods
402 // for calling those functions.
404 BOOL CompareObjectData (PVOID pObject, PVOID pData);
405 HASHVALUE HashObject (PVOID pObject);
406 HASHVALUE HashData (PVOID pData);
408 // Instead of initiating an enumeration through the HASHLIST, you may
409 // initiate an enumeration through a HASHLISTKEY. Doing so indicates
410 // that you only want to enumerate items whose keys match a particular
411 // value; the HASHLISTKEY's internal hashtable is employed to speed the
414 LPENUM FindFirst (PVOID pData);
415 LPENUM FindLast (PVOID pData);
416 PVOID GetFirstObject (PVOID pData);
417 PVOID GetLastObject (PVOID pData);
419 BOOL fIsInList (PVOID pObject); // Equivalent to (!!GetFirstObject())
421 // For debugging purposes, when developing your hashing algorithms
422 // you may want to use the following routines to examine the distribution
423 // of data within the key. They enable you to see how many items are
424 // on each bucket within the hash table; other statistics are available.
426 LPHASHLISTKEYDEBUGINFO GetDebugInfo (void);
427 void FreeDebugInfo (LPHASHLISTKEYDEBUGINFO pInfo);
430 HASHLISTKEY (LPHASHLIST pList, LPCTSTR pszKeyName, LPHASHFUNC_COMPAREOBJECTDATA fnCompareObjectData, LPHASHFUNC_HASHOBJECT fnHashObject, LPHASHFUNC_HASHDATA fnHashData, size_t cTableSize);
433 void Add (size_t iObject, PVOID pObject);
434 void Remove (size_t iObject);
439 LPEXPANDARRAY m_aObjects;
448 TCHAR m_szKeyName[ 256 ];
449 LPHASHFUNC_COMPAREOBJECTDATA m_fnCompareObjectData;
450 LPHASHFUNC_HASHOBJECT m_fnHashObject;
451 LPHASHFUNC_HASHDATA m_fnHashData;
456 * ENUMERATION CLASS _____________________________________________________________
460 class EXPORTED ENUMERATION
462 friend class HASHLIST;
463 friend class HASHLISTKEY;
467 // Each ENUMERATION object represents an object in the HASHLIST.
468 // You can find the HASHLIST object that the ENUMERATION represents.
470 PVOID GetObject (void);
472 // Additionally, an ENUMERATION object allows you to traverse
473 // (forwards or backwards) the list of objects which meet your search
474 // criteria. If you started the enumeration through a HASHLIST
475 // object then all objects in the list will be enumerated; if you
476 // started the enumeration through a HASHLISTKEY object then only
477 // those objects which have the specified value for that key will be
478 // enumerated. If either of the routines below is about to return NULL,
479 // it will automatically 'delete this' before doing so--ending the
482 LPENUMERATION FindNext (void);
483 LPENUMERATION FindPrevious (void);
485 // You obtain an ENUMERATION object by calling a FindFirst() method,
486 // either through the HASHLIST object or one of its keys. As mentioned
487 // in the examples above, the ENUMERATION object is automatically
488 // freed whenever FindNext() or FindPrevious() returns NULL--however,
489 // if you stop the enumeration before then, you'll have to explicitly
490 // free the object with 'delete'. Failure to do so means (1) a memory
491 // leak, and more seriously, (2) the active thread will hold a critical
492 // section for the hashlist forever.
497 ENUMERATION (LPHASHLIST pList, size_t iObject, LPHASHLISTKEY pKey = NULL, PVOID pData = NULL);
499 void ENUMERATION::PrepareWalk (void);
505 LPHASHLISTKEY m_pKey;
511 * GENERAL-PURPOSE ____________________________________________________________
515 EXPORTED HASHVALUE HashString (LPCTSTR pszString);
516 EXPORTED HASHVALUE HashAnsiString (LPCSTR pszStringA);
517 EXPORTED HASHVALUE HashUnicodeString (LPWSTR pszStringW);