3 @brief A hybrid between a hash table and a doubly linked list.
7 Copyright (C) 2007, 2008 Georgia Public Library Service
8 Bill Erickson <erickson@esilibrary.com>
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License
12 as published by the Free Software Foundation; either version 2
13 of the License, or (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
21 #include <opensrf/osrf_hash.h>
24 @brief A node storing a single item within an osrfHash.
26 struct _osrfHashNodeStruct {
27 /** @brief String containing the key for the item */
29 /** @brief Pointer to the stored item data */
31 /** @brief Pointer to the next node in a doubly linked list */
32 struct _osrfHashNodeStruct* prev;
33 /** @brief Pointer to the previous node in a doubly linked list */
34 struct _osrfHashNodeStruct* next;
36 typedef struct _osrfHashNodeStruct osrfHashNode;
39 @brief osrfHash structure
41 An osrfHash is partly a key/value store based on a hash table, and partly a linear
42 structure implemented as a linked list.
44 The hash table is an array of pointers, managed by an osrfList. Each pointer in that array
45 points to another layer of osrfList, which stores pointers to osrfHashNodes for keys that
46 hash to the same value.
48 Besides residing in this hash table structure, each osrfHashNode resides in a doubly
49 linked list that includes all the osrfHashNodes in the osrfHash (except for any nodes
50 that have been logically deleted). New nodes are added to the end of the list.
52 The hash table supports lookups based on a key.
54 The linked list supports sequential traversal, reflecting the sequence in which the nodes
57 struct _osrfHashStruct {
58 /** @brief Pointer to the osrfList used as a hash table */
60 /** @brief Callback function for freeing stored items */
61 void (*freeItem) (char* key, void* item);
62 /** @brief How many items are in the osrfHash */
64 /** @brief Pointer to the first node in the linked list */
65 osrfHashNode* first_key;
66 /** @brief Pointer to the last node in the linked list */
67 osrfHashNode* last_key;
71 @brief Maintains a position in an osrfHash, for traversing the linked list
73 struct _osrfHashIteratorStruct {
74 /** @brief Pointer to the associated osrfHash */
76 /** @brief Pointer to the current osrfHashNode (the one previously returned, if any) */
77 osrfHashNode* curr_node;
81 @brief How many slots in the hash table.
83 Must be a power of 2, or the hashing algorithm won't work properly.
85 /* 0x100 is a good size for small hashes */
86 //#define OSRF_HASH_LIST_SIZE 0x100 /* size of the main hash list */
87 #define OSRF_HASH_LIST_SIZE 0x10 /* size of the main hash list */
92 @brief Free an osrfHashNode, and its cargo, from within a given osrfHash.
93 @param h Pointer to the osrfHash
94 @param n Pointer to the osrfHashNode
96 If there is a callback function for freeing the item, call it.
98 We use this macro only when freeing an entire osrfHash.
100 #define OSRF_HASH_NODE_FREE(h, n) \
102 if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
103 free(n->key); free(n); \
107 @brief Create and initialize a new (and empty) osrfHash.
108 @return Pointer to the newly created osrfHash.
110 The calling code is responsible for freeing the osrfHash.
112 osrfHash* osrfNewHash() {
114 OSRF_MALLOC(hash, sizeof(osrfHash));
115 hash->hash = osrfNewListSize( OSRF_HASH_LIST_SIZE );
116 hash->first_key = NULL;
117 hash->last_key = NULL;
121 static osrfHashNode* osrfNewHashNode(char* key, void* item);
124 static unsigned int osrfHashMakeKey(char* str) {
126 unsigned int len = strlen(str);
127 unsigned int h = len;
129 for(i = 0; i < len; str++, i++)
130 h = ((h << 5) ^ (h >> 27)) ^ (*str);
131 return (h & (OSRF_HASH_LIST_SIZE-1));
135 /* macro version of the above function */
137 @brief Hashing algorithm: derive a mangled number from a string.
138 @param str Pointer to the string to be hashed.
139 @param num A numeric variable to receive the resulting value.
141 This macro implements an algorithm proposed by Donald E. Knuth
142 in The Art of Computer Programming Volume 3 (more or less..)
144 #define OSRF_HASH_MAKE_KEY(str,num) \
146 const char* k__ = str;\
147 unsigned int len__ = strlen(k__); \
148 unsigned int h__ = len__;\
149 unsigned int i__ = 0;\
150 for(i__ = 0; i__ < len__; k__++, i__++)\
151 h__ = ((h__ << 5) ^ (h__ >> 27)) ^ (*k__);\
152 num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
156 @brief Install a callback function for freeing a stored item.
157 @param hash The osrfHash in which the callback function is to be installed.
158 @param callback A function pointer.
160 The callback function must accept a key value and a void pointer, and return void. The
161 key value enables the callback function to treat different kinds of items differently,
162 provided that it can distinguish them by key.
164 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
166 if( hash ) hash->freeItem = callback;
169 /* Returns a pointer to the item's node if found; otherwise returns NULL. */
171 @brief Search for a given key in an osrfHash.
172 @param hash Pointer to the osrfHash.
173 @param key The key to be sought.
174 @param bucketkey Pointer through which we can report which slot the item belongs in.
175 @return A pointer to the osrfHashNode where the item resides; or NULL, if it isn't there.
177 If the bucketkey parameter is not NULL, we update the index of the hash bucket where the
178 item belongs (whether or not we actually found it). In some cases this feedback enables the
179 calling function to avoid hashing the same key twice.
181 static osrfHashNode* find_item( const osrfHash* hash,
182 const char* key, unsigned int* bucketkey ) {
184 // Find the sub-list in the hash table
186 if( hash->size < 6 && !bucketkey )
188 // For only a few entries, when we don't need to identify
189 // the hash bucket, it's probably faster to search the
190 // linked list instead of hashing
192 osrfHashNode* currnode = hash->first_key;
193 while( currnode && strcmp( currnode->key, key ) )
194 currnode = currnode->next;
200 OSRF_HASH_MAKE_KEY(key,i);
202 // If asked, report which slot the key hashes to
203 if( bucketkey ) *bucketkey = i;
205 osrfList* list = OSRF_LIST_GET_INDEX( hash->hash, i );
206 if( !list ) { return NULL; }
208 // Search the sub-list
211 osrfHashNode* node = NULL;
212 for( k = 0; k < list->size; k++ ) {
213 node = OSRF_LIST_GET_INDEX(list, k);
214 if( node && node->key && !strcmp(node->key, key) )
222 @brief Create and populate a new osrfHashNode.
223 @param key The key string.
224 @param item A pointer to the item associated with the key.
225 @return A pointer to the newly created node.
227 static osrfHashNode* osrfNewHashNode(char* key, void* item) {
228 if(!(key && item)) return NULL;
230 OSRF_MALLOC(n, sizeof(osrfHashNode));
231 n->key = strdup(key);
239 @brief Store an item for a given key in an osrfHash.
240 @param hash Pointer to the osrfHash in which the item is to be stored.
241 @param item Pointer to the item to be stored.
242 @param key A printf-style format string to be expanded into the key for the item. Subsequent
243 parameters, if any, will be formatted and inserted into the expanded key.
244 @return Pointer to an item previously stored for the same key, if any (see discussion).
246 If no item is already stored for the same key, osrfHashSet creates a new entry for it,
249 If an item is already stored for the same key, osrfHashSet updates the entry in place. The
250 fate of the previously stored item varies. If there is a callback defined for freeing the
251 item, osrfHashSet calls it, and returns NULL. Otherwise it returns a pointer to the
252 previously stored item, so that the calling function can dispose of it.
254 osrfHashSet returns NULL if any of its first three parameters is NULL.
256 void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
257 if(!(hash && item && key )) return NULL;
259 unsigned int bucketkey;
261 VA_LIST_TO_STRING(key);
262 osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
265 // We already have an item for this key. Update it in place.
266 void* olditem = NULL;
268 if( hash->freeItem ) {
269 hash->freeItem( node->key, node->item );
272 olditem = node->item;
278 // There is no entry for this key. Create a new one.
280 if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
281 bucket = osrfNewList();
282 osrfListSet( hash->hash, bucket, bucketkey );
285 node = osrfNewHashNode(VA_BUF, item);
286 osrfListPushFirst( bucket, node );
290 // Add the new hash node to the end of the linked list
292 if( NULL == hash->first_key )
293 hash->first_key = hash->last_key = node;
295 node->prev = hash->last_key;
296 hash->last_key->next = node;
297 hash->last_key = node;
304 @brief Remove the item for a specified key from an osrfHash.
305 @param hash Pointer to the osrfHash from which the item is to be removed.
306 @param key A printf-style format string to be expanded into the key for the item. Subsequent
307 parameters, if any, will be formatted and inserted into the expanded key.
308 @return Pointer to the removed item, if any (see discussion).
310 If no entry is present for the specified key, osrfHashRemove returns NULL.
312 If there is such an entry, osrfHashRemove removes it. The fate of the associated item
313 varies. If there is a callback function for freeing items, osrfHashRemove calls it, and
314 returns NULL. Otherwise it returns a pointer to the removed item, so that the calling
315 code can dispose of it.
317 osrfHashRemove returns NULL if either of its first two parameters is NULL.
319 Note: the osrfHashNode for the removed item is logically deleted so that subsequent searches
320 and traversals will ignore it. However it is physically left in place so that an
321 osrfHashIterator pointing to it can advance to the next node. The downside of this
322 design is that, if a lot of items are removed, the accumulation of logically deleted nodes
323 will slow down searches. In addition, the memory used by a logically deleted osrfHashNode
324 remains allocated until the entire osrfHashNode is freed.
326 void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
327 if(!(hash && key )) return NULL;
329 VA_LIST_TO_STRING(key);
331 osrfHashNode* node = find_item( hash, VA_BUF, NULL );
332 if( !node ) return NULL;
336 void* item = NULL; // to be returned
338 hash->freeItem( node->key, node->item );
342 // Mark the node as logically deleted
348 // Make the node unreachable from the rest of the linked list.
349 // We leave the next and prev pointers in place so that an
350 // iterator parked here can find its way to an adjacent node.
353 node->prev->next = node->next;
355 hash->first_key = node->next;
358 node->next->prev = node->prev;
360 hash->last_key = node->prev;
366 @brief Extract the item for a specified key from an osrfHash.
367 @param hash Pointer to the osrfHash from which the item is to be extracted.
368 @param key A printf-style format string to be expanded into the key for the item.
369 Subsequent parameters, if any, will be formatted and inserted into the expanded key.
370 @return Pointer to the extracted item, if any (see discussion).
372 osrfHashRemove removes a specified entry without destroying it, and returns a pointer
373 to it. If either of its first two parameters is NULL, or if no entry is present for the specified key, it returns NULL.
375 This function is identical to osrfHashRemove() except that it does not destroy the
378 void* osrfHashExtract( osrfHash* hash, const char* key, ... ) {
379 if(!(hash && key )) return NULL;
381 VA_LIST_TO_STRING(key);
383 osrfHashNode* node = find_item( hash, VA_BUF, NULL );
384 if( !node ) return NULL;
388 void* item = node->item; // to be returned
390 // Mark the node as logically deleted
395 // Make the node unreachable from the rest of the linked list.
396 // We leave the next and prev pointers in place so that an
397 // iterator parked here can find its way to an adjacent node.
399 node->prev->next = node->next;
401 hash->first_key = node->next;
404 node->next->prev = node->prev;
406 hash->last_key = node->prev;
412 @brief Fetch the item stored in an osrfHash for a given key.
413 @param hash Pointer to the osrfHash from which to fetch the item.
414 @param key The key for the item sought.
415 @return A pointer to the item, if it exists; otherwise NULL.
417 void* osrfHashGet( osrfHash* hash, const char* key ) {
418 if(!(hash && key )) return NULL;
420 osrfHashNode* node = find_item( hash, key, NULL );
421 if( !node ) return NULL;
426 @brief Fetch the item stored in an osrfHash for a given key.
427 @param hash Pointer to the osrfHash from which to fetch the item.
428 @param key A printf-style format string to be expanded into the key for the item. Subsequent
429 parameters, if any, will be formatted and inserted into the expanded key.
430 @return A pointer to the item, if it exists; otherwise NULL.
432 void* osrfHashGetFmt( osrfHash* hash, const char* key, ... ) {
433 if(!(hash && key )) return NULL;
434 VA_LIST_TO_STRING(key);
436 osrfHashNode* node = find_item( hash, (char*) VA_BUF, NULL );
437 if( !node ) return NULL;
442 @brief Create an osrfStringArray containing all the keys in an osrfHash.
443 @param hash Pointer to the osrfHash whose keys are to be extracted.
444 @return A pointer to the newly created osrfStringArray.
446 The strings in the osrfStringArray are arranged in the sequence in which they were added to
449 The calling code is responsible for freeing the osrfStringArray by calling
450 osrfStringArrayFree().
452 osrfHashKeys returns NULL if its parameter is NULL.
454 osrfStringArray* osrfHashKeys( osrfHash* hash ) {
455 if(!hash) return NULL;
458 osrfStringArray* strings = osrfNewStringArray( hash->size );
460 // Add every key on the linked list
462 node = hash->first_key;
464 if( node->key ) // should always be true
465 osrfStringArrayAdd( strings, node->key );
474 @brief Count the items an osrfHash.
475 @param hash Pointer to the osrfHash whose items are to be counted.
476 @return The number of items in the osrfHash.
478 If the parameter is NULL, osrfHashGetCount returns -1.
480 unsigned long osrfHashGetCount( osrfHash* hash ) {
486 @brief Free an osrfHash and all of its contents.
487 @param hash Pointer to the osrfHash to be freed.
489 If a callback function has been defined for freeing items, osrfHashFree calls it for
492 void osrfHashFree( osrfHash* hash ) {
499 for( i = 0; i != hash->hash->size; i++ ) {
500 if( ( list = OSRF_LIST_GET_INDEX( hash->hash, i )) ) {
501 for( j = 0; j != list->size; j++ ) {
502 if( (node = OSRF_LIST_GET_INDEX( list, j )) ) {
503 OSRF_HASH_NODE_FREE(hash, node);
510 osrfListFree(hash->hash);
515 @brief Create and initialize an osrfHashIterator for a given osrfHash.
516 @param hash Pointer to the osrfHash with which the iterator will be associated.
517 @return Pointer to the newly created osrfHashIterator.
519 An osrfHashIterator may be used to traverse the items stored in an osrfHash.
521 The calling code is responsible for freeing the osrfHashIterator by calling
522 osrfHashIteratorFree().
524 osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
525 if(!hash) return NULL;
526 osrfHashIterator* itr;
527 OSRF_MALLOC(itr, sizeof(osrfHashIterator));
529 itr->curr_node = NULL;
534 @brief Advance an osrfHashIterator to the next item in an osrfHash.
535 @param itr Pointer to the osrfHashIterator to be advanced.
536 @return A Pointer to the next stored item, if there is one; otherwise NULL.
538 The items are returned in the sequence in which their keys were added to the osrfHash.
540 void* osrfHashIteratorNext( osrfHashIterator* itr ) {
541 if(!(itr && itr->hash)) return NULL;
543 // Advance to the next node in the linked list
545 if( NULL == itr->curr_node )
546 itr->curr_node = itr->hash->first_key;
548 itr->curr_node = itr->curr_node->next;
551 return itr->curr_node->item;
557 @brief Fetch the key where an osrfHashIterator is currently positioned.
558 @param itr Pointer to the osrfHashIterator.
559 @return A const pointer to the key, if the iterator is currently positioned on an item;
562 The key returned is the one associated with the previous call to osrfHashIteratorNext(),
563 unless there has been a call to osrfHashIteratorReset() in the meanwhile.
565 const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
566 if( itr && itr->curr_node )
567 return itr->curr_node->key;
573 @brief Free an osrfHashIterator.
574 @param itr Pointer to the osrfHashIterator to be freed.
576 void osrfHashIteratorFree( osrfHashIterator* itr ) {
582 @brief Restore an osrfHashIterator to its original pristine state.
583 @param itr Pointer to the osrfHashIterator to be reset.
585 After resetting the iterator, the next call to osrfHashIteratorNext() will return a pointer
586 to the first item in the list.
588 void osrfHashIteratorReset( osrfHashIterator* itr ) {
590 itr->curr_node = NULL;
595 @brief Determine whether there is another entry in an osrfHash beyond the current
596 position of an osrfHashIterator.
597 @param itr Pointer to the osrfHashIterator in question.
598 @return A boolean: 1 if there is another entry beyond the current position, or 0 if the
599 iterator is already at the last entry (or at no entry).
601 int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
602 if( !itr || !itr->hash )
604 else if( itr->curr_node )
605 return itr->curr_node->next ? 1 : 0;
607 return itr->hash->first_key ? 1 : 0;