3 @brief A hybrid between a hash table and a doubly linked list.
7 Copyright (C) 2007, 2008 Georgia Public Library Service
8 Bill Erickson <erickson@esilibrary.com>
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License
12 as published by the Free Software Foundation; either version 2
13 of the License, or (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
21 #include <opensrf/osrf_hash.h>
24 @brief A node storing a single item within an osrfHash.
26 struct _osrfHashNodeStruct {
27 /** @brief String containing the key for the item */
29 /** @brief Pointer to the stored item data */
31 /** @brief Pointer to the next node in a doubly linked list */
32 struct _osrfHashNodeStruct* prev;
33 /** @brief Pointer to the previous node in a doubly linked list */
34 struct _osrfHashNodeStruct* next;
36 typedef struct _osrfHashNodeStruct osrfHashNode;
39 @brief osrfHash structure
41 An osrfHash is partly a key/value store based on a hash table, and partly a linear
42 structure implemented as a linked list.
44 The hash table is an array of pointers, managed by an osrfList. Each pointer in that array
45 points to another layer of osrfList, which stores pointers to osrfHashNodes for keys that
46 hash to the same value.
48 Besides residing in this hash table structure, each osrfHashNode resides in a doubly
49 linked list that includes all the osrfHashNodes in the osrfHash (except for any nodes
50 that have been logically deleted). New nodes are added to the end of the list.
52 The hash table supports lookups based on a key.
54 The linked list supports sequential traversal, reflecting the sequence in which the nodes
57 struct _osrfHashStruct {
58 /** @brief Pointer to the osrfList used as a hash table */
60 /** @brief Callback function for freeing stored items */
61 void (*freeItem) (char* key, void* item);
62 /** @brief How many items are in the osrfHash */
64 /** @brief Pointer to the first node in the linked list */
65 osrfHashNode* first_key;
66 /** @brief Pointer to the last node in the linked list */
67 osrfHashNode* last_key;
71 @brief Maintains a position in an osrfHash, for traversing the linked list
73 struct _osrfHashIteratorStruct {
74 /** @brief Pointer to the associated osrfHash */
76 /** @brief Pointer to the current osrfHashNode (the one previously returned, if any) */
77 osrfHashNode* curr_node;
81 @brief How many slots in the hash table.
83 Must be a power of 2, or the hashing algorithm won't work properly.
85 /* 0x100 is a good size for small hashes */
86 //#define OSRF_HASH_LIST_SIZE 0x100 /* size of the main hash list */
87 #define OSRF_HASH_LIST_SIZE 0x10 /* size of the main hash list */
92 @brief Free an osrfHashNode, and its cargo, from within a given osrfHash.
93 @param h Pointer to the osrfHash
94 @param n Pointer to the osrfHashNode
96 If there is a callback function for freeing the item, call it.
98 We use this macro only when freeing an entire osrfHash.
100 #define OSRF_HASH_NODE_FREE(h, n) \
102 if(h->freeItem && n->key) h->freeItem(n->key, n->item);\
103 free(n->key); free(n); \
107 @brief Create and initialize a new (and empty) osrfHash.
108 @return Pointer to the newly created osrfHash.
110 The calling code is responsible for freeing the osrfHash.
112 osrfHash* osrfNewHash() {
114 OSRF_MALLOC(hash, sizeof(osrfHash));
115 hash->hash = osrfNewListSize( OSRF_HASH_LIST_SIZE );
116 hash->first_key = NULL;
117 hash->last_key = NULL;
121 static osrfHashNode* osrfNewHashNode(char* key, void* item);
124 static unsigned int osrfHashMakeKey(char* str) {
126 unsigned int len = strlen(str);
127 unsigned int h = len;
129 for(i = 0; i < len; str++, i++)
130 h = ((h << 5) ^ (h >> 27)) ^ (*str);
131 return (h & (OSRF_HASH_LIST_SIZE-1));
135 /* macro version of the above function */
137 @brief Hashing algorithm: derive a mangled number from a string.
138 @param str Pointer to the string to be hashed.
139 @param num A numeric variable to receive the resulting value.
141 This macro implements an algorithm proposed by Donald E. Knuth
142 in The Art of Computer Programming Volume 3 (more or less..)
144 #define OSRF_HASH_MAKE_KEY(str,num) \
146 const char* k__ = str;\
147 unsigned int len__ = strlen(k__); \
148 unsigned int h__ = len__;\
149 unsigned int i__ = 0;\
150 for(i__ = 0; i__ < len__; k__++, i__++)\
151 h__ = ((h__ << 5) ^ (h__ >> 27)) ^ (*k__);\
152 num = (h__ & (OSRF_HASH_LIST_SIZE-1));\
156 @brief Install a callback function for freeing a stored item.
157 @param hash The osrfHash in which the callback function is to be installed.
158 @param callback A function pointer.
160 The callback function must accept a key value and a void pointer, and return void. The
161 key value enables the callback function to treat different kinds of items differently,
162 provided that it can distinguish them by key.
164 void osrfHashSetCallback( osrfHash* hash, void (*callback) (char* key, void* item) )
166 if( hash ) hash->freeItem = callback;
169 /* Returns a pointer to the item's node if found; otherwise returns NULL. */
171 @brief Search for a given key in an osrfHash.
172 @param hash Pointer to the osrfHash.
173 @param key The key to be sought.
174 @param bucketkey Pointer through which we can report which slot the item belongs in.
175 @return A pointer to the osrfHashNode where the item resides; or NULL, if it isn't there.
177 If the bucketkey parameter is not NULL, we update the index of the hash bucket where the
178 item belongs (whether or not we actually found it). In some cases this feedback enables the
179 calling function to avoid hashing the same key twice.
181 static osrfHashNode* find_item( const osrfHash* hash,
182 const char* key, unsigned int* bucketkey ) {
184 // Find the sub-list in the hash table
186 if( hash->size < 6 && !bucketkey )
188 // For only a few entries, when we don't need to identify
189 // the hash bucket, it's probably faster to search the
190 // linked list instead of hashing
192 osrfHashNode* currnode = hash->first_key;
193 while( currnode && strcmp( currnode->key, key ) )
194 currnode = currnode->next;
200 OSRF_HASH_MAKE_KEY(key,i);
202 // If asked, report which slot the key hashes to
203 if( bucketkey ) *bucketkey = i;
205 osrfList* list = OSRF_LIST_GET_INDEX( hash->hash, i );
206 if( !list ) { return NULL; }
208 // Search the sub-list
211 osrfHashNode* node = NULL;
212 for( k = 0; k < list->size; k++ ) {
213 node = OSRF_LIST_GET_INDEX(list, k);
214 if( node && node->key && !strcmp(node->key, key) )
222 @brief Create and populate a new osrfHashNode.
223 @param key The key string.
224 @param item A pointer to the item associated with the key.
225 @return A pointer to the newly created node.
227 static osrfHashNode* osrfNewHashNode(char* key, void* item) {
228 if(!(key && item)) return NULL;
230 OSRF_MALLOC(n, sizeof(osrfHashNode));
231 n->key = strdup(key);
239 @brief Store an item for a given key in an osrfHash.
240 @param hash Pointer to the osrfHash in which the item is to be stored.
241 @param item Pointer to the item to be stored.
242 @param key A printf-style format string to be expanded into the key for the item. Subsequent
243 parameters, if any, will be formatted and inserted into the expanded key.
244 @return Pointer to an item previously stored for the same key, if any (see discussion).
246 If no item is already stored for the same key, osrfHashSet creates a new entry for it,
249 If an item is already stored for the same key, osrfHashSet updates the entry in place. The
250 fate of the previously stored item varies. If there is a callback defined for freeing the
251 item, osrfHashSet calls it, and returns NULL. Otherwise it returns a pointer to the
252 previously stored item, so that the calling function can dispose of it.
254 osrfHashSet returns NULL if any of its first three parameters is NULL.
256 void* osrfHashSet( osrfHash* hash, void* item, const char* key, ... ) {
257 if(!(hash && item && key )) return NULL;
259 unsigned int bucketkey;
261 VA_LIST_TO_STRING(key);
262 osrfHashNode* node = find_item( hash, VA_BUF, &bucketkey );
265 // We already have an item for this key. Update it in place.
266 void* olditem = NULL;
268 if( hash->freeItem ) {
269 hash->freeItem( node->key, node->item );
272 olditem = node->item;
278 // There is no entry for this key. Create a new one.
280 if( !(bucket = OSRF_LIST_GET_INDEX(hash->hash, bucketkey)) ) {
281 bucket = osrfNewList();
282 osrfListSet( hash->hash, bucket, bucketkey );
285 node = osrfNewHashNode(VA_BUF, item);
286 osrfListPushFirst( bucket, node );
290 // Add the new hash node to the end of the linked list
292 if( NULL == hash->first_key )
293 hash->first_key = hash->last_key = node;
295 node->prev = hash->last_key;
296 hash->last_key->next = node;
297 hash->last_key = node;
303 /* Delete the entry for a specified key. If the entry exists,
304 and there is no callback function to destroy the associated
305 item, return a pointer to the formerly associated item.
306 Otherwise return NULL.
309 @brief Remove the item for a specified key from an osrfHash.
310 @param hash Pointer to the osrfHash from which the item is to be removed.
311 @param key A printf-style format string to be expanded into the key for the item. Subsequent
312 parameters, if any, will be formatted and inserted into the expanded key.
313 @return Pointer to the removed item, if any (see discussion).
315 If no entry is present for the specified key, osrfHashRemove returns NULL.
317 If there is such an entry, osrfHashRemove removes it. The fate of the associated item
318 varies. If there is a callback function for freeing items, osrfHashRemove calls it, and
319 returns NULL. Otherwise it returns a pointer to the removed item, so that the calling
320 code can dispose of it.
322 osrfHashRemove returns NULL if either of its first two parameters is NULL.
324 Note: the osrfHashNode for the removed item is logically deleted so that subsequent searches
325 and traversals will ignore it. However it is physically left in place so that an
326 osrfHashIterator pointing to it can advance to the next node. The downside of this
327 design is that, if a lot of items are removed, the accumulation of logically deleted nodes
328 will slow down searches. In addition, the memory used by a logically deleted osrfHashNode
329 remains allocated until the entire osrfHashNode is freed.
331 void* osrfHashRemove( osrfHash* hash, const char* key, ... ) {
332 if(!(hash && key )) return NULL;
334 VA_LIST_TO_STRING(key);
336 osrfHashNode* node = find_item( hash, VA_BUF, NULL );
337 if( !node ) return NULL;
341 void* item = NULL; // to be returned
343 hash->freeItem( node->key, node->item );
347 // Mark the node as logically deleted
353 // Make the node unreachable from the rest of the linked list.
354 // We leave the next and prev pointers in place so that an
355 // iterator parked here can find its way to an adjacent node.
358 node->prev->next = node->next;
360 hash->first_key = node->next;
363 node->next->prev = node->prev;
365 hash->last_key = node->prev;
372 @brief Fetch the item stored in an osrfHash for a given key.
373 @param hash Pointer to the osrfHash from which to fetch the item.
374 @param key The key for the item sought.
375 @return A pointer to the item, if it exists; otherwise NULL.
377 void* osrfHashGet( osrfHash* hash, const char* key ) {
378 if(!(hash && key )) return NULL;
380 osrfHashNode* node = find_item( hash, key, NULL );
381 if( !node ) return NULL;
386 @brief Fetch the item stored in an osrfHash for a given key.
387 @param hash Pointer to the osrfHash from which to fetch the item.
388 @param key A printf-style format string to be expanded into the key for the item. Subsequent
389 parameters, if any, will be formatted and inserted into the expanded key.
390 @return A pointer to the item, if it exists; otherwise NULL.
392 void* osrfHashGetFmt( osrfHash* hash, const char* key, ... ) {
393 if(!(hash && key )) return NULL;
394 VA_LIST_TO_STRING(key);
396 osrfHashNode* node = find_item( hash, (char*) VA_BUF, NULL );
397 if( !node ) return NULL;
402 @brief Create an osrfStringArray containing all the keys in an osrfHash.
403 @param hash Pointer to the osrfHash whose keys are to be extracted.
404 @return A pointer to the newly created osrfStringArray.
406 The strings in the osrfStringArray are arranged in the sequence in which they were added to
409 The calling code is responsible for freeing the osrfStringArray by calling
410 osrfStringArrayFree().
412 osrfHashKeys returns NULL if its parameter is NULL.
414 osrfStringArray* osrfHashKeys( osrfHash* hash ) {
415 if(!hash) return NULL;
418 osrfStringArray* strings = osrfNewStringArray( hash->size );
420 // Add every key on the linked list
422 node = hash->first_key;
424 if( node->key ) // should always be true
425 osrfStringArrayAdd( strings, node->key );
434 @brief Count the items an osrfHash.
435 @param hash Pointer to the osrfHash whose items are to be counted.
436 @return The number of items in the osrfHash.
438 If the parameter is NULL, osrfHashGetCount returns -1.
440 unsigned long osrfHashGetCount( osrfHash* hash ) {
446 @brief Free an osrfHash and all of its contents.
447 @param hash Pointer to the osrfHash to be freed.
449 If a callback function has been defined for freeing items, osrfHashFree calls it for
452 void osrfHashFree( osrfHash* hash ) {
459 for( i = 0; i != hash->hash->size; i++ ) {
460 if( ( list = OSRF_LIST_GET_INDEX( hash->hash, i )) ) {
461 for( j = 0; j != list->size; j++ ) {
462 if( (node = OSRF_LIST_GET_INDEX( list, j )) ) {
463 OSRF_HASH_NODE_FREE(hash, node);
470 osrfListFree(hash->hash);
475 @brief Create and initialize an osrfHashIterator for a given osrfHash.
476 @param hash Pointer to the osrfHash with which the iterator will be associated.
477 @return Pointer to the newly created osrfHashIterator.
479 An osrfHashIterator may be used to traverse the items stored in an osrfHash.
481 The calling code is responsible for freeing the osrfHashIterator by calling
482 osrfHashIteratorFree().
484 osrfHashIterator* osrfNewHashIterator( osrfHash* hash ) {
485 if(!hash) return NULL;
486 osrfHashIterator* itr;
487 OSRF_MALLOC(itr, sizeof(osrfHashIterator));
489 itr->curr_node = NULL;
494 @brief Advance an osrfHashIterator to the next item in an osrfHash.
495 @param itr Pointer to the osrfHashIterator to be advanced.
496 @return A Pointer to the next stored item, if there is one; otherwise NULL.
498 The items are returned in the sequence in which their keys were added to the osrfHash.
500 void* osrfHashIteratorNext( osrfHashIterator* itr ) {
501 if(!(itr && itr->hash)) return NULL;
503 // Advance to the next node in the linked list
505 if( NULL == itr->curr_node )
506 itr->curr_node = itr->hash->first_key;
508 itr->curr_node = itr->curr_node->next;
511 return itr->curr_node->item;
517 @brief Fetch the key where an osrfHashIterator is currently positioned.
518 @param itr Pointer to the osrfHashIterator.
519 @return A const pointer to the key, if the iterator is currently positioned on an item;
522 The key returned is the one associated with the previous call to osrfHashIteratorNext(),
523 unless there has been a call to osrfHashIteratorReset() in the meanwhile.
525 const char* osrfHashIteratorKey( const osrfHashIterator* itr ) {
526 if( itr && itr->curr_node )
527 return itr->curr_node->key;
533 @brief Free an osrfHashIterator.
534 @param itr Pointer to the osrfHashIterator to be freed.
536 void osrfHashIteratorFree( osrfHashIterator* itr ) {
542 @brief Restore an osrfHashIterator to its original pristine state.
543 @param itr Pointer to the osrfHashIterator to be reset.
545 After resetting the iterator, the next call to osrfHashIteratorNext() will return a pointer
546 to the first item in the list.
548 void osrfHashIteratorReset( osrfHashIterator* itr ) {
550 itr->curr_node = NULL;
555 @brief Determine whether there is another entry in an osrfHash beyond the current
556 position of an osrfHashIterator.
557 @param itr Pointer to the osrfHashIterator in question.
558 @return A boolean: 1 if there is another entry beyond the current position, or 0 if the
559 iterator is already at the last entry (or at no entry).
561 int osrfHashIteratorHasNext( osrfHashIterator* itr ) {
562 if( !itr || !itr->hash )
564 else if( itr->curr_node )
565 return itr->curr_node->next ? 1 : 0;
567 return itr->hash->first_key ? 1 : 0;