src/util/afs_lhash.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #ifdef KERNEL
  11 #include "../afs/afs_atomlist.h"
  12 #include "../afs/afs_lhash.h"
  13 #else /* KERNEL */
  14 #include "afs_atomlist.h"
  15 #include "afs_lhash.h"
  16 /* for now, only turn on assertions in user-space code */
  17 #include <assert.h>
  18 #define CHECK_INVARIANTS
  19 #endif /* KERNEL */
  20
  21 /* max hash table load factor */
  22 enum { LOAD_FACTOR = 5 };
  23
  24 struct bucket {
  25         struct bucket *next;
  26         void *data;
  27         unsigned key;
  28 };
  29
  30 struct afs_lhash {
  31         int (*equal)(const void *a, const void *b);
  32
  33         void *(*allocate)(size_t n);
  34
  35         void (*deallocate)(void *p, size_t n);
  36
  37         size_t          p;      /* index of next bucket to be split */
  38         size_t          maxp;   /* upper bound on p during this expansion */
  39
  40         size_t          ndata;  /* count of data records in hash */
  41
  42         size_t          ltable; /* logical table size */
  43
  44         size_t          ntable; /* physical table size */
  45         struct bucket **table;
  46
  47         afs_atomlist   *bucket_list;    /* hash bucket allocator */
  48
  49         size_t  search_calls;   /* cumulative afs_lhash_search() call count */
  50         size_t  search_tests;   /* cumulative afs_lhash_search() comparison count */
  51         size_t  remove_calls;   /* cumulative afs_lhash_remove() call count */
  52         size_t  remove_tests;   /* cumulative afs_lhash_remove() comparison count */
  53 };
  54
  55 /*
  56  * make sure the given hash table can accomodate the given index
  57  * value; expand the bucket table if necessary
  58  *
  59  * returns 0 on success, <0 on failure
  60  */
  61
  62 static int
  63 afs_lhash_accomodate(
  64     afs_lhash *lh,
  65     size_t max_index
  66 )
  67 {
  68         /*
  69          * The usual approach to growing ntable to accomodate max_index
  70          * would be to double ntable enough times.  This would give us
  71          * an exponential backoff in how many times we need to grow
  72          * ntable.  However, there is a space tradeoff.
  73          *
  74          * Since afs_lhash may be used in an environment where memory is
  75          * constrained, we choose instead to grow ntable in fixed
  76          * increments.  This may have a larger total cost, but it is
  77          * amortized in smaller increments.
  78          *
  79          * If even this cost is too great, we can consider adopting the
  80          * two-level array approach mentioned in the paper.  This could
  81          * keep the sizes of the allocations more consistent, and also
  82          * reduce the amount of data copying.  However, we won't do that
  83          * until we see real results that show that the benefit of the
  84          * additional complexity is worth it.
  85          */
  86         enum { ntable_inc = 1024 / sizeof *lh->table };
  87
  88         size_t          new_ntable;
  89         struct bucket **new_table;
  90         size_t          i;
  91
  92         /* if the given index fits, we're done */
  93
  94         if (max_index < lh->ntable)
  95                 return 0;
  96
  97         /* otherwise, determine new_ntable and allocate new_table */
  98
  99         if (lh->ntable == (size_t)0) {
 100                 new_ntable = ntable_inc;
 101         } else {
 102                 new_ntable = lh->ntable;
 103         }
 104
 105         for(; !(max_index < new_ntable); new_ntable += ntable_inc)
 106                 continue;
 107
 108         new_table = lh->allocate(new_ntable * sizeof *lh->table);
 109         if (!new_table) {
 110                 return -1;
 111         }
 112
 113         /* initialize new_table from the old table */
 114
 115         for (i = 0; i < lh->ntable; i++) {
 116                 new_table[i] = lh->table[i];
 117         }
 118         for (i = lh->ntable; i < new_ntable; i++) {
 119                 new_table[i] = 0;
 120         }
 121
 122         /*
 123          * free the old table, if any, and switch to the new table
 124          *
 125          * (when called from afs_lhash_create(), the table is empty)
 126          */
 127
 128         if (lh->table && lh->ntable) {
 129                 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
 130         }
 131         lh->ntable = new_ntable;
 132         lh->table  = new_table;
 133
 134         return 0;
 135 }
 136
 137 /*
 138  * given a hash table and a key value, returns the index corresponding
 139  * to the key value
 140  */
 141
 142 static size_t
 143 afs_lhash_address(
 144     const afs_lhash *lh,
 145     unsigned key
 146 )
 147 {
 148         enum { prime = 1048583 };
 149         size_t h;
 150         size_t address;
 151
 152         h = key % prime;        /* 0 <= h < prime */
 153
 154         address = h % lh->maxp;
 155         if (address < lh->p) {
 156                 address = h % (2 * lh->maxp);
 157         }
 158
 159         return address;
 160 }
 161
 162 /*
 163  * if possible, expand the logical size of the given hash table
 164  */
 165 static void
 166 afs_lhash_expand(
 167     afs_lhash *lh
 168 )
 169 {
 170         size_t old_address;     /* index of bucket to split */
 171         size_t new_address;     /* index of new bucket */
 172
 173         struct bucket *current; /* for scanning down old chain */
 174         struct bucket *previous;
 175
 176         struct bucket *last_of_new;     /* last element in new chain */
 177
 178         /* save address to split */
 179
 180         old_address = lh->p;
 181
 182         /* determine new address, grow table if necessary */
 183
 184         new_address = lh->maxp + lh->p;
 185
 186         if (afs_lhash_accomodate(lh, new_address) < 0) {
 187                 return;
 188         }
 189
 190         /* adjust the state variables */
 191
 192         /* this makes afs_lhash_address() work with respect
 193          * to the expanded table */
 194
 195         lh->p++;
 196         if (lh->p == lh->maxp) {
 197                 lh->maxp *= 2;
 198                 lh->p = 0;
 199         }
 200
 201         lh->ltable++;
 202
 203 #ifdef CHECK_INVARIANTS
 204         assert(lh->ltable-1 == new_address);
 205         assert(lh->ltable <= lh->ntable);
 206 #endif  /* CHECK_INVARIANTS */
 207
 208         /* relocate records to the new bucket */
 209
 210         current = lh->table[old_address];
 211         previous = 0;
 212         last_of_new = 0;
 213         lh->table[new_address] = 0;
 214
 215         while (current) {
 216                 size_t addr;
 217                 addr = afs_lhash_address(lh, current->key);
 218                 if (addr == new_address) {
 219                         /* attach it to the end of the new chain */
 220                         if (last_of_new) {
 221                                 last_of_new->next = current;
 222                         } else {
 223                                 lh->table[new_address] = current;
 224                         }
 225                         if (previous) {
 226                                 previous->next = current->next;
 227                         } else {
 228                                 lh->table[old_address] = current->next;
 229                         }
 230                         last_of_new = current;
 231                         current = current->next;
 232                         last_of_new->next = 0;
 233                 } else {
 234 #ifdef CHECK_INVARIANTS
 235                         assert(addr == old_address);
 236 #endif  /* CHECK_INVARIANTS */
 237                         /* leave it on the old chain */
 238                         previous = current;
 239                         current = current->next;
 240                 }
 241         }
 242 }
 243
 244 afs_lhash *
 245 afs_lhash_create(
 246     int (*equal)(const void *a, const void *b),
 247         /* returns true if the elements pointed to by
 248            a and b are the same, false otherwise */
 249
 250     void *(*allocate)(size_t n),
 251
 252     void (*deallocate)(void *p, size_t n)
 253 )
 254 {
 255         afs_lhash *lh;
 256
 257         lh = allocate(sizeof *lh);
 258         if (!lh)
 259                 return (afs_lhash *)0;
 260
 261         lh->equal = equal;
 262         lh->allocate = allocate;
 263         lh->deallocate = deallocate;
 264
 265         lh->p = 0;
 266         lh->maxp = 16;
 267
 268         lh->ltable = lh->maxp;
 269
 270         lh->ndata = 0;
 271
 272         lh->ntable = 0;
 273         lh->table = (struct bucket **)0;
 274
 275         if (afs_lhash_accomodate(lh, lh->ltable-1) < 0) {
 276                 lh->deallocate(lh, sizeof *lh);
 277                 return (afs_lhash *)0;
 278         }
 279 #ifdef CHECK_INVARIANTS
 280         assert(lh->ltable <= lh->ntable);
 281 #endif  /* CHECK_INVARIANTS */
 282
 283         /* maybe the chunk size should be passed in for hashes, so we
 284          * can pass it down here */
 285
 286         lh->bucket_list = afs_atomlist_create(sizeof(struct bucket),
 287                                               sizeof(long) * 1024,
 288                                               allocate, deallocate);
 289 #ifdef CHECK_INVARIANTS
 290         assert(lh->bucket_list);
 291 #endif  /* CHECK_INVARIANTS */
 292
 293         lh->search_calls = 0;
 294         lh->search_tests = 0;
 295         lh->remove_calls = 0;
 296         lh->remove_tests = 0;
 297
 298         return lh;
 299 }
 300
 301 void
 302 afs_lhash_destroy(
 303     afs_lhash *lh
 304 )
 305 {
 306 #ifdef CHECK_INVARIANTS
 307         assert(lh->ltable <= lh->ntable);
 308 #endif  /* CHECK_INVARIANTS */
 309
 310         /*
 311          * first, free the buckets
 312          *
 313          * afs_atomlist_destroy() implicitly frees all the memory allocated
 314          * from the given afs_atomlist, so there is no need to go through
 315          * the hash table to explicitly free each bucket.
 316          */
 317
 318         afs_atomlist_destroy(lh->bucket_list);
 319
 320         /* then, free the table and the afs_lhash */
 321
 322         lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
 323         lh->deallocate(lh, sizeof *lh);
 324 }
 325
 326 void
 327 afs_lhash_iter(
 328     afs_lhash *lh,
 329     void(*f)(size_t index, unsigned key, void *data)
 330 )
 331 {
 332         size_t i;
 333
 334 #ifdef CHECK_INVARIANTS
 335         assert(lh->ltable <= lh->ntable);
 336 #endif  /* CHECK_INVARIANTS */
 337
 338         for (i = 0; i < lh->ltable; i++) {
 339                 struct bucket *current;
 340
 341                 for (current = lh->table[i];
 342                      current;
 343                      current = current->next) {
 344                         f(i, current->key, current->data);
 345                 }
 346         }
 347 }
 348
 349 void *
 350 afs_lhash_search(
 351     afs_lhash *lh,
 352     unsigned key,
 353     const void *data
 354 )
 355 {
 356         size_t k;
 357         struct bucket *previous;
 358         struct bucket *current;
 359
 360         lh->search_calls++;
 361
 362         k = afs_lhash_address(lh, key);
 363         for (previous = 0, current = lh->table[k];
 364              current;
 365              previous = current, current = current->next) {
 366                 lh->search_tests++;
 367                 if (lh->equal(data, current->data)) {
 368
 369                         /*
 370                          * Since we found what we were looking for, move
 371                          * the bucket to the head of the chain.  The
 372                          * theory is that unused hash table entries will
 373                          * be left at the end of the chain, where they
 374                          * will not cause search times to increase.
 375                          *
 376                          * This optimization is both easy to understand
 377                          * and cheap to execute, so we go ahead and do
 378                          * it.
 379                          *
 380                          */
 381
 382                         if (previous) {
 383                                 previous->next = current->next;
 384                                 current->next = lh->table[k];
 385                                 lh->table[k] = current;
 386                         }
 387
 388                         return current->data;
 389                 }
 390         }
 391
 392         return 0;
 393 }
 394
 395 void *
 396 afs_lhash_rosearch(
 397     const afs_lhash *lh,
 398     unsigned key,
 399     const void *data
 400 )
 401 {
 402         size_t k;
 403         struct bucket *current;
 404
 405         k = afs_lhash_address(lh, key);
 406         for (current = lh->table[k];
 407              current;
 408              current = current->next) {
 409                 if (lh->equal(data, current->data)) {
 410                         return current->data;
 411                 }
 412         }
 413
 414         return 0;
 415 }
 416
 417 void *
 418 afs_lhash_remove(
 419     afs_lhash *lh,
 420     unsigned key,
 421     const void *data
 422 )
 423 {
 424         size_t k;
 425         struct bucket **pprev;
 426         struct bucket *cur;
 427
 428         lh->remove_calls++;
 429
 430         k = afs_lhash_address(lh, key);
 431         for (pprev = 0, cur = lh->table[k];
 432              cur;
 433              pprev = &cur->next, cur = cur->next) {
 434                 lh->remove_tests++;
 435                 if (lh->equal(data, cur->data)) {
 436                         void *data = cur->data;
 437
 438                         if (pprev) {
 439                                 *pprev = cur->next;
 440                         } else {
 441                                 lh->table[k] = cur->next;
 442                         }
 443
 444                         afs_atomlist_put(lh->bucket_list, cur);
 445
 446                         lh->ndata--;
 447
 448                         return data;
 449                 }
 450         }
 451
 452         return 0;
 453 }
 454
 455 int
 456 afs_lhash_enter(
 457     afs_lhash *lh,
 458     unsigned key,
 459     void *data
 460 )
 461 {
 462         size_t k;
 463         struct bucket *buck;
 464
 465         buck = afs_atomlist_get(lh->bucket_list);
 466         if (!buck) {
 467                 return -1;
 468         }
 469
 470         buck->key = key;
 471         buck->data = data;
 472
 473         k = afs_lhash_address(lh, key);
 474
 475         buck->next = lh->table[k];
 476         lh->table[k] = buck;
 477
 478         lh->ndata++;
 479
 480         /*
 481          * The load factor is the number of data items in the hash
 482          * table divided by the logical table length.  We expand the
 483          * table when the load factor exceeds a predetermined limit
 484          * (LOAD_FACTOR).  To avoid floating point, we multiply both
 485          * sides of the inequality by the logical table length...
 486          */
 487         if (lh->ndata > LOAD_FACTOR * lh->ltable) {
 488                 afs_lhash_expand(lh);
 489 #if 0
 490                 printf("lh->p = %d; lh->maxp = %d\n",
 491                         lh->p, lh->maxp);
 492 #endif
 493         }
 494
 495 #ifdef CHECK_INVARIANTS
 496         assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
 497 #endif  /* CHECK_INVARIANTS */
 498
 499         return 0;
 500 }
 501
 502 int
 503 afs_lhash_stat(
 504     afs_lhash *lh,
 505     struct afs_lhash_stat *sb
 506 )
 507 {
 508         size_t k;
 509
 510         int     min_chain_length = -1;
 511         int     max_chain_length = -1;
 512         size_t  buckets = lh->ltable;
 513         size_t  records = 0;
 514
 515         if (!sb) {
 516                 return -1;
 517         }
 518
 519 #ifdef CHECK_INVARIANTS
 520         assert(lh->ltable <= lh->ntable);
 521 #endif  /* CHECK_INVARIANTS */
 522
 523         for (k = 0; k < lh->ltable; k++) {
 524                 struct bucket *buck;
 525                 int chain_length = 0;
 526
 527                 for (buck = lh->table[k]; buck; buck = buck->next) {
 528                         chain_length++;
 529                         records++;
 530                 }
 531
 532                 if (min_chain_length == -1) {
 533                         min_chain_length = chain_length;
 534                 }
 535
 536                 if (max_chain_length == -1) {
 537                         max_chain_length = chain_length;
 538                 }
 539
 540                 if (chain_length < min_chain_length) {
 541                         min_chain_length = chain_length;
 542                 }
 543
 544                 if (max_chain_length < chain_length) {
 545                         max_chain_length = chain_length;
 546                 }
 547         }
 548
 549         sb->min_chain_length = min_chain_length;
 550         sb->max_chain_length = max_chain_length;
 551         sb->buckets = buckets;
 552         sb->records = records;
 553
 554 #ifdef CHECK_INVARIANTS
 555         assert(lh->ndata == records);
 556 #endif  /* CHECK_INVARIANTS */
 557
 558         sb->search_calls = lh->search_calls;
 559         sb->search_tests = lh->search_tests;
 560         sb->remove_calls = lh->remove_calls;
 561         sb->remove_tests = lh->remove_tests;
 562
 563         return 0;
 564 }