src/budb/db_hash.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include <afs/param.h>
  12 #include <afs/stds.h>
  13
  14 #include <roken.h>
  15
  16 #include <ubik.h>
  17 #include <afs/bubasics.h>
  18
  19 #include "budb_errs.h"
  20 #include "database.h"
  21 #include "budb_internal.h"
  22 #include "error_macros.h"
  23
  24
  25 int sizeFunctions[HT_MAX_FUNCTION + 1];
  26 int nHTBuckets = NhtBucketS;    /* testing: we need small HT blocks */
  27
  28 int ht_minHBlocks(struct memoryHashTable *mht);
  29
  30 /* ht_TableSize - return the size of table necessary to represent a hashtable
  31  * of given length in memory.  It basically rounds the length up by the number
  32  * of buckets per block. */
  33
  34 int
  35 ht_TableSize(int length)
  36 {
  37     int n;
  38     if (length == 0)
  39         return 0;
  40     n = (length + nHTBuckets - 1) / nHTBuckets;
  41     return n * sizeof(struct memoryHTBlock *);
  42 }
  43
  44 /* ht_ResetT - resets the in-memory representation of a hashtable block array.
  45  * It also resets the global variable nHTBuckets. */
  46
  47 static void
  48 ht_ResetT(struct memoryHTBlock ***blocksP, int *sizeP, int length)
  49 {
  50     struct memoryHTBlock **b = *blocksP;
  51     int newsize;
  52     int n;
  53     int i;
  54
  55     nHTBuckets = ntohl(db.h.nHTBuckets);
  56     if (b) {
  57         n = *sizeP / sizeof(b[0]);
  58         newsize = ht_TableSize(length);
  59         if (*sizeP != newsize) {
  60             /* free all blocks in the old array */
  61             for (i = 0; i < n; i++)
  62                 if (b[i])
  63                     free(b[i]);
  64             free(b);
  65             *sizeP = 0;
  66             *blocksP = 0;
  67         } else {
  68             /* invalidate the blocks of the array  */
  69             for (i = 0; i < n; i++)
  70                 if (b[i])
  71                     b[i]->valid = 0;
  72         }
  73     }
  74 }
  75
  76 /* ht_Reset
  77  *      reinitialize a memory hash table.
  78  *      Calls ht_ResetT to invalidate the two block arrays.
  79  */
  80
  81 void
  82 ht_Reset(struct memoryHashTable *mht)
  83 {
  84     struct hashTable *ht = NULL;
  85
  86     if (!(mht && (ht = mht->ht)))
  87         db_panic("some ht called with bad mht");
  88     mht->threadOffset = ntohl(ht->threadOffset);
  89     mht->length = ntohl(ht->length);
  90     mht->oldLength = ntohl(ht->oldLength);
  91     mht->progress = ntohl(ht->progress);
  92     ht_ResetT(&mht->blocks, &mht->size, mht->length);
  93     ht_ResetT(&mht->oldBlocks, &mht->oldSize, mht->oldLength);
  94 }
  95
  96 /* InitDBhash - When server starts, do hash table initialization.
  97      test - initialization parameters: bit 4 is small ht. */
  98
  99 afs_int32
 100 InitDBhash(void)
 101 {
 102     sizeFunctions[0] = 0;
 103
 104     sizeFunctions[HT_dumpIden_FUNCTION] = sizeof(struct dump);
 105     sizeFunctions[HT_dumpName_FUNCTION] = sizeof(struct dump);
 106     sizeFunctions[HT_volName_FUNCTION] = sizeof(struct volInfo);
 107     sizeFunctions[HT_tapeName_FUNCTION] = sizeof(struct tape);
 108
 109     db.volName.ht = &db.h.volName;
 110     db.tapeName.ht = &db.h.tapeName;
 111     db.dumpName.ht = &db.h.dumpName;
 112     db.dumpIden.ht = &db.h.dumpIden;
 113     return 0;
 114 }
 115
 116 /* ht_DBInit - When rebuilding database, this sets up the hash tables. */
 117
 118 void
 119 ht_DBInit(void)
 120 {
 121     db.h.nHTBuckets = htonl(nHTBuckets);
 122
 123     {
 124         struct volInfo *s = 0;
 125         db.h.volName.threadOffset =
 126             htonl((char *)&s->nameHashChain - (char *)s);
 127         db.h.volName.functionType = htonl(HT_volName_FUNCTION);
 128     }
 129     {
 130         struct tape *s = 0;
 131         db.h.tapeName.threadOffset =
 132             htonl((char *)&s->nameHashChain - (char *)s);
 133         db.h.tapeName.functionType = htonl(HT_tapeName_FUNCTION);
 134     }
 135     {
 136         struct dump *s = 0;
 137         db.h.dumpName.threadOffset =
 138             htonl((char *)&s->nameHashChain - (char *)s);
 139         db.h.dumpName.functionType = htonl(HT_dumpName_FUNCTION);
 140
 141         db.h.dumpIden.threadOffset =
 142             htonl((char *)&s->idHashChain - (char *)s);
 143         db.h.dumpIden.functionType = htonl(HT_dumpIden_FUNCTION);
 144     }
 145     ht_Reset(&db.volName);
 146     ht_Reset(&db.tapeName);
 147     ht_Reset(&db.dumpName);
 148     ht_Reset(&db.dumpIden);
 149 }
 150
 151 afs_int32
 152 ht_AllocTable(struct ubik_trans *ut, struct memoryHashTable *mht)
 153 {
 154     struct hashTable *ht = NULL;
 155     afs_int32 code;
 156     int len;
 157     int nb, mnb;                /* number of blocks for hashTable */
 158     int i;
 159     struct memoryHTBlock **b;
 160     afs_int32 *plen;
 161
 162     if (!(mht && (ht = mht->ht)))
 163         db_panic("some ht called with bad mht");
 164     if (ht->length || mht->blocks)
 165         db_panic("previous table still allocated");
 166
 167     len = ntohl(ht->entries) * 2;       /* allow room to grow */
 168     nb = (len + nHTBuckets - 1) / nHTBuckets;
 169     mnb = ht_minHBlocks(mht);
 170     if (nb < mnb)
 171         nb = mnb;               /* use minimum */
 172     len = nb * nHTBuckets;      /* new hash table length */
 173
 174     mht->size = nb * sizeof(struct memoryHTBlock *);
 175     b = mht->blocks = calloc(1, mht->size);
 176
 177     for (i = 0; i < nb; i++) {
 178         b[i] = (struct memoryHTBlock *)malloc(sizeof(struct memoryHTBlock));
 179         code = AllocBlock(ut, (struct block *)&b[i]->b, &b[i]->a);
 180         if (code)
 181             return code;
 182         b[i]->valid = 0;
 183
 184         b[i]->b.h.type = hashTable_BLOCK;
 185
 186         /* thread the blocks */
 187         if (i)
 188             b[i - 1]->b.h.next = htonl(b[i]->a);
 189     }
 190     for (i = 0; i < nb; i++) {
 191         code =
 192             dbwrite(ut, b[i]->a, (char *)&b[i]->b,
 193                     sizeof(struct htBlock) + (nHTBuckets -
 194                                               NhtBucketS) * sizeof(dbadr));
 195         if (code)
 196             return code;
 197     }
 198     if ((code = set_word_addr(ut, 0, &db.h, &ht->table, htonl(b[0]->a))))
 199         return code;
 200
 201     plen = &ht->length;
 202     if ((code = set_word_addr(ut, 0, &db.h, plen, htonl(len))))
 203         return code;
 204     mht->length = len;
 205     return 0;
 206 }
 207
 208 afs_int32
 209 ht_FreeTable(struct ubik_trans *ut, struct memoryHashTable *mht)
 210 {
 211     struct hashTable *ht = NULL;
 212     afs_int32 code;
 213     struct blockHeader bh;
 214     dbadr a, na;
 215     afs_int32 *plen, *pprog;
 216
 217     if (!(mht && (ht = mht->ht)))
 218         db_panic("some ht called with bad mht");
 219     if (ht->oldLength == 0)
 220         db_panic("no table to free");
 221
 222     ht_ResetT(&mht->oldBlocks, &mht->oldSize, 0);
 223
 224     for (a = ntohl(ht->oldTable); a; a = na) {
 225         if (dbread(ut, a, (char *)&bh, sizeof(bh))) {
 226             Log("ht_FreeTable: dbread failed\n");
 227             return BUDB_IO;
 228         }
 229         na = ntohl(bh.next);
 230         if ((code = FreeBlock(ut, &bh, a)))
 231             return code;
 232     }
 233     plen = &ht->oldLength;
 234     pprog = &ht->progress;
 235     if (set_word_addr(ut, 0, &db.h, &ht->oldTable, 0)
 236         || set_word_addr(ut, 0, &db.h, plen, 0)
 237         || set_word_addr(ut, 0, &db.h, pprog, 0))
 238         return BUDB_IO;
 239     mht->oldLength = mht->progress = 0;
 240     return 0;
 241 }
 242
 243 afs_int32
 244 ht_GetTableBlock(struct ubik_trans *ut, struct memoryHashTable *mht,
 245                  afs_uint32 hash, int old, struct memoryHTBlock **blockP,
 246                  int *boP)
 247 {
 248     struct hashTable *ht = NULL;
 249     struct memoryHTBlock **b;
 250     int hi, bi;
 251     struct memoryHTBlock ***blocksP;
 252     int *sizeP;
 253     int n;
 254     int i;
 255     int length;
 256     dbadr ta = 0;
 257
 258     if ((mht == 0)
 259         || ((ht = mht->ht) == 0)
 260         ) {
 261         db_panic("some ht called with bad mht");
 262     }
 263
 264     *blockP = 0;
 265
 266     if (old) {
 267         if ((length = mht->oldLength) == 0)
 268             return 0;           /* no entries */
 269         hi = hash % length;
 270         if (hi < mht->progress)
 271             return 0;           /* no such entry */
 272         blocksP = &mht->oldBlocks;
 273         sizeP = &mht->oldSize;
 274     } else {
 275         if ((length = mht->length) == 0)
 276             return 0;           /* no entries */
 277         hi = hash % length;
 278         blocksP = &mht->blocks;
 279         sizeP = &mht->size;
 280     }
 281
 282     bi = hi / nHTBuckets;       /* block index */
 283     *boP = hi - bi * nHTBuckets;        /* block offset ptr */
 284
 285     if (*blocksP == 0) {
 286         *sizeP = ht_TableSize(length);
 287         *blocksP = calloc(1, *sizeP);
 288     }
 289     n = *sizeP / sizeof(struct memoryHTBlock *);
 290     if (bi >= n)
 291         db_panic("table size inconsistent");
 292     b = *blocksP;
 293
 294     /* find an allocated block or the beginning of the block array */
 295     for (i = bi; (i > 0) && (b[i] == 0); i--);
 296
 297     while (1) {
 298         if (b[i] == 0) {
 299             if (i == 0) {       /* the first block is found from the hashTable */
 300                 ta = ntohl(old ? ht->oldTable : ht->table);
 301                 if (ta == 0)
 302                     db_panic("non-zero length, but no table");
 303             }
 304             /* else ta is set from last time around loop */
 305             b[i] =
 306                 (struct memoryHTBlock *)malloc(sizeof(struct memoryHTBlock));
 307             b[i]->a = ta;
 308             b[i]->valid = 0;
 309         }
 310
 311         if (!b[i]->valid) {
 312             if (dbread(ut, b[i]->a, (char *)&b[i]->b, sizeof(struct htBlock)))
 313                 return BUDB_IO;
 314             b[i]->valid = 1;
 315         }
 316
 317         if (i == bi) {
 318             *blockP = b[bi];
 319             /* printf("ht_GetTableBlock: hash %d block %d offset %d\n",
 320              * hash, *blockP, *boP); */
 321             return 0;
 322         }
 323
 324         ta = ntohl(b[i++]->b.h.next);   /* get next ptr from current block */
 325     }
 326 }
 327
 328 /* ht_MaybeAdjust
 329  *      Decide when to push the current hash table to the old hash table.
 330  *      The entries in the old hash table are VALID, and are slowly hashed
 331  *      into the current table.
 332  */
 333
 334 static afs_int32
 335 ht_MaybeAdjust(struct ubik_trans *ut, struct memoryHashTable *mht)
 336 {
 337     struct hashTable *ht = mht->ht;
 338     int numberEntries = ntohl(ht->entries);
 339
 340     /* old hash table must be empty */
 341     if (mht->oldLength != 0)
 342         return (0);
 343
 344     /*
 345      * It costs a lot to grow and shrink the hash table. Therefore, we will not
 346      * shrink the hash table (only grow it). If the table is more than 2 entries per
 347      * chain (average) we need to grow: push the entries to the old hash table.
 348      *
 349      * Don't shrink it:
 350      * || ((mht->length > nHTBuckets) && (numberEntries*8 < mht->length))
 351      */
 352
 353     /* Only grow a hash table if the number of entries is twice the
 354      * number of hash length and is less than 20,450 (20 hash blocks). This
 355      * means that the volname hash table will not grow (its initial
 356      * hashtable size contains 30,600 buckets). Earlier revisions of
 357      * the buserver have the initial size at 510 and 5,100 buckets -
 358      * in which case we do want to grow it). We don't grow anything larger
 359      * than 20,450 entries because it's expensive to re-hash everything.
 360      */
 361     if ((numberEntries > mht->length * 2) && (numberEntries < 20450)) { /* push current hash table to old hash table */
 362         ht->oldLength = ht->length;
 363         ht->oldTable = ht->table;
 364         ht->progress = 0;
 365         ht->length = 0;
 366         ht->table = 0;
 367         if (dbwrite
 368             (ut, ((char *)ht - (char *)&db.h), (char *)ht, sizeof(*ht)))
 369             return BUDB_IO;
 370
 371         ht_Reset(mht);
 372         LogDebug(2, "ht_MaybeAdjust: push ht to old\n");
 373     }
 374     return 0;
 375 }
 376
 377 dbadr
 378 ht_LookupBucket(struct ubik_trans *ut, struct memoryHashTable *mht,
 379                 afs_uint32 hash, int old)
 380 {
 381     struct memoryHTBlock *block;
 382     int bo;
 383     afs_int32 code;
 384
 385     if ((old ? mht->oldLength : mht->length) == 0)
 386         return 0;
 387     code = ht_GetTableBlock(ut, mht, hash, old, &block, &bo);
 388     if (code || (block == 0))
 389         return 0;
 390     return ntohl(block->b.bucket[bo]);
 391 }
 392
 393 /* This function is not too bad, for small hash tables, but suffers, I think,
 394  * from insufficient mixing of the hash information. */
 395
 396 afs_uint32
 397 Old2StringHashFunction(unsigned char *str)
 398 {
 399     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 400     while (*str)
 401         hash = (hash << 1) + (hash >> 31) + *str++;
 402     return hash;
 403 }
 404
 405 /* This was actually a coding error, and produces dreadful results.  The
 406  * problem is that the hash needs to be mixed up not the incoming character. */
 407
 408 afs_uint32
 409 Old3StringHashFunction(unsigned char *str)
 410 {
 411     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 412     while (*str)
 413         hash += (*str++) * 0x072a51a4;
 414     return hash;
 415 }
 416
 417 /* This function is pretty good.  Its main problem is that the low two bits of
 418  * the hash multiplier are zero which tends to shift information too far left.
 419  * It behaves especially badly for hash tables whose size is a power of two. */
 420
 421 afs_uint32
 422 Old4StringHashFunction(unsigned char *str)
 423 {
 424     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 425     while (*str)
 426         hash = (*str++) + hash * 0x072a51a4;
 427     return hash;
 428 }
 429
 430 /* While this is good for a hash table with 500 buckets it is nearly as bad as
 431  * #3 with a hash table as big as 8200. */
 432
 433 afs_uint32
 434 Old5StringHashFunction(unsigned char *str)
 435 {
 436     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 437     while (*str)
 438         hash += (*str++);
 439     return hash;
 440 }
 441
 442 /* This was an attempt to produce a hash function with the smallest and
 443  * simplest mixing multiplier.  This is only a little worse than the real one,
 444  * and the difference seems to be smaller with larger hash tables.  It behaves
 445  * better than the random hash function. */
 446
 447 afs_uint32
 448 Old6StringHashFunction(unsigned char *str)
 449 {
 450     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 451     while (*str)
 452         hash = hash * 0x81 + (*str++);
 453     return hash;
 454 }
 455
 456 /* This actually seems to be little better then the real one.  Having the same
 457  * number of bits but only 5 bits apart seems to produce worse results but
 458  * having the bits spanning the same range farther apart also doesn't do as
 459  * well.  All these differences are fairly small, however. */
 460
 461 afs_uint32
 462 Old7StringHashFunction(unsigned char *str)
 463 {
 464     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 465     while (*str)
 466         hash = hash * 0x42108421 + (*str++);
 467     return hash;
 468 }
 469
 470 /* This function tries to provide some non-linearity by providing some feedback
 471  * from higher-order bits in the word.  It also uses shifts instead of
 472  * multiplies, which may be faster on some architectures. */
 473
 474 afs_uint32
 475 Old8StringHashFunction(unsigned char *str)
 476 {
 477     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 478     while (*str)
 479         hash =
 480             hash + (hash << 7) + (hash << 14) + (hash << 21) + (hash << 28) +
 481             (hash >> 17) + *str++;
 482     return hash;
 483 }
 484
 485 /* This is the result of the above search for good hash functions.  It seems
 486  * that the choice of multipliers is somewhat arbitrary but has several
 487  * constraints.  It shouldn't have too many or too few one bits and should be
 488  * odd.  It behaves beeter than the random hash function. */
 489
 490 afs_uint32
 491 StringHashFunction(unsigned char *str)
 492 {
 493     afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 494     /* The multiplicative constant should be odd and have a goodly number of
 495      * one bits. */
 496     while (*str)
 497         hash = (*str++) + hash * 0x10204081;
 498     return hash;
 499 }
 500
 501 afs_uint32
 502 IdHashFunction(afs_uint32 id)
 503 {
 504     afs_uint32 l, r;
 505     id *= 81847;
 506     l = id | 0xaaaaaaaa;
 507     r = id | 0x55555555;
 508     return (l * r);
 509 }
 510
 511 /* The minimum hash table blocks to allocate. Each block contains 510
 512  * buckets. They hash table grows when the number of entries reaches
 513  * twice the number of buckets.
 514  */
 515 int
 516 ht_minHBlocks(struct memoryHashTable *mht)
 517 {
 518     int retval;
 519
 520     switch (ntohl(mht->ht->functionType)) {
 521     case HT_dumpIden_FUNCTION:
 522     case HT_dumpName_FUNCTION:  /* hash table able to handle (befor it grows) ... */
 523         retval = 2;             /*     1,020 dump entries */
 524         break;
 525
 526     case HT_tapeName_FUNCTION:
 527         retval = 4;             /*      2,040 tape entries */
 528         break;
 529
 530     case HT_volName_FUNCTION:
 531         retval = 60;            /*     61,200 volInfo entries (with different names) */
 532         break;
 533
 534     default:
 535         db_panic("Illegal hash function type");
 536         retval = -1; /* not reached */
 537     }
 538     return (retval);
 539 }
 540
 541 afs_uint32
 542 ht_HashEntry(struct memoryHashTable *mht,
 543              char *e)                           /* entry's address (in b) */
 544 {
 545     int type = ntohl(mht->ht->functionType);
 546     afs_uint32 retval;
 547
 548     switch (type) {
 549     case HT_dumpIden_FUNCTION:
 550         retval = IdHashFunction(ntohl(((struct dump *)e)->id));
 551         LogDebug(5, "HashEntry: dumpid returns %d\n", retval);
 552         break;
 553
 554     case HT_dumpName_FUNCTION:
 555         retval = StringHashFunction((unsigned char *)((struct dump *)e)->dumpName);
 556         LogDebug(5, "HashEntry: dumpname returns %d\n", retval);
 557         break;
 558
 559     case HT_tapeName_FUNCTION:
 560         retval = StringHashFunction((unsigned char *)((struct tape *)e)->name);
 561         LogDebug(5, "HashEntry: tapename returns %d\n", retval);
 562         break;
 563
 564     case HT_volName_FUNCTION:
 565         retval = StringHashFunction((unsigned char *)((struct volInfo *)e)->name);
 566         LogDebug(5, "HashEntry: volname returns %d\n", retval);
 567         break;
 568
 569     default:
 570         db_panic("illegal hash function");
 571         retval = -1; /* not reached */
 572     }
 573
 574     return (retval);
 575 }
 576
 577
 578 /* ht_GetType
 579  *      returns a ptr to the memory hash table for the specified hash
 580  *      list.
 581  */
 582
 583 struct memoryHashTable *
 584 ht_GetType(int type, int *e_sizeP)
 585 {
 586     struct memoryHashTable *mht;
 587
 588     if ((type <= 0) || (type > HT_MAX_FUNCTION))
 589         return 0;
 590
 591     if (e_sizeP)
 592         *e_sizeP = sizeFunctions[type];
 593     switch (type) {
 594     case HT_dumpIden_FUNCTION:
 595         mht = &db.dumpIden;
 596         break;
 597
 598     case HT_dumpName_FUNCTION:
 599         mht = &db.dumpName;
 600         break;
 601
 602     case HT_tapeName_FUNCTION:
 603         mht = &db.tapeName;
 604         break;
 605
 606     case HT_volName_FUNCTION:
 607         mht = &db.volName;
 608         break;
 609
 610     default:
 611         return 0;
 612     }
 613     if (ntohl(mht->ht->functionType) != type)
 614         db_panic("ht types don't match");
 615     return mht;
 616 }
 617
 618 static int
 619 ht_KeyMatch(int type, char *key, char *e)
 620 {
 621     switch (type) {
 622     case HT_dumpIden_FUNCTION:
 623         return *(dumpId *) key == ntohl(((struct dump *)e)->id);
 624     case HT_dumpName_FUNCTION:
 625         return strcmp(key, ((struct dump *)e)->dumpName) == 0;
 626     case HT_tapeName_FUNCTION:
 627         return strcmp(key, ((struct tape *)e)->name) == 0;
 628     case HT_volName_FUNCTION:
 629         return strcmp(key, ((struct volInfo *)e)->name) == 0;
 630
 631     default:
 632         db_panic("illegal hash function");
 633     }
 634     /* not reached */
 635     return 0;
 636 }
 637
 638 /* ht_LookupEntry
 639  * entry:
 640  *      ut - ubik transaction
 641  *      mht - memory hash table ptr
 642  *      key - hash and lookup key
 643  * exit:
 644  *      eaP - dbaddr of entry found or zero if failed
 645   *     e - contents of located entry
 646  */
 647
 648 afs_int32
 649 ht_LookupEntry(struct ubik_trans *ut,
 650                struct memoryHashTable *mht,
 651                void *key,       /* pointer to lookup key to match */
 652                dbadr *eaP,      /* db addr of entry found or zero */
 653                void *e)         /* contents of located entry */
 654 {
 655     struct hashTable *ht = NULL;
 656     int type;
 657     int e_size;
 658     int old;
 659     afs_uint32 hash;
 660     dbadr a;
 661
 662     if (!key || !eaP || !e)
 663         db_panic("null ptrs passed to LookupEntry");
 664     if (!(mht && (ht = mht->ht)))
 665         db_panic("some ht called with bad mht");
 666
 667     *eaP = 0;                   /* initialize not-found indicator */
 668
 669     type = ntohl(ht->functionType);
 670     e_size = sizeFunctions[type];
 671     if (type == HT_dumpIden_FUNCTION)
 672         hash = IdHashFunction(*(dumpId *) key);
 673     else
 674         hash = StringHashFunction(key);
 675
 676     for (old = 0;; old++) {
 677         a = ht_LookupBucket(ut, mht, hash, old);
 678         while (a) {
 679             if (dbread(ut, a, e, e_size))
 680                 return BUDB_IO;
 681             if (ht_KeyMatch(type, key, e)) {
 682                 *eaP = a;
 683                 return 0;
 684             }
 685             a = ntohl(*(dbadr *) ((char *)e + mht->threadOffset));
 686         }
 687         if (old)
 688             return 0;
 689     }
 690 }
 691
 692 /* ht_HashInList
 693  * entry:
 694  *      opQuota - max # of items to move
 695  * exit:
 696  *      opQuota - adjusted to reflect # of moves
 697  */
 698
 699 static afs_int32
 700 ht_HashInList(struct ubik_trans *ut, struct memoryHashTable *mht,
 701               int *opQuota, struct memoryHTBlock *block, int blockOffset)
 702 {
 703     struct hashTable *ht = mht->ht;
 704     afs_int32 code;
 705     dbadr ea, next_ea;
 706     dbadr listA;
 707     char e[sizeof(struct block)];       /* unnecessarily conservative */
 708     int e_size = sizeFunctions[ntohl(ht->functionType)];
 709
 710     if (mht->length == 0) {
 711         if ((code = ht_AllocTable(ut, mht))) {
 712             Log("ht_HashInList: ht_AllocTable failed\n");
 713             return code;
 714         }
 715     }
 716
 717     listA = ntohl(block->b.bucket[blockOffset]);
 718
 719     if (listA == 0) {
 720         Log("ht_HashInList: expecting non-zero bucket\n");
 721         return 0;
 722     }
 723
 724     for (ea = listA; ea; ea = next_ea) {        /*f */
 725
 726         LogDebug(3, "ht_HashInList: move entry at %d, type %d\n", ea,
 727                  ntohl(mht->ht->functionType));
 728
 729         if (dbread(ut, ea, e, e_size))
 730             return BUDB_IO;
 731
 732         /* LogNetDump((struct dump *) e); */
 733
 734         /* get the address of the next item on the list */
 735         next_ea = ntohl(*(dbadr *) (e + mht->threadOffset));
 736
 737         /* write the link into the bucket */
 738         code =
 739             set_word_addr(ut, block->a, &block->b,
 740                           &block->b.bucket[blockOffset], htonl(next_ea));
 741         if (code) {
 742             Log("ht_HashInList: bucket update failed\n");
 743             return (code);
 744         }
 745
 746         {
 747             struct memoryHTBlock *block;
 748             int bo;
 749             afs_uint32 hash;
 750
 751             /* get the hash value */
 752             hash = ht_HashEntry(mht, e) % mht->length;
 753             LogDebug(4, "ht_HashInList: moved to %d\n", hash);
 754
 755             /* get the new hash table block */
 756             code = ht_GetTableBlock(ut, mht, hash, 0 /*old */ , &block, &bo);
 757             if (code) {
 758                 Log("ht_HashInList: ht_GetTableBlock failed\n");
 759                 return code;
 760             }
 761             if (block == 0) {
 762                 Log("ht_HashInList: ht_GetTableBlock returned 0\n");
 763                 return BUDB_INTERNALERROR;
 764             }
 765
 766             /* Chain entry at front of bucket;
 767              * first threadOffset of entry = bucket
 768              * then bucket = addr of entry
 769              */
 770             if (set_word_offset
 771                 (ut, ea, e, mht->threadOffset, block->b.bucket[bo])
 772                 || set_word_addr(ut, block->a, &block->b,
 773                                  &block->b.bucket[bo], htonl(ea)))
 774                 return BUDB_IO;
 775         }
 776
 777         if (--(*opQuota) == 0)
 778             break;
 779     }                           /*f */
 780     return 0;
 781 }
 782
 783
 784 /* ht_MoveEntries
 785  *      The hash table is needs to be re-sized. Move entries from the old
 786  *      to the new.
 787  */
 788
 789 static afs_int32
 790 ht_MoveEntries(struct ubik_trans *ut, struct memoryHashTable *mht)
 791 {
 792     struct memoryHTBlock *block;
 793     afs_uint32 hash;
 794     int count;
 795     int bo;
 796     afs_int32 code;
 797     afs_int32 *pprog;
 798
 799     if (mht->oldLength == 0)
 800         return 0;
 801
 802     LogDebug(3, "ht_MoveEntries:\n");
 803     /* we assume here that the hash function will map numbers smaller than the
 804      * size of the hash table straight through to hash table indexes.
 805      */
 806     hash = mht->progress;
 807
 808     /* get hash table block ? */
 809     code = ht_GetTableBlock(ut, mht, hash, 1 /*old */ , &block, &bo);
 810     if (code)
 811         return code;
 812
 813     if (block == 0)
 814         return BUDB_INTERNALERROR;
 815
 816     count = 10;                 /* max. # entries to move */
 817
 818     do {
 819         if (block->b.bucket[bo]) {
 820             code = ht_HashInList(ut, mht, &count, block, bo);
 821             if (code) {
 822                 Log("ht_MoveEntries: ht_HashInList failed\n");
 823                 return (BUDB_IO);
 824             }
 825         }
 826
 827         if (block->b.bucket[bo] == 0) {
 828             /* this bucket is now empty */
 829             mht->progress++;
 830         }
 831
 832         /* don't exceed the quota of items to be moved */
 833         if (count == 0)
 834             break;
 835
 836     } while (++bo < nHTBuckets);
 837
 838     if (mht->progress >= mht->oldLength)
 839         return (ht_FreeTable(ut, mht));
 840
 841     pprog = &mht->ht->progress;
 842     if (set_word_addr(ut, 0, &db.h, pprog, htonl(mht->progress))) {
 843         Log("ht_MoveEntries: progress set failed\n");
 844         return BUDB_IO;
 845     }
 846     return 0;
 847 }
 848
 849
 850 #ifdef notdef
 851 static afs_int32
 852 ht_MoveEntries(struct ubik_trans *ut, struct memoryHashTable *mht)
 853 {
 854     afs_uint32 hash;
 855     int bo;
 856     struct memoryHTBlock *block;
 857     afs_int32 code;
 858
 859     if (mht->oldLength == 0)
 860         return 0;
 861
 862     LogDebug(3, "ht_MoveEntries:\n");
 863     /* we assume here that the hash function will map numbers smaller than the
 864      * size of the hash table straight through to hash table indexes.
 865      */
 866     hash = mht->progress;
 867
 868     /* get hash table block ? */
 869     code = ht_GetTableBlock(ut, mht, hash, 1 /*old */ , &block, &bo);
 870     if (code)
 871         return code;
 872
 873     if (block == 0)
 874         return BUDB_INTERNALERROR;
 875
 876     do {
 877         mht->progress++;
 878         if (block->b.bucket[bo]) {
 879             code = ht_HashInList(ut, mht, ntohl(block->b.bucket[bo]));
 880             if (code) {
 881                 Log("ht_MoveEntries: ht_HashInList failed\n");
 882                 return (BUDB_IO);
 883             }
 884             code =
 885                 set_word_addr(ut, block->a, &block->b, &block->b.bucket[bo],
 886                               0);
 887             if (code) {
 888                 Log("ht_MoveEntries: clear old entry failed\n");
 889                 return BUDB_IO;
 890             }
 891             break;
 892         }
 893     } while (++bo < nHTBuckets);
 894
 895     if (mht->progress >= mht->oldLength)
 896         return (ht_FreeTable(ut, mht));
 897
 898     if (set_word_addr(ut, 0, &db.h, &mht->ht->progress, htonl(mht->progress))) {
 899         Log("ht_MoveEntries: progress set failed\n");
 900         return BUDB_IO;
 901     }
 902     return 0;
 903 }
 904 #endif /* notdef */
 905
 906 afs_int32
 907 ht_HashIn(struct ubik_trans *ut,
 908           struct memoryHashTable *mht,
 909           dbadr ea,                     /* block db address */
 910           void *e)                      /* entry's address (in b) */
 911 {
 912     struct hashTable *ht = NULL;
 913     afs_uint32 hash;
 914     struct memoryHTBlock *block;
 915     int bo;
 916     afs_int32 code;
 917     afs_int32 *pentries;
 918
 919     if (!(mht && (ht = mht->ht)))
 920         db_panic("some ht called with bad mht");
 921
 922     if ((code = ht_MaybeAdjust(ut, mht)))
 923         return code;
 924     if (mht->length == 0)
 925         if ((code = ht_AllocTable(ut, mht)))
 926             return code;
 927
 928     hash = ht_HashEntry(mht, e);
 929     code = ht_GetTableBlock(ut, mht, hash, 0 /*old */ , &block, &bo);
 930     if (code)
 931         return code;
 932     if (!block)
 933         return BUDB_INTERNALERROR;
 934
 935     code = set_word_offset(ut, ea, e, mht->threadOffset, block->b.bucket[bo]);
 936     if (code)
 937         return BUDB_IO;
 938     LogDebug(5, "Hashin: set %d to %d\n", mht->threadOffset,
 939              block->b.bucket[bo]);
 940
 941     code =
 942         set_word_addr(ut, block->a, &block->b, &block->b.bucket[bo],
 943                       htonl(ea));
 944     if (code)
 945         return BUDB_IO;
 946     LogDebug(5, "Hashin: set %"AFS_PTR_FMT" to %d\n",
 947              &block->b.bucket[bo], htonl(ea));
 948
 949     pentries = &ht->entries;
 950     code =
 951         set_word_addr(ut, 0, &db.h, pentries,
 952                       htonl(ntohl(ht->entries) + 1));
 953     if (code)
 954         return BUDB_IO;
 955
 956     return ht_MoveEntries(ut, mht);
 957 }
 958
 959 /* RemoveFromList - generic procedure to delete an entry from a list given its
 960  * head and thread offset.  Only a single long is modified by this routine.
 961  * The head pointer is modified, in place, using set_word_addr if the entry is
 962  * at the head of the list, otherwise only the thread of the previous entry is
 963  * modified.  The entry pointer is only used to calculate the thread offset,
 964  * but is not otherwise used. */
 965
 966 afs_int32
 967 RemoveFromList(struct ubik_trans *ut,
 968                dbadr ea,        /* db addr of head structure */
 969                void *e,         /* head structure */
 970                dbadr *head,     /* address of head pointer */
 971                dbadr ta,        /* db addr of strucure to be removed */
 972                void *t,         /* structure being removed */
 973                dbadr *thread)   /* pointer to thread pointer */
 974 {
 975     afs_int32 code;
 976     int threadOffset = ((char *)thread - (char *)t);
 977     dbadr next_a;               /* db addr of next element in list */
 978     dbadr loop_a;               /* db addr of current list element */
 979
 980     if (*head == 0)
 981         return -1;              /* empty list: not found */
 982     next_a = ntohl(*head);      /* start at head of list */
 983     if (next_a == ta) {         /* remove from head of list */
 984         code = set_word_addr(ut, ea, e, head, *thread);
 985         return code;
 986     }
 987     do {
 988         loop_a = next_a;
 989         code =
 990             dbread(ut, loop_a + threadOffset, (char *)&next_a, sizeof(dbadr));
 991         if (code)
 992             return code;
 993         if (next_a == 0)
 994             return -1;          /* end of list: not found */
 995     } while (ta != (next_a = ntohl(next_a)));
 996     code = dbwrite(ut, loop_a + threadOffset, (char *)thread, sizeof(dbadr));
 997     return code;
 998 }
 999
1000 afs_int32
1001 ht_HashOutT(struct ubik_trans *ut, struct memoryHashTable *mht,
1002             afs_uint32 hash, dbadr ea, char *e, int old)
1003 {
1004     struct memoryHTBlock *block;
1005     int bo;
1006     afs_int32 code;
1007     afs_int32 *pentries;
1008
1009     if ((old ? mht->oldLength : mht->length) == 0)
1010         return -1;
1011     code = ht_GetTableBlock(ut, mht, hash, old, &block, &bo);
1012     if (code)
1013         return code;
1014     if ((block == 0) || (block->b.bucket[bo] == 0))
1015         return -1;
1016
1017     code =
1018         RemoveFromList(ut, block->a, (char *)&block->b, &block->b.bucket[bo],
1019                        ea, e, (dbadr *) (e + mht->threadOffset));
1020     if (code)
1021         return code;
1022 #if 0
1023     net_ea = htonl(ea);
1024     unthread_ea = *(afs_int32 *) ((char *)e + mht->threadOffset);
1025     if (block->b.bucket[bo] == net_ea) {
1026         if (set_word_addr
1027             (ut, block->a, &block->b, &block->b.bucket[bo], unthread_ea))
1028             return BUDB_IO;
1029         goto done;
1030     }
1031     loop_a = ntohl(block->b.bucket[bo]);
1032     while (1) {
1033         if (dbread
1034             (ut, loop_a + mht->threadOffset, (char *)&next_loop_a,
1035              sizeof(dbadr)))
1036             return BUDB_IO;
1037         if (next_loop_a == 0)
1038             return -1;          /* not found */
1039         if (net_ea == next_loop_a) {
1040             if (dbwrite
1041                 (ut, loop_a + mht->threadOffset, (char *)&unthread_ea,
1042                  sizeof(dbadr)))
1043                 return BUDB_IO;
1044             goto done;
1045         }
1046         loop_a = ntohl(next_loop_a);
1047     }
1048   done:
1049 #endif
1050     pentries = &mht->ht->entries;
1051     if (set_word_addr
1052         (ut, 0, &db.h, pentries, htonl(ntohl(mht->ht->entries) - 1)))
1053         return BUDB_IO;
1054     return 0;
1055 }
1056
1057 afs_int32
1058 ht_HashOut(struct ubik_trans *ut, struct memoryHashTable *mht, dbadr ea,
1059            void *e)
1060 {
1061     afs_uint32 hash;
1062     afs_int32 code;
1063
1064     if (!mht)
1065         db_panic("some ht called with bad mht");
1066     hash = ht_HashEntry(mht, e);
1067     if (mht->oldLength) {
1068         code = ht_HashOutT(ut, mht, hash, ea, e, 1 /*old */ );
1069         if (code == 0)
1070             return 0;
1071         else if (code != -1)
1072             ERROR(code);
1073     }
1074     if (mht->length == 0)       /* not found */
1075         ERROR(BUDB_INTERNALERROR);
1076     code = ht_HashOutT(ut, mht, hash, ea, e, 0 /*old */ );
1077     if (code == -1)
1078         ERROR(BUDB_NOENT);
1079     if (code)
1080         ERROR(code);
1081
1082     code = ht_MoveEntries(ut, mht);
1083     if (code)
1084         ERROR(code);
1085     code = ht_MaybeAdjust(ut, mht);
1086     if (code)
1087         ERROR(code);
1088
1089   error_exit:
1090     return (code);
1091 }
1092
1093 /* generic hash table traversal routines */
1094
1095
1096 afs_int32
1097 scanHashTableBlock(struct ubik_trans *ut,
1098                    struct memoryHashTable *mhtPtr,
1099                    struct htBlock *htBlockPtr,
1100                    int old,
1101                    afs_int32 length,    /* size of whole hash table */
1102                    int index,           /* base index of this block */
1103                    int (*selectFn) (dbadr, void *, void *),
1104                    int (*operationFn) (dbadr, void *, void *),
1105                    void *rockPtr)
1106 {
1107     int type;                   /* hash table type */
1108     int entrySize;              /* hashed entry size */
1109
1110     char entry[sizeof(struct block)];
1111     dbadr entryAddr, nextEntryAddr;
1112
1113     int i;
1114
1115     type = ntohl(mhtPtr->ht->functionType);
1116     entrySize = sizeFunctions[type];
1117
1118     /* step through this hash table block, being careful to stop
1119      * before the end of the overall hash table
1120      */
1121
1122     for (i = 0; (i < nHTBuckets) && (index < length); i++, index++) {   /*f */
1123         entryAddr = 0;
1124         nextEntryAddr = ntohl(htBlockPtr->bucket[i]);
1125
1126         /* if this is the old hash table, all entries below the progress mark
1127          * should have been moved to the new hash table
1128          */
1129         if (old && (index < mhtPtr->progress) && nextEntryAddr)
1130             return BUDB_INTERNALERROR;
1131
1132         /* now walk down the chain of each bucket */
1133         while (nextEntryAddr) { /*w */
1134
1135             entryAddr = nextEntryAddr;
1136             if (dbread(ut, entryAddr, &entry[0], entrySize))
1137                 return (BUDB_INTERNALERROR);
1138
1139             if ((*selectFn) (entryAddr, &entry[0], rockPtr)) {
1140                 (*operationFn) (entryAddr, &entry[0], rockPtr);
1141             }
1142
1143             nextEntryAddr =
1144                 ntohl(*((dbadr *) (entry + mhtPtr->threadOffset)));
1145         }                       /*w */
1146
1147     }                           /*f */
1148
1149     return (0);
1150 }
1151
1152 afs_int32
1153 scanHashTable(struct ubik_trans *ut, struct memoryHashTable *mhtPtr,
1154               int (*selectFn) (dbadr, void *, void *),
1155               int (*operationFn) (dbadr, void *, void *),
1156               void *rockPtr)
1157 {
1158     struct htBlock hashTableBlock;
1159     dbadr tableAddr;            /* disk addr of hash block */
1160     int tableLength;            /* # entries */
1161     int blockLength;            /* # blocks */
1162     int hashIndex;
1163     int old;
1164     int i;
1165     afs_int32 code = 0;
1166
1167     extern int nHTBuckets;      /* # buckets in a hash table */
1168
1169     for (old = 0; old <= 1; old++) {    /*fo */
1170         if (old) {
1171             /* check the old hash table */
1172             tableLength = mhtPtr->oldLength;
1173             if (tableLength == 0)
1174                 continue;       /* nothing to do */
1175
1176             tableAddr = ntohl(mhtPtr->ht->oldTable);
1177         } else {
1178             /* check current hash table */
1179             tableLength = mhtPtr->length;
1180             if (tableLength == 0)
1181                 continue;       /* nothing to do */
1182
1183             tableAddr = ntohl(mhtPtr->ht->table);
1184         }
1185
1186         blockLength = (tableLength - 1) / nHTBuckets;
1187         hashIndex = 0;
1188
1189         /* follow the hash chain */
1190         for (i = 0; i <= blockLength; i++) {    /*fi */
1191             /* chain too short */
1192             if (tableAddr == 0)
1193                 ERROR(BUDB_DATABASEINCONSISTENT);
1194
1195             code =
1196                 dbread(ut, tableAddr, &hashTableBlock,
1197                        sizeof(hashTableBlock));
1198             if (code)
1199                 goto error_exit;
1200
1201             code =
1202                 scanHashTableBlock(ut, mhtPtr, &hashTableBlock, old,
1203                                    tableLength, hashIndex, selectFn,
1204                                    operationFn, rockPtr);
1205             if (code)
1206                 goto error_exit;
1207
1208             hashIndex += nHTBuckets;
1209             tableAddr = ntohl(hashTableBlock.h.next);
1210         }                       /*fi */
1211     }                           /*fo */
1212
1213   error_exit:
1214     return (code);
1215 }