src/budb/db_hash.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include <afs/param.h>
  12
  13 RCSID("$Header$");
  14
  15 #ifdef AFS_NT40_ENV
  16 #include <winsock2.h>
  17 #else
  18 #include <netinet/in.h>
  19 #endif
  20 #include <sys/types.h>
  21 #include <afs/stds.h>
  22 #include <ubik.h>
  23 #include <afs/auth.h>
  24 #include <afs/bubasics.h>
  25 #include "budb_errs.h"
  26 #include "database.h"
  27 #include "error_macros.h"
  28
  29
  30 int sizeFunctions[HT_MAX_FUNCTION+1];
  31 int nHTBuckets = NhtBucketS;            /* testing: we need small HT blocks */
  32
  33 /* ht_TableSize - return the size of table necessary to represent a hashtable
  34  * of given length in memory.  It basically rounds the length up by the number
  35  * of buckets per block. */
  36
  37 int ht_TableSize (length)
  38   int length;
  39 {   int n;
  40     if (length == 0) return 0;
  41     n = (length + nHTBuckets-1) / nHTBuckets;
  42     return n*sizeof(struct memoryHTBlock *);
  43 }
  44
  45 /* ht_ResetT - resets the in-memory representation of a hashtable block array.
  46  * It also resets the global variable nHTBuckets. */
  47
  48 static void ht_ResetT (blocksP, sizeP, length)
  49   struct memoryHTBlock ***blocksP;
  50   int  *sizeP;
  51   int   length;
  52 {   struct memoryHTBlock **b = *blocksP;
  53     int newsize;
  54     int n;
  55     int i;
  56
  57     nHTBuckets = ntohl(db.h.nHTBuckets);
  58     if (b) {
  59         n = *sizeP / sizeof(b[0]);
  60         newsize = ht_TableSize (length);
  61         if (*sizeP != newsize) {
  62             /* free all blocks in the old array */
  63             for (i=0; i<n; i++)
  64                 if (b[i]) free (b[i]);
  65             free (b);
  66             *sizeP = 0;
  67             *blocksP = 0;
  68         } else {
  69             /* invalidate the blocks of the array  */
  70             for (i=0; i<n; i++)
  71                 if (b[i]) b[i]->valid = 0;
  72         }
  73     }
  74 }
  75
  76 /* ht_Reset
  77  *      reinitialize a memory hash table.
  78  *      Calls ht_ResetT to invalidate the two block arrays.
  79  */
  80
  81 void ht_Reset (mht)
  82   struct memoryHashTable *mht;
  83 {   struct hashTable *ht;
  84
  85     if (!(mht && (ht = mht->ht)))
  86         db_panic ("some ht called with bad mht");
  87     mht->threadOffset = ntohl(ht->threadOffset);
  88     mht->length = ntohl(ht->length);
  89     mht->oldLength = ntohl(ht->oldLength);
  90     mht->progress = ntohl(ht->progress);
  91     ht_ResetT (&mht->blocks, &mht->size, mht->length);
  92     ht_ResetT (&mht->oldBlocks, &mht->oldSize, mht->oldLength);
  93 }
  94
  95 /* InitDBhash - When server starts, do hash table initialization.
  96      test - initialization parameters: bit 4 is small ht. */
  97
  98 afs_int32 InitDBhash ()
  99 {
 100     sizeFunctions[0] = 0;
 101
 102     sizeFunctions[HT_dumpIden_FUNCTION] = sizeof(struct dump);
 103     sizeFunctions[HT_dumpName_FUNCTION] = sizeof(struct dump);
 104     sizeFunctions[HT_volName_FUNCTION] = sizeof(struct volInfo);
 105     sizeFunctions[HT_tapeName_FUNCTION] = sizeof(struct tape);
 106
 107     db.volName.ht = &db.h.volName;
 108     db.tapeName.ht = &db.h.tapeName;
 109     db.dumpName.ht = &db.h.dumpName;
 110     db.dumpIden.ht = &db.h.dumpIden;
 111     return 0;
 112 }
 113
 114 /* ht_DBInit - When rebuilding database, this sets up the hash tables. */
 115
 116 void ht_DBInit ()
 117 {
 118     db.h.nHTBuckets = htonl(nHTBuckets);
 119
 120     {   struct volInfo *s = 0;
 121         db.h.volName.threadOffset = htonl((char *)&s->nameHashChain - (char *)s);
 122         db.h.volName.functionType = htonl(HT_volName_FUNCTION);
 123     }
 124     {   struct tape *s = 0;
 125         db.h.tapeName.threadOffset = htonl((char *)&s->nameHashChain - (char *)s);
 126         db.h.tapeName.functionType = htonl(HT_tapeName_FUNCTION);
 127     }
 128     {   struct dump *s = 0;
 129         db.h.dumpName.threadOffset = htonl((char *)&s->nameHashChain - (char *)s);
 130         db.h.dumpName.functionType = htonl(HT_dumpName_FUNCTION);
 131
 132         db.h.dumpIden.threadOffset = htonl((char *)&s->idHashChain - (char *)s);
 133         db.h.dumpIden.functionType = htonl(HT_dumpIden_FUNCTION);
 134     }
 135     ht_Reset (&db.volName);
 136     ht_Reset (&db.tapeName);
 137     ht_Reset (&db.dumpName);
 138     ht_Reset (&db.dumpIden);
 139 }
 140
 141 afs_int32 ht_AllocTable (ut, mht)
 142   struct ubik_trans *ut;
 143   struct memoryHashTable *mht;
 144 {   struct hashTable *ht;
 145     afs_int32  code;
 146     int   len;
 147     int   nb, mnb;                              /* number of blocks for hashTable */
 148     int   i;
 149     struct memoryHTBlock **b;
 150
 151     if (!(mht && (ht = mht->ht))) db_panic ("some ht called with bad mht");
 152     if (ht->length || mht->blocks) db_panic ("previous table still allocated");
 153
 154     len = ntohl(ht->entries) * 2;       /* allow room to grow */
 155     nb = (len + nHTBuckets-1) / nHTBuckets;
 156     mnb = ht_minHBlocks(mht);
 157     if (nb < mnb) nb = mnb;             /* use minimum */
 158     len = nb * nHTBuckets;              /* new hash table length */
 159
 160     mht->size = nb*sizeof(struct memoryHTBlock *);
 161     b = mht->blocks = (struct memoryHTBlock **)malloc (mht->size);
 162     bzero (b, mht->size);
 163
 164     for (i=0; i<nb; i++) {
 165         b[i] = (struct memoryHTBlock *)malloc (sizeof (struct memoryHTBlock));
 166         code = AllocBlock (ut, (struct block *)&b[i]->b, &b[i]->a);
 167         if (code) return code;
 168         b[i]->valid = 0;
 169
 170         b[i]->b.h.type = hashTable_BLOCK;
 171
 172         /* thread the blocks */
 173         if (i) b[i-1]->b.h.next = htonl(b[i]->a);
 174     }
 175     for (i=0; i<nb; i++)
 176     {
 177         code = dbwrite (ut, b[i]->a, (char *)&b[i]->b,
 178                         sizeof(struct htBlock) + (nHTBuckets-NhtBucketS)*sizeof(dbadr));
 179         if (code)
 180             return code;
 181     }
 182     if (code = set_word_addr (ut, 0, &db.h, &ht->table, htonl(b[0]->a)))
 183         return code;
 184
 185     if (code = set_word_addr (ut, 0, &db.h, &ht->length, htonl(len)))
 186         return code;
 187     mht->length = len;
 188     return 0;
 189 }
 190
 191 afs_int32 ht_FreeTable (ut, mht)
 192   struct ubik_trans *ut;
 193   struct memoryHashTable *mht;
 194 {   struct hashTable *ht;
 195     afs_int32  code;
 196     struct blockHeader bh;
 197     dbadr a,na;
 198
 199     if (!(mht && (ht = mht->ht))) db_panic ("some ht called with bad mht");
 200     if (ht->oldLength == 0) db_panic ("no table to free");
 201
 202     ht_ResetT (&mht->oldBlocks, &mht->oldSize, 0);
 203
 204     for (a=ntohl(ht->oldTable); a; a=na) {
 205         if (dbread (ut, a, (char *)&bh, sizeof(bh)))
 206         {
 207             Log("ht_FreeTable: dbread failed\n");
 208             return BUDB_IO;
 209         }
 210         na = ntohl(bh.next);
 211         if (code = FreeBlock (ut, &bh, a)) return code;
 212     }
 213     if (set_word_addr (ut, 0, &db.h, &ht->oldTable,  0) ||
 214         set_word_addr (ut, 0, &db.h, &ht->oldLength, 0) ||
 215         set_word_addr (ut, 0, &db.h, &ht->progress,  0))
 216         return BUDB_IO;
 217     mht->oldLength = mht->progress = 0;
 218     return 0;
 219 }
 220
 221 afs_int32
 222 ht_GetTableBlock (ut, mht, hash, old, blockP, boP)
 223   struct ubik_trans *ut;
 224   struct memoryHashTable *mht;
 225   afs_uint32 hash;
 226   int   old;
 227   struct memoryHTBlock **blockP;
 228   int  *boP;
 229 {
 230     struct hashTable *ht;
 231     struct memoryHTBlock **b;
 232     int   hi,bi;
 233     struct memoryHTBlock ***blocksP;
 234     int  *sizeP;
 235     int   n;
 236     int   i;
 237     int   length;
 238     dbadr ta;
 239
 240     if ( (mht == 0)
 241     ||   ((ht = mht->ht) == 0)
 242        )
 243     {
 244         db_panic ("some ht called with bad mht");
 245     }
 246
 247     *blockP = 0;
 248
 249     if (old) {
 250         if ((length = mht->oldLength) == 0) return 0; /* no entries */
 251         hi = hash % length;
 252         if (hi < mht->progress) return 0; /* no such entry */
 253         blocksP = &mht->oldBlocks;
 254         sizeP = &mht->oldSize;
 255     } else {
 256         if ((length = mht->length) == 0) return 0; /* no entries */
 257         hi = hash % length;
 258         blocksP = &mht->blocks;
 259         sizeP = &mht->size;
 260     }
 261
 262     bi = hi / nHTBuckets;               /* block index */
 263     *boP = hi - bi*nHTBuckets;          /* block offset ptr */
 264
 265     if (*blocksP == 0) {
 266         *sizeP = ht_TableSize (length);
 267         *blocksP = (struct memoryHTBlock **)malloc (*sizeP);
 268         bzero (*blocksP, *sizeP);
 269     }
 270     n = *sizeP / sizeof (struct memoryHTBlock *);
 271     if (bi >= n) db_panic ("table size inconsistent");
 272     b = *blocksP;
 273
 274     /* find an allocated block or the beginning of the block array */
 275     for (i=bi; (i>0) && (b[i] == 0); i--);
 276
 277     while (1) {
 278         if (b[i] == 0) {
 279             if (i == 0) { /* the first block is found from the hashTable */
 280                 ta = ntohl(old ? ht->oldTable : ht->table);
 281                 if (ta == 0) db_panic ("non-zero length, but no table");
 282             }
 283             /* else ta is set from last time around loop */
 284             b[i] = (struct memoryHTBlock *)malloc (sizeof (struct memoryHTBlock));
 285             b[i]->a = ta;
 286             b[i]->valid = 0;
 287         }
 288
 289         if (!b[i]->valid) {
 290             if (dbread (ut, b[i]->a, (char *)&b[i]->b, sizeof(struct htBlock)))
 291                 return BUDB_IO;
 292             b[i]->valid = 1;
 293         }
 294
 295         if (i == bi)
 296         {
 297             *blockP = b[bi];
 298             /* printf("ht_GetTableBlock: hash %d block %d offset %d\n",
 299                 hash, *blockP, *boP); */
 300             return 0;
 301         }
 302
 303         ta = ntohl(b[i++]->b.h.next);   /* get next ptr from current block */
 304     }
 305 }
 306
 307 /* ht_MaybeAdjust
 308  *      Decide when to push the current hash table to the old hash table.
 309  *      The entries in the old hash table are VALID, and are slowly hashed
 310  *      into the current table.
 311  */
 312
 313 static afs_int32
 314 ht_MaybeAdjust (ut, mht)
 315      struct ubik_trans *ut;
 316      struct memoryHashTable *mht;
 317 {
 318     struct hashTable *ht = mht->ht;
 319     int numberEntries = ntohl(ht->entries);
 320
 321     /* old hash table must be empty */
 322     if ( mht->oldLength != 0)
 323         return(0);
 324
 325     /*
 326      * It costs a lot to grow and shrink the hash table. Therefore, we will not
 327      * shrink the hash table (only grow it). If the table is more than 2 entries per
 328      * chain (average) we need to grow: push the entries to the old hash table.
 329      *
 330      * Don't shrink it:
 331      * || ((mht->length > nHTBuckets) && (numberEntries*8 < mht->length))
 332      */
 333
 334     /* Only grow a hash table if the number of entries is twice the
 335      * number of hash length and is less than 20,450 (20 hash blocks). This
 336      * means that the volname hash table will not grow (its initial
 337      * hashtable size contains 30,600 buckets). Earlier revisions of
 338      * the buserver have the initial size at 510 and 5,100 buckets -
 339      * in which case we do want to grow it). We don't grow anything larger
 340      * than 20,450 entries because it's expensive to re-hash everything.
 341      */
 342     if ((numberEntries > mht->length*2) && (numberEntries < 20450))
 343     {                                 /* push current hash table to old hash table */
 344         ht->oldLength = ht->length;
 345         ht->oldTable  = ht->table;
 346         ht->progress  = 0;
 347         ht->length    = 0;
 348         ht->table     = 0;
 349         if (dbwrite (ut, ((char *)ht - (char *)&db.h), (char *)ht, sizeof (*ht)))
 350             return BUDB_IO;
 351
 352         ht_Reset (mht);
 353         LogDebug(2, "ht_MaybeAdjust: push ht to old\n");
 354     }
 355     return 0;
 356 }
 357
 358 dbadr ht_LookupBucket (ut, mht, hash, old)
 359   struct ubik_trans *ut;
 360   struct memoryHashTable *mht;
 361   afs_uint32 hash;
 362   int old;
 363 {   struct memoryHTBlock *block;
 364     int   bo;
 365     afs_int32  code;
 366
 367     if ((old ? mht->oldLength : mht->length) == 0) return 0;
 368     code = ht_GetTableBlock (ut, mht, hash, old, &block, &bo);
 369     if (code || (block == 0)) return 0;
 370     return ntohl(block->b.bucket[bo]);
 371 }
 372
 373 /* This function is not too bad, for small hash tables, but suffers, I think,
 374  * from insufficient mixing of the hash information. */
 375
 376 afs_uint32 Old2StringHashFunction (str)
 377   unsigned char *str;
 378 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 379     while (*str) hash = (hash<<1) + (hash>>31) + *str++;
 380     return hash;
 381 }
 382
 383 /* This was actually a coding error, and produces dreadful results.  The
 384  * problem is that the hash needs to be mixed up not the incoming character. */
 385
 386 afs_uint32 Old3StringHashFunction (str)
 387   unsigned char *str;
 388 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 389     while (*str) hash += (*str++) * 0x072a51a4;
 390     return hash;
 391 }
 392
 393 /* This function is pretty good.  Its main problem is that the low two bits of
 394  * the hash multiplier are zero which tends to shift information too far left.
 395  * It behaves especially badly for hash tables whose size is a power of two. */
 396
 397 afs_uint32 Old4StringHashFunction (str)
 398   unsigned char *str;
 399 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 400     while (*str) hash = (*str++) + hash * 0x072a51a4;
 401     return hash;
 402 }
 403
 404 /* While this is good for a hash table with 500 buckets it is nearly as bad as
 405  * #3 with a hash table as big as 8200. */
 406
 407 afs_uint32 Old5StringHashFunction (str)
 408   unsigned char *str;
 409 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 410     while (*str) hash += (*str++);
 411     return hash;
 412 }
 413
 414 /* This was an attempt to produce a hash function with the smallest and
 415  * simplest mixing multiplier.  This is only a little worse than the real one,
 416  * and the difference seems to be smaller with larger hash tables.  It behaves
 417  * better than the random hash function. */
 418
 419 afs_uint32 Old6StringHashFunction (str)
 420   unsigned char *str;
 421 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 422     while (*str) hash = hash*0x81 + (*str++);
 423     return hash;
 424 }
 425
 426 /* This actually seems to be little better then the real one.  Having the same
 427  * number of bits but only 5 bits apart seems to produce worse results but
 428  * having the bits spanning the same range farther apart also doesn't do as
 429  * well.  All these differences are fairly small, however. */
 430
 431 afs_uint32 Old7StringHashFunction (str)
 432   unsigned char *str;
 433 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 434     while (*str) hash = hash*0x42108421 + (*str++);
 435     return hash;
 436 }
 437
 438 /* This function tries to provide some non-linearity by providing some feedback
 439  * from higher-order bits in the word.  It also uses shifts instead of
 440  * multiplies, which may be faster on some architectures. */
 441
 442 afs_uint32 Old8StringHashFunction (str)
 443   unsigned char *str;
 444 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 445     while (*str)
 446         hash = hash + (hash<<7) + (hash<<14) + (hash<<21) + (hash<<28) +
 447             (hash>>17) + *str++;
 448     return hash;
 449 }
 450
 451 /* This is the result of the above search for good hash functions.  It seems
 452  * that the choice of multipliers is somewhat arbitrary but has several
 453  * constraints.  It shouldn't have too many or too few one bits and should be
 454  * odd.  It behaves beeter than the random hash function. */
 455
 456 afs_uint32 StringHashFunction (str)
 457   unsigned char *str;
 458 {   afs_uint32 hash = 1000003;  /* big prime to make "" hash nicely */
 459     /* The multiplicative constant should be odd and have a goodly number of
 460      * one bits. */
 461     while (*str) hash = (*str++) + hash * 0x10204081;
 462     return hash;
 463 }
 464
 465 afs_uint32 IdHashFunction (id)
 466   afs_uint32 id;
 467 {   afs_uint32 l,r;
 468     id *= 81847;
 469     l = id | 0xaaaaaaaa;
 470     r = id | 0x55555555;
 471     return (l*r);
 472 }
 473
 474 /* The minimum hash table blocks to allocate. Each block contains 510
 475  * buckets. They hash table grows when the number of entries reaches
 476  * twice the number of buckets.
 477  */
 478 int ht_minHBlocks(mht)
 479   struct memoryHashTable *mht;
 480 {
 481     int retval;
 482
 483     switch ( ntohl(mht->ht->functionType) )
 484     {
 485       case HT_dumpIden_FUNCTION:
 486       case HT_dumpName_FUNCTION:    /* hash table able to handle (befor it grows) ... */
 487         retval = 2;                 /*     1,020 dump entries */
 488         break;
 489
 490       case HT_tapeName_FUNCTION:
 491         retval = 4;                /*      2,040 tape entries */
 492         break;
 493
 494       case HT_volName_FUNCTION:
 495         retval = 60;               /*     61,200 volInfo entries (with different names) */
 496         break;
 497
 498       default: db_panic ("Illegal hash function type");
 499     }
 500     return (retval);
 501 }
 502
 503 afs_uint32 ht_HashEntry (mht, e)
 504   struct memoryHashTable *mht;
 505   char *e;                              /* entry's address (in b) */
 506 {
 507     int   type = ntohl(mht->ht->functionType);
 508     afs_uint32 retval;
 509
 510     switch (type)
 511     {
 512       case HT_dumpIden_FUNCTION:
 513         retval = IdHashFunction(ntohl(((struct dump *)e)->id));
 514         LogDebug(5, "HashEntry: dumpid returns %d\n", retval);
 515         break;
 516
 517       case HT_dumpName_FUNCTION:
 518         retval = StringHashFunction(((struct dump *)e)->dumpName);
 519         LogDebug(5, "HashEntry: dumpname returns %d\n", retval);
 520         break;
 521
 522       case HT_tapeName_FUNCTION:
 523         retval = StringHashFunction(((struct tape *)e)->name);
 524         LogDebug(5, "HashEntry: tapename returns %d\n", retval);
 525         break;
 526
 527       case HT_volName_FUNCTION:
 528         retval =  StringHashFunction (((struct volInfo *)e)->name);
 529         LogDebug(5, "HashEntry: volname returns %d\n", retval);
 530         break;
 531
 532       default: db_panic ("illegal hash function");
 533     }
 534
 535     return(retval);
 536 }
 537
 538
 539 /* ht_GetType
 540  *      returns a ptr to the memory hash table for the specified hash
 541  *      list.
 542  */
 543
 544 struct memoryHashTable *
 545 ht_GetType(type, e_sizeP)
 546      int   type;
 547      int  *e_sizeP;
 548 {
 549     struct memoryHashTable *mht;
 550
 551     if ((type <= 0) || (type > HT_MAX_FUNCTION))
 552         return 0;
 553
 554     if (e_sizeP) *e_sizeP = sizeFunctions[type];
 555     switch (type)
 556     {
 557       case HT_dumpIden_FUNCTION:
 558         mht = &db.dumpIden;
 559         break;
 560
 561       case HT_dumpName_FUNCTION:
 562         mht = &db.dumpName;
 563         break;
 564
 565       case HT_tapeName_FUNCTION:
 566         mht = &db.tapeName;
 567         break;
 568
 569       case HT_volName_FUNCTION:
 570         mht = &db.volName;
 571         break;
 572
 573       default: return 0;
 574     }
 575     if (ntohl(mht->ht->functionType) != type)
 576         db_panic ("ht types don't match");
 577     return mht;
 578 }
 579
 580 static int ht_KeyMatch (type, key, e)
 581   int   type;
 582   char *key;
 583   char *e;
 584 {
 585     switch (type) {
 586       case HT_dumpIden_FUNCTION:
 587         return *(dumpId *)key == ntohl(((struct dump *)e)->id);
 588       case HT_dumpName_FUNCTION:
 589         return strcmp (key, ((struct dump *)e)->dumpName) == 0;
 590       case HT_tapeName_FUNCTION:
 591         return strcmp (key, ((struct tape *)e)->name) == 0;
 592       case HT_volName_FUNCTION:
 593         return strcmp (key, ((struct volInfo *)e)->name) == 0;
 594
 595       default: db_panic ("illegal hash function");
 596     }
 597 }
 598
 599 /* ht_LookupEntry
 600  * entry:
 601  *      ut - ubik transaction
 602  *      mht - memory hash table ptr
 603  *      key - hash and lookup key
 604  * exit:
 605  *      eaP - dbaddr of entry found or zero if failed
 606   *     e - contents of located entry
 607  */
 608
 609 afs_int32
 610 ht_LookupEntry (ut, mht, key, eaP, e)
 611   struct ubik_trans *ut;
 612   struct memoryHashTable *mht;
 613   char *key;                            /* pointer to lookup key to match */
 614   dbadr *eaP;                           /* db addr of entry found or zero */
 615   char *e;                              /* contents of located entry */
 616 {   struct hashTable *ht;
 617     int  type;
 618     int  e_size;
 619     int  old;
 620     afs_uint32 hash;
 621     dbadr a;
 622
 623     if (!key || !eaP || !e) db_panic ("null ptrs passed to LookupEntry");
 624     if (!(mht && (ht = mht->ht))) db_panic ("some ht called with bad mht");
 625
 626     *eaP = 0;                           /* initialize not-found indicator */
 627
 628     type = ntohl(ht->functionType);
 629     e_size = sizeFunctions[type];
 630     if (type == HT_dumpIden_FUNCTION)
 631         hash = IdHashFunction (*(dumpId *)key);
 632     else
 633         hash = StringHashFunction (key);
 634
 635     for (old=0; ; old++)
 636     {
 637         a = ht_LookupBucket (ut, mht, hash, old);
 638         while (a) {
 639             if (dbread (ut, a, e, e_size))
 640                 return BUDB_IO;
 641             if (ht_KeyMatch (type, key, e))
 642             {
 643                 *eaP = a;
 644                 return 0;
 645             }
 646             a = ntohl(*(dbadr *)(e + mht->threadOffset));
 647         }
 648         if (old) return 0;
 649     }
 650 }
 651
 652 /* ht_HashInList
 653  * entry:
 654  *      opQuota - max # of items to move
 655  * exit:
 656  *      opQuota - adjusted to reflect # of moves
 657  */
 658
 659 static afs_int32 ht_HashInList (ut, mht, opQuota, block, blockOffset)
 660      struct ubik_trans *ut;
 661      struct memoryHashTable *mht;
 662      int *opQuota;
 663      struct memoryHTBlock *block;
 664      int blockOffset;
 665 {
 666     struct hashTable *ht = mht->ht;
 667     afs_int32  code;
 668     dbadr ea, next_ea;
 669     dbadr listA;
 670     char  e[sizeof(struct block)];      /* unnecessarily conservative */
 671     int   e_size = sizeFunctions[ntohl(ht->functionType)];
 672
 673     if (mht->length == 0)
 674     {
 675         if (code = ht_AllocTable (ut, mht))
 676         {
 677             Log("ht_HashInList: ht_AllocTable failed\n");
 678             return code;
 679         }
 680     }
 681
 682     listA = ntohl(block->b.bucket[blockOffset]);
 683
 684     if (listA == 0)
 685     {
 686         Log("ht_HashInList: expecting non-zero bucket\n");
 687         return 0;
 688     }
 689
 690     for (ea=listA; ea; ea=next_ea)
 691     { /*f*/
 692
 693         LogDebug(3, "ht_HashInList: move entry at %d, type %d\n",
 694             ea, ntohl(mht->ht->functionType));
 695
 696         if (dbread (ut, ea, e, e_size))
 697             return BUDB_IO;
 698
 699         /* LogNetDump((struct dump *) e); */
 700
 701         /* get the address of the next item on the list */
 702         next_ea = ntohl(*(dbadr *)(e + mht->threadOffset));
 703
 704         /* write the link into the bucket */
 705         code = set_word_addr (ut, block->a, &block->b, &block->b.bucket[blockOffset],
 706                               htonl(next_ea));
 707         if (code)
 708         {
 709             Log("ht_HashInList: bucket update failed\n");
 710             return(code);
 711         }
 712
 713         {   struct memoryHTBlock *block;
 714             int   bo;
 715             afs_uint32 hash;
 716
 717             /* get the hash value */
 718             hash = ht_HashEntry (mht, e) % mht->length;
 719             LogDebug(4, "ht_HashInList: moved to %d\n", hash);
 720
 721             /* get the new hash table block */
 722             code = ht_GetTableBlock (ut, mht, hash, 0/*old*/, &block, &bo);
 723             if (code)
 724             {
 725                 Log("ht_HashInList: ht_GetTableBlock failed\n");
 726                 return code;
 727             }
 728             if (block == 0)
 729             {
 730                 Log("ht_HashInList: ht_GetTableBlock returned 0\n");
 731                 return BUDB_INTERNALERROR;
 732             }
 733
 734             /* Chain entry at front of bucket;
 735              * first threadOffset of entry = bucket
 736              * then bucket = addr of entry
 737              */
 738             if (set_word_offset (ut, ea, e, mht->threadOffset, block->b.bucket[bo]) ||
 739                 set_word_addr (ut, block->a, &block->b, &block->b.bucket[bo], htonl(ea)))
 740                 return BUDB_IO;
 741         }
 742
 743         if ( --(*opQuota) == 0 )
 744             break;
 745     } /*f*/
 746     return 0;
 747 }
 748
 749
 750 /* ht_MoveEntries
 751  *      The hash table is needs to be re-sized. Move entries from the old
 752  *      to the new.
 753  */
 754
 755 static afs_int32
 756 ht_MoveEntries(ut, mht)
 757      struct ubik_trans *ut;
 758      struct memoryHashTable *mht;
 759 {
 760     struct memoryHTBlock *block;
 761     afs_uint32 hash;
 762     int count;
 763     int   bo;
 764     afs_int32  code;
 765
 766     if (mht->oldLength == 0)
 767         return 0;
 768
 769     LogDebug(3, "ht_MoveEntries:\n");
 770     /* we assume here that the hash function will map numbers smaller than the
 771      * size of the hash table straight through to hash table indexes.
 772      */
 773     hash = mht->progress;
 774
 775     /* get hash table block ? */
 776     code = ht_GetTableBlock (ut, mht, hash, 1 /*old*/, &block, &bo);
 777     if (code)
 778         return code;
 779
 780     if (block == 0)
 781         return BUDB_INTERNALERROR;
 782
 783     count = 10;                         /* max. # entries to move */
 784
 785     do {
 786         if (block->b.bucket[bo])
 787         {
 788             code = ht_HashInList(ut, mht, &count, block, bo);
 789             if ( code )
 790             {
 791                 Log("ht_MoveEntries: ht_HashInList failed\n");
 792                 return(BUDB_IO);
 793             }
 794         }
 795
 796         if (block->b.bucket[bo] == 0)
 797         {
 798             /* this bucket is now empty */
 799             mht->progress++;
 800         }
 801
 802         /* don't exceed the quota of items to be moved */
 803         if ( count == 0 )
 804             break;
 805
 806     } while (++bo < nHTBuckets);
 807
 808     if (mht->progress >= mht->oldLength)
 809         return( ht_FreeTable (ut, mht) );
 810
 811     if (set_word_addr (ut, 0, &db.h, &mht->ht->progress, htonl(mht->progress)))
 812     {
 813         Log("ht_MoveEntries: progress set failed\n");
 814         return BUDB_IO;
 815     }
 816     return 0;
 817 }
 818
 819
 820 #ifdef notdef
 821 static afs_int32
 822 ht_MoveEntries(ut, mht)
 823      struct ubik_trans *ut;
 824      struct memoryHashTable *mht;
 825 {
 826     afs_uint32 hash;
 827     int   bo;
 828     struct memoryHTBlock *block;
 829     afs_int32  code;
 830
 831     if (mht->oldLength == 0)
 832         return 0;
 833
 834     LogDebug(3, "ht_MoveEntries:\n");
 835     /* we assume here that the hash function will map numbers smaller than the
 836      * size of the hash table straight through to hash table indexes.
 837      */
 838     hash = mht->progress;
 839
 840     /* get hash table block ? */
 841     code = ht_GetTableBlock (ut, mht, hash, 1 /*old*/, &block, &bo);
 842     if (code)
 843         return code;
 844
 845     if (block == 0)
 846         return BUDB_INTERNALERROR;
 847
 848     do {
 849         mht->progress++;
 850         if (block->b.bucket[bo])
 851         {
 852             code = ht_HashInList(ut, mht, ntohl(block->b.bucket[bo]));
 853             if ( code )
 854             {
 855                 Log("ht_MoveEntries: ht_HashInList failed\n");
 856                 return(BUDB_IO);
 857             }
 858             code = set_word_addr(ut, block->a, &block->b, &block->b.bucket[bo], 0);
 859             if ( code )
 860             {
 861                 Log("ht_MoveEntries: clear old entry failed\n");
 862                 return BUDB_IO;
 863             }
 864             break;
 865         }
 866     } while (++bo < nHTBuckets);
 867
 868     if (mht->progress >= mht->oldLength)
 869         return( ht_FreeTable (ut, mht) );
 870
 871     if (set_word_addr (ut, 0, &db.h, &mht->ht->progress, htonl(mht->progress)))
 872     {
 873         Log("ht_MoveEntries: progress set failed\n");
 874         return BUDB_IO;
 875     }
 876     return 0;
 877 }
 878 #endif /* notdef */
 879
 880 afs_int32
 881 ht_HashIn(ut, mht, ea, e)
 882      struct ubik_trans *ut;
 883      struct memoryHashTable *mht;
 884      dbadr ea;                          /* block db address */
 885      char *e;                           /* entry's address (in b) */
 886 {
 887     struct hashTable *ht;
 888     afs_uint32 hash;
 889     struct memoryHTBlock *block;
 890     int   bo;
 891     afs_int32  code;
 892
 893     if (!(mht && (ht = mht->ht)))
 894         db_panic("some ht called with bad mht");
 895
 896     if (code = ht_MaybeAdjust (ut, mht))
 897         return code;
 898     if (mht->length == 0)
 899         if (code = ht_AllocTable (ut, mht))
 900             return code;
 901
 902     hash = ht_HashEntry (mht, e);
 903     code = ht_GetTableBlock (ut, mht, hash, 0/*old*/, &block, &bo);
 904     if (code) return code;
 905     if (!block) return BUDB_INTERNALERROR;
 906
 907     code = set_word_offset(ut, ea, e, mht->threadOffset, block->b.bucket[bo]);
 908     if (code) return BUDB_IO;
 909     LogDebug(5, "Hashin: set %d to %d\n", mht->threadOffset, block->b.bucket[bo]);
 910
 911     code = set_word_addr (ut, block->a, &block->b, &block->b.bucket[bo], htonl(ea));
 912     if (code) return BUDB_IO;
 913     LogDebug(5, "Hashin: set %d to %d\n", &block->b.bucket[bo], htonl(ea));
 914
 915     code = set_word_addr (ut, 0, &db.h, &ht->entries, htonl(ntohl(ht->entries)+1));
 916     if (code) return BUDB_IO;
 917
 918     return ht_MoveEntries (ut, mht);
 919 }
 920
 921 /* RemoveFromList - generic procedure to delete an entry from a list given its
 922  * head and thread offset.  Only a single long is modified by this routine.
 923  * The head pointer is modified, in place, using set_word_addr if the entry is
 924  * at the head of the list, otherwise only the thread of the previous entry is
 925  * modified.  The entry pointer is only used to calculate the thread offset,
 926  * but is not otherwise used. */
 927
 928 afs_int32 RemoveFromList (ut, ea, e, head, ta, t, thread)
 929   struct ubik_trans *ut;
 930   dbadr ea;                             /* db addr of head structure */
 931   char *e;                              /* head structure */
 932   dbadr *head;                          /* address of head pointer */
 933   dbadr ta;                             /* db addr of strucure to be removed */
 934   char *t;                              /* structure being removed */
 935   dbadr *thread;                        /* pointer to thread pointer */
 936 {   afs_int32  code;
 937     int   threadOffset = ((char *)thread - t);
 938     dbadr next_a;                       /* db addr of next element in list */
 939     dbadr loop_a;                       /* db addr of current list element */
 940
 941     if (*head == 0) return -1;          /* empty list: not found */
 942     next_a = ntohl(*head);              /* start at head of list */
 943     if (next_a == ta) {                 /* remove from head of list */
 944         code = set_word_addr (ut, ea, e, head, *thread);
 945         return code;
 946     }
 947     do {
 948         loop_a = next_a;
 949         code = dbread (ut, loop_a+threadOffset, (char *)&next_a, sizeof(dbadr));
 950         if (code) return code;
 951         if (next_a == 0) return -1;     /* end of list: not found */
 952     } while (ta != (next_a = ntohl(next_a)));
 953     code = dbwrite (ut, loop_a+threadOffset, (char *)thread, sizeof(dbadr));
 954     return code;
 955 }
 956
 957 afs_int32 ht_HashOutT (ut, mht, hash, ea, e, old)
 958   struct ubik_trans *ut;
 959   struct memoryHashTable *mht;
 960   afs_uint32 hash;
 961   dbadr ea;
 962   char *e;
 963   int   old;
 964 {
 965     struct memoryHTBlock *block;
 966     int   bo;
 967     afs_int32  code;
 968
 969     if ((old ? mht->oldLength : mht->length) == 0)
 970         return -1;
 971     code = ht_GetTableBlock (ut, mht, hash, old, &block, &bo);
 972     if (code)
 973         return code;
 974     if ((block == 0) || (block->b.bucket[bo] == 0))
 975         return -1;
 976
 977     code = RemoveFromList(ut, block->a, (char *)&block->b,
 978                           &block->b.bucket[bo], ea, e,
 979                           (dbadr *)(e+mht->threadOffset));
 980     if (code)
 981         return code;
 982 #if 0
 983     net_ea = htonl(ea);
 984     unthread_ea = *(afs_int32 *)((char *)e + mht->threadOffset);
 985     if (block->b.bucket[bo] == net_ea) {
 986         if (set_word_addr (ut, block->a, &block->b, &block->b.bucket[bo], unthread_ea))
 987             return BUDB_IO;
 988         goto done;
 989     }
 990     loop_a = ntohl(block->b.bucket[bo]);
 991     while (1) {
 992         if (dbread (ut, loop_a + mht->threadOffset,
 993                     (char *)&next_loop_a, sizeof(dbadr)))
 994             return BUDB_IO;
 995         if (next_loop_a == 0) return -1; /* not found */
 996         if (net_ea == next_loop_a) {
 997             if (dbwrite (ut, loop_a + mht->threadOffset, (char *)&unthread_ea, sizeof(dbadr)))
 998                 return BUDB_IO;
 999             goto done;
1000         }
1001         loop_a = ntohl (next_loop_a);
1002     }
1003   done:
1004 #endif
1005     if (set_word_addr (ut, 0, &db.h, &mht->ht->entries, htonl(ntohl(mht->ht->entries) - 1)))
1006         return BUDB_IO;
1007     return 0;
1008 }
1009
1010 afs_int32
1011 ht_HashOut (ut, mht, ea, e)
1012      struct ubik_trans *ut;
1013      struct memoryHashTable *mht;
1014      dbadr ea;
1015      char *e;
1016 {
1017     afs_uint32 hash;
1018     afs_int32  code;
1019
1020     if (!mht)
1021         db_panic ("some ht called with bad mht");
1022     hash = ht_HashEntry (mht, e);
1023     if (mht->oldLength)
1024     {
1025         code = ht_HashOutT (ut, mht, hash, ea, e, 1/*old*/);
1026         if (code == 0)
1027             return 0;
1028         else
1029         if (code != -1)
1030             ERROR(code);
1031     }
1032     if (mht->length == 0)                /* not found */
1033         ERROR(BUDB_INTERNALERROR);
1034     code = ht_HashOutT (ut, mht, hash, ea, e, 0/*old*/);
1035     if (code == -1)
1036         ERROR(BUDB_NOENT);
1037     if (code)
1038         ERROR(code);
1039
1040     code = ht_MoveEntries (ut, mht);
1041     if ( code )
1042         ERROR(code);
1043     code = ht_MaybeAdjust (ut, mht);
1044     if ( code )
1045         ERROR(code);
1046
1047 error_exit:
1048     return(code);
1049 }
1050
1051 /* generic hash table traversal routines */
1052
1053
1054 afs_int32
1055 scanHashTableBlock(ut, mhtPtr, htBlockPtr, old, length, index,
1056                    selectFn, operationFn, rockPtr)
1057      struct ubik_trans *ut;
1058      struct memoryHashTable *mhtPtr;
1059      struct htBlock *htBlockPtr;
1060      int old;
1061      afs_int32 length;                       /* size of whole hash table */
1062      int index;                         /* base index of this block */
1063      int (*selectFn)();
1064      int (*operationFn)();
1065      char *rockPtr;
1066 {
1067     int type;                           /* hash table type */
1068     int entrySize;                      /* hashed entry size */
1069
1070     afs_uint32 *mapEntryPtr = 0;            /* for status checks */
1071
1072     char entry[sizeof(struct block)];
1073     dbadr entryAddr, nextEntryAddr;
1074
1075     int i;
1076     afs_int32 code = 0;
1077
1078     type = ntohl(mhtPtr->ht->functionType);
1079     entrySize = sizeFunctions[type];
1080
1081     /* step through this hash table block, being careful to stop
1082      * before the end of the overall hash table
1083      */
1084
1085     for ( i = 0; (i < nHTBuckets) && (index < length); i++, index++ )
1086     { /*f*/
1087         entryAddr = 0;
1088         nextEntryAddr = ntohl( htBlockPtr->bucket[i] );
1089
1090         /* if this is the old hash table, all entries below the progress mark
1091          * should have been moved to the new hash table
1092          */
1093         if (old && (index < mhtPtr->progress) && nextEntryAddr)
1094                 return BUDB_INTERNALERROR;
1095
1096         /* now walk down the chain of each bucket */
1097         while ( nextEntryAddr )
1098         { /*w*/
1099
1100             entryAddr = nextEntryAddr;
1101             if ( dbread(ut, entryAddr, &entry[0], entrySize) )
1102                 return(BUDB_INTERNALERROR);
1103
1104             if ( (*selectFn)(entryAddr, &entry[0], rockPtr) )
1105             {
1106                 (*operationFn)(entryAddr, &entry[0], rockPtr);
1107             }
1108
1109             nextEntryAddr = ntohl( *((dbadr *)(entry + mhtPtr->threadOffset)));
1110         } /*w*/
1111
1112     } /*f*/
1113
1114     return(0);
1115 }
1116
1117 afs_int32
1118 scanHashTable(ut, mhtPtr, selectFn, operationFn, rockPtr)
1119     struct ubik_trans *ut;
1120     struct memoryHashTable *mhtPtr;
1121     int (*selectFn)();
1122     int (*operationFn)();
1123     char *rockPtr;
1124 {
1125     struct htBlock hashTableBlock;
1126     dbadr tableAddr;                            /* disk addr of hash block */
1127     int tableLength;                            /* # entries */
1128     int blockLength;                            /* # blocks */
1129     int hashIndex;
1130     int blockIndex, entryIndex;
1131     int old;
1132     int i;
1133     afs_int32 code = 0;
1134
1135     extern int nHTBuckets;                      /* # buckets in a hash table */
1136
1137     for ( old = 0; old <= 1; old++ )
1138     { /*fo*/
1139         if (old)
1140         {
1141             /* check the old hash table */
1142             tableLength = mhtPtr->oldLength;
1143             if ( tableLength == 0 )
1144                 continue;                       /* nothing to do */
1145
1146             tableAddr = ntohl(mhtPtr->ht->oldTable);
1147         }
1148         else
1149         {
1150             /* check current hash table */
1151             tableLength = mhtPtr->length;
1152             if ( tableLength == 0 )
1153                 continue;                       /* nothing to do */
1154
1155             tableAddr = ntohl(mhtPtr->ht->table);
1156         }
1157
1158         blockLength = (tableLength-1)/nHTBuckets;
1159         hashIndex = 0;
1160
1161         /* follow the hash chain */
1162         for ( i = 0; i <= blockLength; i++ )
1163         { /*fi*/
1164             /* chain too short */
1165             if ( tableAddr == 0 )
1166                 ERROR(BUDB_DATABASEINCONSISTENT);
1167
1168             code = dbread(ut, tableAddr, &hashTableBlock,
1169                           sizeof(hashTableBlock));
1170             if ( code )
1171                 goto error_exit;
1172
1173             code = scanHashTableBlock(ut, mhtPtr, &hashTableBlock, old,
1174                                       tableLength, hashIndex,
1175                                       selectFn, operationFn,
1176                                       rockPtr);
1177             if ( code )
1178                 goto error_exit;
1179
1180             hashIndex += nHTBuckets;
1181             tableAddr = ntohl(hashTableBlock.h.next);
1182         } /*fi*/
1183     } /*fo*/
1184
1185 error_exit:
1186     return(code);
1187 }
1188
1189
1190