ubik: Rename flags to dbFlags
[openafs.git] / src / ubik / recovery.c
index 7ea8e47..414d1ad 100644 (file)
@@ -133,9 +133,17 @@ int
 urecovery_AbortAll(struct ubik_dbase *adbase)
 {
     struct ubik_trans *tt;
+    int reads = 0, writes = 0;    /* tallies reported in the summary log below */
+    /* Abort every transaction on adbase->activeTrans, counting each by type. */
     for (tt = adbase->activeTrans; tt; tt = tt->next) {
+       if (tt->type == UBIK_WRITETRANS)    /* classify before aborting */
+           writes++;
+       else    /* any type other than UBIK_WRITETRANS is tallied as a read */
+           reads++;
        udisk_abort(tt);
     }
+    ViceLog(0, ("urecovery_AbortAll: just aborted %d read and %d write transactions\n",
+                   reads, writes));    /* logged once per call, after the loop */
     return 0;
 }
 
@@ -151,8 +159,20 @@ urecovery_CheckTid(struct ubik_tid *atid, int abortalways)
        if (atid->epoch != ubik_currentTrans->tid.epoch
            || atid->counter > ubik_currentTrans->tid.counter || abortalways) {
            /* don't match, abort it */
+           int endit = 0;
            /* If the thread is not waiting for lock - ok to end it */
            if (ubik_currentTrans->locktype != LOCKWAIT) {
+               endit = 1;
+           }
+
+           ViceLog(0, ("urecovery_CheckTid: Aborting/ending bad remote "
+                       "transaction. (tx %d.%d, atid %d.%d, abortalways %d, "
+                       "endit %d)\n",
+                       ubik_currentTrans->tid.epoch,
+                       ubik_currentTrans->tid.counter,
+                       atid->epoch, atid->counter,
+                       abortalways, endit));
+           if (endit) {
                udisk_end(ubik_currentTrans);
            }
            ubik_currentTrans = (struct ubik_trans *)0;
@@ -374,11 +394,6 @@ InitializeDB(struct ubik_dbase *adbase)
            adbase->version.counter = 0;
            (*adbase->setlabel) (adbase, 0, &adbase->version);
        }
-#ifdef AFS_PTHREAD_ENV
-       opr_cv_broadcast(&adbase->version_cond);
-#else
-       LWP_NoYieldSignal(&adbase->version);
-#endif
        UBIK_VERSION_UNLOCK;
     }
     return 0;
@@ -457,7 +472,7 @@ urecovery_Interact(void *dummy)
     int fd = -1;
     afs_int32 pass;
 
-    afs_pthread_setname_self("recovery");
+    opr_threadname_set("recovery");
 
     /* otherwise, begin interaction */
     urecovery_state = 0;
@@ -599,19 +614,20 @@ urecovery_Interact(void *dummy)
            UBIK_ADDR_LOCK;
            rxcall = rx_NewCall(bestServer->disk_rxcid);
 
-           ViceLog(0, ("Ubik: Synchronize database via DISK_GetFile to server %s\n",
+           ViceLog(0, ("Ubik: Synchronize database: receive (via GetFile) "
+                       "from server %s begin\n",
                       afs_inet_ntoa_r(bestServer->addr[0], hoststr)));
            UBIK_ADDR_UNLOCK;
 
            code = StartDISK_GetFile(rxcall, file);
            if (code) {
-               ViceLog(5, ("StartDiskGetFile failed=%d\n", code));
+               ViceLog(0, ("StartDiskGetFile failed=%d\n", code));
                goto FetchEndCall;
            }
            nbytes = rx_Read(rxcall, (char *)&length, sizeof(afs_int32));
            length = ntohl(length);
            if (nbytes != sizeof(afs_int32)) {
-               ViceLog(5, ("Rx-read length error=%d\n", BULK_ERROR));
+               ViceLog(0, ("Rx-read length error=%d\n", BULK_ERROR));
                code = EIO;
                goto FetchEndCall;
            }
@@ -622,7 +638,7 @@ urecovery_Interact(void *dummy)
            code = (*ubik_dbase->setlabel) (ubik_dbase, file, &tversion);
            UBIK_VERSION_UNLOCK;
            if (code) {
-               ViceLog(5, ("setlabel io error=%d\n", code));
+               ViceLog(0, ("setlabel io error=%d\n", code));
                goto FetchEndCall;
            }
            snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d.TMP",
@@ -648,7 +664,7 @@ urecovery_Interact(void *dummy)
 #endif
                nbytes = rx_Read(rxcall, tbuffer, tlen);
                if (nbytes != tlen) {
-                   ViceLog(5, ("Rx-read bulk error=%d\n", BULK_ERROR));
+                   ViceLog(0, ("Rx-read bulk error=%d\n", BULK_ERROR));
                    code = EIO;
                    close(fd);
                    goto FetchEndCall;
@@ -716,18 +732,20 @@ urecovery_Interact(void *dummy)
                ubik_dbase->version.epoch = 0;
                ubik_dbase->version.counter = 0;
                UBIK_VERSION_UNLOCK;
-               ViceLog(0, ("Ubik: Synchronize database failed (error = %d)\n",
-                          code));
+               ViceLog(0,
+                   ("Ubik: Synchronize database: receive (via GetFile) "
+                   "from server %s failed (error = %d)\n",
+                   afs_inet_ntoa_r(bestServer->addr[0], hoststr), code));
            } else {
-               ViceLog(0, ("Ubik: Synchronize database completed\n"));
+               ViceLog(0,
+                   ("Ubik: Synchronize database: receive (via GetFile) "
+                   "from server %s complete, version: %d.%d\n",
+                   afs_inet_ntoa_r(bestServer->addr[0], hoststr),
+                   ubik_dbase->version.epoch, ubik_dbase->version.counter));
+
                urecovery_state |= UBIK_RECHAVEDB;
            }
            udisk_Invalidate(ubik_dbase, 0);    /* data has changed */
-#ifdef AFS_PTHREAD_ENV
-           opr_cv_broadcast(&ubik_dbase->version_cond);
-#else
-           LWP_NoYieldSignal(&ubik_dbase->version);
-#endif
        }
        if (!(urecovery_state & UBIK_RECHAVEDB)) {
            DBRELE(ubik_dbase);
@@ -748,11 +766,6 @@ urecovery_Interact(void *dummy)
                (*ubik_dbase->setlabel) (ubik_dbase, 0, &ubik_dbase->version);
            UBIK_VERSION_UNLOCK;
            udisk_Invalidate(ubik_dbase, 0);    /* data may have changed */
-#ifdef AFS_PTHREAD_ENV
-           opr_cv_broadcast(&ubik_dbase->version_cond);
-#else
-           LWP_NoYieldSignal(&ubik_dbase->version);
-#endif
        }
 
        /* Check the other sites and send the database to them if they
@@ -771,11 +784,11 @@ urecovery_Interact(void *dummy)
             * the write-lock above if there is a write transaction in progress,
             * but then, it won't hurt to check, will it?
             */
-           if (ubik_dbase->flags & DBWRITING) {
+           if (ubik_dbase->dbFlags & DBWRITING) {
                struct timeval tv;
                int safety = 0;
                long cur_usec = 50000;
-               while ((ubik_dbase->flags & DBWRITING) && (safety < 500)) {
+               while ((ubik_dbase->dbFlags & DBWRITING) && (safety < 500)) {
                    DBRELE(ubik_dbase);
                    /* sleep for a little while */
                    tv.tv_sec = 0;
@@ -798,16 +811,23 @@ urecovery_Interact(void *dummy)
                UBIK_BEACON_LOCK;
                if (!ts->up) {
                    UBIK_BEACON_UNLOCK;
+                   /* It would be nice to have this message at loglevel
+                    * 0 as well, but it will log once every 4s for each
+                    * down server while in this recovery state.  This
+                    * should only be changed to loglevel 0 if it is
+                    * also rate-limited.
+                    */
                    ViceLog(5, ("recovery cannot send version to %s\n",
                                afs_inet_ntoa_r(inAddr.s_addr, hoststr)));
                    dbok = 0;
                    continue;
                }
                UBIK_BEACON_UNLOCK;
-               ViceLog(5, ("recovery sending version to %s\n",
-                           afs_inet_ntoa_r(inAddr.s_addr, hoststr)));
+
                if (vcmp(ts->version, ubik_dbase->version) != 0) {
-                   ViceLog(5, ("recovery stating local database\n"));
+                   ViceLog(0, ("Synchronize database: send (via SendFile) "
+                               "to server %s begin\n",
+                           afs_inet_ntoa_r(inAddr.s_addr, hoststr)));
 
                    /* Rx code to do the Bulk Store */
                    code = (*ubik_dbase->stat) (ubik_dbase, 0, &ubikstat);
@@ -821,7 +841,7 @@ urecovery_Interact(void *dummy)
                            StartDISK_SendFile(rxcall, file, length,
                                               &ubik_dbase->version);
                        if (code) {
-                           ViceLog(5, ("StartDiskSendFile failed=%d\n",
+                           ViceLog(0, ("StartDiskSendFile failed=%d\n",
                                        code));
                            goto StoreEndCall;
                        }
@@ -834,13 +854,13 @@ urecovery_Interact(void *dummy)
                                                     tbuffer, offset, tlen);
                            if (nbytes != tlen) {
                                code = UIOERROR;
-                               ViceLog(5, ("Local disk read error=%d\n", code));
+                               ViceLog(0, ("Local disk read error=%d\n", code));
                                goto StoreEndCall;
                            }
                            nbytes = rx_Write(rxcall, tbuffer, tlen);
                            if (nbytes != tlen) {
                                code = BULK_ERROR;
-                               ViceLog(5, ("Rx-write bulk error=%d\n", code));
+                               ViceLog(0, ("Rx-write bulk error=%d\n", code));
                                goto StoreEndCall;
                            }
                            offset += tlen;
@@ -850,12 +870,24 @@ urecovery_Interact(void *dummy)
                      StoreEndCall:
                        code = rx_EndCall(rxcall, code);
                    }
+
                    if (code == 0) {
                        /* we set a new file, process its header */
                        ts->version = ubik_dbase->version;
                        ts->currentDB = 1;
-                   } else
+                       ViceLog(0,
+                           ("Ubik: Synchronize database: send (via SendFile) "
+                           "to server %s complete, version: %d.%d\n",
+                           afs_inet_ntoa_r(inAddr.s_addr, hoststr),
+                           ts->version.epoch, ts->version.counter));
+
+                   } else {
                        dbok = 0;
+                       ViceLog(0,
+                           ("Ubik: Synchronize database: send (via SendFile) "
+                            "to server %s failed (error = %d)\n",
+                           afs_inet_ntoa_r(inAddr.s_addr, hoststr), code));
+                   }
                } else {
                    /* mark file up to date */
                    ts->currentDB = 1;