From: Mark Vitale Date: Fri, 4 May 2018 21:32:51 +0000 (-0400) Subject: ubik: improve logging for database synchonizations X-Git-Tag: openafs-devel-1_9_0~507 X-Git-Url: http://git.openafs.org/?p=openafs.git;a=commitdiff_plain;h=0e1c042615d1aeb919a22568cdd2b2ea42c677ba ubik: improve logging for database synchonizations As an aid for debugging database synchronization issues, ensure that the logging is consistent and unambiguous for both the client and server sides of DISK_GetFile and DISK_SendFile. Add new error messages as required. In addition, rework the "recovery sending version to " message in urecovery_Interact. This message is misleading because the new version database is only sent to a DB server if its version is not up to date. Instead, move this message into the version check block immediately below it. Also reword it for clarity and promote its log level from 5 to 0. Finally, remove the now-superfluous "recovery stating local database" log message. Change-Id: If8bbaa1215cab9fd24b157a0ee57759b34e77e9c Reviewed-on: https://gerrit.openafs.org/13079 Reviewed-by: Michael Meffie Tested-by: BuildBot Reviewed-by: Andrew Deason Reviewed-by: Marcio Brito Barbosa Reviewed-by: Benjamin Kaduk --- diff --git a/src/ubik/recovery.c b/src/ubik/recovery.c index b0644cd..27a3588 100644 --- a/src/ubik/recovery.c +++ b/src/ubik/recovery.c @@ -607,7 +607,8 @@ urecovery_Interact(void *dummy) UBIK_ADDR_LOCK; rxcall = rx_NewCall(bestServer->disk_rxcid); - ViceLog(0, ("Ubik: Synchronize database via DISK_GetFile to server %s\n", + ViceLog(0, ("Ubik: Synchronize database: receive (via GetFile) " + "from server %s begin\n", afs_inet_ntoa_r(bestServer->addr[0], hoststr))); UBIK_ADDR_UNLOCK; @@ -724,10 +725,17 @@ urecovery_Interact(void *dummy) ubik_dbase->version.epoch = 0; ubik_dbase->version.counter = 0; UBIK_VERSION_UNLOCK; - ViceLog(0, ("Ubik: Synchronize database failed (error = %d)\n", - code)); + ViceLog(0, + ("Ubik: Synchronize database: receive (via GetFile) " + "from server %s failed (error = %d)\n", + afs_inet_ntoa_r(bestServer->addr[0], hoststr), code)); } else { - ViceLog(0, ("Ubik: Synchronize database completed\n")); + ViceLog(0, + ("Ubik: Synchronize database: receive (via GetFile) " + "from server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(bestServer->addr[0], hoststr), + ubik_dbase->version.epoch, ubik_dbase->version.counter)); + urecovery_state |= UBIK_RECHAVEDB; } udisk_Invalidate(ubik_dbase, 0); /* data has changed */ @@ -818,10 +826,11 @@ urecovery_Interact(void *dummy) continue; } UBIK_BEACON_UNLOCK; - ViceLog(5, ("recovery sending version to %s\n", - afs_inet_ntoa_r(inAddr.s_addr, hoststr))); + if (vcmp(ts->version, ubik_dbase->version) != 0) { - ViceLog(5, ("recovery stating local database\n")); + ViceLog(0, ("Synchronize database: send (via SendFile) " + "to server %s begin\n", + afs_inet_ntoa_r(inAddr.s_addr, hoststr))); /* Rx code to do the Bulk Store */ code = (*ubik_dbase->stat) (ubik_dbase, 0, &ubikstat); @@ -864,12 +873,24 @@ urecovery_Interact(void *dummy) StoreEndCall: code = rx_EndCall(rxcall, code); } + if (code == 0) { /* we set a new file, process its header */ ts->version = ubik_dbase->version; ts->currentDB = 1; - } else + ViceLog(0, + ("Ubik: Synchronize database: send (via SendFile) " + "to server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(inAddr.s_addr, hoststr), + ts->version.epoch, ts->version.counter)); + + } else { dbok = 0; + ViceLog(0, + ("Ubik: Synchronize database: send (via SendFile) " + "to server %s failed (error = %d)\n", + afs_inet_ntoa_r(inAddr.s_addr, hoststr), code)); + } } else { /* mark file up to date */ ts->currentDB = 1; diff --git a/src/ubik/remote.c b/src/ubik/remote.c index c3b708d..069a8c3 100644 --- a/src/ubik/remote.c +++ b/src/ubik/remote.c @@ -396,45 +396,72 @@ SDISK_GetFile(struct rx_call *rxcall, afs_int32 file, char tbuffer[256]; afs_int32 tlen; afs_int32 length; + struct rx_peer *tpeer; + struct rx_connection *tconn; + afs_uint32 otherHost = 0; + char hoststr[16]; if ((code = ubik_CheckAuth(rxcall))) { return code; } + + tconn = rx_ConnectionOf(rxcall); + tpeer = rx_PeerOf(tconn); + otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer)); + ViceLog(0, ("Ubik: Synchronize database: send (via GetFile) " + "to server %s begin\n", + afs_inet_ntoa_r(otherHost, hoststr))); + dbase = ubik_dbase; DBHOLD(dbase); code = (*dbase->stat) (dbase, file, &ubikstat); if (code < 0) { - DBRELE(dbase); - return code; + ViceLog(0, ("database stat() error:%d\n", code)); + goto failed; } length = ubikstat.size; tlen = htonl(length); code = rx_Write(rxcall, (char *)&tlen, sizeof(afs_int32)); if (code != sizeof(afs_int32)) { - DBRELE(dbase); ViceLog(0, ("Rx-write length error=%d\n", code)); - return BULK_ERROR; + code = BULK_ERROR; + goto failed; } offset = 0; while (length > 0) { tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length); code = (*dbase->read) (dbase, file, tbuffer, offset, tlen); if (code != tlen) { - DBRELE(dbase); ViceLog(0, ("read failed error=%d\n", code)); - return UIOERROR; + code = UIOERROR; + goto failed; } code = rx_Write(rxcall, tbuffer, tlen); if (code != tlen) { - DBRELE(dbase); - ViceLog(0, ("Rx-write length error=%d\n", code)); - return BULK_ERROR; + ViceLog(0, ("Rx-write data error=%d\n", code)); + code = BULK_ERROR; + goto failed; } length -= tlen; offset += tlen; } code = (*dbase->getlabel) (dbase, file, version); /* return the dbase, too */ + if (code) + ViceLog(0, ("getlabel error=%d\n", code)); + + failed: DBRELE(dbase); + if (code) { + ViceLog(0, + ("Ubik: Synchronize database: send (via GetFile) to " + "server %s failed (error = %d)\n", + afs_inet_ntoa_r(otherHost, hoststr), code)); + } else { + ViceLog(0, + ("Ubik: Synchronize database: send (via GetFile) to " + "server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(otherHost, hoststr), version->epoch, version->counter)); + } return code; } @@ -495,7 +522,7 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file, /* abort any active trans that may scribble over the database */ urecovery_AbortAll(dbase); - ViceLog(0, ("Ubik: Synchronize database via DISK_SendFile from server %s\n", + ViceLog(0, ("Ubik: Synchronize database: receive (via SendFile) from server %s begin\n", afs_inet_ntoa_r(otherHost, hoststr))); offset = 0; @@ -507,11 +534,13 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file, (file<0)?-file:file); fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600); if (fd < 0) { + ViceLog(0, ("Open error=%d\n", errno)); code = errno; goto failed_locked; } code = lseek(fd, HDRSIZE, 0); if (code != HDRSIZE) { + ViceLog(0, ("lseek error=%d\n", code)); close(fd); goto failed_locked; } @@ -543,8 +572,10 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file, length -= tlen; } code = close(fd); - if (code) + if (code) { + ViceLog(0, ("close failed error=%d\n", code)); goto failed; + } /* sync data first, then write label and resync (resync done by setlabel call). * This way, good label is only on good database. */ @@ -594,11 +625,16 @@ failed: (*dbase->setlabel) (dbase, file, &tversion); UBIK_VERSION_UNLOCK; } - ViceLog(0, ("Ubik: Synchronize database with server %s failed (error = %d)\n", - afs_inet_ntoa_r(otherHost, hoststr), code)); + ViceLog(0, + ("Ubik: Synchronize database: receive (via SendFile) from " + "server %s failed (error = %d)\n", + afs_inet_ntoa_r(otherHost, hoststr), code)); } else { uvote_set_dbVersion(*avers); - ViceLog(0, ("Ubik: Synchronize database completed\n")); + ViceLog(0, + ("Ubik: Synchronize database: receive (via SendFile) from " + "server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(otherHost, hoststr), avers->epoch, avers->counter)); } DBRELE(dbase); return code;