From 7f58e4ac454f9c06fb2d51ff0a17b8656c454efe Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Fri, 20 Dec 2013 12:16:37 -0600 Subject: [PATCH] afs: Return raw code from background daemons Currently, a background daemon processing a 'store' request will return any error code in the 'code' field in the brequest structure, for processing by anyone that's waiting for the response. Since any waiter will not have access to the treq for the request, they won't be able to call afs_CheckCode on that return code, so the background daemon calls afs_CheckCode before returning its error code. Currently, afs_close uses the 'code' value from the background daemon as if it were not passed through afs_CheckCode. That is, if all background daemons are busy, we get our 'code' directly from afs_StoreOnLastReference, and if we use a background daemon, our 'code' is tb->code. But these values are two different things: the return value from afs_StoreOnLastReference is a raw error code, and the code from the background daemon (tb->code) has been translated through afs_CheckCode. This can be confusing, in particular for the scenario where a StoreData fails because of network errors or because of a VBUSY error. If we get a network error when the request went through a background daemon, afs_CheckCode will translate this to ETIMEDOUT, which is commonly value 110, the same as VBUSY. So, an ETIMEDOUT error from the background daemon is difficult to distinguish from a VBUSY error from a direct afs_StoreOnLastReference call. Either case can result in a message to the kernel like the following: afs: failed to store file (110) To resolve this, have the background daemon store both the 'raw' error code, and the error code that has been translated through afs_CheckCode. afs_close can then use the raw error code when reporting messages like normal, but can still use the translated error code to return to the caller, if it has a translated error. With this change, now afs_close will always log "network problems" for a network error, regardless of if the error came in via a background daemon or a direct afs_StoreOnLastReference call. In Irix's afs_delmap, we just remove the old usage of tb->code, since the result was not used for anything. Change-Id: I3e2bf7e36c1f098df16a1fdb0dc88b45ea87dfa9 Reviewed-on: http://gerrit.openafs.org/10633 Reviewed-by: Benjamin Kaduk Reviewed-by: Derrick Brashear Tested-by: BuildBot --- src/afs/IRIX/osi_vnodeops.c | 1 - src/afs/VNOPS/afs_vnop_write.c | 12 ++++++++++-- src/afs/afs.h | 3 ++- src/afs/afs_daemons.c | 17 ++++++++++++++--- src/afs/afs_dcache.c | 2 +- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/afs/IRIX/osi_vnodeops.c b/src/afs/IRIX/osi_vnodeops.c index 0f13a9c..c55474a 100644 --- a/src/afs/IRIX/osi_vnodeops.c +++ b/src/afs/IRIX/osi_vnodeops.c @@ -983,7 +983,6 @@ OSI_VC_DECL(avc); tb->flags |= BUWAIT; afs_osi_Sleep(tb); } - code = tb->code; afs_BRelease(tb); } } else { diff --git a/src/afs/VNOPS/afs_vnop_write.c b/src/afs/VNOPS/afs_vnop_write.c index 5ef62aa..2bc85eb 100644 --- a/src/afs/VNOPS/afs_vnop_write.c +++ b/src/afs/VNOPS/afs_vnop_write.c @@ -470,6 +470,7 @@ afs_close(OSI_VC_DECL(avc), afs_int32 aflags, afs_ucred_t *acred) #endif { afs_int32 code; + afs_int32 code_checkcode = 0; struct brequest *tb; struct vrequest treq; #ifdef AFS_SGI65_ENV @@ -555,7 +556,8 @@ afs_close(OSI_VC_DECL(avc), afs_int32 aflags, afs_ucred_t *acred) tb->flags |= BUWAIT; afs_osi_Sleep(tb); } - code = tb->code; + code = tb->code_raw; + code_checkcode = tb->code_checkcode; afs_BRelease(tb); } @@ -576,6 +578,7 @@ afs_close(OSI_VC_DECL(avc), afs_int32 aflags, afs_ucred_t *acred) #endif /* printf("avc->vc_error=%d\n", avc->vc_error); */ code = avc->vc_error; + code_checkcode = 0; avc->vc_error = 0; } ReleaseWriteLock(&avc->lock); @@ -637,7 +640,12 @@ afs_close(OSI_VC_DECL(avc), afs_int32 aflags, afs_ucred_t *acred) } AFS_DISCON_UNLOCK(); afs_PutFakeStat(&fakestat); - code = afs_CheckCode(code, &treq, 5); + + if (code_checkcode) { + code = code_checkcode; + } else { + code = afs_CheckCode(code, &treq, 5); + } return code; } diff --git a/src/afs/afs.h b/src/afs/afs.h index 39a8e57..3d7e461 100644 --- a/src/afs/afs.h +++ b/src/afs/afs.h @@ -153,7 +153,8 @@ struct brequest { afs_ucred_t *cred; /* credentials to use for operation */ afs_size_t size_parm[BPARMS]; /* random parameters */ void *ptr_parm[BPARMS]; /* pointer parameters */ - afs_int32 code; /* return code */ + afs_int32 code_raw; /* return code from AFS routines */ + afs_int32 code_checkcode; /* the afs_CheckCode-translated code */ short refCount; /* use counter for this structure */ char opcode; /* what to do (store, fetch, etc) */ char flags; /* free, etc */ diff --git a/src/afs/afs_daemons.c b/src/afs/afs_daemons.c index 687bf6a..da68e18 100644 --- a/src/afs/afs_daemons.c +++ b/src/afs/afs_daemons.c @@ -599,7 +599,17 @@ BStore(struct brequest *ab) #endif /* now set final return code, and wakeup anyone waiting */ if ((ab->flags & BUVALID) == 0) { - ab->code = afs_CheckCode(code, &treq, 43); /* set final code, since treq doesn't go across processes */ + + /* To explain code_raw/code_checkcode: + * Anyone that's waiting won't have our treq, so they won't be able to + * call afs_CheckCode themselves on the return code we provide here. + * But if we give back only the afs_CheckCode value, they won't know + * what the "raw" value was. So give back both values, so the waiter + * can know the "raw" value for interpreting the value internally, as + * well as the afs_CheckCode value to give to the OS. */ + ab->code_raw = code; + ab->code_checkcode = afs_CheckCode(code, &treq, 43); + ab->flags |= BUVALID; if (ab->flags & BUWAIT) { ab->flags &= ~BUWAIT; @@ -635,7 +645,8 @@ BPartialStore(struct brequest *ab) /* now set final return code, and wakeup anyone waiting */ if ((ab->flags & BUVALID) == 0) { /* set final code, since treq doesn't go across processes */ - ab->code = afs_CheckCode(code, &treq, 43); + ab->code_raw = code; + ab->code_checkcode = afs_CheckCode(code, &treq, 43); ab->flags |= BUVALID; if (ab->flags & BUWAIT) { ab->flags &= ~BUWAIT; @@ -702,7 +713,7 @@ afs_BQueue(short aopcode, struct vcache *avc, tb->ptr_parm[1] = apparm1; tb->ptr_parm[2] = apparm2; tb->flags = 0; - tb->code = 0; + tb->code_raw = tb->code_checkcode = 0; tb->ts = afs_brs_count++; /* if daemons are waiting for work, wake them up */ if (afs_brsDaemons > 0) { diff --git a/src/afs/afs_dcache.c b/src/afs/afs_dcache.c index 06ab7ad..dd75e2c 100644 --- a/src/afs/afs_dcache.c +++ b/src/afs/afs_dcache.c @@ -3044,7 +3044,7 @@ afs_wakeup(struct vcache *avc) * is already being handled by the higher-level code. */ if ((avc->f.states & CSafeStore) == 0) { - tb->code = 0; + tb->code_raw = tb->code_checkcode = 0; tb->flags |= BUVALID; if (tb->flags & BUWAIT) { tb->flags &= ~BUWAIT; -- 1.9.4