2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afs/param.h>
/* Global read/write lock. The functions in this file take it for writing
 * around connection reference-count changes and while walking or editing a
 * server's connection list (serverp->connsp). */
22 osi_rwlock_t cm_connLock;
/* Overall time budget for a request. cm_ConnByMServers() subtracts the time
 * already spent on the request (plus a 5 unit safety margin) from it;
 * presumably expressed in seconds - TODO confirm the unit. */
24 long RDRtimeout = CM_CONN_DEFAULTRDRTIMEOUT;
26 void cm_PutConn(cm_conn_t *connp)
28 lock_ObtainWrite(&cm_connLock);
29 osi_assert(connp->refCount-- > 0);
30 lock_ReleaseWrite(&cm_connLock);
/*
 * Set up the connection module: initializes cm_connLock when osi_Once()
 * reports true for the function-local guard (presumably only on the first
 * call - confirm against osi_Once()).
 *
 * NOTE(review): the lines that follow the lock initialization are not
 * visible in this listing; confirm that the once-guard is completed there.
 */
33 void cm_InitConn(void)
/* static, so the guard state persists across calls */
35 static osi_once_t once;
37 if (osi_Once(&once)) {
38 lock_InitializeRWLock(&cm_connLock, "connection global lock");
43 void cm_InitReq(cm_req_t *reqp)
45 memset((char *)reqp, 0, sizeof(cm_req_t));
46 reqp->startTime = GetCurrentTime();
/*
 * Select the server reference list for the volume that fidp names and
 * return it through *serverspp.
 *
 * fidp      - file ID; its cell and volume fields drive the lookup
 * userp     - passed through to cm_GetVolumeByID()
 * reqp      - passed through to cm_GetVolumeByID()
 * serverspp - out: receives the selected server reference list
 *
 * Returns CM_ERROR_NOSUCHCELL when the cell cannot be found, or the code
 * from cm_GetVolumeByID() when that call fails.  On both of those visible
 * error returns *serverspp is left untouched.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (local declarations, whatever precedes the cell lookup, the case
 * where no volume ID matches, and the final return), so the success return
 * value and any NULL-fidp handling should be confirmed against the full
 * source.
 */
49 long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp,
50 struct cm_req *reqp, cm_serverRef_t **serverspp)
54 cm_serverRef_t *serversp;
/* resolve the cell first; an unknown cell ends the lookup */
62 cellp = cm_FindCellByID(fidp->cell);
63 if (!cellp) return CM_ERROR_NOSUCHCELL;
/* fetch the volume record; a failure code is returned unchanged */
65 code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, &volp);
66 if (code) return code;
/* pick the list whose volume ID (rwID, roID or bkID) matches the fid */
68 if (fidp->volume == volp->rwID)
69 serversp = volp->rwServersp;
70 else if (fidp->volume == volp->roID)
71 serversp = volp->roServersp;
72 else if (fidp->volume == volp->bkID)
73 serversp = volp->bkServersp;
/* hand the chosen list back to the caller */
78 *serverspp = serversp;
83 * Analyze the error return from an RPC. Determine whether or not to retry,
84 * and if we're going to retry, determine whether failover is appropriate,
85 * and whether timed backoff is appropriate.
87 * If the error code is from cm_Conn() or friends, it will be a CM_ERROR code.
88 * Otherwise it will be an RPC code. This may be a UNIX code (e.g. EDQUOT), or
89 * it may be an RX code, or it may be a special code (e.g. VNOVOL), or it may
90 * be a security code (e.g. RXKADEXPIRED).
92 * If the error code is from cm_Conn() or friends, connp will be NULL.
94 * For VLDB calls, fidp will be NULL.
96 * volSyncp and/or cbrp may also be NULL.
/*
 * NOTE(review): this listing omits a number of lines of cm_Analyze() (the
 * return type, part of the parameter list, local declarations, braces and
 * several statements).  The comments added below describe only the
 * statements that are visible; anything about the omitted lines is marked
 * as an assumption.
 */
98 cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
100 AFSVolSync *volSyncp, cm_callbackRequest_t *cbrp, long errorCode)
102 cm_server_t *serverp;
103 cm_serverRef_t *serversp, *tsrp;
/* Trace the call.  NOTE(review): connp is cast to long for the 0x%x
 * format; where pointers are wider than long the logged value would be
 * truncated - confirm against the target ABI. */
108 osi_Log2(afsd_logp, "cm_Analyze connp 0x%x, code %d",
109 (long) connp, errorCode);
/* dead_session is set when the user has no cellInfop.
 * NOTE(review): the function's header comment says connp may be NULL, yet
 * the visible statements below read connp->serverp; confirm that the lines
 * omitted from this listing guard those reads. */
111 /* no locking required, since connp->serverp never changes after
113 dead_session = (userp->cellInfop == NULL);
115 serverp = connp->serverp;
117 /* Update callback pointer */
118 if (cbrp && errorCode == 0) cbrp->serverp = connp->serverp;
120 /* If not allowed to retry, don't */
121 if (reqp->flags & CM_REQ_NORETRY)
/* NOTE(review): in each branch below the result of cm_GetServerList() is
 * discarded.  serversp is declared without an initializer and the visible
 * error returns of cm_GetServerList() do not store through the out
 * pointer, so a failed lookup would leave serversp indeterminate before
 * the loops walk it - confirm this cannot happen. */
124 /* if all servers are busy, mark them non-busy and start over */
125 if (errorCode == CM_ERROR_ALLBUSY) {
126 cm_GetServerList(fidp, userp, reqp, &serversp);
127 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
128 if (tsrp->status == busy)
129 tsrp->status = not_busy;
/* Looks for the list entry that refers to the server used for this call
 * and is still not_busy; what is done with it is on lines not visible in
 * this listing. */
135 /* special codes: VBUSY and VRESTARTING */
136 if (errorCode == VBUSY || errorCode == VRESTARTING) {
137 cm_GetServerList(fidp, userp, reqp, &serversp);
138 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
139 if (tsrp->server == serverp
140 && tsrp->status == not_busy) {
148 /* special codes: missing volumes */
149 if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE
150 || errorCode == VSALVAGE || errorCode == VNOSERVICE) {
154 /* Back off to allow move to complete */
/* The server list is checksummed before and after a forced volume update;
 * "same" records whether the two checksums are equal. */
157 /* Update the volume location and see if it changed */
158 cm_GetServerList(fidp, userp, reqp, &serversp);
159 oldSum = cm_ChecksumServerList(serversp);
160 cm_ForceUpdateVolume(fidp, userp, reqp);
161 cm_GetServerList(fidp, userp, reqp, &serversp);
162 newSum = cm_ChecksumServerList(serversp);
163 same = (oldSum == newSum);
/* The entry for the server that returned the error becomes offline; the
 * condition guarding the not_busy assignment is not visible here. */
165 /* mark servers as appropriate */
166 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
167 if (tsrp->server == serverp)
168 tsrp->status = offline;
170 tsrp->status = not_busy;
/* RX_CALL_TIMEOUT: the visible lines report the condition through the
 * Windows event log (RegisterEventSource / ReportEvent /
 * DeregisterEventSource) and through the afsd log. */
176 if (errorCode == RX_CALL_TIMEOUT) {
177 /* server took longer than hardDeadTime
178 * don't mark server as down but don't retry
179 * this is to prevent the SMB session from timing out
180 * In addition, we log an event to the event log
185 h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME);
186 sprintf(s, "cm_Analyze: HardDeadTime exceeded.");
188 ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL,
190 DeregisterEventSource(h);
193 osi_Log0(afsd_logp, "cm_Analyze: hardDeadTime exceeded");
195 else if (errorCode >= -64 && errorCode < 0) {
196 /* mark server as down */
/* the server's own mutex is held around the flags update */
197 lock_ObtainMutex(&serverp->mx);
198 serverp->flags |= CM_SERVERFLAG_DOWN;
199 lock_ReleaseMutex(&serverp->mx);
/* RXKADEXPIRED with a live session: under userp->mx, free the cached
 * ticket for this server's cell and clear CM_UCELLFLAG_RXKAD.
 * NOTE(review): the result of cm_GetUCell() is dereferenced without a NULL
 * check in the visible lines - confirm it cannot return NULL. */
203 if (errorCode == RXKADEXPIRED && !dead_session) {
204 lock_ObtainMutex(&userp->mx);
205 ucellp = cm_GetUCell(userp, serverp->cellp);
206 if (ucellp->ticketp) {
207 free(ucellp->ticketp);
208 ucellp->ticketp = NULL;
210 ucellp->flags &= ~CM_UCELLFLAG_RXKAD;
212 lock_ReleaseMutex(&userp->mx);
/* the action taken for "retry && dead_session" is on a line not visible
 * in this listing */
216 if (retry && dead_session)
220 /* drop this on the way out */
224 /* retry until we fail to find a connection */
/*
 * Walk a server reference list and try to obtain a connection, via
 * cm_ConnByServer(), to a server that is not flagged CM_SERVERFLAG_DOWN.
 *
 * serversp - list of server references to try
 * usersp   - user passed on to cm_ConnByServer()
 * reqp     - request; reqp->startTime is used to work out time already spent
 * connpp   - out: connection filled in by cm_ConnByServer()
 *
 * Returns CM_ERROR_TIMEDOUT from the early time check.  When no error was
 * recorded in firstError, the fallback is CM_ERROR_ALLBUSY if someBusy is
 * set, CM_ERROR_NOSUCHVOLUME if someOffline is set, and CM_ERROR_TIMEDOUT
 * otherwise.
 *
 * NOTE(review): many lines of this function are not visible in this listing
 * (declarations, the time-check condition, how tsp is obtained and
 * referenced, where someBusy / someOffline are updated, loop exits and the
 * final return); the notes below cover only the visible statements.
 */
228 long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
229 cm_req_t *reqp, cm_conn_t **connpp)
232 cm_serverRef_t *tsrp;
235 int someBusy = 0, someOffline = 0;
236 long timeUsed, timeLeft, hardTimeLeft;
/* elapsed time on this request; the division by 1000 presumably converts
 * milliseconds to seconds - TODO confirm GetCurrentTime() units */
240 timeUsed = (GetCurrentTime() - reqp->startTime) / 1000;
241 /* leave 5 seconds margin of safety */
242 timeLeft = RDRtimeout - timeUsed - 5;
243 hardTimeLeft = timeLeft;
245 /* Time enough to do an RPC? */
247 return CM_ERROR_TIMEDOUT;
/* cm_serverLock is held for writing on entry to the loop and dropped
 * inside each iteration before the server is examined */
250 lock_ObtainWrite(&cm_serverLock);
252 for(tsrp = serversp; tsrp; tsrp=tsrp->next) {
255 lock_ReleaseWrite(&cm_serverLock);
256 if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
257 if (tsrp->status == busy)
259 else if (tsrp->status == offline)
262 code = cm_ConnByServer(tsp, usersp, connpp);
/* both timeouts are capped at the module defaults before being applied */
265 /* Set RPC timeout */
266 if (timeLeft > CM_CONN_CONNDEADTIME)
267 timeLeft = CM_CONN_CONNDEADTIME;
269 if (hardTimeLeft > CM_CONN_HARDDEADTIME)
270 hardTimeLeft = CM_CONN_HARDDEADTIME;
/* the connection's mutex is held while the rx dead times are set;
 * hardTimeLeft is narrowed to u_short for the rx call */
272 lock_ObtainMutex(&(*connpp)->mx);
273 rx_SetConnDeadTime((*connpp)->callp,
275 rx_SetConnHardDeadTime((*connpp)->callp,
276 (u_short) hardTimeLeft);
277 lock_ReleaseMutex(&(*connpp)->mx);
/* only the first error seen is remembered */
281 if (firstError == 0) firstError = code;
/* NOTE(review): the server reference count is decremented inside the
 * osi_assert() argument; if that macro is ever compiled out the decrement
 * disappears with it - confirm osi_assert() always evaluates its argument. */
284 lock_ObtainWrite(&cm_serverLock);
285 osi_assert(tsp->refCount-- > 0);
288 lock_ReleaseWrite(&cm_serverLock);
/* no error recorded: derive one from what was seen while scanning */
289 if (firstError == 0) {
290 if (someBusy) firstError = CM_ERROR_ALLBUSY;
291 else if (someOffline) firstError = CM_ERROR_NOSUCHVOLUME;
292 else firstError = CM_ERROR_TIMEDOUT;
294 osi_Log1(afsd_logp, "cm_ConnByMServers returning %x", firstError);
298 /* called with a held server to GC all bad connections hanging off of the server */
/*
 * Walk serverp->connsp with cm_connLock held for writing and tear down
 * connections that are no longer in use.
 *
 * serverp - server whose connection list is scanned
 *
 * A connection is torn down when userp is non-NULL, the connection's
 * refCount is 0 and userp->vcRefs is 0: the user reference is released,
 * the rx connection is destroyed and the connection's mutex is finalized.
 *
 * NOTE(review): the assignment of userp (presumably from the connection
 * being examined), the unlinking via lcpp and any freeing of the
 * connection are on lines not visible in this listing - confirm against
 * the full source.
 */
299 void cm_GCConnections(cm_server_t *serverp)
305 lock_ObtainWrite(&cm_connLock);
/* lcpp points at the link that leads to the current connection */
306 lcpp = &serverp->connsp;
307 for(tcp = *lcpp; tcp; tcp = *lcpp) {
309 if (userp && tcp->refCount == 0 && (userp->vcRefs == 0)) {
310 /* do the deletion of this guy */
311 cm_ReleaseUser(userp);
313 rx_DestroyConnection(tcp->callp);
314 lock_FinalizeMutex(&tcp->mx);
318 /* just advance to the next */
322 lock_ReleaseWrite(&cm_connLock);
/*
 * Create the rx connection for tcp to serverp and store it in tcp->callp.
 *
 * tcp     - connection record to fill in (callp and ucgen are written)
 * ucellp  - per-user cell state supplying the flags, session key, kvno,
 *           ticket and generation number
 * serverp - target server; its type must be CM_SERVER_VLDB or
 *           CM_SERVER_FILE, and its address is handed to rx_NewConnection()
 *
 * When CM_UCELLFLAG_RXKAD is set in ucellp->flags an rxkad client security
 * object is built (passing rxkad_clear, the session key, kvno and ticket);
 * the rxnull security object is presumably the alternative branch - the
 * "else" itself is not visible here.  The new connection gets the default
 * dead time and hard dead time, and ucellp->gen is recorded in tcp->ucgen.
 *
 * NOTE(review): what is selected per server type and the remaining
 * rx_NewConnection() arguments are on lines not visible in this listing.
 */
325 static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
326 cm_server_t *serverp)
331 struct rx_securityClass *secObjp;
333 if (serverp->type == CM_SERVER_VLDB) {
/* any server that is not a VLDB server has to be a file server */
338 osi_assert(serverp->type == CM_SERVER_FILE);
342 if (ucellp->flags & CM_UCELLFLAG_RXKAD) {
344 secObjp = rxkad_NewClientSecurityObject(rxkad_clear,
345 &ucellp->sessionKey, ucellp->kvno,
346 ucellp->ticketLen, ucellp->ticketp);
351 secObjp = rxnull_NewClientSecurityObject();
/* a security object is required before the connection is created */
353 osi_assert(secObjp != NULL);
354 tcp->callp = rx_NewConnection(serverp->addr.sin_addr.s_addr,
359 rx_SetConnDeadTime(tcp->callp, CM_CONN_CONNDEADTIME);
360 rx_SetConnHardDeadTime(tcp->callp, CM_CONN_HARDDEADTIME);
/* remember which generation of the user's cell state this connection
 * was built from */
361 tcp->ucgen = ucellp->gen;
/*
 * Find, or create, the connection between serverp and userp.
 *
 * serverp - server whose connection list (serverp->connsp) is searched
 * userp   - user the connection must belong to
 * connpp  - out parameter for the connection (the store through it is not
 *           visible in this listing)
 *
 * userp->mx is taken first, then cm_connLock for writing; they are
 * released in the reverse order.  The list is searched for an entry whose
 * userp matches.  The visible creation path allocates a zeroed cm_conn_t,
 * links it at the head of serverp->connsp, initializes its mutex, records
 * serverp and builds the rx connection.  When tcp->ucgen is older than
 * ucellp->gen the rx connection is destroyed and rebuilt.
 *
 * NOTE(review): the branch structure (presumably "connection not found"
 * versus "found"), reference counting, the assignment to *connpp and the
 * return value are on lines not visible here.  In the visible lines the
 * malloc() result is passed to memset() without a NULL check, and the
 * cm_GetUCell() result is used without a NULL check - confirm both.
 */
364 long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp)
369 lock_ObtainMutex(&userp->mx);
370 lock_ObtainWrite(&cm_connLock);
/* look for an existing connection owned by this user */
371 for(tcp = serverp->connsp; tcp; tcp=tcp->nextp) {
372 if (tcp->userp == userp) break;
374 /* find ucell structure */
375 ucellp = cm_GetUCell(userp, serverp->cellp);
/* new, zero-filled connection record pushed onto the head of the list */
377 tcp = malloc(sizeof(*tcp));
378 memset(tcp, 0, sizeof(*tcp));
379 tcp->nextp = serverp->connsp;
380 serverp->connsp = tcp;
383 lock_InitializeMutex(&tcp->mx, "cm_conn_t mutex");
384 tcp->serverp = serverp;
385 cm_NewRXConnection(tcp, ucellp, serverp);
/* the rx connection was built from an older generation of the user's
 * cell state: replace it */
389 if (tcp->ucgen < ucellp->gen) {
390 rx_DestroyConnection(tcp->callp);
391 cm_NewRXConnection(tcp, ucellp, serverp);
395 lock_ReleaseWrite(&cm_connLock);
396 lock_ReleaseMutex(&userp->mx);
/* NOTE(review): tcp is cast to long for the 0x%x format; where pointers
 * are wider than long the logged value would be truncated. */
398 /* return this pointer to our caller */
399 osi_Log1(afsd_logp, "cm_ConnByServer returning conn 0x%x", (long) tcp);
405 long cm_Conn(struct cm_fid *fidp, struct cm_user *userp, cm_req_t *reqp,
410 cm_serverRef_t *serversp;
412 code = cm_GetServerList(fidp, userp, reqp, &serversp);
418 code = cm_ConnByMServers(serversp, userp, reqp, connpp);