recovery updated handling of BADSESSION

moved recovery-related fields into struct nfs41_client.recovery.  now uses a
combination of CRITICAL_SECTION and CONDITION_VARIABLE for use with
SleepConditionVariableCS()

renamed check_renew_in_progress() to recovery_start_or_wait(), and fixed the
locking so that we atomically check/set in_recovery

when recovery is finished (including error conditions), call recovery_finish()
to reset the recovery status and wake any waiting threads

Signed-off-by: Casey Bodley <cbodley@citi.umich.edu>
This commit is contained in:
Olga Kornievskaia 2010-12-06 14:28:13 -05:00
parent 8616b03597
commit 80cb5b5f57
3 changed files with 55 additions and 66 deletions

View file

@ -131,9 +131,13 @@ typedef struct __nfs41_client {
struct pnfs_file_layout_list *layouts; struct pnfs_file_layout_list *layouts;
struct pnfs_file_device_list *devices; struct pnfs_file_device_list *devices;
struct list_entry root_entry; /* position in nfs41_root.clients */ struct list_entry root_entry; /* position in nfs41_root.clients */
HANDLE cond;
struct __nfs41_root *root; struct __nfs41_root *root;
struct {
CONDITION_VARIABLE cond;
CRITICAL_SECTION lock;
bool_t in_recovery; bool_t in_recovery;
} recovery;
/* for state recovery on server reboot */ /* for state recovery on server reboot */
struct client_state state; struct client_state state;

View file

@ -118,14 +118,6 @@ int nfs41_client_create(
goto out_err_rpc; goto out_err_rpc;
} }
client->cond = CreateEvent(NULL, TRUE, FALSE, "client_recovery_cond");
if (client->cond == NULL) {
status = GetLastError();
eprintf("CreateEvent failed %d\n", status);
free(client);
goto out_err_rpc;
}
memcpy(&client->owner, owner, sizeof(client_owner4)); memcpy(&client->owner, owner, sizeof(client_owner4));
client->rpc = rpc; client->rpc = rpc;
client->is_data = is_data; client->is_data = is_data;
@ -137,6 +129,9 @@ int nfs41_client_create(
//initialize a lock used to protect access to client id and client id seq# //initialize a lock used to protect access to client id and client id seq#
InitializeSRWLock(&client->exid_lock); InitializeSRWLock(&client->exid_lock);
InitializeConditionVariable(&client->recovery.cond);
InitializeCriticalSection(&client->recovery.lock);
status = pnfs_client_init(client); status = pnfs_client_init(client);
if (status) { if (status) {
eprintf("pnfs_client_init() failed with %d\n", status); eprintf("pnfs_client_init() failed with %d\n", status);
@ -203,25 +198,6 @@ out:
return status; return status;
} }
bool_t nfs41_renew_in_progress(nfs41_client *client, bool_t *value)
{
bool_t status = FALSE;
if (value) {
dprintf(1, "nfs41_renew_in_progress: setting value %d\n", *value);
AcquireSRWLockExclusive(&client->exid_lock);
client->in_recovery = *value;
if (!client->in_recovery)
SetEvent(client->cond);
ReleaseSRWLockExclusive(&client->exid_lock);
} else {
AcquireSRWLockShared(&client->exid_lock);
status = client->in_recovery;
ReleaseSRWLockShared(&client->exid_lock);
dprintf(1, "nfs41_renew_in_progress: returning value %d\n", status);
}
return status;
}
void nfs41_client_free( void nfs41_client_free(
IN nfs41_client *client) IN nfs41_client *client)
{ {
@ -231,7 +207,6 @@ void nfs41_client_free(
nfs41_rpc_clnt_free(client->rpc); nfs41_rpc_clnt_free(client->rpc);
if (client->layouts) pnfs_file_layout_list_free(client->layouts); if (client->layouts) pnfs_file_layout_list_free(client->layouts);
if (client->devices) pnfs_file_device_list_free(client->devices); if (client->devices) pnfs_file_device_list_free(client->devices);
CloseHandle(client->cond);
free(client); free(client);
} }

View file

@ -88,26 +88,41 @@ static void set_expected_res(
compound->res.resarray[i].op = compound->args.argarray[i].op; compound->res.resarray[i].op = compound->args.argarray[i].op;
} }
int check_renew_in_progress( /* session/client recovery uses a lock and condition variable in nfs41_client
IN nfs41_session *session) * to prevent multiple threads from attempting to recover at the same time */
static bool_t recovery_start_or_wait(
IN nfs41_client *client)
{ {
int status = 0; bool_t status = TRUE;
bool_t one = 1, zero = 0;;
while (nfs41_renew_in_progress(session->client, NULL)) { EnterCriticalSection(&client->recovery.lock);
status = WaitForSingleObject(session->client->cond, INFINITE);
if (status != WAIT_OBJECT_0) { if (!client->recovery.in_recovery) {
dprintf(1, "nfs41_renew_in_progress: WaitForSingleObject failed\n"); dprintf(1, "Entering recovery mode for client %llu\n", client->clnt_id);
print_condwait_status(1, status); client->recovery.in_recovery = TRUE;
status = ERROR_LOCK_VIOLATION; } else {
goto out; status = FALSE;
dprintf(1, "Waiting for recovery of client %llu\n", client->clnt_id);
while (client->recovery.in_recovery)
SleepConditionVariableCS(&client->recovery.cond,
&client->recovery.lock, INFINITE);
dprintf(1, "Woke up after recovery of client %llu\n", client->clnt_id);
} }
nfs41_renew_in_progress(session->client, &zero);
status = 1; LeaveCriticalSection(&client->recovery.lock);
}
nfs41_renew_in_progress(session->client, &one);
out:
return status; return status;
} }
static void recovery_finish(
IN nfs41_client *client)
{
EnterCriticalSection(&client->recovery.lock);
dprintf(1, "Finished recovery for client %llu\n", client->clnt_id);
client->recovery.in_recovery = FALSE;
WakeAllConditionVariable(&client->recovery.cond);
LeaveCriticalSection(&client->recovery.lock);
}
int compound_encode_send_decode( int compound_encode_send_decode(
nfs41_session *session, nfs41_session *session,
nfs41_compound *compound, nfs41_compound *compound,
@ -117,7 +132,7 @@ int compound_encode_send_decode(
int status, retry_count = 0, delayby = 0; int status, retry_count = 0, delayby = 0;
nfs41_sequence_args *args = nfs41_sequence_args *args =
(nfs41_sequence_args *)compound->args.argarray[0].arg; (nfs41_sequence_args *)compound->args.argarray[0].arg;
bool_t zero = 0, client_state_lost = FALSE; bool_t client_state_lost = FALSE;
retry: retry:
/* send compound */ /* send compound */
@ -178,29 +193,24 @@ retry:
break; break;
case NFS4ERR_STALE_CLIENTID: case NFS4ERR_STALE_CLIENTID:
//try to create a new client if (!recovery_start_or_wait(session->client))
status = check_renew_in_progress(session);
if (status == ERROR_LOCK_VIOLATION)
goto out_free_slot;
else if (status == 1)
goto do_retry; goto do_retry;
//try to create a new client
client_state_lost = TRUE; client_state_lost = TRUE;
status = nfs41_client_renew(session->client); status = nfs41_client_renew(session->client);
if (status) { if (status) {
eprintf("nfs41_exchange_id() failed with %d\n", status); eprintf("nfs41_exchange_id() failed with %d\n", status);
status = ERROR_BAD_NET_RESP; status = ERROR_BAD_NET_RESP;
recovery_finish(session->client);
goto out; goto out;
} }
//fallthru and reestablish the session //fallthru and reestablish the session
case NFS4ERR_BADSESSION: case NFS4ERR_BADSESSION:
//try to create a new session
if (compound->res.status == NFS4ERR_BADSESSION) { if (compound->res.status == NFS4ERR_BADSESSION) {
status = check_renew_in_progress(session); if (!recovery_start_or_wait(session->client))
if (status == ERROR_LOCK_VIOLATION)
goto out_free_slot;
else if (status == 1)
goto do_retry; goto do_retry;
} }
//try to create a new session
status = nfs41_session_renew(session); status = nfs41_session_renew(session);
if (status == NFS4ERR_STALE_CLIENTID) { if (status == NFS4ERR_STALE_CLIENTID) {
client_state_lost = TRUE; client_state_lost = TRUE;
@ -208,6 +218,7 @@ retry:
if (status) { if (status) {
eprintf("nfs41_exchange_id() failed with %d\n", status); eprintf("nfs41_exchange_id() failed with %d\n", status);
status = ERROR_BAD_NET_RESP; status = ERROR_BAD_NET_RESP;
recovery_finish(session->client);
goto out; goto out;
} }
status = nfs41_session_renew(session); status = nfs41_session_renew(session);
@ -215,10 +226,12 @@ retry:
eprintf("after reestablishing clientid: nfs41_session_renew() " eprintf("after reestablishing clientid: nfs41_session_renew() "
"failed with %d\n", status); "failed with %d\n", status);
status = ERROR_BAD_NET_RESP; status = ERROR_BAD_NET_RESP;
recovery_finish(session->client);
goto out; goto out;
} }
} else if (status && status != NFS4ERR_STALE_CLIENTID) { } else if (status && status != NFS4ERR_STALE_CLIENTID) {
eprintf("nfs41_session_renew: failed with %d\n", status); eprintf("nfs41_session_renew: failed with %d\n", status);
recovery_finish(session->client);
goto out; goto out;
} }
/* do client state recovery */ /* do client state recovery */
@ -249,8 +262,7 @@ retry:
eprintf("nfs41_reclaim_complete() failed with %s\n", eprintf("nfs41_reclaim_complete() failed with %s\n",
nfs_error_string(status)); nfs_error_string(status));
} }
if (nfs41_renew_in_progress(session->client, NULL)) recovery_finish(session->client);
nfs41_renew_in_progress(session->client, &zero);
goto do_retry; goto do_retry;
case NFS4ERR_STALE_STATEID: case NFS4ERR_STALE_STATEID:
@ -296,13 +308,11 @@ retry:
switch (stateid->type) { switch (stateid->type) {
case STATEID_OPEN: case STATEID_OPEN:
/* if there's recovery in progress, wait for it to finish */ /* if there's recovery in progress, wait for it to finish */
while (nfs41_renew_in_progress(session->client, NULL)) { EnterCriticalSection(&session->client->recovery.lock);
DWORD wait = WaitForSingleObject(session->client->cond, INFINITE); while (session->client->recovery.in_recovery)
if (wait != WAIT_OBJECT_0) { SleepConditionVariableCS(&session->client->recovery.cond,
print_condwait_status(1, wait); &session->client->recovery.lock, INFINITE);
break; LeaveCriticalSection(&session->client->recovery.lock);
}
}
/* if the open stateid is different, update and retry */ /* if the open stateid is different, update and retry */
AcquireSRWLockShared(&stateid->open->lock); AcquireSRWLockShared(&stateid->open->lock);