diff --git a/daemon/nfs41.h b/daemon/nfs41.h
index 61ae61d..75ecde3 100644
--- a/daemon/nfs41.h
+++ b/daemon/nfs41.h
@@ -131,9 +131,14 @@ typedef struct __nfs41_client {
     struct pnfs_file_layout_list *layouts;
     struct pnfs_file_device_list *devices;
     struct list_entry root_entry; /* position in nfs41_root.clients */
-    HANDLE cond;
     struct __nfs41_root *root;
-    bool_t in_recovery;
+
+    /* recovery.lock guards in_recovery; waiters sleep on recovery.cond */
+    struct {
+        CONDITION_VARIABLE cond;
+        CRITICAL_SECTION lock;
+        bool_t in_recovery;
+    } recovery;
 
     /* for state recovery on server reboot */
     struct client_state state;
diff --git a/daemon/nfs41_client.c b/daemon/nfs41_client.c
index ad5572c..b0c4baf 100644
--- a/daemon/nfs41_client.c
+++ b/daemon/nfs41_client.c
@@ -118,14 +118,6 @@ int nfs41_client_create(
         goto out_err_rpc;
     }
 
-    client->cond = CreateEvent(NULL, TRUE, FALSE, "client_recovery_cond");
-    if (client->cond == NULL) {
-        status = GetLastError();
-        eprintf("CreateEvent failed %d\n", status);
-        free(client);
-        goto out_err_rpc;
-    }
-
     memcpy(&client->owner, owner, sizeof(client_owner4));
     client->rpc = rpc;
     client->is_data = is_data;
@@ -137,6 +129,9 @@ int nfs41_client_create(
     //initialize a lock used to protect access to client id and client id seq#
     InitializeSRWLock(&client->exid_lock);
 
+    InitializeConditionVariable(&client->recovery.cond);
+    InitializeCriticalSection(&client->recovery.lock);
+
     status = pnfs_client_init(client);
     if (status) {
         eprintf("pnfs_client_init() failed with %d\n", status);
@@ -203,25 +198,6 @@ out:
     return status;
 }
 
-bool_t nfs41_renew_in_progress(nfs41_client *client, bool_t *value)
-{
-    bool_t status = FALSE;
-    if (value) {
-        dprintf(1, "nfs41_renew_in_progress: setting value %d\n", *value);
-        AcquireSRWLockExclusive(&client->exid_lock);
-        client->in_recovery = *value;
-        if (!client->in_recovery)
-            SetEvent(client->cond);
-        ReleaseSRWLockExclusive(&client->exid_lock);
-    } else {
-        AcquireSRWLockShared(&client->exid_lock);
-        status = client->in_recovery;
-        ReleaseSRWLockShared(&client->exid_lock);
-        dprintf(1, "nfs41_renew_in_progress: returning value %d\n", status);
-    }
-    return status;
-}
-
 void nfs41_client_free(
     IN nfs41_client *client)
 {
@@ -231,7 +207,8 @@ void nfs41_client_free(
     nfs41_rpc_clnt_free(client->rpc);
     if (client->layouts) pnfs_file_layout_list_free(client->layouts);
     if (client->devices) pnfs_file_device_list_free(client->devices);
-    CloseHandle(client->cond);
+    /* condition variables need no cleanup, but the critical section does */
+    DeleteCriticalSection(&client->recovery.lock);
     free(client);
 }
 
diff --git a/daemon/nfs41_compound.c b/daemon/nfs41_compound.c
index edd2d37..0f09540 100644
--- a/daemon/nfs41_compound.c
+++ b/daemon/nfs41_compound.c
@@ -88,26 +88,46 @@ static void set_expected_res(
         compound->res.resarray[i].op = compound->args.argarray[i].op;
 }
 
-int check_renew_in_progress(
-    IN nfs41_session *session)
+/* session/client recovery uses a lock and condition variable in nfs41_client
+ * to prevent multiple threads from attempting to recover at the same time.
+ * returns TRUE if the caller entered recovery mode and must call
+ * recovery_finish() when done; returns FALSE if another thread was already
+ * recovering, after waiting for that recovery to finish */
+static bool_t recovery_start_or_wait(
+    IN nfs41_client *client)
 {
-    int status = 0;
-    bool_t one = 1, zero = 0;;
-    while (nfs41_renew_in_progress(session->client, NULL)) {
-        status = WaitForSingleObject(session->client->cond, INFINITE);
-        if (status != WAIT_OBJECT_0) {
-            dprintf(1, "nfs41_renew_in_progress: WaitForSingleObject failed\n");
-            print_condwait_status(1, status);
-            status = ERROR_LOCK_VIOLATION;
-            goto out;
-        }
-        nfs41_renew_in_progress(session->client, &zero);
-        status = 1;
+    bool_t status = TRUE;
+
+    EnterCriticalSection(&client->recovery.lock);
+
+    if (!client->recovery.in_recovery) {
+        dprintf(1, "Entering recovery mode for client %llu\n", client->clnt_id);
+        client->recovery.in_recovery = TRUE;
+    } else {
+        status = FALSE;
+        dprintf(1, "Waiting for recovery of client %llu\n", client->clnt_id);
+        while (client->recovery.in_recovery)
+            SleepConditionVariableCS(&client->recovery.cond,
+                &client->recovery.lock, INFINITE);
+        dprintf(1, "Woke up after recovery of client %llu\n", client->clnt_id);
     }
-    nfs41_renew_in_progress(session->client, &one);
-out:
+
+    LeaveCriticalSection(&client->recovery.lock);
     return status;
 }
+
+/* leaves recovery mode and wakes every thread sleeping on the client's
+ * recovery condition variable */
+static void recovery_finish(
+    IN nfs41_client *client)
+{
+    EnterCriticalSection(&client->recovery.lock);
+    dprintf(1, "Finished recovery for client %llu\n", client->clnt_id);
+    client->recovery.in_recovery = FALSE;
+    WakeAllConditionVariable(&client->recovery.cond);
+    LeaveCriticalSection(&client->recovery.lock);
+}
+
 int compound_encode_send_decode(
     nfs41_session *session,
     nfs41_compound *compound,
@@ -117,7 +137,7 @@ int compound_encode_send_decode(
     int status, retry_count = 0, delayby = 0;
     nfs41_sequence_args *args =
         (nfs41_sequence_args *)compound->args.argarray[0].arg;
-    bool_t zero = 0, client_state_lost = FALSE;
+    bool_t client_state_lost = FALSE;
 
 retry:
     /* send compound */
@@ -178,29 +198,24 @@ retry:
         break;
 
     case NFS4ERR_STALE_CLIENTID:
-        //try to create a new client
-        status = check_renew_in_progress(session);
-        if (status == ERROR_LOCK_VIOLATION)
-            goto out_free_slot;
-        else if (status == 1)
+        if (!recovery_start_or_wait(session->client))
             goto do_retry;
+        //try to create a new client
         client_state_lost = TRUE;
         status = nfs41_client_renew(session->client);
         if (status) {
             eprintf("nfs41_exchange_id() failed with %d\n", status);
             status = ERROR_BAD_NET_RESP;
+            recovery_finish(session->client);
             goto out;
         }
         //fallthru and reestablish the session
     case NFS4ERR_BADSESSION:
-        //try to create a new session
         if (compound->res.status == NFS4ERR_BADSESSION) {
-            status = check_renew_in_progress(session);
-            if (status == ERROR_LOCK_VIOLATION)
-                goto out_free_slot;
-            else if (status == 1)
+            if (!recovery_start_or_wait(session->client))
                 goto do_retry;
         }
+        //try to create a new session
         status = nfs41_session_renew(session);
         if (status == NFS4ERR_STALE_CLIENTID) {
             client_state_lost = TRUE;
@@ -208,6 +223,7 @@ retry:
             if (status) {
                 eprintf("nfs41_exchange_id() failed with %d\n", status);
                 status = ERROR_BAD_NET_RESP;
+                recovery_finish(session->client);
                 goto out;
             }
             status = nfs41_session_renew(session);
@@ -215,10 +231,12 @@ retry:
                 eprintf("after reestablishing clientid: nfs41_session_renew() "
                     "failed with %d\n", status);
                 status = ERROR_BAD_NET_RESP;
+                recovery_finish(session->client);
                 goto out;
             }
         } else if (status && status != NFS4ERR_STALE_CLIENTID) {
             eprintf("nfs41_session_renew: failed with %d\n", status);
+            recovery_finish(session->client);
             goto out;
         }
         /* do client state recovery */
@@ -249,8 +267,7 @@ retry:
                 eprintf("nfs41_reclaim_complete() failed with %s\n",
                     nfs_error_string(status));
         }
-        if (nfs41_renew_in_progress(session->client, NULL))
-            nfs41_renew_in_progress(session->client, &zero);
+        recovery_finish(session->client);
         goto do_retry;
 
     case NFS4ERR_STALE_STATEID:
@@ -296,13 +313,11 @@ retry:
             switch (stateid->type) {
             case STATEID_OPEN:
                 /* if there's recovery in progress, wait for it to finish */
-                while (nfs41_renew_in_progress(session->client, NULL)) {
-                    DWORD wait = WaitForSingleObject(session->client->cond, INFINITE);
-                    if (wait != WAIT_OBJECT_0) {
-                        print_condwait_status(1, wait);
-                        break;
-                    }
-                }
+                EnterCriticalSection(&session->client->recovery.lock);
+                while (session->client->recovery.in_recovery)
+                    SleepConditionVariableCS(&session->client->recovery.cond,
+                        &session->client->recovery.lock, INFINITE);
+                LeaveCriticalSection(&session->client->recovery.lock);
 
                 /* if the open stateid is different, update and retry */
                 AcquireSRWLockShared(&stateid->open->lock);