recovery: updated handling of BADSESSION
moved recovery-related fields into struct nfs41_client.recovery. now uses a combination of CRITICAL_SECTION and CONDITION_VARIABLE for use with SleepConditionVariableCS()
renamed check_renew_in_progress() to recovery_start_or_wait(), and fixed the locking so that we atomically check/set in_recovery
when recovery is finished (including error conditions), call recovery_finish() to reset the recovery status and wake any waiting threads
Signed-off-by: Casey Bodley <cbodley@citi.umich.edu>
This commit is contained in:
parent
8616b03597
commit
80cb5b5f57
3 changed files with 55 additions and 66 deletions
|
|
@ -131,9 +131,13 @@ typedef struct __nfs41_client {
|
||||||
struct pnfs_file_layout_list *layouts;
|
struct pnfs_file_layout_list *layouts;
|
||||||
struct pnfs_file_device_list *devices;
|
struct pnfs_file_device_list *devices;
|
||||||
struct list_entry root_entry; /* position in nfs41_root.clients */
|
struct list_entry root_entry; /* position in nfs41_root.clients */
|
||||||
HANDLE cond;
|
|
||||||
struct __nfs41_root *root;
|
struct __nfs41_root *root;
|
||||||
bool_t in_recovery;
|
|
||||||
|
struct {
|
||||||
|
CONDITION_VARIABLE cond;
|
||||||
|
CRITICAL_SECTION lock;
|
||||||
|
bool_t in_recovery;
|
||||||
|
} recovery;
|
||||||
|
|
||||||
/* for state recovery on server reboot */
|
/* for state recovery on server reboot */
|
||||||
struct client_state state;
|
struct client_state state;
|
||||||
|
|
|
||||||
|
|
@ -118,14 +118,6 @@ int nfs41_client_create(
|
||||||
goto out_err_rpc;
|
goto out_err_rpc;
|
||||||
}
|
}
|
||||||
|
|
||||||
client->cond = CreateEvent(NULL, TRUE, FALSE, "client_recovery_cond");
|
|
||||||
if (client->cond == NULL) {
|
|
||||||
status = GetLastError();
|
|
||||||
eprintf("CreateEvent failed %d\n", status);
|
|
||||||
free(client);
|
|
||||||
goto out_err_rpc;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(&client->owner, owner, sizeof(client_owner4));
|
memcpy(&client->owner, owner, sizeof(client_owner4));
|
||||||
client->rpc = rpc;
|
client->rpc = rpc;
|
||||||
client->is_data = is_data;
|
client->is_data = is_data;
|
||||||
|
|
@ -137,6 +129,9 @@ int nfs41_client_create(
|
||||||
//initialize a lock used to protect access to client id and client id seq#
|
//initialize a lock used to protect access to client id and client id seq#
|
||||||
InitializeSRWLock(&client->exid_lock);
|
InitializeSRWLock(&client->exid_lock);
|
||||||
|
|
||||||
|
InitializeConditionVariable(&client->recovery.cond);
|
||||||
|
InitializeCriticalSection(&client->recovery.lock);
|
||||||
|
|
||||||
status = pnfs_client_init(client);
|
status = pnfs_client_init(client);
|
||||||
if (status) {
|
if (status) {
|
||||||
eprintf("pnfs_client_init() failed with %d\n", status);
|
eprintf("pnfs_client_init() failed with %d\n", status);
|
||||||
|
|
@ -203,25 +198,6 @@ out:
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool_t nfs41_renew_in_progress(nfs41_client *client, bool_t *value)
|
|
||||||
{
|
|
||||||
bool_t status = FALSE;
|
|
||||||
if (value) {
|
|
||||||
dprintf(1, "nfs41_renew_in_progress: setting value %d\n", *value);
|
|
||||||
AcquireSRWLockExclusive(&client->exid_lock);
|
|
||||||
client->in_recovery = *value;
|
|
||||||
if (!client->in_recovery)
|
|
||||||
SetEvent(client->cond);
|
|
||||||
ReleaseSRWLockExclusive(&client->exid_lock);
|
|
||||||
} else {
|
|
||||||
AcquireSRWLockShared(&client->exid_lock);
|
|
||||||
status = client->in_recovery;
|
|
||||||
ReleaseSRWLockShared(&client->exid_lock);
|
|
||||||
dprintf(1, "nfs41_renew_in_progress: returning value %d\n", status);
|
|
||||||
}
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
void nfs41_client_free(
|
void nfs41_client_free(
|
||||||
IN nfs41_client *client)
|
IN nfs41_client *client)
|
||||||
{
|
{
|
||||||
|
|
@ -231,7 +207,6 @@ void nfs41_client_free(
|
||||||
nfs41_rpc_clnt_free(client->rpc);
|
nfs41_rpc_clnt_free(client->rpc);
|
||||||
if (client->layouts) pnfs_file_layout_list_free(client->layouts);
|
if (client->layouts) pnfs_file_layout_list_free(client->layouts);
|
||||||
if (client->devices) pnfs_file_device_list_free(client->devices);
|
if (client->devices) pnfs_file_device_list_free(client->devices);
|
||||||
CloseHandle(client->cond);
|
|
||||||
free(client);
|
free(client);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -88,26 +88,41 @@ static void set_expected_res(
|
||||||
compound->res.resarray[i].op = compound->args.argarray[i].op;
|
compound->res.resarray[i].op = compound->args.argarray[i].op;
|
||||||
}
|
}
|
||||||
|
|
||||||
int check_renew_in_progress(
|
/* session/client recovery uses a lock and condition variable in nfs41_client
|
||||||
IN nfs41_session *session)
|
* to prevent multiple threads from attempting to recover at the same time */
|
||||||
|
static bool_t recovery_start_or_wait(
|
||||||
|
IN nfs41_client *client)
|
||||||
{
|
{
|
||||||
int status = 0;
|
bool_t status = TRUE;
|
||||||
bool_t one = 1, zero = 0;;
|
|
||||||
while (nfs41_renew_in_progress(session->client, NULL)) {
|
EnterCriticalSection(&client->recovery.lock);
|
||||||
status = WaitForSingleObject(session->client->cond, INFINITE);
|
|
||||||
if (status != WAIT_OBJECT_0) {
|
if (!client->recovery.in_recovery) {
|
||||||
dprintf(1, "nfs41_renew_in_progress: WaitForSingleObject failed\n");
|
dprintf(1, "Entering recovery mode for client %llu\n", client->clnt_id);
|
||||||
print_condwait_status(1, status);
|
client->recovery.in_recovery = TRUE;
|
||||||
status = ERROR_LOCK_VIOLATION;
|
} else {
|
||||||
goto out;
|
status = FALSE;
|
||||||
}
|
dprintf(1, "Waiting for recovery of client %llu\n", client->clnt_id);
|
||||||
nfs41_renew_in_progress(session->client, &zero);
|
while (client->recovery.in_recovery)
|
||||||
status = 1;
|
SleepConditionVariableCS(&client->recovery.cond,
|
||||||
|
&client->recovery.lock, INFINITE);
|
||||||
|
dprintf(1, "Woke up after recovery of client %llu\n", client->clnt_id);
|
||||||
}
|
}
|
||||||
nfs41_renew_in_progress(session->client, &one);
|
|
||||||
out:
|
LeaveCriticalSection(&client->recovery.lock);
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void recovery_finish(
|
||||||
|
IN nfs41_client *client)
|
||||||
|
{
|
||||||
|
EnterCriticalSection(&client->recovery.lock);
|
||||||
|
dprintf(1, "Finished recovery for client %llu\n", client->clnt_id);
|
||||||
|
client->recovery.in_recovery = FALSE;
|
||||||
|
WakeAllConditionVariable(&client->recovery.cond);
|
||||||
|
LeaveCriticalSection(&client->recovery.lock);
|
||||||
|
}
|
||||||
|
|
||||||
int compound_encode_send_decode(
|
int compound_encode_send_decode(
|
||||||
nfs41_session *session,
|
nfs41_session *session,
|
||||||
nfs41_compound *compound,
|
nfs41_compound *compound,
|
||||||
|
|
@ -117,7 +132,7 @@ int compound_encode_send_decode(
|
||||||
int status, retry_count = 0, delayby = 0;
|
int status, retry_count = 0, delayby = 0;
|
||||||
nfs41_sequence_args *args =
|
nfs41_sequence_args *args =
|
||||||
(nfs41_sequence_args *)compound->args.argarray[0].arg;
|
(nfs41_sequence_args *)compound->args.argarray[0].arg;
|
||||||
bool_t zero = 0, client_state_lost = FALSE;
|
bool_t client_state_lost = FALSE;
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
/* send compound */
|
/* send compound */
|
||||||
|
|
@ -178,29 +193,24 @@ retry:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NFS4ERR_STALE_CLIENTID:
|
case NFS4ERR_STALE_CLIENTID:
|
||||||
//try to create a new client
|
if (!recovery_start_or_wait(session->client))
|
||||||
status = check_renew_in_progress(session);
|
|
||||||
if (status == ERROR_LOCK_VIOLATION)
|
|
||||||
goto out_free_slot;
|
|
||||||
else if (status == 1)
|
|
||||||
goto do_retry;
|
goto do_retry;
|
||||||
|
//try to create a new client
|
||||||
client_state_lost = TRUE;
|
client_state_lost = TRUE;
|
||||||
status = nfs41_client_renew(session->client);
|
status = nfs41_client_renew(session->client);
|
||||||
if (status) {
|
if (status) {
|
||||||
eprintf("nfs41_exchange_id() failed with %d\n", status);
|
eprintf("nfs41_exchange_id() failed with %d\n", status);
|
||||||
status = ERROR_BAD_NET_RESP;
|
status = ERROR_BAD_NET_RESP;
|
||||||
|
recovery_finish(session->client);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
//fallthru and reestablish the session
|
//fallthru and reestablish the session
|
||||||
case NFS4ERR_BADSESSION:
|
case NFS4ERR_BADSESSION:
|
||||||
//try to create a new session
|
|
||||||
if (compound->res.status == NFS4ERR_BADSESSION) {
|
if (compound->res.status == NFS4ERR_BADSESSION) {
|
||||||
status = check_renew_in_progress(session);
|
if (!recovery_start_or_wait(session->client))
|
||||||
if (status == ERROR_LOCK_VIOLATION)
|
|
||||||
goto out_free_slot;
|
|
||||||
else if (status == 1)
|
|
||||||
goto do_retry;
|
goto do_retry;
|
||||||
}
|
}
|
||||||
|
//try to create a new session
|
||||||
status = nfs41_session_renew(session);
|
status = nfs41_session_renew(session);
|
||||||
if (status == NFS4ERR_STALE_CLIENTID) {
|
if (status == NFS4ERR_STALE_CLIENTID) {
|
||||||
client_state_lost = TRUE;
|
client_state_lost = TRUE;
|
||||||
|
|
@ -208,6 +218,7 @@ retry:
|
||||||
if (status) {
|
if (status) {
|
||||||
eprintf("nfs41_exchange_id() failed with %d\n", status);
|
eprintf("nfs41_exchange_id() failed with %d\n", status);
|
||||||
status = ERROR_BAD_NET_RESP;
|
status = ERROR_BAD_NET_RESP;
|
||||||
|
recovery_finish(session->client);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
status = nfs41_session_renew(session);
|
status = nfs41_session_renew(session);
|
||||||
|
|
@ -215,10 +226,12 @@ retry:
|
||||||
eprintf("after reestablishing clientid: nfs41_session_renew() "
|
eprintf("after reestablishing clientid: nfs41_session_renew() "
|
||||||
"failed with %d\n", status);
|
"failed with %d\n", status);
|
||||||
status = ERROR_BAD_NET_RESP;
|
status = ERROR_BAD_NET_RESP;
|
||||||
|
recovery_finish(session->client);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
} else if (status && status != NFS4ERR_STALE_CLIENTID) {
|
} else if (status && status != NFS4ERR_STALE_CLIENTID) {
|
||||||
eprintf("nfs41_session_renew: failed with %d\n", status);
|
eprintf("nfs41_session_renew: failed with %d\n", status);
|
||||||
|
recovery_finish(session->client);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
/* do client state recovery */
|
/* do client state recovery */
|
||||||
|
|
@ -249,8 +262,7 @@ retry:
|
||||||
eprintf("nfs41_reclaim_complete() failed with %s\n",
|
eprintf("nfs41_reclaim_complete() failed with %s\n",
|
||||||
nfs_error_string(status));
|
nfs_error_string(status));
|
||||||
}
|
}
|
||||||
if (nfs41_renew_in_progress(session->client, NULL))
|
recovery_finish(session->client);
|
||||||
nfs41_renew_in_progress(session->client, &zero);
|
|
||||||
goto do_retry;
|
goto do_retry;
|
||||||
|
|
||||||
case NFS4ERR_STALE_STATEID:
|
case NFS4ERR_STALE_STATEID:
|
||||||
|
|
@ -296,13 +308,11 @@ retry:
|
||||||
switch (stateid->type) {
|
switch (stateid->type) {
|
||||||
case STATEID_OPEN:
|
case STATEID_OPEN:
|
||||||
/* if there's recovery in progress, wait for it to finish */
|
/* if there's recovery in progress, wait for it to finish */
|
||||||
while (nfs41_renew_in_progress(session->client, NULL)) {
|
EnterCriticalSection(&session->client->recovery.lock);
|
||||||
DWORD wait = WaitForSingleObject(session->client->cond, INFINITE);
|
while (session->client->recovery.in_recovery)
|
||||||
if (wait != WAIT_OBJECT_0) {
|
SleepConditionVariableCS(&session->client->recovery.cond,
|
||||||
print_condwait_status(1, wait);
|
&session->client->recovery.lock, INFINITE);
|
||||||
break;
|
LeaveCriticalSection(&session->client->recovery.lock);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* if the open stateid is different, update and retry */
|
/* if the open stateid is different, update and retry */
|
||||||
AcquireSRWLockShared(&stateid->open->lock);
|
AcquireSRWLockShared(&stateid->open->lock);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue