ms-nfs41-client/daemon/nfs41_compound.c
Casey Bodley 8616b03597 recovery: recover from STALE_STATEID errors
consider an operation that takes a stateid, and results in a BADSESSION error due to server reboot.  we'll recover the client and session, and send OPENs to reclaim all of the client's state.  but after recovery, we'll resend the original operation with the original stateid, and this will result in a STALE_STATEID error

we handle this by making use of the information in stateid_arg.  if we determine that stateid_arg.stateid is different from the nfs41_open_state's stateid, we copy the new stateid into stateid_arg.stateid and retry

note that if another thread is in recovery, it hasn't finished reclaiming its open state yet; so we wait on recovery to finish before comparing the stateids

Signed-off-by: Casey Bodley <cbodley@citi.umich.edu>
2010-12-06 14:21:34 -05:00

411 lines
16 KiB
C

/* Copyright (c) 2010
* The Regents of the University of Michigan
* All Rights Reserved
*
* Permission is granted to use, copy and redistribute this software
* for noncommercial education and research purposes, so long as no
* fee is charged, and so long as the name of the University of Michigan
* is not used in any advertising or publicity pertaining to the use
* or distribution of this software without specific, written prior
* authorization. Permission to modify or otherwise create derivative
* works of this software is not granted.
*
* This software is provided as is, without representation or warranty
* of any kind either express or implied, including without limitation
* the implied warranties of merchantability, fitness for a particular
* purpose, or noninfringement. The Regents of the University of
* Michigan shall not be liable for any damages, including special,
* indirect, incidental, or consequential damages, with respect to any
* claim arising out of or in connection with the use of the software,
* even if it has been or is hereafter advised of the possibility of
* such damages.
*/
#include <stdio.h>
#include <stdlib.h>
#include "nfs41_compound.h"
#include "nfs41_xdr.h"
#include "nfs41_ops.h"
#include "name_cache.h"
#include "daemon_debug.h"
#define BUF_SIZE 1024
int compound_error(int status)
{
if (status != NFS4_OK)
dprintf(1, "COMPOUND failed with status %d.\n", status);
return status;
}
void compound_init(
nfs41_compound *compound,
nfs_argop4 *argops,
nfs_resop4 *resops,
const char *tag)
{
/* initialize args */
compound->args.tag_len = (uint32_t)strlen(tag);
memcpy(compound->args.tag, tag, compound->args.tag_len);
compound->args.minorversion = 1;
compound->args.argarray_count = 0;
compound->args.argarray = argops;
/* initialize results */
ZeroMemory(&compound->res, sizeof(nfs41_compound_res));
compound->res.tag_len = NFS4_OPAQUE_LIMIT;
compound->res.resarray_count = 0;
compound->res.resarray = resops;
}
void compound_add_op(
nfs41_compound *compound,
uint32_t opnum,
void *arg,
void *res)
{
const uint32_t i = compound->args.argarray_count++;
compound->args.argarray[i].op = opnum;
compound->args.argarray[i].arg = arg;
compound->res.resarray[compound->res.resarray_count++].res = res;
}
/* Due to the possibility of replays, we might get a response to a different
* call than the one we're expecting. If we don't have a way to check for
* this, we'll likely crash trying to decode into the wrong structures.
* This function copies the number of operations and all of the operation
* numbers from the compound arguments into the response, so we can verify
* them on decode and fail before doing any damage. */
static void set_expected_res(
nfs41_compound *compound)
{
uint32_t i;
compound->res.resarray_count = compound->args.argarray_count;
for (i = 0; i < compound->res.resarray_count; i++)
compound->res.resarray[i].op = compound->args.argarray[i].op;
}
int check_renew_in_progress(
IN nfs41_session *session)
{
int status = 0;
bool_t one = 1, zero = 0;;
while (nfs41_renew_in_progress(session->client, NULL)) {
status = WaitForSingleObject(session->client->cond, INFINITE);
if (status != WAIT_OBJECT_0) {
dprintf(1, "nfs41_renew_in_progress: WaitForSingleObject failed\n");
print_condwait_status(1, status);
status = ERROR_LOCK_VIOLATION;
goto out;
}
nfs41_renew_in_progress(session->client, &zero);
status = 1;
}
nfs41_renew_in_progress(session->client, &one);
out:
return status;
}
int compound_encode_send_decode(
nfs41_session *session,
nfs41_compound *compound,
uint32_t bufsize_in,
uint32_t bufsize_out)
{
int status, retry_count = 0, delayby = 0;
nfs41_sequence_args *args =
(nfs41_sequence_args *)compound->args.argarray[0].arg;
bool_t zero = 0, client_state_lost = FALSE;
retry:
/* send compound */
retry_count++;
set_expected_res(compound);
status = nfs41_send_compound(session->client->rpc,
(char *)&compound->args, (char *)&compound->res);
// bump sequence number if sequence op succeeded.
if (compound->res.resarray_count > 0 &&
compound->res.resarray[0].op == OP_SEQUENCE) {
nfs41_sequence_res *seq =
(nfs41_sequence_res *)compound->res.resarray[0].res;
if (seq->sr_status == NFS4_OK) {
// returned slotid must be the same we sent
status = NFS4ERR_IO;
if (seq->sr_resok4.sr_slotid != args->sa_slotid) {
eprintf("[session] sr_slotid=%d != sa_slotid=%d\n",
seq->sr_resok4.sr_slotid, args->sa_slotid);
goto out_free_slot;
}
// returned sessionid must be the same we sent
if (memcmp(seq->sr_resok4.sr_sessionid, args->sa_sessionid,
NFS4_SESSIONID_SIZE)) {
eprintf("[session] sr_sessionid != sa_sessionid\n");
print_hexbuf(1, (unsigned char *)"sr_sessionid",
seq->sr_resok4.sr_sessionid, NFS4_SESSIONID_SIZE);
print_hexbuf(1, (unsigned char *)"sa_sessionid",
args->sa_sessionid, NFS4_SESSIONID_SIZE);
goto out_free_slot;
}
if (seq->sr_resok4.sr_status_flags)
print_sr_status_flags(1, seq->sr_resok4.sr_status_flags);
status = nfs41_session_bump_seq(session, args->sa_slotid);
if (status)
goto out_free_slot;
}
}
if (status) {
eprintf("nfs41_send_compound failed %d for seqid=%d, slotid=%d\n",
status, args->sa_sequenceid, args->sa_slotid);
status = NFS4ERR_IO;
goto out_free_slot;
}
if (compound->res.status != NFS4_OK)
dprintf(1, "\n################ %s ################\n\n",
nfs_error_string(compound->res.status));
if (compound->res.status != NFS4_OK &&
compound->args.argarray[0].op == OP_DESTROY_SESSION) {
dprintf(1, "OP_DESTROY_SESSION ignoring errors\n");
compound->res.status = NFS4_OK;
}
switch (compound->res.status) {
case NFS4_OK:
break;
case NFS4ERR_STALE_CLIENTID:
//try to create a new client
status = check_renew_in_progress(session);
if (status == ERROR_LOCK_VIOLATION)
goto out_free_slot;
else if (status == 1)
goto do_retry;
client_state_lost = TRUE;
status = nfs41_client_renew(session->client);
if (status) {
eprintf("nfs41_exchange_id() failed with %d\n", status);
status = ERROR_BAD_NET_RESP;
goto out;
}
//fallthru and reestablish the session
case NFS4ERR_BADSESSION:
//try to create a new session
if (compound->res.status == NFS4ERR_BADSESSION) {
status = check_renew_in_progress(session);
if (status == ERROR_LOCK_VIOLATION)
goto out_free_slot;
else if (status == 1)
goto do_retry;
}
status = nfs41_session_renew(session);
if (status == NFS4ERR_STALE_CLIENTID) {
client_state_lost = TRUE;
status = nfs41_client_renew(session->client);
if (status) {
eprintf("nfs41_exchange_id() failed with %d\n", status);
status = ERROR_BAD_NET_RESP;
goto out;
}
status = nfs41_session_renew(session);
if (status) {
eprintf("after reestablishing clientid: nfs41_session_renew() "
"failed with %d\n", status);
status = ERROR_BAD_NET_RESP;
goto out;
}
} else if (status && status != NFS4ERR_STALE_CLIENTID) {
eprintf("nfs41_session_renew: failed with %d\n", status);
goto out;
}
/* do client state recovery */
if (client_state_lost) {
struct client_state *state = &session->client->state;
struct list_entry *entry;
nfs41_open_state *open;
stateid4 stateid;
EnterCriticalSection(&state->lock);
list_for_each(entry, &state->opens) {
open = list_container(entry, nfs41_open_state, client_entry);
status = nfs41_open_reclaim(session, &open->parent, &open->file,
&open->owner, open->share_access, open->share_deny,
&stateid);
if (status == NFS4_OK) {
/* update the open's stateid under lock */
AcquireSRWLockExclusive(&open->lock);
memcpy(&open->stateid, &stateid, sizeof(stateid4));
ReleaseSRWLockExclusive(&open->lock);
}
}
LeaveCriticalSection(&state->lock);
/* send reclaim_complete, but don't fail on errors */
status = nfs41_reclaim_complete(session);
if (status && status == NFS4ERR_NOTSUPP)
eprintf("nfs41_reclaim_complete() failed with %s\n",
nfs_error_string(status));
}
if (nfs41_renew_in_progress(session->client, NULL))
nfs41_renew_in_progress(session->client, &zero);
goto do_retry;
case NFS4ERR_STALE_STATEID:
if (compound->args.argarray[0].op == OP_SEQUENCE) {
nfs41_sequence_args *seq = (nfs41_sequence_args*)
compound->args.argarray[0].arg;
nfs41_session_free_slot(session, seq->sa_slotid);
}
{
nfs_argop4 *argop = &compound->args.argarray[
compound->res.resarray_count-1];
stateid_arg *stateid = NULL;
if (argop->op == OP_CLOSE) {
nfs41_op_close_args *close = (nfs41_op_close_args*)argop->arg;
stateid = close->stateid;
} else if (argop->op == OP_READ) {
nfs41_read_args *read = (nfs41_read_args*)argop->arg;
stateid = read->stateid;
} else if (argop->op == OP_WRITE) {
nfs41_write_args *write = (nfs41_write_args*)argop->arg;
stateid = write->stateid;
} else if (argop->op == OP_LOCK) {
nfs41_lock_args *lock = (nfs41_lock_args*)argop->arg;
if (lock->locker.new_lock_owner)
stateid = lock->locker.u.open_owner.open_stateid;
else
stateid = lock->locker.u.lock_owner.lock_stateid;
} else if (argop->op == OP_LOCKU) {
nfs41_locku_args *locku = (nfs41_locku_args*)argop->arg;
stateid = locku->lock_stateid;
} else if (argop->op == OP_SETATTR) {
nfs41_setattr_args *setattr = (nfs41_setattr_args*)argop->arg;
stateid = setattr->stateid;
} else if (argop->op == OP_LAYOUTGET) {
pnfs_layoutget_args *lget = (pnfs_layoutget_args*)argop->arg;
stateid = lget->stateid;
}
if (stateid) {
bool_t retry = FALSE;
switch (stateid->type) {
case STATEID_OPEN:
/* if there's recovery in progress, wait for it to finish */
while (nfs41_renew_in_progress(session->client, NULL)) {
DWORD wait = WaitForSingleObject(session->client->cond, INFINITE);
if (wait != WAIT_OBJECT_0) {
print_condwait_status(1, wait);
break;
}
}
/* if the open stateid is different, update and retry */
AcquireSRWLockShared(&stateid->open->lock);
if (memcmp(&stateid->stateid, &stateid->open->stateid, sizeof(stateid4))) {
memcpy(&stateid->stateid, &stateid->open->stateid, sizeof(stateid4));
retry = TRUE;
}
ReleaseSRWLockShared(&stateid->open->lock);
break;
default:
eprintf("%s returned %s: can't recover stateid type %u\n",
nfs_opnum_to_string(argop->op),
nfs_error_string(compound->res.status), stateid->type);
break;
}
if (retry) {
dprintf(1, "Stateid has been reclaimed, retrying..\n");
goto do_retry;
}
}
}
goto out;
case NFS4ERR_GRACE:
case NFS4ERR_DELAY:
#define RETRY_INDEFINITELY
#ifndef RETRY_INDEFINITELY
#define NUMBER_2_RETRY 19
#endif
#ifndef RETRY_INDEFINITELY
if (retry_count < NUMBER_2_RETRY) {
#endif
if (compound->args.argarray[0].op == OP_SEQUENCE) {
nfs41_sequence_args *seq = (nfs41_sequence_args*)
compound->args.argarray[0].arg;
nfs41_session_free_slot(session, seq->sa_slotid);
}
if (compound->res.status == NFS4ERR_GRACE)
delayby = 5000;
else
delayby = 500*retry_count;
dprintf(1, "Compound returned %s: sleeping for %ums..\n",
(compound->res.status==NFS4ERR_GRACE)?"NFS4ERR_GRACE":"NFS4ERR_DELAY",
delayby);
Sleep(delayby);
dprintf(1, "Attempting to resend compound.\n");
goto do_retry;
#ifndef RETRY_INDEFINITELY
}
#endif
break;
case NFS4ERR_FHEXPIRED: /* TODO: recover expired volatile filehandles */
status = NFS4ERR_STALE; /* for now, treat them as ERR_STALE */
/* no break */
case NFS4ERR_STALE:
{
nfs_argop4 *argarray = compound->args.argarray;
struct nfs41_name_cache *name_cache =
session_name_cache(session);
nfs41_putfh_args *putfh;
uint32_t i, start = 0;
/* NFS4ERR_STALE generally comes from a PUTFH operation. in
* this case, remove its filehandle from the name cache. but
* because COMPOUNDs are not atomic, a file can be removed
* between PUTFH and the operation that uses it. in this
* case, we can't tell which PUTFH operation is to blame, so
* we must invalidate filehandles of all PUTFH operations in
* the COMPOUND */
if (argarray[compound->res.resarray_count-1].op == OP_PUTFH)
start = compound->res.resarray_count-1;
for (i = start; i < compound->res.resarray_count; i++) {
if (argarray[i].op == OP_PUTFH) {
putfh = (nfs41_putfh_args*)argarray[i].arg;
if (!putfh->in_recovery && putfh->file->path)
nfs41_name_cache_remove_stale(name_cache,
session, putfh->file->path);
}
}
}
break;
}
out_free_slot:
if (compound->args.argarray[0].op == OP_SEQUENCE) {
nfs41_sequence_args *seq = (nfs41_sequence_args *)compound->args.argarray[0].arg;
nfs41_session_free_slot(session, seq->sa_slotid);
}
out:
return status;
do_retry:
if (compound->res.resarray[0].op == OP_SEQUENCE) {
nfs41_sequence_args *seq = (nfs41_sequence_args*)
compound->args.argarray[0].arg;
status = nfs41_session_get_slot(session, &seq->sa_slotid,
&seq->sa_sequenceid, &seq->sa_highest_slotid);
if (status)
goto out;
}
goto retry;
}