pnfs: new locking model for layouts

exclusive locks are no longer held over LAYOUTGET, LAYOUTRETURN, or GETDEVICEINFO rpcs.  this prevents a deadlock when CB_LAYOUTRECALL needs an exclusive lock while another operation is on the wire

introduced a 'pending' condition variable to protect access to state->layout while the layout's lock is not held

updated file_layout_recall() to compare the stateid sequence numbers to determine if the server has processed an outstanding LAYOUTGET or LAYOUTRETURN, where we're required to reply with NFS4ERR_DELAY

LAYOUTGET, LAYOUTRETURN, and GETDEVICEINFO can now be sent with try_recovery=TRUE because they no longer hold an exclusive lock.  this makes it possible for recover_client_state() to recall all of the client's layouts without deadlocking

Signed-off-by: Casey Bodley <cbodley@citi.umich.edu>
This commit is contained in:
Casey Bodley 2011-03-10 12:46:34 -05:00 committed by unknown
parent c9585d937f
commit bf53e3dc1a
4 changed files with 77 additions and 24 deletions

View file

@ -41,20 +41,23 @@ static enum_t handle_cb_layoutrecall(
OUT struct cb_layoutrecall_res *res)
{
enum pnfs_status status;
/* forgetful model for layout recalls; return NOMATCHING_LAYOUT
* and flag the layout(s) to prevent further use */
res->status = NFS4ERR_NOMATCHING_LAYOUT;
status = pnfs_file_layout_recall(rpc_clnt->client, args);
switch (status) {
case PNFS_PENDING:
/* not enough information to process the recall yet */
res->status = NFS4ERR_DELAY;
break;
default:
/* forgetful model for layout recalls */
res->status = NFS4ERR_NOMATCHING_LAYOUT;
break;
}
dprintf(CBSLVL, " OP_CB_LAYOUTRECALL { %s, %s, recall %u } %s\n",
pnfs_layout_type_string(args->type),
pnfs_iomode_string(args->iomode), args->recall.type,
nfs_error_string(res->status));
status = pnfs_file_layout_recall(rpc_clnt->client, args);
if (status)
eprintf("pnfs_file_layout_recall() failed with %s\n",
pnfs_error_string(status));
return res->status;
}

View file

@ -1643,7 +1643,7 @@ enum nfsstat4 pnfs_rpc_layoutget(
ZeroMemory(&layoutget_res, sizeof(layoutget_res));
layoutget_res.u.res_ok = layoutget_res_ok;
status = compound_encode_send_decode(session, &compound, FALSE);
status = compound_encode_send_decode(session, &compound, TRUE);
if (status)
goto out;
@ -1768,7 +1768,7 @@ enum nfsstat4 pnfs_rpc_layoutreturn(
layoutreturn_args.length = length;
layoutreturn_args.stateid = stateid;
status = compound_encode_send_decode(session, &compound, FALSE);
status = compound_encode_send_decode(session, &compound, TRUE);
if (status)
goto out;
@ -1806,7 +1806,7 @@ enum nfsstat4 pnfs_rpc_getdeviceinfo(
getdeviceinfo_args.notify_types.count = 0;
getdeviceinfo_res.u.res_ok.device = device;
status = compound_encode_send_decode(session, &compound, FALSE);
status = compound_encode_send_decode(session, &compound, TRUE);
if (status)
goto out;

View file

@ -164,7 +164,9 @@ typedef struct __pnfs_layout_state {
bool_t return_on_close;
LONG open_count; /* for return on last close */
uint32_t io_count; /* number of pending io operations */
bool_t pending; /* pending LAYOUTGET/LAYOUTRETURN */
SRWLOCK lock;
CONDITION_VARIABLE cond;
} pnfs_layout_state;
typedef struct __pnfs_layout {

View file

@ -55,6 +55,7 @@ static enum pnfs_status layout_state_create(
fh_copy(&layout->meta_fh, meta_fh);
InitializeSRWLock(&layout->lock);
InitializeConditionVariable(&layout->cond);
*layout_out = layout;
out:
@ -295,8 +296,12 @@ static enum pnfs_status file_layout_fetch(
list_init(&layoutget_res.layouts);
/* drop the lock during the rpc call */
ReleaseSRWLockExclusive(&state->lock);
nfsstat = pnfs_rpc_layoutget(session, meta_file,
stateid, iomode, offset, length, &layoutget_res);
AcquireSRWLockExclusive(&state->lock);
if (nfsstat) {
dprintf(FLLVL, "pnfs_rpc_layoutget() failed with %s\n",
nfs_error_string(nfsstat));
@ -374,6 +379,11 @@ static enum pnfs_status file_layout_cache(
/* use an exclusive lock while attempting to get a new layout */
AcquireSRWLockExclusive(&state->lock);
/* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
while (state->pending)
SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
state->pending = TRUE;
status = layout_grant_status(state, iomode);
if (status == PNFS_PENDING) {
/* if there's an existing layout stateid, use it */
@ -395,6 +405,8 @@ static enum pnfs_status file_layout_cache(
}
}
state->pending = FALSE;
WakeConditionVariable(&state->cond);
ReleaseSRWLockExclusive(&state->lock);
}
return status;
@ -462,14 +474,32 @@ static enum pnfs_status file_layout_device(
/* use an exclusive lock to look up device info */
AcquireSRWLockExclusive(&state->lock);
/* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
while (state->pending)
SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
state->pending = TRUE;
status = file_device_status(state);
if (status == PNFS_PENDING) {
status = pnfs_file_device_get(session, session->client->devices,
state->layout->deviceid, &state->layout->device);
if (status == PNFS_SUCCESS)
unsigned char deviceid[PNFS_DEVICEID_SIZE];
pnfs_file_device *device;
memcpy(deviceid, state->layout->deviceid, PNFS_DEVICEID_SIZE);
/* drop the lock during the rpc call */
ReleaseSRWLockExclusive(&state->lock);
status = pnfs_file_device_get(session,
session->client->devices, deviceid, &device);
AcquireSRWLockExclusive(&state->lock);
if (status == PNFS_SUCCESS) {
state->layout->device = device;
state->status |= PNFS_LAYOUT_HAS_DEVICE;
}
}
state->pending = FALSE;
WakeConditionVariable(&state->cond);
ReleaseSRWLockExclusive(&state->lock);
}
return status;
@ -540,15 +570,24 @@ static enum pnfs_status file_layout_return(
/* under exclusive lock, return the layout and reset status flags */
AcquireSRWLockExclusive(&state->lock);
/* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
while (state->pending)
SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
state->pending = TRUE;
status = layout_return_status(state);
if (status == PNFS_PENDING) {
pnfs_layoutreturn_res layoutreturn_res = { 0 };
stateid4 stateid;
memcpy(&stateid, &state->stateid, sizeof(stateid));
/* drop the lock during the rpc call */
ReleaseSRWLockExclusive(&state->lock);
nfsstat = pnfs_rpc_layoutreturn(session, file,
PNFS_LAYOUTTYPE_FILE, PNFS_IOMODE_ANY, 0,
NFS4_UINT64_MAX, &stateid, &layoutreturn_res);
AcquireSRWLockExclusive(&state->lock);
if (nfsstat) {
eprintf("pnfs_rpc_layoutreturn() failed with %s\n",
nfs_error_string(nfsstat));
@ -578,6 +617,8 @@ static enum pnfs_status file_layout_return(
}
}
state->pending = FALSE;
WakeConditionVariable(&state->cond);
ReleaseSRWLockExclusive(&state->lock);
}
@ -763,24 +804,31 @@ static enum pnfs_status file_layout_recall(
IN pnfs_layout_state *state,
IN const struct cb_layoutrecall_args *recall)
{
const stateid4 *stateid_arg = &recall->recall.args.file.stateid;
enum pnfs_status status = PNFS_SUCCESS;
/* under an exclusive lock, flag the layout as recalled */
AcquireSRWLockExclusive(&state->lock);
if (state->io_count == 0) {
/* if there is no pending io, return the layout now */
status = layout_recall_return(state);
} else {
if (recall->recall.type == PNFS_RETURN_FILE
&& stateid_arg->seqid > state->stateid.seqid + 1) {
/* the server has processed an outstanding LAYOUTGET or LAYOUTRETURN;
* we must return ERR_DELAY until we get the response and update our
* view of the layout */
status = PNFS_PENDING;
} else if (state->io_count) {
/* flag the layout as recalled so it can be returned after io */
state->status |= PNFS_LAYOUT_RECALLED;
if (recall->changed)
state->status |= PNFS_LAYOUT_CHANGED;
}
/* if we got a stateid, update the layout's seqid */
if (recall->recall.type == PNFS_RETURN_FILE)
state->stateid.seqid = recall->recall.args.file.stateid.seqid;
/* if we got a stateid, update the layout's seqid */
if (recall->recall.type == PNFS_RETURN_FILE)
state->stateid.seqid = stateid_arg->seqid;
} else {
/* if there is no pending io, return the layout now */
status = layout_recall_return(state);
}
ReleaseSRWLockExclusive(&state->lock);
return status;