pnfs: new locking model for layouts
Exclusive locks are no longer held over LAYOUTGET, LAYOUTRETURN, or GETDEVICEINFO RPCs. This prevents a deadlock when CB_LAYOUTRECALL needs an exclusive lock while another operation is on the wire. Introduced a 'pending' flag and condition variable to protect access to state->layout while the layout's lock is not held. Updated file_layout_recall() to compare the stateid sequence numbers to determine whether the server has processed an outstanding LAYOUTGET or LAYOUTRETURN, in which case we're required to reply with NFS4ERR_DELAY. LAYOUTGET, LAYOUTRETURN, and GETDEVICEINFO can now be sent with try_recovery=TRUE because they no longer hold an exclusive lock; this makes it possible for recover_client_state() to recall all of the client's layouts without deadlocking. Signed-off-by: Casey Bodley <cbodley@citi.umich.edu>
This commit is contained in:
parent
c9585d937f
commit
bf53e3dc1a
4 changed files with 77 additions and 24 deletions
|
|
@ -41,20 +41,23 @@ static enum_t handle_cb_layoutrecall(
|
|||
OUT struct cb_layoutrecall_res *res)
|
||||
{
|
||||
enum pnfs_status status;
|
||||
/* forgetful model for layout recalls; return NOMATCHING_LAYOUT
|
||||
* and flag the layout(s) to prevent further use */
|
||||
res->status = NFS4ERR_NOMATCHING_LAYOUT;
|
||||
|
||||
status = pnfs_file_layout_recall(rpc_clnt->client, args);
|
||||
switch (status) {
|
||||
case PNFS_PENDING:
|
||||
/* not enough information to process the recall yet */
|
||||
res->status = NFS4ERR_DELAY;
|
||||
break;
|
||||
default:
|
||||
/* forgetful model for layout recalls */
|
||||
res->status = NFS4ERR_NOMATCHING_LAYOUT;
|
||||
break;
|
||||
}
|
||||
|
||||
dprintf(CBSLVL, " OP_CB_LAYOUTRECALL { %s, %s, recall %u } %s\n",
|
||||
pnfs_layout_type_string(args->type),
|
||||
pnfs_iomode_string(args->iomode), args->recall.type,
|
||||
nfs_error_string(res->status));
|
||||
|
||||
status = pnfs_file_layout_recall(rpc_clnt->client, args);
|
||||
if (status)
|
||||
eprintf("pnfs_file_layout_recall() failed with %s\n",
|
||||
pnfs_error_string(status));
|
||||
|
||||
return res->status;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1643,7 +1643,7 @@ enum nfsstat4 pnfs_rpc_layoutget(
|
|||
ZeroMemory(&layoutget_res, sizeof(layoutget_res));
|
||||
layoutget_res.u.res_ok = layoutget_res_ok;
|
||||
|
||||
status = compound_encode_send_decode(session, &compound, FALSE);
|
||||
status = compound_encode_send_decode(session, &compound, TRUE);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
|
|
@ -1768,7 +1768,7 @@ enum nfsstat4 pnfs_rpc_layoutreturn(
|
|||
layoutreturn_args.length = length;
|
||||
layoutreturn_args.stateid = stateid;
|
||||
|
||||
status = compound_encode_send_decode(session, &compound, FALSE);
|
||||
status = compound_encode_send_decode(session, &compound, TRUE);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
|
|
@ -1806,7 +1806,7 @@ enum nfsstat4 pnfs_rpc_getdeviceinfo(
|
|||
getdeviceinfo_args.notify_types.count = 0;
|
||||
getdeviceinfo_res.u.res_ok.device = device;
|
||||
|
||||
status = compound_encode_send_decode(session, &compound, FALSE);
|
||||
status = compound_encode_send_decode(session, &compound, TRUE);
|
||||
if (status)
|
||||
goto out;
|
||||
|
||||
|
|
|
|||
|
|
@ -164,7 +164,9 @@ typedef struct __pnfs_layout_state {
|
|||
bool_t return_on_close;
|
||||
LONG open_count; /* for return on last close */
|
||||
uint32_t io_count; /* number of pending io operations */
|
||||
bool_t pending; /* pending LAYOUTGET/LAYOUTRETURN */
|
||||
SRWLOCK lock;
|
||||
CONDITION_VARIABLE cond;
|
||||
} pnfs_layout_state;
|
||||
|
||||
typedef struct __pnfs_layout {
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ static enum pnfs_status layout_state_create(
|
|||
|
||||
fh_copy(&layout->meta_fh, meta_fh);
|
||||
InitializeSRWLock(&layout->lock);
|
||||
InitializeConditionVariable(&layout->cond);
|
||||
|
||||
*layout_out = layout;
|
||||
out:
|
||||
|
|
@ -295,8 +296,12 @@ static enum pnfs_status file_layout_fetch(
|
|||
|
||||
list_init(&layoutget_res.layouts);
|
||||
|
||||
/* drop the lock during the rpc call */
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
nfsstat = pnfs_rpc_layoutget(session, meta_file,
|
||||
stateid, iomode, offset, length, &layoutget_res);
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
if (nfsstat) {
|
||||
dprintf(FLLVL, "pnfs_rpc_layoutget() failed with %s\n",
|
||||
nfs_error_string(nfsstat));
|
||||
|
|
@ -374,6 +379,11 @@ static enum pnfs_status file_layout_cache(
|
|||
/* use an exclusive lock while attempting to get a new layout */
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
/* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
|
||||
while (state->pending)
|
||||
SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
|
||||
state->pending = TRUE;
|
||||
|
||||
status = layout_grant_status(state, iomode);
|
||||
if (status == PNFS_PENDING) {
|
||||
/* if there's an existing layout stateid, use it */
|
||||
|
|
@ -395,6 +405,8 @@ static enum pnfs_status file_layout_cache(
|
|||
}
|
||||
}
|
||||
|
||||
state->pending = FALSE;
|
||||
WakeConditionVariable(&state->cond);
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
}
|
||||
return status;
|
||||
|
|
@ -462,14 +474,32 @@ static enum pnfs_status file_layout_device(
|
|||
/* use an exclusive lock to look up device info */
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
/* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
|
||||
while (state->pending)
|
||||
SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
|
||||
state->pending = TRUE;
|
||||
|
||||
status = file_device_status(state);
|
||||
if (status == PNFS_PENDING) {
|
||||
status = pnfs_file_device_get(session, session->client->devices,
|
||||
state->layout->deviceid, &state->layout->device);
|
||||
if (status == PNFS_SUCCESS)
|
||||
unsigned char deviceid[PNFS_DEVICEID_SIZE];
|
||||
pnfs_file_device *device;
|
||||
|
||||
memcpy(deviceid, state->layout->deviceid, PNFS_DEVICEID_SIZE);
|
||||
|
||||
/* drop the lock during the rpc call */
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
status = pnfs_file_device_get(session,
|
||||
session->client->devices, deviceid, &device);
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
if (status == PNFS_SUCCESS) {
|
||||
state->layout->device = device;
|
||||
state->status |= PNFS_LAYOUT_HAS_DEVICE;
|
||||
}
|
||||
}
|
||||
|
||||
state->pending = FALSE;
|
||||
WakeConditionVariable(&state->cond);
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
}
|
||||
return status;
|
||||
|
|
@ -540,15 +570,24 @@ static enum pnfs_status file_layout_return(
|
|||
/* under exclusive lock, return the layout and reset status flags */
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
/* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
|
||||
while (state->pending)
|
||||
SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
|
||||
state->pending = TRUE;
|
||||
|
||||
status = layout_return_status(state);
|
||||
if (status == PNFS_PENDING) {
|
||||
pnfs_layoutreturn_res layoutreturn_res = { 0 };
|
||||
stateid4 stateid;
|
||||
memcpy(&stateid, &state->stateid, sizeof(stateid));
|
||||
|
||||
/* drop the lock during the rpc call */
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
nfsstat = pnfs_rpc_layoutreturn(session, file,
|
||||
PNFS_LAYOUTTYPE_FILE, PNFS_IOMODE_ANY, 0,
|
||||
NFS4_UINT64_MAX, &stateid, &layoutreturn_res);
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
if (nfsstat) {
|
||||
eprintf("pnfs_rpc_layoutreturn() failed with %s\n",
|
||||
nfs_error_string(nfsstat));
|
||||
|
|
@ -578,6 +617,8 @@ static enum pnfs_status file_layout_return(
|
|||
}
|
||||
}
|
||||
|
||||
state->pending = FALSE;
|
||||
WakeConditionVariable(&state->cond);
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
}
|
||||
|
||||
|
|
@ -763,24 +804,31 @@ static enum pnfs_status file_layout_recall(
|
|||
IN pnfs_layout_state *state,
|
||||
IN const struct cb_layoutrecall_args *recall)
|
||||
{
|
||||
const stateid4 *stateid_arg = &recall->recall.args.file.stateid;
|
||||
enum pnfs_status status = PNFS_SUCCESS;
|
||||
|
||||
/* under an exclusive lock, flag the layout as recalled */
|
||||
AcquireSRWLockExclusive(&state->lock);
|
||||
|
||||
if (state->io_count == 0) {
|
||||
/* if there is no pending io, return the layout now */
|
||||
status = layout_recall_return(state);
|
||||
} else {
|
||||
if (recall->recall.type == PNFS_RETURN_FILE
|
||||
&& stateid_arg->seqid > state->stateid.seqid + 1) {
|
||||
/* the server has processed an outstanding LAYOUTGET or LAYOUTRETURN;
|
||||
* we must return ERR_DELAY until we get the response and update our
|
||||
* view of the layout */
|
||||
status = PNFS_PENDING;
|
||||
} else if (state->io_count) {
|
||||
/* flag the layout as recalled so it can be returned after io */
|
||||
state->status |= PNFS_LAYOUT_RECALLED;
|
||||
if (recall->changed)
|
||||
state->status |= PNFS_LAYOUT_CHANGED;
|
||||
}
|
||||
|
||||
/* if we got a stateid, update the layout's seqid */
|
||||
if (recall->recall.type == PNFS_RETURN_FILE)
|
||||
state->stateid.seqid = recall->recall.args.file.stateid.seqid;
|
||||
/* if we got a stateid, update the layout's seqid */
|
||||
if (recall->recall.type == PNFS_RETURN_FILE)
|
||||
state->stateid.seqid = stateid_arg->seqid;
|
||||
} else {
|
||||
/* if there is no pending io, return the layout now */
|
||||
status = layout_recall_return(state);
|
||||
}
|
||||
|
||||
ReleaseSRWLockExclusive(&state->lock);
|
||||
return status;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue