pnfs: store a list of layout segments
pnfs_layout_state now stores a list of segments instead of a single
pnfs_file_layout entry. when new segments are acquired through
LAYOUTGET, they are inserted into the list in order of increasing
offset.

functions related to pnfs_layout_state_prepare() now operate on the
list to find missing layout ranges, and segments missing their
devices.

pattern_init() in pnfs_io.c now allocates and initializes io threads
for each layout segment in the range. new function pattern_join()
calls WaitForMultipleObjects() in a loop, to support io patterns with
more than 64 threads. if pattern_fork() is called with a thread count
of 1, the thread function is called directly instead of spawning a
new thread.

Signed-off-by: Casey Bodley <cbodley@citi.umich.edu>
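Keeping the segment list sorted by increasing offset is what lets the coverage check walk the list once, advancing a position cursor through each intersecting segment; the real implementations are layout_ordered_insert() and layout_coverage_status() in the diff below. A minimal standalone sketch of the same idea, using simplified, hypothetical types (iomode checks omitted):

/* sketch only: 'struct segment' stands in for pnfs_layout */
#include <stdio.h>
#include <stdint.h>

struct segment { uint64_t offset, length; };

/* returns 1 if segments (sorted by increasing offset) cover
 * the byte range [offset, offset + length) */
static int range_covered(const struct segment *segs, int count,
    uint64_t offset, uint64_t length)
{
    uint64_t position = offset;
    int i;
    for (i = 0; i < count; i++) {
        /* if a segment contains the current position, skip past its end */
        if (segs[i].offset <= position &&
            position < segs[i].offset + segs[i].length)
            position = segs[i].offset + segs[i].length;
    }
    return position >= offset + length;
}

int main(void)
{
    const struct segment segs[] = { { 0, 4096 }, { 4096, 8192 } };
    printf("%d\n", range_covered(segs, 2, 0, 12288)); /* 1: segments chain to 12288 */
    printf("%d\n", range_covered(segs, 2, 0, 16384)); /* 0: bytes past 12288 missing */
    return 0;
}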
parent 62ed5248bf
commit 5cc317e8a5
4 changed files with 290 additions and 170 deletions
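On the pattern_join() change described above: WaitForMultipleObjects() accepts at most MAXIMUM_WAIT_OBJECTS (64) handles per call, so a pattern with more threads must be joined in batches. A minimal sketch of that batched wait, assuming Windows headers and collapsing errors to a boolean (the commit's version in the diff below maps failures to PNFSERR_RESOURCES instead):

#include <windows.h>

/* returns TRUE once every handle has signaled, FALSE on any wait failure */
static BOOL join_all(HANDLE *handles, DWORD count)
{
    while (count) {
        /* wait on at most MAXIMUM_WAIT_OBJECTS (64) handles at a time */
        const DWORD n = min(count, MAXIMUM_WAIT_OBJECTS);
        if (WaitForMultipleObjects(n, handles, TRUE, INFINITE) != WAIT_OBJECT_0)
            return FALSE;
        count -= n;
        handles += n;
    }
    return TRUE;
}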
@@ -84,11 +84,6 @@ enum pnfs_iomode {
 };
 
 enum pnfs_layout_status {
-    /* LAYOUTGET was successful, and the layout has not been returned or
-     * otherwise revoked by the server */
-    PNFS_LAYOUT_GRANTED = 0x01,
-    /* GETDEVICEINFO was successful, and we have a valid 'device' pointer */
-    PNFS_LAYOUT_HAS_DEVICE = 0x02,
     /* CB_LAYOUTRECALL indicated that the server has recalled this layout,
      * and it should be returned on completion of any pending io */
     PNFS_LAYOUT_RECALLED = 0x04,
@@ -162,7 +157,7 @@ typedef struct __pnfs_layout_state {
     nfs41_fh meta_fh;
     stateid4 stateid;
     struct list_entry entry; /* position in nfs41_client.layouts */
-    struct __pnfs_file_layout *layout;
+    struct list_entry layouts; /* list of pnfs_file_layouts */
     enum pnfs_layout_status status;
     bool_t return_on_close;
     LONG open_count; /* for return on last close */
@@ -245,7 +240,7 @@ enum pnfs_status pnfs_file_layout_recall(
     IN const struct cb_layoutrecall_args *recall);
 
 /* expects caller to hold an exclusive lock on pnfs_layout_state */
-enum pnfs_status pnfs_layout_io_start(
+void pnfs_layout_io_start(
     IN pnfs_layout_state *state);
 
 void pnfs_layout_io_finished(
daemon/pnfs_io.c (206 changed lines)

@@ -29,6 +29,7 @@
 
 #define IOLVL 2 /* dprintf level for pnfs io logging */
 
+#define file_layout_entry(pos) list_container(pos, pnfs_file_layout, layout.entry)
 
 typedef struct __pnfs_io_pattern {
     struct __pnfs_io_thread *threads;
@@ -63,6 +64,13 @@ typedef struct __pnfs_io_unit {
 typedef uint32_t (WINAPI *pnfs_io_thread_fn)(void*);
 
+
+static enum pnfs_status stripe_next_unit(
+    IN const pnfs_file_layout *layout,
+    IN uint32_t stripeid,
+    IN uint64_t *position,
+    IN uint64_t offset_end,
+    OUT pnfs_io_unit *io);
 
 /* 13.4.2. Interpreting the File Layout Using Sparse Packing
  * http://tools.ietf.org/html/rfc5661#section-13.4.2 */
@@ -113,22 +121,107 @@ static enum pnfs_status get_dense_fh(
     return status;
 }
 
+static __inline bool_t layout_compatible(
+    IN const pnfs_layout *layout,
+    IN enum pnfs_iomode iomode,
+    IN uint64_t position)
+{
+    return layout->iomode >= iomode
+        && layout->offset <= position
+        && position < layout->offset + layout->length;
+}
+
+/* count stripes for all layout segments that intersect the range
+ * and have not been covered by previous segments */
+static uint32_t thread_count(
+    IN pnfs_layout_state *state,
+    IN enum pnfs_iomode iomode,
+    IN uint64_t offset,
+    IN uint64_t length)
+{
+    uint64_t position = offset;
+    struct list_entry *entry;
+    uint32_t count = 0;
+
+    list_for_each(entry, &state->layouts) {
+        pnfs_file_layout *layout = file_layout_entry(entry);
+
+        if (!layout_compatible(&layout->layout, iomode, position))
+            continue;
+
+        position = layout->layout.offset + layout->layout.length;
+        count += layout->device->stripes.count;
+    }
+    return count;
+}
+
 static enum pnfs_status thread_init(
     IN pnfs_io_pattern *pattern,
     IN pnfs_io_thread *thread,
     IN pnfs_file_layout *layout,
-    IN uint32_t stripeid)
+    IN uint32_t stripeid,
+    IN uint64_t offset)
 {
     thread->pattern = pattern;
     thread->layout = layout;
     thread->stable = FILE_SYNC4;
-    thread->offset = pattern->offset_start;
+    thread->offset = offset;
     thread->id = stripeid;
 
     return is_dense(layout) ? get_dense_fh(layout, stripeid, &thread->file)
         : get_sparse_fh(layout, pattern->meta_file, stripeid, &thread->file);
 }
 
+static enum pnfs_status pattern_threads_init(
+    IN pnfs_io_pattern *pattern,
+    IN enum pnfs_iomode iomode,
+    IN uint64_t offset,
+    IN uint64_t length)
+{
+    pnfs_io_unit io;
+    uint64_t position = offset;
+    struct list_entry *entry;
+    uint32_t s, t = 0;
+    enum pnfs_status status = PNFS_SUCCESS;
+
+    list_for_each(entry, &pattern->state->layouts) {
+        pnfs_file_layout *layout = file_layout_entry(entry);
+
+        if (!layout_compatible(&layout->layout, iomode, position))
+            continue;
+
+        for (s = 0; s < layout->device->stripes.count; s++) {
+            uint64_t off = position;
+
+            /* does the range contain this stripe? */
+            status = stripe_next_unit(layout, s, &off, offset + length, &io);
+            if (status != PNFS_PENDING)
+                continue;
+
+            if (t >= pattern->count) { /* miscounted threads needed? */
+                status = PNFSERR_NO_LAYOUT;
+                goto out;
+            }
+
+            status = thread_init(pattern, &pattern->threads[t++], layout, s, off);
+            if (status)
+                goto out;
+        }
+        position = layout->layout.offset + layout->layout.length;
+    }
+
+    if (position < offset + length) {
+        /* unable to satisfy the entire range */
+        status = PNFSERR_NO_LAYOUT;
+        goto out;
+    }
+
+    /* update the pattern with the actual number of threads used */
+    pattern->count = t;
out:
+    return status;
+}
+
 static enum pnfs_status pattern_init(
     IN pnfs_io_pattern *pattern,
     IN nfs41_root *root,
@@ -136,20 +229,22 @@ static enum pnfs_status pattern_init(
     IN const stateid_arg *stateid,
     IN pnfs_layout_state *state,
     IN unsigned char *buffer,
+    IN enum pnfs_iomode iomode,
     IN uint64_t offset,
     IN uint64_t length,
     IN uint32_t default_lease)
 {
-    uint32_t i;
     enum pnfs_status status;
 
-    pattern->count = state->layout->device->stripes.count;
+    /* calculate an upper bound on the number of threads to allocate */
+    pattern->count = thread_count(state, iomode, offset, length);
     pattern->threads = calloc(pattern->count, sizeof(pnfs_io_thread));
     if (pattern->threads == NULL) {
         status = PNFSERR_RESOURCES;
         goto out;
     }
 
+    /* information shared between threads */
     pattern->root = root;
     pattern->meta_file = meta_file;
     pattern->stateid = stateid;
@@ -159,16 +254,13 @@ static enum pnfs_status pattern_init(
     pattern->offset_end = offset + length;
     pattern->default_lease = default_lease;
 
-    for (i = 0; i < pattern->count; i++) {
-        status = thread_init(pattern, &pattern->threads[i], state->layout, i);
+    /* initialize a thread for every stripe necessary to cover the range */
+    status = pattern_threads_init(pattern, iomode, offset, length);
     if (status)
         goto out_err_free;
-    }
 
     /* take a reference on the layout so we don't return it during io */
-    status = pnfs_layout_io_start(state);
-    if (status)
-        goto out_err_free;
+    pnfs_layout_io_start(state);
 out:
     return status;
@@ -278,65 +370,77 @@ static enum pnfs_status thread_data_server(
     return PNFS_SUCCESS;
 }
 
+static enum pnfs_status pattern_join(
+    IN HANDLE *threads,
+    IN DWORD count)
+{
+    DWORD status;
+    /* WaitForMultipleObjects() supports a maximum of 64 objects */
+    while (count) {
+        const DWORD n = min(count, MAXIMUM_WAIT_OBJECTS);
+        status = WaitForMultipleObjects(n, threads, TRUE, INFINITE);
+        if (status != WAIT_OBJECT_0)
+            return PNFSERR_RESOURCES;
+
+        count -= n;
+        threads += n;
+    }
+    return PNFS_SUCCESS;
+}
+
 static enum pnfs_status pattern_fork(
     IN pnfs_io_pattern *pattern,
     IN pnfs_io_thread_fn thread_fn)
 {
-    pnfs_io_unit io;
-#ifdef PNFS_THREADING
     HANDLE *threads;
-    uint32_t num_threads;
-#endif
     uint32_t i;
-    DWORD status;
-    enum pnfs_status pnfsstat = PNFS_SUCCESS;
+    enum pnfs_status status = PNFS_SUCCESS;
 
     if (pattern->count == 0)
         goto out;
 
-#ifdef PNFS_THREADING
-    /* create a thread for each unit that has actual io */
-    threads = calloc(pattern->count, sizeof(HANDLE));
-    if (threads == NULL) {
-        pnfsstat = PNFSERR_RESOURCES;
+    if (pattern->count == 1) {
+        /* no need to fork if there's only 1 thread */
+        status = (enum pnfs_status)thread_fn(pattern->threads);
+        goto out;
+    }
+
+    /* create a thread for each unit that has actual io */
+    threads = calloc(pattern->count, sizeof(HANDLE));
+    if (threads == NULL) {
+        status = PNFSERR_RESOURCES;
         goto out;
     }
 
-    num_threads = 0;
     for (i = 0; i < pattern->count; i++) {
-        if (thread_next_unit(&pattern->threads[i], &io) == PNFS_PENDING) {
-            threads[num_threads++] = (HANDLE)_beginthreadex(NULL, 0,
-                thread_fn, &pattern->threads[i], 0, NULL);
+        threads[i] = (HANDLE)_beginthreadex(NULL, 0,
+            thread_fn, &pattern->threads[i], 0, NULL);
+        if (threads[i] == NULL) {
+            eprintf("_beginthreadex() failed with %d\n", GetLastError());
+            pattern->count = i; /* join any threads already started */
+            break;
         }
     }
 
-    if (num_threads) { /* wait on all threads to finish */
-        status = WaitForMultipleObjects(num_threads, threads, TRUE, INFINITE);
-        if (status == WAIT_OBJECT_0)
-            status = NO_ERROR;
+    /* wait on all threads to finish */
+    status = pattern_join(threads, pattern->count);
+    if (status) {
+        eprintf("pattern_join() failed with %s\n", pnfs_error_string(status));
+        goto out;
+    }
 
-        for (i = 0; i < num_threads; i++) {
-            /* keep track of the most severe error returned by a thread */
-            if (GetExitCodeThread(threads[i], &status))
-                pnfsstat = max(pnfsstat, (enum pnfs_status)status);
+    for (i = 0; i < pattern->count; i++) {
+        /* keep track of the most severe error returned by a thread */
+        DWORD exitcode;
+        if (GetExitCodeThread(threads[i], &exitcode))
+            status = max(status, (enum pnfs_status)exitcode);
 
-            CloseHandle(threads[i]);
-        }
+        CloseHandle(threads[i]);
     }
 
     free(threads);
-#else
-    /* process each server that has actual io */
-    for (i = 0; i < pattern->count; i++) {
-        if (thread_next_unit(&pattern->threads[i], &io) == PNFS_PENDING) {
-            /* keep track of the most severe error returned by a thread */
-            status = thread_fn(&pattern->threads[i]);
-            pnfsstat = max(pnfsstat, (enum pnfs_status)status);
-        }
-    }
-#endif
 out:
-    return pnfsstat;
+    return status;
 }
 
 static uint64_t pattern_bytes_transferred(
@@ -376,8 +480,6 @@ static enum pnfs_status map_ds_error(
     AcquireSRWLockExclusive(&state->lock);
     /* flag the layout for return once io is finished */
     state->status |= PNFS_LAYOUT_RECALLED | PNFS_LAYOUT_CHANGED;
-    /* reset GRANTED so we know not to try LAYOUTRETURN */
-    state->status &= ~PNFS_LAYOUT_GRANTED;
     ReleaseSRWLockExclusive(&state->lock);
 
     /* return CHANGED to prevent any further use of the layout */
@@ -423,7 +525,7 @@ static uint32_t WINAPI file_layout_read_thread(void *args)
     stateid.stateid.seqid = 0;
 
     total_read = 0;
-    while ((status = thread_next_unit(thread, &io)) == PNFS_PENDING) {
+    while (thread_next_unit(thread, &io) == PNFS_PENDING) {
         maxreadsize = max_read_size(client->session, &thread->file->fh);
         if (io.length > maxreadsize)
             io.length = maxreadsize;
@@ -498,7 +600,7 @@ retry_write:
     commit_max = 0;
     total_written = 0;
 
-    while ((status = thread_next_unit(thread, &io)) == PNFS_PENDING) {
+    while (thread_next_unit(thread, &io) == PNFS_PENDING) {
         if (io.length > maxwritesize)
             io.length = maxwritesize;
@@ -589,8 +691,9 @@ enum pnfs_status pnfs_read(
 
     if (status == PNFS_SUCCESS) {
         /* interpret the layout and set up threads for io */
-        status = pattern_init(&pattern, root, &state->file, stateid, layout,
-            buffer_out, offset, length, state->session->lease_time);
+        status = pattern_init(&pattern, root, &state->file, stateid,
+            layout, buffer_out, PNFS_IOMODE_READ, offset, length,
+            state->session->lease_time);
         if (status)
             eprintf("pattern_init() failed with %s\n",
                 pnfs_error_string(status));
@@ -685,8 +788,9 @@ enum pnfs_status pnfs_write(
 
     if (status == PNFS_SUCCESS) {
         /* interpret the layout and set up threads for io */
-        status = pattern_init(&pattern, root, &state->file, stateid, layout,
-            buffer, offset, length, state->session->lease_time);
+        status = pattern_init(&pattern, root, &state->file, stateid,
+            layout, buffer, PNFS_IOMODE_RW, offset, length,
+            state->session->lease_time);
         if (status)
             eprintf("pattern_init() failed with %s\n",
                 pnfs_error_string(status));
@@ -37,6 +37,8 @@ struct pnfs_layout_list {
 };
 
 #define state_entry(pos) list_container(pos, pnfs_layout_state, entry)
+#define layout_entry(pos) list_container(pos, pnfs_layout, entry)
+#define file_layout_entry(pos) list_container(pos, pnfs_file_layout, layout.entry)
 
 static enum pnfs_status layout_state_create(
     IN const nfs41_fh *meta_fh,
@@ -52,6 +54,7 @@ static enum pnfs_status layout_state_create(
     }
 
     fh_copy(&layout->meta_fh, meta_fh);
+    list_init(&layout->layouts);
     InitializeSRWLock(&layout->lock);
     InitializeConditionVariable(&layout->cond);
@@ -68,10 +71,19 @@ static void file_layout_free(
     free(layout);
 }
 
+static void layout_state_free_layouts(
+    IN pnfs_layout_state *state)
+{
+    struct list_entry *entry, *tmp;
+    list_for_each_tmp(entry, tmp, &state->layouts)
+        file_layout_free(file_layout_entry(entry));
+    list_init(&state->layouts);
+}
+
 static void layout_state_free(
     IN pnfs_layout_state *state)
 {
-    if (state->layout) file_layout_free(state->layout);
+    layout_state_free_layouts(state);
     free(state);
 }
@@ -194,36 +206,77 @@ static enum pnfs_status layout_state_find_and_delete(
 
 
 /* pnfs_file_layout */
+static uint64_t range_max(
+    IN const pnfs_layout *layout)
+{
+    uint64_t result = layout->offset + layout->length;
+    return result < layout->offset ? NFS4_UINT64_MAX : result;
+}
+
+static bool_t layout_sanity_check(
+    IN pnfs_file_layout *layout)
+{
+    /* prevent div/0 */
+    if (layout->layout.length == 0 ||
+        layout->layout.iomode < PNFS_IOMODE_READ ||
+        layout->layout.iomode > PNFS_IOMODE_RW ||
+        layout_unit_size(layout) == 0)
+        return FALSE;
+
+    /* put a cap on layout.length to prevent overflow */
+    layout->layout.length = range_max(&layout->layout) - layout->layout.offset;
+    return TRUE;
+}
+
+static void layout_ordered_insert(
+    IN pnfs_layout_state *state,
+    IN pnfs_layout *layout)
+{
+    struct list_entry *entry;
+    list_for_each(entry, &state->layouts) {
+        pnfs_layout *existing = layout_entry(entry);
+
+        /* maintain an order of increasing offset */
+        if (existing->offset < layout->offset)
+            continue;
+
+        /* when offsets are equal, prefer a longer segment first */
+        if (existing->offset == layout->offset &&
+            existing->length > layout->length)
+            continue;
+
+        list_add(&layout->entry, existing->entry.prev, &existing->entry);
+        return;
+    }
+
+    list_add_tail(&state->layouts, &layout->entry);
+}
+
 static enum pnfs_status layout_update_range(
     IN OUT pnfs_layout_state *state,
     IN const struct list_entry *layouts)
 {
     struct list_entry *entry, *tmp;
-    pnfs_layout *layout;
+    pnfs_file_layout *layout;
     enum pnfs_status status = PNFSERR_NO_LAYOUT;
 
     list_for_each_tmp(entry, tmp, layouts) {
-        layout = list_container(entry, pnfs_layout, entry);
+        layout = file_layout_entry(entry);
 
         /* don't know what to do with non-file layouts */
-        if (layout->type != PNFS_LAYOUTTYPE_FILE)
+        if (layout->layout.type != PNFS_LAYOUTTYPE_FILE)
            continue;
 
-        if (state->layout == NULL) {
-            /* store the first file layout returned */
-            dprintf(FLLVL, "Saving layout:\n");
-            dprint_layout(FLLVL, (pnfs_file_layout*)layout);
-
-            state->layout = (pnfs_file_layout*)layout;
-            status = PNFS_SUCCESS;
-        } else {
-            /* free anything else */
-            /* TODO: attempt to merge with existing segments */
-            dprintf(FLLVL, "Discarding extra layout:\n");
-            dprint_layout(FLLVL, (pnfs_file_layout*)layout);
-
-            file_layout_free((pnfs_file_layout*)layout);
+        if (!layout_sanity_check(layout)) {
+            file_layout_free(layout);
+            continue;
         }
+
+        dprintf(FLLVL, "Saving layout:\n");
+        dprint_layout(FLLVL, layout);
+
+        layout_ordered_insert(state, &layout->layout);
+        status = PNFS_SUCCESS;
     }
     return status;
 }
@@ -263,18 +316,13 @@ static enum pnfs_status layout_update(
     status = layout_update_stateid(state, &layoutget_res->stateid);
     if (status) {
         eprintf("LAYOUTGET returned a new stateid when we already had one\n");
-        goto out_free;
+        goto out;
     }
     /* if a previous LAYOUTGET set return_on_close, don't overwrite it */
     if (!state->return_on_close)
         state->return_on_close = layoutget_res->return_on_close;
 out:
     return status;
-
-out_free:
-    file_layout_free(state->layout);
-    state->layout = NULL;
-    goto out;
 }
 
 static enum pnfs_status file_layout_fetch(
@@ -312,11 +360,6 @@ static enum pnfs_status file_layout_fetch(
     case NFS4_OK:
         /* use the LAYOUTGET results to update our view of the layout */
         pnfsstat = layout_update(state, &layoutget_res);
-        if (pnfsstat)
-            break;
-
-        /* mark granted and clear other flags */
-        state->status = PNFS_LAYOUT_GRANTED;
         break;
 
     case NFS4ERR_BADIOMODE:
@@ -348,18 +391,17 @@ static enum pnfs_status layout_coverage_status(
     IN uint64_t length,
     OUT uint64_t *offset_missing)
 {
-    pnfs_file_layout *layout;
     uint64_t position = offset;
+    struct list_entry *entry;
 
-    /* XXX: foreach layout, sorted from lowest offset */
-    layout = state->layout;
-    if (layout) {
+    list_for_each(entry, &state->layouts) {
         /* if the current position intersects with a compatible
          * layout, move the position to the end of that layout */
-        if (layout->layout.iomode >= iomode &&
-            layout->layout.offset <= position &&
-            position < layout->layout.offset + layout->layout.length)
-            position = layout->layout.offset + layout->layout.length;
+        pnfs_layout *layout = layout_entry(entry);
+        if (layout->iomode >= iomode &&
+            layout->offset <= position &&
+            position < layout->offset + layout->length)
+            position = layout->offset + layout->length;
     }
 
     if (position >= offset + length)
@@ -378,6 +420,7 @@ static enum pnfs_status layout_fetch(
     IN uint64_t offset,
     IN uint64_t length)
 {
+    stateid_arg layout_stateid = { 0 };
     enum pnfs_status status = PNFS_PENDING;
 
     /* check for previous errors from LAYOUTGET */
@@ -394,8 +437,9 @@ static enum pnfs_status layout_fetch(
 
     /* if there's an existing layout stateid, use it */
     if (state->stateid.seqid) {
-        memcpy(&stateid->stateid, &state->stateid, sizeof(stateid4));
-        stateid->type = STATEID_LAYOUT;
+        memcpy(&layout_stateid.stateid, &state->stateid, sizeof(stateid4));
+        layout_stateid.type = STATEID_LAYOUT;
+        stateid = &layout_stateid;
     }
 
     if ((state->status & PNFS_LAYOUT_NOT_RW) == 0) {
@@ -422,30 +466,41 @@ static enum pnfs_status device_status(
     IN uint64_t length,
     OUT unsigned char *deviceid)
 {
-    /* XXX: foreach layout */
-    if (state->layout == NULL)
-        return PNFSERR_NO_LAYOUT;
-    if (state->layout->device)
-        return PNFS_SUCCESS;
+    struct list_entry *entry;
+    enum pnfs_status status = PNFS_SUCCESS;
 
-    /* copy missing deviceid */
-    memcpy(deviceid, state->layout->deviceid, PNFS_DEVICEID_SIZE);
-    return PNFS_PENDING;
+    list_for_each(entry, &state->layouts) {
+        pnfs_file_layout *layout = file_layout_entry(entry);
+
+        if (layout->device == NULL) {
+            /* copy missing deviceid */
+            memcpy(deviceid, layout->deviceid, PNFS_DEVICEID_SIZE);
+            status = PNFS_PENDING;
+            break;
+        }
+    }
+    return status;
 }
 
-static enum pnfs_status device_assign(
+static void device_assign(
     IN pnfs_layout_state *state,
     IN const unsigned char *deviceid,
     IN pnfs_file_device *device)
 {
-    /* XXX: foreach layout */
-    if (state->layout == NULL)
-        return PNFSERR_NO_LAYOUT;
-    /* update layouts with a matching deviceid */
-    if (memcmp(state->layout->deviceid, deviceid, PNFS_DEVICEID_SIZE) == 0)
-        state->layout->device = device;
+    struct list_entry *entry;
+    list_for_each(entry, &state->layouts) {
+        pnfs_file_layout *layout = file_layout_entry(entry);
 
-    return PNFS_SUCCESS;
+        /* assign the device to any matching layouts */
+        if (layout->device == NULL &&
+            memcmp(layout->deviceid, deviceid, PNFS_DEVICEID_SIZE) == 0) {
+            layout->device = device;
+
+            /* XXX: only assign the device to a single segment, because
+             * pnfs_file_device_get() only gives us a single reference */
+            break;
+        }
+    }
 }
 
 static enum pnfs_status device_fetch(
@@ -461,11 +516,9 @@ static enum pnfs_status device_fetch(
     status = pnfs_file_device_get(session,
         session->client->devices, deviceid, &device);
     AcquireSRWLockExclusive(&state->lock);
-    if (status)
-        goto out;
 
-    status = device_assign(state, deviceid, device);
-out:
+    if (status == PNFS_SUCCESS)
+        device_assign(state, deviceid, device);
     return status;
 }
@@ -610,8 +663,8 @@ out:
 static enum pnfs_status layout_return_status(
     IN const pnfs_layout_state *state)
 {
-    return (state->status & PNFS_LAYOUT_GRANTED) == 0
-        ? PNFS_SUCCESS : PNFS_PENDING;
+    /* return the layout if we have a stateid */
+    return state->stateid.seqid ? PNFS_SUCCESS : PNFS_PENDING;
 }
 
 static enum pnfs_status file_layout_return(
@@ -660,25 +713,13 @@ static enum pnfs_status file_layout_return(
         status = PNFS_SUCCESS;
 
         /* update the layout range held by the client */
-        file_layout_free(state->layout);
-        state->layout = NULL;
+        layout_state_free_layouts(state);
 
-        if (layoutreturn_res.stateid_present) {
-            /* update the layout seqid */
-            /* XXX: this shouldn't happen when we send a LAYOUTRETURN
-             * with IOMODE_ANY for the entire range */
-            memcpy(&state->stateid, &layoutreturn_res.stateid,
-                sizeof(stateid4));
-        } else {
-            /* 12.5.3. Layout Stateid: Once a client has no more
-             * layouts on a file, the layout stateid is no longer
-             * valid and MUST NOT be used. */
-            ZeroMemory(&state->stateid, sizeof(stateid4));
-        }
-
-        /* reset the granted flag */
-        state->status &= ~PNFS_LAYOUT_GRANTED;
+        /* 12.5.3. Layout Stateid: Once a client has no more
+         * layouts on a file, the layout stateid is no longer
+         * valid and MUST NOT be used. */
+        ZeroMemory(&state->stateid, sizeof(stateid4));
     }
 
     state->pending = FALSE;
@@ -736,17 +777,7 @@ static void layout_recall_return(
 {
     dprintf(FLLVL, "layout_recall_return() 'forgetting' layout\n");
 
-    if (state->layout) {
-        /* release our reference on the device */
-        if (state->layout->device) {
-            pnfs_file_device_put(state->layout->device);
-            state->layout->device = NULL;
-        }
-
-        /* update the layout range held by the client */
-        file_layout_free(state->layout);
-        state->layout = NULL;
-    }
+    layout_state_free_layouts(state);
 
     /* since we're forgetful, we don't actually return the layout;
      * just zero the stateid since it won't be valid anymore */
@@ -764,7 +795,7 @@ static enum pnfs_status file_layout_recall(
     /* under an exclusive lock, flag the layout as recalled */
     AcquireSRWLockExclusive(&state->lock);
 
-    if ((state->status & PNFS_LAYOUT_GRANTED) == 0) {
+    if (state->stateid.seqid == 0) {
         /* return NOMATCHINGLAYOUT if it wasn't actually granted */
         status = PNFSERR_NO_LAYOUT;
     } else if (recall->recall.type == PNFS_RETURN_FILE
@@ -918,24 +949,14 @@ out:
 
 
 /* expects caller to hold an exclusive lock on pnfs_layout_state */
-enum pnfs_status pnfs_layout_io_start(
+void pnfs_layout_io_start(
     IN pnfs_layout_state *state)
 {
-    enum pnfs_status status = PNFS_SUCCESS;
-
-    if ((layout_unit_size(state->layout) == 0 ) || /* prevent div/0 */
-        (state->layout->device->stripes.count == 0) ||
-        (state->layout->device->servers.count == 0)) {
-        status = PNFSERR_NO_LAYOUT;
-    } else {
-        /* take a reference on the layout, so that it won't be recalled
-         * until all io is finished */
-        state->io_count++;
-        dprintf(FLLVL, "pnfs_layout_io_start(): count -> %u\n",
-            state->io_count);
-    }
-
-    return status;
+    /* take a reference on the layout, so that it won't be recalled
+     * until all io is finished */
+    state->io_count++;
+    dprintf(FLLVL, "pnfs_layout_io_start(): count -> %u\n",
+        state->io_count);
 }
 
 void pnfs_layout_io_finished(

@@ -606,7 +606,7 @@ static uint32_t stateid_array(
 
     if (open->layout) { /* layout stateid? */
         AcquireSRWLockShared(&open->layout->lock);
-        if (open->layout->status & PNFS_LAYOUT_GRANTED) {
+        if (open->layout->stateid.seqid) {
             memcpy(&stateids[i].stateid, &open->layout->stateid, sizeof(stateid4));
             stateids[i].type = STATEID_LAYOUT;
             stateids[i].open = open;