/*
* Memory
*/
-#ifndef memory_pressure_get
-#define memory_pressure_get() (0)
-#endif
-#ifndef memory_pressure_set
-#define memory_pressure_set() do {} while (0)
-#endif
-#ifndef memory_pressure_clr
-#define memory_pressure_clr() do {} while (0)
+#if BITS_PER_LONG == 32
+/* limit to lowmem on 32-bit systems */
+#define NUM_CACHEPAGES \
- min(totalram_pages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4)
++ min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
+#else
+#define NUM_CACHEPAGES totalram_pages
#endif
+static inline unsigned int memory_pressure_get(void)
+{
+ return current->flags & PF_MEMALLOC;
+}
+
+static inline void memory_pressure_set(void)
+{
+ current->flags |= PF_MEMALLOC;
+}
+
+static inline void memory_pressure_clr(void)
+{
+ current->flags &= ~PF_MEMALLOC;
+}
+
static inline int cfs_memory_pressure_get_and_set(void)
{
int old = memory_pressure_get();
* NB: this is not going to work for variable page size,
* but we have to keep it for compatibility
*/
- len = npg * PAGE_CACHE_SIZE;
+ len = npg * PAGE_SIZE;
-
} else {
test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
opc = breq->blk_opc;
flags = breq->blk_flags;
npg = breq->blk_npg;
- len = npg * PAGE_CACHE_SIZE;
+ len = npg * PAGE_SIZE;
-
} else {
test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
{ \
type *value; \
\
- CLASSERT(PAGE_CACHE_SIZE >= sizeof(*value)); \
- CLASSERT(PAGE_SIZE >= sizeof (*value)); \
++ CLASSERT(PAGE_SIZE >= sizeof(*value)); \
\
value = kzalloc(sizeof(*value), GFP_NOFS); \
if (!value) \
body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
/* Checked by mdc_readpage() */
if (body->valid & OBD_MD_FLSIZE)
- cl_isize_write(inode, body->size);
+ i_size_write(inode, body->size);
- nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1)
- >> PAGE_CACHE_SHIFT;
+ nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
+ >> PAGE_SHIFT;
SetPageUptodate(page0);
}
unlock_page(page0);
if (!mapping)
return;
- ll_teardown_mmaps(mapping, offset, offset + PAGE_CACHE_SIZE);
+ /*
+ * truncate_complete_page() calls
+ * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
+ */
+ ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
truncate_complete_page(mapping, vmpage);
}
struct vm_area_struct *vma, unsigned long addr,
size_t count)
{
- policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+ policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
- (vma->vm_pgoff << PAGE_CACHE_SHIFT);
+ (vma->vm_pgoff << PAGE_SHIFT);
policy->l_extent.end = (policy->l_extent.start + count - 1) |
- ~CFS_PAGE_MASK;
+ ~PAGE_MASK;
}
struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
if (reserved != 0)
ll_ra_count_put(ll_i2sbi(inode), reserved);
- if (ra_end == end + 1 && ra_end == (kms >> PAGE_CACHE_SHIFT))
+ if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
- ll_ra_stats_inc(mapping, RA_STAT_EOF);
+ ll_ra_stats_inc(inode, RA_STAT_EOF);
/* if we didn't get to the end of the region we reserved from
* the ras we need to go back and update the ras so that the
CDEBUG(D_VFSTRACE,
"VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
- file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
- MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
+ file_offset, file_offset, count >> PAGE_SHIFT,
+ MAX_DIO_SIZE >> PAGE_SHIFT);
/* Check that all user buffers are aligned as well */
- if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+ if (iov_iter_alignment(iter) & ~PAGE_MASK)
return -EINVAL;
env = cl_env_get(&refcheck);
* page worth of page pointers = 4MB on i386.
*/
if (result == -ENOMEM &&
- size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
- PAGE_CACHE_SIZE) {
+ size > (PAGE_SIZE / sizeof(*pages)) *
+ PAGE_SIZE) {
size = ((((size / 2) - 1) |
- ~CFS_PAGE_MASK) + 1) &
- CFS_PAGE_MASK;
+ ~PAGE_MASK) + 1) &
+ PAGE_MASK;
CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
size);
continue;
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct ll_cl_context *lcc;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct cl_page *page;
+ struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
- struct page *page;
- int rc;
- unsigned from = pos & (PAGE_SIZE - 1);
+ pgoff_t index = pos >> PAGE_SHIFT;
- unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
+ struct page *vmpage = NULL;
++ unsigned int from = pos & (PAGE_SIZE - 1);
+ unsigned int to = from + len;
+ int result = 0;
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
+
+ lcc = ll_cl_init(file, NULL);
+ if (IS_ERR(lcc)) {
+ result = PTR_ERR(lcc);
+ goto out;
+ }
+
+ env = lcc->lcc_env;
+ io = lcc->lcc_io;
+
+ /* To avoid deadlock, try to lock page first. */
+ vmpage = grab_cache_page_nowait(mapping, index);
+ if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_page_list *plist = &vio->u.write.vui_queue;
- *pagep = page;
+ /* if the page is already in dirty cache, we have to commit
+ * the pages right now; otherwise, it may cause deadlock
+ * because it holds page lock of a dirty page and request for
+ * more grants. It's okay for the dirty page to be the first
+ * one in commit page list, though.
+ */
+ if (vmpage && plist->pl_nr > 0) {
+ unlock_page(vmpage);
- page_cache_release(vmpage);
++ put_page(vmpage);
+ vmpage = NULL;
+ }
- rc = ll_prepare_write(file, page, from, from + len);
- if (rc) {
- unlock_page(page);
- put_page(page);
+ /* commit pages and then wait for page lock */
+ result = vvp_io_write_commit(env, io);
+ if (result < 0)
+ goto out;
+
+ if (!vmpage) {
+ vmpage = grab_cache_page_write_begin(mapping, index,
+ flags);
+ if (!vmpage) {
+ result = -ENOMEM;
+ goto out;
+ }
+ }
}
- return rc;
+
+ page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+ if (IS_ERR(page)) {
+ result = PTR_ERR(page);
+ goto out;
+ }
+
+ lcc->lcc_page = page;
+ lu_ref_add(&page->cp_reference, "cl_io", io);
+
+ cl_page_assume(env, io, page);
+ if (!PageUptodate(vmpage)) {
+ /*
+ * We're completely overwriting an existing page,
+ * so _don't_ set it up to date until commit_write
+ */
+ if (from == 0 && to == PAGE_SIZE) {
+ CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
+ POISON_PAGE(vmpage, 0x11);
+ } else {
+ /* TODO: can be optimized at OSC layer to check if it
+ * is a lockless IO. In that case, it's not necessary
+ * to read the data.
+ */
+ result = ll_prepare_partial_page(env, io, page);
+ if (result == 0)
+ SetPageUptodate(vmpage);
+ }
+ }
+ if (result < 0)
+ cl_page_unassume(env, io, page);
+out:
+ if (result < 0) {
+ if (vmpage) {
+ unlock_page(vmpage);
- page_cache_release(vmpage);
++ put_page(vmpage);
+ }
+ if (!IS_ERR(lcc))
+ ll_cl_fini(lcc);
+ } else {
+ *pagep = vmpage;
+ *fsdata = lcc;
+ }
+ return result;
}
static int ll_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct page *vmpage, void *fsdata)
{
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ struct ll_cl_context *lcc = fsdata;
+ struct lu_env *env;
+ struct cl_io *io;
+ struct vvp_io *vio;
+ struct cl_page *page;
- int rc;
+ unsigned from = pos & (PAGE_SIZE - 1);
- page_cache_release(vmpage);
+ bool unplug = false;
+ int result = 0;
+
++ put_page(vmpage);
+
+ env = lcc->lcc_env;
+ page = lcc->lcc_page;
+ io = lcc->lcc_io;
+ vio = vvp_env_io(env);
+
+ LASSERT(cl_page_is_owned(page, io));
+ if (copied > 0) {
+ struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+ lcc->lcc_page = NULL; /* page will be queued */
+
+ /* Add it into write queue */
+ cl_page_list_add(plist, page);
+ if (plist->pl_nr == 1) /* first page */
+ vio->u.write.vui_from = from;
+ else
+ LASSERT(from == 0);
+ vio->u.write.vui_to = from + copied;
+
+ /* We may have one full RPC, commit it soon */
+ if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
+ unplug = true;
+
+ CL_PAGE_DEBUG(D_VFSTRACE, env, page,
+ "queued page: %d.\n", plist->pl_nr);
+ } else {
+ cl_page_disown(env, io, page);
+
+ /* page list is not contiguous now, commit it now */
+ unplug = true;
+ }
- rc = ll_commit_write(file, page, from, from + copied);
- unlock_page(page);
- put_page(page);
+ if (unplug ||
+ file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
+ result = vvp_io_write_commit(env, io);
- return rc ?: copied;
+ ll_cl_fini(lcc);
+ return result >= 0 ? copied : result;
}
#ifdef CONFIG_MIGRATION
return ~0ULL;
clob = vvp_pgcache_obj(env, dev, &id);
if (clob) {
- struct cl_object_header *hdr;
- int nr;
- struct cl_page *pg;
-
- /* got an object. Find next page. */
- hdr = cl_object_header(clob);
+ struct inode *inode = vvp_object_inode(clob);
+ struct page *vmpage;
+ int nr;
- spin_lock(&hdr->coh_page_guard);
- nr = radix_tree_gang_lookup(&hdr->coh_tree,
- (void **)&pg,
- id.vpi_index, 1);
+ nr = find_get_pages_contig(inode->i_mapping,
+ id.vpi_index, 1, &vmpage);
if (nr > 0) {
- id.vpi_index = pg->cp_index;
+ id.vpi_index = vmpage->index;
/* Cant support over 16T file */
- nr = !(pg->cp_index > 0xffffffff);
+ nr = !(vmpage->index > 0xffffffff);
- page_cache_release(vmpage);
++ put_page(vmpage);
}
- spin_unlock(&hdr->coh_page_guard);
lu_object_ref_del(&clob->co_lu, "dump", current);
cl_object_put(env, clob);
sbi = f->private;
clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
if (clob) {
- hdr = cl_object_header(clob);
-
- spin_lock(&hdr->coh_page_guard);
- page = cl_page_lookup(hdr, id.vpi_index);
- spin_unlock(&hdr->coh_page_guard);
+ struct inode *inode = vvp_object_inode(clob);
+ struct cl_page *page = NULL;
+ struct page *vmpage;
+
+ result = find_get_pages_contig(inode->i_mapping,
+ id.vpi_index, 1,
+ &vmpage);
+ if (result > 0) {
+ lock_page(vmpage);
+ page = cl_vmpage_page(vmpage, clob);
+ unlock_page(vmpage);
-
- page_cache_release(vmpage);
++ put_page(vmpage);
+ }
- seq_printf(f, "%8x@"DFID": ",
- id.vpi_index, PFID(&hdr->coh_lu.loh_fid));
+ seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
+ PFID(lu_object_fid(&clob->co_lu)));
if (page) {
vvp_pgcache_page_show(env, f, page);
cl_page_put(env, page);
return rc;
}
- loff_t cur_index = start >> PAGE_CACHE_SHIFT;
- loff_t size_index = (size - 1) >>
- PAGE_CACHE_SHIFT;
+static void vvp_object_size_lock(struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ ll_inode_size_lock(inode);
+ cl_object_attr_lock(obj);
+}
+
+static void vvp_object_size_unlock(struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ cl_object_attr_unlock(obj);
+ ll_inode_size_unlock(inode);
+}
+
+/**
+ * Helper function that if necessary adjusts file size (inode->i_size), when
+ * position at the offset \a pos is accessed. File size can be arbitrary stale
+ * on a Lustre client, but client at least knows KMS. If accessed area is
+ * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
+ *
+ * Locking: cl_isize_lock is used to serialize changes to inode size and to
+ * protect consistency between inode size and cl_object
+ * attributes. cl_object_size_lock() protects consistency between cl_attr's of
+ * top-object and sub-objects.
+ */
+static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, loff_t start, size_t count,
+ int *exceed)
+{
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ struct inode *inode = vvp_object_inode(obj);
+ loff_t pos = start + count - 1;
+ loff_t kms;
+ int result;
+
+ /*
+ * Consistency guarantees: following possibilities exist for the
+ * relation between region being accessed and real file size at this
+ * moment:
+ *
+ * (A): the region is completely inside of the file;
+ *
+ * (B-x): x bytes of region are inside of the file, the rest is
+ * outside;
+ *
+ * (C): the region is completely outside of the file.
+ *
+ * This classification is stable under DLM lock already acquired by
+ * the caller, because to change the class, other client has to take
+ * DLM lock conflicting with our lock. Also, any updates to ->i_size
+ * by other threads on this client are serialized by
+ * ll_inode_size_lock(). This guarantees that short reads are handled
+ * correctly in the face of concurrent writes and truncates.
+ */
+ vvp_object_size_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ kms = attr->cat_kms;
+ if (pos > kms) {
+ /*
+ * A glimpse is necessary to determine whether we
+ * return a short read (B) or some zeroes at the end
+ * of the buffer (C)
+ */
+ vvp_object_size_unlock(obj);
+ result = cl_glimpse_lock(env, io, inode, obj, 0);
+ if (result == 0 && exceed) {
+ /* If objective page index exceed end-of-file
+ * page index, return directly. Do not expect
+ * kernel will check such case correctly.
+ * linux-2.6.18-128.1.1 miss to do that.
+ * --bug 17336
+ */
+ loff_t size = i_size_read(inode);
++ loff_t cur_index = start >> PAGE_SHIFT;
++ loff_t size_index = (size - 1) >> PAGE_SHIFT;
+
+ if ((size == 0 && cur_index != 0) ||
+ size_index < cur_index)
+ *exceed = 1;
+ }
+ return result;
+ }
+ /*
+ * region is within kms and, hence, within real file
+ * size (A). We need to increase i_size to cover the
+ * read region so that generic_file_read() will do its
+ * job, but that doesn't mean the kms size is
+ * _correct_, it is only the _minimum_ size. If
+ * someone does a stat they will get the correct size
+ * which will always be >= the kms value here.
+ * b=11081
+ */
+ if (i_size_read(inode) < kms) {
+ i_size_write(inode, kms);
+ CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ (__u64)i_size_read(inode));
+ }
+ }
+
+ vvp_object_size_unlock(obj);
+
+ return result;
+}
+
/*****************************************************************************
*
* io operations.
inode->i_ino, cnt, pos, i_size_read(inode));
/* turn off the kernel's read-ahead */
- cio->cui_fd->fd_file->f_ra.ra_pages = 0;
+ vio->vui_fd->fd_file->f_ra.ra_pages = 0;
/* initialize read-ahead window once per syscall */
- if (!vio->cui_ra_window_set) {
- vio->cui_ra_window_set = 1;
- bead->lrr_start = cl_index(obj, pos);
- /*
- * XXX: explicit PAGE_SIZE
- */
- bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1);
- ll_ra_read_in(file, bead);
+ if (!vio->vui_ra_valid) {
+ vio->vui_ra_valid = true;
+ vio->vui_ra_start = cl_index(obj, pos);
- vio->vui_ra_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1);
++ vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
+ ll_ras_enter(file);
}
/* BUG: 5972 */
*
*/
-static void vvp_page_fini_common(struct ccc_page *cp)
+static void vvp_page_fini_common(struct vvp_page *vpg)
{
- struct page *vmpage = cp->cpg_page;
+ struct page *vmpage = vpg->vpg_page;
LASSERT(vmpage);
- page_cache_release(vmpage);
+ put_page(vmpage);
}
static void vvp_page_fini(const struct lu_env *env,
};
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
- struct ccc_page *cpg = cl_object_page_slice(obj, page);
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+ struct page *vmpage = page->cp_vmpage;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
- cpg->cpg_page = vmpage;
+ vpg->vpg_page = vmpage;
- page_cache_get(vmpage);
+ get_page(vmpage);
- INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
+ INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
if (page->cp_type == CPT_CACHEABLE) {
+ /* in cache, decref in vvp_page_delete */
+ atomic_inc(&page->cp_ref);
SetPagePrivate(vmpage);
vmpage->private = (unsigned long)page;
- cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops);
+ cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
+ &vvp_page_ops);
} else {
- struct ccc_object *clobj = cl2ccc(obj);
+ struct vvp_object *clobj = cl2vvp(obj);
- LASSERT(!inode_trylock(clobj->cob_inode));
- cl_page_slice_add(page, &cpg->cpg_cl, obj,
+ LASSERT(!inode_trylock(clobj->vob_inode));
+ cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
&vvp_transient_page_ops);
- clobj->cob_transient_pages++;
+ clobj->vob_transient_pages++;
}
return 0;
}
return lov_size;
}
- offset = lov_stripe_size(lsm, stripe_index << PAGE_CACHE_SHIFT,
- stripe);
- return offset >> PAGE_CACHE_SHIFT;
+/**
+ * Compute file level page index by stripe level page offset
+ */
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+ int stripe)
+{
+ loff_t offset;
+
++ offset = lov_stripe_size(lsm, stripe_index << PAGE_SHIFT, stripe);
++ return offset >> PAGE_SHIFT;
+}
+
/* we have an offset in file backed by an lov and want to find out where
* that offset lands in our given stripe of the file. for the easy
* case where the offset is within the stripe, we just have to scale the
*
*/
-static int lov_page_invariant(const struct cl_page_slice *slice)
+/**
+ * Adjust the stripe index by layout of raid0. @max_index is the maximum
+ * page index covered by an underlying DLM lock.
+ * This function converts max_index from stripe level to file level, and make
+ * sure it's not beyond one stripe.
+ */
+static int lov_raid0_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused,
+ pgoff_t *max_index)
{
- const struct cl_page *page = slice->cpl_page;
- const struct cl_page *sub = lov_sub_page(slice);
-
- return ergo(sub,
- page->cp_child == sub &&
- sub->cp_parent == page &&
- page->cp_state == sub->cp_state);
-}
+ struct lov_object *loo = cl2lov(slice->cpl_obj);
+ struct lov_layout_raid0 *r0 = lov_r0(loo);
+ pgoff_t index = *max_index;
+ unsigned int pps; /* pages per stripe */
-static void lov_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct cl_page *sub = lov_sub_page(slice);
+ CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr);
+ if (index == 0) /* the page is not covered by any lock */
+ return 0;
- LINVRNT(lov_page_invariant(slice));
+ if (r0->lo_nr == 1) /* single stripe file */
+ return 0;
- if (sub) {
- LASSERT(sub->cp_state == CPS_FREEING);
- lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent);
- sub->cp_parent = NULL;
- slice->cpl_page->cp_child = NULL;
- cl_page_put(env, sub);
+ /* max_index is stripe level, convert it into file level */
+ if (index != CL_PAGE_EOF) {
+ int stripeno = lov_page_stripe(slice->cpl_page);
+ *max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno);
}
-}
-
-static int lov_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io,
- int nonblock)
-{
- struct lov_io *lio = lov_env_io(env);
- struct lov_io_sub *sub;
- LINVRNT(lov_page_invariant(slice));
- LINVRNT(!cl2lov_page(slice)->lps_invalid);
+ /* calculate the end of current stripe */
- pps = loo->lo_lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
++ pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
+ index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1;
- sub = lov_page_subio(env, lio, slice);
- if (!IS_ERR(sub)) {
- lov_sub_page(slice)->cp_owner = sub->sub_io;
- lov_sub_put(sub);
- } else
- LBUG(); /* Arrgh */
+ /* never exceed the end of the stripe */
+ *max_index = min_t(pgoff_t, *max_index, index);
return 0;
}
CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
ret = -EINVAL;
}
- if ((u64val & ~PAGE_MASK) >= PAGE_CACHE_SIZE) {
- if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) {
++ if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
CWARN("mask failed: u64val %llu >= %llu\n", u64val,
- (__u64)PAGE_CACHE_SIZE);
+ (__u64)PAGE_SIZE);
ret = -EINVAL;
}
static void echo_page_fini(const struct lu_env *env,
struct cl_page_slice *slice)
{
- struct echo_page *ep = cl2echo_page(slice);
struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
- struct page *vmpage = ep->ep_vmpage;
atomic_dec(&eco->eo_npages);
- page_cache_release(slice->cpl_page->cp_vmpage);
- put_page(vmpage);
++ put_page(slice->cpl_page->cp_vmpage);
}
static int echo_page_prep(const struct lu_env *env,
struct echo_page *ep = cl_object_page_slice(obj, page);
struct echo_object *eco = cl2echo_obj(obj);
- page_cache_get(page->cp_vmpage);
- ep->ep_vmpage = vmpage;
- get_page(vmpage);
++ get_page(page->cp_vmpage);
mutex_init(&ep->ep_lock);
- cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
+ cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops);
atomic_inc(&eco->eo_npages);
return 0;
}
u64 npages, tot_pages;
int i, ret = 0, brw_flags = 0;
- if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0)
+ if (count <= 0 || (count & (~PAGE_MASK)) != 0)
return -EINVAL;
- npages = batch >> PAGE_CACHE_SHIFT;
- tot_pages = count >> PAGE_CACHE_SHIFT;
+ npages = batch >> PAGE_SHIFT;
+ tot_pages = count >> PAGE_SHIFT;
lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS);
rnb = kcalloc(npages, sizeof(struct niobuf_remote), GFP_NOFS);
pages_number > totalram_pages / 4) /* 1/4 of RAM */
return -ERANGE;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
- cli->cl_dirty_max = (u32)(pages_number << PAGE_CACHE_SHIFT);
+ cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
osc_wake_cache_waiters(cli);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return count;
}
/* if the max_pages is specified in bytes, convert to pages */
if (val >= ONE_MB_BRW_SIZE)
- val >>= PAGE_CACHE_SHIFT;
+ val >>= PAGE_SHIFT;
- chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_CACHE_SHIFT)) - 1);
+ chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1);
/* max_pages_per_rpc must be chunk aligned */
val = (val + ~chunk_mask) & chunk_mask;
- if (val == 0 || val > ocd->ocd_brw_size >> PAGE_CACHE_SHIFT) {
+ if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) {
return -ERANGE;
}
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_max_pages_per_rpc = val;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return count;
}
if (cur->oe_max_end != victim->oe_max_end)
return -ERANGE;
- LASSERT(cur->oe_osclock == victim->oe_osclock);
+ LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
- ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT;
+ ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT;
chunk_start = cur->oe_start >> ppc_bits;
chunk_end = cur->oe_end >> ppc_bits;
if (chunk_start != (victim->oe_end >> ppc_bits) + 1 &&
if (!cur)
return ERR_PTR(-ENOMEM);
- lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
- LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+ olck = osc_env_io(env)->oi_write_osclock;
+ LASSERTF(olck, "page %lu is not covered by lock\n", index);
+ LASSERT(olck->ols_state == OLS_GRANTED);
+
+ descr = &olck->ols_cl.cls_lock->cll_descr;
+ LASSERT(descr->cld_mode >= CLM_WRITE);
- LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
- ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+ LASSERT(cli->cl_chunkbits >= PAGE_SHIFT);
+ ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
chunk_mask = ~((1 << ppc_bits) - 1);
chunksize = 1 << cli->cl_chunkbits;
chunk = index >> ppc_bits;
if (result < 0)
return result;
kms = attr->cat_kms;
- if (cl_offset(obj, page->cp_index) >= kms)
+ if (cl_offset(obj, index) >= kms)
/* catch race with truncate */
return 0;
- else if (cl_offset(obj, page->cp_index + 1) > kms)
+ else if (cl_offset(obj, index + 1) > kms)
/* catch sub-page write at end of file */
- return kms % PAGE_CACHE_SIZE;
+ return kms % PAGE_SIZE;
else
- return PAGE_CACHE_SIZE;
+ return PAGE_SIZE;
}
static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
static void osc_consume_write_grant(struct client_obd *cli,
struct brw_page *pga)
{
- assert_spin_locked(&cli->cl_loi_list_lock.lock);
+ assert_spin_locked(&cli->cl_loi_list_lock);
LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
atomic_inc(&obd_dirty_pages);
- cli->cl_dirty += PAGE_CACHE_SIZE;
+ cli->cl_dirty += PAGE_SIZE;
pga->flag |= OBD_BRW_FROM_GRANT;
CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
- PAGE_CACHE_SIZE, pga, pga->pg);
+ PAGE_SIZE, pga, pga->pg);
osc_update_next_shrink(cli);
}
{
int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
atomic_sub(nr_pages, &obd_dirty_pages);
- cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT;
+ cli->cl_dirty -= nr_pages << PAGE_SHIFT;
cli->cl_lost_grant += lost_grant;
if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
/* borrow some grant from truncate to avoid the case that
return result;
}
-static int osc_io_commit_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
+static int osc_io_rw_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
{
- struct osc_io *oio = cl2osc_io(env, ios);
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
- struct osc_async_page *oap = &opg->ops_oap;
+ struct cl_io *io = ios->cis_io;
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct client_obd *cli = osc_cli(osc);
+ unsigned long c;
+ unsigned int npages;
+ unsigned int max_pages;
+
+ if (cl_io_is_append(io))
+ return 0;
+
- npages = io->u.ci_rw.crw_count >> PAGE_CACHE_SHIFT;
++ npages = io->u.ci_rw.crw_count >> PAGE_SHIFT;
+ if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
+ ++npages;
+
+ max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
+ if (npages > max_pages)
+ npages = max_pages;
+
+ c = atomic_read(cli->cl_lru_left);
+ if (c < npages && osc_lru_reclaim(cli) > 0)
+ c = atomic_read(cli->cl_lru_left);
+ while (c >= npages) {
+ if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+ oio->oi_lru_reserved = npages;
+ break;
+ }
+ c = atomic_read(cli->cl_lru_left);
+ }
- LASSERT(to > 0);
- /*
- * XXX instead of calling osc_page_touch() here and in
- * osc_io_fault_start() it might be more logical to introduce
- * cl_page_touch() method, that generic cl_io_commit_write() and page
- * fault code calls.
- */
- osc_page_touch(env, cl2osc_page(slice), to);
- if (!client_is_remote(osc_export(obj)) &&
- capable(CFS_CAP_SYS_RESOURCE))
- oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+ return 0;
+}
- if (oio->oi_lockless)
- /* see osc_io_prepare_write() for lockless io handling. */
- cl_page_clip(env, slice->cpl_page, from, to);
+static void osc_io_rw_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct client_obd *cli = osc_cli(osc);
- return 0;
+ if (oio->oi_lru_reserved > 0) {
+ atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
+ oio->oi_lru_reserved = 0;
+ }
+ oio->oi_write_osclock = NULL;
}
static int osc_io_fault_start(const struct lu_env *env,
int result;
opg->ops_from = 0;
- opg->ops_to = PAGE_CACHE_SIZE;
+ opg->ops_to = PAGE_SIZE;
- result = osc_prep_async_page(osc, opg, vmpage,
- cl_offset(obj, page->cp_index));
+ result = osc_prep_async_page(osc, opg, page->cp_vmpage,
+ cl_offset(obj, index));
if (result == 0) {
struct osc_io *oio = osc_env_io(env);
/* LRU pages are freed in batch mode. OSC should at least free this
* number of pages to avoid running out of LRU budget, and..
*/
- static const int lru_shrink_min = 2 << (20 - PAGE_CACHE_SHIFT); /* 2M */
+ static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */
/* free this number at most otherwise it will take too long time to finish. */
- static const int lru_shrink_max = 8 << (20 - PAGE_CACHE_SHIFT); /* 8M */
-static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
++static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
/* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
* we should free slots aggressively. In this way, slots are freed in a steady
static int osc_shrink_grant(struct client_obd *cli)
{
__u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
- (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT);
+ (cli->cl_max_pages_per_rpc << PAGE_SHIFT);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (cli->cl_avail_grant <= target_bytes)
- target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+ target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return osc_shrink_grant_to_target(cli, target_bytes);
}
* We don't want to shrink below a single RPC, as that will negatively
* impact block allocation and long-term performance.
*/
- if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT)
- target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+ if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_SHIFT)
+ target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
if (target_bytes >= cli->cl_avail_grant) {
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return 0;
}
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
body = kzalloc(sizeof(*body), GFP_NOFS);
if (!body)
}
/* determine the appropriate chunk size used by osc_extent. */
- cli->cl_chunkbits = max_t(int, PAGE_CACHE_SHIFT, ocd->ocd_blocksize);
+ cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
cli->cl_import->imp_obd->obd_name,
if (tmp)
tmp->oap_request = ptlrpc_request_addref(req);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
- starting_offset >>= PAGE_CACHE_SHIFT;
+ starting_offset >>= PAGE_SHIFT;
if (cmd == OBD_BRW_READ) {
cli->cl_r_in_flight++;
lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
goto skip_locking;
policy.l_extent.start = fm_key->fiemap.fm_start &
- CFS_PAGE_MASK;
+ PAGE_MASK;
if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
- fm_key->fiemap.fm_start + PAGE_CACHE_SIZE - 1)
+ fm_key->fiemap.fm_start + PAGE_SIZE - 1)
policy.l_extent.end = OBD_OBJECT_EOF;
else
policy.l_extent.end = (fm_key->fiemap.fm_start +
fm_key->fiemap.fm_length +
- PAGE_CACHE_SIZE - 1) & PAGE_MASK;
- PAGE_SIZE - 1) & CFS_PAGE_MASK;
++ PAGE_SIZE - 1) & PAGE_MASK;
ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
mode = ldlm_lock_match(exp->exp_obd->obd_namespace,