From: James Simmons Date: Tue, 8 Mar 2016 22:35:26 +0000 (-0500) Subject: staging: lustre: move libcfs to lnet layer X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=71bf3883ae5723f7895226c4b5fafa452c0cd00f;p=GitHub%2Fmoto-9609%2Fandroid_kernel_motorola_exynos9610.git staging: lustre: move libcfs to lnet layer The lustre file system has a layered architecture with libcfs as the lowest layer and LNet layered on top. Then on top of LNet we run the lustre client. This patch moves the libcfs module code out of lustre into the lnet tree. This fits into the long term goal of eventually merging libcfs into LNet. Signed-off-by: James Simmons Signed-off-by: Greg Kroah-Hartman --- diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile index f6f03e304d81..0a380fe88ce8 100644 --- a/drivers/staging/lustre/lnet/Makefile +++ b/drivers/staging/lustre/lnet/Makefile @@ -1 +1 @@ -obj-$(CONFIG_LNET) += lnet/ klnds/ selftest/ +obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/ diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile new file mode 100644 index 000000000000..8c8945545375 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/Makefile @@ -0,0 +1,17 @@ +obj-$(CONFIG_LNET) += libcfs.o + +libcfs-linux-objs := linux-tracefile.o linux-debug.o +libcfs-linux-objs += linux-prim.o linux-cpu.o +libcfs-linux-objs += linux-curproc.o +libcfs-linux-objs += linux-module.o +libcfs-linux-objs += linux-crypto.o +libcfs-linux-objs += linux-crypto-adler.o +libcfs-linux-objs += linux-mem.o + +libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) + +libcfs-all-objs := debug.o fail.o module.o tracefile.o \ + libcfs_string.o hash.o prng.o workitem.o \ + libcfs_cpu.o libcfs_mem.o libcfs_lock.o + +libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c new file mode 100644 index 
000000000000..c90e5102fe06 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -0,0 +1,560 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * libcfs/libcfs/debug.c + * + * Author: Phil Schwan + * + */ + +# define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/libcfs/libcfs.h" +#include "tracefile.h" + +static char debug_file_name[1024]; + +unsigned int libcfs_subsystem_debug = ~0; +EXPORT_SYMBOL(libcfs_subsystem_debug); +module_param(libcfs_subsystem_debug, int, 0644); +MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask"); + +unsigned int libcfs_debug = (D_CANTMASK | + D_NETERROR | D_HA | D_CONFIG | D_IOCTL); +EXPORT_SYMBOL(libcfs_debug); +module_param(libcfs_debug, int, 0644); +MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask"); + +static int libcfs_param_debug_mb_set(const char *val, + const struct kernel_param *kp) +{ + int rc; + unsigned num; + + rc = kstrtouint(val, 0, &num); + if (rc < 0) + return rc; + + if (!*((unsigned int *)kp->arg)) { + *((unsigned int *)kp->arg) = num; + return 0; + } + + rc = cfs_trace_set_debug_mb(num); + + if (!rc) + *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb(); + + return rc; +} + +/* While debug_mb setting look like unsigned int, in fact + * it needs quite a bunch of extra processing, so we define special + * debugmb parameter type with corresponding methods to handle this case + */ +static struct kernel_param_ops param_ops_debugmb = { + .set = libcfs_param_debug_mb_set, + .get = param_get_uint, +}; + +#define param_check_debugmb(name, p) \ + __param_check(name, p, unsigned int) + +static unsigned int libcfs_debug_mb; +module_param(libcfs_debug_mb, debugmb, 0644); +MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size."); + +unsigned int libcfs_printk = D_CANTMASK; +module_param(libcfs_printk, uint, 0644); +MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask"); + +unsigned int libcfs_console_ratelimit = 1; +module_param(libcfs_console_ratelimit, uint, 0644); +MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)"); + +static int 
param_set_delay_minmax(const char *val, + const struct kernel_param *kp, + long min, long max) +{ + long d; + int sec; + int rc; + + rc = kstrtoint(val, 0, &sec); + if (rc) + return -EINVAL; + + d = cfs_time_seconds(sec) / 100; + if (d < min || d > max) + return -EINVAL; + + *((unsigned int *)kp->arg) = d; + + return 0; +} + +static int param_get_delay(char *buffer, const struct kernel_param *kp) +{ + unsigned int d = *(unsigned int *)kp->arg; + + return sprintf(buffer, "%u", (unsigned int)cfs_duration_sec(d * 100)); +} + +unsigned int libcfs_console_max_delay; +unsigned int libcfs_console_min_delay; + +static int param_set_console_max_delay(const char *val, + const struct kernel_param *kp) +{ + return param_set_delay_minmax(val, kp, + libcfs_console_min_delay, INT_MAX); +} + +static struct kernel_param_ops param_ops_console_max_delay = { + .set = param_set_console_max_delay, + .get = param_get_delay, +}; + +#define param_check_console_max_delay(name, p) \ + __param_check(name, p, unsigned int) + +module_param(libcfs_console_max_delay, console_max_delay, 0644); +MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)"); + +static int param_set_console_min_delay(const char *val, + const struct kernel_param *kp) +{ + return param_set_delay_minmax(val, kp, + 1, libcfs_console_max_delay); +} + +static struct kernel_param_ops param_ops_console_min_delay = { + .set = param_set_console_min_delay, + .get = param_get_delay, +}; + +#define param_check_console_min_delay(name, p) \ + __param_check(name, p, unsigned int) + +module_param(libcfs_console_min_delay, console_min_delay, 0644); +MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)"); + +static int param_set_uint_minmax(const char *val, + const struct kernel_param *kp, + unsigned int min, unsigned int max) +{ + unsigned int num; + int ret; + + if (!val) + return -EINVAL; + ret = kstrtouint(val, 0, &num); + if (ret < 0 || num < min || num > max) 
+ return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} + +static int param_set_uintpos(const char *val, const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 1, -1); +} + +static struct kernel_param_ops param_ops_uintpos = { + .set = param_set_uintpos, + .get = param_get_uint, +}; + +#define param_check_uintpos(name, p) \ + __param_check(name, p, unsigned int) + +unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; +module_param(libcfs_console_backoff, uintpos, 0644); +MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor"); + +unsigned int libcfs_debug_binary = 1; + +unsigned int libcfs_stack = 3 * THREAD_SIZE / 4; +EXPORT_SYMBOL(libcfs_stack); + +unsigned int libcfs_catastrophe; +EXPORT_SYMBOL(libcfs_catastrophe); + +unsigned int libcfs_panic_on_lbug = 1; +module_param(libcfs_panic_on_lbug, uint, 0644); +MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); + +static wait_queue_head_t debug_ctlwq; + +char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT; + +/* We need to pass a pointer here, but elsewhere this must be a const */ +static char *libcfs_debug_file_path; +module_param(libcfs_debug_file_path, charp, 0644); +MODULE_PARM_DESC(libcfs_debug_file_path, + "Path for dumping debug logs, set 'NONE' to prevent log dumping"); + +int libcfs_panic_in_progress; + +/* libcfs_debug_token2mask() expects the returned string in lower-case */ +static const char * +libcfs_debug_subsys2str(int subsys) +{ + switch (1 << subsys) { + default: + return NULL; + case S_UNDEFINED: + return "undefined"; + case S_MDC: + return "mdc"; + case S_MDS: + return "mds"; + case S_OSC: + return "osc"; + case S_OST: + return "ost"; + case S_CLASS: + return "class"; + case S_LOG: + return "log"; + case S_LLITE: + return "llite"; + case S_RPC: + return "rpc"; + case S_LNET: + return "lnet"; + case S_LND: + return "lnd"; + case S_PINGER: + return "pinger"; + case S_FILTER: + return 
"filter"; + case S_ECHO: + return "echo"; + case S_LDLM: + return "ldlm"; + case S_LOV: + return "lov"; + case S_LQUOTA: + return "lquota"; + case S_OSD: + return "osd"; + case S_LFSCK: + return "lfsck"; + case S_LMV: + return "lmv"; + case S_SEC: + return "sec"; + case S_GSS: + return "gss"; + case S_MGC: + return "mgc"; + case S_MGS: + return "mgs"; + case S_FID: + return "fid"; + case S_FLD: + return "fld"; + } +} + +/* libcfs_debug_token2mask() expects the returned string in lower-case */ +static const char * +libcfs_debug_dbg2str(int debug) +{ + switch (1 << debug) { + default: + return NULL; + case D_TRACE: + return "trace"; + case D_INODE: + return "inode"; + case D_SUPER: + return "super"; + case D_EXT2: + return "ext2"; + case D_MALLOC: + return "malloc"; + case D_CACHE: + return "cache"; + case D_INFO: + return "info"; + case D_IOCTL: + return "ioctl"; + case D_NETERROR: + return "neterror"; + case D_NET: + return "net"; + case D_WARNING: + return "warning"; + case D_BUFFS: + return "buffs"; + case D_OTHER: + return "other"; + case D_DENTRY: + return "dentry"; + case D_NETTRACE: + return "nettrace"; + case D_PAGE: + return "page"; + case D_DLMTRACE: + return "dlmtrace"; + case D_ERROR: + return "error"; + case D_EMERG: + return "emerg"; + case D_HA: + return "ha"; + case D_RPCTRACE: + return "rpctrace"; + case D_VFSTRACE: + return "vfstrace"; + case D_READA: + return "reada"; + case D_MMAP: + return "mmap"; + case D_CONFIG: + return "config"; + case D_CONSOLE: + return "console"; + case D_QUOTA: + return "quota"; + case D_SEC: + return "sec"; + case D_LFSCK: + return "lfsck"; + } +} + +int +libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? 
libcfs_debug_subsys2str : + libcfs_debug_dbg2str; + int len = 0; + const char *token; + int i; + + if (mask == 0) { /* "0" */ + if (size > 0) + str[0] = '0'; + len = 1; + } else { /* space-separated tokens */ + for (i = 0; i < 32; i++) { + if ((mask & (1 << i)) == 0) + continue; + + token = fn(i); + if (!token) /* unused bit */ + continue; + + if (len > 0) { /* separator? */ + if (len < size) + str[len] = ' '; + len++; + } + + while (*token != 0) { + if (len < size) + str[len] = *token; + token++; + len++; + } + } + } + + /* terminate 'str' */ + if (len < size) + str[len] = 0; + else + str[size - 1] = 0; + + return len; +} + +int +libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : + libcfs_debug_dbg2str; + int m = 0; + int matched; + int n; + int t; + + /* Allow a number for backwards compatibility */ + + for (n = strlen(str); n > 0; n--) + if (!isspace(str[n - 1])) + break; + matched = n; + t = sscanf(str, "%i%n", &m, &matched); + if (t >= 1 && matched == n) { + /* don't print warning for lctl set_param debug=0 or -1 */ + if (m != 0 && m != -1) + CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n"); + *mask = m; + return 0; + } + + return cfs_str2mask(str, fn, mask, is_subsys ? 
0 : D_CANTMASK, + 0xffffffff); +} + +/** + * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() + */ +void libcfs_debug_dumplog_internal(void *arg) +{ + void *journal_info; + + journal_info = current->journal_info; + current->journal_info = NULL; + + if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) { + snprintf(debug_file_name, sizeof(debug_file_name) - 1, + "%s.%lld.%ld", libcfs_debug_file_path_arr, + (s64)ktime_get_real_seconds(), (long_ptr_t)arg); + pr_alert("LustreError: dumping log to %s\n", debug_file_name); + cfs_tracefile_dump_all_pages(debug_file_name); + libcfs_run_debug_log_upcall(debug_file_name); + } + + current->journal_info = journal_info; +} + +static int libcfs_debug_dumplog_thread(void *arg) +{ + libcfs_debug_dumplog_internal(arg); + wake_up(&debug_ctlwq); + return 0; +} + +void libcfs_debug_dumplog(void) +{ + wait_queue_t wait; + struct task_struct *dumper; + + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() + */ + init_waitqueue_entry(&wait, current); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&debug_ctlwq, &wait); + + dumper = kthread_run(libcfs_debug_dumplog_thread, + (void *)(long)current_pid(), + "libcfs_debug_dumper"); + if (IS_ERR(dumper)) + pr_err("LustreError: cannot start log dump thread: %ld\n", + PTR_ERR(dumper)); + else + schedule(); + + /* be sure to teardown if cfs_create_thread() failed */ + remove_wait_queue(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); +} +EXPORT_SYMBOL(libcfs_debug_dumplog); + +int libcfs_debug_init(unsigned long bufsize) +{ + int rc = 0; + unsigned int max = libcfs_debug_mb; + + init_waitqueue_head(&debug_ctlwq); + + if (libcfs_console_max_delay <= 0 || /* not set by user or */ + libcfs_console_min_delay <= 0 || /* set to invalid values */ + libcfs_console_min_delay >= libcfs_console_max_delay) { + libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; + 
libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; + } + + if (libcfs_debug_file_path) { + strlcpy(libcfs_debug_file_path_arr, + libcfs_debug_file_path, + sizeof(libcfs_debug_file_path_arr)); + } + + /* If libcfs_debug_mb is set to an invalid value or uninitialized + * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES + */ + if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) { + max = TCD_MAX_PAGES; + } else { + max = max / num_possible_cpus(); + max <<= (20 - PAGE_CACHE_SHIFT); + } + rc = cfs_tracefile_init(max); + + if (rc == 0) { + libcfs_register_panic_notifier(); + libcfs_debug_mb = cfs_trace_get_debug_mb(); + } + + return rc; +} + +int libcfs_debug_cleanup(void) +{ + libcfs_unregister_panic_notifier(); + cfs_tracefile_exit(); + return 0; +} + +int libcfs_debug_clear_buffer(void) +{ + cfs_trace_flush_pages(); + return 0; +} + +/* Debug markers, although printed by S_LNET should not be be marked as such. */ +#undef DEBUG_SUBSYSTEM +#define DEBUG_SUBSYSTEM S_UNDEFINED +int libcfs_debug_mark_buffer(const char *text) +{ + CDEBUG(D_TRACE, + "***************************************************\n"); + LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text); + CDEBUG(D_TRACE, + "***************************************************\n"); + + return 0; +} + +#undef DEBUG_SUBSYSTEM +#define DEBUG_SUBSYSTEM S_LNET diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c new file mode 100644 index 000000000000..dadaf7685cbd --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/fail.c @@ -0,0 +1,139 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores, + * CA 94065 USA or visit www.oracle.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Oracle Corporation, Inc. + */ + +#include "../../include/linux/libcfs/libcfs.h" + +unsigned long cfs_fail_loc; +EXPORT_SYMBOL(cfs_fail_loc); + +unsigned int cfs_fail_val; +EXPORT_SYMBOL(cfs_fail_val); + +DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq); +EXPORT_SYMBOL(cfs_race_waitq); + +int cfs_race_state; +EXPORT_SYMBOL(cfs_race_state); + +int __cfs_fail_check_set(__u32 id, __u32 value, int set) +{ + static atomic_t cfs_fail_count = ATOMIC_INIT(0); + + LASSERT(!(id & CFS_FAIL_ONCE)); + + if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) == + (CFS_FAILED | CFS_FAIL_ONCE)) { + atomic_set(&cfs_fail_count, 0); /* paranoia */ + return 0; + } + + /* Fail 1/cfs_fail_val times */ + if (cfs_fail_loc & CFS_FAIL_RAND) { + if (cfs_fail_val < 2 || cfs_rand() % cfs_fail_val > 0) + return 0; + } + + /* Skip the first cfs_fail_val, then fail */ + if (cfs_fail_loc & CFS_FAIL_SKIP) { + if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val) + return 0; + } + + /* check cfs_fail_val... 
*/ + if (set == CFS_FAIL_LOC_VALUE) { + if (cfs_fail_val != -1 && cfs_fail_val != value) + return 0; + } + + /* Fail cfs_fail_val times, overridden by FAIL_ONCE */ + if (cfs_fail_loc & CFS_FAIL_SOME && + (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) { + int count = atomic_inc_return(&cfs_fail_count); + + if (count >= cfs_fail_val) { + set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); + atomic_set(&cfs_fail_count, 0); + /* we are lost race to increase */ + if (count > cfs_fail_val) + return 0; + } + } + + if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) && + (value & CFS_FAIL_ONCE)) + set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); + /* Lost race to set CFS_FAILED_BIT. */ + if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { + /* If CFS_FAIL_ONCE is valid, only one process can fail, + * otherwise multi-process can fail at the same time. + */ + if (cfs_fail_loc & CFS_FAIL_ONCE) + return 0; + } + + switch (set) { + case CFS_FAIL_LOC_NOSET: + case CFS_FAIL_LOC_VALUE: + break; + case CFS_FAIL_LOC_ORSET: + cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE); + break; + case CFS_FAIL_LOC_RESET: + cfs_fail_loc = value; + break; + default: + LASSERTF(0, "called with bad set %u\n", set); + break; + } + + return 1; +} +EXPORT_SYMBOL(__cfs_fail_check_set); + +int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) +{ + int ret; + + ret = __cfs_fail_check_set(id, value, set); + if (ret && likely(ms > 0)) { + CERROR("cfs_fail_timeout id %x sleeping for %dms\n", + id, ms); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(ms) / 1000); + CERROR("cfs_fail_timeout id %x awake\n", id); + } + return ret; +} +EXPORT_SYMBOL(__cfs_fail_timeout_set); diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c new file mode 100644 index 000000000000..f60feb3a3dc7 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/hash.c @@ -0,0 +1,2085 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE 
COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/hash.c + * + * Implement a hash class for hash process in lustre system. 
+ * + * Author: YuZhangyong + * + * 2008-08-15: Brian Behlendorf + * - Simplified API and improved documentation + * - Added per-hash feature flags: + * * CFS_HASH_DEBUG additional validation + * * CFS_HASH_REHASH dynamic rehashing + * - Added per-hash statistics + * - General performance enhancements + * + * 2009-07-31: Liang Zhen + * - move all stuff to libcfs + * - don't allow cur_bits != max_bits unless CFS_HASH_REHASH is set + * - ignore hs_rwlock when CFS_HASH_REHASH is not set + * - buckets are allocated one by one (instead of contiguous memory), + * to avoid unnecessary cacheline conflicts + * + * 2010-03-01: Liang Zhen + * - "bucket" is a group of hlist_head now, user can specify bucket size + * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share + * one lock for reducing memory overhead. + * + * - support lockless hash, caller will take care of locks: + * avoid lock overhead for hash tables that are already protected + * by locking in the caller for another reason + * + * - support both spin_lock/rwlock for bucket: + * overhead of spinlock contention is lower than read/write + * contention of rwlock, so using spinlock to serialize operations on + * bucket is more reasonable for those frequently changed hash tables + * + * - support one-single lock mode: + * one lock to protect all hash operations to avoid overhead of + * multiple locks if hash table is always small + * + * - removed a lot of unnecessary addref & decref on hash element: + * addref & decref are atomic operations in many use-cases which + * are expensive. + * + * - support non-blocking cfs_hash_add() and cfs_hash_findadd(): + * some lustre use-cases require these functions to be strictly + * non-blocking, we need to schedule required rehash on a different + * thread in those cases. 
+ * + * - safer rehash on large hash table + * In the old implementation, the rehash function exclusively locked the + * hash table and finished the rehash in one batch; this is dangerous on SMP + * systems because rehashing millions of elements could take a long time. + * The new rehash implementation can release the lock and relax the CPU in the + * middle of a rehash, so it is safe for another thread to search/change the + * hash table even while it is being rehashed. + * + * - support two different refcount modes + * . hash table has refcount on element + * . hash table doesn't change refcount on adding/removing element + * + * - support long name hash table (for param-tree) + * + * - fix a bug for cfs_hash_rehash_key: + * in the old implementation, cfs_hash_rehash_key could screw up the + * hash-table because @key is overwritten without any protection. + * Now the user must define hs_keycpy for rehash-enabled hash tables; + * cfs_hash_rehash_key will overwrite the hash-key + * inside the lock by calling hs_keycpy. + * + * - better hash iteration: + * Now we support both locked iteration & lockless iteration of hash + * table. Also, the user can break the iteration by returning 1 from the callback. 
+ */ +#include +#include + +#include "../../include/linux/libcfs/libcfs.h" + +#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 +static unsigned int warn_on_depth = 8; +module_param(warn_on_depth, uint, 0644); +MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high."); +#endif + +struct cfs_wi_sched *cfs_sched_rehash; + +static inline void +cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {} + +static inline void +cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {} + +static inline void +cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive) + __acquires(&lock->spin) +{ + spin_lock(&lock->spin); +} + +static inline void +cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive) + __releases(&lock->spin) +{ + spin_unlock(&lock->spin); +} + +static inline void +cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive) + __acquires(&lock->rw) +{ + if (!exclusive) + read_lock(&lock->rw); + else + write_lock(&lock->rw); +} + +static inline void +cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive) + __releases(&lock->rw) +{ + if (!exclusive) + read_unlock(&lock->rw); + else + write_unlock(&lock->rw); +} + +/** No lock hash */ +static struct cfs_hash_lock_ops cfs_hash_nl_lops = { + .hs_lock = cfs_hash_nl_lock, + .hs_unlock = cfs_hash_nl_unlock, + .hs_bkt_lock = cfs_hash_nl_lock, + .hs_bkt_unlock = cfs_hash_nl_unlock, +}; + +/** no bucket lock, one spinlock to protect everything */ +static struct cfs_hash_lock_ops cfs_hash_nbl_lops = { + .hs_lock = cfs_hash_spin_lock, + .hs_unlock = cfs_hash_spin_unlock, + .hs_bkt_lock = cfs_hash_nl_lock, + .hs_bkt_unlock = cfs_hash_nl_unlock, +}; + +/** spin bucket lock, rehash is enabled */ +static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = { + .hs_lock = cfs_hash_rw_lock, + .hs_unlock = cfs_hash_rw_unlock, + .hs_bkt_lock = cfs_hash_spin_lock, + .hs_bkt_unlock = cfs_hash_spin_unlock, +}; + +/** rw bucket lock, rehash is enabled */ +static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops 
= { + .hs_lock = cfs_hash_rw_lock, + .hs_unlock = cfs_hash_rw_unlock, + .hs_bkt_lock = cfs_hash_rw_lock, + .hs_bkt_unlock = cfs_hash_rw_unlock, +}; + +/** spin bucket lock, rehash is disabled */ +static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = { + .hs_lock = cfs_hash_nl_lock, + .hs_unlock = cfs_hash_nl_unlock, + .hs_bkt_lock = cfs_hash_spin_lock, + .hs_bkt_unlock = cfs_hash_spin_unlock, +}; + +/** rw bucket lock, rehash is disabled */ +static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = { + .hs_lock = cfs_hash_nl_lock, + .hs_unlock = cfs_hash_nl_unlock, + .hs_bkt_lock = cfs_hash_rw_lock, + .hs_bkt_unlock = cfs_hash_rw_unlock, +}; + +static void +cfs_hash_lock_setup(struct cfs_hash *hs) +{ + if (cfs_hash_with_no_lock(hs)) { + hs->hs_lops = &cfs_hash_nl_lops; + + } else if (cfs_hash_with_no_bktlock(hs)) { + hs->hs_lops = &cfs_hash_nbl_lops; + spin_lock_init(&hs->hs_lock.spin); + + } else if (cfs_hash_with_rehash(hs)) { + rwlock_init(&hs->hs_lock.rw); + + if (cfs_hash_with_rw_bktlock(hs)) + hs->hs_lops = &cfs_hash_bkt_rw_lops; + else if (cfs_hash_with_spin_bktlock(hs)) + hs->hs_lops = &cfs_hash_bkt_spin_lops; + else + LBUG(); + } else { + if (cfs_hash_with_rw_bktlock(hs)) + hs->hs_lops = &cfs_hash_nr_bkt_rw_lops; + else if (cfs_hash_with_spin_bktlock(hs)) + hs->hs_lops = &cfs_hash_nr_bkt_spin_lops; + else + LBUG(); + } +} + +/** + * Simple hash head without depth tracking + * new element is always added to head of hlist + */ +struct cfs_hash_head { + struct hlist_head hh_head; /**< entries list */ +}; + +static int +cfs_hash_hh_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_head); +} + +static struct hlist_head * +cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_head *head; + + head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].hh_head; +} + +static int +cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + 
hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd)); + return -1; /* unknown depth */ +} + +static int +cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + hlist_del_init(hnode); + return -1; /* unknown depth */ +} + +/** + * Simple hash head with depth tracking + * new element is always added to head of hlist + */ +struct cfs_hash_head_dep { + struct hlist_head hd_head; /**< entries list */ + unsigned int hd_depth; /**< list length */ +}; + +static int +cfs_hash_hd_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_head_dep); +} + +static struct hlist_head * +cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_head_dep *head; + + head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].hd_head; +} + +static int +cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_head_dep *hh; + + hh = container_of(cfs_hash_hd_hhead(hs, bd), + struct cfs_hash_head_dep, hd_head); + hlist_add_head(hnode, &hh->hd_head); + return ++hh->hd_depth; +} + +static int +cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_head_dep *hh; + + hh = container_of(cfs_hash_hd_hhead(hs, bd), + struct cfs_hash_head_dep, hd_head); + hlist_del_init(hnode); + return --hh->hd_depth; +} + +/** + * double links hash head without depth tracking + * new element is always added to tail of hlist + */ +struct cfs_hash_dhead { + struct hlist_head dh_head; /**< entries list */ + struct hlist_node *dh_tail; /**< the last entry */ +}; + +static int +cfs_hash_dh_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_dhead); +} + +static struct hlist_head * +cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_dhead *head; + + head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0]; + return 
&head[bd->bd_offset].dh_head; +} + +static int +cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_dhead *dh; + + dh = container_of(cfs_hash_dh_hhead(hs, bd), + struct cfs_hash_dhead, dh_head); + if (dh->dh_tail) /* not empty */ + hlist_add_behind(hnode, dh->dh_tail); + else /* empty list */ + hlist_add_head(hnode, &dh->dh_head); + dh->dh_tail = hnode; + return -1; /* unknown depth */ +} + +static int +cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnd) +{ + struct cfs_hash_dhead *dh; + + dh = container_of(cfs_hash_dh_hhead(hs, bd), + struct cfs_hash_dhead, dh_head); + if (!hnd->next) { /* it's the tail */ + dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL : + container_of(hnd->pprev, struct hlist_node, next); + } + hlist_del_init(hnd); + return -1; /* unknown depth */ +} + +/** + * double links hash head with depth tracking + * new element is always added to tail of hlist + */ +struct cfs_hash_dhead_dep { + struct hlist_head dd_head; /**< entries list */ + struct hlist_node *dd_tail; /**< the last entry */ + unsigned int dd_depth; /**< list length */ +}; + +static int +cfs_hash_dd_hhead_size(struct cfs_hash *hs) +{ + return sizeof(struct cfs_hash_dhead_dep); +} + +static struct hlist_head * +cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) +{ + struct cfs_hash_dhead_dep *head; + + head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0]; + return &head[bd->bd_offset].dd_head; +} + +static int +cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + struct cfs_hash_dhead_dep *dh; + + dh = container_of(cfs_hash_dd_hhead(hs, bd), + struct cfs_hash_dhead_dep, dd_head); + if (dh->dd_tail) /* not empty */ + hlist_add_behind(hnode, dh->dd_tail); + else /* empty list */ + hlist_add_head(hnode, &dh->dd_head); + dh->dd_tail = hnode; + return ++dh->dd_depth; +} + +static int 
+cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnd) +{ + struct cfs_hash_dhead_dep *dh; + + dh = container_of(cfs_hash_dd_hhead(hs, bd), + struct cfs_hash_dhead_dep, dd_head); + if (!hnd->next) { /* it's the tail */ + dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL : + container_of(hnd->pprev, struct hlist_node, next); + } + hlist_del_init(hnd); + return --dh->dd_depth; +} + +static struct cfs_hash_hlist_ops cfs_hash_hh_hops = { + .hop_hhead = cfs_hash_hh_hhead, + .hop_hhead_size = cfs_hash_hh_hhead_size, + .hop_hnode_add = cfs_hash_hh_hnode_add, + .hop_hnode_del = cfs_hash_hh_hnode_del, +}; + +static struct cfs_hash_hlist_ops cfs_hash_hd_hops = { + .hop_hhead = cfs_hash_hd_hhead, + .hop_hhead_size = cfs_hash_hd_hhead_size, + .hop_hnode_add = cfs_hash_hd_hnode_add, + .hop_hnode_del = cfs_hash_hd_hnode_del, +}; + +static struct cfs_hash_hlist_ops cfs_hash_dh_hops = { + .hop_hhead = cfs_hash_dh_hhead, + .hop_hhead_size = cfs_hash_dh_hhead_size, + .hop_hnode_add = cfs_hash_dh_hnode_add, + .hop_hnode_del = cfs_hash_dh_hnode_del, +}; + +static struct cfs_hash_hlist_ops cfs_hash_dd_hops = { + .hop_hhead = cfs_hash_dd_hhead, + .hop_hhead_size = cfs_hash_dd_hhead_size, + .hop_hnode_add = cfs_hash_dd_hnode_add, + .hop_hnode_del = cfs_hash_dd_hnode_del, +}; + +static void +cfs_hash_hlist_setup(struct cfs_hash *hs) +{ + if (cfs_hash_with_add_tail(hs)) { + hs->hs_hops = cfs_hash_with_depth(hs) ? + &cfs_hash_dd_hops : &cfs_hash_dh_hops; + } else { + hs->hs_hops = cfs_hash_with_depth(hs) ? 
+ &cfs_hash_hd_hops : &cfs_hash_hh_hops; + } +} + +static void +cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts, + unsigned int bits, const void *key, struct cfs_hash_bd *bd) +{ + unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1); + + LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits); + + bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)]; + bd->bd_offset = index >> (bits - hs->hs_bkt_bits); +} + +void +cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd) +{ + /* NB: caller should hold hs->hs_rwlock if REHASH is set */ + if (likely(!hs->hs_rehash_buckets)) { + cfs_hash_bd_from_key(hs, hs->hs_buckets, + hs->hs_cur_bits, key, bd); + } else { + LASSERT(hs->hs_rehash_bits != 0); + cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, + hs->hs_rehash_bits, key, bd); + } +} +EXPORT_SYMBOL(cfs_hash_bd_get); + +static inline void +cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur) +{ + if (likely(dep_cur <= bd->bd_bucket->hsb_depmax)) + return; + + bd->bd_bucket->hsb_depmax = dep_cur; +# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 + if (likely(warn_on_depth == 0 || + max(warn_on_depth, hs->hs_dep_max) >= dep_cur)) + return; + + spin_lock(&hs->hs_dep_lock); + hs->hs_dep_max = dep_cur; + hs->hs_dep_bkt = bd->bd_bucket->hsb_index; + hs->hs_dep_off = bd->bd_offset; + hs->hs_dep_bits = hs->hs_cur_bits; + spin_unlock(&hs->hs_dep_lock); + + cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi); +# endif +} + +void +cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + int rc; + + rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode); + cfs_hash_bd_dep_record(hs, bd, rc); + bd->bd_bucket->hsb_version++; + if (unlikely(bd->bd_bucket->hsb_version == 0)) + bd->bd_bucket->hsb_version++; + bd->bd_bucket->hsb_count++; + + if (cfs_hash_with_counter(hs)) + atomic_inc(&hs->hs_count); + if (!cfs_hash_with_no_itemref(hs)) + cfs_hash_get(hs, 
hnode); +} +EXPORT_SYMBOL(cfs_hash_bd_add_locked); + +void +cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode) +{ + hs->hs_hops->hop_hnode_del(hs, bd, hnode); + + LASSERT(bd->bd_bucket->hsb_count > 0); + bd->bd_bucket->hsb_count--; + bd->bd_bucket->hsb_version++; + if (unlikely(bd->bd_bucket->hsb_version == 0)) + bd->bd_bucket->hsb_version++; + + if (cfs_hash_with_counter(hs)) { + LASSERT(atomic_read(&hs->hs_count) > 0); + atomic_dec(&hs->hs_count); + } + if (!cfs_hash_with_no_itemref(hs)) + cfs_hash_put_locked(hs, hnode); +} +EXPORT_SYMBOL(cfs_hash_bd_del_locked); + +void +cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, + struct cfs_hash_bd *bd_new, struct hlist_node *hnode) +{ + struct cfs_hash_bucket *obkt = bd_old->bd_bucket; + struct cfs_hash_bucket *nbkt = bd_new->bd_bucket; + int rc; + + if (cfs_hash_bd_compare(bd_old, bd_new) == 0) + return; + + /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops + * in cfs_hash_bd_del/add_locked + */ + hs->hs_hops->hop_hnode_del(hs, bd_old, hnode); + rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode); + cfs_hash_bd_dep_record(hs, bd_new, rc); + + LASSERT(obkt->hsb_count > 0); + obkt->hsb_count--; + obkt->hsb_version++; + if (unlikely(obkt->hsb_version == 0)) + obkt->hsb_version++; + nbkt->hsb_count++; + nbkt->hsb_version++; + if (unlikely(nbkt->hsb_version == 0)) + nbkt->hsb_version++; +} + +enum { + /** always set, for sanity (avoid ZERO intent) */ + CFS_HS_LOOKUP_MASK_FIND = BIT(0), + /** return entry with a ref */ + CFS_HS_LOOKUP_MASK_REF = BIT(1), + /** add entry if not existing */ + CFS_HS_LOOKUP_MASK_ADD = BIT(2), + /** delete entry, ignore other masks */ + CFS_HS_LOOKUP_MASK_DEL = BIT(3), +}; + +enum cfs_hash_lookup_intent { + /** return item w/o refcount */ + CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND, + /** return item with refcount */ + CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND | + CFS_HS_LOOKUP_MASK_REF), + /** return 
item w/o refcount if existed, otherwise add */ + CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND | + CFS_HS_LOOKUP_MASK_ADD), + /** return item with refcount if existed, otherwise add */ + CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND | + CFS_HS_LOOKUP_MASK_ADD), + /** delete if existed */ + CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND | + CFS_HS_LOOKUP_MASK_DEL) +}; + +static struct hlist_node * +cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, + const void *key, struct hlist_node *hnode, + enum cfs_hash_lookup_intent intent) + +{ + struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd); + struct hlist_node *ehnode; + struct hlist_node *match; + int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0; + + /* with this function, we can avoid a lot of useless refcount ops, + * which are expensive atomic operations most time. + */ + match = intent_add ? NULL : hnode; + hlist_for_each(ehnode, hhead) { + if (!cfs_hash_keycmp(hs, key, ehnode)) + continue; + + if (match && match != ehnode) /* can't match */ + continue; + + /* match and ... */ + if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) { + cfs_hash_bd_del_locked(hs, bd, ehnode); + return ehnode; + } + + /* caller wants refcount? 
*/ + if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0) + cfs_hash_get(hs, ehnode); + return ehnode; + } + /* no match item */ + if (!intent_add) + return NULL; + + LASSERT(hnode); + cfs_hash_bd_add_locked(hs, bd, hnode); + return hnode; +} + +struct hlist_node * +cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + const void *key) +{ + return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, + CFS_HS_LOOKUP_IT_FIND); +} +EXPORT_SYMBOL(cfs_hash_bd_lookup_locked); + +struct hlist_node * +cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + const void *key) +{ + return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, + CFS_HS_LOOKUP_IT_PEEK); +} +EXPORT_SYMBOL(cfs_hash_bd_peek_locked); + +static void +cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, int excl) +{ + struct cfs_hash_bucket *prev = NULL; + int i; + + /** + * bds must be ascendantly ordered by bd->bd_bucket->hsb_index. + * NB: it's possible that several bds point to the same bucket but + * have different bd::bd_offset, so need take care of deadlock. 
+ */ + cfs_hash_for_each_bd(bds, n, i) { + if (prev == bds[i].bd_bucket) + continue; + + LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index); + cfs_hash_bd_lock(hs, &bds[i], excl); + prev = bds[i].bd_bucket; + } +} + +static void +cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, int excl) +{ + struct cfs_hash_bucket *prev = NULL; + int i; + + cfs_hash_for_each_bd(bds, n, i) { + if (prev != bds[i].bd_bucket) { + cfs_hash_bd_unlock(hs, &bds[i], excl); + prev = bds[i].bd_bucket; + } + } +} + +static struct hlist_node * +cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, const void *key) +{ + struct hlist_node *ehnode; + unsigned i; + + cfs_hash_for_each_bd(bds, n, i) { + ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL, + CFS_HS_LOOKUP_IT_FIND); + if (ehnode) + return ehnode; + } + return NULL; +} + +static struct hlist_node * +cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, const void *key, + struct hlist_node *hnode, int noref) +{ + struct hlist_node *ehnode; + int intent; + unsigned i; + + LASSERT(hnode); + intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK; + + cfs_hash_for_each_bd(bds, n, i) { + ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, + NULL, intent); + if (ehnode) + return ehnode; + } + + if (i == 1) { /* only one bucket */ + cfs_hash_bd_add_locked(hs, &bds[0], hnode); + } else { + struct cfs_hash_bd mybd; + + cfs_hash_bd_get(hs, key, &mybd); + cfs_hash_bd_add_locked(hs, &mybd, hnode); + } + + return hnode; +} + +static struct hlist_node * +cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + unsigned n, const void *key, + struct hlist_node *hnode) +{ + struct hlist_node *ehnode; + unsigned int i; + + cfs_hash_for_each_bd(bds, n, i) { + ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode, + CFS_HS_LOOKUP_IT_FINDDEL); + if (ehnode) + return ehnode; + } + 
return NULL; +} + +static void +cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2) +{ + int rc; + + if (!bd2->bd_bucket) + return; + + if (!bd1->bd_bucket) { + *bd1 = *bd2; + bd2->bd_bucket = NULL; + return; + } + + rc = cfs_hash_bd_compare(bd1, bd2); + if (!rc) + bd2->bd_bucket = NULL; + else if (rc > 0) + swap(*bd1, *bd2); /* swap bd1 and bd2 */ +} + +void +cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, + struct cfs_hash_bd *bds) +{ + /* NB: caller should hold hs_lock.rw if REHASH is set */ + cfs_hash_bd_from_key(hs, hs->hs_buckets, + hs->hs_cur_bits, key, &bds[0]); + if (likely(!hs->hs_rehash_buckets)) { + /* no rehash or not rehashing */ + bds[1].bd_bucket = NULL; + return; + } + + LASSERT(hs->hs_rehash_bits != 0); + cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, + hs->hs_rehash_bits, key, &bds[1]); + + cfs_hash_bd_order(&bds[0], &bds[1]); +} + +void +cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) +{ + cfs_hash_multi_bd_lock(hs, bds, 2, excl); +} + +void +cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) +{ + cfs_hash_multi_bd_unlock(hs, bds, 2, excl); +} + +struct hlist_node * +cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + const void *key) +{ + return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key); +} + +struct hlist_node * +cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + const void *key, struct hlist_node *hnode, + int noref) +{ + return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key, + hnode, noref); +} + +struct hlist_node * +cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, + const void *key, struct hlist_node *hnode) +{ + return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode); +} + +static void +cfs_hash_buckets_free(struct cfs_hash_bucket **buckets, + int bkt_size, int prev_size, int size) +{ + int i; + + for (i = prev_size; i < size; i++) { + if (buckets[i]) + 
LIBCFS_FREE(buckets[i], bkt_size); + } + + LIBCFS_FREE(buckets, sizeof(buckets[0]) * size); +} + +/* + * Create or grow bucket memory. Return old_buckets if no allocation was + * needed, the newly allocated buckets if allocation was needed and + * successful, and NULL on error. + */ +static struct cfs_hash_bucket ** +cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, + unsigned int old_size, unsigned int new_size) +{ + struct cfs_hash_bucket **new_bkts; + int i; + + LASSERT(old_size == 0 || old_bkts); + + if (old_bkts && old_size == new_size) + return old_bkts; + + LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size); + if (!new_bkts) + return NULL; + + if (old_bkts) { + memcpy(new_bkts, old_bkts, + min(old_size, new_size) * sizeof(*old_bkts)); + } + + for (i = old_size; i < new_size; i++) { + struct hlist_head *hhead; + struct cfs_hash_bd bd; + + LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs)); + if (!new_bkts[i]) { + cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs), + old_size, new_size); + return NULL; + } + + new_bkts[i]->hsb_index = i; + new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ + new_bkts[i]->hsb_depmax = -1; /* unknown */ + bd.bd_bucket = new_bkts[i]; + cfs_hash_bd_for_each_hlist(hs, &bd, hhead) + INIT_HLIST_HEAD(hhead); + + if (cfs_hash_with_no_lock(hs) || + cfs_hash_with_no_bktlock(hs)) + continue; + + if (cfs_hash_with_rw_bktlock(hs)) + rwlock_init(&new_bkts[i]->hsb_lock.rw); + else if (cfs_hash_with_spin_bktlock(hs)) + spin_lock_init(&new_bkts[i]->hsb_lock.spin); + else + LBUG(); /* invalid use-case */ + } + return new_bkts; +} + +/** + * Initialize new libcfs hash, where: + * @name - Descriptive hash name + * @cur_bits - Initial hash table size, in bits + * @max_bits - Maximum allowed hash table resize, in bits + * @ops - Registered hash table operations + * @flags - CFS_HASH_REHASH enable synamic hash resizing + * - CFS_HASH_SORT enable chained hash sort + */ +static int 
cfs_hash_rehash_worker(cfs_workitem_t *wi); + +#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 +static int cfs_hash_dep_print(cfs_workitem_t *wi) +{ + struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi); + int dep; + int bkt; + int off; + int bits; + + spin_lock(&hs->hs_dep_lock); + dep = hs->hs_dep_max; + bkt = hs->hs_dep_bkt; + off = hs->hs_dep_off; + bits = hs->hs_dep_bits; + spin_unlock(&hs->hs_dep_lock); + + LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n", + hs->hs_name, bits, dep, bkt, off); + spin_lock(&hs->hs_dep_lock); + hs->hs_dep_bits = 0; /* mark as workitem done */ + spin_unlock(&hs->hs_dep_lock); + return 0; +} + +static void cfs_hash_depth_wi_init(struct cfs_hash *hs) +{ + spin_lock_init(&hs->hs_dep_lock); + cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print); +} + +static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) +{ + if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi)) + return; + + spin_lock(&hs->hs_dep_lock); + while (hs->hs_dep_bits != 0) { + spin_unlock(&hs->hs_dep_lock); + cond_resched(); + spin_lock(&hs->hs_dep_lock); + } + spin_unlock(&hs->hs_dep_lock); +} + +#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */ + +static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {} +static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {} + +#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */ + +struct cfs_hash * +cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, + unsigned bkt_bits, unsigned extra_bytes, + unsigned min_theta, unsigned max_theta, + struct cfs_hash_ops *ops, unsigned flags) +{ + struct cfs_hash *hs; + int len; + + CLASSERT(CFS_HASH_THETA_BITS < 15); + + LASSERT(name); + LASSERT(ops->hs_key); + LASSERT(ops->hs_hash); + LASSERT(ops->hs_object); + LASSERT(ops->hs_keycmp); + LASSERT(ops->hs_get); + LASSERT(ops->hs_put_locked); + + if ((flags & CFS_HASH_REHASH) != 0) + flags |= CFS_HASH_COUNTER; /* must have counter */ + + LASSERT(cur_bits > 0); + 
LASSERT(cur_bits >= bkt_bits); + LASSERT(max_bits >= cur_bits && max_bits < 31); + LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits)); + LASSERT(ergo((flags & CFS_HASH_REHASH) != 0, + (flags & CFS_HASH_NO_LOCK) == 0)); + LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy)); + + len = (flags & CFS_HASH_BIGNAME) == 0 ? + CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN; + LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len])); + if (!hs) + return NULL; + + strlcpy(hs->hs_name, name, len); + hs->hs_flags = flags; + + atomic_set(&hs->hs_refcount, 1); + atomic_set(&hs->hs_count, 0); + + cfs_hash_lock_setup(hs); + cfs_hash_hlist_setup(hs); + + hs->hs_cur_bits = (__u8)cur_bits; + hs->hs_min_bits = (__u8)cur_bits; + hs->hs_max_bits = (__u8)max_bits; + hs->hs_bkt_bits = (__u8)bkt_bits; + + hs->hs_ops = ops; + hs->hs_extra_bytes = extra_bytes; + hs->hs_rehash_bits = 0; + cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker); + cfs_hash_depth_wi_init(hs); + + if (cfs_hash_with_rehash(hs)) + __cfs_hash_set_theta(hs, min_theta, max_theta); + + hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0, + CFS_HASH_NBKT(hs)); + if (hs->hs_buckets) + return hs; + + LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[len])); + return NULL; +} +EXPORT_SYMBOL(cfs_hash_create); + +/** + * Cleanup libcfs hash @hs. + */ +static void +cfs_hash_destroy(struct cfs_hash *hs) +{ + struct hlist_node *hnode; + struct hlist_node *pos; + struct cfs_hash_bd bd; + int i; + + LASSERT(hs); + LASSERT(!cfs_hash_is_exiting(hs) && + !cfs_hash_is_iterating(hs)); + + /** + * prohibit further rehashes, don't need any lock because + * I'm the only (last) one can change it. 
+ */ + hs->hs_exiting = 1; + if (cfs_hash_with_rehash(hs)) + cfs_hash_rehash_cancel(hs); + + cfs_hash_depth_wi_cancel(hs); + /* rehash should be done/canceled */ + LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets); + + cfs_hash_for_each_bucket(hs, &bd, i) { + struct hlist_head *hhead; + + LASSERT(bd.bd_bucket); + /* no need to take this lock, just for consistent code */ + cfs_hash_bd_lock(hs, &bd, 1); + + cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { + hlist_for_each_safe(hnode, pos, hhead) { + LASSERTF(!cfs_hash_with_assert_empty(hs), + "hash %s bucket %u(%u) is not empty: %u items left\n", + hs->hs_name, bd.bd_bucket->hsb_index, + bd.bd_offset, bd.bd_bucket->hsb_count); + /* can't assert key valicate, because we + * can interrupt rehash + */ + cfs_hash_bd_del_locked(hs, &bd, hnode); + cfs_hash_exit(hs, hnode); + } + } + LASSERT(bd.bd_bucket->hsb_count == 0); + cfs_hash_bd_unlock(hs, &bd, 1); + cond_resched(); + } + + LASSERT(atomic_read(&hs->hs_count) == 0); + + cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs), + 0, CFS_HASH_NBKT(hs)); + i = cfs_hash_with_bigname(hs) ? 
+ CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN; + LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[i])); +} + +struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs) +{ + if (atomic_inc_not_zero(&hs->hs_refcount)) + return hs; + return NULL; +} +EXPORT_SYMBOL(cfs_hash_getref); + +void cfs_hash_putref(struct cfs_hash *hs) +{ + if (atomic_dec_and_test(&hs->hs_refcount)) + cfs_hash_destroy(hs); +} +EXPORT_SYMBOL(cfs_hash_putref); + +static inline int +cfs_hash_rehash_bits(struct cfs_hash *hs) +{ + if (cfs_hash_with_no_lock(hs) || + !cfs_hash_with_rehash(hs)) + return -EOPNOTSUPP; + + if (unlikely(cfs_hash_is_exiting(hs))) + return -ESRCH; + + if (unlikely(cfs_hash_is_rehashing(hs))) + return -EALREADY; + + if (unlikely(cfs_hash_is_iterating(hs))) + return -EAGAIN; + + /* XXX: need to handle case with max_theta != 2.0 + * and the case with min_theta != 0.5 + */ + if ((hs->hs_cur_bits < hs->hs_max_bits) && + (__cfs_hash_theta(hs) > hs->hs_max_theta)) + return hs->hs_cur_bits + 1; + + if (!cfs_hash_with_shrink(hs)) + return 0; + + if ((hs->hs_cur_bits > hs->hs_min_bits) && + (__cfs_hash_theta(hs) < hs->hs_min_theta)) + return hs->hs_cur_bits - 1; + + return 0; +} + +/** + * don't allow inline rehash if: + * - user wants non-blocking change (add/del) on hash table + * - too many elements + */ +static inline int +cfs_hash_rehash_inline(struct cfs_hash *hs) +{ + return !cfs_hash_with_nblk_change(hs) && + atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG; +} + +/** + * Add item @hnode to libcfs hash @hs using @key. The registered + * ops->hs_get function will be called when the item is added. 
+ */ +void +cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) +{ + struct cfs_hash_bd bd; + int bits; + + LASSERT(hlist_unhashed(hnode)); + + cfs_hash_lock(hs, 0); + cfs_hash_bd_get_and_lock(hs, key, &bd, 1); + + cfs_hash_key_validate(hs, key, hnode); + cfs_hash_bd_add_locked(hs, &bd, hnode); + + cfs_hash_bd_unlock(hs, &bd, 1); + + bits = cfs_hash_rehash_bits(hs); + cfs_hash_unlock(hs, 0); + if (bits > 0) + cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); +} +EXPORT_SYMBOL(cfs_hash_add); + +static struct hlist_node * +cfs_hash_find_or_add(struct cfs_hash *hs, const void *key, + struct hlist_node *hnode, int noref) +{ + struct hlist_node *ehnode; + struct cfs_hash_bd bds[2]; + int bits = 0; + + LASSERT(hlist_unhashed(hnode)); + + cfs_hash_lock(hs, 0); + cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); + + cfs_hash_key_validate(hs, key, hnode); + ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key, + hnode, noref); + cfs_hash_dual_bd_unlock(hs, bds, 1); + + if (ehnode == hnode) /* new item added */ + bits = cfs_hash_rehash_bits(hs); + cfs_hash_unlock(hs, 0); + if (bits > 0) + cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); + + return ehnode; +} + +/** + * Add item @hnode to libcfs hash @hs using @key. The registered + * ops->hs_get function will be called if the item was added. + * Returns 0 on success or -EALREADY on key collisions. + */ +int +cfs_hash_add_unique(struct cfs_hash *hs, const void *key, + struct hlist_node *hnode) +{ + return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ? + -EALREADY : 0; +} +EXPORT_SYMBOL(cfs_hash_add_unique); + +/** + * Add item @hnode to libcfs hash @hs using @key. If this @key + * already exists in the hash then ops->hs_get will be called on the + * conflicting entry and that entry will be returned to the caller. + * Otherwise ops->hs_get is called on the item which was added. 
+ */ +void * +cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key, + struct hlist_node *hnode) +{ + hnode = cfs_hash_find_or_add(hs, key, hnode, 0); + + return cfs_hash_object(hs, hnode); +} +EXPORT_SYMBOL(cfs_hash_findadd_unique); + +/** + * Delete item @hnode from the libcfs hash @hs using @key. The @key + * is required to ensure the correct hash bucket is locked since there + * is no direct linkage from the item to the bucket. The object + * removed from the hash will be returned and obs->hs_put is called + * on the removed object. + */ +void * +cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) +{ + void *obj = NULL; + int bits = 0; + struct cfs_hash_bd bds[2]; + + cfs_hash_lock(hs, 0); + cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); + + /* NB: do nothing if @hnode is not in hash table */ + if (!hnode || !hlist_unhashed(hnode)) { + if (!bds[1].bd_bucket && hnode) { + cfs_hash_bd_del_locked(hs, &bds[0], hnode); + } else { + hnode = cfs_hash_dual_bd_finddel_locked(hs, bds, + key, hnode); + } + } + + if (hnode) { + obj = cfs_hash_object(hs, hnode); + bits = cfs_hash_rehash_bits(hs); + } + + cfs_hash_dual_bd_unlock(hs, bds, 1); + cfs_hash_unlock(hs, 0); + if (bits > 0) + cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); + + return obj; +} +EXPORT_SYMBOL(cfs_hash_del); + +/** + * Delete item given @key in libcfs hash @hs. The first @key found in + * the hash will be removed, if the key exists multiple times in the hash + * @hs this function must be called once per key. The removed object + * will be returned and ops->hs_put is called on the removed object. + */ +void * +cfs_hash_del_key(struct cfs_hash *hs, const void *key) +{ + return cfs_hash_del(hs, key, NULL); +} +EXPORT_SYMBOL(cfs_hash_del_key); + +/** + * Lookup an item using @key in the libcfs hash @hs and return it. + * If the @key is found in the hash hs->hs_get() is called and the + * matching objects is returned. 
It is the callers responsibility + * to call the counterpart ops->hs_put using the cfs_hash_put() macro + * when when finished with the object. If the @key was not found + * in the hash @hs NULL is returned. + */ +void * +cfs_hash_lookup(struct cfs_hash *hs, const void *key) +{ + void *obj = NULL; + struct hlist_node *hnode; + struct cfs_hash_bd bds[2]; + + cfs_hash_lock(hs, 0); + cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); + + hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key); + if (hnode) + obj = cfs_hash_object(hs, hnode); + + cfs_hash_dual_bd_unlock(hs, bds, 0); + cfs_hash_unlock(hs, 0); + + return obj; +} +EXPORT_SYMBOL(cfs_hash_lookup); + +static void +cfs_hash_for_each_enter(struct cfs_hash *hs) +{ + LASSERT(!cfs_hash_is_exiting(hs)); + + if (!cfs_hash_with_rehash(hs)) + return; + /* + * NB: it's race on cfs_has_t::hs_iterating, but doesn't matter + * because it's just an unreliable signal to rehash-thread, + * rehash-thread will try to finish rehash ASAP when seeing this. + */ + hs->hs_iterating = 1; + + cfs_hash_lock(hs, 1); + hs->hs_iterators++; + + /* NB: iteration is mostly called by service thread, + * we tend to cancel pending rehash-request, instead of + * blocking service thread, we will relaunch rehash request + * after iteration + */ + if (cfs_hash_is_rehashing(hs)) + cfs_hash_rehash_cancel_locked(hs); + cfs_hash_unlock(hs, 1); +} + +static void +cfs_hash_for_each_exit(struct cfs_hash *hs) +{ + int remained; + int bits; + + if (!cfs_hash_with_rehash(hs)) + return; + cfs_hash_lock(hs, 1); + remained = --hs->hs_iterators; + bits = cfs_hash_rehash_bits(hs); + cfs_hash_unlock(hs, 1); + /* NB: it's race on cfs_has_t::hs_iterating, see above */ + if (remained == 0) + hs->hs_iterating = 0; + if (bits > 0) { + cfs_hash_rehash(hs, atomic_read(&hs->hs_count) < + CFS_HASH_LOOP_HOG); + } +} + +/** + * For each item in the libcfs hash @hs call the passed callback @func + * and pass to it as an argument each hash item and the private @data. 
+ * + * a) the function may sleep! + * b) during the callback: + * . the bucket lock is held so the callback must never sleep. + * . if @removal_safe is true, use can remove current item by + * cfs_hash_bd_del_locked + */ +static __u64 +cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, + void *data, int remove_safe) +{ + struct hlist_node *hnode; + struct hlist_node *pos; + struct cfs_hash_bd bd; + __u64 count = 0; + int excl = !!remove_safe; + int loop = 0; + int i; + + cfs_hash_for_each_enter(hs); + + cfs_hash_lock(hs, 0); + LASSERT(!cfs_hash_is_rehashing(hs)); + + cfs_hash_for_each_bucket(hs, &bd, i) { + struct hlist_head *hhead; + + cfs_hash_bd_lock(hs, &bd, excl); + if (!func) { /* only glimpse size */ + count += bd.bd_bucket->hsb_count; + cfs_hash_bd_unlock(hs, &bd, excl); + continue; + } + + cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { + hlist_for_each_safe(hnode, pos, hhead) { + cfs_hash_bucket_validate(hs, &bd, hnode); + count++; + loop++; + if (func(hs, &bd, hnode, data)) { + cfs_hash_bd_unlock(hs, &bd, excl); + goto out; + } + } + } + cfs_hash_bd_unlock(hs, &bd, excl); + if (loop < CFS_HASH_LOOP_HOG) + continue; + loop = 0; + cfs_hash_unlock(hs, 0); + cond_resched(); + cfs_hash_lock(hs, 0); + } + out: + cfs_hash_unlock(hs, 0); + + cfs_hash_for_each_exit(hs); + return count; +} + +struct cfs_hash_cond_arg { + cfs_hash_cond_opt_cb_t func; + void *arg; +}; + +static int +cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode, void *data) +{ + struct cfs_hash_cond_arg *cond = data; + + if (cond->func(cfs_hash_object(hs, hnode), cond->arg)) + cfs_hash_bd_del_locked(hs, bd, hnode); + return 0; +} + +/** + * Delete item from the libcfs hash @hs when @func return true. + * The write lock being hold during loop for each bucket to avoid + * any object be reference. 
+ */ +void +cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data) +{ + struct cfs_hash_cond_arg arg = { + .func = func, + .arg = data, + }; + + cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1); +} +EXPORT_SYMBOL(cfs_hash_cond_del); + +void +cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, + void *data) +{ + cfs_hash_for_each_tight(hs, func, data, 0); +} +EXPORT_SYMBOL(cfs_hash_for_each); + +void +cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, + void *data) +{ + cfs_hash_for_each_tight(hs, func, data, 1); +} +EXPORT_SYMBOL(cfs_hash_for_each_safe); + +static int +cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd, + struct hlist_node *hnode, void *data) +{ + *(int *)data = 0; + return 1; /* return 1 to break the loop */ +} + +int +cfs_hash_is_empty(struct cfs_hash *hs) +{ + int empty = 1; + + cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0); + return empty; +} +EXPORT_SYMBOL(cfs_hash_is_empty); + +__u64 +cfs_hash_size_get(struct cfs_hash *hs) +{ + return cfs_hash_with_counter(hs) ? + atomic_read(&hs->hs_count) : + cfs_hash_for_each_tight(hs, NULL, NULL, 0); +} +EXPORT_SYMBOL(cfs_hash_size_get); + +/* + * cfs_hash_for_each_relax: + * Iterate the hash table and call @func on each item without + * any lock. This function can't guarantee to finish iteration + * if these features are enabled: + * + * a. if rehash_key is enabled, an item can be moved from + * one bucket to another bucket + * b. user can remove non-zero-ref item from hash-table, + * so the item can be removed from hash-table, even worse, + * it's possible that user changed key and insert to another + * hash bucket. + * there's no way for us to finish iteration correctly on previous + * two cases, so iteration has to be stopped on change. 
+ */ +static int +cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, + void *data) +{ + struct hlist_node *hnode; + struct hlist_node *tmp; + struct cfs_hash_bd bd; + __u32 version; + int count = 0; + int stop_on_change; + int rc; + int i; + + stop_on_change = cfs_hash_with_rehash_key(hs) || + !cfs_hash_with_no_itemref(hs) || + !hs->hs_ops->hs_put_locked; + cfs_hash_lock(hs, 0); + LASSERT(!cfs_hash_is_rehashing(hs)); + + cfs_hash_for_each_bucket(hs, &bd, i) { + struct hlist_head *hhead; + + cfs_hash_bd_lock(hs, &bd, 0); + version = cfs_hash_bd_version_get(&bd); + + cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { + for (hnode = hhead->first; hnode;) { + cfs_hash_bucket_validate(hs, &bd, hnode); + cfs_hash_get(hs, hnode); + cfs_hash_bd_unlock(hs, &bd, 0); + cfs_hash_unlock(hs, 0); + + rc = func(hs, &bd, hnode, data); + if (stop_on_change) + cfs_hash_put(hs, hnode); + cond_resched(); + count++; + + cfs_hash_lock(hs, 0); + cfs_hash_bd_lock(hs, &bd, 0); + if (!stop_on_change) { + tmp = hnode->next; + cfs_hash_put_locked(hs, hnode); + hnode = tmp; + } else { /* bucket changed? 
*/ + if (version != + cfs_hash_bd_version_get(&bd)) + break; + /* safe to continue because no change */ + hnode = hnode->next; + } + if (rc) /* callback wants to break iteration */ + break; + } + if (rc) /* callback wants to break iteration */ + break; + } + cfs_hash_bd_unlock(hs, &bd, 0); + if (rc) /* callback wants to break iteration */ + break; + } + cfs_hash_unlock(hs, 0); + + return count; +} + +int +cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, + void *data) +{ + if (cfs_hash_with_no_lock(hs) || + cfs_hash_with_rehash_key(hs) || + !cfs_hash_with_no_itemref(hs)) + return -EOPNOTSUPP; + + if (!hs->hs_ops->hs_get || + (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) + return -EOPNOTSUPP; + + cfs_hash_for_each_enter(hs); + cfs_hash_for_each_relax(hs, func, data); + cfs_hash_for_each_exit(hs); + + return 0; +} +EXPORT_SYMBOL(cfs_hash_for_each_nolock); + +/** + * For each hash bucket in the libcfs hash @hs call the passed callback + * @func until all the hash buckets are empty. The passed callback @func + * or the previously registered callback hs->hs_put must remove the item + * from the hash. You may either use the cfs_hash_del() or hlist_del() + * functions. No rwlocks will be held during the callback @func it is + * safe to sleep if needed. This function will not terminate until the + * hash is empty. Note it is still possible to concurrently add new + * items in to the hash. It is the callers responsibility to ensure + * the required locking is in place to prevent concurrent insertions. 
+ */ +int +cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, + void *data) +{ + unsigned i = 0; + + if (cfs_hash_with_no_lock(hs)) + return -EOPNOTSUPP; + + if (!hs->hs_ops->hs_get || + (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) + return -EOPNOTSUPP; + + cfs_hash_for_each_enter(hs); + while (cfs_hash_for_each_relax(hs, func, data)) { + CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n", + hs->hs_name, i++); + } + cfs_hash_for_each_exit(hs); + return 0; +} +EXPORT_SYMBOL(cfs_hash_for_each_empty); + +void +cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex, + cfs_hash_for_each_cb_t func, void *data) +{ + struct hlist_head *hhead; + struct hlist_node *hnode; + struct cfs_hash_bd bd; + + cfs_hash_for_each_enter(hs); + cfs_hash_lock(hs, 0); + if (hindex >= CFS_HASH_NHLIST(hs)) + goto out; + + cfs_hash_bd_index_set(hs, hindex, &bd); + + cfs_hash_bd_lock(hs, &bd, 0); + hhead = cfs_hash_bd_hhead(hs, &bd); + hlist_for_each(hnode, hhead) { + if (func(hs, &bd, hnode, data)) + break; + } + cfs_hash_bd_unlock(hs, &bd, 0); +out: + cfs_hash_unlock(hs, 0); + cfs_hash_for_each_exit(hs); +} +EXPORT_SYMBOL(cfs_hash_hlist_for_each); + +/* + * For each item in the libcfs hash @hs which matches the @key call + * the passed callback @func and pass to it as an argument each hash + * item and the private @data. During the callback the bucket lock + * is held so the callback must never sleep. 
+ */ +void +cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, + cfs_hash_for_each_cb_t func, void *data) +{ + struct hlist_node *hnode; + struct cfs_hash_bd bds[2]; + unsigned int i; + + cfs_hash_lock(hs, 0); + + cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); + + cfs_hash_for_each_bd(bds, 2, i) { + struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]); + + hlist_for_each(hnode, hlist) { + cfs_hash_bucket_validate(hs, &bds[i], hnode); + + if (cfs_hash_keycmp(hs, key, hnode)) { + if (func(hs, &bds[i], hnode, data)) + break; + } + } + } + + cfs_hash_dual_bd_unlock(hs, bds, 0); + cfs_hash_unlock(hs, 0); +} +EXPORT_SYMBOL(cfs_hash_for_each_key); + +/** + * Rehash the libcfs hash @hs to the given @bits. This can be used + * to grow the hash size when excessive chaining is detected, or to + * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH + * flag is set in @hs the libcfs hash may be dynamically rehashed + * during addition or removal if the hash's theta value exceeds + * either the hs->hs_min_theta or hs->max_theta values. By default + * these values are tuned to keep the chained hash depth small, and + * this approach assumes a reasonably uniform hashing function. The + * theta thresholds for @hs are tunable via cfs_hash_set_theta(). + */ +void +cfs_hash_rehash_cancel_locked(struct cfs_hash *hs) +{ + int i; + + /* need hold cfs_hash_lock(hs, 1) */ + LASSERT(cfs_hash_with_rehash(hs) && + !cfs_hash_with_no_lock(hs)); + + if (!cfs_hash_is_rehashing(hs)) + return; + + if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) { + hs->hs_rehash_bits = 0; + return; + } + + for (i = 2; cfs_hash_is_rehashing(hs); i++) { + cfs_hash_unlock(hs, 1); + /* raise console warning while waiting too long */ + CDEBUG(is_power_of_2(i >> 3) ? 
D_WARNING : D_INFO, + "hash %s is still rehashing, rescheded %d\n", + hs->hs_name, i - 1); + cond_resched(); + cfs_hash_lock(hs, 1); + } +} + +void +cfs_hash_rehash_cancel(struct cfs_hash *hs) +{ + cfs_hash_lock(hs, 1); + cfs_hash_rehash_cancel_locked(hs); + cfs_hash_unlock(hs, 1); +} + +int +cfs_hash_rehash(struct cfs_hash *hs, int do_rehash) +{ + int rc; + + LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs)); + + cfs_hash_lock(hs, 1); + + rc = cfs_hash_rehash_bits(hs); + if (rc <= 0) { + cfs_hash_unlock(hs, 1); + return rc; + } + + hs->hs_rehash_bits = rc; + if (!do_rehash) { + /* launch and return */ + cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi); + cfs_hash_unlock(hs, 1); + return 0; + } + + /* rehash right now */ + cfs_hash_unlock(hs, 1); + + return cfs_hash_rehash_worker(&hs->hs_rehash_wi); +} + +static int +cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old) +{ + struct cfs_hash_bd new; + struct hlist_head *hhead; + struct hlist_node *hnode; + struct hlist_node *pos; + void *key; + int c = 0; + + /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */ + cfs_hash_bd_for_each_hlist(hs, old, hhead) { + hlist_for_each_safe(hnode, pos, hhead) { + key = cfs_hash_key(hs, hnode); + LASSERT(key); + /* Validate hnode is in the correct bucket. */ + cfs_hash_bucket_validate(hs, old, hnode); + /* + * Delete from old hash bucket; move to new bucket. + * ops->hs_key must be defined. 
+ */ + cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, + hs->hs_rehash_bits, key, &new); + cfs_hash_bd_move_locked(hs, old, &new, hnode); + c++; + } + } + + return c; +} + +static int +cfs_hash_rehash_worker(cfs_workitem_t *wi) +{ + struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi); + struct cfs_hash_bucket **bkts; + struct cfs_hash_bd bd; + unsigned int old_size; + unsigned int new_size; + int bsize; + int count = 0; + int rc = 0; + int i; + + LASSERT(hs && cfs_hash_with_rehash(hs)); + + cfs_hash_lock(hs, 0); + LASSERT(cfs_hash_is_rehashing(hs)); + + old_size = CFS_HASH_NBKT(hs); + new_size = CFS_HASH_RH_NBKT(hs); + + cfs_hash_unlock(hs, 0); + + /* + * don't need hs::hs_rwlock for hs::hs_buckets, + * because nobody can change bkt-table except me. + */ + bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets, + old_size, new_size); + cfs_hash_lock(hs, 1); + if (!bkts) { + rc = -ENOMEM; + goto out; + } + + if (bkts == hs->hs_buckets) { + bkts = NULL; /* do nothing */ + goto out; + } + + rc = __cfs_hash_theta(hs); + if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) { + /* free the new allocated bkt-table */ + old_size = new_size; + new_size = CFS_HASH_NBKT(hs); + rc = -EALREADY; + goto out; + } + + LASSERT(!hs->hs_rehash_buckets); + hs->hs_rehash_buckets = bkts; + + rc = 0; + cfs_hash_for_each_bucket(hs, &bd, i) { + if (cfs_hash_is_exiting(hs)) { + rc = -ESRCH; + /* someone wants to destroy the hash, abort now */ + if (old_size < new_size) /* OK to free old bkt-table */ + break; + /* it's shrinking, need free new bkt-table */ + hs->hs_rehash_buckets = NULL; + old_size = new_size; + new_size = CFS_HASH_NBKT(hs); + goto out; + } + + count += cfs_hash_rehash_bd(hs, &bd); + if (count < CFS_HASH_LOOP_HOG || + cfs_hash_is_iterating(hs)) { /* need to finish ASAP */ + continue; + } + + count = 0; + cfs_hash_unlock(hs, 1); + cond_resched(); + cfs_hash_lock(hs, 1); + } + + hs->hs_rehash_count++; + + bkts = hs->hs_buckets; + hs->hs_buckets = 
hs->hs_rehash_buckets; + hs->hs_rehash_buckets = NULL; + + hs->hs_cur_bits = hs->hs_rehash_bits; +out: + hs->hs_rehash_bits = 0; + if (rc == -ESRCH) /* never be scheduled again */ + cfs_wi_exit(cfs_sched_rehash, wi); + bsize = cfs_hash_bkt_size(hs); + cfs_hash_unlock(hs, 1); + /* can't refer to @hs anymore because it could be destroyed */ + if (bkts) + cfs_hash_buckets_free(bkts, bsize, new_size, old_size); + if (rc != 0) + CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc); + /* return 1 only if cfs_wi_exit is called */ + return rc == -ESRCH; +} + +/** + * Rehash the object referenced by @hnode in the libcfs hash @hs. The + * @old_key must be provided to locate the objects previous location + * in the hash, and the @new_key will be used to reinsert the object. + * Use this function instead of a cfs_hash_add() + cfs_hash_del() + * combo when it is critical that there is no window in time where the + * object is missing from the hash. When an object is being rehashed + * the registered cfs_hash_get() and cfs_hash_put() functions will + * not be called. 
+ */ +void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key, + void *new_key, struct hlist_node *hnode) +{ + struct cfs_hash_bd bds[3]; + struct cfs_hash_bd old_bds[2]; + struct cfs_hash_bd new_bd; + + LASSERT(!hlist_unhashed(hnode)); + + cfs_hash_lock(hs, 0); + + cfs_hash_dual_bd_get(hs, old_key, old_bds); + cfs_hash_bd_get(hs, new_key, &new_bd); + + bds[0] = old_bds[0]; + bds[1] = old_bds[1]; + bds[2] = new_bd; + + /* NB: bds[0] and bds[1] are ordered already */ + cfs_hash_bd_order(&bds[1], &bds[2]); + cfs_hash_bd_order(&bds[0], &bds[1]); + + cfs_hash_multi_bd_lock(hs, bds, 3, 1); + if (likely(!old_bds[1].bd_bucket)) { + cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode); + } else { + cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode); + cfs_hash_bd_add_locked(hs, &new_bd, hnode); + } + /* overwrite key inside locks, otherwise may screw up with + * other operations, i.e: rehash + */ + cfs_hash_keycpy(hs, hnode, new_key); + + cfs_hash_multi_bd_unlock(hs, bds, 3, 1); + cfs_hash_unlock(hs, 0); +} +EXPORT_SYMBOL(cfs_hash_rehash_key); + +void cfs_hash_debug_header(struct seq_file *m) +{ + seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n", + CFS_HASH_BIGNAME_LEN, "name"); +} +EXPORT_SYMBOL(cfs_hash_debug_header); + +static struct cfs_hash_bucket ** +cfs_hash_full_bkts(struct cfs_hash *hs) +{ + /* NB: caller should hold hs->hs_rwlock if REHASH is set */ + if (!hs->hs_rehash_buckets) + return hs->hs_buckets; + + LASSERT(hs->hs_rehash_bits != 0); + return hs->hs_rehash_bits > hs->hs_cur_bits ? + hs->hs_rehash_buckets : hs->hs_buckets; +} + +static unsigned int +cfs_hash_full_nbkt(struct cfs_hash *hs) +{ + /* NB: caller should hold hs->hs_rwlock if REHASH is set */ + if (!hs->hs_rehash_buckets) + return CFS_HASH_NBKT(hs); + + LASSERT(hs->hs_rehash_bits != 0); + return hs->hs_rehash_bits > hs->hs_cur_bits ? 
+ CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs); +} + +void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m) +{ + int dist[8] = { 0, }; + int maxdep = -1; + int maxdepb = -1; + int total = 0; + int theta; + int i; + + cfs_hash_lock(hs, 0); + theta = __cfs_hash_theta(hs); + + seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ", + CFS_HASH_BIGNAME_LEN, hs->hs_name, + 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits, + 1 << hs->hs_max_bits, + __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta), + __cfs_hash_theta_int(hs->hs_min_theta), + __cfs_hash_theta_frac(hs->hs_min_theta), + __cfs_hash_theta_int(hs->hs_max_theta), + __cfs_hash_theta_frac(hs->hs_max_theta), + hs->hs_flags, hs->hs_rehash_count); + + /* + * The distribution is a summary of the chained hash depth in + * each of the libcfs hash buckets. Each buckets hsb_count is + * divided by the hash theta value and used to generate a + * histogram of the hash distribution. A uniform hash will + * result in all hash buckets being close to the average thus + * only the first few entries in the histogram will be non-zero. + * If you hash function results in a non-uniform hash the will + * be observable by outlier bucks in the distribution histogram. + * + * Uniform hash distribution: 128/128/0/0/0/0/0/0 + * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 + */ + for (i = 0; i < cfs_hash_full_nbkt(hs); i++) { + struct cfs_hash_bd bd; + + bd.bd_bucket = cfs_hash_full_bkts(hs)[i]; + cfs_hash_bd_lock(hs, &bd, 0); + if (maxdep < bd.bd_bucket->hsb_depmax) { + maxdep = bd.bd_bucket->hsb_depmax; + maxdepb = ffz(~maxdep); + } + total += bd.bd_bucket->hsb_count; + dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++; + cfs_hash_bd_unlock(hs, &bd, 0); + } + + seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb); + for (i = 0; i < 8; i++) + seq_printf(m, "%d%c", dist[i], (i == 7) ? 
'\n' : '/'); + + cfs_hash_unlock(hs, 0); +} +EXPORT_SYMBOL(cfs_hash_debug_str); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c new file mode 100644 index 000000000000..33352af6c27f --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c @@ -0,0 +1,227 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction + * + * Author: liang@whamcloud.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/libcfs/libcfs.h" + +/** Global CPU partition table */ +struct cfs_cpt_table *cfs_cpt_table __read_mostly; +EXPORT_SYMBOL(cfs_cpt_table); + +#ifndef HAVE_LIBCFS_CPT + +#define CFS_CPU_VERSION_MAGIC 0xbabecafe + +struct cfs_cpt_table * +cfs_cpt_table_alloc(unsigned int ncpt) +{ + struct cfs_cpt_table *cptab; + + if (ncpt != 1) { + CERROR("Can't support cpu partition number %d\n", ncpt); + return NULL; + } + + LIBCFS_ALLOC(cptab, sizeof(*cptab)); + if (cptab) { + cptab->ctb_version = CFS_CPU_VERSION_MAGIC; + node_set(0, cptab->ctb_nodemask); + cptab->ctb_nparts = ncpt; + } + + return cptab; +} +EXPORT_SYMBOL(cfs_cpt_table_alloc); + +void +cfs_cpt_table_free(struct cfs_cpt_table *cptab) +{ + LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC); + + LIBCFS_FREE(cptab, sizeof(*cptab)); +} +EXPORT_SYMBOL(cfs_cpt_table_free); + +#ifdef CONFIG_SMP +int +cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) +{ + int rc; + + rc = snprintf(buf, len, "%d\t: %d\n", 0, 0); + len -= rc; + if (len <= 0) + return -EFBIG; + + return rc; +} +EXPORT_SYMBOL(cfs_cpt_table_print); +#endif /* CONFIG_SMP */ + +int +cfs_cpt_number(struct cfs_cpt_table *cptab) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_number); + +int +cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_weight); + +int +cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_online); + +nodemask_t * +cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) +{ + return &cptab->ctb_nodemask; +} +EXPORT_SYMBOL(cfs_cpt_cpumask); + +int +cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_set_cpu); + +void +cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) +{ +} 
+EXPORT_SYMBOL(cfs_cpt_unset_cpu); + +int +cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_set_cpumask); + +void +cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) +{ +} +EXPORT_SYMBOL(cfs_cpt_unset_cpumask); + +int +cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_set_node); + +void +cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) +{ +} +EXPORT_SYMBOL(cfs_cpt_unset_node); + +int +cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpt_set_nodemask); + +void +cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +{ +} +EXPORT_SYMBOL(cfs_cpt_unset_nodemask); + +void +cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) +{ +} +EXPORT_SYMBOL(cfs_cpt_clear); + +int +cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) +{ + return 0; +} +EXPORT_SYMBOL(cfs_cpt_spread_node); + +int +cfs_cpu_ht_nsiblings(int cpu) +{ + return 1; +} +EXPORT_SYMBOL(cfs_cpu_ht_nsiblings); + +int +cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) +{ + return 0; +} +EXPORT_SYMBOL(cfs_cpt_current); + +int +cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) +{ + return 0; +} +EXPORT_SYMBOL(cfs_cpt_of_cpu); + +int +cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) +{ + return 0; +} +EXPORT_SYMBOL(cfs_cpt_bind); + +void +cfs_cpu_fini(void) +{ + if (cfs_cpt_table) { + cfs_cpt_table_free(cfs_cpt_table); + cfs_cpt_table = NULL; + } +} + +int +cfs_cpu_init(void) +{ + cfs_cpt_table = cfs_cpt_table_alloc(1); + + return cfs_cpt_table ? 
0 : -1; +} + +#endif /* HAVE_LIBCFS_CPT */ diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c new file mode 100644 index 000000000000..2de9eeae0232 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c @@ -0,0 +1,185 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * GPL HEADER END + */ +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Author: liang@whamcloud.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/libcfs/libcfs.h" + +/** destroy cpu-partition lock, see libcfs_private.h for more detail */ +void +cfs_percpt_lock_free(struct cfs_percpt_lock *pcl) +{ + LASSERT(pcl->pcl_locks); + LASSERT(!pcl->pcl_locked); + + cfs_percpt_free(pcl->pcl_locks); + LIBCFS_FREE(pcl, sizeof(*pcl)); +} +EXPORT_SYMBOL(cfs_percpt_lock_free); + +/** + * create cpu-partition lock, see libcfs_private.h for more detail. + * + * cpu-partition lock is designed for large-scale SMP system, so we need to + * reduce cacheline conflict as possible as we can, that's the + * reason we always allocate cacheline-aligned memory block. 
+ */ +struct cfs_percpt_lock * +cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab) +{ + struct cfs_percpt_lock *pcl; + spinlock_t *lock; + int i; + + /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */ + LIBCFS_ALLOC(pcl, sizeof(*pcl)); + if (!pcl) + return NULL; + + pcl->pcl_cptab = cptab; + pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock)); + if (!pcl->pcl_locks) { + LIBCFS_FREE(pcl, sizeof(*pcl)); + return NULL; + } + + cfs_percpt_for_each(lock, i, pcl->pcl_locks) + spin_lock_init(lock); + + return pcl; +} +EXPORT_SYMBOL(cfs_percpt_lock_alloc); + +/** + * lock a CPU partition + * + * \a index != CFS_PERCPT_LOCK_EX + * hold private lock indexed by \a index + * + * \a index == CFS_PERCPT_LOCK_EX + * exclusively lock @pcl and nobody can take private lock + */ +void +cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) + __acquires(pcl->pcl_locks) +{ + int ncpt = cfs_cpt_number(pcl->pcl_cptab); + int i; + + LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt); + + if (ncpt == 1) { + index = 0; + } else { /* serialize with exclusive lock */ + while (pcl->pcl_locked) + cpu_relax(); + } + + if (likely(index != CFS_PERCPT_LOCK_EX)) { + spin_lock(pcl->pcl_locks[index]); + return; + } + + /* exclusive lock request */ + for (i = 0; i < ncpt; i++) { + spin_lock(pcl->pcl_locks[i]); + if (i == 0) { + LASSERT(!pcl->pcl_locked); + /* nobody should take private lock after this + * so I wouldn't starve for too long time + */ + pcl->pcl_locked = 1; + } + } +} +EXPORT_SYMBOL(cfs_percpt_lock); + +/** unlock a CPU partition */ +void +cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) + __releases(pcl->pcl_locks) +{ + int ncpt = cfs_cpt_number(pcl->pcl_cptab); + int i; + + index = ncpt == 1 ? 
0 : index; + + if (likely(index != CFS_PERCPT_LOCK_EX)) { + spin_unlock(pcl->pcl_locks[index]); + return; + } + + for (i = ncpt - 1; i >= 0; i--) { + if (i == 0) { + LASSERT(pcl->pcl_locked); + pcl->pcl_locked = 0; + } + spin_unlock(pcl->pcl_locks[i]); + } +} +EXPORT_SYMBOL(cfs_percpt_unlock); + +/** free cpu-partition refcount */ +void +cfs_percpt_atomic_free(atomic_t **refs) +{ + cfs_percpt_free(refs); +} +EXPORT_SYMBOL(cfs_percpt_atomic_free); + +/** allocate cpu-partition refcount with initial value @init_val */ +atomic_t ** +cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val) +{ + atomic_t **refs; + atomic_t *ref; + int i; + + refs = cfs_percpt_alloc(cptab, sizeof(*ref)); + if (!refs) + return NULL; + + cfs_percpt_for_each(ref, i, refs) + atomic_set(ref, init_val); + return refs; +} +EXPORT_SYMBOL(cfs_percpt_atomic_alloc); + +/** return sum of cpu-partition refs */ +int +cfs_percpt_atomic_summary(atomic_t **refs) +{ + atomic_t *ref; + int i; + int val = 0; + + cfs_percpt_for_each(ref, i, refs) + val += atomic_read(ref); + + return val; +} +EXPORT_SYMBOL(cfs_percpt_atomic_summary); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c new file mode 100644 index 000000000000..c5a6951516ed --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c @@ -0,0 +1,196 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Author: liang@whamcloud.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../include/linux/libcfs/libcfs.h" + +struct cfs_var_array { + unsigned int va_count; /* # of buffers */ + unsigned int va_size; /* size of each var */ + struct cfs_cpt_table *va_cptab; /* cpu partition table */ + void *va_ptrs[0]; /* buffer addresses */ +}; + +/* + * free per-cpu data, see more detail in cfs_percpt_free + */ +void +cfs_percpt_free(void *vars) +{ + struct cfs_var_array *arr; + int i; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + for (i = 0; i < arr->va_count; i++) { + if (arr->va_ptrs[i]) + LIBCFS_FREE(arr->va_ptrs[i], arr->va_size); + } + + LIBCFS_FREE(arr, offsetof(struct cfs_var_array, + va_ptrs[arr->va_count])); +} +EXPORT_SYMBOL(cfs_percpt_free); + +/* + * allocate per cpu-partition variables, returned value is an array of pointers, + * variable can be indexed by CPU partition ID, i.e: + * + * arr = cfs_percpt_alloc(cfs_cpu_pt, size); + * then caller can access memory block for CPU 0 by arr[0], + * memory block for CPU 1 by arr[1]... + * memory block for CPU N by arr[N]... + * + * cacheline aligned. 
+ */ +void * +cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size) +{ + struct cfs_var_array *arr; + int count; + int i; + + count = cfs_cpt_number(cptab); + + LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); + if (!arr) + return NULL; + + size = L1_CACHE_ALIGN(size); + arr->va_size = size; + arr->va_count = count; + arr->va_cptab = cptab; + + for (i = 0; i < count; i++) { + LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size); + if (!arr->va_ptrs[i]) { + cfs_percpt_free((void *)&arr->va_ptrs[0]); + return NULL; + } + } + + return (void *)&arr->va_ptrs[0]; +} +EXPORT_SYMBOL(cfs_percpt_alloc); + +/* + * return number of CPUs (or number of elements in per-cpu data) + * according to cptab of @vars + */ +int +cfs_percpt_number(void *vars) +{ + struct cfs_var_array *arr; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + return arr->va_count; +} +EXPORT_SYMBOL(cfs_percpt_number); + +/* + * return memory block shadowed from current CPU + */ +void * +cfs_percpt_current(void *vars) +{ + struct cfs_var_array *arr; + int cpt; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + cpt = cfs_cpt_current(arr->va_cptab, 0); + if (cpt < 0) + return NULL; + + return arr->va_ptrs[cpt]; +} + +void * +cfs_percpt_index(void *vars, int idx) +{ + struct cfs_var_array *arr; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + LASSERT(idx >= 0 && idx < arr->va_count); + return arr->va_ptrs[idx]; +} + +/* + * free variable array, see more detail in cfs_array_alloc + */ +void +cfs_array_free(void *vars) +{ + struct cfs_var_array *arr; + int i; + + arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); + + for (i = 0; i < arr->va_count; i++) { + if (!arr->va_ptrs[i]) + continue; + + LIBCFS_FREE(arr->va_ptrs[i], arr->va_size); + } + LIBCFS_FREE(arr, offsetof(struct cfs_var_array, + va_ptrs[arr->va_count])); +} +EXPORT_SYMBOL(cfs_array_free); + +/* + * allocate a variable array, returned value is an array of pointers. 
+ * Caller can specify length of array by @count, @size is size of each + * memory block in array. + */ +void * +cfs_array_alloc(int count, unsigned int size) +{ + struct cfs_var_array *arr; + int i; + + LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); + if (!arr) + return NULL; + + arr->va_count = count; + arr->va_size = size; + + for (i = 0; i < count; i++) { + LIBCFS_ALLOC(arr->va_ptrs[i], size); + + if (!arr->va_ptrs[i]) { + cfs_array_free((void *)&arr->va_ptrs[0]); + return NULL; + } + } + + return (void *)&arr->va_ptrs[0]; +} +EXPORT_SYMBOL(cfs_array_alloc); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c new file mode 100644 index 000000000000..50ac1536db4b --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c @@ -0,0 +1,581 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. 
+ * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * String manipulation functions. + * + * libcfs/libcfs/libcfs_string.c + * + * Author: Nathan Rutman + */ + +#include "../../include/linux/libcfs/libcfs.h" + +/* Convert a text string to a bitmask */ +int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), + int *oldmask, int minmask, int allmask) +{ + const char *debugstr; + char op = '\0'; + int newmask = minmask, i, len, found = 0; + + /* must be a list of tokens separated by whitespace + * and optionally an operator ('+' or '-'). If an operator + * appears first in , '*oldmask' is used as the starting point + * (relative), otherwise minmask is used (absolute). An operator + * applies to all following tokens up to the next operator. + */ + while (*str != '\0') { + while (isspace(*str)) + str++; + if (*str == '\0') + break; + if (*str == '+' || *str == '-') { + op = *str++; + if (!found) + /* only if first token is relative */ + newmask = *oldmask; + while (isspace(*str)) + str++; + if (*str == '\0') /* trailing op */ + return -EINVAL; + } + + /* find token length */ + len = 0; + while (str[len] != '\0' && !isspace(str[len]) && + str[len] != '+' && str[len] != '-') + len++; + + /* match token */ + found = 0; + for (i = 0; i < 32; i++) { + debugstr = bit2str(i); + if (debugstr && strlen(debugstr) == len && + strncasecmp(str, debugstr, len) == 0) { + if (op == '-') + newmask &= ~(1 << i); + else + newmask |= (1 << i); + found = 1; + break; + } + } + if (!found && len == 3 && + (strncasecmp(str, "ALL", len) == 0)) { + if (op == '-') + newmask = minmask; + else + newmask = allmask; + found = 1; + } + if (!found) { + CWARN("unknown mask '%.*s'.\n" + "mask usage: [+|-] ...\n", len, str); + return -EINVAL; + } + str += len; + } + + *oldmask = newmask; + return 0; +} + +/* get the first string out of @str */ +char *cfs_firststr(char *str, 
size_t size) +{ + size_t i = 0; + char *end; + + /* trim leading spaces */ + while (i < size && *str && isspace(*str)) { + ++i; + ++str; + } + + /* string with all spaces */ + if (*str == '\0') + goto out; + + end = str; + while (i < size && *end != '\0' && !isspace(*end)) { + ++i; + ++end; + } + + *end = '\0'; +out: + return str; +} +EXPORT_SYMBOL(cfs_firststr); + +char * +cfs_trimwhite(char *str) +{ + char *end; + + while (isspace(*str)) + str++; + + end = str + strlen(str); + while (end > str) { + if (!isspace(end[-1])) + break; + end--; + } + + *end = 0; + return str; +} +EXPORT_SYMBOL(cfs_trimwhite); + +/** + * Extracts tokens from strings. + * + * Looks for \a delim in string \a next, sets \a res to point to + * substring before the delimiter, sets \a next right after the found + * delimiter. + * + * \retval 1 if \a res points to a string of non-whitespace characters + * \retval 0 otherwise + */ +int +cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res) +{ + char *end; + + if (!next->ls_str) + return 0; + + /* skip leading white spaces */ + while (next->ls_len) { + if (!isspace(*next->ls_str)) + break; + next->ls_str++; + next->ls_len--; + } + + if (next->ls_len == 0) /* whitespaces only */ + return 0; + + if (*next->ls_str == delim) { + /* first non-writespace is the delimiter */ + return 0; + } + + res->ls_str = next->ls_str; + end = memchr(next->ls_str, delim, next->ls_len); + if (!end) { + /* there is no the delimeter in the string */ + end = next->ls_str + next->ls_len; + next->ls_str = NULL; + } else { + next->ls_str = end + 1; + next->ls_len -= (end - res->ls_str + 1); + } + + /* skip ending whitespaces */ + while (--end != res->ls_str) { + if (!isspace(*end)) + break; + } + + res->ls_len = end - res->ls_str + 1; + return 1; +} +EXPORT_SYMBOL(cfs_gettok); + +/** + * Converts string to integer. + * + * Accepts decimal and hexadecimal number recordings. 
+ * + * \retval 1 if first \a nob chars of \a str convert to decimal or + * hexadecimal integer in the range [\a min, \a max] + * \retval 0 otherwise + */ +int +cfs_str2num_check(char *str, int nob, unsigned *num, + unsigned min, unsigned max) +{ + bool all_numbers = true; + char *endp, cache; + int rc; + + str = cfs_trimwhite(str); + + /** + * kstrouint can only handle strings composed + * of only numbers. We need to scan the string + * passed in for the first non-digit character + * and end the string at that location. If we + * don't find any non-digit character we still + * need to place a '\0' at position nob since + * we are not interested in the rest of the + * string which is longer than nob in size. + * After we are done the character at the + * position we placed '\0' must be restored. + */ + for (endp = str; endp < str + nob; endp++) { + if (!isdigit(*endp)) { + all_numbers = false; + break; + } + } + cache = *endp; + *endp = '\0'; + + rc = kstrtouint(str, 10, num); + *endp = cache; + if (rc || !all_numbers) + return 0; + + return (*num >= min && *num <= max); +} +EXPORT_SYMBOL(cfs_str2num_check); + +/** + * Parses \ token of the syntax. If \a bracketed is false, + * \a src should only have a single token which can be \ or \* + * + * \retval pointer to allocated range_expr and initialized + * range_expr::re_lo, range_expr::re_hi and range_expr:re_stride if \a + `* src parses to + * \ | + * \ '-' \ | + * \ '-' \ '/' \ + * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or + * -ENOMEM will be returned. 
+ */
+static int
+cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+		     int bracketed, struct cfs_range_expr **expr)
+{
+	struct cfs_range_expr *re;
+	struct cfs_lstr tok;
+
+	LIBCFS_ALLOC(re, sizeof(*re));
+	if (!re)
+		return -ENOMEM;
+
+	if (src->ls_len == 1 && src->ls_str[0] == '*') {
+		/* wildcard: the whole [min, max] interval */
+		re->re_lo = min;
+		re->re_hi = max;
+		re->re_stride = 1;
+		goto out;
+	}
+
+	if (cfs_str2num_check(src->ls_str, src->ls_len,
+			      &re->re_lo, min, max)) {
+		/* <number> is parsed */
+		re->re_hi = re->re_lo;
+		re->re_stride = 1;
+		goto out;
+	}
+
+	/* ranges are only legal inside brackets */
+	if (!bracketed || !cfs_gettok(src, '-', &tok))
+		goto failed;
+
+	if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+			       &re->re_lo, min, max))
+		goto failed;
+
+	if (!src->ls_str) {
+		/*
+		 * cfs_gettok() found no '-' and consumed everything, so
+		 * \a tok was a lone <number> surrounded by blanks. There
+		 * is nothing left to parse (and ls_str must not be
+		 * dereferenced).
+		 */
+		re->re_hi = re->re_lo;
+		re->re_stride = 1;
+		goto out;
+	}
+
+	/* <number> - <number> */
+	if (cfs_str2num_check(src->ls_str, src->ls_len,
+			      &re->re_hi, min, max)) {
+		/* <number> - <number> is parsed */
+		re->re_stride = 1;
+		goto out;
+	}
+
+	/* go to check <number> '-' <number> '/' <number> */
+	if (!cfs_gettok(src, '/', &tok))
+		goto failed;
+
+	if (!cfs_str2num_check(tok.ls_str, tok.ls_len,
+			       &re->re_hi, min, max))
+		goto failed;
+
+	if (!src->ls_str) {
+		/* no '/' after all: <number> - <number> padded with blanks */
+		re->re_stride = 1;
+		goto out;
+	}
+
+	/* <number> - <number> / <number> */
+	if (!cfs_str2num_check(src->ls_str, src->ls_len,
+			       &re->re_stride, min, max))
+		goto failed;
+
+	/*
+	 * The stride is used as a divisor when the expression is matched
+	 * or expanded, so zero can never be accepted.
+	 */
+	if (re->re_stride == 0)
+		goto failed;
+
+ out:
+	*expr = re;
+	return 0;
+
+ failed:
+	LIBCFS_FREE(re, sizeof(*re));
+	return -EINVAL;
+}
+
+/**
+ * Print the range expression \a re into specified \a buffer.
+ * If \a bracketed is true, expression does not need additional
+ * brackets.
+ *
+ * \retval number of characters written
+ */
+static int
+cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr,
+		     bool bracketed)
+{
+	/* no brackets of our own when the caller already supplies them */
+	const char *lbr = bracketed ? "" : "[";
+	const char *rbr = bracketed ? "" : "]";
+
+	/* a single value is never bracketed */
+	if (expr->re_lo == expr->re_hi)
+		return scnprintf(buffer, count, "%u", expr->re_lo);
+
+	if (expr->re_stride == 1)
+		return scnprintf(buffer, count, "%s%u-%u%s",
+				 lbr, expr->re_lo, expr->re_hi, rbr);
+
+	return scnprintf(buffer, count, "%s%u-%u/%u%s",
+			 lbr, expr->re_lo, expr->re_hi, expr->re_stride, rbr);
+}
+
+/**
+ * Format every range expression of \a expr_list into \a buffer, which
+ * holds \a count bytes. Expressions are separated by commas; when the
+ * list has more than one expression the whole output is wrapped in a
+ * single pair of brackets.
+ *
+ * \retval number of characters written
+ */
+int
+cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
+{
+	struct cfs_range_expr *expr;
+	int numexprs = 0;
+	int pos = 0;
+	bool first = true;
+	bool multi;
+
+	if (count <= 0)
+		return 0;
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link)
+		numexprs++;
+
+	multi = numexprs > 1;
+
+	if (multi)
+		pos += scnprintf(buffer + pos, count - pos, "[");
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		if (!first)
+			pos += scnprintf(buffer + pos, count - pos, ",");
+		first = false;
+
+		pos += cfs_range_expr_print(buffer + pos, count - pos, expr,
+					    multi);
+	}
+
+	if (multi)
+		pos += scnprintf(buffer + pos, count - pos, "]");
+
+	return pos;
+}
+EXPORT_SYMBOL(cfs_expr_list_print);
+
+/**
+ * Matches value (\a value) against ranges expression list \a expr_list.
+ *
+ * \retval 1 if \a value matches
+ * \retval 0 otherwise
+ */
+int
+cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list)
+{
+	struct cfs_range_expr *expr;
+
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		/* a zero stride selects nothing and must not be divided by */
+		if (expr->re_stride == 0)
+			continue;
+
+		if (value >= expr->re_lo && value <= expr->re_hi &&
+		    ((value - expr->re_lo) % expr->re_stride) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cfs_expr_list_match);
+
+/**
+ * Convert express list (\a expr_list) to an array of all matched values
+ *
+ * On success with N > 0 the array is allocated with LIBCFS_ALLOC() and
+ * returned through \a valpp; it holds N entries. \a valpp is left
+ * untouched when 0 or a negative value is returned.
+ *
+ * \retval N N is total number of all matched values
+ * \retval 0 if expression list is empty
+ * \retval < 0 for failure
+ */
+int
+cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
+{
+	struct cfs_range_expr *expr;
+	__u32 *val;
+	int count = 0;
+	int i;
+
+	/* first pass: count the values so the array can be sized exactly */
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		/* a zero stride selects nothing and must not be divided by */
+		if (expr->re_stride == 0)
+			continue;
+
+		for (i = expr->re_lo; i <= expr->re_hi; i++) {
+			if (((i - expr->re_lo) % expr->re_stride) == 0)
+				count++;
+		}
+	}
+
+	if (count == 0) /* empty expression list */
+		return 0;
+
+	if (count > max) {
+		CERROR("Number of values %d exceeds max allowed %d\n",
+		       count, max);
+		return -EINVAL;
+	}
+
+	LIBCFS_ALLOC(val, sizeof(val[0]) * count);
+	if (!val)
+		return -ENOMEM;
+
+	/* second pass: same walk as above, now storing the values */
+	count = 0;
+	list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
+		if (expr->re_stride == 0)
+			continue;
+
+		for (i = expr->re_lo; i <= expr->re_hi; i++) {
+			if (((i - expr->re_lo) % expr->re_stride) == 0)
+				val[count++] = i;
+		}
+	}
+
+	*valpp = val;
+	return count;
+}
+EXPORT_SYMBOL(cfs_expr_list_values);
+
+/**
+ * Frees cfs_range_expr structures of \a expr_list.
+ *
+ * Every range expression still linked on the list is unlinked and
+ * freed, then \a expr_list itself is freed.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free(struct cfs_expr_list *expr_list)
+{
+	while (!list_empty(&expr_list->el_exprs)) {
+		struct cfs_range_expr *expr;
+
+		expr = list_entry(expr_list->el_exprs.next,
+				  struct cfs_range_expr, re_link);
+		list_del(&expr->re_link);
+		LIBCFS_FREE(expr, sizeof(*expr));
+	}
+
+	LIBCFS_FREE(expr_list, sizeof(*expr_list));
+}
+EXPORT_SYMBOL(cfs_expr_list_free);
+
+/**
+ * Parses \<expr_list\> token of the syntax.
+ *
+ * The first \a len characters of \a str are parsed. A string of the
+ * form '[' ... ']' is split on ',' and every piece is parsed as a
+ * bracketed range expression; anything else is parsed as one
+ * unbracketed expression. On success the new list is returned through
+ * \a elpp and must be released with cfs_expr_list_free(); on failure
+ * everything allocated here is freed and \a elpp is left untouched.
+ *
+ * \retval 0 if \a str parses to \<number\> | \<expr_list\>
+ * \retval -errno otherwise
+ */
+int
+cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+		    struct cfs_expr_list **elpp)
+{
+	struct cfs_expr_list *expr_list;
+	struct cfs_range_expr *expr;
+	struct cfs_lstr src;
+	int rc;
+
+	LIBCFS_ALLOC(expr_list, sizeof(*expr_list));
+	if (!expr_list)
+		return -ENOMEM;
+
+	src.ls_str = str;
+	src.ls_len = len;
+
+	INIT_LIST_HEAD(&expr_list->el_exprs);
+
+	/*
+	 * A bracketed list needs at least "[]"; checking the length first
+	 * also keeps the look at the last character inside the buffer
+	 * when \a len is 0.
+	 */
+	if (src.ls_len >= 2 &&
+	    src.ls_str[0] == '[' &&
+	    src.ls_str[src.ls_len - 1] == ']') {
+		/* strip the brackets */
+		src.ls_str++;
+		src.ls_len -= 2;
+
+		/* stays -EINVAL if there is nothing between the brackets */
+		rc = -EINVAL;
+		while (src.ls_str) {
+			struct cfs_lstr tok;
+
+			if (!cfs_gettok(&src, ',', &tok)) {
+				rc = -EINVAL;
+				break;
+			}
+
+			rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
+			if (rc != 0)
+				break;
+
+			list_add_tail(&expr->re_link, &expr_list->el_exprs);
+		}
+	} else {
+		rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
+		if (rc == 0)
+			list_add_tail(&expr->re_link, &expr_list->el_exprs);
+	}
+
+	if (rc != 0)
+		cfs_expr_list_free(expr_list);
+	else
+		*elpp = expr_list;
+
+	return rc;
+}
+EXPORT_SYMBOL(cfs_expr_list_parse);
+
+/**
+ * Frees cfs_expr_list structures of \a list.
+ *
+ * For each struct cfs_expr_list structure found on \a list it frees
+ * range_expr list attached to it and frees the cfs_expr_list itself.
+ *
+ * \retval none
+ */
+void
+cfs_expr_list_free_list(struct list_head *list)
+{
+	struct cfs_expr_list *el;
+
+	while (!list_empty(list)) {
+		el = list_entry(list->next, struct cfs_expr_list, el_link);
+		list_del(&el->el_link);
+		cfs_expr_list_free(el);
+	}
+}
+EXPORT_SYMBOL(cfs_expr_list_free_list);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
new file mode 100644
index 000000000000..389fb9eeea75
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -0,0 +1,1040 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Author: liang@whamcloud.com + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include "../../../include/linux/libcfs/libcfs.h" + +#ifdef CONFIG_SMP + +/** + * modparam for setting number of partitions + * + * 0 : estimate best value based on cores or NUMA nodes + * 1 : disable multiple partitions + * >1 : specify number of partitions + */ +static int cpu_npartitions; +module_param(cpu_npartitions, int, 0444); +MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); + +/** + * modparam for setting CPU partitions patterns: + * + * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID, + * number in bracket is processor ID (core or HT) + * + * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket + * are NUMA node ID, number before bracket is CPU partition ID. 
+ *
+ * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
+ */
+static char *cpu_pattern = "";
+module_param(cpu_pattern, charp, 0444);
+MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
+
+struct cfs_cpt_data {
+	/* serialize hotplug etc */
+	spinlock_t cpt_lock;
+	/* reserved for hotplug */
+	unsigned long cpt_version;
+	/* mutex to protect cpt_cpumask */
+	struct mutex cpt_mutex;
+	/* scratch buffer for set/unset_node */
+	cpumask_t *cpt_cpumask;
+};
+
+static struct cfs_cpt_data cpt_data;
+
+/**
+ * Free \a cptab and everything cfs_cpt_table_alloc() attached to it.
+ * Every member is checked before it is freed, so a partially
+ * constructed table can be passed in.
+ */
+void
+cfs_cpt_table_free(struct cfs_cpt_table *cptab)
+{
+	int i;
+
+	if (cptab->ctb_cpu2cpt) {
+		/* size must match the allocation in cfs_cpt_table_alloc() */
+		LIBCFS_FREE(cptab->ctb_cpu2cpt,
+			    nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+	}
+
+	for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
+		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
+
+		if (part->cpt_nodemask) {
+			LIBCFS_FREE(part->cpt_nodemask,
+				    sizeof(*part->cpt_nodemask));
+		}
+
+		if (part->cpt_cpumask)
+			LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+	}
+
+	if (cptab->ctb_parts) {
+		LIBCFS_FREE(cptab->ctb_parts,
+			    cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
+	}
+
+	if (cptab->ctb_nodemask)
+		LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
+	if (cptab->ctb_cpumask)
+		LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+
+	LIBCFS_FREE(cptab, sizeof(*cptab));
+}
+EXPORT_SYMBOL(cfs_cpt_table_free);
+
+/**
+ * Allocate an empty CPU partition table with \a ncpt partitions: no CPU
+ * is assigned yet and every ctb_cpu2cpt[] slot is -1.
+ *
+ * \retval the new table, to be released with cfs_cpt_table_free()
+ * \retval NULL if any allocation failed
+ */
+struct cfs_cpt_table *
+cfs_cpt_table_alloc(unsigned int ncpt)
+{
+	struct cfs_cpt_table *cptab;
+	int i;
+
+	LIBCFS_ALLOC(cptab, sizeof(*cptab));
+	if (!cptab)
+		return NULL;
+
+	cptab->ctb_nparts = ncpt;
+
+	LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
+	LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
+
+	if (!cptab->ctb_cpumask || !cptab->ctb_nodemask)
+		goto failed;
+
+	/*
+	 * ctb_cpu2cpt[] is indexed by CPU id and every user bounds the id
+	 * with nr_cpu_ids, so the array needs nr_cpu_ids slots.
+	 * num_possible_cpus() is smaller than that when the possible CPU
+	 * ids are not contiguous.
+	 */
+	LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
+		     nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+	if (!cptab->ctb_cpu2cpt)
+		goto failed;
+
+	memset(cptab->ctb_cpu2cpt, -1,
+	       nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+
+	LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
+	if (!cptab->ctb_parts)
+		goto failed;
+
+	for (i = 0; i < ncpt; i++) {
+		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
+
+		LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
+		LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
+		if (!part->cpt_cpumask || !part->cpt_nodemask)
+			goto failed;
+	}
+
+	spin_lock(&cpt_data.cpt_lock);
+	/* Reserved for hotplug */
+	cptab->ctb_version = cpt_data.cpt_version;
+	spin_unlock(&cpt_data.cpt_lock);
+
+	return cptab;
+
+ failed:
+	cfs_cpt_table_free(cptab);
+	return NULL;
+}
+EXPORT_SYMBOL(cfs_cpt_table_alloc);
+
+/**
+ * Print \a cptab into \a buf (\a len bytes), one line per partition:
+ * the partition index, "\t: ", then the ids of its CPUs.
+ *
+ * \retval number of bytes written
+ * \retval -EFBIG if \a buf is too small
+ */
+int
+cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
+{
+	char *tmp = buf;
+	int rc = 0;
+	int i;
+	int j;
+
+	for (i = 0; i < cptab->ctb_nparts; i++) {
+		if (len > 0) {
+			rc = snprintf(tmp, len, "%d\t: ", i);
+			len -= rc;
+		}
+
+		if (len <= 0) {
+			rc = -EFBIG;
+			goto out;
+		}
+
+		tmp += rc;
+		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
+			rc = snprintf(tmp, len, "%d ", j);
+			len -= rc;
+			if (len <= 0) {
+				rc = -EFBIG;
+				goto out;
+			}
+			tmp += rc;
+		}
+
+		/* len > 0 here, so there is room for the line break */
+		*tmp = '\n';
+		tmp++;
+		len--;
+	}
+
+ out:
+	if (rc < 0)
+		return rc;
+
+	return tmp - buf;
+}
+EXPORT_SYMBOL(cfs_cpt_table_print);
+
+/** Return the number of partitions in \a cptab. */
+int
+cfs_cpt_number(struct cfs_cpt_table *cptab)
+{
+	return cptab->ctb_nparts;
+}
+EXPORT_SYMBOL(cfs_cpt_number);
+
+/**
+ * Return the number of CPUs set in partition \a cpt, or in the whole
+ * table if \a cpt is CFS_CPT_ANY.
+ */
+int
+cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
+{
+	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+	return cpt == CFS_CPT_ANY ?
+	       cpumask_weight(cptab->ctb_cpumask) :
+	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
+}
+EXPORT_SYMBOL(cfs_cpt_weight);
+
+/**
+ * Return non-zero if at least one CPU of partition \a cpt (or of the
+ * whole table for CFS_CPT_ANY) is in cpu_online_mask.
+ */
+int
+cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
+{
+	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+	return cpt == CFS_CPT_ANY ?
+	       cpumask_any_and(cptab->ctb_cpumask,
+			       cpu_online_mask) < nr_cpu_ids :
+	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
+			       cpu_online_mask) < nr_cpu_ids;
+}
+EXPORT_SYMBOL(cfs_cpt_online);
+
+/**
+ * Return the CPU mask of partition \a cpt, or of the whole table for
+ * CFS_CPT_ANY. The mask is the table's own, not a copy.
+ */
+cpumask_t *
+cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+{
+	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+	return cpt == CFS_CPT_ANY ?
+	       cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
+}
+EXPORT_SYMBOL(cfs_cpt_cpumask);
+
+/**
+ * Return the NUMA node mask of partition \a cpt, or of the whole table
+ * for CFS_CPT_ANY. The mask is the table's own, not a copy.
+ */
+nodemask_t *
+cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
+{
+	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+	return cpt == CFS_CPT_ANY ?
+	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
+}
+EXPORT_SYMBOL(cfs_cpt_nodemask);
+
+/**
+ * Add \a cpu to partition \a cpt of \a cptab and record its NUMA node
+ * in the node masks of the partition and of the table.
+ *
+ * \retval 1 on success
+ * \retval 0 if \a cpu is out of range, offline, or already assigned to
+ * a partition of this table
+ */
+int
+cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	int node;
+
+	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
+
+	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+		CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
+		return 0;
+	}
+
+	if (cptab->ctb_cpu2cpt[cpu] != -1) {
+		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
+		       cpu, cptab->ctb_cpu2cpt[cpu]);
+		return 0;
+	}
+
+	cptab->ctb_cpu2cpt[cpu] = cpt;
+
+	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
+	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+
+	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
+	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+
+	node = cpu_to_node(cpu);
+
+	/* first CPU of @node in this CPT table */
+	if (!node_isset(node, *cptab->ctb_nodemask))
+		node_set(node, *cptab->ctb_nodemask);
+
+	/* first CPU of @node in this partition */
+	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
+		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpu);
+
+/**
+ * Remove \a cpu from partition \a cpt of \a cptab. With CFS_CPT_ANY the
+ * partition is looked up in ctb_cpu2cpt[]. The NUMA node of \a cpu is
+ * cleared from the partition / table node mask once no other CPU of
+ * that node remains there. Nothing is changed if \a cpu is out of
+ * range or is not in the given partition.
+ */
+void
+cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
+{
+	int node;
+	int i;
+
+	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
+
+	if (cpu < 0 || cpu >= nr_cpu_ids) {
+		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
+		return;
+	}
+
+	if (cpt == CFS_CPT_ANY) {
+		/* caller doesn't know the partition ID */
+		cpt = cptab->ctb_cpu2cpt[cpu];
+		if (cpt < 0) { /* not set in this CPT-table */
+			/* report the CPU: cpt is only the failed lookup */
+			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
+			       cpu, cptab);
+			return;
+		}
+
+	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
+		CDEBUG(D_INFO,
+		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
+		return;
+	}
+
+	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
+	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
+
+	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
+	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
+	cptab->ctb_cpu2cpt[cpu] = -1;
+
+	node = cpu_to_node(cpu);
+
+	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
+	LASSERT(node_isset(node, *cptab->ctb_nodemask));
+
+	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
+		/* this CPT has other CPU belonging to this node? */
+		if (cpu_to_node(i) == node)
+			break;
+	}
+
+	/* the loop ran to completion: no CPU of @node is left here */
+	if (i >= nr_cpu_ids)
+		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
+
+	for_each_cpu(i, cptab->ctb_cpumask) {
+		/* this CPT-table has other CPU belonging to this node? */
+		if (cpu_to_node(i) == node)
+			break;
+	}
+
+	if (i >= nr_cpu_ids)
+		node_clear(node, *cptab->ctb_nodemask);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpu);
+
+/**
+ * Add every CPU of \a mask to partition \a cpt.
+ *
+ * \retval 1 on success
+ * \retval 0 if \a mask has no online CPU or if adding a CPU failed;
+ * CPUs added before the failing one stay in the partition
+ */
+int
+cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+	int i;
+
+	if (cpumask_weight(mask) == 0 ||
+	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
+		CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
+		       cpt);
+		return 0;
+	}
+
+	for_each_cpu(i, mask) {
+		if (!cfs_cpt_set_cpu(cptab, cpt, i))
+			return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_cpumask);
+
+/** Remove every CPU of \a mask from partition \a cpt. */
+void
+cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
+{
+	int i;
+
+	for_each_cpu(i, mask)
+		cfs_cpt_unset_cpu(cptab, cpt, i);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
+
+/**
+ * Add all CPUs of NUMA node \a node to partition \a cpt. The shared
+ * scratch mask cpt_data.cpt_cpumask is used under cpt_data.cpt_mutex.
+ *
+ * \retval 1 on success
+ * \retval 0 if \a node is out of range or the CPUs could not be added
+ */
+int
+cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	cpumask_t *mask;
+	int rc;
+
+	if (node < 0 || node >= MAX_NUMNODES) {
+		CDEBUG(D_INFO,
+		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
+		return 0;
+	}
+
+	mutex_lock(&cpt_data.cpt_mutex);
+
+	mask = cpt_data.cpt_cpumask;
+	cpumask_copy(mask, cpumask_of_node(node));
+
+	rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
+
+	mutex_unlock(&cpt_data.cpt_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL(cfs_cpt_set_node);
+
+/**
+ * Remove all CPUs of NUMA node \a node from partition \a cpt. The
+ * shared scratch mask cpt_data.cpt_cpumask is used under
+ * cpt_data.cpt_mutex.
+ */
+void
+cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
+{
+	cpumask_t *mask;
+
+	if (node < 0 || node >= MAX_NUMNODES) {
+		CDEBUG(D_INFO,
+		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
+		return;
+	}
+
+	mutex_lock(&cpt_data.cpt_mutex);
+
+	mask = cpt_data.cpt_cpumask;
+	cpumask_copy(mask, cpumask_of_node(node));
+
+	cfs_cpt_unset_cpumask(cptab, cpt, mask);
+
+	mutex_unlock(&cpt_data.cpt_mutex);
+}
+EXPORT_SYMBOL(cfs_cpt_unset_node);
+
+/**
+ * Add the CPUs of every NUMA node in \a mask to partition \a cpt.
+ *
+ * \retval 1 on success
+ * \retval 0 as soon as one node could not be added
+ */
+int
+cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
+{
+	int i;
+
+	for_each_node_mask(i, *mask) {
+		if (!cfs_cpt_set_node(cptab, cpt, i))
+			return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(cfs_cpt_set_nodemask);
+
+void +cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) +{ + int i; + + for_each_node_mask(i, *mask) + cfs_cpt_unset_node(cptab, cpt, i); +} +EXPORT_SYMBOL(cfs_cpt_unset_nodemask); + +void +cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) +{ + int last; + int i; + + if (cpt == CFS_CPT_ANY) { + last = cptab->ctb_nparts - 1; + cpt = 0; + } else { + last = cpt; + } + + for (; cpt <= last; cpt++) { + for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) + cfs_cpt_unset_cpu(cptab, cpt, i); + } +} +EXPORT_SYMBOL(cfs_cpt_clear); + +int +cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) +{ + nodemask_t *mask; + int weight; + int rotor; + int node; + + /* convert CPU partition ID to HW node id */ + + if (cpt < 0 || cpt >= cptab->ctb_nparts) { + mask = cptab->ctb_nodemask; + rotor = cptab->ctb_spread_rotor++; + } else { + mask = cptab->ctb_parts[cpt].cpt_nodemask; + rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++; + } + + weight = nodes_weight(*mask); + LASSERT(weight > 0); + + rotor %= weight; + + for_each_node_mask(node, *mask) { + if (rotor-- == 0) + return node; + } + + LBUG(); + return 0; +} +EXPORT_SYMBOL(cfs_cpt_spread_node); + +int +cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) +{ + int cpu = smp_processor_id(); + int cpt = cptab->ctb_cpu2cpt[cpu]; + + if (cpt < 0) { + if (!remap) + return cpt; + + /* don't return negative value for safety of upper layer, + * instead we shadow the unknown cpu to a valid partition ID + */ + cpt = cpu % cptab->ctb_nparts; + } + + return cpt; +} +EXPORT_SYMBOL(cfs_cpt_current); + +int +cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) +{ + LASSERT(cpu >= 0 && cpu < nr_cpu_ids); + + return cptab->ctb_cpu2cpt[cpu]; +} +EXPORT_SYMBOL(cfs_cpt_of_cpu); + +int +cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) +{ + cpumask_t *cpumask; + nodemask_t *nodemask; + int rc; + int i; + + LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); + + if (cpt == CFS_CPT_ANY) { + cpumask = 
cptab->ctb_cpumask; + nodemask = cptab->ctb_nodemask; + } else { + cpumask = cptab->ctb_parts[cpt].cpt_cpumask; + nodemask = cptab->ctb_parts[cpt].cpt_nodemask; + } + + if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) { + CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n", + cpt); + return -EINVAL; + } + + for_each_online_cpu(i) { + if (cpumask_test_cpu(i, cpumask)) + continue; + + rc = set_cpus_allowed_ptr(current, cpumask); + set_mems_allowed(*nodemask); + if (rc == 0) + schedule(); /* switch to allowed CPU */ + + return rc; + } + + /* don't need to set affinity because all online CPUs are covered */ + return 0; +} +EXPORT_SYMBOL(cfs_cpt_bind); + +/** + * Choose max to \a number CPUs from \a node and set them in \a cpt. + * We always prefer to choose CPU in the same core/socket. + */ +static int +cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, + cpumask_t *node, int number) +{ + cpumask_t *socket = NULL; + cpumask_t *core = NULL; + int rc = 0; + int cpu; + + LASSERT(number > 0); + + if (number >= cpumask_weight(node)) { + while (!cpumask_empty(node)) { + cpu = cpumask_first(node); + + rc = cfs_cpt_set_cpu(cptab, cpt, cpu); + if (!rc) + return -EINVAL; + cpumask_clear_cpu(cpu, node); + } + return 0; + } + + /* allocate scratch buffer */ + LIBCFS_ALLOC(socket, cpumask_size()); + LIBCFS_ALLOC(core, cpumask_size()); + if (!socket || !core) { + rc = -ENOMEM; + goto out; + } + + while (!cpumask_empty(node)) { + cpu = cpumask_first(node); + + /* get cpumask for cores in the same socket */ + cpumask_copy(socket, topology_core_cpumask(cpu)); + cpumask_and(socket, socket, node); + + LASSERT(!cpumask_empty(socket)); + + while (!cpumask_empty(socket)) { + int i; + + /* get cpumask for hts in the same core */ + cpumask_copy(core, topology_sibling_cpumask(cpu)); + cpumask_and(core, core, node); + + LASSERT(!cpumask_empty(core)); + + for_each_cpu(i, 
core) { + cpumask_clear_cpu(i, socket); + cpumask_clear_cpu(i, node); + + rc = cfs_cpt_set_cpu(cptab, cpt, i); + if (!rc) { + rc = -EINVAL; + goto out; + } + + if (--number == 0) + goto out; + } + cpu = cpumask_first(socket); + } + } + + out: + if (socket) + LIBCFS_FREE(socket, cpumask_size()); + if (core) + LIBCFS_FREE(core, cpumask_size()); + return rc; +} + +#define CPT_WEIGHT_MIN 4u + +static unsigned int +cfs_cpt_num_estimate(void) +{ + unsigned nnode = num_online_nodes(); + unsigned ncpu = num_online_cpus(); + unsigned ncpt; + + if (ncpu <= CPT_WEIGHT_MIN) { + ncpt = 1; + goto out; + } + + /* generate reasonable number of CPU partitions based on total number + * of CPUs, Preferred N should be power2 and match this condition: + * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 + */ + for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1) + ; + + if (ncpt <= nnode) { /* fat numa system */ + while (nnode > ncpt) + nnode >>= 1; + + } else { /* ncpt > nnode */ + while ((nnode << 1) <= ncpt) + nnode <<= 1; + } + + ncpt = nnode; + + out: +#if (BITS_PER_LONG == 32) + /* config many CPU partitions on 32-bit system could consume + * too much memory + */ + ncpt = min(2U, ncpt); +#endif + while (ncpu % ncpt != 0) + ncpt--; /* worst case is 1 */ + + return ncpt; +} + +static struct cfs_cpt_table * +cfs_cpt_table_create(int ncpt) +{ + struct cfs_cpt_table *cptab = NULL; + cpumask_t *mask = NULL; + int cpt = 0; + int num; + int rc; + int i; + + rc = cfs_cpt_num_estimate(); + if (ncpt <= 0) + ncpt = rc; + + if (ncpt > num_online_cpus() || ncpt > 4 * rc) { + CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n", + ncpt, rc); + } + + if (num_online_cpus() % ncpt != 0) { + CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n", + (int)num_online_cpus(), ncpt); + goto failed; + } + + cptab = 
cfs_cpt_table_alloc(ncpt); + if (!cptab) { + CERROR("Failed to allocate CPU map(%d)\n", ncpt); + goto failed; + } + + num = num_online_cpus() / ncpt; + if (num == 0) { + CERROR("CPU changed while setting CPU partition\n"); + goto failed; + } + + LIBCFS_ALLOC(mask, cpumask_size()); + if (!mask) { + CERROR("Failed to allocate scratch cpumask\n"); + goto failed; + } + + for_each_online_node(i) { + cpumask_copy(mask, cpumask_of_node(i)); + + while (!cpumask_empty(mask)) { + struct cfs_cpu_partition *part; + int n; + + if (cpt >= ncpt) + goto failed; + + part = &cptab->ctb_parts[cpt]; + + n = num - cpumask_weight(part->cpt_cpumask); + LASSERT(n > 0); + + rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); + if (rc < 0) + goto failed; + + LASSERT(num >= cpumask_weight(part->cpt_cpumask)); + if (num == cpumask_weight(part->cpt_cpumask)) + cpt++; + } + } + + if (cpt != ncpt || + num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { + CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n", + cptab->ctb_nparts, num, cpt, + cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); + goto failed; + } + + LIBCFS_FREE(mask, cpumask_size()); + + return cptab; + + failed: + CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n", + ncpt, num_online_nodes(), num_online_cpus()); + + if (mask) + LIBCFS_FREE(mask, cpumask_size()); + + if (cptab) + cfs_cpt_table_free(cptab); + + return NULL; +} + +static struct cfs_cpt_table * +cfs_cpt_table_create_pattern(char *pattern) +{ + struct cfs_cpt_table *cptab; + char *str = pattern; + int node = 0; + int high; + int ncpt; + int c; + + for (ncpt = 0;; ncpt++) { /* quick scan bracket */ + str = strchr(str, '['); + if (!str) + break; + str++; + } + + str = cfs_trimwhite(pattern); + if (*str == 'n' || *str == 'N') { + pattern = str + 1; + node = 1; + } + + if (ncpt == 0 || + (node && ncpt > num_online_nodes()) || + (!node && ncpt > num_online_cpus())) { + 
CERROR("Invalid pattern %s, or too many partitions %d\n", + pattern, ncpt); + return NULL; + } + + high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; + + cptab = cfs_cpt_table_alloc(ncpt); + if (!cptab) { + CERROR("Failed to allocate cpu partition table\n"); + return NULL; + } + + for (str = cfs_trimwhite(pattern), c = 0;; c++) { + struct cfs_range_expr *range; + struct cfs_expr_list *el; + char *bracket = strchr(str, '['); + int cpt; + int rc; + int i; + int n; + + if (!bracket) { + if (*str != 0) { + CERROR("Invalid pattern %s\n", str); + goto failed; + } + if (c != ncpt) { + CERROR("expect %d partitions but found %d\n", + ncpt, c); + goto failed; + } + break; + } + + if (sscanf(str, "%d%n", &cpt, &n) < 1) { + CERROR("Invalid cpu pattern %s\n", str); + goto failed; + } + + if (cpt < 0 || cpt >= ncpt) { + CERROR("Invalid partition id %d, total partitions %d\n", + cpt, ncpt); + goto failed; + } + + if (cfs_cpt_weight(cptab, cpt) != 0) { + CERROR("Partition %d has already been set.\n", cpt); + goto failed; + } + + str = cfs_trimwhite(str + n); + if (str != bracket) { + CERROR("Invalid pattern %s\n", str); + goto failed; + } + + bracket = strchr(str, ']'); + if (!bracket) { + CERROR("missing right bracket for cpt %d, %s\n", + cpt, str); + goto failed; + } + + if (cfs_expr_list_parse(str, (bracket - str) + 1, + 0, high, &el) != 0) { + CERROR("Can't parse number range: %s\n", str); + goto failed; + } + + list_for_each_entry(range, &el->el_exprs, re_link) { + for (i = range->re_lo; i <= range->re_hi; i++) { + if ((i - range->re_lo) % range->re_stride != 0) + continue; + + rc = node ? 
cfs_cpt_set_node(cptab, cpt, i) : + cfs_cpt_set_cpu(cptab, cpt, i); + if (!rc) { + cfs_expr_list_free(el); + goto failed; + } + } + } + + cfs_expr_list_free(el); + + if (!cfs_cpt_online(cptab, cpt)) { + CERROR("No online CPU is found on partition %d\n", cpt); + goto failed; + } + + str = cfs_trimwhite(bracket + 1); + } + + return cptab; + + failed: + cfs_cpt_table_free(cptab); + return NULL; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int +cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + bool warn; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + spin_lock(&cpt_data.cpt_lock); + cpt_data.cpt_version++; + spin_unlock(&cpt_data.cpt_lock); + /* Fall through */ + default: + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) { + CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n", + cpu, action); + break; + } + + mutex_lock(&cpt_data.cpt_mutex); + /* if all HTs in a core are offline, it may break affinity */ + cpumask_copy(cpt_data.cpt_cpumask, + topology_sibling_cpumask(cpu)); + warn = cpumask_any_and(cpt_data.cpt_cpumask, + cpu_online_mask) >= nr_cpu_ids; + mutex_unlock(&cpt_data.cpt_mutex); + CDEBUG(warn ? 
D_WARNING : D_INFO, + "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n", + cpu, action); + } + + return NOTIFY_OK; +} + +static struct notifier_block cfs_cpu_notifier = { + .notifier_call = cfs_cpu_notify, + .priority = 0 +}; + +#endif + +void +cfs_cpu_fini(void) +{ + if (cfs_cpt_table) + cfs_cpt_table_free(cfs_cpt_table); + +#ifdef CONFIG_HOTPLUG_CPU + unregister_hotcpu_notifier(&cfs_cpu_notifier); +#endif + if (cpt_data.cpt_cpumask) + LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size()); +} + +int +cfs_cpu_init(void) +{ + LASSERT(!cfs_cpt_table); + + memset(&cpt_data, 0, sizeof(cpt_data)); + + LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size()); + if (!cpt_data.cpt_cpumask) { + CERROR("Failed to allocate scratch buffer\n"); + return -1; + } + + spin_lock_init(&cpt_data.cpt_lock); + mutex_init(&cpt_data.cpt_mutex); + +#ifdef CONFIG_HOTPLUG_CPU + register_hotcpu_notifier(&cfs_cpu_notifier); +#endif + + if (*cpu_pattern != 0) { + cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern); + if (!cfs_cpt_table) { + CERROR("Failed to create cptab from pattern %s\n", + cpu_pattern); + goto failed; + } + + } else { + cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions); + if (!cfs_cpt_table) { + CERROR("Failed to create ptable with npartitions %d\n", + cpu_npartitions); + goto failed; + } + } + + spin_lock(&cpt_data.cpt_lock); + if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) { + spin_unlock(&cpt_data.cpt_lock); + CERROR("CPU hotplug/unplug during setup\n"); + goto failed; + } + spin_unlock(&cpt_data.cpt_lock); + + LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n", + num_online_cpus(), cfs_cpt_number(cfs_cpt_table)); + return 0; + + failed: + cfs_cpu_fini(); + return -1; +} + +#endif diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c new file mode 100644 index 000000000000..db0572733712 --- /dev/null +++ 
b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c @@ -0,0 +1,137 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + */ + +/* + * This is crypto api shash wrappers to zlib_adler32. 
+ */ + +#include +#include +#include +#include "linux-crypto.h" + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +static int adler32_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 1; + + return 0; +} + +static int adler32_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = *(u32 *)key; + return 0; +} + +static int adler32_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *cksump = shash_desc_ctx(desc); + + *cksump = *mctx; + + return 0; +} + +static int adler32_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *cksump = shash_desc_ctx(desc); + + *cksump = zlib_adler32(*cksump, data, len); + return 0; +} + +static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len, + u8 *out) +{ + *(u32 *)out = zlib_adler32(*cksump, data, len); + return 0; +} + +static int adler32_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __adler32_finup(shash_desc_ctx(desc), data, len, out); +} + +static int adler32_final(struct shash_desc *desc, u8 *out) +{ + u32 *cksump = shash_desc_ctx(desc); + + *(u32 *)out = *cksump; + return 0; +} + +static int adler32_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = adler32_setkey, + .init = adler32_init, + .update = adler32_update, + .final = adler32_final, + .finup = adler32_finup, + .digest = adler32_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "adler32", + .cra_driver_name = "adler32-zlib", + .cra_priority = 100, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = 
THIS_MODULE, + .cra_init = adler32_cra_init, + } +}; + +int cfs_crypto_adler32_register(void) +{ + return crypto_register_shash(&alg); +} + +void cfs_crypto_adler32_unregister(void) +{ + crypto_unregister_shash(&alg); +} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c new file mode 100644 index 000000000000..1d2f70fda944 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c @@ -0,0 +1,284 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Copyright (c) 2012, Intel Corporation. 
+ */ + +#include +#include +#include "../../../include/linux/libcfs/libcfs.h" +#include "linux-crypto.h" +/** + * Array of hash algorithm speed in MByte per second + */ +static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; + +static int cfs_crypto_hash_alloc(unsigned char alg_id, + const struct cfs_crypto_hash_type **type, + struct hash_desc *desc, unsigned char *key, + unsigned int key_len) +{ + int err = 0; + + *type = cfs_crypto_hash_type(alg_id); + + if (!*type) { + CWARN("Unsupported hash algorithm id = %d, max id is %d\n", + alg_id, CFS_HASH_ALG_MAX); + return -EINVAL; + } + desc->tfm = crypto_alloc_hash((*type)->cht_name, 0, 0); + + if (!desc->tfm) + return -EINVAL; + + if (IS_ERR(desc->tfm)) { + CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n", + (*type)->cht_name); + return PTR_ERR(desc->tfm); + } + + desc->flags = 0; + + /** Shash have different logic for initialization then digest + * shash: crypto_hash_setkey, crypto_hash_init + * digest: crypto_digest_init, crypto_digest_setkey + * Skip this function for digest, because we use shash logic at + * cfs_crypto_hash_alloc. 
+ */ + if (key) + err = crypto_hash_setkey(desc->tfm, key, key_len); + else if ((*type)->cht_key != 0) + err = crypto_hash_setkey(desc->tfm, + (unsigned char *)&((*type)->cht_key), + (*type)->cht_size); + + if (err != 0) { + crypto_free_hash(desc->tfm); + return err; + } + + CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n", + (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_name, + (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_driver_name, + cfs_crypto_hash_speeds[alg_id]); + + return crypto_hash_init(desc); +} + +int cfs_crypto_hash_digest(unsigned char alg_id, + const void *buf, unsigned int buf_len, + unsigned char *key, unsigned int key_len, + unsigned char *hash, unsigned int *hash_len) +{ + struct scatterlist sl; + struct hash_desc hdesc; + int err; + const struct cfs_crypto_hash_type *type; + + if (!buf || buf_len == 0 || !hash_len) + return -EINVAL; + + err = cfs_crypto_hash_alloc(alg_id, &type, &hdesc, key, key_len); + if (err != 0) + return err; + + if (!hash || *hash_len < type->cht_size) { + *hash_len = type->cht_size; + crypto_free_hash(hdesc.tfm); + return -ENOSPC; + } + sg_init_one(&sl, buf, buf_len); + + hdesc.flags = 0; + err = crypto_hash_digest(&hdesc, &sl, sl.length, hash); + crypto_free_hash(hdesc.tfm); + + return err; +} +EXPORT_SYMBOL(cfs_crypto_hash_digest); + +struct cfs_crypto_hash_desc * + cfs_crypto_hash_init(unsigned char alg_id, + unsigned char *key, unsigned int key_len) +{ + struct hash_desc *hdesc; + int err; + const struct cfs_crypto_hash_type *type; + + hdesc = kmalloc(sizeof(*hdesc), 0); + if (!hdesc) + return ERR_PTR(-ENOMEM); + + err = cfs_crypto_hash_alloc(alg_id, &type, hdesc, key, key_len); + + if (err) { + kfree(hdesc); + return ERR_PTR(err); + } + return (struct cfs_crypto_hash_desc *)hdesc; +} +EXPORT_SYMBOL(cfs_crypto_hash_init); + +int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc, + struct page *page, unsigned int offset, + unsigned int len) +{ + struct scatterlist sl; + + sg_init_table(&sl, 1); 
+	sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK);
+
+	return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length);
+}
+EXPORT_SYMBOL(cfs_crypto_hash_update_page);
+
+/*
+ * Feed buf_len bytes starting at buf into the hash in progress described
+ * by hdesc.  Returns the result of crypto_hash_update().
+ */
+int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc,
+			   const void *buf, unsigned int buf_len)
+{
+	struct scatterlist sl;
+
+	sg_init_one(&sl, buf, buf_len);
+
+	return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length);
+}
+EXPORT_SYMBOL(cfs_crypto_hash_update);
+
+/*
+ * Finish the hash and release the descriptor.
+ *
+ * If hash_len pointer is NULL - destroy descriptor.
+ * If hash is NULL or *hash_len is smaller than the digest size, the required
+ * size is stored in *hash_len and -ENOSPC is returned; the descriptor is NOT
+ * released in that case, nor when crypto_hash_final() fails, so the caller
+ * may retry.  On success the tfm and the descriptor are freed.
+ */
+int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
+			  unsigned char *hash, unsigned int *hash_len)
+{
+	int err;
+	int size = crypto_hash_digestsize(((struct hash_desc *)hdesc)->tfm);
+
+	if (!hash_len) {
+		crypto_free_hash(((struct hash_desc *)hdesc)->tfm);
+		kfree(hdesc);
+		return 0;
+	}
+	if (!hash || *hash_len < size) {
+		*hash_len = size;
+		return -ENOSPC;
+	}
+	err = crypto_hash_final((struct hash_desc *)hdesc, hash);
+
+	if (err < 0) {
+		/* May be caller can fix error */
+		return err;
+	}
+	crypto_free_hash(((struct hash_desc *)hdesc)->tfm);
+	kfree(hdesc);
+	return err;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_final);
+
+/*
+ * Digest buf over and over for one second with algorithm alg_id and record
+ * the measured throughput (MB/s) in cfs_crypto_hash_speeds[alg_id]; -1 is
+ * recorded if any digest call fails.
+ */
+static void cfs_crypto_performance_test(unsigned char alg_id,
+					const unsigned char *buf,
+					unsigned int buf_len)
+{
+	unsigned long start, end;
+	/* unsigned long so that bcount * buf_len is not evaluated in 32-bit
+	 * unsigned int arithmetic, which wraps after 4 GiB hashed per second
+	 */
+	unsigned long bcount;
+	int err = 0;
+	int sec = 1; /* do test only 1 sec */
+	unsigned char hash[64];
+	unsigned int hash_len = 64;
+
+	for (start = jiffies, end = start + sec * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0,
+					     hash, &hash_len);
+		if (err)
+			break;
+	}
+	end = jiffies;
+
+	if (err) {
+		cfs_crypto_hash_speeds[alg_id] = -1;
+		CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n",
+		       cfs_crypto_hash_name(alg_id), err);
+	} else {
+		unsigned long tmp;
+
+		/* bytes per millisecond -> bytes per second -> MB/s */
+		tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
+		       1000) / (1024 * 1024);
+		cfs_crypto_hash_speeds[alg_id] = (int)tmp;
+	}
+	CDEBUG(D_INFO, "Crypto 
hash algorithm %s speed = %d MB/s\n",
+	       cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]);
+}
+
+/*
+ * Return the speed recorded for hash_alg in cfs_crypto_hash_speeds[], or -1
+ * when hash_alg is not below CFS_HASH_ALG_MAX.
+ */
+int cfs_crypto_hash_speed(unsigned char hash_alg)
+{
+	if (hash_alg < CFS_HASH_ALG_MAX)
+		return cfs_crypto_hash_speeds[hash_alg];
+	return -1;
+}
+EXPORT_SYMBOL(cfs_crypto_hash_speed);
+
+/**
+ * Do performance test for all hash algorithms.
+ *
+ * Returns 0 on success or -ENOMEM if the test buffer cannot be allocated.
+ */
+static int cfs_crypto_test_hashes(void)
+{
+	unsigned char i;
+	unsigned char *data;
+	unsigned int j;
+	/* Data block size for testing hash. Maximum
+	 * kmalloc size for 2.6.18 kernel is 128K
+	 */
+	unsigned int data_len = 1 * 128 * 1024;
+
+	/* The only caller, cfs_crypto_register(), already sleeps in
+	 * request_module(), so a sleeping allocation is fine here; a GFP
+	 * mask of 0 forbids reclaim and makes a 128K request likely to fail.
+	 */
+	data = kmalloc(data_len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* fill the buffer with a repeating 0x00..0xff pattern */
+	for (j = 0; j < data_len; j++)
+		data[j] = j & 0xff;
+
+	for (i = 0; i < CFS_HASH_ALG_MAX; i++)
+		cfs_crypto_performance_test(i, data, data_len);
+
+	kfree(data);
+	return 0;
+}
+
+/* result of cfs_crypto_adler32_register(); 0 means adler32 is registered */
+static int adler32;
+
+/*
+ * Request the crc32c module, register the adler32 shash and benchmark all
+ * hash algorithms.  Always returns 0; failures of the individual steps are
+ * not propagated.
+ */
+int cfs_crypto_register(void)
+{
+	request_module("crc32c");
+
+	adler32 = cfs_crypto_adler32_register();
+
+	/* check all algorithms and do performance test */
+	cfs_crypto_test_hashes();
+	return 0;
+}
+
+/* Unregister adler32, but only if its registration succeeded. */
+void cfs_crypto_unregister(void)
+{
+	if (adler32 == 0)
+		cfs_crypto_adler32_unregister();
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
new file mode 100644
index 000000000000..18e8cd4d8758
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
@@ -0,0 +1,29 @@
+ /*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/** + * Functions for start/stop shash adler32 algorithm. + */ +int cfs_crypto_adler32_register(void); +void cfs_crypto_adler32_unregister(void); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c new file mode 100644 index 000000000000..13d31e8a931d --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c @@ -0,0 +1,111 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/linux/linux-curproc.c + * + * Lustre curproc API implementation for Linux kernel + * + * Author: Nikita Danilov + */ + +#include +#include + +#include +#include + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../../include/linux/libcfs/libcfs.h" + +/* + * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) + * for Linux kernel. + */ + +void cfs_cap_raise(cfs_cap_t cap) +{ + struct cred *cred; + + cred = prepare_creds(); + if (cred) { + cap_raise(cred->cap_effective, cap); + commit_creds(cred); + } +} +EXPORT_SYMBOL(cfs_cap_raise); + +void cfs_cap_lower(cfs_cap_t cap) +{ + struct cred *cred; + + cred = prepare_creds(); + if (cred) { + cap_lower(cred->cap_effective, cap); + commit_creds(cred); + } +} +EXPORT_SYMBOL(cfs_cap_lower); + +int cfs_cap_raised(cfs_cap_t cap) +{ + return cap_raised(current_cap(), cap); +} +EXPORT_SYMBOL(cfs_cap_raised); + +static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap) +{ + /* XXX lost high byte */ + *cap = kcap.cap[0]; +} + +cfs_cap_t cfs_curproc_cap_pack(void) +{ + cfs_cap_t cap; + + cfs_kernel_cap_pack(current_cap(), &cap); + return cap; +} +EXPORT_SYMBOL(cfs_curproc_cap_pack); + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c new file mode 100644 index 000000000000..638e4b33d3a9 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c @@ -0,0 +1,200 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/linux/linux-debug.c + * + * Author: Phil Schwan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +# define DEBUG_SUBSYSTEM S_LNET + +#include "../../../include/linux/libcfs/libcfs.h" + +#include "../tracefile.h" + +#include + +char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; +char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; + +/** + * Upcall function once a Lustre log has been dumped. 
+ * + * \param file path of the dumped log + */ +void libcfs_run_debug_log_upcall(char *file) +{ + char *argv[3]; + int rc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + + argv[0] = lnet_debug_log_upcall; + + LASSERTF(file, "called on a null filename\n"); + argv[1] = file; /* only need to pass the path of the file */ + + argv[2] = NULL; + + rc = call_usermodehelper(argv[0], argv, envp, 1); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", + rc, argv[0], argv[1]); + } else { + CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", + argv[0], argv[1]); + } +} + +void libcfs_run_upcall(char **argv) +{ + int rc; + int argc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + + argv[0] = lnet_upcall; + argc = 1; + while (argv[argc]) + argc++; + + LASSERT(argc >= 2); + + rc = call_usermodehelper(argv[0], argv, envp, 1); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n", + rc, argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? "" : ",..."); + } else { + CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", + argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? 
"" : ",..."); + } +} + +void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata) +{ + char *argv[6]; + char buf[32]; + + snprintf(buf, sizeof(buf), "%d", msgdata->msg_line); + + argv[1] = "LBUG"; + argv[2] = (char *)msgdata->msg_file; + argv[3] = (char *)msgdata->msg_fn; + argv[4] = buf; + argv[5] = NULL; + + libcfs_run_upcall(argv); +} +EXPORT_SYMBOL(libcfs_run_lbug_upcall); + +/* coverity[+kill] */ +void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) +{ + libcfs_catastrophe = 1; + libcfs_debug_msg(msgdata, "LBUG\n"); + + if (in_interrupt()) { + panic("LBUG in interrupt.\n"); + /* not reached */ + } + + dump_stack(); + if (!libcfs_panic_on_lbug) + libcfs_debug_dumplog(); + libcfs_run_lbug_upcall(msgdata); + if (libcfs_panic_on_lbug) + panic("LBUG"); + set_task_state(current, TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +EXPORT_SYMBOL(lbug_with_loc); + +static int panic_notifier(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + if (libcfs_panic_in_progress) + return 0; + + libcfs_panic_in_progress = 1; + mb(); + + return 0; +} + +static struct notifier_block libcfs_panic_notifier = { + .notifier_call = panic_notifier, + .next = NULL, + .priority = 10000, +}; + +void libcfs_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &libcfs_panic_notifier); +} + +void libcfs_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &libcfs_panic_notifier); +} diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c new file mode 100644 index 000000000000..86f32ffc5d04 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c @@ -0,0 +1,59 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ */
+/*
+ * This file creates a memory allocation primitive for Lustre, that
+ * allows to fallback to vmalloc allocations should regular kernel allocations
+ * fail due to size or system memory fragmentation.
+ *
+ * Author: Oleg Drokin
+ *
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Seagate Technology.
+ */
+#include
+#include
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+/*
+ * Allocate size bytes of zeroed memory: try kzalloc() first (with the
+ * failure warning suppressed) and fall back to a zeroing __vmalloc().
+ * Returns NULL if both attempts fail.
+ */
+void *libcfs_kvzalloc(size_t size, gfp_t flags)
+{
+	void *ret;
+
+	ret = kzalloc(size, flags | __GFP_NOWARN);
+	if (!ret)
+		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
+	return ret;
+}
+EXPORT_SYMBOL(libcfs_kvzalloc);
+
+/*
+ * Same as libcfs_kvzalloc(), but allocate on the NUMA node chosen by
+ * cfs_cpt_spread_node() for partition cpt of cptab.
+ *
+ * The vmalloc fallback does not take the caller's gfp flags, hence the
+ * WARN_ON for flag sets lacking both __GFP_FS and __GFP_HIGH.
+ */
+void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size,
+			  gfp_t flags)
+{
+	void *ret;
+
+	ret = kzalloc_node(size, flags | __GFP_NOWARN,
+			   cfs_cpt_spread_node(cptab, cpt));
+	if (!ret) {
+		WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH)));
+		/* must be the zeroing variant: callers of a kvzalloc helper
+		 * expect zeroed memory on the fallback path as well
+		 */
+		ret = vzalloc_node(size, cfs_cpt_spread_node(cptab, cpt));
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(libcfs_kvzalloc_cpt);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
new file mode 100644
index 000000000000..ebc60ac9bb7a
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
@@ -0,0 +1,159 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include "../../../include/linux/libcfs/libcfs.h" + +#define LNET_MINOR 240 + +int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) +{ + if (libcfs_ioctl_is_invalid(data)) { + CERROR("LNET: ioctl not correctly formatted\n"); + return -EINVAL; + } + + if (data->ioc_inllen1) + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + + if (data->ioc_inllen2) + data->ioc_inlbuf2 = &data->ioc_bulk[0] + + cfs_size_round(data->ioc_inllen1); + + return 0; +} + +int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg, + __u32 *len) +{ + struct libcfs_ioctl_hdr hdr; + + if (copy_from_user(&hdr, arg, sizeof(hdr))) + return -EFAULT; + + if (hdr.ioc_version != LIBCFS_IOCTL_VERSION && + hdr.ioc_version != LIBCFS_IOCTL_VERSION2) { + CERROR("LNET: version mismatch expected %#x, got %#x\n", + LIBCFS_IOCTL_VERSION, hdr.ioc_version); + return -EINVAL; + } + + *len = hdr.ioc_len; + + return 0; +} + +int libcfs_ioctl_popdata(void __user *arg, void *data, int size) +{ + if (copy_to_user(arg, data, size)) + return -EFAULT; + return 0; +} + +static int +libcfs_psdev_open(struct inode *inode, struct file *file) +{ + int rc = 0; + + if (!inode) + return -EINVAL; + if (libcfs_psdev_ops.p_open) + rc = libcfs_psdev_ops.p_open(0, NULL); + else + return -EPERM; + return rc; +} + +/* called when closing /dev/device */ +static int +libcfs_psdev_release(struct inode *inode, struct file *file) +{ + int rc = 0; + + if (!inode) + return -EINVAL; + if (libcfs_psdev_ops.p_close) + rc = libcfs_psdev_ops.p_close(0, NULL); + else + rc = -EPERM; + return rc; +} + +static long libcfs_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct cfs_psdev_file pfile; + int rc = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || + _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || + _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) { + CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), 
_IOC_SIZE(cmd)); + return -EINVAL; + } + + /* Handle platform-dependent IOC requests */ + switch (cmd) { + case IOC_LIBCFS_PANIC: + if (!capable(CFS_CAP_SYS_BOOT)) + return -EPERM; + panic("debugctl-invoked panic"); + return 0; + } + + if (libcfs_psdev_ops.p_ioctl) + rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg); + else + rc = -EPERM; + return rc; +} + +static const struct file_operations libcfs_fops = { + .unlocked_ioctl = libcfs_ioctl, + .open = libcfs_psdev_open, + .release = libcfs_psdev_release, +}; + +struct miscdevice libcfs_dev = { + .minor = LNET_MINOR, + .name = "lnet", + .fops = &libcfs_fops, +}; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c new file mode 100644 index 000000000000..89084460231a --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -0,0 +1,147 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. 
All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include
+#include
+#include
+#include
+
+#include "../../../include/linux/libcfs/libcfs.h"
+
+#if defined(CONFIG_KGDB)
+#include
+#endif
+
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be waken up again and again, even user only needs a few of them to be
+ * active most time. This is not good for performance because cache can
+ * be polluted by different threads.
+ *
+ * LIFO list can resolve this problem because we always wakeup the most
+ * recent active thread by default.
+ *
+ * NB: please don't call non-exclusive & exclusive wait on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ */
+void
+add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&waitq->lock, flags);
+	__add_wait_queue_exclusive(waitq, link);
+	spin_unlock_irqrestore(&waitq->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_head);
+
+/*
+ * Block every signal for the current task and return the previous blocked
+ * mask, under current->sighand->siglock.
+ */
+sigset_t
+cfs_block_allsigs(void)
+{
+	unsigned long flags;
+	sigset_t old;
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	old = current->blocked;
+	sigfillset(&current->blocked);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+
+	return old;
+}
+EXPORT_SYMBOL(cfs_block_allsigs);
+
+/*
+ * Add the signals in mask @sigs to the current task's blocked set and
+ * return the previous blocked mask.
+ */
+sigset_t cfs_block_sigs(unsigned long sigs)
+{
+	unsigned long flags;
+	sigset_t old;
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	old = current->blocked;
+	sigaddsetmask(&current->blocked, sigs);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	return old;
+}
+EXPORT_SYMBOL(cfs_block_sigs);
+
+/* Block all signals except for the @sigs */
+sigset_t 
cfs_block_sigsinv(unsigned long sigs)
+{
+	unsigned long flags;
+	sigset_t old;
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	old = current->blocked;
+	/* add the complement of @sigs to the blocked set */
+	sigaddsetmask(&current->blocked, ~sigs);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+
+	return old;
+}
+EXPORT_SYMBOL(cfs_block_sigsinv);
+
+/*
+ * Restore the current task's blocked mask to @old, a value previously
+ * returned by one of the cfs_block_*() helpers.
+ */
+void
+cfs_restore_sigs(sigset_t old)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	current->blocked = old;
+	recalc_sigpending();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+}
+EXPORT_SYMBOL(cfs_restore_sigs);
+
+/* Return the result of signal_pending() for the current task. */
+int
+cfs_signal_pending(void)
+{
+	return signal_pending(current);
+}
+EXPORT_SYMBOL(cfs_signal_pending);
+
+/* Clear TIF_SIGPENDING on the current task, under its siglock. */
+void
+cfs_clear_sigpending(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	clear_tsk_thread_flag(current, TIF_SIGPENDING);
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+}
+EXPORT_SYMBOL(cfs_clear_sigpending);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
new file mode 100644
index 000000000000..91c2ae8f9d67
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
@@ -0,0 +1,259 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include "../../../include/linux/libcfs/libcfs.h"
+#include "../tracefile.h"
+
+/* percents to share the total debug memory for each type */
+static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
+	80,  /* 80% pages for CFS_TCD_TYPE_PROC */
+	10,  /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
+	10   /* 10% pages for CFS_TCD_TYPE_IRQ */
+};
+
+char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
+
+static DECLARE_RWSEM(cfs_tracefile_sem);
+
+/*
+ * Allocate and initialise the per-type trace data arrays and the per-CPU,
+ * per-type console buffers.  Returns 0 on success; on allocation failure
+ * everything is released through cfs_tracefile_fini_arch() and -ENOMEM is
+ * returned.
+ */
+int cfs_tracefile_init_arch(void)
+{
+	int i;
+	int j;
+	struct cfs_trace_cpu_data *tcd;
+
+	/* initialize trace_data */
+	memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
+	for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
+		cfs_trace_data[i] =
+			kmalloc(sizeof(union cfs_trace_data_union) *
+				num_possible_cpus(), GFP_KERNEL);
+		if (!cfs_trace_data[i])
+			goto out;
+	}
+
+	/* arch related info initialized */
+	cfs_tcd_for_each(tcd, i, j) {
+		spin_lock_init(&tcd->tcd_lock);
+		tcd->tcd_pages_factor = pages_factor[i];
+		tcd->tcd_type = i;
+		tcd->tcd_cpu = j;
+	}
+
+	/* one console buffer per CPU and per trace type; the inner bound is
+	 * the second dimension of cfs_trace_console_buffers[][]
+	 */
+	for (i = 0; i < num_possible_cpus(); i++)
+		for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
+			cfs_trace_console_buffers[i][j] =
+				kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
+					GFP_KERNEL);
+
+			if (!cfs_trace_console_buffers[i][j])
+				goto out;
+		}
+
+	return 0; 
+
+out:
+	cfs_tracefile_fini_arch();
+	printk(KERN_ERR "lnet: Not enough memory\n");
+	return -ENOMEM;
+}
+
+/*
+ * Free every console buffer and every cfs_trace_data[] array and reset the
+ * pointers to NULL, so it is safe to call after a partial initialisation.
+ */
+void cfs_tracefile_fini_arch(void)
+{
+	int i;
+	int j;
+
+	for (i = 0; i < num_possible_cpus(); i++)
+		for (j = 0; j < CFS_TCD_TYPE_MAX; j++) {
+			kfree(cfs_trace_console_buffers[i][j]);
+			cfs_trace_console_buffers[i][j] = NULL;
+		}
+
+	/* only the first CFS_TCD_TYPE_MAX entries are ever allocated; stop
+	 * there instead of relying solely on a NULL terminator
+	 */
+	for (i = 0; i < CFS_TCD_TYPE_MAX && cfs_trace_data[i]; i++) {
+		kfree(cfs_trace_data[i]);
+		cfs_trace_data[i] = NULL;
+	}
+}
+
+/* Take cfs_tracefile_sem for reading. */
+void cfs_tracefile_read_lock(void)
+{
+	down_read(&cfs_tracefile_sem);
+}
+
+/* Release a read hold on cfs_tracefile_sem. */
+void cfs_tracefile_read_unlock(void)
+{
+	up_read(&cfs_tracefile_sem);
+}
+
+/* Take cfs_tracefile_sem for writing. */
+void cfs_tracefile_write_lock(void)
+{
+	down_write(&cfs_tracefile_sem);
+}
+
+/* Release the write hold on cfs_tracefile_sem. */
+void cfs_tracefile_write_unlock(void)
+{
+	up_write(&cfs_tracefile_sem);
+}
+
+/*
+ * Pick the trace buffer type for the current execution context: hard IRQ,
+ * soft IRQ, or process context otherwise.
+ */
+enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
+{
+	if (in_irq())
+		return CFS_TCD_TYPE_IRQ;
+	if (in_softirq())
+		return CFS_TCD_TYPE_SOFTIRQ;
+	return CFS_TCD_TYPE_PROC;
+}
+
+/*
+ * The walking argument indicates the locking comes from all tcd types
+ * iterator and we must lock it and disable local irqs to avoid deadlocks
+ * with other interrupt locks that might be happening. See LU-1311
+ * for details. 
+ */
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+	__acquires(&tcd->tcd_lock)
+{
+	__LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
+	/* the locking flavour depends on the context the tcd is used from */
+	if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+		spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
+	else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+		spin_lock_bh(&tcd->tcd_lock);
+	else if (unlikely(walking))
+		spin_lock_irq(&tcd->tcd_lock);
+	else
+		spin_lock(&tcd->tcd_lock);
+	return 1;
+}
+
+/*
+ * Release tcd->tcd_lock with the unlock flavour that mirrors
+ * cfs_trace_lock_tcd(); @walking must match the value used when locking.
+ */
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+	__releases(&tcd->tcd_lock)
+{
+	__LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
+	if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+		spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
+	else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+		spin_unlock_bh(&tcd->tcd_lock);
+	else if (unlikely(walking))
+		spin_unlock_irq(&tcd->tcd_lock);
+	else
+		spin_unlock(&tcd->tcd_lock);
+}
+
+/*
+ * Fill @header from @msgdata plus the current wall-clock time, CPU, trace
+ * buffer type and pid; @stack is stored as given.
+ */
+void
+cfs_set_ptldebug_header(struct ptldebug_header *header,
+			struct libcfs_debug_msg_data *msgdata,
+			unsigned long stack)
+{
+	struct timespec64 ts;
+
+	ktime_get_real_ts64(&ts);
+
+	header->ph_subsys = msgdata->msg_subsys;
+	header->ph_mask = msgdata->msg_mask;
+	header->ph_cpu_id = smp_processor_id();
+	header->ph_type = cfs_trace_buf_idx_get();
+	/* y2038 safe since all user space treats this as unsigned, but
+	 * will overflow in 2106
+	 */
+	header->ph_sec = (u32)ts.tv_sec;
+	header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
+	header->ph_stack = stack;
+	header->ph_pid = current->pid;
+	header->ph_line_num = msgdata->msg_line;
+	header->ph_extern_pid = 0;
+}
+
+/* Console prefix for error messages, chosen by the header's subsystem. */
+static char *
+dbghdr_to_err_string(struct ptldebug_header *hdr)
+{
+	switch (hdr->ph_subsys) {
+	case S_LND:
+	case S_LNET:
+		return "LNetError";
+	default:
+		return "LustreError";
+	}
+}
+
+/* Console prefix for informational messages, chosen by subsystem. */
+static char *
+dbghdr_to_info_string(struct ptldebug_header *hdr)
+{
+	switch (hdr->ph_subsys) {
+	case S_LND:
+	case S_LNET:
+		return "LNet";
+	default:
+		return "Lustre";
+	}
+}
+
+void cfs_print_to_console(struct ptldebug_header *hdr, 
int mask, + const char *buf, int len, const char *file, + const char *fn) +{ + char *prefix = "Lustre", *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = dbghdr_to_err_string(hdr); + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = dbghdr_to_err_string(hdr); + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = dbghdr_to_info_string(hdr); + ptype = KERN_WARNING; + } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { + prefix = dbghdr_to_info_string(hdr); + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, + hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num, + fn, len, buf); + } +} + +int cfs_trace_max_debug_mb(void) +{ + int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); + + return max(512, (total_mb * 80) / 100); +} diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c new file mode 100644 index 000000000000..cdc640bfdba8 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/module.c @@ -0,0 +1,674 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +# define DEBUG_SUBSYSTEM S_LNET + +#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \ + sizeof(struct lnet_ioctl_config_data)) + +#include "../../include/linux/libcfs/libcfs.h" +#include + +#include "../../include/linux/libcfs/libcfs_crypto.h" +#include "../../include/linux/lnet/lib-lnet.h" +#include "../../include/linux/lnet/lib-dlc.h" +#include "../../include/linux/lnet/lnet.h" +#include "tracefile.h" + +static struct dentry *lnet_debugfs_root; + +/* called when opening /dev/device */ +static int libcfs_psdev_open(unsigned long flags, void *args) +{ + try_module_get(THIS_MODULE); + return 0; +} + +/* called when closing /dev/device */ +static int libcfs_psdev_release(unsigned long flags, void *args) +{ + module_put(THIS_MODULE); + return 0; +} + +static DECLARE_RWSEM(ioctl_list_sem); +static LIST_HEAD(ioctl_list); + +int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) +{ + int rc = 0; + + down_write(&ioctl_list_sem); + if (!list_empty(&hand->item)) + rc = -EBUSY; + else + list_add_tail(&hand->item, &ioctl_list); + up_write(&ioctl_list_sem); + + return rc; +} +EXPORT_SYMBOL(libcfs_register_ioctl); 
+ +int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) +{ + int rc = 0; + + down_write(&ioctl_list_sem); + if (list_empty(&hand->item)) + rc = -ENOENT; + else + list_del_init(&hand->item); + up_write(&ioctl_list_sem); + + return rc; +} +EXPORT_SYMBOL(libcfs_deregister_ioctl); + +static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd, + void __user *arg, struct libcfs_ioctl_hdr *hdr) +{ + struct libcfs_ioctl_data *data = NULL; + int err = -EINVAL; + + /* + * The libcfs_ioctl_data_adjust() function performs adjustment + * operations on the libcfs_ioctl_data structure to make + * it usable by the code. This doesn't need to be called + * for new data structures added. + */ + if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) { + data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); + err = libcfs_ioctl_data_adjust(data); + if (err) + return err; + } + + switch (cmd) { + case IOC_LIBCFS_CLEAR_DEBUG: + libcfs_debug_clear_buffer(); + return 0; + /* + * case IOC_LIBCFS_PANIC: + * Handled in arch/cfs_module.c + */ + case IOC_LIBCFS_MARK_DEBUG: + if (!data->ioc_inlbuf1 || + data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') + return -EINVAL; + libcfs_debug_mark_buffer(data->ioc_inlbuf1); + return 0; + + default: { + struct libcfs_ioctl_handler *hand; + + err = -EINVAL; + down_read(&ioctl_list_sem); + list_for_each_entry(hand, &ioctl_list, item) { + err = hand->handle_ioctl(cmd, hdr); + if (err != -EINVAL) { + if (err == 0) + err = libcfs_ioctl_popdata(arg, + hdr, hdr->ioc_len); + break; + } + } + up_read(&ioctl_list_sem); + break; + } + } + + return err; +} + +static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, + void __user *arg) +{ + struct libcfs_ioctl_hdr *hdr; + int err = 0; + __u32 buf_len; + + err = libcfs_ioctl_getdata_len(arg, &buf_len); + if (err) + return err; + + /* + * do a check here to restrict the size of the memory + * to allocate to guard against DoS attacks. 
+ */ + if (buf_len > LNET_MAX_IOCTL_BUF_LEN) { + CERROR("LNET: user buffer exceeds kernel buffer\n"); + return -EINVAL; + } + + LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL); + if (!hdr) + return -ENOMEM; + + /* 'cmd' and permissions get checked in our arch-specific caller */ + if (copy_from_user(hdr, arg, buf_len)) { + CERROR("LNET ioctl: data error\n"); + err = -EFAULT; + goto out; + } + + err = libcfs_ioctl_handle(pfile, cmd, arg, hdr); + +out: + LIBCFS_FREE(hdr, buf_len); + return err; +} + +struct cfs_psdev_ops libcfs_psdev_ops = { + libcfs_psdev_open, + libcfs_psdev_release, + NULL, + NULL, + libcfs_ioctl +}; + +int lprocfs_call_handler(void *data, int write, loff_t *ppos, + void __user *buffer, size_t *lenp, + int (*handler)(void *data, int write, loff_t pos, + void __user *buffer, int len)) +{ + int rc = handler(data, write, *ppos, buffer, *lenp); + + if (rc < 0) + return rc; + + if (write) { + *ppos += *lenp; + } else { + *lenp = rc; + *ppos += rc; + } + return 0; +} +EXPORT_SYMBOL(lprocfs_call_handler); + +static int __proc_dobitmasks(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + const int tmpstrlen = 512; + char *tmpstr; + int rc; + unsigned int *mask = data; + int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; + int is_printk = (mask == &libcfs_printk) ? 
1 : 0; + + rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen); + if (rc < 0) + return rc; + + if (!write) { + libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); + rc = strlen(tmpstr); + + if (pos >= rc) { + rc = 0; + } else { + rc = cfs_trace_copyout_string(buffer, nob, + tmpstr + pos, "\n"); + } + } else { + rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); + if (rc < 0) { + kfree(tmpstr); + return rc; + } + + rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); + /* Always print LBUG/LASSERT to console, so keep this mask */ + if (is_printk) + *mask |= D_EMERG; + } + + kfree(tmpstr); + return rc; +} + +static int proc_dobitmasks(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_dobitmasks); +} + +static int __proc_dump_kernel(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + if (!write) + return 0; + + return cfs_trace_dump_debug_buffer_usrstr(buffer, nob); +} + +static int proc_dump_kernel(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_dump_kernel); +} + +static int __proc_daemon_file(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + if (!write) { + int len = strlen(cfs_tracefile); + + if (pos >= len) + return 0; + + return cfs_trace_copyout_string(buffer, nob, + cfs_tracefile + pos, "\n"); + } + + return cfs_trace_daemon_command_usrstr(buffer, nob); +} + +static int proc_daemon_file(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_daemon_file); +} + +static int libcfs_force_lbug(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + if (write) + LBUG(); + return 0; +} + +static int 
proc_fail_loc(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int rc; + long old_fail_loc = cfs_fail_loc; + + rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); + if (old_fail_loc != cfs_fail_loc) + wake_up(&cfs_race_waitq); + return rc; +} + +static int __proc_cpt_table(void *data, int write, + loff_t pos, void __user *buffer, int nob) +{ + char *buf = NULL; + int len = 4096; + int rc = 0; + + if (write) + return -EPERM; + + LASSERT(cfs_cpt_table); + + while (1) { + LIBCFS_ALLOC(buf, len); + if (!buf) + return -ENOMEM; + + rc = cfs_cpt_table_print(cfs_cpt_table, buf, len); + if (rc >= 0) + break; + + if (rc == -EFBIG) { + LIBCFS_FREE(buf, len); + len <<= 1; + continue; + } + goto out; + } + + if (pos >= rc) { + rc = 0; + goto out; + } + + rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL); + out: + if (buf) + LIBCFS_FREE(buf, len); + return rc; +} + +static int proc_cpt_table(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, + __proc_cpt_table); +} + +static struct ctl_table lnet_table[] = { + { + .procname = "debug", + .data = &libcfs_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks, + }, + { + .procname = "subsystem_debug", + .data = &libcfs_subsystem_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks, + }, + { + .procname = "printk", + .data = &libcfs_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks, + }, + { + .procname = "cpu_partition_table", + .maxlen = 128, + .mode = 0444, + .proc_handler = &proc_cpt_table, + }, + + { + .procname = "upcall", + .data = lnet_upcall, + .maxlen = sizeof(lnet_upcall), + .mode = 0644, + .proc_handler = &proc_dostring, + }, + { + .procname = "debug_log_upcall", + .data = lnet_debug_log_upcall, + .maxlen = sizeof(lnet_debug_log_upcall), + .mode = 0644, + 
.proc_handler = &proc_dostring, + }, + { + .procname = "catastrophe", + .data = &libcfs_catastrophe, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .procname = "dump_kernel", + .maxlen = 256, + .mode = 0200, + .proc_handler = &proc_dump_kernel, + }, + { + .procname = "daemon_file", + .mode = 0644, + .maxlen = 256, + .proc_handler = &proc_daemon_file, + }, + { + .procname = "force_lbug", + .data = NULL, + .maxlen = 0, + .mode = 0200, + .proc_handler = &libcfs_force_lbug + }, + { + .procname = "fail_loc", + .data = &cfs_fail_loc, + .maxlen = sizeof(cfs_fail_loc), + .mode = 0644, + .proc_handler = &proc_fail_loc + }, + { + .procname = "fail_val", + .data = &cfs_fail_val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + } +}; + +static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = { + { "console_ratelimit", + "/sys/module/libcfs/parameters/libcfs_console_ratelimit"}, + { "debug_path", + "/sys/module/libcfs/parameters/libcfs_debug_file_path"}, + { "panic_on_lbug", + "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"}, + { "libcfs_console_backoff", + "/sys/module/libcfs/parameters/libcfs_console_backoff"}, + { "debug_mb", + "/sys/module/libcfs/parameters/libcfs_debug_mb"}, + { "console_min_delay_centisecs", + "/sys/module/libcfs/parameters/libcfs_console_min_delay"}, + { "console_max_delay_centisecs", + "/sys/module/libcfs/parameters/libcfs_console_max_delay"}, + {}, +}; + +static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct ctl_table *table = filp->private_data; + int error; + + error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos); + if (!error) + error = count; + + return error; +} + +static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct ctl_table *table = filp->private_data; + int error; + + error = table->proc_handler(table, 1, 
(void __user *)buf, &count, ppos); + if (!error) + error = count; + + return error; +} + +static const struct file_operations lnet_debugfs_file_operations_rw = { + .open = simple_open, + .read = lnet_debugfs_read, + .write = lnet_debugfs_write, + .llseek = default_llseek, +}; + +static const struct file_operations lnet_debugfs_file_operations_ro = { + .open = simple_open, + .read = lnet_debugfs_read, + .llseek = default_llseek, +}; + +static const struct file_operations lnet_debugfs_file_operations_wo = { + .open = simple_open, + .write = lnet_debugfs_write, + .llseek = default_llseek, +}; + +static const struct file_operations *lnet_debugfs_fops_select(umode_t mode) +{ + if (!(mode & S_IWUGO)) + return &lnet_debugfs_file_operations_ro; + + if (!(mode & S_IRUGO)) + return &lnet_debugfs_file_operations_wo; + + return &lnet_debugfs_file_operations_rw; +} + +void lustre_insert_debugfs(struct ctl_table *table, + const struct lnet_debugfs_symlink_def *symlinks) +{ + if (!lnet_debugfs_root) + lnet_debugfs_root = debugfs_create_dir("lnet", NULL); + + /* Even if we cannot create, just ignore it altogether) */ + if (IS_ERR_OR_NULL(lnet_debugfs_root)) + return; + + /* We don't save the dentry returned in next two calls, because + * we don't call debugfs_remove() but rather remove_recursive() + */ + for (; table->procname; table++) + debugfs_create_file(table->procname, table->mode, + lnet_debugfs_root, table, + lnet_debugfs_fops_select(table->mode)); + + for (; symlinks && symlinks->name; symlinks++) + debugfs_create_symlink(symlinks->name, lnet_debugfs_root, + symlinks->target); +} +EXPORT_SYMBOL_GPL(lustre_insert_debugfs); + +static void lustre_remove_debugfs(void) +{ + debugfs_remove_recursive(lnet_debugfs_root); + + lnet_debugfs_root = NULL; +} + +static int libcfs_init(void) +{ + int rc; + + rc = libcfs_debug_init(5 * 1024 * 1024); + if (rc < 0) { + pr_err("LustreError: libcfs_debug_init: %d\n", rc); + return rc; + } + + rc = cfs_cpu_init(); + if (rc != 0) + goto 
cleanup_debug; + + rc = misc_register(&libcfs_dev); + if (rc) { + CERROR("misc_register: error %d\n", rc); + goto cleanup_cpu; + } + + rc = cfs_wi_startup(); + if (rc) { + CERROR("initialize workitem: error %d\n", rc); + goto cleanup_deregister; + } + + /* max to 4 threads, should be enough for rehash */ + rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4); + rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY, + rc, &cfs_sched_rehash); + if (rc != 0) { + CERROR("Startup workitem scheduler: error: %d\n", rc); + goto cleanup_deregister; + } + + rc = cfs_crypto_register(); + if (rc) { + CERROR("cfs_crypto_register: error %d\n", rc); + goto cleanup_wi; + } + + lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks); + + CDEBUG(D_OTHER, "portals setup OK\n"); + return 0; + cleanup_wi: + cfs_wi_shutdown(); + cleanup_deregister: + misc_deregister(&libcfs_dev); +cleanup_cpu: + cfs_cpu_fini(); + cleanup_debug: + libcfs_debug_cleanup(); + return rc; +} + +static void libcfs_exit(void) +{ + int rc; + + lustre_remove_debugfs(); + + if (cfs_sched_rehash) { + cfs_wi_sched_destroy(cfs_sched_rehash); + cfs_sched_rehash = NULL; + } + + cfs_crypto_unregister(); + cfs_wi_shutdown(); + + misc_deregister(&libcfs_dev); + + cfs_cpu_fini(); + + rc = libcfs_debug_cleanup(); + if (rc) + pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc); +} + +MODULE_AUTHOR("OpenSFS, Inc. "); +MODULE_DESCRIPTION("Lustre helper library"); +MODULE_VERSION(LIBCFS_VERSION); +MODULE_LICENSE("GPL"); + +module_init(libcfs_init); +module_exit(libcfs_exit); diff --git a/drivers/staging/lustre/lnet/libcfs/prng.c b/drivers/staging/lustre/lnet/libcfs/prng.c new file mode 100644 index 000000000000..c75ae9a68e76 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/prng.c @@ -0,0 +1,140 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/prng.c + * + * concatenation of following two 16-bit multiply with carry generators + * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16, + * number and carry packed within the same 32 bit integer. 
+ * algorithm recommended by Marsaglia +*/ + +#include "../../include/linux/libcfs/libcfs.h" + +/* + * From: George Marsaglia + * Newsgroups: sci.math + * Subject: Re: A RANDOM NUMBER GENERATOR FOR C + * Date: Tue, 30 Sep 1997 05:29:35 -0700 + * + * You may replace the two constants 36969 and 18000 by any + * pair of distinct constants from this list: + * 18000 18030 18273 18513 18879 19074 19098 19164 19215 19584 + * 19599 19950 20088 20508 20544 20664 20814 20970 21153 21243 + * 21423 21723 21954 22125 22188 22293 22860 22938 22965 22974 + * 23109 23124 23163 23208 23508 23520 23553 23658 23865 24114 + * 24219 24660 24699 24864 24948 25023 25308 25443 26004 26088 + * 26154 26550 26679 26838 27183 27258 27753 27795 27810 27834 + * 27960 28320 28380 28689 28710 28794 28854 28959 28980 29013 + * 29379 29889 30135 30345 30459 30714 30903 30963 31059 31083 + * (or any other 16-bit constants k for which both k*2^16-1 + * and k*2^15-1 are prime) + */ + +#define RANDOM_CONST_A 18030 +#define RANDOM_CONST_B 29013 + +static unsigned int seed_x = 521288629; +static unsigned int seed_y = 362436069; + +/** + * cfs_rand - creates new seeds + * + * First it creates new seeds from the previous seeds. Then it generates a + * new pseudo random number for use. + * + * Returns a pseudo-random 32-bit integer + */ +unsigned int cfs_rand(void) +{ + seed_x = RANDOM_CONST_A * (seed_x & 65535) + (seed_x >> 16); + seed_y = RANDOM_CONST_B * (seed_y & 65535) + (seed_y >> 16); + + return ((seed_x << 16) + (seed_y & 65535)); +} +EXPORT_SYMBOL(cfs_rand); + +/** + * cfs_srand - sets the initial seed + * @seed1 : (seed_x) should have the most entropy in the low bits of the word + * @seed2 : (seed_y) should have the most entropy in the high bits of the word + * + * Replaces the original seeds with new values. Used to generate a new pseudo + * random numbers. 
+ */ +void cfs_srand(unsigned int seed1, unsigned int seed2) +{ + if (seed1) + seed_x = seed1; /* use default seeds if parameter is 0 */ + if (seed2) + seed_y = seed2; +} +EXPORT_SYMBOL(cfs_srand); + +/** + * cfs_get_random_bytes - generate a bunch of random numbers + * @buf : buffer to fill with random numbers + * @size: size of passed in buffer + * + * Fills a buffer with random bytes + */ +void cfs_get_random_bytes(void *buf, int size) +{ + int *p = buf; + int rem, tmp; + + LASSERT(size >= 0); + + rem = min((int)((unsigned long)buf & (sizeof(int) - 1)), size); + if (rem) { + get_random_bytes(&tmp, sizeof(tmp)); + tmp ^= cfs_rand(); + memcpy(buf, &tmp, rem); + p = buf + rem; + size -= rem; + } + + while (size >= sizeof(int)) { + get_random_bytes(&tmp, sizeof(tmp)); + *p = cfs_rand() ^ tmp; + size -= sizeof(int); + p++; + } + buf = p; + if (size) { + get_random_bytes(&tmp, sizeof(tmp)); + tmp ^= cfs_rand(); + memcpy(buf, &tmp, size); + } +} +EXPORT_SYMBOL(cfs_get_random_bytes); diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c new file mode 100644 index 000000000000..ec3bc04bd89f --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -0,0 +1,1208 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * libcfs/libcfs/tracefile.c + * + * Author: Zach Brown + * Author: Phil Schwan + */ + +#define DEBUG_SUBSYSTEM S_LNET +#define LUSTRE_TRACEFILE_PRIVATE +#include "tracefile.h" + +#include "../../include/linux/libcfs/libcfs.h" + +/* XXX move things up to the top, comment */ +union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; + +char cfs_tracefile[TRACEFILE_NAME_SIZE]; +long long cfs_tracefile_size = CFS_TRACEFILE_SIZE; +static struct tracefiled_ctl trace_tctl; +static DEFINE_MUTEX(cfs_trace_thread_mutex); +static int thread_running; + +static atomic_t cfs_tage_allocated = ATOMIC_INIT(0); + +struct page_collection { + struct list_head pc_pages; + /* + * if this flag is set, collect_pages() will spill both + * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, + * only ->tcd_pages are spilled. + */ + int pc_want_daemon_pages; +}; + +struct tracefiled_ctl { + struct completion tctl_start; + struct completion tctl_stop; + wait_queue_head_t tctl_waitq; + pid_t tctl_pid; + atomic_t tctl_shutdown; +}; + +/* + * small data-structure for each page owned by tracefiled. 
+ */ +struct cfs_trace_page { + /* + * page itself + */ + struct page *page; + /* + * linkage into one of the lists in trace_data_union or + * page_collection + */ + struct list_head linkage; + /* + * number of bytes used within this page + */ + unsigned int used; + /* + * cpu that owns this page + */ + unsigned short cpu; + /* + * type(context) of this page + */ + unsigned short type; +}; + +static void put_pages_on_tcd_daemon_list(struct page_collection *pc, + struct cfs_trace_cpu_data *tcd); + +static inline struct cfs_trace_page * +cfs_tage_from_list(struct list_head *list) +{ + return list_entry(list, struct cfs_trace_page, linkage); +} + +static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) +{ + struct page *page; + struct cfs_trace_page *tage; + + /* My caller is trying to free memory */ + if (!in_interrupt() && memory_pressure_get()) + return NULL; + + /* + * Don't spam console with allocation failures: they will be reported + * by upper layer anyway. + */ + gfp |= __GFP_NOWARN; + page = alloc_page(gfp); + if (!page) + return NULL; + + tage = kmalloc(sizeof(*tage), gfp); + if (!tage) { + __free_page(page); + return NULL; + } + + tage->page = page; + atomic_inc(&cfs_tage_allocated); + return tage; +} + +static void cfs_tage_free(struct cfs_trace_page *tage) +{ + __free_page(tage->page); + kfree(tage); + atomic_dec(&cfs_tage_allocated); +} + +static void cfs_tage_to_tail(struct cfs_trace_page *tage, + struct list_head *queue) +{ + list_move_tail(&tage->linkage, queue); +} + +int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, + struct list_head *stock) +{ + int i; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + + for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) { + struct cfs_trace_page *tage; + + tage = cfs_tage_alloc(gfp); + if (!tage) + break; + list_add_tail(&tage->linkage, stock); + } + return i; +} + +/* return a page that has 'len' bytes left at the end */ +static struct cfs_trace_page * +cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) +{ + struct cfs_trace_page *tage; + + if (tcd->tcd_cur_pages > 0) { + __LASSERT(!list_empty(&tcd->tcd_pages)); + tage = cfs_tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= PAGE_CACHE_SIZE) + return tage; + } + + if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { + if (tcd->tcd_cur_stock_pages > 0) { + tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev); + --tcd->tcd_cur_stock_pages; + list_del_init(&tage->linkage); + } else { + tage = cfs_tage_alloc(GFP_ATOMIC); + if (unlikely(!tage)) { + if ((!memory_pressure_get() || + in_interrupt()) && printk_ratelimit()) + printk(KERN_WARNING + "cannot allocate a tage (%ld)\n", + tcd->tcd_cur_pages); + return NULL; + } + } + + tage->used = 0; + tage->cpu = smp_processor_id(); + tage->type = tcd->tcd_type; + list_add_tail(&tage->linkage, &tcd->tcd_pages); + tcd->tcd_cur_pages++; + + if (tcd->tcd_cur_pages > 8 && thread_running) { + struct tracefiled_ctl *tctl = &trace_tctl; + /* + * wake up tracefiled to process some pages. + */ + wake_up(&tctl->tctl_waitq); + } + return tage; + } + return NULL; +} + +static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd) +{ + int pgcount = tcd->tcd_cur_pages / 10; + struct page_collection pc; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + + if (printk_ratelimit()) + printk(KERN_WARNING "debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n", + pgcount + 1, tcd->tcd_cur_pages); + + INIT_LIST_HEAD(&pc.pc_pages); + + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + if (pgcount-- == 0) + break; + + list_move_tail(&tage->linkage, &pc.pc_pages); + tcd->tcd_cur_pages--; + } + put_pages_on_tcd_daemon_list(&pc, tcd); +} + +/* return a page that has 'len' bytes left at the end */ +static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd, + unsigned long len) +{ + struct cfs_trace_page *tage; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + if (len > PAGE_CACHE_SIZE) { + pr_err("cowardly refusing to write %lu bytes in a page\n", len); + return NULL; + } + + tage = cfs_trace_get_tage_try(tcd, len); + if (tage) + return tage; + if (thread_running) + cfs_tcd_shrink(tcd); + if (tcd->tcd_cur_pages > 0) { + tage = cfs_tage_from_list(tcd->tcd_pages.next); + tage->used = 0; + cfs_tage_to_tail(tage, &tcd->tcd_pages); + } + return tage; +} + +int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, + const char *format, ...) +{ + va_list args; + int rc; + + va_start(args, format); + rc = libcfs_debug_vmsg2(msgdata, format, args, NULL); + va_end(args); + + return rc; +} +EXPORT_SYMBOL(libcfs_debug_msg); + +int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, + const char *format1, va_list args, + const char *format2, ...) 
+{ + struct cfs_trace_cpu_data *tcd = NULL; + struct ptldebug_header header = {0}; + struct cfs_trace_page *tage; + /* string_buf is used only if tcd != NULL, and is always set then */ + char *string_buf = NULL; + char *debug_buf; + int known_size; + int needed = 85; /* average message length */ + int max_nob; + va_list ap; + int depth; + int i; + int remain; + int mask = msgdata->msg_mask; + const char *file = kbasename(msgdata->msg_file); + struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; + + tcd = cfs_trace_get_tcd(); + + /* cfs_trace_get_tcd() grabs a lock, which disables preemption and + * pins us to a particular CPU. This avoids an smp_processor_id() + * warning on Linux when debugging is enabled. + */ + cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK()); + + if (!tcd) /* arch may not log in IRQ context */ + goto console; + + if (tcd->tcd_cur_pages == 0) + header.ph_flags |= PH_FLAG_FIRST_RECORD; + + if (tcd->tcd_shutting_down) { + cfs_trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + depth = __current_nesting_level(); + known_size = strlen(file) + 1 + depth; + if (msgdata->msg_fn) + known_size += strlen(msgdata->msg_fn) + 1; + + if (libcfs_debug_binary) + known_size += sizeof(header); + + /* + * '2' used because vsnprintf return real size required for output + * _without_ terminating NULL. + * if needed is to small for this format. 
+ */ + for (i = 0; i < 2; i++) { + tage = cfs_trace_get_tage(tcd, needed + known_size + 1); + if (!tage) { + if (needed + known_size > PAGE_CACHE_SIZE) + mask |= D_ERROR; + + cfs_trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + string_buf = (char *)page_address(tage->page) + + tage->used + known_size; + + max_nob = PAGE_CACHE_SIZE - tage->used - known_size; + if (max_nob <= 0) { + printk(KERN_EMERG "negative max_nob: %d\n", + max_nob); + mask |= D_ERROR; + cfs_trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, max_nob, format1, ap); + va_end(ap); + } + + if (format2) { + remain = max_nob - needed; + if (remain < 0) + remain = 0; + + va_start(ap, format2); + needed += vsnprintf(string_buf + needed, remain, + format2, ap); + va_end(ap); + } + + if (needed < max_nob) /* well. printing ok.. */ + break; + } + + if (*(string_buf + needed - 1) != '\n') + printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", + file, msgdata->msg_line, msgdata->msg_fn); + + header.ph_len = known_size + needed; + debug_buf = (char *)page_address(tage->page) + tage->used; + + if (libcfs_debug_binary) { + memcpy(debug_buf, &header, sizeof(header)); + tage->used += sizeof(header); + debug_buf += sizeof(header); + } + + /* indent message according to the nesting level */ + while (depth-- > 0) { + *(debug_buf++) = '.'; + ++tage->used; + } + + strcpy(debug_buf, file); + tage->used += strlen(file) + 1; + debug_buf += strlen(file) + 1; + + if (msgdata->msg_fn) { + strcpy(debug_buf, msgdata->msg_fn); + tage->used += strlen(msgdata->msg_fn) + 1; + debug_buf += strlen(msgdata->msg_fn) + 1; + } + + __LASSERT(debug_buf == string_buf); + + tage->used += needed; + __LASSERT(tage->used <= PAGE_CACHE_SIZE); + +console: + if ((mask & libcfs_printk) == 0) { + /* no console output requested */ + if (tcd) + cfs_trace_put_tcd(tcd); + return 1; + } + + if (cdls) { + if (libcfs_console_ratelimit && + 
cdls->cdls_next != 0 && /* not first time ever */ + !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { + /* skipping a console message */ + cdls->cdls_count++; + if (tcd) + cfs_trace_put_tcd(tcd); + return 1; + } + + if (cfs_time_after(cfs_time_current(), + cdls->cdls_next + libcfs_console_max_delay + + cfs_time_seconds(10))) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= libcfs_console_backoff * 4; + } else { + cdls->cdls_delay *= libcfs_console_backoff; + } + + if (cdls->cdls_delay < libcfs_console_min_delay) + cdls->cdls_delay = libcfs_console_min_delay; + else if (cdls->cdls_delay > libcfs_console_max_delay) + cdls->cdls_delay = libcfs_console_max_delay; + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; + } + + if (tcd) { + cfs_print_to_console(&header, mask, string_buf, needed, file, + msgdata->msg_fn); + cfs_trace_put_tcd(tcd); + } else { + string_buf = cfs_trace_get_console_buffer(); + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, + CFS_TRACE_CONSOLE_BUFFER_SIZE, + format1, ap); + va_end(ap); + } + if (format2) { + remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; + if (remain > 0) { + va_start(ap, format2); + needed += vsnprintf(string_buf + needed, remain, + format2, ap); + va_end(ap); + } + } + cfs_print_to_console(&header, mask, + string_buf, needed, file, msgdata->msg_fn); + + put_cpu(); + } + + if (cdls && cdls->cdls_count != 0) { + string_buf = cfs_trace_get_console_buffer(); + + needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, + "Skipped %d previous similar message%s\n", + cdls->cdls_count, + (cdls->cdls_count > 1) ? 
"s" : ""); + + cfs_print_to_console(&header, mask, + string_buf, needed, file, msgdata->msg_fn); + + put_cpu(); + cdls->cdls_count = 0; + } + + return 0; +} +EXPORT_SYMBOL(libcfs_debug_vmsg2); + +void +cfs_trace_assertion_failed(const char *str, + struct libcfs_debug_msg_data *msgdata) +{ + struct ptldebug_header hdr; + + libcfs_panic_in_progress = 1; + libcfs_catastrophe = 1; + mb(); + + cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK()); + + cfs_print_to_console(&hdr, D_EMERG, str, strlen(str), + msgdata->msg_file, msgdata->msg_fn); + + panic("Lustre debug assertion failure\n"); + + /* not reached */ +} + +static void +panic_collect_pages(struct page_collection *pc) +{ + /* Do the collect_pages job on a single CPU: assumes that all other + * CPUs have been stopped during a panic. If this isn't true for some + * arch, this will have to be implemented separately in each arch. + */ + int i; + int j; + struct cfs_trace_cpu_data *tcd; + + INIT_LIST_HEAD(&pc->pc_pages); + + cfs_tcd_for_each(tcd, i, j) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + + if (pc->pc_want_daemon_pages) { + list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); + tcd->tcd_cur_daemon_pages = 0; + } + } +} + +static void collect_pages_on_all_cpus(struct page_collection *pc) +{ + struct cfs_trace_cpu_data *tcd; + int i, cpu; + + for_each_possible_cpu(cpu) { + cfs_tcd_for_each_type_lock(tcd, i, cpu) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + if (pc->pc_want_daemon_pages) { + list_splice_init(&tcd->tcd_daemon_pages, + &pc->pc_pages); + tcd->tcd_cur_daemon_pages = 0; + } + } + } +} + +static void collect_pages(struct page_collection *pc) +{ + INIT_LIST_HEAD(&pc->pc_pages); + + if (libcfs_panic_in_progress) + panic_collect_pages(pc); + else + collect_pages_on_all_cpus(pc); +} + +static void put_pages_back_on_all_cpus(struct page_collection *pc) +{ + struct cfs_trace_cpu_data *tcd; + struct list_head *cur_head; + struct 
cfs_trace_page *tage; + struct cfs_trace_page *tmp; + int i, cpu; + + for_each_possible_cpu(cpu) { + cfs_tcd_for_each_type_lock(tcd, i, cpu) { + cur_head = tcd->tcd_pages.next; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, + linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != cpu || tage->type != i) + continue; + + cfs_tage_to_tail(tage, cur_head); + tcd->tcd_cur_pages++; + } + } + } +} + +static void put_pages_back(struct page_collection *pc) +{ + if (!libcfs_panic_in_progress) + put_pages_back_on_all_cpus(pc); +} + +/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that + * we have a good amount of data at all times for dumping during an LBUG, even + * if we have been steadily writing (and otherwise discarding) pages via the + * debug daemon. + */ +static void put_pages_on_tcd_daemon_list(struct page_collection *pc, + struct cfs_trace_cpu_data *tcd) +{ + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type) + continue; + + cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages); + tcd->tcd_cur_daemon_pages++; + + if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { + struct cfs_trace_page *victim; + + __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); + victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next); + + __LASSERT_TAGE_INVARIANT(victim); + + list_del(&victim->linkage); + cfs_tage_free(victim); + tcd->tcd_cur_daemon_pages--; + } + } +} + +static void put_pages_on_daemon_list(struct page_collection *pc) +{ + struct cfs_trace_cpu_data *tcd; + int i, cpu; + + for_each_possible_cpu(cpu) { + cfs_tcd_for_each_type_lock(tcd, i, cpu) + put_pages_on_tcd_daemon_list(pc, tcd); + } +} + +void cfs_trace_debug_print(void) +{ + struct page_collection pc; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + + pc.pc_want_daemon_pages = 1; + 
collect_pages(&pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + char *p, *file, *fn; + struct page *page; + + __LASSERT_TAGE_INVARIANT(tage); + + page = tage->page; + p = page_address(page); + while (p < ((char *)page_address(page) + tage->used)) { + struct ptldebug_header *hdr; + int len; + + hdr = (void *)p; + p += sizeof(*hdr); + file = p; + p += strlen(file) + 1; + fn = p; + p += strlen(fn) + 1; + len = hdr->ph_len - (int)(p - (char *)hdr); + + cfs_print_to_console(hdr, D_EMERG, p, len, file, fn); + + p += len; + } + + list_del(&tage->linkage); + cfs_tage_free(tage); + } +} + +int cfs_tracefile_dump_all_pages(char *filename) +{ + struct page_collection pc; + struct file *filp; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + char *buf; + int rc; + + DECL_MMSPACE; + + cfs_tracefile_write_lock(); + + filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, + 0600); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + filp = NULL; + pr_err("LustreError: can't open %s for dump: rc %d\n", + filename, rc); + goto out; + } + + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) { + rc = 0; + goto close; + } + + /* ok, for now, just write the pages. 
in the future we'll be building
+	 * iobufs with the pages and calling generic_direct_IO
+	 */
+	MMSPACE_OPEN;
+	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+		__LASSERT_TAGE_INVARIANT(tage);
+
+		buf = kmap(tage->page);
+		rc = vfs_write(filp, (__force const char __user *)buf,
+			       tage->used, &filp->f_pos);
+		kunmap(tage->page);
+
+		if (rc != (int)tage->used) {
+			printk(KERN_WARNING "wanted to write %u but wrote %d\n",
+			       tage->used, rc);
+			put_pages_back(&pc);
+			__LASSERT(list_empty(&pc.pc_pages));
+			break;
+		}
+		list_del(&tage->linkage);
+		cfs_tage_free(tage);
+	}
+	MMSPACE_CLOSE;
+	/*
+	 * NOTE(review): a short or failed vfs_write() result left in rc by the
+	 * loop above is overwritten by the vfs_fsync() result here.
+	 */
+	rc = vfs_fsync(filp, 1);
+	if (rc)
+		pr_err("sync returns %d\n", rc);
+close:
+	filp_close(filp, NULL);
+out:
+	cfs_tracefile_write_unlock();
+	return rc;
+}
+
+/*
+ * Collect every trace page (daemon pages included) and free it, discarding
+ * the buffered trace records.
+ */
+void cfs_trace_flush_pages(void)
+{
+	struct page_collection pc;
+	struct cfs_trace_page *tage;
+	struct cfs_trace_page *tmp;
+
+	pc.pc_want_daemon_pages = 1;
+	collect_pages(&pc);
+	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+		__LASSERT_TAGE_INVARIANT(tage);
+
+		list_del(&tage->linkage);
+		cfs_tage_free(tage);
+	}
+}
+
+/*
+ * Copy usr_buffer_nob bytes from user space into knl_buffer, strip trailing
+ * whitespace and NUL-terminate the result.
+ *
+ * Returns 0 on success, -EOVERFLOW if the user data or its terminator does
+ * not fit in knl_buffer_nob bytes, -EFAULT if the copy from user space
+ * fails, and -EINVAL if the string is empty or contains only whitespace.
+ */
+int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+			    const char __user *usr_buffer, int usr_buffer_nob)
+{
+	int nob;
+
+	if (usr_buffer_nob > knl_buffer_nob)
+		return -EOVERFLOW;
+
+	if (copy_from_user((void *)knl_buffer,
+			   usr_buffer, usr_buffer_nob))
+		return -EFAULT;
+
+	nob = strnlen(knl_buffer, usr_buffer_nob);
+	/*
+	 * Strip trailing whitespace.  Decrement before testing so that
+	 * knl_buffer[nob] is only ever read for nob >= 0; when the loop
+	 * ends nob is the index of the last non-whitespace character, or
+	 * -1 if there is none.
+	 */
+	while (--nob >= 0)
+		if (!isspace(knl_buffer[nob]))
+			break;
+
+	if (nob < 0)				/* empty string */
+		return -EINVAL;
+
+	/* the terminator goes to index nob + 1, which must be in bounds */
+	if (nob + 1 >= knl_buffer_nob)		/* no space to terminate */
+		return -EOVERFLOW;
+
+	knl_buffer[nob + 1] = 0;		/* terminate */
+	return 0;
+}
+EXPORT_SYMBOL(cfs_trace_copyin_string);
+
+int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
+			     const char *knl_buffer, char *append)
+{
+	/*
+	 * NB if 'append' != NULL, it's a single character to append to the
+	 * copied out string - usually "\n"
or "" (i.e. a terminating zero byte) + */ + int nob = strlen(knl_buffer); + + if (nob > usr_buffer_nob) + nob = usr_buffer_nob; + + if (copy_to_user(usr_buffer, knl_buffer, nob)) + return -EFAULT; + + if (append && nob < usr_buffer_nob) { + if (copy_to_user(usr_buffer + nob, append, 1)) + return -EFAULT; + + nob++; + } + + return nob; +} +EXPORT_SYMBOL(cfs_trace_copyout_string); + +int cfs_trace_allocate_string_buffer(char **str, int nob) +{ + if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ + return -EINVAL; + + *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); + if (!*str) + return -ENOMEM; + + return 0; +} + +int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) +{ + char *str; + int rc; + + rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); + if (rc != 0) + return rc; + + rc = cfs_trace_copyin_string(str, usr_str_nob + 1, + usr_str, usr_str_nob); + if (rc != 0) + goto out; + + if (str[0] != '/') { + rc = -EINVAL; + goto out; + } + rc = cfs_tracefile_dump_all_pages(str); +out: + kfree(str); + return rc; +} + +int cfs_trace_daemon_command(char *str) +{ + int rc = 0; + + cfs_tracefile_write_lock(); + + if (strcmp(str, "stop") == 0) { + cfs_tracefile_write_unlock(); + cfs_trace_stop_thread(); + cfs_tracefile_write_lock(); + memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); + + } else if (strncmp(str, "size=", 5) == 0) { + unsigned long tmp; + + rc = kstrtoul(str + 5, 10, &tmp); + if (!rc) { + if (tmp < 10 || tmp > 20480) + cfs_tracefile_size = CFS_TRACEFILE_SIZE; + else + cfs_tracefile_size = tmp << 20; + } + } else if (strlen(str) >= sizeof(cfs_tracefile)) { + rc = -ENAMETOOLONG; + } else if (str[0] != '/') { + rc = -EINVAL; + } else { + strcpy(cfs_tracefile, str); + + printk(KERN_INFO + "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n", + cfs_tracefile, + (long)(cfs_tracefile_size >> 10)); + + cfs_trace_start_thread(); + } + + cfs_tracefile_write_unlock(); + return rc; +} + +int 
cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) +{ + char *str; + int rc; + + rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); + if (rc != 0) + return rc; + + rc = cfs_trace_copyin_string(str, usr_str_nob + 1, + usr_str, usr_str_nob); + if (rc == 0) + rc = cfs_trace_daemon_command(str); + + kfree(str); + return rc; +} + +int cfs_trace_set_debug_mb(int mb) +{ + int i; + int j; + int pages; + int limit = cfs_trace_max_debug_mb(); + struct cfs_trace_cpu_data *tcd; + + if (mb < num_possible_cpus()) { + printk(KERN_WARNING + "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n", + mb, num_possible_cpus()); + mb = num_possible_cpus(); + } + + if (mb > limit) { + printk(KERN_WARNING + "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n", + mb, limit); + mb = limit; + } + + mb /= num_possible_cpus(); + pages = mb << (20 - PAGE_CACHE_SHIFT); + + cfs_tracefile_write_lock(); + + cfs_tcd_for_each(tcd, i, j) + tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; + + cfs_tracefile_write_unlock(); + + return 0; +} + +int cfs_trace_get_debug_mb(void) +{ + int i; + int j; + struct cfs_trace_cpu_data *tcd; + int total_pages = 0; + + cfs_tracefile_read_lock(); + + cfs_tcd_for_each(tcd, i, j) + total_pages += tcd->tcd_max_pages; + + cfs_tracefile_read_unlock(); + + return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1; +} + +static int tracefiled(void *arg) +{ + struct page_collection pc; + struct tracefiled_ctl *tctl = arg; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + struct file *filp; + char *buf; + int last_loop = 0; + int rc; + + DECL_MMSPACE; + + /* we're started late enough that we pick up init's fs context */ + /* this is so broken in uml? what on earth is going on? 
*/ + + complete(&tctl->tctl_start); + + while (1) { + wait_queue_t __wait; + + pc.pc_want_daemon_pages = 0; + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) + goto end_loop; + + filp = NULL; + cfs_tracefile_read_lock(); + if (cfs_tracefile[0] != 0) { + filp = filp_open(cfs_tracefile, + O_CREAT | O_RDWR | O_LARGEFILE, + 0600); + if (IS_ERR(filp)) { + rc = PTR_ERR(filp); + filp = NULL; + printk(KERN_WARNING "couldn't open %s: %d\n", + cfs_tracefile, rc); + } + } + cfs_tracefile_read_unlock(); + if (!filp) { + put_pages_on_daemon_list(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + goto end_loop; + } + + MMSPACE_OPEN; + + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + static loff_t f_pos; + + __LASSERT_TAGE_INVARIANT(tage); + + if (f_pos >= (off_t)cfs_tracefile_size) + f_pos = 0; + else if (f_pos > i_size_read(file_inode(filp))) + f_pos = i_size_read(file_inode(filp)); + + buf = kmap(tage->page); + rc = vfs_write(filp, (__force const char __user *)buf, + tage->used, &f_pos); + kunmap(tage->page); + + if (rc != (int)tage->used) { + printk(KERN_WARNING "wanted to write %u but wrote %d\n", + tage->used, rc); + put_pages_back(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + break; + } + } + MMSPACE_CLOSE; + + filp_close(filp, NULL); + put_pages_on_daemon_list(&pc); + if (!list_empty(&pc.pc_pages)) { + int i; + + printk(KERN_ALERT "Lustre: trace pages aren't empty\n"); + pr_err("total cpus(%d): ", num_possible_cpus()); + for (i = 0; i < num_possible_cpus(); i++) + if (cpu_online(i)) + pr_cont("%d(on) ", i); + else + pr_cont("%d(off) ", i); + pr_cont("\n"); + + i = 0; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, + linkage) + pr_err("page %d belongs to cpu %d\n", + ++i, tage->cpu); + pr_err("There are %d pages unwritten\n", i); + } + __LASSERT(list_empty(&pc.pc_pages)); +end_loop: + if (atomic_read(&tctl->tctl_shutdown)) { + if (last_loop == 0) { + last_loop = 1; + continue; + } else { + break; + } + } + init_waitqueue_entry(&__wait, current); + 
add_wait_queue(&tctl->tctl_waitq, &__wait); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1)); + remove_wait_queue(&tctl->tctl_waitq, &__wait); + } + complete(&tctl->tctl_stop); + return 0; +} + +int cfs_trace_start_thread(void) +{ + struct tracefiled_ctl *tctl = &trace_tctl; + struct task_struct *task; + int rc = 0; + + mutex_lock(&cfs_trace_thread_mutex); + if (thread_running) + goto out; + + init_completion(&tctl->tctl_start); + init_completion(&tctl->tctl_stop); + init_waitqueue_head(&tctl->tctl_waitq); + atomic_set(&tctl->tctl_shutdown, 0); + + task = kthread_run(tracefiled, tctl, "ktracefiled"); + if (IS_ERR(task)) { + rc = PTR_ERR(task); + goto out; + } + + wait_for_completion(&tctl->tctl_start); + thread_running = 1; +out: + mutex_unlock(&cfs_trace_thread_mutex); + return rc; +} + +void cfs_trace_stop_thread(void) +{ + struct tracefiled_ctl *tctl = &trace_tctl; + + mutex_lock(&cfs_trace_thread_mutex); + if (thread_running) { + printk(KERN_INFO + "Lustre: shutting down debug daemon thread...\n"); + atomic_set(&tctl->tctl_shutdown, 1); + wait_for_completion(&tctl->tctl_stop); + thread_running = 0; + } + mutex_unlock(&cfs_trace_thread_mutex); +} + +int cfs_tracefile_init(int max_pages) +{ + struct cfs_trace_cpu_data *tcd; + int i; + int j; + int rc; + int factor; + + rc = cfs_tracefile_init_arch(); + if (rc != 0) + return rc; + + cfs_tcd_for_each(tcd, i, j) { + /* tcd_pages_factor is initialized int tracefile_init_arch. 
*/
+		factor = tcd->tcd_pages_factor;
+		INIT_LIST_HEAD(&tcd->tcd_pages);
+		INIT_LIST_HEAD(&tcd->tcd_stock_pages);
+		INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+		tcd->tcd_cur_pages = 0;
+		tcd->tcd_cur_stock_pages = 0;
+		tcd->tcd_cur_daemon_pages = 0;
+		tcd->tcd_max_pages = (max_pages * factor) / 100;
+		LASSERT(tcd->tcd_max_pages > 0);
+		tcd->tcd_shutting_down = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Mark every per-CPU trace buffer as shutting down and free the pages queued
+ * on its tcd_pages list.
+ *
+ * NOTE(review): only tcd_pages is drained here; tcd_daemon_pages and
+ * tcd_stock_pages are not touched - confirm they are released elsewhere.
+ */
+static void trace_cleanup_on_all_cpus(void)
+{
+	struct cfs_trace_cpu_data *tcd;
+	struct cfs_trace_page *tage;
+	struct cfs_trace_page *tmp;
+	int i, cpu;
+
+	for_each_possible_cpu(cpu) {
+		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+			tcd->tcd_shutting_down = 1;
+
+			list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
+						 linkage) {
+				__LASSERT_TAGE_INVARIANT(tage);
+
+				list_del(&tage->linkage);
+				cfs_tage_free(tage);
+			}
+
+			tcd->tcd_cur_pages = 0;
+		}
+	}
+}
+
+/*
+ * Release the trace pages on all CPUs, then run the arch-specific teardown.
+ */
+static void cfs_trace_cleanup(void)
+{
+	trace_cleanup_on_all_cpus();
+
+	cfs_tracefile_fini_arch();
+}
+
+/*
+ * Module-exit entry point: stop the trace daemon thread first, then free
+ * the trace buffers.
+ */
+void cfs_tracefile_exit(void)
+{
+	cfs_trace_stop_thread();
+	cfs_trace_cleanup();
+}
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h
new file mode 100644
index 000000000000..4c77f9044dd3
--- /dev/null
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h
@@ -0,0 +1,266 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LIBCFS_TRACEFILE_H__ +#define __LIBCFS_TRACEFILE_H__ + +#include "../../include/linux/libcfs/libcfs.h" + +enum cfs_trace_buf_type { + CFS_TCD_TYPE_PROC = 0, + CFS_TCD_TYPE_SOFTIRQ, + CFS_TCD_TYPE_IRQ, + CFS_TCD_TYPE_MAX +}; + +/* trace file lock routines */ + +#define TRACEFILE_NAME_SIZE 1024 +extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; +extern long long cfs_tracefile_size; + +void libcfs_run_debug_log_upcall(char *file); + +int cfs_tracefile_init_arch(void); +void cfs_tracefile_fini_arch(void); + +void cfs_tracefile_read_lock(void); +void cfs_tracefile_read_unlock(void); +void cfs_tracefile_write_lock(void); +void cfs_tracefile_write_unlock(void); + +int cfs_tracefile_dump_all_pages(char *filename); +void cfs_trace_debug_print(void); +void cfs_trace_flush_pages(void); +int cfs_trace_start_thread(void); +void cfs_trace_stop_thread(void); +int cfs_tracefile_init(int max_pages); +void cfs_tracefile_exit(void); + +int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, + const char __user *usr_buffer, int usr_buffer_nob); +int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, + const char *knl_str, char *append); +int cfs_trace_allocate_string_buffer(char **str, int nob); +int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int 
usr_str_nob); +int cfs_trace_daemon_command(char *str); +int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob); +int cfs_trace_set_debug_mb(int mb); +int cfs_trace_get_debug_mb(void); + +void libcfs_debug_dumplog_internal(void *arg); +void libcfs_register_panic_notifier(void); +void libcfs_unregister_panic_notifier(void); +extern int libcfs_panic_in_progress; +int cfs_trace_max_debug_mb(void); + +#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) +#define CFS_TRACEFILE_SIZE (500 << 20) + +#ifdef LUSTRE_TRACEFILE_PRIVATE + +/* + * Private declare for tracefile + */ +#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) + +#define CFS_TRACEFILE_SIZE (500 << 20) + +/* + * Size of a buffer for sprinting console messages if we can't get a page + * from system + */ +#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024 + +union cfs_trace_data_union { + struct cfs_trace_cpu_data { + /* + * Even though this structure is meant to be per-CPU, locking + * is needed because in some places the data may be accessed + * from other CPUs. This lock is directly used in trace_get_tcd + * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and + * tcd_for_each_type_lock + */ + spinlock_t tcd_lock; + unsigned long tcd_lock_flags; + + /* + * pages with trace records not yet processed by tracefiled. + */ + struct list_head tcd_pages; + /* number of pages on ->tcd_pages */ + unsigned long tcd_cur_pages; + + /* + * pages with trace records already processed by + * tracefiled. These pages are kept in memory, so that some + * portion of log can be written in the event of LBUG. This + * list is maintained in LRU order. + * + * Pages are moved to ->tcd_daemon_pages by tracefiled() + * (put_pages_on_daemon_list()). LRU pages from this list are + * discarded when list grows too large. 
+ */ + struct list_head tcd_daemon_pages; + /* number of pages on ->tcd_daemon_pages */ + unsigned long tcd_cur_daemon_pages; + + /* + * Maximal number of pages allowed on ->tcd_pages and + * ->tcd_daemon_pages each. + * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current + * implementation. + */ + unsigned long tcd_max_pages; + + /* + * preallocated pages to write trace records into. Pages from + * ->tcd_stock_pages are moved to ->tcd_pages by + * portals_debug_msg(). + * + * This list is necessary, because on some platforms it's + * impossible to perform efficient atomic page allocation in a + * non-blockable context. + * + * Such platforms fill ->tcd_stock_pages "on occasion", when + * tracing code is entered in blockable context. + * + * trace_get_tage_try() tries to get a page from + * ->tcd_stock_pages first and resorts to atomic page + * allocation only if this queue is empty. ->tcd_stock_pages + * is replenished when tracing code is entered in blocking + * context (darwin-tracefile.c:trace_get_tcd()). We try to + * maintain TCD_STOCK_PAGES (40 by default) pages in this + * queue. Atomic allocation is only required if more than + * TCD_STOCK_PAGES pagesful are consumed by trace records all + * emitted in non-blocking contexts. Which is quite unlikely. + */ + struct list_head tcd_stock_pages; + /* number of pages on ->tcd_stock_pages */ + unsigned long tcd_cur_stock_pages; + + unsigned short tcd_shutting_down; + unsigned short tcd_cpu; + unsigned short tcd_type; + /* The factors to share debug memory. 
*/ + unsigned short tcd_pages_factor; + } tcd; + char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))]; +}; + +#define TCD_MAX_TYPES 8 +extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS]; + +#define cfs_tcd_for_each(tcd, i, j) \ + for (i = 0; cfs_trace_data[i]; i++) \ + for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \ + j < num_possible_cpus(); \ + j++, (tcd) = &(*cfs_trace_data[i])[j].tcd) + +#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \ + for (i = 0; cfs_trace_data[i] && \ + (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \ + cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++) + +void cfs_set_ptldebug_header(struct ptldebug_header *header, + struct libcfs_debug_msg_data *m, + unsigned long stack); +void cfs_print_to_console(struct ptldebug_header *hdr, int mask, + const char *buf, int len, const char *file, + const char *fn); + +int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking); +void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking); + +extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; +enum cfs_trace_buf_type cfs_trace_buf_idx_get(void); + +static inline char * +cfs_trace_get_console_buffer(void) +{ + unsigned int i = get_cpu(); + unsigned int j = cfs_trace_buf_idx_get(); + + return cfs_trace_console_buffers[i][j]; +} + +static inline struct cfs_trace_cpu_data * +cfs_trace_get_tcd(void) +{ + struct cfs_trace_cpu_data *tcd = + &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd; + + cfs_trace_lock_tcd(tcd, 0); + + return tcd; +} + +static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd) +{ + cfs_trace_unlock_tcd(tcd, 0); + + put_cpu(); +} + +int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, + struct list_head *stock); + +void cfs_trace_assertion_failed(const char *str, + struct libcfs_debug_msg_data *m); + +/* ASSERTION that is safe to use within the debug system */ +#define __LASSERT(cond) \ +do { \ + if (unlikely(!(cond))) { 
\ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ + cfs_trace_assertion_failed("ASSERTION("#cond") failed", \ + &msgdata); \ + } \ +} while (0) + +#define __LASSERT_TAGE_INVARIANT(tage) \ +do { \ + __LASSERT(tage); \ + __LASSERT(tage->page); \ + __LASSERT(tage->used <= PAGE_CACHE_SIZE); \ + __LASSERT(page_count(tage->page) > 0); \ +} while (0) + +#endif /* LUSTRE_TRACEFILE_PRIVATE */ + +#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c new file mode 100644 index 000000000000..f2ebed8e6ef5 --- /dev/null +++ b/drivers/staging/lustre/lnet/libcfs/workitem.c @@ -0,0 +1,470 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ *
+ * libcfs/libcfs/workitem.c
+ *
+ * Author: Isaac Huang
+ *         Liang Zhen
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#define CFS_WS_NAME_LEN	16
+
+struct cfs_wi_sched {
+	/** link on the global list of schedulers */
+	struct list_head	ws_list;
+	/** serialised workitems */
+	spinlock_t		ws_lock;
+	/** wait queue the scheduler threads sleep on */
+	wait_queue_head_t	ws_waitq;
+	/** concurrent workitems */
+	struct list_head	ws_runq;
+	/**
+	 * Workitems rescheduled while they are running.  A workitem can be
+	 * rescheduled from inside wi_action(), but we don't want to execute
+	 * it again until wi_action() has returned, so it is parked on
+	 * ws_rerunq while rescheduling and moved to ws_runq once wi_action()
+	 * returns.
+	 */
+	struct list_head	ws_rerunq;
+	/** CPT-table for this scheduler */
+	struct cfs_cpt_table	*ws_cptab;
+	/** CPT id for affinity */
+	int			ws_cpt;
+	/** number of scheduled workitems */
+	int			ws_nscheduled;
+	/** started scheduler threads, protected by cfs_wi_data::wi_glock */
+	unsigned int		ws_nthreads:30;
+	/** shutting down, protected by cfs_wi_data::wi_glock */
+	unsigned int		ws_stopping:1;
+	/** serialize starting thread, protected by cfs_wi_data::wi_glock */
+	unsigned int		ws_starting:1;
+	/** scheduler name */
+	char			ws_name[CFS_WS_NAME_LEN];
+};
+
+static struct cfs_workitem_data {
+	/** serialize */
+	spinlock_t		wi_glock;
+	/** list of all schedulers */
+	struct list_head	wi_scheds;
+	/** WI module is initialized */
+	int			wi_init;
+	/** shutting down the whole WI module */
+	int			wi_stopping;
+} cfs_wi_data;
+
+/*
+ * Return 1 when a scheduler thread may go to sleep, i.e. the scheduler is
+ * not stopping and its run queue is empty; return 0 otherwise.  Both
+ * conditions are sampled under ws_lock.
+ */
+static inline int
+cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
+{
+	int idle;
+
+	spin_lock(&sched->ws_lock);
+	idle = !sched->ws_stopping && list_empty(&sched->ws_runq);
+	spin_unlock(&sched->ws_lock);
+
+	return idle;
+}
+
+/* XXX:
+ * 0. it only works when called from wi->wi_action.
+ * 1.
when it returns no one shall try to schedule the workitem.
+ */
+void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+	/* ws_lock is a plain spinlock, so interrupt context is not allowed */
+	LASSERT(!in_interrupt());
+	LASSERT(!sched->ws_stopping);
+
+	spin_lock(&sched->ws_lock);
+
+	LASSERT(wi->wi_running);
+	if (wi->wi_scheduled) {
+		/* a schedule request is pending: take it off its queue */
+		LASSERT(!list_empty(&wi->wi_list));
+		list_del_init(&wi->wi_list);
+
+		LASSERT(sched->ws_nscheduled > 0);
+		--sched->ws_nscheduled;
+	}
+
+	LASSERT(list_empty(&wi->wi_list));
+
+	/* keep the flag set while off-list: LBUG future schedule attempts */
+	wi->wi_scheduled = 1;
+	spin_unlock(&sched->ws_lock);
+}
+EXPORT_SYMBOL(cfs_wi_exit);
+
+/**
+ * Cancel the schedule request of workitem \a wi.
+ *
+ * Returns 0 if the workitem is already running, otherwise 1, which means
+ * the workitem will not be scheduled and will not have any race with
+ * wi_action.
+ */
+int cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+	int not_running;
+
+	/* ws_lock is a plain spinlock, so interrupt context is not allowed */
+	LASSERT(!in_interrupt());
+	LASSERT(!sched->ws_stopping);
+
+	spin_lock(&sched->ws_lock);
+
+	not_running = !wi->wi_running;
+
+	if (wi->wi_scheduled) {
+		/* a schedule request is pending: take it off its queue */
+		LASSERT(!list_empty(&wi->wi_list));
+		list_del_init(&wi->wi_list);
+
+		LASSERT(sched->ws_nscheduled > 0);
+		--sched->ws_nscheduled;
+
+		wi->wi_scheduled = 0;
+	}
+
+	LASSERT(list_empty(&wi->wi_list));
+
+	spin_unlock(&sched->ws_lock);
+	return not_running;
+}
+EXPORT_SYMBOL(cfs_wi_deschedule);
+
+/*
+ * Workitem scheduled with (serial == 1) is strictly serialised not only with
+ * itself, but also with others scheduled this way.
+ *
+ * Now there's only one static serialised queue, but in the future more might
+ * be added, and even dynamic creation of serialised queues might be supported.
+ */
+void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+{
+	/* ws_lock is a plain spinlock, so interrupt context is not allowed */
+	LASSERT(!in_interrupt());
+	LASSERT(!sched->ws_stopping);
+
+	spin_lock(&sched->ws_lock);
+
+	/* an already-scheduled workitem is left where it is */
+	if (!wi->wi_scheduled) {
+		LASSERT(list_empty(&wi->wi_list));
+
+		wi->wi_scheduled = 1;
+		sched->ws_nscheduled++;
+		if (wi->wi_running) {
+			/* still inside wi_action(): park it on the rerun queue */
+			list_add(&wi->wi_list, &sched->ws_rerunq);
+		} else {
+			/* idle: queue it and wake a scheduler thread */
+			list_add_tail(&wi->wi_list, &sched->ws_runq);
+			wake_up(&sched->ws_waitq);
+		}
+	}
+
+	/* scheduled workitems must sit on one of the two queues */
+	LASSERT(!list_empty(&wi->wi_list));
+	spin_unlock(&sched->ws_lock);
+}
+EXPORT_SYMBOL(cfs_wi_schedule);
+
+static int cfs_wi_scheduler(void *arg)
+{
+	struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;
+
+	cfs_block_allsigs();
+
+	/* CPT affinity scheduler? */
+	if (sched->ws_cptab)
+		if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
+			CWARN("Failed to bind %s on CPT %d\n",
+			      sched->ws_name, sched->ws_cpt);
+
+	spin_lock(&cfs_wi_data.wi_glock);
+
+	LASSERT(sched->ws_starting == 1);
+	sched->ws_starting--;
+	sched->ws_nthreads++;
+
+	spin_unlock(&cfs_wi_data.wi_glock);
+
+	spin_lock(&sched->ws_lock);
+
+	while (!sched->ws_stopping) {
+		int nloops = 0;
+		int rc;
+		cfs_workitem_t *wi;
+
+		while (!list_empty(&sched->ws_runq) &&
+		       nloops < CFS_WI_RESCHED) {
+			wi = list_entry(sched->ws_runq.next, cfs_workitem_t,
+					wi_list);
+			LASSERT(wi->wi_scheduled && !wi->wi_running);
+
+			list_del_init(&wi->wi_list);
+
+			LASSERT(sched->ws_nscheduled > 0);
+			sched->ws_nscheduled--;
+
+			wi->wi_running = 1;
+			wi->wi_scheduled = 0;
+
+			spin_unlock(&sched->ws_lock);
+			nloops++;
+
+			rc = (*wi->wi_action) (wi);
+
+			spin_lock(&sched->ws_lock);
+			if (rc != 0) /* WI should be dead, even be freed!
*/ + continue; + + wi->wi_running = 0; + if (list_empty(&wi->wi_list)) + continue; + + LASSERT(wi->wi_scheduled); + /* wi is rescheduled, should be on rerunq now, we + * move it to runq so it can run action now + */ + list_move_tail(&wi->wi_list, &sched->ws_runq); + } + + if (!list_empty(&sched->ws_runq)) { + spin_unlock(&sched->ws_lock); + /* don't sleep because some workitems still + * expect me to come back soon + */ + cond_resched(); + spin_lock(&sched->ws_lock); + continue; + } + + spin_unlock(&sched->ws_lock); + rc = wait_event_interruptible_exclusive(sched->ws_waitq, + !cfs_wi_sched_cansleep(sched)); + spin_lock(&sched->ws_lock); + } + + spin_unlock(&sched->ws_lock); + + spin_lock(&cfs_wi_data.wi_glock); + sched->ws_nthreads--; + spin_unlock(&cfs_wi_data.wi_glock); + + return 0; +} + +void +cfs_wi_sched_destroy(struct cfs_wi_sched *sched) +{ + int i; + + LASSERT(cfs_wi_data.wi_init); + LASSERT(!cfs_wi_data.wi_stopping); + + spin_lock(&cfs_wi_data.wi_glock); + if (sched->ws_stopping) { + CDEBUG(D_INFO, "%s is in progress of stopping\n", + sched->ws_name); + spin_unlock(&cfs_wi_data.wi_glock); + return; + } + + LASSERT(!list_empty(&sched->ws_list)); + sched->ws_stopping = 1; + + spin_unlock(&cfs_wi_data.wi_glock); + + i = 2; + wake_up_all(&sched->ws_waitq); + + spin_lock(&cfs_wi_data.wi_glock); + while (sched->ws_nthreads > 0) { + CDEBUG(is_power_of_2(++i) ? 
D_WARNING : D_NET, + "waiting for %d threads of WI sched[%s] to terminate\n", + sched->ws_nthreads, sched->ws_name); + + spin_unlock(&cfs_wi_data.wi_glock); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1) / 20); + spin_lock(&cfs_wi_data.wi_glock); + } + + list_del(&sched->ws_list); + + spin_unlock(&cfs_wi_data.wi_glock); + LASSERT(sched->ws_nscheduled == 0); + + LIBCFS_FREE(sched, sizeof(*sched)); +} +EXPORT_SYMBOL(cfs_wi_sched_destroy); + +int +cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, + int cpt, int nthrs, struct cfs_wi_sched **sched_pp) +{ + struct cfs_wi_sched *sched; + int rc; + + LASSERT(cfs_wi_data.wi_init); + LASSERT(!cfs_wi_data.wi_stopping); + LASSERT(!cptab || cpt == CFS_CPT_ANY || + (cpt >= 0 && cpt < cfs_cpt_number(cptab))); + + LIBCFS_ALLOC(sched, sizeof(*sched)); + if (!sched) + return -ENOMEM; + + if (strlen(name) > sizeof(sched->ws_name) - 1) { + LIBCFS_FREE(sched, sizeof(*sched)); + return -E2BIG; + } + strncpy(sched->ws_name, name, sizeof(sched->ws_name)); + + sched->ws_cptab = cptab; + sched->ws_cpt = cpt; + + spin_lock_init(&sched->ws_lock); + init_waitqueue_head(&sched->ws_waitq); + INIT_LIST_HEAD(&sched->ws_runq); + INIT_LIST_HEAD(&sched->ws_rerunq); + INIT_LIST_HEAD(&sched->ws_list); + + rc = 0; + while (nthrs > 0) { + char name[16]; + struct task_struct *task; + + spin_lock(&cfs_wi_data.wi_glock); + while (sched->ws_starting > 0) { + spin_unlock(&cfs_wi_data.wi_glock); + schedule(); + spin_lock(&cfs_wi_data.wi_glock); + } + + sched->ws_starting++; + spin_unlock(&cfs_wi_data.wi_glock); + + if (sched->ws_cptab && sched->ws_cpt >= 0) { + snprintf(name, sizeof(name), "%s_%02d_%02u", + sched->ws_name, sched->ws_cpt, + sched->ws_nthreads); + } else { + snprintf(name, sizeof(name), "%s_%02u", + sched->ws_name, sched->ws_nthreads); + } + + task = kthread_run(cfs_wi_scheduler, sched, "%s", name); + if (!IS_ERR(task)) { + nthrs--; + continue; + } + rc = PTR_ERR(task); + + CERROR("Failed to create 
thread for WI scheduler %s: %d\n", + name, rc); + + spin_lock(&cfs_wi_data.wi_glock); + + /* make up for cfs_wi_sched_destroy */ + list_add(&sched->ws_list, &cfs_wi_data.wi_scheds); + sched->ws_starting--; + + spin_unlock(&cfs_wi_data.wi_glock); + + cfs_wi_sched_destroy(sched); + return rc; + } + spin_lock(&cfs_wi_data.wi_glock); + list_add(&sched->ws_list, &cfs_wi_data.wi_scheds); + spin_unlock(&cfs_wi_data.wi_glock); + + *sched_pp = sched; + return 0; +} +EXPORT_SYMBOL(cfs_wi_sched_create); + +int +cfs_wi_startup(void) +{ + memset(&cfs_wi_data, 0, sizeof(cfs_wi_data)); + + spin_lock_init(&cfs_wi_data.wi_glock); + INIT_LIST_HEAD(&cfs_wi_data.wi_scheds); + cfs_wi_data.wi_init = 1; + + return 0; +} + +void +cfs_wi_shutdown(void) +{ + struct cfs_wi_sched *sched; + + spin_lock(&cfs_wi_data.wi_glock); + cfs_wi_data.wi_stopping = 1; + spin_unlock(&cfs_wi_data.wi_glock); + + /* nobody should contend on this list */ + list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { + sched->ws_stopping = 1; + wake_up_all(&sched->ws_waitq); + } + + list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { + spin_lock(&cfs_wi_data.wi_glock); + + while (sched->ws_nthreads != 0) { + spin_unlock(&cfs_wi_data.wi_glock); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1) / 20); + spin_lock(&cfs_wi_data.wi_glock); + } + spin_unlock(&cfs_wi_data.wi_glock); + } + while (!list_empty(&cfs_wi_data.wi_scheds)) { + sched = list_entry(cfs_wi_data.wi_scheds.next, + struct cfs_wi_sched, ws_list); + list_del(&sched->ws_list); + LIBCFS_FREE(sched, sizeof(*sched)); + } + + cfs_wi_data.wi_stopping = 0; + cfs_wi_data.wi_init = 0; +} diff --git a/drivers/staging/lustre/lustre/Makefile b/drivers/staging/lustre/lustre/Makefile index 35d8b0b2dff4..331e4fcdd5a2 100644 --- a/drivers/staging/lustre/lustre/Makefile +++ b/drivers/staging/lustre/lustre/Makefile @@ -1,2 +1,2 @@ -obj-$(CONFIG_LUSTRE_FS) += libcfs/ obdclass/ ptlrpc/ fld/ osc/ mgc/ \ +obj-$(CONFIG_LUSTRE_FS) 
+= obdclass/ ptlrpc/ fld/ osc/ mgc/ \ fid/ lov/ mdc/ lmv/ llite/ obdecho/ diff --git a/drivers/staging/lustre/lustre/libcfs/Makefile b/drivers/staging/lustre/lustre/libcfs/Makefile deleted file mode 100644 index 277c1235eb32..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -obj-$(CONFIG_LUSTRE_FS) += libcfs.o - -libcfs-linux-objs := linux-tracefile.o linux-debug.o -libcfs-linux-objs += linux-prim.o linux-cpu.o -libcfs-linux-objs += linux-curproc.o -libcfs-linux-objs += linux-module.o -libcfs-linux-objs += linux-crypto.o -libcfs-linux-objs += linux-crypto-adler.o -libcfs-linux-objs += linux-mem.o - -libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) - -libcfs-all-objs := debug.o fail.o module.o tracefile.o \ - libcfs_string.o hash.o prng.o workitem.o \ - libcfs_cpu.o libcfs_mem.o libcfs_lock.o - -libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/drivers/staging/lustre/lustre/libcfs/debug.c b/drivers/staging/lustre/lustre/libcfs/debug.c deleted file mode 100644 index c90e5102fe06..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/debug.c +++ /dev/null @@ -1,560 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/debug.c - * - * Author: Phil Schwan - * - */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include "../../include/linux/libcfs/libcfs.h" -#include "tracefile.h" - -static char debug_file_name[1024]; - -unsigned int libcfs_subsystem_debug = ~0; -EXPORT_SYMBOL(libcfs_subsystem_debug); -module_param(libcfs_subsystem_debug, int, 0644); -MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask"); - -unsigned int libcfs_debug = (D_CANTMASK | - D_NETERROR | D_HA | D_CONFIG | D_IOCTL); -EXPORT_SYMBOL(libcfs_debug); -module_param(libcfs_debug, int, 0644); -MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask"); - -static int libcfs_param_debug_mb_set(const char *val, - const struct kernel_param *kp) -{ - int rc; - unsigned num; - - rc = kstrtouint(val, 0, &num); - if (rc < 0) - return rc; - - if (!*((unsigned int *)kp->arg)) { - *((unsigned int *)kp->arg) = num; - return 0; - } - - rc = cfs_trace_set_debug_mb(num); - - if (!rc) - *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb(); - - return rc; -} - -/* While debug_mb setting look like unsigned int, in fact - * it needs quite a bunch of extra processing, so we define special - * debugmb parameter type with corresponding methods to handle this case - */ -static struct kernel_param_ops param_ops_debugmb = { 
- .set = libcfs_param_debug_mb_set, - .get = param_get_uint, -}; - -#define param_check_debugmb(name, p) \ - __param_check(name, p, unsigned int) - -static unsigned int libcfs_debug_mb; -module_param(libcfs_debug_mb, debugmb, 0644); -MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size."); - -unsigned int libcfs_printk = D_CANTMASK; -module_param(libcfs_printk, uint, 0644); -MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask"); - -unsigned int libcfs_console_ratelimit = 1; -module_param(libcfs_console_ratelimit, uint, 0644); -MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)"); - -static int param_set_delay_minmax(const char *val, - const struct kernel_param *kp, - long min, long max) -{ - long d; - int sec; - int rc; - - rc = kstrtoint(val, 0, &sec); - if (rc) - return -EINVAL; - - d = cfs_time_seconds(sec) / 100; - if (d < min || d > max) - return -EINVAL; - - *((unsigned int *)kp->arg) = d; - - return 0; -} - -static int param_get_delay(char *buffer, const struct kernel_param *kp) -{ - unsigned int d = *(unsigned int *)kp->arg; - - return sprintf(buffer, "%u", (unsigned int)cfs_duration_sec(d * 100)); -} - -unsigned int libcfs_console_max_delay; -unsigned int libcfs_console_min_delay; - -static int param_set_console_max_delay(const char *val, - const struct kernel_param *kp) -{ - return param_set_delay_minmax(val, kp, - libcfs_console_min_delay, INT_MAX); -} - -static struct kernel_param_ops param_ops_console_max_delay = { - .set = param_set_console_max_delay, - .get = param_get_delay, -}; - -#define param_check_console_max_delay(name, p) \ - __param_check(name, p, unsigned int) - -module_param(libcfs_console_max_delay, console_max_delay, 0644); -MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)"); - -static int param_set_console_min_delay(const char *val, - const struct kernel_param *kp) -{ - return param_set_delay_minmax(val, kp, - 1, 
libcfs_console_max_delay); -} - -static struct kernel_param_ops param_ops_console_min_delay = { - .set = param_set_console_min_delay, - .get = param_get_delay, -}; - -#define param_check_console_min_delay(name, p) \ - __param_check(name, p, unsigned int) - -module_param(libcfs_console_min_delay, console_min_delay, 0644); -MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)"); - -static int param_set_uint_minmax(const char *val, - const struct kernel_param *kp, - unsigned int min, unsigned int max) -{ - unsigned int num; - int ret; - - if (!val) - return -EINVAL; - ret = kstrtouint(val, 0, &num); - if (ret < 0 || num < min || num > max) - return -EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} - -static int param_set_uintpos(const char *val, const struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, 1, -1); -} - -static struct kernel_param_ops param_ops_uintpos = { - .set = param_set_uintpos, - .get = param_get_uint, -}; - -#define param_check_uintpos(name, p) \ - __param_check(name, p, unsigned int) - -unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; -module_param(libcfs_console_backoff, uintpos, 0644); -MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor"); - -unsigned int libcfs_debug_binary = 1; - -unsigned int libcfs_stack = 3 * THREAD_SIZE / 4; -EXPORT_SYMBOL(libcfs_stack); - -unsigned int libcfs_catastrophe; -EXPORT_SYMBOL(libcfs_catastrophe); - -unsigned int libcfs_panic_on_lbug = 1; -module_param(libcfs_panic_on_lbug, uint, 0644); -MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); - -static wait_queue_head_t debug_ctlwq; - -char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT; - -/* We need to pass a pointer here, but elsewhere this must be a const */ -static char *libcfs_debug_file_path; -module_param(libcfs_debug_file_path, charp, 0644); -MODULE_PARM_DESC(libcfs_debug_file_path, - "Path for dumping 
debug logs, set 'NONE' to prevent log dumping"); - -int libcfs_panic_in_progress; - -/* libcfs_debug_token2mask() expects the returned string in lower-case */ -static const char * -libcfs_debug_subsys2str(int subsys) -{ - switch (1 << subsys) { - default: - return NULL; - case S_UNDEFINED: - return "undefined"; - case S_MDC: - return "mdc"; - case S_MDS: - return "mds"; - case S_OSC: - return "osc"; - case S_OST: - return "ost"; - case S_CLASS: - return "class"; - case S_LOG: - return "log"; - case S_LLITE: - return "llite"; - case S_RPC: - return "rpc"; - case S_LNET: - return "lnet"; - case S_LND: - return "lnd"; - case S_PINGER: - return "pinger"; - case S_FILTER: - return "filter"; - case S_ECHO: - return "echo"; - case S_LDLM: - return "ldlm"; - case S_LOV: - return "lov"; - case S_LQUOTA: - return "lquota"; - case S_OSD: - return "osd"; - case S_LFSCK: - return "lfsck"; - case S_LMV: - return "lmv"; - case S_SEC: - return "sec"; - case S_GSS: - return "gss"; - case S_MGC: - return "mgc"; - case S_MGS: - return "mgs"; - case S_FID: - return "fid"; - case S_FLD: - return "fld"; - } -} - -/* libcfs_debug_token2mask() expects the returned string in lower-case */ -static const char * -libcfs_debug_dbg2str(int debug) -{ - switch (1 << debug) { - default: - return NULL; - case D_TRACE: - return "trace"; - case D_INODE: - return "inode"; - case D_SUPER: - return "super"; - case D_EXT2: - return "ext2"; - case D_MALLOC: - return "malloc"; - case D_CACHE: - return "cache"; - case D_INFO: - return "info"; - case D_IOCTL: - return "ioctl"; - case D_NETERROR: - return "neterror"; - case D_NET: - return "net"; - case D_WARNING: - return "warning"; - case D_BUFFS: - return "buffs"; - case D_OTHER: - return "other"; - case D_DENTRY: - return "dentry"; - case D_NETTRACE: - return "nettrace"; - case D_PAGE: - return "page"; - case D_DLMTRACE: - return "dlmtrace"; - case D_ERROR: - return "error"; - case D_EMERG: - return "emerg"; - case D_HA: - return "ha"; - case D_RPCTRACE: 
- return "rpctrace"; - case D_VFSTRACE: - return "vfstrace"; - case D_READA: - return "reada"; - case D_MMAP: - return "mmap"; - case D_CONFIG: - return "config"; - case D_CONSOLE: - return "console"; - case D_QUOTA: - return "quota"; - case D_SEC: - return "sec"; - case D_LFSCK: - return "lfsck"; - } -} - -int -libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int len = 0; - const char *token; - int i; - - if (mask == 0) { /* "0" */ - if (size > 0) - str[0] = '0'; - len = 1; - } else { /* space-separated tokens */ - for (i = 0; i < 32; i++) { - if ((mask & (1 << i)) == 0) - continue; - - token = fn(i); - if (!token) /* unused bit */ - continue; - - if (len > 0) { /* separator? */ - if (len < size) - str[len] = ' '; - len++; - } - - while (*token != 0) { - if (len < size) - str[len] = *token; - token++; - len++; - } - } - } - - /* terminate 'str' */ - if (len < size) - str[len] = 0; - else - str[size - 1] = 0; - - return len; -} - -int -libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int m = 0; - int matched; - int n; - int t; - - /* Allow a number for backwards compatibility */ - - for (n = strlen(str); n > 0; n--) - if (!isspace(str[n - 1])) - break; - matched = n; - t = sscanf(str, "%i%n", &m, &matched); - if (t >= 1 && matched == n) { - /* don't print warning for lctl set_param debug=0 or -1 */ - if (m != 0 && m != -1) - CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n"); - *mask = m; - return 0; - } - - return cfs_str2mask(str, fn, mask, is_subsys ? 
0 : D_CANTMASK, - 0xffffffff); -} - -/** - * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() - */ -void libcfs_debug_dumplog_internal(void *arg) -{ - void *journal_info; - - journal_info = current->journal_info; - current->journal_info = NULL; - - if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) { - snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s.%lld.%ld", libcfs_debug_file_path_arr, - (s64)ktime_get_real_seconds(), (long_ptr_t)arg); - pr_alert("LustreError: dumping log to %s\n", debug_file_name); - cfs_tracefile_dump_all_pages(debug_file_name); - libcfs_run_debug_log_upcall(debug_file_name); - } - - current->journal_info = journal_info; -} - -static int libcfs_debug_dumplog_thread(void *arg) -{ - libcfs_debug_dumplog_internal(arg); - wake_up(&debug_ctlwq); - return 0; -} - -void libcfs_debug_dumplog(void) -{ - wait_queue_t wait; - struct task_struct *dumper; - - /* we're being careful to ensure that the kernel thread is - * able to set our state to running as it exits before we - * get to schedule() - */ - init_waitqueue_entry(&wait, current); - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&debug_ctlwq, &wait); - - dumper = kthread_run(libcfs_debug_dumplog_thread, - (void *)(long)current_pid(), - "libcfs_debug_dumper"); - if (IS_ERR(dumper)) - pr_err("LustreError: cannot start log dump thread: %ld\n", - PTR_ERR(dumper)); - else - schedule(); - - /* be sure to teardown if cfs_create_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); - set_current_state(TASK_RUNNING); -} -EXPORT_SYMBOL(libcfs_debug_dumplog); - -int libcfs_debug_init(unsigned long bufsize) -{ - int rc = 0; - unsigned int max = libcfs_debug_mb; - - init_waitqueue_head(&debug_ctlwq); - - if (libcfs_console_max_delay <= 0 || /* not set by user or */ - libcfs_console_min_delay <= 0 || /* set to invalid values */ - libcfs_console_min_delay >= libcfs_console_max_delay) { - libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; - 
libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; - } - - if (libcfs_debug_file_path) { - strlcpy(libcfs_debug_file_path_arr, - libcfs_debug_file_path, - sizeof(libcfs_debug_file_path_arr)); - } - - /* If libcfs_debug_mb is set to an invalid value or uninitialized - * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES - */ - if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) { - max = TCD_MAX_PAGES; - } else { - max = max / num_possible_cpus(); - max <<= (20 - PAGE_CACHE_SHIFT); - } - rc = cfs_tracefile_init(max); - - if (rc == 0) { - libcfs_register_panic_notifier(); - libcfs_debug_mb = cfs_trace_get_debug_mb(); - } - - return rc; -} - -int libcfs_debug_cleanup(void) -{ - libcfs_unregister_panic_notifier(); - cfs_tracefile_exit(); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - cfs_trace_flush_pages(); - return 0; -} - -/* Debug markers, although printed by S_LNET should not be be marked as such. */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int libcfs_debug_mark_buffer(const char *text) -{ - CDEBUG(D_TRACE, - "***************************************************\n"); - LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text); - CDEBUG(D_TRACE, - "***************************************************\n"); - - return 0; -} - -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_LNET diff --git a/drivers/staging/lustre/lustre/libcfs/fail.c b/drivers/staging/lustre/lustre/libcfs/fail.c deleted file mode 100644 index dadaf7685cbd..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/fail.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores, - * CA 94065 USA or visit www.oracle.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Oracle Corporation, Inc. - */ - -#include "../../include/linux/libcfs/libcfs.h" - -unsigned long cfs_fail_loc; -EXPORT_SYMBOL(cfs_fail_loc); - -unsigned int cfs_fail_val; -EXPORT_SYMBOL(cfs_fail_val); - -DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq); -EXPORT_SYMBOL(cfs_race_waitq); - -int cfs_race_state; -EXPORT_SYMBOL(cfs_race_state); - -int __cfs_fail_check_set(__u32 id, __u32 value, int set) -{ - static atomic_t cfs_fail_count = ATOMIC_INIT(0); - - LASSERT(!(id & CFS_FAIL_ONCE)); - - if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) == - (CFS_FAILED | CFS_FAIL_ONCE)) { - atomic_set(&cfs_fail_count, 0); /* paranoia */ - return 0; - } - - /* Fail 1/cfs_fail_val times */ - if (cfs_fail_loc & CFS_FAIL_RAND) { - if (cfs_fail_val < 2 || cfs_rand() % cfs_fail_val > 0) - return 0; - } - - /* Skip the first cfs_fail_val, then fail */ - if (cfs_fail_loc & CFS_FAIL_SKIP) { - if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val) - return 0; - } - - /* check cfs_fail_val... 
*/ - if (set == CFS_FAIL_LOC_VALUE) { - if (cfs_fail_val != -1 && cfs_fail_val != value) - return 0; - } - - /* Fail cfs_fail_val times, overridden by FAIL_ONCE */ - if (cfs_fail_loc & CFS_FAIL_SOME && - (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) { - int count = atomic_inc_return(&cfs_fail_count); - - if (count >= cfs_fail_val) { - set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); - atomic_set(&cfs_fail_count, 0); - /* we are lost race to increase */ - if (count > cfs_fail_val) - return 0; - } - } - - if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) && - (value & CFS_FAIL_ONCE)) - set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); - /* Lost race to set CFS_FAILED_BIT. */ - if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { - /* If CFS_FAIL_ONCE is valid, only one process can fail, - * otherwise multi-process can fail at the same time. - */ - if (cfs_fail_loc & CFS_FAIL_ONCE) - return 0; - } - - switch (set) { - case CFS_FAIL_LOC_NOSET: - case CFS_FAIL_LOC_VALUE: - break; - case CFS_FAIL_LOC_ORSET: - cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE); - break; - case CFS_FAIL_LOC_RESET: - cfs_fail_loc = value; - break; - default: - LASSERTF(0, "called with bad set %u\n", set); - break; - } - - return 1; -} -EXPORT_SYMBOL(__cfs_fail_check_set); - -int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) -{ - int ret; - - ret = __cfs_fail_check_set(id, value, set); - if (ret && likely(ms > 0)) { - CERROR("cfs_fail_timeout id %x sleeping for %dms\n", - id, ms); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(ms) / 1000); - CERROR("cfs_fail_timeout id %x awake\n", id); - } - return ret; -} -EXPORT_SYMBOL(__cfs_fail_timeout_set); diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c deleted file mode 100644 index f60feb3a3dc7..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/hash.c +++ /dev/null @@ -1,2085 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER 
OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/hash.c - * - * Implement a hash class for hash process in lustre system. 
- * - * Author: YuZhangyong - * - * 2008-08-15: Brian Behlendorf - * - Simplified API and improved documentation - * - Added per-hash feature flags: - * * CFS_HASH_DEBUG additional validation - * * CFS_HASH_REHASH dynamic rehashing - * - Added per-hash statistics - * - General performance enhancements - * - * 2009-07-31: Liang Zhen - * - move all stuff to libcfs - * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH - * - ignore hs_rwlock if without CFS_HASH_REHASH setting - * - buckets are allocated one by one(instead of contiguous memory), - * to avoid unnecessary cacheline conflict - * - * 2010-03-01: Liang Zhen - * - "bucket" is a group of hlist_head now, user can specify bucket size - * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share - * one lock for reducing memory overhead. - * - * - support lockless hash, caller will take care of locks: - * avoid lock overhead for hash tables that are already protected - * by locking in the caller for another reason - * - * - support both spin_lock/rwlock for bucket: - * overhead of spinlock contention is lower than read/write - * contention of rwlock, so using spinlock to serialize operations on - * bucket is more reasonable for those frequently changed hash tables - * - * - support one-single lock mode: - * one lock to protect all hash operations to avoid overhead of - * multiple locks if hash table is always small - * - * - removed a lot of unnecessary addref & decref on hash element: - * addref & decref are atomic operations in many use-cases which - * are expensive. - * - * - support non-blocking cfs_hash_add() and cfs_hash_findadd(): - * some lustre use-cases require these functions to be strictly - * non-blocking, we need to schedule required rehash on a different - * thread on those cases. 
- * - * - safer rehash on large hash table - * In old implementation, rehash function will exclusively lock the - * hash table and finish rehash in one batch, it's dangerous on SMP - * system because rehash millions of elements could take long time. - * New implemented rehash can release lock and relax CPU in middle - * of rehash, it's safe for another thread to search/change on the - * hash table even it's in rehasing. - * - * - support two different refcount modes - * . hash table has refcount on element - * . hash table doesn't change refcount on adding/removing element - * - * - support long name hash table (for param-tree) - * - * - fix a bug for cfs_hash_rehash_key: - * in old implementation, cfs_hash_rehash_key could screw up the - * hash-table because @key is overwritten without any protection. - * Now we need user to define hs_keycpy for those rehash enabled - * hash tables, cfs_hash_rehash_key will overwrite hash-key - * inside lock by calling hs_keycpy. - * - * - better hash iteration: - * Now we support both locked iteration & lockless iteration of hash - * table. Also, user can break the iteration by return 1 in callback. 
- */ -#include -#include - -#include "../../include/linux/libcfs/libcfs.h" - -#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -static unsigned int warn_on_depth = 8; -module_param(warn_on_depth, uint, 0644); -MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high."); -#endif - -struct cfs_wi_sched *cfs_sched_rehash; - -static inline void -cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {} - -static inline void -cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {} - -static inline void -cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive) - __acquires(&lock->spin) -{ - spin_lock(&lock->spin); -} - -static inline void -cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive) - __releases(&lock->spin) -{ - spin_unlock(&lock->spin); -} - -static inline void -cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive) - __acquires(&lock->rw) -{ - if (!exclusive) - read_lock(&lock->rw); - else - write_lock(&lock->rw); -} - -static inline void -cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive) - __releases(&lock->rw) -{ - if (!exclusive) - read_unlock(&lock->rw); - else - write_unlock(&lock->rw); -} - -/** No lock hash */ -static struct cfs_hash_lock_ops cfs_hash_nl_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_nl_lock, - .hs_bkt_unlock = cfs_hash_nl_unlock, -}; - -/** no bucket lock, one spinlock to protect everything */ -static struct cfs_hash_lock_ops cfs_hash_nbl_lops = { - .hs_lock = cfs_hash_spin_lock, - .hs_unlock = cfs_hash_spin_unlock, - .hs_bkt_lock = cfs_hash_nl_lock, - .hs_bkt_unlock = cfs_hash_nl_unlock, -}; - -/** spin bucket lock, rehash is enabled */ -static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = { - .hs_lock = cfs_hash_rw_lock, - .hs_unlock = cfs_hash_rw_unlock, - .hs_bkt_lock = cfs_hash_spin_lock, - .hs_bkt_unlock = cfs_hash_spin_unlock, -}; - -/** rw bucket lock, rehash is enabled */ -static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops 
= { - .hs_lock = cfs_hash_rw_lock, - .hs_unlock = cfs_hash_rw_unlock, - .hs_bkt_lock = cfs_hash_rw_lock, - .hs_bkt_unlock = cfs_hash_rw_unlock, -}; - -/** spin bucket lock, rehash is disabled */ -static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_spin_lock, - .hs_bkt_unlock = cfs_hash_spin_unlock, -}; - -/** rw bucket lock, rehash is disabled */ -static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_rw_lock, - .hs_bkt_unlock = cfs_hash_rw_unlock, -}; - -static void -cfs_hash_lock_setup(struct cfs_hash *hs) -{ - if (cfs_hash_with_no_lock(hs)) { - hs->hs_lops = &cfs_hash_nl_lops; - - } else if (cfs_hash_with_no_bktlock(hs)) { - hs->hs_lops = &cfs_hash_nbl_lops; - spin_lock_init(&hs->hs_lock.spin); - - } else if (cfs_hash_with_rehash(hs)) { - rwlock_init(&hs->hs_lock.rw); - - if (cfs_hash_with_rw_bktlock(hs)) - hs->hs_lops = &cfs_hash_bkt_rw_lops; - else if (cfs_hash_with_spin_bktlock(hs)) - hs->hs_lops = &cfs_hash_bkt_spin_lops; - else - LBUG(); - } else { - if (cfs_hash_with_rw_bktlock(hs)) - hs->hs_lops = &cfs_hash_nr_bkt_rw_lops; - else if (cfs_hash_with_spin_bktlock(hs)) - hs->hs_lops = &cfs_hash_nr_bkt_spin_lops; - else - LBUG(); - } -} - -/** - * Simple hash head without depth tracking - * new element is always added to head of hlist - */ -struct cfs_hash_head { - struct hlist_head hh_head; /**< entries list */ -}; - -static int -cfs_hash_hh_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_head); -} - -static struct hlist_head * -cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_head *head; - - head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].hh_head; -} - -static int -cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - 
hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd)); - return -1; /* unknown depth */ -} - -static int -cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hlist_del_init(hnode); - return -1; /* unknown depth */ -} - -/** - * Simple hash head with depth tracking - * new element is always added to head of hlist - */ -struct cfs_hash_head_dep { - struct hlist_head hd_head; /**< entries list */ - unsigned int hd_depth; /**< list length */ -}; - -static int -cfs_hash_hd_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_head_dep); -} - -static struct hlist_head * -cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_head_dep *head; - - head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].hd_head; -} - -static int -cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_head_dep *hh; - - hh = container_of(cfs_hash_hd_hhead(hs, bd), - struct cfs_hash_head_dep, hd_head); - hlist_add_head(hnode, &hh->hd_head); - return ++hh->hd_depth; -} - -static int -cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_head_dep *hh; - - hh = container_of(cfs_hash_hd_hhead(hs, bd), - struct cfs_hash_head_dep, hd_head); - hlist_del_init(hnode); - return --hh->hd_depth; -} - -/** - * double links hash head without depth tracking - * new element is always added to tail of hlist - */ -struct cfs_hash_dhead { - struct hlist_head dh_head; /**< entries list */ - struct hlist_node *dh_tail; /**< the last entry */ -}; - -static int -cfs_hash_dh_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_dhead); -} - -static struct hlist_head * -cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_dhead *head; - - head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0]; - return 
&head[bd->bd_offset].dh_head; -} - -static int -cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_dhead *dh; - - dh = container_of(cfs_hash_dh_hhead(hs, bd), - struct cfs_hash_dhead, dh_head); - if (dh->dh_tail) /* not empty */ - hlist_add_behind(hnode, dh->dh_tail); - else /* empty list */ - hlist_add_head(hnode, &dh->dh_head); - dh->dh_tail = hnode; - return -1; /* unknown depth */ -} - -static int -cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnd) -{ - struct cfs_hash_dhead *dh; - - dh = container_of(cfs_hash_dh_hhead(hs, bd), - struct cfs_hash_dhead, dh_head); - if (!hnd->next) { /* it's the tail */ - dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL : - container_of(hnd->pprev, struct hlist_node, next); - } - hlist_del_init(hnd); - return -1; /* unknown depth */ -} - -/** - * double links hash head with depth tracking - * new element is always added to tail of hlist - */ -struct cfs_hash_dhead_dep { - struct hlist_head dd_head; /**< entries list */ - struct hlist_node *dd_tail; /**< the last entry */ - unsigned int dd_depth; /**< list length */ -}; - -static int -cfs_hash_dd_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_dhead_dep); -} - -static struct hlist_head * -cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_dhead_dep *head; - - head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].dd_head; -} - -static int -cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_dhead_dep *dh; - - dh = container_of(cfs_hash_dd_hhead(hs, bd), - struct cfs_hash_dhead_dep, dd_head); - if (dh->dd_tail) /* not empty */ - hlist_add_behind(hnode, dh->dd_tail); - else /* empty list */ - hlist_add_head(hnode, &dh->dd_head); - dh->dd_tail = hnode; - return ++dh->dd_depth; -} - -static int 
-cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnd) -{ - struct cfs_hash_dhead_dep *dh; - - dh = container_of(cfs_hash_dd_hhead(hs, bd), - struct cfs_hash_dhead_dep, dd_head); - if (!hnd->next) { /* it's the tail */ - dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL : - container_of(hnd->pprev, struct hlist_node, next); - } - hlist_del_init(hnd); - return --dh->dd_depth; -} - -static struct cfs_hash_hlist_ops cfs_hash_hh_hops = { - .hop_hhead = cfs_hash_hh_hhead, - .hop_hhead_size = cfs_hash_hh_hhead_size, - .hop_hnode_add = cfs_hash_hh_hnode_add, - .hop_hnode_del = cfs_hash_hh_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_hd_hops = { - .hop_hhead = cfs_hash_hd_hhead, - .hop_hhead_size = cfs_hash_hd_hhead_size, - .hop_hnode_add = cfs_hash_hd_hnode_add, - .hop_hnode_del = cfs_hash_hd_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_dh_hops = { - .hop_hhead = cfs_hash_dh_hhead, - .hop_hhead_size = cfs_hash_dh_hhead_size, - .hop_hnode_add = cfs_hash_dh_hnode_add, - .hop_hnode_del = cfs_hash_dh_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_dd_hops = { - .hop_hhead = cfs_hash_dd_hhead, - .hop_hhead_size = cfs_hash_dd_hhead_size, - .hop_hnode_add = cfs_hash_dd_hnode_add, - .hop_hnode_del = cfs_hash_dd_hnode_del, -}; - -static void -cfs_hash_hlist_setup(struct cfs_hash *hs) -{ - if (cfs_hash_with_add_tail(hs)) { - hs->hs_hops = cfs_hash_with_depth(hs) ? - &cfs_hash_dd_hops : &cfs_hash_dh_hops; - } else { - hs->hs_hops = cfs_hash_with_depth(hs) ? 
- &cfs_hash_hd_hops : &cfs_hash_hh_hops; - } -} - -static void -cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts, - unsigned int bits, const void *key, struct cfs_hash_bd *bd) -{ - unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1); - - LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits); - - bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)]; - bd->bd_offset = index >> (bits - hs->hs_bkt_bits); -} - -void -cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (likely(!hs->hs_rehash_buckets)) { - cfs_hash_bd_from_key(hs, hs->hs_buckets, - hs->hs_cur_bits, key, bd); - } else { - LASSERT(hs->hs_rehash_bits != 0); - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, bd); - } -} -EXPORT_SYMBOL(cfs_hash_bd_get); - -static inline void -cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur) -{ - if (likely(dep_cur <= bd->bd_bucket->hsb_depmax)) - return; - - bd->bd_bucket->hsb_depmax = dep_cur; -# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 - if (likely(warn_on_depth == 0 || - max(warn_on_depth, hs->hs_dep_max) >= dep_cur)) - return; - - spin_lock(&hs->hs_dep_lock); - hs->hs_dep_max = dep_cur; - hs->hs_dep_bkt = bd->bd_bucket->hsb_index; - hs->hs_dep_off = bd->bd_offset; - hs->hs_dep_bits = hs->hs_cur_bits; - spin_unlock(&hs->hs_dep_lock); - - cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi); -# endif -} - -void -cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - int rc; - - rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode); - cfs_hash_bd_dep_record(hs, bd, rc); - bd->bd_bucket->hsb_version++; - if (unlikely(bd->bd_bucket->hsb_version == 0)) - bd->bd_bucket->hsb_version++; - bd->bd_bucket->hsb_count++; - - if (cfs_hash_with_counter(hs)) - atomic_inc(&hs->hs_count); - if (!cfs_hash_with_no_itemref(hs)) - cfs_hash_get(hs, 
hnode); -} -EXPORT_SYMBOL(cfs_hash_bd_add_locked); - -void -cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hs->hs_hops->hop_hnode_del(hs, bd, hnode); - - LASSERT(bd->bd_bucket->hsb_count > 0); - bd->bd_bucket->hsb_count--; - bd->bd_bucket->hsb_version++; - if (unlikely(bd->bd_bucket->hsb_version == 0)) - bd->bd_bucket->hsb_version++; - - if (cfs_hash_with_counter(hs)) { - LASSERT(atomic_read(&hs->hs_count) > 0); - atomic_dec(&hs->hs_count); - } - if (!cfs_hash_with_no_itemref(hs)) - cfs_hash_put_locked(hs, hnode); -} -EXPORT_SYMBOL(cfs_hash_bd_del_locked); - -void -cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, - struct cfs_hash_bd *bd_new, struct hlist_node *hnode) -{ - struct cfs_hash_bucket *obkt = bd_old->bd_bucket; - struct cfs_hash_bucket *nbkt = bd_new->bd_bucket; - int rc; - - if (cfs_hash_bd_compare(bd_old, bd_new) == 0) - return; - - /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops - * in cfs_hash_bd_del/add_locked - */ - hs->hs_hops->hop_hnode_del(hs, bd_old, hnode); - rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode); - cfs_hash_bd_dep_record(hs, bd_new, rc); - - LASSERT(obkt->hsb_count > 0); - obkt->hsb_count--; - obkt->hsb_version++; - if (unlikely(obkt->hsb_version == 0)) - obkt->hsb_version++; - nbkt->hsb_count++; - nbkt->hsb_version++; - if (unlikely(nbkt->hsb_version == 0)) - nbkt->hsb_version++; -} - -enum { - /** always set, for sanity (avoid ZERO intent) */ - CFS_HS_LOOKUP_MASK_FIND = BIT(0), - /** return entry with a ref */ - CFS_HS_LOOKUP_MASK_REF = BIT(1), - /** add entry if not existing */ - CFS_HS_LOOKUP_MASK_ADD = BIT(2), - /** delete entry, ignore other masks */ - CFS_HS_LOOKUP_MASK_DEL = BIT(3), -}; - -enum cfs_hash_lookup_intent { - /** return item w/o refcount */ - CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND, - /** return item with refcount */ - CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_REF), - /** return 
item w/o refcount if existed, otherwise add */ - CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_ADD), - /** return item with refcount if existed, otherwise add */ - CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND | - CFS_HS_LOOKUP_MASK_ADD), - /** delete if existed */ - CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_DEL) -}; - -static struct hlist_node * -cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key, struct hlist_node *hnode, - enum cfs_hash_lookup_intent intent) - -{ - struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd); - struct hlist_node *ehnode; - struct hlist_node *match; - int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0; - - /* with this function, we can avoid a lot of useless refcount ops, - * which are expensive atomic operations most time. - */ - match = intent_add ? NULL : hnode; - hlist_for_each(ehnode, hhead) { - if (!cfs_hash_keycmp(hs, key, ehnode)) - continue; - - if (match && match != ehnode) /* can't match */ - continue; - - /* match and ... */ - if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) { - cfs_hash_bd_del_locked(hs, bd, ehnode); - return ehnode; - } - - /* caller wants refcount? 
*/ - if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0) - cfs_hash_get(hs, ehnode); - return ehnode; - } - /* no match item */ - if (!intent_add) - return NULL; - - LASSERT(hnode); - cfs_hash_bd_add_locked(hs, bd, hnode); - return hnode; -} - -struct hlist_node * -cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key) -{ - return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, - CFS_HS_LOOKUP_IT_FIND); -} -EXPORT_SYMBOL(cfs_hash_bd_lookup_locked); - -struct hlist_node * -cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key) -{ - return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, - CFS_HS_LOOKUP_IT_PEEK); -} -EXPORT_SYMBOL(cfs_hash_bd_peek_locked); - -static void -cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, int excl) -{ - struct cfs_hash_bucket *prev = NULL; - int i; - - /** - * bds must be ascendantly ordered by bd->bd_bucket->hsb_index. - * NB: it's possible that several bds point to the same bucket but - * have different bd::bd_offset, so need take care of deadlock. 
- */ - cfs_hash_for_each_bd(bds, n, i) { - if (prev == bds[i].bd_bucket) - continue; - - LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index); - cfs_hash_bd_lock(hs, &bds[i], excl); - prev = bds[i].bd_bucket; - } -} - -static void -cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, int excl) -{ - struct cfs_hash_bucket *prev = NULL; - int i; - - cfs_hash_for_each_bd(bds, n, i) { - if (prev != bds[i].bd_bucket) { - cfs_hash_bd_unlock(hs, &bds[i], excl); - prev = bds[i].bd_bucket; - } - } -} - -static struct hlist_node * -cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, const void *key) -{ - struct hlist_node *ehnode; - unsigned i; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL, - CFS_HS_LOOKUP_IT_FIND); - if (ehnode) - return ehnode; - } - return NULL; -} - -static struct hlist_node * -cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, const void *key, - struct hlist_node *hnode, int noref) -{ - struct hlist_node *ehnode; - int intent; - unsigned i; - - LASSERT(hnode); - intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, - NULL, intent); - if (ehnode) - return ehnode; - } - - if (i == 1) { /* only one bucket */ - cfs_hash_bd_add_locked(hs, &bds[0], hnode); - } else { - struct cfs_hash_bd mybd; - - cfs_hash_bd_get(hs, key, &mybd); - cfs_hash_bd_add_locked(hs, &mybd, hnode); - } - - return hnode; -} - -static struct hlist_node * -cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, const void *key, - struct hlist_node *hnode) -{ - struct hlist_node *ehnode; - unsigned int i; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode, - CFS_HS_LOOKUP_IT_FINDDEL); - if (ehnode) - return ehnode; - } - 
return NULL; -} - -static void -cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2) -{ - int rc; - - if (!bd2->bd_bucket) - return; - - if (!bd1->bd_bucket) { - *bd1 = *bd2; - bd2->bd_bucket = NULL; - return; - } - - rc = cfs_hash_bd_compare(bd1, bd2); - if (!rc) - bd2->bd_bucket = NULL; - else if (rc > 0) - swap(*bd1, *bd2); /* swap bd1 and bd2 */ -} - -void -cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bds) -{ - /* NB: caller should hold hs_lock.rw if REHASH is set */ - cfs_hash_bd_from_key(hs, hs->hs_buckets, - hs->hs_cur_bits, key, &bds[0]); - if (likely(!hs->hs_rehash_buckets)) { - /* no rehash or not rehashing */ - bds[1].bd_bucket = NULL; - return; - } - - LASSERT(hs->hs_rehash_bits != 0); - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, &bds[1]); - - cfs_hash_bd_order(&bds[0], &bds[1]); -} - -void -cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_multi_bd_lock(hs, bds, 2, excl); -} - -void -cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_multi_bd_unlock(hs, bds, 2, excl); -} - -struct hlist_node * -cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key) -{ - return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key); -} - -struct hlist_node * -cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode, - int noref) -{ - return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key, - hnode, noref); -} - -struct hlist_node * -cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode) -{ - return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode); -} - -static void -cfs_hash_buckets_free(struct cfs_hash_bucket **buckets, - int bkt_size, int prev_size, int size) -{ - int i; - - for (i = prev_size; i < size; i++) { - if (buckets[i]) - 
LIBCFS_FREE(buckets[i], bkt_size); - } - - LIBCFS_FREE(buckets, sizeof(buckets[0]) * size); -} - -/* - * Create or grow bucket memory. Return old_buckets if no allocation was - * needed, the newly allocated buckets if allocation was needed and - * successful, and NULL on error. - */ -static struct cfs_hash_bucket ** -cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, - unsigned int old_size, unsigned int new_size) -{ - struct cfs_hash_bucket **new_bkts; - int i; - - LASSERT(old_size == 0 || old_bkts); - - if (old_bkts && old_size == new_size) - return old_bkts; - - LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size); - if (!new_bkts) - return NULL; - - if (old_bkts) { - memcpy(new_bkts, old_bkts, - min(old_size, new_size) * sizeof(*old_bkts)); - } - - for (i = old_size; i < new_size; i++) { - struct hlist_head *hhead; - struct cfs_hash_bd bd; - - LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs)); - if (!new_bkts[i]) { - cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs), - old_size, new_size); - return NULL; - } - - new_bkts[i]->hsb_index = i; - new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ - new_bkts[i]->hsb_depmax = -1; /* unknown */ - bd.bd_bucket = new_bkts[i]; - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) - INIT_HLIST_HEAD(hhead); - - if (cfs_hash_with_no_lock(hs) || - cfs_hash_with_no_bktlock(hs)) - continue; - - if (cfs_hash_with_rw_bktlock(hs)) - rwlock_init(&new_bkts[i]->hsb_lock.rw); - else if (cfs_hash_with_spin_bktlock(hs)) - spin_lock_init(&new_bkts[i]->hsb_lock.spin); - else - LBUG(); /* invalid use-case */ - } - return new_bkts; -} - -/** - * Initialize new libcfs hash, where: - * @name - Descriptive hash name - * @cur_bits - Initial hash table size, in bits - * @max_bits - Maximum allowed hash table resize, in bits - * @ops - Registered hash table operations - * @flags - CFS_HASH_REHASH enable synamic hash resizing - * - CFS_HASH_SORT enable chained hash sort - */ -static int 
cfs_hash_rehash_worker(cfs_workitem_t *wi); - -#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -static int cfs_hash_dep_print(cfs_workitem_t *wi) -{ - struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi); - int dep; - int bkt; - int off; - int bits; - - spin_lock(&hs->hs_dep_lock); - dep = hs->hs_dep_max; - bkt = hs->hs_dep_bkt; - off = hs->hs_dep_off; - bits = hs->hs_dep_bits; - spin_unlock(&hs->hs_dep_lock); - - LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n", - hs->hs_name, bits, dep, bkt, off); - spin_lock(&hs->hs_dep_lock); - hs->hs_dep_bits = 0; /* mark as workitem done */ - spin_unlock(&hs->hs_dep_lock); - return 0; -} - -static void cfs_hash_depth_wi_init(struct cfs_hash *hs) -{ - spin_lock_init(&hs->hs_dep_lock); - cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print); -} - -static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) -{ - if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi)) - return; - - spin_lock(&hs->hs_dep_lock); - while (hs->hs_dep_bits != 0) { - spin_unlock(&hs->hs_dep_lock); - cond_resched(); - spin_lock(&hs->hs_dep_lock); - } - spin_unlock(&hs->hs_dep_lock); -} - -#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */ - -static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {} -static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {} - -#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */ - -struct cfs_hash * -cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, - unsigned bkt_bits, unsigned extra_bytes, - unsigned min_theta, unsigned max_theta, - struct cfs_hash_ops *ops, unsigned flags) -{ - struct cfs_hash *hs; - int len; - - CLASSERT(CFS_HASH_THETA_BITS < 15); - - LASSERT(name); - LASSERT(ops->hs_key); - LASSERT(ops->hs_hash); - LASSERT(ops->hs_object); - LASSERT(ops->hs_keycmp); - LASSERT(ops->hs_get); - LASSERT(ops->hs_put_locked); - - if ((flags & CFS_HASH_REHASH) != 0) - flags |= CFS_HASH_COUNTER; /* must have counter */ - - LASSERT(cur_bits > 0); - 
LASSERT(cur_bits >= bkt_bits); - LASSERT(max_bits >= cur_bits && max_bits < 31); - LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits)); - LASSERT(ergo((flags & CFS_HASH_REHASH) != 0, - (flags & CFS_HASH_NO_LOCK) == 0)); - LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy)); - - len = (flags & CFS_HASH_BIGNAME) == 0 ? - CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN; - LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len])); - if (!hs) - return NULL; - - strlcpy(hs->hs_name, name, len); - hs->hs_flags = flags; - - atomic_set(&hs->hs_refcount, 1); - atomic_set(&hs->hs_count, 0); - - cfs_hash_lock_setup(hs); - cfs_hash_hlist_setup(hs); - - hs->hs_cur_bits = (__u8)cur_bits; - hs->hs_min_bits = (__u8)cur_bits; - hs->hs_max_bits = (__u8)max_bits; - hs->hs_bkt_bits = (__u8)bkt_bits; - - hs->hs_ops = ops; - hs->hs_extra_bytes = extra_bytes; - hs->hs_rehash_bits = 0; - cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker); - cfs_hash_depth_wi_init(hs); - - if (cfs_hash_with_rehash(hs)) - __cfs_hash_set_theta(hs, min_theta, max_theta); - - hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0, - CFS_HASH_NBKT(hs)); - if (hs->hs_buckets) - return hs; - - LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[len])); - return NULL; -} -EXPORT_SYMBOL(cfs_hash_create); - -/** - * Cleanup libcfs hash @hs. - */ -static void -cfs_hash_destroy(struct cfs_hash *hs) -{ - struct hlist_node *hnode; - struct hlist_node *pos; - struct cfs_hash_bd bd; - int i; - - LASSERT(hs); - LASSERT(!cfs_hash_is_exiting(hs) && - !cfs_hash_is_iterating(hs)); - - /** - * prohibit further rehashes, don't need any lock because - * I'm the only (last) one can change it. 
- */ - hs->hs_exiting = 1; - if (cfs_hash_with_rehash(hs)) - cfs_hash_rehash_cancel(hs); - - cfs_hash_depth_wi_cancel(hs); - /* rehash should be done/canceled */ - LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - LASSERT(bd.bd_bucket); - /* no need to take this lock, just for consistent code */ - cfs_hash_bd_lock(hs, &bd, 1); - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - LASSERTF(!cfs_hash_with_assert_empty(hs), - "hash %s bucket %u(%u) is not empty: %u items left\n", - hs->hs_name, bd.bd_bucket->hsb_index, - bd.bd_offset, bd.bd_bucket->hsb_count); - /* can't assert key valicate, because we - * can interrupt rehash - */ - cfs_hash_bd_del_locked(hs, &bd, hnode); - cfs_hash_exit(hs, hnode); - } - } - LASSERT(bd.bd_bucket->hsb_count == 0); - cfs_hash_bd_unlock(hs, &bd, 1); - cond_resched(); - } - - LASSERT(atomic_read(&hs->hs_count) == 0); - - cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs), - 0, CFS_HASH_NBKT(hs)); - i = cfs_hash_with_bigname(hs) ? 
- CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN; - LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[i])); -} - -struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs) -{ - if (atomic_inc_not_zero(&hs->hs_refcount)) - return hs; - return NULL; -} -EXPORT_SYMBOL(cfs_hash_getref); - -void cfs_hash_putref(struct cfs_hash *hs) -{ - if (atomic_dec_and_test(&hs->hs_refcount)) - cfs_hash_destroy(hs); -} -EXPORT_SYMBOL(cfs_hash_putref); - -static inline int -cfs_hash_rehash_bits(struct cfs_hash *hs) -{ - if (cfs_hash_with_no_lock(hs) || - !cfs_hash_with_rehash(hs)) - return -EOPNOTSUPP; - - if (unlikely(cfs_hash_is_exiting(hs))) - return -ESRCH; - - if (unlikely(cfs_hash_is_rehashing(hs))) - return -EALREADY; - - if (unlikely(cfs_hash_is_iterating(hs))) - return -EAGAIN; - - /* XXX: need to handle case with max_theta != 2.0 - * and the case with min_theta != 0.5 - */ - if ((hs->hs_cur_bits < hs->hs_max_bits) && - (__cfs_hash_theta(hs) > hs->hs_max_theta)) - return hs->hs_cur_bits + 1; - - if (!cfs_hash_with_shrink(hs)) - return 0; - - if ((hs->hs_cur_bits > hs->hs_min_bits) && - (__cfs_hash_theta(hs) < hs->hs_min_theta)) - return hs->hs_cur_bits - 1; - - return 0; -} - -/** - * don't allow inline rehash if: - * - user wants non-blocking change (add/del) on hash table - * - too many elements - */ -static inline int -cfs_hash_rehash_inline(struct cfs_hash *hs) -{ - return !cfs_hash_with_nblk_change(hs) && - atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG; -} - -/** - * Add item @hnode to libcfs hash @hs using @key. The registered - * ops->hs_get function will be called when the item is added. 
- */ -void -cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - struct cfs_hash_bd bd; - int bits; - - LASSERT(hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - cfs_hash_bd_get_and_lock(hs, key, &bd, 1); - - cfs_hash_key_validate(hs, key, hnode); - cfs_hash_bd_add_locked(hs, &bd, hnode); - - cfs_hash_bd_unlock(hs, &bd, 1); - - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); -} -EXPORT_SYMBOL(cfs_hash_add); - -static struct hlist_node * -cfs_hash_find_or_add(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode, int noref) -{ - struct hlist_node *ehnode; - struct cfs_hash_bd bds[2]; - int bits = 0; - - LASSERT(hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); - - cfs_hash_key_validate(hs, key, hnode); - ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key, - hnode, noref); - cfs_hash_dual_bd_unlock(hs, bds, 1); - - if (ehnode == hnode) /* new item added */ - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); - - return ehnode; -} - -/** - * Add item @hnode to libcfs hash @hs using @key. The registered - * ops->hs_get function will be called if the item was added. - * Returns 0 on success or -EALREADY on key collisions. - */ -int -cfs_hash_add_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ? - -EALREADY : 0; -} -EXPORT_SYMBOL(cfs_hash_add_unique); - -/** - * Add item @hnode to libcfs hash @hs using @key. If this @key - * already exists in the hash then ops->hs_get will be called on the - * conflicting entry and that entry will be returned to the caller. - * Otherwise ops->hs_get is called on the item which was added. 
- */ -void * -cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - hnode = cfs_hash_find_or_add(hs, key, hnode, 0); - - return cfs_hash_object(hs, hnode); -} -EXPORT_SYMBOL(cfs_hash_findadd_unique); - -/** - * Delete item @hnode from the libcfs hash @hs using @key. The @key - * is required to ensure the correct hash bucket is locked since there - * is no direct linkage from the item to the bucket. The object - * removed from the hash will be returned and obs->hs_put is called - * on the removed object. - */ -void * -cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - void *obj = NULL; - int bits = 0; - struct cfs_hash_bd bds[2]; - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); - - /* NB: do nothing if @hnode is not in hash table */ - if (!hnode || !hlist_unhashed(hnode)) { - if (!bds[1].bd_bucket && hnode) { - cfs_hash_bd_del_locked(hs, &bds[0], hnode); - } else { - hnode = cfs_hash_dual_bd_finddel_locked(hs, bds, - key, hnode); - } - } - - if (hnode) { - obj = cfs_hash_object(hs, hnode); - bits = cfs_hash_rehash_bits(hs); - } - - cfs_hash_dual_bd_unlock(hs, bds, 1); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); - - return obj; -} -EXPORT_SYMBOL(cfs_hash_del); - -/** - * Delete item given @key in libcfs hash @hs. The first @key found in - * the hash will be removed, if the key exists multiple times in the hash - * @hs this function must be called once per key. The removed object - * will be returned and ops->hs_put is called on the removed object. - */ -void * -cfs_hash_del_key(struct cfs_hash *hs, const void *key) -{ - return cfs_hash_del(hs, key, NULL); -} -EXPORT_SYMBOL(cfs_hash_del_key); - -/** - * Lookup an item using @key in the libcfs hash @hs and return it. - * If the @key is found in the hash hs->hs_get() is called and the - * matching objects is returned. 
It is the callers responsibility - * to call the counterpart ops->hs_put using the cfs_hash_put() macro - * when when finished with the object. If the @key was not found - * in the hash @hs NULL is returned. - */ -void * -cfs_hash_lookup(struct cfs_hash *hs, const void *key) -{ - void *obj = NULL; - struct hlist_node *hnode; - struct cfs_hash_bd bds[2]; - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); - - hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key); - if (hnode) - obj = cfs_hash_object(hs, hnode); - - cfs_hash_dual_bd_unlock(hs, bds, 0); - cfs_hash_unlock(hs, 0); - - return obj; -} -EXPORT_SYMBOL(cfs_hash_lookup); - -static void -cfs_hash_for_each_enter(struct cfs_hash *hs) -{ - LASSERT(!cfs_hash_is_exiting(hs)); - - if (!cfs_hash_with_rehash(hs)) - return; - /* - * NB: it's race on cfs_has_t::hs_iterating, but doesn't matter - * because it's just an unreliable signal to rehash-thread, - * rehash-thread will try to finish rehash ASAP when seeing this. - */ - hs->hs_iterating = 1; - - cfs_hash_lock(hs, 1); - hs->hs_iterators++; - - /* NB: iteration is mostly called by service thread, - * we tend to cancel pending rehash-request, instead of - * blocking service thread, we will relaunch rehash request - * after iteration - */ - if (cfs_hash_is_rehashing(hs)) - cfs_hash_rehash_cancel_locked(hs); - cfs_hash_unlock(hs, 1); -} - -static void -cfs_hash_for_each_exit(struct cfs_hash *hs) -{ - int remained; - int bits; - - if (!cfs_hash_with_rehash(hs)) - return; - cfs_hash_lock(hs, 1); - remained = --hs->hs_iterators; - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 1); - /* NB: it's race on cfs_has_t::hs_iterating, see above */ - if (remained == 0) - hs->hs_iterating = 0; - if (bits > 0) { - cfs_hash_rehash(hs, atomic_read(&hs->hs_count) < - CFS_HASH_LOOP_HOG); - } -} - -/** - * For each item in the libcfs hash @hs call the passed callback @func - * and pass to it as an argument each hash item and the private @data. 
- * - * a) the function may sleep! - * b) during the callback: - * . the bucket lock is held so the callback must never sleep. - * . if @removal_safe is true, use can remove current item by - * cfs_hash_bd_del_locked - */ -static __u64 -cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data, int remove_safe) -{ - struct hlist_node *hnode; - struct hlist_node *pos; - struct cfs_hash_bd bd; - __u64 count = 0; - int excl = !!remove_safe; - int loop = 0; - int i; - - cfs_hash_for_each_enter(hs); - - cfs_hash_lock(hs, 0); - LASSERT(!cfs_hash_is_rehashing(hs)); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - cfs_hash_bd_lock(hs, &bd, excl); - if (!func) { /* only glimpse size */ - count += bd.bd_bucket->hsb_count; - cfs_hash_bd_unlock(hs, &bd, excl); - continue; - } - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - cfs_hash_bucket_validate(hs, &bd, hnode); - count++; - loop++; - if (func(hs, &bd, hnode, data)) { - cfs_hash_bd_unlock(hs, &bd, excl); - goto out; - } - } - } - cfs_hash_bd_unlock(hs, &bd, excl); - if (loop < CFS_HASH_LOOP_HOG) - continue; - loop = 0; - cfs_hash_unlock(hs, 0); - cond_resched(); - cfs_hash_lock(hs, 0); - } - out: - cfs_hash_unlock(hs, 0); - - cfs_hash_for_each_exit(hs); - return count; -} - -struct cfs_hash_cond_arg { - cfs_hash_cond_opt_cb_t func; - void *arg; -}; - -static int -cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - struct cfs_hash_cond_arg *cond = data; - - if (cond->func(cfs_hash_object(hs, hnode), cond->arg)) - cfs_hash_bd_del_locked(hs, bd, hnode); - return 0; -} - -/** - * Delete item from the libcfs hash @hs when @func return true. - * The write lock being hold during loop for each bucket to avoid - * any object be reference. 
- */ -void -cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data) -{ - struct cfs_hash_cond_arg arg = { - .func = func, - .arg = data, - }; - - cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1); -} -EXPORT_SYMBOL(cfs_hash_cond_del); - -void -cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - cfs_hash_for_each_tight(hs, func, data, 0); -} -EXPORT_SYMBOL(cfs_hash_for_each); - -void -cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - cfs_hash_for_each_tight(hs, func, data, 1); -} -EXPORT_SYMBOL(cfs_hash_for_each_safe); - -static int -cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - *(int *)data = 0; - return 1; /* return 1 to break the loop */ -} - -int -cfs_hash_is_empty(struct cfs_hash *hs) -{ - int empty = 1; - - cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0); - return empty; -} -EXPORT_SYMBOL(cfs_hash_is_empty); - -__u64 -cfs_hash_size_get(struct cfs_hash *hs) -{ - return cfs_hash_with_counter(hs) ? - atomic_read(&hs->hs_count) : - cfs_hash_for_each_tight(hs, NULL, NULL, 0); -} -EXPORT_SYMBOL(cfs_hash_size_get); - -/* - * cfs_hash_for_each_relax: - * Iterate the hash table and call @func on each item without - * any lock. This function can't guarantee to finish iteration - * if these features are enabled: - * - * a. if rehash_key is enabled, an item can be moved from - * one bucket to another bucket - * b. user can remove non-zero-ref item from hash-table, - * so the item can be removed from hash-table, even worse, - * it's possible that user changed key and insert to another - * hash bucket. - * there's no way for us to finish iteration correctly on previous - * two cases, so iteration has to be stopped on change. 
- */ -static int -cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - struct hlist_node *hnode; - struct hlist_node *tmp; - struct cfs_hash_bd bd; - __u32 version; - int count = 0; - int stop_on_change; - int rc; - int i; - - stop_on_change = cfs_hash_with_rehash_key(hs) || - !cfs_hash_with_no_itemref(hs) || - !hs->hs_ops->hs_put_locked; - cfs_hash_lock(hs, 0); - LASSERT(!cfs_hash_is_rehashing(hs)); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - cfs_hash_bd_lock(hs, &bd, 0); - version = cfs_hash_bd_version_get(&bd); - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - for (hnode = hhead->first; hnode;) { - cfs_hash_bucket_validate(hs, &bd, hnode); - cfs_hash_get(hs, hnode); - cfs_hash_bd_unlock(hs, &bd, 0); - cfs_hash_unlock(hs, 0); - - rc = func(hs, &bd, hnode, data); - if (stop_on_change) - cfs_hash_put(hs, hnode); - cond_resched(); - count++; - - cfs_hash_lock(hs, 0); - cfs_hash_bd_lock(hs, &bd, 0); - if (!stop_on_change) { - tmp = hnode->next; - cfs_hash_put_locked(hs, hnode); - hnode = tmp; - } else { /* bucket changed? 
*/ - if (version != - cfs_hash_bd_version_get(&bd)) - break; - /* safe to continue because no change */ - hnode = hnode->next; - } - if (rc) /* callback wants to break iteration */ - break; - } - if (rc) /* callback wants to break iteration */ - break; - } - cfs_hash_bd_unlock(hs, &bd, 0); - if (rc) /* callback wants to break iteration */ - break; - } - cfs_hash_unlock(hs, 0); - - return count; -} - -int -cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - if (cfs_hash_with_no_lock(hs) || - cfs_hash_with_rehash_key(hs) || - !cfs_hash_with_no_itemref(hs)) - return -EOPNOTSUPP; - - if (!hs->hs_ops->hs_get || - (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) - return -EOPNOTSUPP; - - cfs_hash_for_each_enter(hs); - cfs_hash_for_each_relax(hs, func, data); - cfs_hash_for_each_exit(hs); - - return 0; -} -EXPORT_SYMBOL(cfs_hash_for_each_nolock); - -/** - * For each hash bucket in the libcfs hash @hs call the passed callback - * @func until all the hash buckets are empty. The passed callback @func - * or the previously registered callback hs->hs_put must remove the item - * from the hash. You may either use the cfs_hash_del() or hlist_del() - * functions. No rwlocks will be held during the callback @func it is - * safe to sleep if needed. This function will not terminate until the - * hash is empty. Note it is still possible to concurrently add new - * items in to the hash. It is the callers responsibility to ensure - * the required locking is in place to prevent concurrent insertions. 
- */ -int -cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - unsigned i = 0; - - if (cfs_hash_with_no_lock(hs)) - return -EOPNOTSUPP; - - if (!hs->hs_ops->hs_get || - (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) - return -EOPNOTSUPP; - - cfs_hash_for_each_enter(hs); - while (cfs_hash_for_each_relax(hs, func, data)) { - CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n", - hs->hs_name, i++); - } - cfs_hash_for_each_exit(hs); - return 0; -} -EXPORT_SYMBOL(cfs_hash_for_each_empty); - -void -cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex, - cfs_hash_for_each_cb_t func, void *data) -{ - struct hlist_head *hhead; - struct hlist_node *hnode; - struct cfs_hash_bd bd; - - cfs_hash_for_each_enter(hs); - cfs_hash_lock(hs, 0); - if (hindex >= CFS_HASH_NHLIST(hs)) - goto out; - - cfs_hash_bd_index_set(hs, hindex, &bd); - - cfs_hash_bd_lock(hs, &bd, 0); - hhead = cfs_hash_bd_hhead(hs, &bd); - hlist_for_each(hnode, hhead) { - if (func(hs, &bd, hnode, data)) - break; - } - cfs_hash_bd_unlock(hs, &bd, 0); -out: - cfs_hash_unlock(hs, 0); - cfs_hash_for_each_exit(hs); -} -EXPORT_SYMBOL(cfs_hash_hlist_for_each); - -/* - * For each item in the libcfs hash @hs which matches the @key call - * the passed callback @func and pass to it as an argument each hash - * item and the private @data. During the callback the bucket lock - * is held so the callback must never sleep. 
- */ -void -cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, - cfs_hash_for_each_cb_t func, void *data) -{ - struct hlist_node *hnode; - struct cfs_hash_bd bds[2]; - unsigned int i; - - cfs_hash_lock(hs, 0); - - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); - - cfs_hash_for_each_bd(bds, 2, i) { - struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]); - - hlist_for_each(hnode, hlist) { - cfs_hash_bucket_validate(hs, &bds[i], hnode); - - if (cfs_hash_keycmp(hs, key, hnode)) { - if (func(hs, &bds[i], hnode, data)) - break; - } - } - } - - cfs_hash_dual_bd_unlock(hs, bds, 0); - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_for_each_key); - -/** - * Rehash the libcfs hash @hs to the given @bits. This can be used - * to grow the hash size when excessive chaining is detected, or to - * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH - * flag is set in @hs the libcfs hash may be dynamically rehashed - * during addition or removal if the hash's theta value exceeds - * either the hs->hs_min_theta or hs->max_theta values. By default - * these values are tuned to keep the chained hash depth small, and - * this approach assumes a reasonably uniform hashing function. The - * theta thresholds for @hs are tunable via cfs_hash_set_theta(). - */ -void -cfs_hash_rehash_cancel_locked(struct cfs_hash *hs) -{ - int i; - - /* need hold cfs_hash_lock(hs, 1) */ - LASSERT(cfs_hash_with_rehash(hs) && - !cfs_hash_with_no_lock(hs)); - - if (!cfs_hash_is_rehashing(hs)) - return; - - if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) { - hs->hs_rehash_bits = 0; - return; - } - - for (i = 2; cfs_hash_is_rehashing(hs); i++) { - cfs_hash_unlock(hs, 1); - /* raise console warning while waiting too long */ - CDEBUG(is_power_of_2(i >> 3) ? 
D_WARNING : D_INFO, - "hash %s is still rehashing, rescheded %d\n", - hs->hs_name, i - 1); - cond_resched(); - cfs_hash_lock(hs, 1); - } -} - -void -cfs_hash_rehash_cancel(struct cfs_hash *hs) -{ - cfs_hash_lock(hs, 1); - cfs_hash_rehash_cancel_locked(hs); - cfs_hash_unlock(hs, 1); -} - -int -cfs_hash_rehash(struct cfs_hash *hs, int do_rehash) -{ - int rc; - - LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs)); - - cfs_hash_lock(hs, 1); - - rc = cfs_hash_rehash_bits(hs); - if (rc <= 0) { - cfs_hash_unlock(hs, 1); - return rc; - } - - hs->hs_rehash_bits = rc; - if (!do_rehash) { - /* launch and return */ - cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi); - cfs_hash_unlock(hs, 1); - return 0; - } - - /* rehash right now */ - cfs_hash_unlock(hs, 1); - - return cfs_hash_rehash_worker(&hs->hs_rehash_wi); -} - -static int -cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old) -{ - struct cfs_hash_bd new; - struct hlist_head *hhead; - struct hlist_node *hnode; - struct hlist_node *pos; - void *key; - int c = 0; - - /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */ - cfs_hash_bd_for_each_hlist(hs, old, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - key = cfs_hash_key(hs, hnode); - LASSERT(key); - /* Validate hnode is in the correct bucket. */ - cfs_hash_bucket_validate(hs, old, hnode); - /* - * Delete from old hash bucket; move to new bucket. - * ops->hs_key must be defined. 
- */ - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, &new); - cfs_hash_bd_move_locked(hs, old, &new, hnode); - c++; - } - } - - return c; -} - -static int -cfs_hash_rehash_worker(cfs_workitem_t *wi) -{ - struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi); - struct cfs_hash_bucket **bkts; - struct cfs_hash_bd bd; - unsigned int old_size; - unsigned int new_size; - int bsize; - int count = 0; - int rc = 0; - int i; - - LASSERT(hs && cfs_hash_with_rehash(hs)); - - cfs_hash_lock(hs, 0); - LASSERT(cfs_hash_is_rehashing(hs)); - - old_size = CFS_HASH_NBKT(hs); - new_size = CFS_HASH_RH_NBKT(hs); - - cfs_hash_unlock(hs, 0); - - /* - * don't need hs::hs_rwlock for hs::hs_buckets, - * because nobody can change bkt-table except me. - */ - bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets, - old_size, new_size); - cfs_hash_lock(hs, 1); - if (!bkts) { - rc = -ENOMEM; - goto out; - } - - if (bkts == hs->hs_buckets) { - bkts = NULL; /* do nothing */ - goto out; - } - - rc = __cfs_hash_theta(hs); - if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) { - /* free the new allocated bkt-table */ - old_size = new_size; - new_size = CFS_HASH_NBKT(hs); - rc = -EALREADY; - goto out; - } - - LASSERT(!hs->hs_rehash_buckets); - hs->hs_rehash_buckets = bkts; - - rc = 0; - cfs_hash_for_each_bucket(hs, &bd, i) { - if (cfs_hash_is_exiting(hs)) { - rc = -ESRCH; - /* someone wants to destroy the hash, abort now */ - if (old_size < new_size) /* OK to free old bkt-table */ - break; - /* it's shrinking, need free new bkt-table */ - hs->hs_rehash_buckets = NULL; - old_size = new_size; - new_size = CFS_HASH_NBKT(hs); - goto out; - } - - count += cfs_hash_rehash_bd(hs, &bd); - if (count < CFS_HASH_LOOP_HOG || - cfs_hash_is_iterating(hs)) { /* need to finish ASAP */ - continue; - } - - count = 0; - cfs_hash_unlock(hs, 1); - cond_resched(); - cfs_hash_lock(hs, 1); - } - - hs->hs_rehash_count++; - - bkts = hs->hs_buckets; - hs->hs_buckets = 
hs->hs_rehash_buckets; - hs->hs_rehash_buckets = NULL; - - hs->hs_cur_bits = hs->hs_rehash_bits; -out: - hs->hs_rehash_bits = 0; - if (rc == -ESRCH) /* never be scheduled again */ - cfs_wi_exit(cfs_sched_rehash, wi); - bsize = cfs_hash_bkt_size(hs); - cfs_hash_unlock(hs, 1); - /* can't refer to @hs anymore because it could be destroyed */ - if (bkts) - cfs_hash_buckets_free(bkts, bsize, new_size, old_size); - if (rc != 0) - CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc); - /* return 1 only if cfs_wi_exit is called */ - return rc == -ESRCH; -} - -/** - * Rehash the object referenced by @hnode in the libcfs hash @hs. The - * @old_key must be provided to locate the objects previous location - * in the hash, and the @new_key will be used to reinsert the object. - * Use this function instead of a cfs_hash_add() + cfs_hash_del() - * combo when it is critical that there is no window in time where the - * object is missing from the hash. When an object is being rehashed - * the registered cfs_hash_get() and cfs_hash_put() functions will - * not be called. 
- */ -void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key, - void *new_key, struct hlist_node *hnode) -{ - struct cfs_hash_bd bds[3]; - struct cfs_hash_bd old_bds[2]; - struct cfs_hash_bd new_bd; - - LASSERT(!hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - - cfs_hash_dual_bd_get(hs, old_key, old_bds); - cfs_hash_bd_get(hs, new_key, &new_bd); - - bds[0] = old_bds[0]; - bds[1] = old_bds[1]; - bds[2] = new_bd; - - /* NB: bds[0] and bds[1] are ordered already */ - cfs_hash_bd_order(&bds[1], &bds[2]); - cfs_hash_bd_order(&bds[0], &bds[1]); - - cfs_hash_multi_bd_lock(hs, bds, 3, 1); - if (likely(!old_bds[1].bd_bucket)) { - cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode); - } else { - cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode); - cfs_hash_bd_add_locked(hs, &new_bd, hnode); - } - /* overwrite key inside locks, otherwise may screw up with - * other operations, i.e: rehash - */ - cfs_hash_keycpy(hs, hnode, new_key); - - cfs_hash_multi_bd_unlock(hs, bds, 3, 1); - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_rehash_key); - -void cfs_hash_debug_header(struct seq_file *m) -{ - seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n", - CFS_HASH_BIGNAME_LEN, "name"); -} -EXPORT_SYMBOL(cfs_hash_debug_header); - -static struct cfs_hash_bucket ** -cfs_hash_full_bkts(struct cfs_hash *hs) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (!hs->hs_rehash_buckets) - return hs->hs_buckets; - - LASSERT(hs->hs_rehash_bits != 0); - return hs->hs_rehash_bits > hs->hs_cur_bits ? - hs->hs_rehash_buckets : hs->hs_buckets; -} - -static unsigned int -cfs_hash_full_nbkt(struct cfs_hash *hs) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (!hs->hs_rehash_buckets) - return CFS_HASH_NBKT(hs); - - LASSERT(hs->hs_rehash_bits != 0); - return hs->hs_rehash_bits > hs->hs_cur_bits ? 
- CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs); -} - -void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m) -{ - int dist[8] = { 0, }; - int maxdep = -1; - int maxdepb = -1; - int total = 0; - int theta; - int i; - - cfs_hash_lock(hs, 0); - theta = __cfs_hash_theta(hs); - - seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ", - CFS_HASH_BIGNAME_LEN, hs->hs_name, - 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits, - 1 << hs->hs_max_bits, - __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta), - __cfs_hash_theta_int(hs->hs_min_theta), - __cfs_hash_theta_frac(hs->hs_min_theta), - __cfs_hash_theta_int(hs->hs_max_theta), - __cfs_hash_theta_frac(hs->hs_max_theta), - hs->hs_flags, hs->hs_rehash_count); - - /* - * The distribution is a summary of the chained hash depth in - * each of the libcfs hash buckets. Each buckets hsb_count is - * divided by the hash theta value and used to generate a - * histogram of the hash distribution. A uniform hash will - * result in all hash buckets being close to the average thus - * only the first few entries in the histogram will be non-zero. - * If you hash function results in a non-uniform hash the will - * be observable by outlier bucks in the distribution histogram. - * - * Uniform hash distribution: 128/128/0/0/0/0/0/0 - * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 - */ - for (i = 0; i < cfs_hash_full_nbkt(hs); i++) { - struct cfs_hash_bd bd; - - bd.bd_bucket = cfs_hash_full_bkts(hs)[i]; - cfs_hash_bd_lock(hs, &bd, 0); - if (maxdep < bd.bd_bucket->hsb_depmax) { - maxdep = bd.bd_bucket->hsb_depmax; - maxdepb = ffz(~maxdep); - } - total += bd.bd_bucket->hsb_count; - dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++; - cfs_hash_bd_unlock(hs, &bd, 0); - } - - seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb); - for (i = 0; i < 8; i++) - seq_printf(m, "%d%c", dist[i], (i == 7) ? 
'\n' : '/'); - - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_debug_str); diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c deleted file mode 100644 index 33352af6c27f..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/libcfs_cpu.c +++ /dev/null @@ -1,227 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "../../include/linux/libcfs/libcfs.h" - -/** Global CPU partition table */ -struct cfs_cpt_table *cfs_cpt_table __read_mostly; -EXPORT_SYMBOL(cfs_cpt_table); - -#ifndef HAVE_LIBCFS_CPT - -#define CFS_CPU_VERSION_MAGIC 0xbabecafe - -struct cfs_cpt_table * -cfs_cpt_table_alloc(unsigned int ncpt) -{ - struct cfs_cpt_table *cptab; - - if (ncpt != 1) { - CERROR("Can't support cpu partition number %d\n", ncpt); - return NULL; - } - - LIBCFS_ALLOC(cptab, sizeof(*cptab)); - if (cptab) { - cptab->ctb_version = CFS_CPU_VERSION_MAGIC; - node_set(0, cptab->ctb_nodemask); - cptab->ctb_nparts = ncpt; - } - - return cptab; -} -EXPORT_SYMBOL(cfs_cpt_table_alloc); - -void -cfs_cpt_table_free(struct cfs_cpt_table *cptab) -{ - LASSERT(cptab->ctb_version == CFS_CPU_VERSION_MAGIC); - - LIBCFS_FREE(cptab, sizeof(*cptab)); -} -EXPORT_SYMBOL(cfs_cpt_table_free); - -#ifdef CONFIG_SMP -int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) -{ - int rc; - - rc = snprintf(buf, len, "%d\t: %d\n", 0, 0); - len -= rc; - if (len <= 0) - return -EFBIG; - - return rc; -} -EXPORT_SYMBOL(cfs_cpt_table_print); -#endif /* CONFIG_SMP */ - -int -cfs_cpt_number(struct cfs_cpt_table *cptab) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_number); - -int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_weight); - -int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_online); - -nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) -{ - return &cptab->ctb_nodemask; -} -EXPORT_SYMBOL(cfs_cpt_cpumask); - -int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpu); - -void -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ -} 
-EXPORT_SYMBOL(cfs_cpt_unset_cpu); - -int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpumask); - -void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_cpumask); - -int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_node); - -void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_node); - -int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_nodemask); - -void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ -} -EXPORT_SYMBOL(cfs_cpt_unset_nodemask); - -void -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) -{ -} -EXPORT_SYMBOL(cfs_cpt_clear); - -int -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_spread_node); - -int -cfs_cpu_ht_nsiblings(int cpu) -{ - return 1; -} -EXPORT_SYMBOL(cfs_cpu_ht_nsiblings); - -int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_current); - -int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_of_cpu); - -int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) -{ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_bind); - -void -cfs_cpu_fini(void) -{ - if (cfs_cpt_table) { - cfs_cpt_table_free(cfs_cpt_table); - cfs_cpt_table = NULL; - } -} - -int -cfs_cpu_init(void) -{ - cfs_cpt_table = cfs_cpt_table_alloc(1); - - return cfs_cpt_table ? 
0 : -1; -} - -#endif /* HAVE_LIBCFS_CPT */ diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c b/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c deleted file mode 100644 index 2de9eeae0232..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/libcfs_lock.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "../../include/linux/libcfs/libcfs.h" - -/** destroy cpu-partition lock, see libcfs_private.h for more detail */ -void -cfs_percpt_lock_free(struct cfs_percpt_lock *pcl) -{ - LASSERT(pcl->pcl_locks); - LASSERT(!pcl->pcl_locked); - - cfs_percpt_free(pcl->pcl_locks); - LIBCFS_FREE(pcl, sizeof(*pcl)); -} -EXPORT_SYMBOL(cfs_percpt_lock_free); - -/** - * create cpu-partition lock, see libcfs_private.h for more detail. - * - * cpu-partition lock is designed for large-scale SMP system, so we need to - * reduce cacheline conflict as possible as we can, that's the - * reason we always allocate cacheline-aligned memory block. 
- */ -struct cfs_percpt_lock * -cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab) -{ - struct cfs_percpt_lock *pcl; - spinlock_t *lock; - int i; - - /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */ - LIBCFS_ALLOC(pcl, sizeof(*pcl)); - if (!pcl) - return NULL; - - pcl->pcl_cptab = cptab; - pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock)); - if (!pcl->pcl_locks) { - LIBCFS_FREE(pcl, sizeof(*pcl)); - return NULL; - } - - cfs_percpt_for_each(lock, i, pcl->pcl_locks) - spin_lock_init(lock); - - return pcl; -} -EXPORT_SYMBOL(cfs_percpt_lock_alloc); - -/** - * lock a CPU partition - * - * \a index != CFS_PERCPT_LOCK_EX - * hold private lock indexed by \a index - * - * \a index == CFS_PERCPT_LOCK_EX - * exclusively lock @pcl and nobody can take private lock - */ -void -cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) - __acquires(pcl->pcl_locks) -{ - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; - - LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt); - - if (ncpt == 1) { - index = 0; - } else { /* serialize with exclusive lock */ - while (pcl->pcl_locked) - cpu_relax(); - } - - if (likely(index != CFS_PERCPT_LOCK_EX)) { - spin_lock(pcl->pcl_locks[index]); - return; - } - - /* exclusive lock request */ - for (i = 0; i < ncpt; i++) { - spin_lock(pcl->pcl_locks[i]); - if (i == 0) { - LASSERT(!pcl->pcl_locked); - /* nobody should take private lock after this - * so I wouldn't starve for too long time - */ - pcl->pcl_locked = 1; - } - } -} -EXPORT_SYMBOL(cfs_percpt_lock); - -/** unlock a CPU partition */ -void -cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) - __releases(pcl->pcl_locks) -{ - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; - - index = ncpt == 1 ? 
0 : index; - - if (likely(index != CFS_PERCPT_LOCK_EX)) { - spin_unlock(pcl->pcl_locks[index]); - return; - } - - for (i = ncpt - 1; i >= 0; i--) { - if (i == 0) { - LASSERT(pcl->pcl_locked); - pcl->pcl_locked = 0; - } - spin_unlock(pcl->pcl_locks[i]); - } -} -EXPORT_SYMBOL(cfs_percpt_unlock); - -/** free cpu-partition refcount */ -void -cfs_percpt_atomic_free(atomic_t **refs) -{ - cfs_percpt_free(refs); -} -EXPORT_SYMBOL(cfs_percpt_atomic_free); - -/** allocate cpu-partition refcount with initial value @init_val */ -atomic_t ** -cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val) -{ - atomic_t **refs; - atomic_t *ref; - int i; - - refs = cfs_percpt_alloc(cptab, sizeof(*ref)); - if (!refs) - return NULL; - - cfs_percpt_for_each(ref, i, refs) - atomic_set(ref, init_val); - return refs; -} -EXPORT_SYMBOL(cfs_percpt_atomic_alloc); - -/** return sum of cpu-partition refs */ -int -cfs_percpt_atomic_summary(atomic_t **refs) -{ - atomic_t *ref; - int i; - int val = 0; - - cfs_percpt_for_each(ref, i, refs) - val += atomic_read(ref); - - return val; -} -EXPORT_SYMBOL(cfs_percpt_atomic_summary); diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c b/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c deleted file mode 100644 index c5a6951516ed..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/libcfs_mem.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "../../include/linux/libcfs/libcfs.h" - -struct cfs_var_array { - unsigned int va_count; /* # of buffers */ - unsigned int va_size; /* size of each var */ - struct cfs_cpt_table *va_cptab; /* cpu partition table */ - void *va_ptrs[0]; /* buffer addresses */ -}; - -/* - * free per-cpu data, see more detail in cfs_percpt_free - */ -void -cfs_percpt_free(void *vars) -{ - struct cfs_var_array *arr; - int i; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - for (i = 0; i < arr->va_count; i++) { - if (arr->va_ptrs[i]) - LIBCFS_FREE(arr->va_ptrs[i], arr->va_size); - } - - LIBCFS_FREE(arr, offsetof(struct cfs_var_array, - va_ptrs[arr->va_count])); -} -EXPORT_SYMBOL(cfs_percpt_free); - -/* - * allocate per cpu-partition variables, returned value is an array of pointers, - * variable can be indexed by CPU partition ID, i.e: - * - * arr = cfs_percpt_alloc(cfs_cpu_pt, size); - * then caller can access memory block for CPU 0 by arr[0], - * memory block for CPU 1 by arr[1]... - * memory block for CPU N by arr[N]... - * - * cacheline aligned. 
- */ -void * -cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size) -{ - struct cfs_var_array *arr; - int count; - int i; - - count = cfs_cpt_number(cptab); - - LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); - if (!arr) - return NULL; - - size = L1_CACHE_ALIGN(size); - arr->va_size = size; - arr->va_count = count; - arr->va_cptab = cptab; - - for (i = 0; i < count; i++) { - LIBCFS_CPT_ALLOC(arr->va_ptrs[i], cptab, i, size); - if (!arr->va_ptrs[i]) { - cfs_percpt_free((void *)&arr->va_ptrs[0]); - return NULL; - } - } - - return (void *)&arr->va_ptrs[0]; -} -EXPORT_SYMBOL(cfs_percpt_alloc); - -/* - * return number of CPUs (or number of elements in per-cpu data) - * according to cptab of @vars - */ -int -cfs_percpt_number(void *vars) -{ - struct cfs_var_array *arr; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - return arr->va_count; -} -EXPORT_SYMBOL(cfs_percpt_number); - -/* - * return memory block shadowed from current CPU - */ -void * -cfs_percpt_current(void *vars) -{ - struct cfs_var_array *arr; - int cpt; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - cpt = cfs_cpt_current(arr->va_cptab, 0); - if (cpt < 0) - return NULL; - - return arr->va_ptrs[cpt]; -} - -void * -cfs_percpt_index(void *vars, int idx) -{ - struct cfs_var_array *arr; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - LASSERT(idx >= 0 && idx < arr->va_count); - return arr->va_ptrs[idx]; -} - -/* - * free variable array, see more detail in cfs_array_alloc - */ -void -cfs_array_free(void *vars) -{ - struct cfs_var_array *arr; - int i; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - for (i = 0; i < arr->va_count; i++) { - if (!arr->va_ptrs[i]) - continue; - - LIBCFS_FREE(arr->va_ptrs[i], arr->va_size); - } - LIBCFS_FREE(arr, offsetof(struct cfs_var_array, - va_ptrs[arr->va_count])); -} -EXPORT_SYMBOL(cfs_array_free); - -/* - * allocate a variable array, returned value is an array of pointers. 
- * Caller can specify length of array by @count, @size is size of each - * memory block in array. - */ -void * -cfs_array_alloc(int count, unsigned int size) -{ - struct cfs_var_array *arr; - int i; - - LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); - if (!arr) - return NULL; - - arr->va_count = count; - arr->va_size = size; - - for (i = 0; i < count; i++) { - LIBCFS_ALLOC(arr->va_ptrs[i], size); - - if (!arr->va_ptrs[i]) { - cfs_array_free((void *)&arr->va_ptrs[0]); - return NULL; - } - } - - return (void *)&arr->va_ptrs[0]; -} -EXPORT_SYMBOL(cfs_array_alloc); diff --git a/drivers/staging/lustre/lustre/libcfs/libcfs_string.c b/drivers/staging/lustre/lustre/libcfs/libcfs_string.c deleted file mode 100644 index 50ac1536db4b..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/libcfs_string.c +++ /dev/null @@ -1,581 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * String manipulation functions. - * - * libcfs/libcfs/libcfs_string.c - * - * Author: Nathan Rutman - */ - -#include "../../include/linux/libcfs/libcfs.h" - -/* Convert a text string to a bitmask */ -int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), - int *oldmask, int minmask, int allmask) -{ - const char *debugstr; - char op = '\0'; - int newmask = minmask, i, len, found = 0; - - /* must be a list of tokens separated by whitespace - * and optionally an operator ('+' or '-'). If an operator - * appears first in , '*oldmask' is used as the starting point - * (relative), otherwise minmask is used (absolute). An operator - * applies to all following tokens up to the next operator. - */ - while (*str != '\0') { - while (isspace(*str)) - str++; - if (*str == '\0') - break; - if (*str == '+' || *str == '-') { - op = *str++; - if (!found) - /* only if first token is relative */ - newmask = *oldmask; - while (isspace(*str)) - str++; - if (*str == '\0') /* trailing op */ - return -EINVAL; - } - - /* find token length */ - len = 0; - while (str[len] != '\0' && !isspace(str[len]) && - str[len] != '+' && str[len] != '-') - len++; - - /* match token */ - found = 0; - for (i = 0; i < 32; i++) { - debugstr = bit2str(i); - if (debugstr && strlen(debugstr) == len && - strncasecmp(str, debugstr, len) == 0) { - if (op == '-') - newmask &= ~(1 << i); - else - newmask |= (1 << i); - found = 1; - break; - } - } - if (!found && len == 3 && - (strncasecmp(str, "ALL", len) == 0)) { - if (op == '-') - newmask = minmask; - else - newmask = allmask; - found = 1; - } - if (!found) { - CWARN("unknown mask '%.*s'.\n" - "mask usage: [+|-] ...\n", len, str); - return -EINVAL; - } - str += len; - } - - *oldmask = newmask; - return 0; -} - -/* get the first string out of 
@str */ -char *cfs_firststr(char *str, size_t size) -{ - size_t i = 0; - char *end; - - /* trim leading spaces */ - while (i < size && *str && isspace(*str)) { - ++i; - ++str; - } - - /* string with all spaces */ - if (*str == '\0') - goto out; - - end = str; - while (i < size && *end != '\0' && !isspace(*end)) { - ++i; - ++end; - } - - *end = '\0'; -out: - return str; -} -EXPORT_SYMBOL(cfs_firststr); - -char * -cfs_trimwhite(char *str) -{ - char *end; - - while (isspace(*str)) - str++; - - end = str + strlen(str); - while (end > str) { - if (!isspace(end[-1])) - break; - end--; - } - - *end = 0; - return str; -} -EXPORT_SYMBOL(cfs_trimwhite); - -/** - * Extracts tokens from strings. - * - * Looks for \a delim in string \a next, sets \a res to point to - * substring before the delimiter, sets \a next right after the found - * delimiter. - * - * \retval 1 if \a res points to a string of non-whitespace characters - * \retval 0 otherwise - */ -int -cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res) -{ - char *end; - - if (!next->ls_str) - return 0; - - /* skip leading white spaces */ - while (next->ls_len) { - if (!isspace(*next->ls_str)) - break; - next->ls_str++; - next->ls_len--; - } - - if (next->ls_len == 0) /* whitespaces only */ - return 0; - - if (*next->ls_str == delim) { - /* first non-writespace is the delimiter */ - return 0; - } - - res->ls_str = next->ls_str; - end = memchr(next->ls_str, delim, next->ls_len); - if (!end) { - /* there is no the delimeter in the string */ - end = next->ls_str + next->ls_len; - next->ls_str = NULL; - } else { - next->ls_str = end + 1; - next->ls_len -= (end - res->ls_str + 1); - } - - /* skip ending whitespaces */ - while (--end != res->ls_str) { - if (!isspace(*end)) - break; - } - - res->ls_len = end - res->ls_str + 1; - return 1; -} -EXPORT_SYMBOL(cfs_gettok); - -/** - * Converts string to integer. - * - * Accepts decimal and hexadecimal number recordings. 
- * - * \retval 1 if first \a nob chars of \a str convert to decimal or - * hexadecimal integer in the range [\a min, \a max] - * \retval 0 otherwise - */ -int -cfs_str2num_check(char *str, int nob, unsigned *num, - unsigned min, unsigned max) -{ - bool all_numbers = true; - char *endp, cache; - int rc; - - str = cfs_trimwhite(str); - - /** - * kstrouint can only handle strings composed - * of only numbers. We need to scan the string - * passed in for the first non-digit character - * and end the string at that location. If we - * don't find any non-digit character we still - * need to place a '\0' at position nob since - * we are not interested in the rest of the - * string which is longer than nob in size. - * After we are done the character at the - * position we placed '\0' must be restored. - */ - for (endp = str; endp < str + nob; endp++) { - if (!isdigit(*endp)) { - all_numbers = false; - break; - } - } - cache = *endp; - *endp = '\0'; - - rc = kstrtouint(str, 10, num); - *endp = cache; - if (rc || !all_numbers) - return 0; - - return (*num >= min && *num <= max); -} -EXPORT_SYMBOL(cfs_str2num_check); - -/** - * Parses \ token of the syntax. If \a bracketed is false, - * \a src should only have a single token which can be \ or \* - * - * \retval pointer to allocated range_expr and initialized - * range_expr::re_lo, range_expr::re_hi and range_expr:re_stride if \a - `* src parses to - * \ | - * \ '-' \ | - * \ '-' \ '/' \ - * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or - * -ENOMEM will be returned. 
- */ -static int -cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max, - int bracketed, struct cfs_range_expr **expr) -{ - struct cfs_range_expr *re; - struct cfs_lstr tok; - - LIBCFS_ALLOC(re, sizeof(*re)); - if (!re) - return -ENOMEM; - - if (src->ls_len == 1 && src->ls_str[0] == '*') { - re->re_lo = min; - re->re_hi = max; - re->re_stride = 1; - goto out; - } - - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_lo, min, max)) { - /* is parsed */ - re->re_hi = re->re_lo; - re->re_stride = 1; - goto out; - } - - if (!bracketed || !cfs_gettok(src, '-', &tok)) - goto failed; - - if (!cfs_str2num_check(tok.ls_str, tok.ls_len, - &re->re_lo, min, max)) - goto failed; - - /* - */ - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_hi, min, max)) { - /* - is parsed */ - re->re_stride = 1; - goto out; - } - - /* go to check '-' '/' */ - if (cfs_gettok(src, '/', &tok)) { - if (!cfs_str2num_check(tok.ls_str, tok.ls_len, - &re->re_hi, min, max)) - goto failed; - - /* - / ... */ - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_stride, min, max)) { - /* - / is parsed */ - goto out; - } - } - - out: - *expr = re; - return 0; - - failed: - LIBCFS_FREE(re, sizeof(*re)); - return -EINVAL; -} - -/** - * Print the range expression \a re into specified \a buffer. - * If \a bracketed is true, expression does not need additional - * brackets. 
- * - * \retval number of characters written - */ -static int -cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr, - bool bracketed) -{ - int i; - char s[] = "["; - char e[] = "]"; - - if (bracketed) { - s[0] = '\0'; - e[0] = '\0'; - } - - if (expr->re_lo == expr->re_hi) - i = scnprintf(buffer, count, "%u", expr->re_lo); - else if (expr->re_stride == 1) - i = scnprintf(buffer, count, "%s%u-%u%s", - s, expr->re_lo, expr->re_hi, e); - else - i = scnprintf(buffer, count, "%s%u-%u/%u%s", - s, expr->re_lo, expr->re_hi, expr->re_stride, e); - return i; -} - -/** - * Print a list of range expressions (\a expr_list) into specified \a buffer. - * If the list contains several expressions, separate them with comma - * and surround the list with brackets. - * - * \retval number of characters written - */ -int -cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list) -{ - struct cfs_range_expr *expr; - int i = 0, j = 0; - int numexprs = 0; - - if (count <= 0) - return 0; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) - numexprs++; - - if (numexprs > 1) - i += scnprintf(buffer + i, count - i, "["); - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (j++ != 0) - i += scnprintf(buffer + i, count - i, ","); - i += cfs_range_expr_print(buffer + i, count - i, expr, - numexprs > 1); - } - - if (numexprs > 1) - i += scnprintf(buffer + i, count - i, "]"); - - return i; -} -EXPORT_SYMBOL(cfs_expr_list_print); - -/** - * Matches value (\a value) against ranges expression list \a expr_list. 
- * - * \retval 1 if \a value matches - * \retval 0 otherwise - */ -int -cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list) -{ - struct cfs_range_expr *expr; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (value >= expr->re_lo && value <= expr->re_hi && - ((value - expr->re_lo) % expr->re_stride) == 0) - return 1; - } - - return 0; -} -EXPORT_SYMBOL(cfs_expr_list_match); - -/** - * Convert express list (\a expr_list) to an array of all matched values - * - * \retval N N is total number of all matched values - * \retval 0 if expression list is empty - * \retval < 0 for failure - */ -int -cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp) -{ - struct cfs_range_expr *expr; - __u32 *val; - int count = 0; - int i; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (((i - expr->re_lo) % expr->re_stride) == 0) - count++; - } - } - - if (count == 0) /* empty expression list */ - return 0; - - if (count > max) { - CERROR("Number of values %d exceeds max allowed %d\n", - max, count); - return -EINVAL; - } - - LIBCFS_ALLOC(val, sizeof(val[0]) * count); - if (!val) - return -ENOMEM; - - count = 0; - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (((i - expr->re_lo) % expr->re_stride) == 0) - val[count++] = i; - } - } - - *valpp = val; - return count; -} -EXPORT_SYMBOL(cfs_expr_list_values); - -/** - * Frees cfs_range_expr structures of \a expr_list. - * - * \retval none - */ -void -cfs_expr_list_free(struct cfs_expr_list *expr_list) -{ - while (!list_empty(&expr_list->el_exprs)) { - struct cfs_range_expr *expr; - - expr = list_entry(expr_list->el_exprs.next, - struct cfs_range_expr, re_link); - list_del(&expr->re_link); - LIBCFS_FREE(expr, sizeof(*expr)); - } - - LIBCFS_FREE(expr_list, sizeof(*expr_list)); -} -EXPORT_SYMBOL(cfs_expr_list_free); - -/** - * Parses \ token of the syntax. 
- * - * \retval 0 if \a str parses to \ | \ - * \retval -errno otherwise - */ -int -cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max, - struct cfs_expr_list **elpp) -{ - struct cfs_expr_list *expr_list; - struct cfs_range_expr *expr; - struct cfs_lstr src; - int rc; - - LIBCFS_ALLOC(expr_list, sizeof(*expr_list)); - if (!expr_list) - return -ENOMEM; - - src.ls_str = str; - src.ls_len = len; - - INIT_LIST_HEAD(&expr_list->el_exprs); - - if (src.ls_str[0] == '[' && - src.ls_str[src.ls_len - 1] == ']') { - src.ls_str++; - src.ls_len -= 2; - - rc = -EINVAL; - while (src.ls_str) { - struct cfs_lstr tok; - - if (!cfs_gettok(&src, ',', &tok)) { - rc = -EINVAL; - break; - } - - rc = cfs_range_expr_parse(&tok, min, max, 1, &expr); - if (rc != 0) - break; - - list_add_tail(&expr->re_link, &expr_list->el_exprs); - } - } else { - rc = cfs_range_expr_parse(&src, min, max, 0, &expr); - if (rc == 0) - list_add_tail(&expr->re_link, &expr_list->el_exprs); - } - - if (rc != 0) - cfs_expr_list_free(expr_list); - else - *elpp = expr_list; - - return rc; -} -EXPORT_SYMBOL(cfs_expr_list_parse); - -/** - * Frees cfs_expr_list structures of \a list. - * - * For each struct cfs_expr_list structure found on \a list it frees - * range_expr list attached to it and frees the cfs_expr_list itself. - * - * \retval none - */ -void -cfs_expr_list_free_list(struct list_head *list) -{ - struct cfs_expr_list *el; - - while (!list_empty(list)) { - el = list_entry(list->next, struct cfs_expr_list, el_link); - list_del(&el->el_link); - cfs_expr_list_free(el); - } -} -EXPORT_SYMBOL(cfs_expr_list_free_list); diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c deleted file mode 100644 index 389fb9eeea75..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c +++ /dev/null @@ -1,1040 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include "../../../include/linux/libcfs/libcfs.h" - -#ifdef CONFIG_SMP - -/** - * modparam for setting number of partitions - * - * 0 : estimate best value based on cores or NUMA nodes - * 1 : disable multiple partitions - * >1 : specify number of partitions - */ -static int cpu_npartitions; -module_param(cpu_npartitions, int, 0444); -MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); - -/** - * modparam for setting CPU partitions patterns: - * - * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID, - * number in bracket is processor ID (core or HT) - * - * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket - * are NUMA node ID, number before bracket is CPU partition ID. 
- * - * NB: If user specified cpu_pattern, cpu_npartitions will be ignored - */ -static char *cpu_pattern = ""; -module_param(cpu_pattern, charp, 0444); -MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern"); - -struct cfs_cpt_data { - /* serialize hotplug etc */ - spinlock_t cpt_lock; - /* reserved for hotplug */ - unsigned long cpt_version; - /* mutex to protect cpt_cpumask */ - struct mutex cpt_mutex; - /* scratch buffer for set/unset_node */ - cpumask_t *cpt_cpumask; -}; - -static struct cfs_cpt_data cpt_data; - -void -cfs_cpt_table_free(struct cfs_cpt_table *cptab) -{ - int i; - - if (cptab->ctb_cpu2cpt) { - LIBCFS_FREE(cptab->ctb_cpu2cpt, - num_possible_cpus() * - sizeof(cptab->ctb_cpu2cpt[0])); - } - - for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) { - struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - - if (part->cpt_nodemask) { - LIBCFS_FREE(part->cpt_nodemask, - sizeof(*part->cpt_nodemask)); - } - - if (part->cpt_cpumask) - LIBCFS_FREE(part->cpt_cpumask, cpumask_size()); - } - - if (cptab->ctb_parts) { - LIBCFS_FREE(cptab->ctb_parts, - cptab->ctb_nparts * sizeof(cptab->ctb_parts[0])); - } - - if (cptab->ctb_nodemask) - LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask)); - if (cptab->ctb_cpumask) - LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size()); - - LIBCFS_FREE(cptab, sizeof(*cptab)); -} -EXPORT_SYMBOL(cfs_cpt_table_free); - -struct cfs_cpt_table * -cfs_cpt_table_alloc(unsigned int ncpt) -{ - struct cfs_cpt_table *cptab; - int i; - - LIBCFS_ALLOC(cptab, sizeof(*cptab)); - if (!cptab) - return NULL; - - cptab->ctb_nparts = ncpt; - - LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size()); - LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask)); - - if (!cptab->ctb_cpumask || !cptab->ctb_nodemask) - goto failed; - - LIBCFS_ALLOC(cptab->ctb_cpu2cpt, - num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0])); - if (!cptab->ctb_cpu2cpt) - goto failed; - - memset(cptab->ctb_cpu2cpt, -1, - num_possible_cpus() * 
sizeof(cptab->ctb_cpu2cpt[0])); - - LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0])); - if (!cptab->ctb_parts) - goto failed; - - for (i = 0; i < ncpt; i++) { - struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - - LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size()); - LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask)); - if (!part->cpt_cpumask || !part->cpt_nodemask) - goto failed; - } - - spin_lock(&cpt_data.cpt_lock); - /* Reserved for hotplug */ - cptab->ctb_version = cpt_data.cpt_version; - spin_unlock(&cpt_data.cpt_lock); - - return cptab; - - failed: - cfs_cpt_table_free(cptab); - return NULL; -} -EXPORT_SYMBOL(cfs_cpt_table_alloc); - -int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) -{ - char *tmp = buf; - int rc = 0; - int i; - int j; - - for (i = 0; i < cptab->ctb_nparts; i++) { - if (len > 0) { - rc = snprintf(tmp, len, "%d\t: ", i); - len -= rc; - } - - if (len <= 0) { - rc = -EFBIG; - goto out; - } - - tmp += rc; - for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) { - rc = snprintf(tmp, len, "%d ", j); - len -= rc; - if (len <= 0) { - rc = -EFBIG; - goto out; - } - tmp += rc; - } - - *tmp = '\n'; - tmp++; - len--; - } - - out: - if (rc < 0) - return rc; - - return tmp - buf; -} -EXPORT_SYMBOL(cfs_cpt_table_print); - -int -cfs_cpt_number(struct cfs_cpt_table *cptab) -{ - return cptab->ctb_nparts; -} -EXPORT_SYMBOL(cfs_cpt_number); - -int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cpumask_weight(cptab->ctb_cpumask) : - cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask); -} -EXPORT_SYMBOL(cfs_cpt_weight); - -int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? 
- cpumask_any_and(cptab->ctb_cpumask, - cpu_online_mask) < nr_cpu_ids : - cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask, - cpu_online_mask) < nr_cpu_ids; -} -EXPORT_SYMBOL(cfs_cpt_online); - -cpumask_t * -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask; -} -EXPORT_SYMBOL(cfs_cpt_cpumask); - -nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask; -} -EXPORT_SYMBOL(cfs_cpt_nodemask); - -int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - int node; - - LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts); - - if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) { - CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu); - return 0; - } - - if (cptab->ctb_cpu2cpt[cpu] != -1) { - CDEBUG(D_INFO, "CPU %d is already in partition %d\n", - cpu, cptab->ctb_cpu2cpt[cpu]); - return 0; - } - - cptab->ctb_cpu2cpt[cpu] = cpt; - - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - - cpumask_set_cpu(cpu, cptab->ctb_cpumask); - cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); - - node = cpu_to_node(cpu); - - /* first CPU of @node in this CPT table */ - if (!node_isset(node, *cptab->ctb_nodemask)) - node_set(node, *cptab->ctb_nodemask); - - /* first CPU of @node in this partition */ - if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)) - node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask); - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpu); - -void -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - int node; - int i; - - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - if (cpu < 0 || cpu 
>= nr_cpu_ids) { - CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu); - return; - } - - if (cpt == CFS_CPT_ANY) { - /* caller doesn't know the partition ID */ - cpt = cptab->ctb_cpu2cpt[cpu]; - if (cpt < 0) { /* not set in this CPT-table */ - CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n", - cpt, cptab); - return; - } - - } else if (cpt != cptab->ctb_cpu2cpt[cpu]) { - CDEBUG(D_INFO, - "CPU %d is not in cpu-partition %d\n", cpu, cpt); - return; - } - - LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - - cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); - cpumask_clear_cpu(cpu, cptab->ctb_cpumask); - cptab->ctb_cpu2cpt[cpu] = -1; - - node = cpu_to_node(cpu); - - LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)); - LASSERT(node_isset(node, *cptab->ctb_nodemask)); - - for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) { - /* this CPT has other CPU belonging to this node? */ - if (cpu_to_node(i) == node) - break; - } - - if (i >= nr_cpu_ids) - node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask); - - for_each_cpu(i, cptab->ctb_cpumask) { - /* this CPT-table has other CPU belonging to this node? 
*/ - if (cpu_to_node(i) == node) - break; - } - - if (i >= nr_cpu_ids) - node_clear(node, *cptab->ctb_nodemask); -} -EXPORT_SYMBOL(cfs_cpt_unset_cpu); - -int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - int i; - - if (cpumask_weight(mask) == 0 || - cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { - CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n", - cpt); - return 0; - } - - for_each_cpu(i, mask) { - if (!cfs_cpt_set_cpu(cptab, cpt, i)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpumask); - -void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - int i; - - for_each_cpu(i, mask) - cfs_cpt_unset_cpu(cptab, cpt, i); -} -EXPORT_SYMBOL(cfs_cpt_unset_cpumask); - -int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - cpumask_t *mask; - int rc; - - if (node < 0 || node >= MAX_NUMNODES) { - CDEBUG(D_INFO, - "Invalid NUMA id %d for CPU partition %d\n", node, cpt); - return 0; - } - - mutex_lock(&cpt_data.cpt_mutex); - - mask = cpt_data.cpt_cpumask; - cpumask_copy(mask, cpumask_of_node(node)); - - rc = cfs_cpt_set_cpumask(cptab, cpt, mask); - - mutex_unlock(&cpt_data.cpt_mutex); - - return rc; -} -EXPORT_SYMBOL(cfs_cpt_set_node); - -void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - cpumask_t *mask; - - if (node < 0 || node >= MAX_NUMNODES) { - CDEBUG(D_INFO, - "Invalid NUMA id %d for CPU partition %d\n", node, cpt); - return; - } - - mutex_lock(&cpt_data.cpt_mutex); - - mask = cpt_data.cpt_cpumask; - cpumask_copy(mask, cpumask_of_node(node)); - - cfs_cpt_unset_cpumask(cptab, cpt, mask); - - mutex_unlock(&cpt_data.cpt_mutex); -} -EXPORT_SYMBOL(cfs_cpt_unset_node); - -int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - int i; - - for_each_node_mask(i, *mask) { - if (!cfs_cpt_set_node(cptab, cpt, i)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_nodemask); - 
-void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - int i; - - for_each_node_mask(i, *mask) - cfs_cpt_unset_node(cptab, cpt, i); -} -EXPORT_SYMBOL(cfs_cpt_unset_nodemask); - -void -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) -{ - int last; - int i; - - if (cpt == CFS_CPT_ANY) { - last = cptab->ctb_nparts - 1; - cpt = 0; - } else { - last = cpt; - } - - for (; cpt <= last; cpt++) { - for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) - cfs_cpt_unset_cpu(cptab, cpt, i); - } -} -EXPORT_SYMBOL(cfs_cpt_clear); - -int -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) -{ - nodemask_t *mask; - int weight; - int rotor; - int node; - - /* convert CPU partition ID to HW node id */ - - if (cpt < 0 || cpt >= cptab->ctb_nparts) { - mask = cptab->ctb_nodemask; - rotor = cptab->ctb_spread_rotor++; - } else { - mask = cptab->ctb_parts[cpt].cpt_nodemask; - rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++; - } - - weight = nodes_weight(*mask); - LASSERT(weight > 0); - - rotor %= weight; - - for_each_node_mask(node, *mask) { - if (rotor-- == 0) - return node; - } - - LBUG(); - return 0; -} -EXPORT_SYMBOL(cfs_cpt_spread_node); - -int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) -{ - int cpu = smp_processor_id(); - int cpt = cptab->ctb_cpu2cpt[cpu]; - - if (cpt < 0) { - if (!remap) - return cpt; - - /* don't return negative value for safety of upper layer, - * instead we shadow the unknown cpu to a valid partition ID - */ - cpt = cpu % cptab->ctb_nparts; - } - - return cpt; -} -EXPORT_SYMBOL(cfs_cpt_current); - -int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) -{ - LASSERT(cpu >= 0 && cpu < nr_cpu_ids); - - return cptab->ctb_cpu2cpt[cpu]; -} -EXPORT_SYMBOL(cfs_cpt_of_cpu); - -int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) -{ - cpumask_t *cpumask; - nodemask_t *nodemask; - int rc; - int i; - - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - if (cpt == CFS_CPT_ANY) { - cpumask = 
cptab->ctb_cpumask; - nodemask = cptab->ctb_nodemask; - } else { - cpumask = cptab->ctb_parts[cpt].cpt_cpumask; - nodemask = cptab->ctb_parts[cpt].cpt_nodemask; - } - - if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) { - CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n", - cpt); - return -EINVAL; - } - - for_each_online_cpu(i) { - if (cpumask_test_cpu(i, cpumask)) - continue; - - rc = set_cpus_allowed_ptr(current, cpumask); - set_mems_allowed(*nodemask); - if (rc == 0) - schedule(); /* switch to allowed CPU */ - - return rc; - } - - /* don't need to set affinity because all online CPUs are covered */ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_bind); - -/** - * Choose max to \a number CPUs from \a node and set them in \a cpt. - * We always prefer to choose CPU in the same core/socket. - */ -static int -cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, - cpumask_t *node, int number) -{ - cpumask_t *socket = NULL; - cpumask_t *core = NULL; - int rc = 0; - int cpu; - - LASSERT(number > 0); - - if (number >= cpumask_weight(node)) { - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); - - rc = cfs_cpt_set_cpu(cptab, cpt, cpu); - if (!rc) - return -EINVAL; - cpumask_clear_cpu(cpu, node); - } - return 0; - } - - /* allocate scratch buffer */ - LIBCFS_ALLOC(socket, cpumask_size()); - LIBCFS_ALLOC(core, cpumask_size()); - if (!socket || !core) { - rc = -ENOMEM; - goto out; - } - - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); - - /* get cpumask for cores in the same socket */ - cpumask_copy(socket, topology_core_cpumask(cpu)); - cpumask_and(socket, socket, node); - - LASSERT(!cpumask_empty(socket)); - - while (!cpumask_empty(socket)) { - int i; - - /* get cpumask for hts in the same core */ - cpumask_copy(core, topology_sibling_cpumask(cpu)); - cpumask_and(core, core, node); - - LASSERT(!cpumask_empty(core)); - - for_each_cpu(i, 
core) { - cpumask_clear_cpu(i, socket); - cpumask_clear_cpu(i, node); - - rc = cfs_cpt_set_cpu(cptab, cpt, i); - if (!rc) { - rc = -EINVAL; - goto out; - } - - if (--number == 0) - goto out; - } - cpu = cpumask_first(socket); - } - } - - out: - if (socket) - LIBCFS_FREE(socket, cpumask_size()); - if (core) - LIBCFS_FREE(core, cpumask_size()); - return rc; -} - -#define CPT_WEIGHT_MIN 4u - -static unsigned int -cfs_cpt_num_estimate(void) -{ - unsigned nnode = num_online_nodes(); - unsigned ncpu = num_online_cpus(); - unsigned ncpt; - - if (ncpu <= CPT_WEIGHT_MIN) { - ncpt = 1; - goto out; - } - - /* generate reasonable number of CPU partitions based on total number - * of CPUs, Preferred N should be power2 and match this condition: - * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 - */ - for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1) - ; - - if (ncpt <= nnode) { /* fat numa system */ - while (nnode > ncpt) - nnode >>= 1; - - } else { /* ncpt > nnode */ - while ((nnode << 1) <= ncpt) - nnode <<= 1; - } - - ncpt = nnode; - - out: -#if (BITS_PER_LONG == 32) - /* config many CPU partitions on 32-bit system could consume - * too much memory - */ - ncpt = min(2U, ncpt); -#endif - while (ncpu % ncpt != 0) - ncpt--; /* worst case is 1 */ - - return ncpt; -} - -static struct cfs_cpt_table * -cfs_cpt_table_create(int ncpt) -{ - struct cfs_cpt_table *cptab = NULL; - cpumask_t *mask = NULL; - int cpt = 0; - int num; - int rc; - int i; - - rc = cfs_cpt_num_estimate(); - if (ncpt <= 0) - ncpt = rc; - - if (ncpt > num_online_cpus() || ncpt > 4 * rc) { - CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n", - ncpt, rc); - } - - if (num_online_cpus() % ncpt != 0) { - CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n", - (int)num_online_cpus(), ncpt); - goto failed; - } - - cptab = 
cfs_cpt_table_alloc(ncpt); - if (!cptab) { - CERROR("Failed to allocate CPU map(%d)\n", ncpt); - goto failed; - } - - num = num_online_cpus() / ncpt; - if (num == 0) { - CERROR("CPU changed while setting CPU partition\n"); - goto failed; - } - - LIBCFS_ALLOC(mask, cpumask_size()); - if (!mask) { - CERROR("Failed to allocate scratch cpumask\n"); - goto failed; - } - - for_each_online_node(i) { - cpumask_copy(mask, cpumask_of_node(i)); - - while (!cpumask_empty(mask)) { - struct cfs_cpu_partition *part; - int n; - - if (cpt >= ncpt) - goto failed; - - part = &cptab->ctb_parts[cpt]; - - n = num - cpumask_weight(part->cpt_cpumask); - LASSERT(n > 0); - - rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); - if (rc < 0) - goto failed; - - LASSERT(num >= cpumask_weight(part->cpt_cpumask)); - if (num == cpumask_weight(part->cpt_cpumask)) - cpt++; - } - } - - if (cpt != ncpt || - num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { - CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n", - cptab->ctb_nparts, num, cpt, - cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); - goto failed; - } - - LIBCFS_FREE(mask, cpumask_size()); - - return cptab; - - failed: - CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n", - ncpt, num_online_nodes(), num_online_cpus()); - - if (mask) - LIBCFS_FREE(mask, cpumask_size()); - - if (cptab) - cfs_cpt_table_free(cptab); - - return NULL; -} - -static struct cfs_cpt_table * -cfs_cpt_table_create_pattern(char *pattern) -{ - struct cfs_cpt_table *cptab; - char *str = pattern; - int node = 0; - int high; - int ncpt; - int c; - - for (ncpt = 0;; ncpt++) { /* quick scan bracket */ - str = strchr(str, '['); - if (!str) - break; - str++; - } - - str = cfs_trimwhite(pattern); - if (*str == 'n' || *str == 'N') { - pattern = str + 1; - node = 1; - } - - if (ncpt == 0 || - (node && ncpt > num_online_nodes()) || - (!node && ncpt > num_online_cpus())) { - 
CERROR("Invalid pattern %s, or too many partitions %d\n", - pattern, ncpt); - return NULL; - } - - high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; - - cptab = cfs_cpt_table_alloc(ncpt); - if (!cptab) { - CERROR("Failed to allocate cpu partition table\n"); - return NULL; - } - - for (str = cfs_trimwhite(pattern), c = 0;; c++) { - struct cfs_range_expr *range; - struct cfs_expr_list *el; - char *bracket = strchr(str, '['); - int cpt; - int rc; - int i; - int n; - - if (!bracket) { - if (*str != 0) { - CERROR("Invalid pattern %s\n", str); - goto failed; - } - if (c != ncpt) { - CERROR("expect %d partitions but found %d\n", - ncpt, c); - goto failed; - } - break; - } - - if (sscanf(str, "%d%n", &cpt, &n) < 1) { - CERROR("Invalid cpu pattern %s\n", str); - goto failed; - } - - if (cpt < 0 || cpt >= ncpt) { - CERROR("Invalid partition id %d, total partitions %d\n", - cpt, ncpt); - goto failed; - } - - if (cfs_cpt_weight(cptab, cpt) != 0) { - CERROR("Partition %d has already been set.\n", cpt); - goto failed; - } - - str = cfs_trimwhite(str + n); - if (str != bracket) { - CERROR("Invalid pattern %s\n", str); - goto failed; - } - - bracket = strchr(str, ']'); - if (!bracket) { - CERROR("missing right bracket for cpt %d, %s\n", - cpt, str); - goto failed; - } - - if (cfs_expr_list_parse(str, (bracket - str) + 1, - 0, high, &el) != 0) { - CERROR("Can't parse number range: %s\n", str); - goto failed; - } - - list_for_each_entry(range, &el->el_exprs, re_link) { - for (i = range->re_lo; i <= range->re_hi; i++) { - if ((i - range->re_lo) % range->re_stride != 0) - continue; - - rc = node ? 
cfs_cpt_set_node(cptab, cpt, i) : - cfs_cpt_set_cpu(cptab, cpt, i); - if (!rc) { - cfs_expr_list_free(el); - goto failed; - } - } - } - - cfs_expr_list_free(el); - - if (!cfs_cpt_online(cptab, cpt)) { - CERROR("No online CPU is found on partition %d\n", cpt); - goto failed; - } - - str = cfs_trimwhite(bracket + 1); - } - - return cptab; - - failed: - cfs_cpt_table_free(cptab); - return NULL; -} - -#ifdef CONFIG_HOTPLUG_CPU -static int -cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - bool warn; - - switch (action) { - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - spin_lock(&cpt_data.cpt_lock); - cpt_data.cpt_version++; - spin_unlock(&cpt_data.cpt_lock); - /* Fall through */ - default: - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) { - CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n", - cpu, action); - break; - } - - mutex_lock(&cpt_data.cpt_mutex); - /* if all HTs in a core are offline, it may break affinity */ - cpumask_copy(cpt_data.cpt_cpumask, - topology_sibling_cpumask(cpu)); - warn = cpumask_any_and(cpt_data.cpt_cpumask, - cpu_online_mask) >= nr_cpu_ids; - mutex_unlock(&cpt_data.cpt_mutex); - CDEBUG(warn ? 
D_WARNING : D_INFO, - "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n", - cpu, action); - } - - return NOTIFY_OK; -} - -static struct notifier_block cfs_cpu_notifier = { - .notifier_call = cfs_cpu_notify, - .priority = 0 -}; - -#endif - -void -cfs_cpu_fini(void) -{ - if (cfs_cpt_table) - cfs_cpt_table_free(cfs_cpt_table); - -#ifdef CONFIG_HOTPLUG_CPU - unregister_hotcpu_notifier(&cfs_cpu_notifier); -#endif - if (cpt_data.cpt_cpumask) - LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size()); -} - -int -cfs_cpu_init(void) -{ - LASSERT(!cfs_cpt_table); - - memset(&cpt_data, 0, sizeof(cpt_data)); - - LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size()); - if (!cpt_data.cpt_cpumask) { - CERROR("Failed to allocate scratch buffer\n"); - return -1; - } - - spin_lock_init(&cpt_data.cpt_lock); - mutex_init(&cpt_data.cpt_mutex); - -#ifdef CONFIG_HOTPLUG_CPU - register_hotcpu_notifier(&cfs_cpu_notifier); -#endif - - if (*cpu_pattern != 0) { - cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern); - if (!cfs_cpt_table) { - CERROR("Failed to create cptab from pattern %s\n", - cpu_pattern); - goto failed; - } - - } else { - cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions); - if (!cfs_cpt_table) { - CERROR("Failed to create ptable with npartitions %d\n", - cpu_npartitions); - goto failed; - } - } - - spin_lock(&cpt_data.cpt_lock); - if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) { - spin_unlock(&cpt_data.cpt_lock); - CERROR("CPU hotplug/unplug during setup\n"); - goto failed; - } - spin_unlock(&cpt_data.cpt_lock); - - LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n", - num_online_cpus(), cfs_cpt_number(cfs_cpt_table)); - return 0; - - failed: - cfs_cpu_fini(); - return -1; -} - -#endif diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c deleted file mode 100644 index db0572733712..000000000000 --- 
a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto-adler.c +++ /dev/null @@ -1,137 +0,0 @@ -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - */ - -/* - * This is crypto api shash wrappers to zlib_adler32. 
- */ - -#include -#include -#include -#include "linux-crypto.h" - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -static int adler32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = 1; - - return 0; -} - -static int adler32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - *mctx = *(u32 *)key; - return 0; -} - -static int adler32_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *cksump = shash_desc_ctx(desc); - - *cksump = *mctx; - - return 0; -} - -static int adler32_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *cksump = shash_desc_ctx(desc); - - *cksump = zlib_adler32(*cksump, data, len); - return 0; -} - -static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len, - u8 *out) -{ - *(u32 *)out = zlib_adler32(*cksump, data, len); - return 0; -} - -static int adler32_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __adler32_finup(shash_desc_ctx(desc), data, len, out); -} - -static int adler32_final(struct shash_desc *desc, u8 *out) -{ - u32 *cksump = shash_desc_ctx(desc); - - *(u32 *)out = *cksump; - return 0; -} - -static int adler32_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg = { - .setkey = adler32_setkey, - .init = adler32_init, - .update = adler32_update, - .final = adler32_final, - .finup = adler32_finup, - .digest = adler32_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "adler32", - .cra_driver_name = "adler32-zlib", - .cra_priority = 100, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = 
THIS_MODULE, - .cra_init = adler32_cra_init, - } -}; - -int cfs_crypto_adler32_register(void) -{ - return crypto_register_shash(&alg); -} - -void cfs_crypto_adler32_unregister(void) -{ - crypto_unregister_shash(&alg); -} diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c deleted file mode 100644 index 1d2f70fda944..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.c +++ /dev/null @@ -1,284 +0,0 @@ -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Copyright (c) 2012, Intel Corporation. 
- */ - -#include -#include -#include "../../../include/linux/libcfs/libcfs.h" -#include "linux-crypto.h" -/** - * Array of hash algorithm speed in MByte per second - */ -static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; - -static int cfs_crypto_hash_alloc(unsigned char alg_id, - const struct cfs_crypto_hash_type **type, - struct hash_desc *desc, unsigned char *key, - unsigned int key_len) -{ - int err = 0; - - *type = cfs_crypto_hash_type(alg_id); - - if (!*type) { - CWARN("Unsupported hash algorithm id = %d, max id is %d\n", - alg_id, CFS_HASH_ALG_MAX); - return -EINVAL; - } - desc->tfm = crypto_alloc_hash((*type)->cht_name, 0, 0); - - if (!desc->tfm) - return -EINVAL; - - if (IS_ERR(desc->tfm)) { - CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n", - (*type)->cht_name); - return PTR_ERR(desc->tfm); - } - - desc->flags = 0; - - /** Shash have different logic for initialization then digest - * shash: crypto_hash_setkey, crypto_hash_init - * digest: crypto_digest_init, crypto_digest_setkey - * Skip this function for digest, because we use shash logic at - * cfs_crypto_hash_alloc. 
- */ - if (key) - err = crypto_hash_setkey(desc->tfm, key, key_len); - else if ((*type)->cht_key != 0) - err = crypto_hash_setkey(desc->tfm, - (unsigned char *)&((*type)->cht_key), - (*type)->cht_size); - - if (err != 0) { - crypto_free_hash(desc->tfm); - return err; - } - - CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n", - (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_name, - (crypto_hash_tfm(desc->tfm))->__crt_alg->cra_driver_name, - cfs_crypto_hash_speeds[alg_id]); - - return crypto_hash_init(desc); -} - -int cfs_crypto_hash_digest(unsigned char alg_id, - const void *buf, unsigned int buf_len, - unsigned char *key, unsigned int key_len, - unsigned char *hash, unsigned int *hash_len) -{ - struct scatterlist sl; - struct hash_desc hdesc; - int err; - const struct cfs_crypto_hash_type *type; - - if (!buf || buf_len == 0 || !hash_len) - return -EINVAL; - - err = cfs_crypto_hash_alloc(alg_id, &type, &hdesc, key, key_len); - if (err != 0) - return err; - - if (!hash || *hash_len < type->cht_size) { - *hash_len = type->cht_size; - crypto_free_hash(hdesc.tfm); - return -ENOSPC; - } - sg_init_one(&sl, buf, buf_len); - - hdesc.flags = 0; - err = crypto_hash_digest(&hdesc, &sl, sl.length, hash); - crypto_free_hash(hdesc.tfm); - - return err; -} -EXPORT_SYMBOL(cfs_crypto_hash_digest); - -struct cfs_crypto_hash_desc * - cfs_crypto_hash_init(unsigned char alg_id, - unsigned char *key, unsigned int key_len) -{ - struct hash_desc *hdesc; - int err; - const struct cfs_crypto_hash_type *type; - - hdesc = kmalloc(sizeof(*hdesc), 0); - if (!hdesc) - return ERR_PTR(-ENOMEM); - - err = cfs_crypto_hash_alloc(alg_id, &type, hdesc, key, key_len); - - if (err) { - kfree(hdesc); - return ERR_PTR(err); - } - return (struct cfs_crypto_hash_desc *)hdesc; -} -EXPORT_SYMBOL(cfs_crypto_hash_init); - -int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc, - struct page *page, unsigned int offset, - unsigned int len) -{ - struct scatterlist sl; - - sg_init_table(&sl, 1); 
- sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK); - - return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length); -} -EXPORT_SYMBOL(cfs_crypto_hash_update_page); - -int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc, - const void *buf, unsigned int buf_len) -{ - struct scatterlist sl; - - sg_init_one(&sl, buf, buf_len); - - return crypto_hash_update((struct hash_desc *)hdesc, &sl, sl.length); -} -EXPORT_SYMBOL(cfs_crypto_hash_update); - -/* If hash_len pointer is NULL - destroy descriptor. */ -int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc, - unsigned char *hash, unsigned int *hash_len) -{ - int err; - int size = crypto_hash_digestsize(((struct hash_desc *)hdesc)->tfm); - - if (!hash_len) { - crypto_free_hash(((struct hash_desc *)hdesc)->tfm); - kfree(hdesc); - return 0; - } - if (!hash || *hash_len < size) { - *hash_len = size; - return -ENOSPC; - } - err = crypto_hash_final((struct hash_desc *)hdesc, hash); - - if (err < 0) { - /* May be caller can fix error */ - return err; - } - crypto_free_hash(((struct hash_desc *)hdesc)->tfm); - kfree(hdesc); - return err; -} -EXPORT_SYMBOL(cfs_crypto_hash_final); - -static void cfs_crypto_performance_test(unsigned char alg_id, - const unsigned char *buf, - unsigned int buf_len) -{ - unsigned long start, end; - int bcount, err = 0; - int sec = 1; /* do test only 1 sec */ - unsigned char hash[64]; - unsigned int hash_len = 64; - - for (start = jiffies, end = start + sec * HZ, bcount = 0; - time_before(jiffies, end); bcount++) { - err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0, - hash, &hash_len); - if (err) - break; - } - end = jiffies; - - if (err) { - cfs_crypto_hash_speeds[alg_id] = -1; - CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n", - cfs_crypto_hash_name(alg_id), err); - } else { - unsigned long tmp; - - tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) * - 1000) / (1024 * 1024); - cfs_crypto_hash_speeds[alg_id] = (int)tmp; - } - CDEBUG(D_INFO, "Crypto 
hash algorithm %s speed = %d MB/s\n", - cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]); -} - -int cfs_crypto_hash_speed(unsigned char hash_alg) -{ - if (hash_alg < CFS_HASH_ALG_MAX) - return cfs_crypto_hash_speeds[hash_alg]; - return -1; -} -EXPORT_SYMBOL(cfs_crypto_hash_speed); - -/** - * Do performance test for all hash algorithms. - */ -static int cfs_crypto_test_hashes(void) -{ - unsigned char i; - unsigned char *data; - unsigned int j; - /* Data block size for testing hash. Maximum - * kmalloc size for 2.6.18 kernel is 128K - */ - unsigned int data_len = 1 * 128 * 1024; - - data = kmalloc(data_len, 0); - if (!data) - return -ENOMEM; - - for (j = 0; j < data_len; j++) - data[j] = j & 0xff; - - for (i = 0; i < CFS_HASH_ALG_MAX; i++) - cfs_crypto_performance_test(i, data, data_len); - - kfree(data); - return 0; -} - -static int adler32; - -int cfs_crypto_register(void) -{ - request_module("crc32c"); - - adler32 = cfs_crypto_adler32_register(); - - /* check all algorithms and do performance test */ - cfs_crypto_test_hashes(); - return 0; -} - -void cfs_crypto_unregister(void) -{ - if (adler32 == 0) - cfs_crypto_adler32_unregister(); -} diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h deleted file mode 100644 index 18e8cd4d8758..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-crypto.h +++ /dev/null @@ -1,29 +0,0 @@ - /* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/** - * Functions for start/stop shash adler32 algorithm. - */ -int cfs_crypto_adler32_register(void); -void cfs_crypto_adler32_unregister(void); diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c deleted file mode 100644 index 13d31e8a931d..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-curproc.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/linux/linux-curproc.c - * - * Lustre curproc API implementation for Linux kernel - * - * Author: Nikita Danilov - */ - -#include -#include - -#include -#include - -#define DEBUG_SUBSYSTEM S_LNET - -#include "../../../include/linux/libcfs/libcfs.h" - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. - */ - -void cfs_cap_raise(cfs_cap_t cap) -{ - struct cred *cred; - - cred = prepare_creds(); - if (cred) { - cap_raise(cred->cap_effective, cap); - commit_creds(cred); - } -} -EXPORT_SYMBOL(cfs_cap_raise); - -void cfs_cap_lower(cfs_cap_t cap) -{ - struct cred *cred; - - cred = prepare_creds(); - if (cred) { - cap_lower(cred->cap_effective, cap); - commit_creds(cred); - } -} -EXPORT_SYMBOL(cfs_cap_lower); - -int cfs_cap_raised(cfs_cap_t cap) -{ - return cap_raised(current_cap(), cap); -} -EXPORT_SYMBOL(cfs_cap_raised); - -static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap) -{ - /* XXX lost high byte */ - *cap = kcap.cap[0]; -} - -cfs_cap_t cfs_curproc_cap_pack(void) -{ - cfs_cap_t cap; - - cfs_kernel_cap_pack(current_cap(), &cap); - return cap; -} -EXPORT_SYMBOL(cfs_curproc_cap_pack); - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c deleted file mode 100644 index 638e4b33d3a9..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-debug.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/linux/linux-debug.c - * - * Author: Phil Schwan - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -# define DEBUG_SUBSYSTEM S_LNET - -#include "../../../include/linux/libcfs/libcfs.h" - -#include "../tracefile.h" - -#include - -char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; -char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; - -/** - * Upcall function once a Lustre log has been dumped. 
- * - * \param file path of the dumped log - */ -void libcfs_run_debug_log_upcall(char *file) -{ - char *argv[3]; - int rc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - argv[0] = lnet_debug_log_upcall; - - LASSERTF(file, "called on a null filename\n"); - argv[1] = file; /* only need to pass the path of the file */ - - argv[2] = NULL; - - rc = call_usermodehelper(argv[0], argv, envp, 1); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", - rc, argv[0], argv[1]); - } else { - CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", - argv[0], argv[1]); - } -} - -void libcfs_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - argv[0] = lnet_upcall; - argc = 1; - while (argv[argc]) - argc++; - - LASSERT(argc >= 2); - - rc = call_usermodehelper(argv[0], argv, envp, 1); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? 
"" : ",..."); - } -} - -void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata) -{ - char *argv[6]; - char buf[32]; - - snprintf(buf, sizeof(buf), "%d", msgdata->msg_line); - - argv[1] = "LBUG"; - argv[2] = (char *)msgdata->msg_file; - argv[3] = (char *)msgdata->msg_fn; - argv[4] = buf; - argv[5] = NULL; - - libcfs_run_upcall(argv); -} -EXPORT_SYMBOL(libcfs_run_lbug_upcall); - -/* coverity[+kill] */ -void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(msgdata, "LBUG\n"); - - if (in_interrupt()) { - panic("LBUG in interrupt.\n"); - /* not reached */ - } - - dump_stack(); - if (!libcfs_panic_on_lbug) - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(msgdata); - if (libcfs_panic_on_lbug) - panic("LBUG"); - set_task_state(current, TASK_UNINTERRUPTIBLE); - while (1) - schedule(); -} -EXPORT_SYMBOL(lbug_with_loc); - -static int panic_notifier(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - if (libcfs_panic_in_progress) - return 0; - - libcfs_panic_in_progress = 1; - mb(); - - return 0; -} - -static struct notifier_block libcfs_panic_notifier = { - .notifier_call = panic_notifier, - .next = NULL, - .priority = 10000, -}; - -void libcfs_register_panic_notifier(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, - &libcfs_panic_notifier); -} - -void libcfs_unregister_panic_notifier(void) -{ - atomic_notifier_chain_unregister(&panic_notifier_list, - &libcfs_panic_notifier); -} diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c deleted file mode 100644 index 86f32ffc5d04..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-mem.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - */ -/* - * This file creates a memory allocation primitive for Lustre, that - * allows to fallback to vmalloc allocations should regular kernel allocations - * fail due to size or system memory fragmentation. - * - * Author: Oleg Drokin - * - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate Technology. - */ -#include -#include - -#include "../../../include/linux/libcfs/libcfs.h" - -void *libcfs_kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kzalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} -EXPORT_SYMBOL(libcfs_kvzalloc); - -void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size, - gfp_t flags) -{ - void *ret; - - ret = kzalloc_node(size, flags | __GFP_NOWARN, - cfs_cpt_spread_node(cptab, cpt)); - if (!ret) { - WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH))); - ret = vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt)); - } - - return ret; -} -EXPORT_SYMBOL(libcfs_kvzalloc_cpt); diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c deleted file mode 100644 index ebc60ac9bb7a..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-module.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "../../../include/linux/libcfs/libcfs.h" - -#define LNET_MINOR 240 - -int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) -{ - if (libcfs_ioctl_is_invalid(data)) { - CERROR("LNET: ioctl not correctly formatted\n"); - return -EINVAL; - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - cfs_size_round(data->ioc_inllen1); - - return 0; -} - -int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg, - __u32 *len) -{ - struct libcfs_ioctl_hdr hdr; - - if (copy_from_user(&hdr, arg, sizeof(hdr))) - return -EFAULT; - - if (hdr.ioc_version != LIBCFS_IOCTL_VERSION && - hdr.ioc_version != LIBCFS_IOCTL_VERSION2) { - CERROR("LNET: version mismatch expected %#x, got %#x\n", - LIBCFS_IOCTL_VERSION, hdr.ioc_version); - return -EINVAL; - } - - *len = hdr.ioc_len; - - return 0; -} - -int libcfs_ioctl_popdata(void __user *arg, void *data, int size) -{ - if (copy_to_user(arg, data, size)) - return -EFAULT; - return 0; -} - -static int -libcfs_psdev_open(struct inode *inode, struct file *file) -{ - int rc = 0; - - if (!inode) - return -EINVAL; - if (libcfs_psdev_ops.p_open) - rc = libcfs_psdev_ops.p_open(0, NULL); - else - return -EPERM; - return rc; -} - -/* called when closing /dev/device */ -static int -libcfs_psdev_release(struct inode *inode, struct file *file) -{ - int rc = 0; - - if (!inode) - return -EINVAL; - if (libcfs_psdev_ops.p_close) - rc = libcfs_psdev_ops.p_close(0, NULL); - else - rc = -EPERM; - return rc; -} - -static long libcfs_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct cfs_psdev_file pfile; - int rc = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), 
_IOC_SIZE(cmd)); - return -EINVAL; - } - - /* Handle platform-dependent IOC requests */ - switch (cmd) { - case IOC_LIBCFS_PANIC: - if (!capable(CFS_CAP_SYS_BOOT)) - return -EPERM; - panic("debugctl-invoked panic"); - return 0; - } - - if (libcfs_psdev_ops.p_ioctl) - rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg); - else - rc = -EPERM; - return rc; -} - -static const struct file_operations libcfs_fops = { - .unlocked_ioctl = libcfs_ioctl, - .open = libcfs_psdev_open, - .release = libcfs_psdev_release, -}; - -struct miscdevice libcfs_dev = { - .minor = LNET_MINOR, - .name = "lnet", - .fops = &libcfs_fops, -}; diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c deleted file mode 100644 index 89084460231a..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-prim.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. 
- * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include -#include -#include -#include - -#include "../../../include/linux/libcfs/libcfs.h" - -#if defined(CONFIG_KGDB) -#include -#endif - -/** - * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively - * waiting threads, which is not always desirable because all threads will - * be waken up again and again, even user only needs a few of them to be - * active most time. This is not good for performance because cache can - * be polluted by different threads. - * - * LIFO list can resolve this problem because we always wakeup the most - * recent active thread by default. - * - * NB: please don't call non-exclusive & exclusive wait on the same - * waitq if add_wait_queue_exclusive_head is used. 
- */ -void -add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link) -{ - unsigned long flags; - - spin_lock_irqsave(&waitq->lock, flags); - __add_wait_queue_exclusive(waitq, link); - spin_unlock_irqrestore(&waitq->lock, flags); -} -EXPORT_SYMBOL(add_wait_queue_exclusive_head); - -sigset_t -cfs_block_allsigs(void) -{ - unsigned long flags; - sigset_t old; - - spin_lock_irqsave(&current->sighand->siglock, flags); - old = current->blocked; - sigfillset(&current->blocked); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - - return old; -} -EXPORT_SYMBOL(cfs_block_allsigs); - -sigset_t cfs_block_sigs(unsigned long sigs) -{ - unsigned long flags; - sigset_t old; - - spin_lock_irqsave(&current->sighand->siglock, flags); - old = current->blocked; - sigaddsetmask(&current->blocked, sigs); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - return old; -} -EXPORT_SYMBOL(cfs_block_sigs); - -/* Block all signals except for the @sigs */ -sigset_t cfs_block_sigsinv(unsigned long sigs) -{ - unsigned long flags; - sigset_t old; - - spin_lock_irqsave(&current->sighand->siglock, flags); - old = current->blocked; - sigaddsetmask(&current->blocked, ~sigs); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - - return old; -} -EXPORT_SYMBOL(cfs_block_sigsinv); - -void -cfs_restore_sigs(sigset_t old) -{ - unsigned long flags; - - spin_lock_irqsave(&current->sighand->siglock, flags); - current->blocked = old; - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, flags); -} -EXPORT_SYMBOL(cfs_restore_sigs); - -int -cfs_signal_pending(void) -{ - return signal_pending(current); -} -EXPORT_SYMBOL(cfs_signal_pending); - -void -cfs_clear_sigpending(void) -{ - unsigned long flags; - - spin_lock_irqsave(&current->sighand->siglock, flags); - clear_tsk_thread_flag(current, TIF_SIGPENDING); - spin_unlock_irqrestore(&current->sighand->siglock, flags); -} -EXPORT_SYMBOL(cfs_clear_sigpending); diff --git 
a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c deleted file mode 100644 index 91c2ae8f9d67..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-tracefile.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include "../../../include/linux/libcfs/libcfs.h" -#include "../tracefile.h" - -/* percents to share the total debug memory for each type */ -static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = { - 80, /* 80% pages for CFS_TCD_TYPE_PROC */ - 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */ - 10 /* 10% pages for CFS_TCD_TYPE_IRQ */ -}; - -char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; - -static DECLARE_RWSEM(cfs_tracefile_sem); - -int cfs_tracefile_init_arch(void) -{ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - - /* initialize trace_data */ - memset(cfs_trace_data, 0, sizeof(cfs_trace_data)); - for (i = 0; i < CFS_TCD_TYPE_MAX; i++) { - cfs_trace_data[i] = - kmalloc(sizeof(union cfs_trace_data_union) * - num_possible_cpus(), GFP_KERNEL); - if (!cfs_trace_data[i]) - goto out; - } - - /* arch related info initialized */ - cfs_tcd_for_each(tcd, i, j) { - spin_lock_init(&tcd->tcd_lock); - tcd->tcd_pages_factor = pages_factor[i]; - tcd->tcd_type = i; - tcd->tcd_cpu = j; - } - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - cfs_trace_console_buffers[i][j] = - kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE, - GFP_KERNEL); - - if (!cfs_trace_console_buffers[i][j]) - goto out; - } - - return 0; - -out: - cfs_tracefile_fini_arch(); - printk(KERN_ERR "lnet: Not enough memory\n"); - return -ENOMEM; -} - -void cfs_tracefile_fini_arch(void) -{ - int i; - int j; - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - kfree(cfs_trace_console_buffers[i][j]); - cfs_trace_console_buffers[i][j] = NULL; - } - - for (i = 0; cfs_trace_data[i]; i++) { - kfree(cfs_trace_data[i]); - cfs_trace_data[i] = NULL; - } -} - -void cfs_tracefile_read_lock(void) -{ - down_read(&cfs_tracefile_sem); -} - -void cfs_tracefile_read_unlock(void) -{ - up_read(&cfs_tracefile_sem); -} - -void cfs_tracefile_write_lock(void) -{ - down_write(&cfs_tracefile_sem); -} - -void 
cfs_tracefile_write_unlock(void) -{ - up_write(&cfs_tracefile_sem); -} - -enum cfs_trace_buf_type cfs_trace_buf_idx_get(void) -{ - if (in_irq()) - return CFS_TCD_TYPE_IRQ; - if (in_softirq()) - return CFS_TCD_TYPE_SOFTIRQ; - return CFS_TCD_TYPE_PROC; -} - -/* - * The walking argument indicates the locking comes from all tcd types - * iterator and we must lock it and dissable local irqs to avoid deadlocks - * with other interrupt locks that might be happening. See LU-1311 - * for details. - */ -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking) - __acquires(&tcd->tc_lock) -{ - __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); - if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) - spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); - else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) - spin_lock_bh(&tcd->tcd_lock); - else if (unlikely(walking)) - spin_lock_irq(&tcd->tcd_lock); - else - spin_lock(&tcd->tcd_lock); - return 1; -} - -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking) - __releases(&tcd->tcd_lock) -{ - __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); - if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) - spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); - else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) - spin_unlock_bh(&tcd->tcd_lock); - else if (unlikely(walking)) - spin_unlock_irq(&tcd->tcd_lock); - else - spin_unlock(&tcd->tcd_lock); -} - -void -cfs_set_ptldebug_header(struct ptldebug_header *header, - struct libcfs_debug_msg_data *msgdata, - unsigned long stack) -{ - struct timespec64 ts; - - ktime_get_real_ts64(&ts); - - header->ph_subsys = msgdata->msg_subsys; - header->ph_mask = msgdata->msg_mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_type = cfs_trace_buf_idx_get(); - /* y2038 safe since all user space treats this as unsigned, but - * will overflow in 2106 - */ - header->ph_sec = (u32)ts.tv_sec; - header->ph_usec = ts.tv_nsec / NSEC_PER_USEC; - header->ph_stack = stack; - header->ph_pid = current->pid; - 
header->ph_line_num = msgdata->msg_line; - header->ph_extern_pid = 0; -} - -static char * -dbghdr_to_err_string(struct ptldebug_header *hdr) -{ - switch (hdr->ph_subsys) { - case S_LND: - case S_LNET: - return "LNetError"; - default: - return "LustreError"; - } -} - -static char * -dbghdr_to_info_string(struct ptldebug_header *hdr) -{ - switch (hdr->ph_subsys) { - case S_LND: - case S_LNET: - return "LNet"; - default: - return "Lustre"; - } -} - -void cfs_print_to_console(struct ptldebug_header *hdr, int mask, - const char *buf, int len, const char *file, - const char *fn) -{ - char *prefix = "Lustre", *ptype = NULL; - - if ((mask & D_EMERG) != 0) { - prefix = dbghdr_to_err_string(hdr); - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = dbghdr_to_err_string(hdr); - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = dbghdr_to_info_string(hdr); - ptype = KERN_WARNING; - } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { - prefix = dbghdr_to_info_string(hdr); - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %.*s", ptype, prefix, len, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, - hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num, - fn, len, buf); - } -} - -int cfs_trace_max_debug_mb(void) -{ - int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); - - return max(512, (total_mb * 80) / 100); -} diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c deleted file mode 100644 index cdc640bfdba8..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/module.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -# define DEBUG_SUBSYSTEM S_LNET - -#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \ - sizeof(struct lnet_ioctl_config_data)) - -#include "../../include/linux/libcfs/libcfs.h" -#include - -#include "../../include/linux/libcfs/libcfs_crypto.h" -#include "../../include/linux/lnet/lib-lnet.h" -#include "../../include/linux/lnet/lib-dlc.h" -#include "../../include/linux/lnet/lnet.h" -#include "tracefile.h" - -static struct dentry *lnet_debugfs_root; - -/* called when opening /dev/device */ -static int libcfs_psdev_open(unsigned long flags, void *args) -{ - try_module_get(THIS_MODULE); - return 0; -} - -/* called when closing /dev/device */ -static int libcfs_psdev_release(unsigned long flags, void *args) -{ - module_put(THIS_MODULE); - return 0; -} - -static DECLARE_RWSEM(ioctl_list_sem); -static LIST_HEAD(ioctl_list); - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (!list_empty(&hand->item)) - rc = -EBUSY; - else - list_add_tail(&hand->item, &ioctl_list); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_register_ioctl); - -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (list_empty(&hand->item)) - rc = -ENOENT; - else - list_del_init(&hand->item); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_deregister_ioctl); - -static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd, - void __user *arg, struct libcfs_ioctl_hdr *hdr) -{ - struct libcfs_ioctl_data *data = NULL; - int err = -EINVAL; - - /* - * The libcfs_ioctl_data_adjust() function performs adjustment - * operations on the libcfs_ioctl_data structure to make - * it usable by the code. 
This doesn't need to be called - * for new data structures added. - */ - if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) { - data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); - err = libcfs_ioctl_data_adjust(data); - if (err) - return err; - } - - switch (cmd) { - case IOC_LIBCFS_CLEAR_DEBUG: - libcfs_debug_clear_buffer(); - return 0; - /* - * case IOC_LIBCFS_PANIC: - * Handled in arch/cfs_module.c - */ - case IOC_LIBCFS_MARK_DEBUG: - if (!data->ioc_inlbuf1 || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') - return -EINVAL; - libcfs_debug_mark_buffer(data->ioc_inlbuf1); - return 0; - - default: { - struct libcfs_ioctl_handler *hand; - - err = -EINVAL; - down_read(&ioctl_list_sem); - list_for_each_entry(hand, &ioctl_list, item) { - err = hand->handle_ioctl(cmd, hdr); - if (err != -EINVAL) { - if (err == 0) - err = libcfs_ioctl_popdata(arg, - hdr, hdr->ioc_len); - break; - } - } - up_read(&ioctl_list_sem); - break; - } - } - - return err; -} - -static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, - void __user *arg) -{ - struct libcfs_ioctl_hdr *hdr; - int err = 0; - __u32 buf_len; - - err = libcfs_ioctl_getdata_len(arg, &buf_len); - if (err) - return err; - - /* - * do a check here to restrict the size of the memory - * to allocate to guard against DoS attacks. 
- */ - if (buf_len > LNET_MAX_IOCTL_BUF_LEN) { - CERROR("LNET: user buffer exceeds kernel buffer\n"); - return -EINVAL; - } - - LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL); - if (!hdr) - return -ENOMEM; - - /* 'cmd' and permissions get checked in our arch-specific caller */ - if (copy_from_user(hdr, arg, buf_len)) { - CERROR("LNET ioctl: data error\n"); - err = -EFAULT; - goto out; - } - - err = libcfs_ioctl_handle(pfile, cmd, arg, hdr); - -out: - LIBCFS_FREE(hdr, buf_len); - return err; -} - -struct cfs_psdev_ops libcfs_psdev_ops = { - libcfs_psdev_open, - libcfs_psdev_release, - NULL, - NULL, - libcfs_ioctl -}; - -int lprocfs_call_handler(void *data, int write, loff_t *ppos, - void __user *buffer, size_t *lenp, - int (*handler)(void *data, int write, loff_t pos, - void __user *buffer, int len)) -{ - int rc = handler(data, write, *ppos, buffer, *lenp); - - if (rc < 0) - return rc; - - if (write) { - *ppos += *lenp; - } else { - *lenp = rc; - *ppos += rc; - } - return 0; -} -EXPORT_SYMBOL(lprocfs_call_handler); - -static int __proc_dobitmasks(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - const int tmpstrlen = 512; - char *tmpstr; - int rc; - unsigned int *mask = data; - int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; - int is_printk = (mask == &libcfs_printk) ? 
1 : 0; - - rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen); - if (rc < 0) - return rc; - - if (!write) { - libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); - rc = strlen(tmpstr); - - if (pos >= rc) { - rc = 0; - } else { - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, "\n"); - } - } else { - rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); - if (rc < 0) { - kfree(tmpstr); - return rc; - } - - rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); - /* Always print LBUG/LASSERT to console, so keep this mask */ - if (is_printk) - *mask |= D_EMERG; - } - - kfree(tmpstr); - return rc; -} - -static int proc_dobitmasks(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_dobitmasks); -} - -static int __proc_dump_kernel(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - if (!write) - return 0; - - return cfs_trace_dump_debug_buffer_usrstr(buffer, nob); -} - -static int proc_dump_kernel(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_dump_kernel); -} - -static int __proc_daemon_file(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - if (!write) { - int len = strlen(cfs_tracefile); - - if (pos >= len) - return 0; - - return cfs_trace_copyout_string(buffer, nob, - cfs_tracefile + pos, "\n"); - } - - return cfs_trace_daemon_command_usrstr(buffer, nob); -} - -static int proc_daemon_file(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_daemon_file); -} - -static int libcfs_force_lbug(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - if (write) - LBUG(); - return 0; -} - -static int 
proc_fail_loc(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - int rc; - long old_fail_loc = cfs_fail_loc; - - rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if (old_fail_loc != cfs_fail_loc) - wake_up(&cfs_race_waitq); - return rc; -} - -static int __proc_cpt_table(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - char *buf = NULL; - int len = 4096; - int rc = 0; - - if (write) - return -EPERM; - - LASSERT(cfs_cpt_table); - - while (1) { - LIBCFS_ALLOC(buf, len); - if (!buf) - return -ENOMEM; - - rc = cfs_cpt_table_print(cfs_cpt_table, buf, len); - if (rc >= 0) - break; - - if (rc == -EFBIG) { - LIBCFS_FREE(buf, len); - len <<= 1; - continue; - } - goto out; - } - - if (pos >= rc) { - rc = 0; - goto out; - } - - rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL); - out: - if (buf) - LIBCFS_FREE(buf, len); - return rc; -} - -static int proc_cpt_table(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_cpt_table); -} - -static struct ctl_table lnet_table[] = { - { - .procname = "debug", - .data = &libcfs_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "subsystem_debug", - .data = &libcfs_subsystem_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "printk", - .data = &libcfs_printk, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "cpu_partition_table", - .maxlen = 128, - .mode = 0444, - .proc_handler = &proc_cpt_table, - }, - - { - .procname = "upcall", - .data = lnet_upcall, - .maxlen = sizeof(lnet_upcall), - .mode = 0644, - .proc_handler = &proc_dostring, - }, - { - .procname = "debug_log_upcall", - .data = lnet_debug_log_upcall, - .maxlen = sizeof(lnet_debug_log_upcall), - .mode = 0644, - 
.proc_handler = &proc_dostring, - }, - { - .procname = "catastrophe", - .data = &libcfs_catastrophe, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .procname = "dump_kernel", - .maxlen = 256, - .mode = 0200, - .proc_handler = &proc_dump_kernel, - }, - { - .procname = "daemon_file", - .mode = 0644, - .maxlen = 256, - .proc_handler = &proc_daemon_file, - }, - { - .procname = "force_lbug", - .data = NULL, - .maxlen = 0, - .mode = 0200, - .proc_handler = &libcfs_force_lbug - }, - { - .procname = "fail_loc", - .data = &cfs_fail_loc, - .maxlen = sizeof(cfs_fail_loc), - .mode = 0644, - .proc_handler = &proc_fail_loc - }, - { - .procname = "fail_val", - .data = &cfs_fail_val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - } -}; - -static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = { - { "console_ratelimit", - "/sys/module/libcfs/parameters/libcfs_console_ratelimit"}, - { "debug_path", - "/sys/module/libcfs/parameters/libcfs_debug_file_path"}, - { "panic_on_lbug", - "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"}, - { "libcfs_console_backoff", - "/sys/module/libcfs/parameters/libcfs_console_backoff"}, - { "debug_mb", - "/sys/module/libcfs/parameters/libcfs_debug_mb"}, - { "console_min_delay_centisecs", - "/sys/module/libcfs/parameters/libcfs_console_min_delay"}, - { "console_max_delay_centisecs", - "/sys/module/libcfs/parameters/libcfs_console_max_delay"}, - {}, -}; - -static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct ctl_table *table = filp->private_data; - int error; - - error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos); - if (!error) - error = count; - - return error; -} - -static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ctl_table *table = filp->private_data; - int error; - - error = table->proc_handler(table, 1, 
(void __user *)buf, &count, ppos); - if (!error) - error = count; - - return error; -} - -static const struct file_operations lnet_debugfs_file_operations_rw = { - .open = simple_open, - .read = lnet_debugfs_read, - .write = lnet_debugfs_write, - .llseek = default_llseek, -}; - -static const struct file_operations lnet_debugfs_file_operations_ro = { - .open = simple_open, - .read = lnet_debugfs_read, - .llseek = default_llseek, -}; - -static const struct file_operations lnet_debugfs_file_operations_wo = { - .open = simple_open, - .write = lnet_debugfs_write, - .llseek = default_llseek, -}; - -static const struct file_operations *lnet_debugfs_fops_select(umode_t mode) -{ - if (!(mode & S_IWUGO)) - return &lnet_debugfs_file_operations_ro; - - if (!(mode & S_IRUGO)) - return &lnet_debugfs_file_operations_wo; - - return &lnet_debugfs_file_operations_rw; -} - -void lustre_insert_debugfs(struct ctl_table *table, - const struct lnet_debugfs_symlink_def *symlinks) -{ - if (!lnet_debugfs_root) - lnet_debugfs_root = debugfs_create_dir("lnet", NULL); - - /* Even if we cannot create, just ignore it altogether) */ - if (IS_ERR_OR_NULL(lnet_debugfs_root)) - return; - - /* We don't save the dentry returned in next two calls, because - * we don't call debugfs_remove() but rather remove_recursive() - */ - for (; table->procname; table++) - debugfs_create_file(table->procname, table->mode, - lnet_debugfs_root, table, - lnet_debugfs_fops_select(table->mode)); - - for (; symlinks && symlinks->name; symlinks++) - debugfs_create_symlink(symlinks->name, lnet_debugfs_root, - symlinks->target); -} -EXPORT_SYMBOL_GPL(lustre_insert_debugfs); - -static void lustre_remove_debugfs(void) -{ - debugfs_remove_recursive(lnet_debugfs_root); - - lnet_debugfs_root = NULL; -} - -static int libcfs_init(void) -{ - int rc; - - rc = libcfs_debug_init(5 * 1024 * 1024); - if (rc < 0) { - pr_err("LustreError: libcfs_debug_init: %d\n", rc); - return rc; - } - - rc = cfs_cpu_init(); - if (rc != 0) - goto 
cleanup_debug; - - rc = misc_register(&libcfs_dev); - if (rc) { - CERROR("misc_register: error %d\n", rc); - goto cleanup_cpu; - } - - rc = cfs_wi_startup(); - if (rc) { - CERROR("initialize workitem: error %d\n", rc); - goto cleanup_deregister; - } - - /* max to 4 threads, should be enough for rehash */ - rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4); - rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY, - rc, &cfs_sched_rehash); - if (rc != 0) { - CERROR("Startup workitem scheduler: error: %d\n", rc); - goto cleanup_deregister; - } - - rc = cfs_crypto_register(); - if (rc) { - CERROR("cfs_crypto_register: error %d\n", rc); - goto cleanup_wi; - } - - lustre_insert_debugfs(lnet_table, lnet_debugfs_symlinks); - - CDEBUG(D_OTHER, "portals setup OK\n"); - return 0; - cleanup_wi: - cfs_wi_shutdown(); - cleanup_deregister: - misc_deregister(&libcfs_dev); -cleanup_cpu: - cfs_cpu_fini(); - cleanup_debug: - libcfs_debug_cleanup(); - return rc; -} - -static void libcfs_exit(void) -{ - int rc; - - lustre_remove_debugfs(); - - if (cfs_sched_rehash) { - cfs_wi_sched_destroy(cfs_sched_rehash); - cfs_sched_rehash = NULL; - } - - cfs_crypto_unregister(); - cfs_wi_shutdown(); - - misc_deregister(&libcfs_dev); - - cfs_cpu_fini(); - - rc = libcfs_debug_cleanup(); - if (rc) - pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre helper library"); -MODULE_VERSION(LIBCFS_VERSION); -MODULE_LICENSE("GPL"); - -module_init(libcfs_init); -module_exit(libcfs_exit); diff --git a/drivers/staging/lustre/lustre/libcfs/prng.c b/drivers/staging/lustre/lustre/libcfs/prng.c deleted file mode 100644 index c75ae9a68e76..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/prng.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/prng.c - * - * concatenation of following two 16-bit multiply with carry generators - * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16, - * number and carry packed within the same 32 bit integer. 
- * algorithm recommended by Marsaglia -*/ - -#include "../../include/linux/libcfs/libcfs.h" - -/* - * From: George Marsaglia - * Newsgroups: sci.math - * Subject: Re: A RANDOM NUMBER GENERATOR FOR C - * Date: Tue, 30 Sep 1997 05:29:35 -0700 - * - * You may replace the two constants 36969 and 18000 by any - * pair of distinct constants from this list: - * 18000 18030 18273 18513 18879 19074 19098 19164 19215 19584 - * 19599 19950 20088 20508 20544 20664 20814 20970 21153 21243 - * 21423 21723 21954 22125 22188 22293 22860 22938 22965 22974 - * 23109 23124 23163 23208 23508 23520 23553 23658 23865 24114 - * 24219 24660 24699 24864 24948 25023 25308 25443 26004 26088 - * 26154 26550 26679 26838 27183 27258 27753 27795 27810 27834 - * 27960 28320 28380 28689 28710 28794 28854 28959 28980 29013 - * 29379 29889 30135 30345 30459 30714 30903 30963 31059 31083 - * (or any other 16-bit constants k for which both k*2^16-1 - * and k*2^15-1 are prime) - */ - -#define RANDOM_CONST_A 18030 -#define RANDOM_CONST_B 29013 - -static unsigned int seed_x = 521288629; -static unsigned int seed_y = 362436069; - -/** - * cfs_rand - creates new seeds - * - * First it creates new seeds from the previous seeds. Then it generates a - * new pseudo random number for use. - * - * Returns a pseudo-random 32-bit integer - */ -unsigned int cfs_rand(void) -{ - seed_x = RANDOM_CONST_A * (seed_x & 65535) + (seed_x >> 16); - seed_y = RANDOM_CONST_B * (seed_y & 65535) + (seed_y >> 16); - - return ((seed_x << 16) + (seed_y & 65535)); -} -EXPORT_SYMBOL(cfs_rand); - -/** - * cfs_srand - sets the initial seed - * @seed1 : (seed_x) should have the most entropy in the low bits of the word - * @seed2 : (seed_y) should have the most entropy in the high bits of the word - * - * Replaces the original seeds with new values. Used to generate a new pseudo - * random numbers. 
- */ -void cfs_srand(unsigned int seed1, unsigned int seed2) -{ - if (seed1) - seed_x = seed1; /* use default seeds if parameter is 0 */ - if (seed2) - seed_y = seed2; -} -EXPORT_SYMBOL(cfs_srand); - -/** - * cfs_get_random_bytes - generate a bunch of random numbers - * @buf : buffer to fill with random numbers - * @size: size of passed in buffer - * - * Fills a buffer with random bytes - */ -void cfs_get_random_bytes(void *buf, int size) -{ - int *p = buf; - int rem, tmp; - - LASSERT(size >= 0); - - rem = min((int)((unsigned long)buf & (sizeof(int) - 1)), size); - if (rem) { - get_random_bytes(&tmp, sizeof(tmp)); - tmp ^= cfs_rand(); - memcpy(buf, &tmp, rem); - p = buf + rem; - size -= rem; - } - - while (size >= sizeof(int)) { - get_random_bytes(&tmp, sizeof(tmp)); - *p = cfs_rand() ^ tmp; - size -= sizeof(int); - p++; - } - buf = p; - if (size) { - get_random_bytes(&tmp, sizeof(tmp)); - tmp ^= cfs_rand(); - memcpy(buf, &tmp, size); - } -} -EXPORT_SYMBOL(cfs_get_random_bytes); diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.c b/drivers/staging/lustre/lustre/libcfs/tracefile.c deleted file mode 100644 index ec3bc04bd89f..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/tracefile.c +++ /dev/null @@ -1,1208 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/tracefile.c - * - * Author: Zach Brown - * Author: Phil Schwan - */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#include "tracefile.h" - -#include "../../include/linux/libcfs/libcfs.h" - -/* XXX move things up to the top, comment */ -union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; - -char cfs_tracefile[TRACEFILE_NAME_SIZE]; -long long cfs_tracefile_size = CFS_TRACEFILE_SIZE; -static struct tracefiled_ctl trace_tctl; -static DEFINE_MUTEX(cfs_trace_thread_mutex); -static int thread_running; - -static atomic_t cfs_tage_allocated = ATOMIC_INIT(0); - -struct page_collection { - struct list_head pc_pages; - /* - * if this flag is set, collect_pages() will spill both - * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, - * only ->tcd_pages are spilled. - */ - int pc_want_daemon_pages; -}; - -struct tracefiled_ctl { - struct completion tctl_start; - struct completion tctl_stop; - wait_queue_head_t tctl_waitq; - pid_t tctl_pid; - atomic_t tctl_shutdown; -}; - -/* - * small data-structure for each page owned by tracefiled. 
- */ -struct cfs_trace_page { - /* - * page itself - */ - struct page *page; - /* - * linkage into one of the lists in trace_data_union or - * page_collection - */ - struct list_head linkage; - /* - * number of bytes used within this page - */ - unsigned int used; - /* - * cpu that owns this page - */ - unsigned short cpu; - /* - * type(context) of this page - */ - unsigned short type; -}; - -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct cfs_trace_cpu_data *tcd); - -static inline struct cfs_trace_page * -cfs_tage_from_list(struct list_head *list) -{ - return list_entry(list, struct cfs_trace_page, linkage); -} - -static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) -{ - struct page *page; - struct cfs_trace_page *tage; - - /* My caller is trying to free memory */ - if (!in_interrupt() && memory_pressure_get()) - return NULL; - - /* - * Don't spam console with allocation failures: they will be reported - * by upper layer anyway. - */ - gfp |= __GFP_NOWARN; - page = alloc_page(gfp); - if (!page) - return NULL; - - tage = kmalloc(sizeof(*tage), gfp); - if (!tage) { - __free_page(page); - return NULL; - } - - tage->page = page; - atomic_inc(&cfs_tage_allocated); - return tage; -} - -static void cfs_tage_free(struct cfs_trace_page *tage) -{ - __free_page(tage->page); - kfree(tage); - atomic_dec(&cfs_tage_allocated); -} - -static void cfs_tage_to_tail(struct cfs_trace_page *tage, - struct list_head *queue) -{ - list_move_tail(&tage->linkage, queue); -} - -int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, - struct list_head *stock) -{ - int i; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - - for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) { - struct cfs_trace_page *tage; - - tage = cfs_tage_alloc(gfp); - if (!tage) - break; - list_add_tail(&tage->linkage, stock); - } - return i; -} - -/* return a page that has 'len' bytes left at the end */ -static struct cfs_trace_page * -cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) -{ - struct cfs_trace_page *tage; - - if (tcd->tcd_cur_pages > 0) { - __LASSERT(!list_empty(&tcd->tcd_pages)); - tage = cfs_tage_from_list(tcd->tcd_pages.prev); - if (tage->used + len <= PAGE_CACHE_SIZE) - return tage; - } - - if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { - if (tcd->tcd_cur_stock_pages > 0) { - tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev); - --tcd->tcd_cur_stock_pages; - list_del_init(&tage->linkage); - } else { - tage = cfs_tage_alloc(GFP_ATOMIC); - if (unlikely(!tage)) { - if ((!memory_pressure_get() || - in_interrupt()) && printk_ratelimit()) - printk(KERN_WARNING - "cannot allocate a tage (%ld)\n", - tcd->tcd_cur_pages); - return NULL; - } - } - - tage->used = 0; - tage->cpu = smp_processor_id(); - tage->type = tcd->tcd_type; - list_add_tail(&tage->linkage, &tcd->tcd_pages); - tcd->tcd_cur_pages++; - - if (tcd->tcd_cur_pages > 8 && thread_running) { - struct tracefiled_ctl *tctl = &trace_tctl; - /* - * wake up tracefiled to process some pages. - */ - wake_up(&tctl->tctl_waitq); - } - return tage; - } - return NULL; -} - -static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd) -{ - int pgcount = tcd->tcd_cur_pages / 10; - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - - if (printk_ratelimit()) - printk(KERN_WARNING "debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n", - pgcount + 1, tcd->tcd_cur_pages); - - INIT_LIST_HEAD(&pc.pc_pages); - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - if (pgcount-- == 0) - break; - - list_move_tail(&tage->linkage, &pc.pc_pages); - tcd->tcd_cur_pages--; - } - put_pages_on_tcd_daemon_list(&pc, tcd); -} - -/* return a page that has 'len' bytes left at the end */ -static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd, - unsigned long len) -{ - struct cfs_trace_page *tage; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - if (len > PAGE_CACHE_SIZE) { - pr_err("cowardly refusing to write %lu bytes in a page\n", len); - return NULL; - } - - tage = cfs_trace_get_tage_try(tcd, len); - if (tage) - return tage; - if (thread_running) - cfs_tcd_shrink(tcd); - if (tcd->tcd_cur_pages > 0) { - tage = cfs_tage_from_list(tcd->tcd_pages.next); - tage->used = 0; - cfs_tage_to_tail(tage, &tcd->tcd_pages); - } - return tage; -} - -int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, - const char *format, ...) -{ - va_list args; - int rc; - - va_start(args, format); - rc = libcfs_debug_vmsg2(msgdata, format, args, NULL); - va_end(args); - - return rc; -} -EXPORT_SYMBOL(libcfs_debug_msg); - -int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct cfs_trace_cpu_data *tcd = NULL; - struct ptldebug_header header = {0}; - struct cfs_trace_page *tage; - /* string_buf is used only if tcd != NULL, and is always set then */ - char *string_buf = NULL; - char *debug_buf; - int known_size; - int needed = 85; /* average message length */ - int max_nob; - va_list ap; - int depth; - int i; - int remain; - int mask = msgdata->msg_mask; - const char *file = kbasename(msgdata->msg_file); - struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; - - tcd = cfs_trace_get_tcd(); - - /* cfs_trace_get_tcd() grabs a lock, which disables preemption and - * pins us to a particular CPU. This avoids an smp_processor_id() - * warning on Linux when debugging is enabled. - */ - cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK()); - - if (!tcd) /* arch may not log in IRQ context */ - goto console; - - if (tcd->tcd_cur_pages == 0) - header.ph_flags |= PH_FLAG_FIRST_RECORD; - - if (tcd->tcd_shutting_down) { - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - depth = __current_nesting_level(); - known_size = strlen(file) + 1 + depth; - if (msgdata->msg_fn) - known_size += strlen(msgdata->msg_fn) + 1; - - if (libcfs_debug_binary) - known_size += sizeof(header); - - /* - * '2' used because vsnprintf return real size required for output - * _without_ terminating NULL. - * if needed is to small for this format. 
- */ - for (i = 0; i < 2; i++) { - tage = cfs_trace_get_tage(tcd, needed + known_size + 1); - if (!tage) { - if (needed + known_size > PAGE_CACHE_SIZE) - mask |= D_ERROR; - - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - string_buf = (char *)page_address(tage->page) + - tage->used + known_size; - - max_nob = PAGE_CACHE_SIZE - tage->used - known_size; - if (max_nob <= 0) { - printk(KERN_EMERG "negative max_nob: %d\n", - max_nob); - mask |= D_ERROR; - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, max_nob, format1, ap); - va_end(ap); - } - - if (format2) { - remain = max_nob - needed; - if (remain < 0) - remain = 0; - - va_start(ap, format2); - needed += vsnprintf(string_buf + needed, remain, - format2, ap); - va_end(ap); - } - - if (needed < max_nob) /* well. printing ok.. */ - break; - } - - if (*(string_buf + needed - 1) != '\n') - printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", - file, msgdata->msg_line, msgdata->msg_fn); - - header.ph_len = known_size + needed; - debug_buf = (char *)page_address(tage->page) + tage->used; - - if (libcfs_debug_binary) { - memcpy(debug_buf, &header, sizeof(header)); - tage->used += sizeof(header); - debug_buf += sizeof(header); - } - - /* indent message according to the nesting level */ - while (depth-- > 0) { - *(debug_buf++) = '.'; - ++tage->used; - } - - strcpy(debug_buf, file); - tage->used += strlen(file) + 1; - debug_buf += strlen(file) + 1; - - if (msgdata->msg_fn) { - strcpy(debug_buf, msgdata->msg_fn); - tage->used += strlen(msgdata->msg_fn) + 1; - debug_buf += strlen(msgdata->msg_fn) + 1; - } - - __LASSERT(debug_buf == string_buf); - - tage->used += needed; - __LASSERT(tage->used <= PAGE_CACHE_SIZE); - -console: - if ((mask & libcfs_printk) == 0) { - /* no console output requested */ - if (tcd) - cfs_trace_put_tcd(tcd); - return 1; - } - - if (cdls) { - if (libcfs_console_ratelimit && - 
cdls->cdls_next != 0 && /* not first time ever */ - !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { - /* skipping a console message */ - cdls->cdls_count++; - if (tcd) - cfs_trace_put_tcd(tcd); - return 1; - } - - if (cfs_time_after(cfs_time_current(), - cdls->cdls_next + libcfs_console_max_delay + - cfs_time_seconds(10))) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= libcfs_console_backoff * 4; - } else { - cdls->cdls_delay *= libcfs_console_backoff; - } - - if (cdls->cdls_delay < libcfs_console_min_delay) - cdls->cdls_delay = libcfs_console_min_delay; - else if (cdls->cdls_delay > libcfs_console_max_delay) - cdls->cdls_delay = libcfs_console_max_delay; - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; - } - - if (tcd) { - cfs_print_to_console(&header, mask, string_buf, needed, file, - msgdata->msg_fn); - cfs_trace_put_tcd(tcd); - } else { - string_buf = cfs_trace_get_console_buffer(); - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, - CFS_TRACE_CONSOLE_BUFFER_SIZE, - format1, ap); - va_end(ap); - } - if (format2) { - remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; - if (remain > 0) { - va_start(ap, format2); - needed += vsnprintf(string_buf + needed, remain, - format2, ap); - va_end(ap); - } - } - cfs_print_to_console(&header, mask, - string_buf, needed, file, msgdata->msg_fn); - - put_cpu(); - } - - if (cdls && cdls->cdls_count != 0) { - string_buf = cfs_trace_get_console_buffer(); - - needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, - "Skipped %d previous similar message%s\n", - cdls->cdls_count, - (cdls->cdls_count > 1) ? 
"s" : ""); - - cfs_print_to_console(&header, mask, - string_buf, needed, file, msgdata->msg_fn); - - put_cpu(); - cdls->cdls_count = 0; - } - - return 0; -} -EXPORT_SYMBOL(libcfs_debug_vmsg2); - -void -cfs_trace_assertion_failed(const char *str, - struct libcfs_debug_msg_data *msgdata) -{ - struct ptldebug_header hdr; - - libcfs_panic_in_progress = 1; - libcfs_catastrophe = 1; - mb(); - - cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK()); - - cfs_print_to_console(&hdr, D_EMERG, str, strlen(str), - msgdata->msg_file, msgdata->msg_fn); - - panic("Lustre debug assertion failure\n"); - - /* not reached */ -} - -static void -panic_collect_pages(struct page_collection *pc) -{ - /* Do the collect_pages job on a single CPU: assumes that all other - * CPUs have been stopped during a panic. If this isn't true for some - * arch, this will have to be implemented separately in each arch. - */ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - - INIT_LIST_HEAD(&pc->pc_pages); - - cfs_tcd_for_each(tcd, i, j) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } -} - -static void collect_pages_on_all_cpus(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, - &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } - } -} - -static void collect_pages(struct page_collection *pc) -{ - INIT_LIST_HEAD(&pc->pc_pages); - - if (libcfs_panic_in_progress) - panic_collect_pages(pc); - else - collect_pages_on_all_cpus(pc); -} - -static void put_pages_back_on_all_cpus(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - struct list_head *cur_head; - struct 
cfs_trace_page *tage; - struct cfs_trace_page *tmp; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - cur_head = tcd->tcd_pages.next; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, - linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != cpu || tage->type != i) - continue; - - cfs_tage_to_tail(tage, cur_head); - tcd->tcd_cur_pages++; - } - } - } -} - -static void put_pages_back(struct page_collection *pc) -{ - if (!libcfs_panic_in_progress) - put_pages_back_on_all_cpus(pc); -} - -/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that - * we have a good amount of data at all times for dumping during an LBUG, even - * if we have been steadily writing (and otherwise discarding) pages via the - * debug daemon. - */ -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct cfs_trace_cpu_data *tcd) -{ - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type) - continue; - - cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages++; - - if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { - struct cfs_trace_page *victim; - - __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); - victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next); - - __LASSERT_TAGE_INVARIANT(victim); - - list_del(&victim->linkage); - cfs_tage_free(victim); - tcd->tcd_cur_daemon_pages--; - } - } -} - -static void put_pages_on_daemon_list(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) - put_pages_on_tcd_daemon_list(pc, tcd); - } -} - -void cfs_trace_debug_print(void) -{ - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - pc.pc_want_daemon_pages = 1; - 
collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - char *p, *file, *fn; - struct page *page; - - __LASSERT_TAGE_INVARIANT(tage); - - page = tage->page; - p = page_address(page); - while (p < ((char *)page_address(page) + tage->used)) { - struct ptldebug_header *hdr; - int len; - - hdr = (void *)p; - p += sizeof(*hdr); - file = p; - p += strlen(file) + 1; - fn = p; - p += strlen(fn) + 1; - len = hdr->ph_len - (int)(p - (char *)hdr); - - cfs_print_to_console(hdr, D_EMERG, p, len, file, fn); - - p += len; - } - - list_del(&tage->linkage); - cfs_tage_free(tage); - } -} - -int cfs_tracefile_dump_all_pages(char *filename) -{ - struct page_collection pc; - struct file *filp; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - char *buf; - int rc; - - DECL_MMSPACE; - - cfs_tracefile_write_lock(); - - filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - filp = NULL; - pr_err("LustreError: can't open %s for dump: rc %d\n", - filename, rc); - goto out; - } - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) { - rc = 0; - goto close; - } - - /* ok, for now, just write the pages. 
in the future we'll be building - * iobufs with the pages and calling generic_direct_IO - */ - MMSPACE_OPEN; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - buf = kmap(tage->page); - rc = vfs_write(filp, (__force const char __user *)buf, - tage->used, &filp->f_pos); - kunmap(tage->page); - - if (rc != (int)tage->used) { - printk(KERN_WARNING "wanted to write %u but wrote %d\n", - tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - list_del(&tage->linkage); - cfs_tage_free(tage); - } - MMSPACE_CLOSE; - rc = vfs_fsync(filp, 1); - if (rc) - pr_err("sync returns %d\n", rc); -close: - filp_close(filp, NULL); -out: - cfs_tracefile_write_unlock(); - return rc; -} - -void cfs_trace_flush_pages(void) -{ - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - cfs_tage_free(tage); - } -} - -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob) -{ - int nob; - - if (usr_buffer_nob > knl_buffer_nob) - return -EOVERFLOW; - - if (copy_from_user((void *)knl_buffer, - usr_buffer, usr_buffer_nob)) - return -EFAULT; - - nob = strnlen(knl_buffer, usr_buffer_nob); - while (nob-- >= 0) /* strip trailing whitespace */ - if (!isspace(knl_buffer[nob])) - break; - - if (nob < 0) /* empty string */ - return -EINVAL; - - if (nob == knl_buffer_nob) /* no space to terminate */ - return -EOVERFLOW; - - knl_buffer[nob + 1] = 0; /* terminate */ - return 0; -} -EXPORT_SYMBOL(cfs_trace_copyin_string); - -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_buffer, char *append) -{ - /* - * NB if 'append' != NULL, it's a single character to append to the - * copied out string - usually "\n" 
or "" (i.e. a terminating zero byte) - */ - int nob = strlen(knl_buffer); - - if (nob > usr_buffer_nob) - nob = usr_buffer_nob; - - if (copy_to_user(usr_buffer, knl_buffer, nob)) - return -EFAULT; - - if (append && nob < usr_buffer_nob) { - if (copy_to_user(usr_buffer + nob, append, 1)) - return -EFAULT; - - nob++; - } - - return nob; -} -EXPORT_SYMBOL(cfs_trace_copyout_string); - -int cfs_trace_allocate_string_buffer(char **str, int nob) -{ - if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ - return -EINVAL; - - *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); - if (!*str) - return -ENOMEM; - - return 0; -} - -int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) -{ - char *str; - int rc; - - rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc != 0) - return rc; - - rc = cfs_trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (rc != 0) - goto out; - - if (str[0] != '/') { - rc = -EINVAL; - goto out; - } - rc = cfs_tracefile_dump_all_pages(str); -out: - kfree(str); - return rc; -} - -int cfs_trace_daemon_command(char *str) -{ - int rc = 0; - - cfs_tracefile_write_lock(); - - if (strcmp(str, "stop") == 0) { - cfs_tracefile_write_unlock(); - cfs_trace_stop_thread(); - cfs_tracefile_write_lock(); - memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); - - } else if (strncmp(str, "size=", 5) == 0) { - unsigned long tmp; - - rc = kstrtoul(str + 5, 10, &tmp); - if (!rc) { - if (tmp < 10 || tmp > 20480) - cfs_tracefile_size = CFS_TRACEFILE_SIZE; - else - cfs_tracefile_size = tmp << 20; - } - } else if (strlen(str) >= sizeof(cfs_tracefile)) { - rc = -ENAMETOOLONG; - } else if (str[0] != '/') { - rc = -EINVAL; - } else { - strcpy(cfs_tracefile, str); - - printk(KERN_INFO - "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n", - cfs_tracefile, - (long)(cfs_tracefile_size >> 10)); - - cfs_trace_start_thread(); - } - - cfs_tracefile_write_unlock(); - return rc; -} - -int 
cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) -{ - char *str; - int rc; - - rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc != 0) - return rc; - - rc = cfs_trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (rc == 0) - rc = cfs_trace_daemon_command(str); - - kfree(str); - return rc; -} - -int cfs_trace_set_debug_mb(int mb) -{ - int i; - int j; - int pages; - int limit = cfs_trace_max_debug_mb(); - struct cfs_trace_cpu_data *tcd; - - if (mb < num_possible_cpus()) { - printk(KERN_WARNING - "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n", - mb, num_possible_cpus()); - mb = num_possible_cpus(); - } - - if (mb > limit) { - printk(KERN_WARNING - "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n", - mb, limit); - mb = limit; - } - - mb /= num_possible_cpus(); - pages = mb << (20 - PAGE_CACHE_SHIFT); - - cfs_tracefile_write_lock(); - - cfs_tcd_for_each(tcd, i, j) - tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; - - cfs_tracefile_write_unlock(); - - return 0; -} - -int cfs_trace_get_debug_mb(void) -{ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - int total_pages = 0; - - cfs_tracefile_read_lock(); - - cfs_tcd_for_each(tcd, i, j) - total_pages += tcd->tcd_max_pages; - - cfs_tracefile_read_unlock(); - - return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1; -} - -static int tracefiled(void *arg) -{ - struct page_collection pc; - struct tracefiled_ctl *tctl = arg; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - struct file *filp; - char *buf; - int last_loop = 0; - int rc; - - DECL_MMSPACE; - - /* we're started late enough that we pick up init's fs context */ - /* this is so broken in uml? what on earth is going on? 
*/ - - complete(&tctl->tctl_start); - - while (1) { - wait_queue_t __wait; - - pc.pc_want_daemon_pages = 0; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) - goto end_loop; - - filp = NULL; - cfs_tracefile_read_lock(); - if (cfs_tracefile[0] != 0) { - filp = filp_open(cfs_tracefile, - O_CREAT | O_RDWR | O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - filp = NULL; - printk(KERN_WARNING "couldn't open %s: %d\n", - cfs_tracefile, rc); - } - } - cfs_tracefile_read_unlock(); - if (!filp) { - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - goto end_loop; - } - - MMSPACE_OPEN; - - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - static loff_t f_pos; - - __LASSERT_TAGE_INVARIANT(tage); - - if (f_pos >= (off_t)cfs_tracefile_size) - f_pos = 0; - else if (f_pos > i_size_read(file_inode(filp))) - f_pos = i_size_read(file_inode(filp)); - - buf = kmap(tage->page); - rc = vfs_write(filp, (__force const char __user *)buf, - tage->used, &f_pos); - kunmap(tage->page); - - if (rc != (int)tage->used) { - printk(KERN_WARNING "wanted to write %u but wrote %d\n", - tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - } - MMSPACE_CLOSE; - - filp_close(filp, NULL); - put_pages_on_daemon_list(&pc); - if (!list_empty(&pc.pc_pages)) { - int i; - - printk(KERN_ALERT "Lustre: trace pages aren't empty\n"); - pr_err("total cpus(%d): ", num_possible_cpus()); - for (i = 0; i < num_possible_cpus(); i++) - if (cpu_online(i)) - pr_cont("%d(on) ", i); - else - pr_cont("%d(off) ", i); - pr_cont("\n"); - - i = 0; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, - linkage) - pr_err("page %d belongs to cpu %d\n", - ++i, tage->cpu); - pr_err("There are %d pages unwritten\n", i); - } - __LASSERT(list_empty(&pc.pc_pages)); -end_loop: - if (atomic_read(&tctl->tctl_shutdown)) { - if (last_loop == 0) { - last_loop = 1; - continue; - } else { - break; - } - } - init_waitqueue_entry(&__wait, current); - 
add_wait_queue(&tctl->tctl_waitq, &__wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); - remove_wait_queue(&tctl->tctl_waitq, &__wait); - } - complete(&tctl->tctl_stop); - return 0; -} - -int cfs_trace_start_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - struct task_struct *task; - int rc = 0; - - mutex_lock(&cfs_trace_thread_mutex); - if (thread_running) - goto out; - - init_completion(&tctl->tctl_start); - init_completion(&tctl->tctl_stop); - init_waitqueue_head(&tctl->tctl_waitq); - atomic_set(&tctl->tctl_shutdown, 0); - - task = kthread_run(tracefiled, tctl, "ktracefiled"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - goto out; - } - - wait_for_completion(&tctl->tctl_start); - thread_running = 1; -out: - mutex_unlock(&cfs_trace_thread_mutex); - return rc; -} - -void cfs_trace_stop_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - - mutex_lock(&cfs_trace_thread_mutex); - if (thread_running) { - printk(KERN_INFO - "Lustre: shutting down debug daemon thread...\n"); - atomic_set(&tctl->tctl_shutdown, 1); - wait_for_completion(&tctl->tctl_stop); - thread_running = 0; - } - mutex_unlock(&cfs_trace_thread_mutex); -} - -int cfs_tracefile_init(int max_pages) -{ - struct cfs_trace_cpu_data *tcd; - int i; - int j; - int rc; - int factor; - - rc = cfs_tracefile_init_arch(); - if (rc != 0) - return rc; - - cfs_tcd_for_each(tcd, i, j) { - /* tcd_pages_factor is initialized int tracefile_init_arch. 
*/ - factor = tcd->tcd_pages_factor; - INIT_LIST_HEAD(&tcd->tcd_pages); - INIT_LIST_HEAD(&tcd->tcd_stock_pages); - INIT_LIST_HEAD(&tcd->tcd_daemon_pages); - tcd->tcd_cur_pages = 0; - tcd->tcd_cur_stock_pages = 0; - tcd->tcd_cur_daemon_pages = 0; - tcd->tcd_max_pages = (max_pages * factor) / 100; - LASSERT(tcd->tcd_max_pages > 0); - tcd->tcd_shutting_down = 0; - } - - return 0; -} - -static void trace_cleanup_on_all_cpus(void) -{ - struct cfs_trace_cpu_data *tcd; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - tcd->tcd_shutting_down = 1; - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, - linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - cfs_tage_free(tage); - } - - tcd->tcd_cur_pages = 0; - } - } -} - -static void cfs_trace_cleanup(void) -{ - struct page_collection pc; - - INIT_LIST_HEAD(&pc.pc_pages); - - trace_cleanup_on_all_cpus(); - - cfs_tracefile_fini_arch(); -} - -void cfs_tracefile_exit(void) -{ - cfs_trace_stop_thread(); - cfs_trace_cleanup(); -} diff --git a/drivers/staging/lustre/lustre/libcfs/tracefile.h b/drivers/staging/lustre/lustre/libcfs/tracefile.h deleted file mode 100644 index 4c77f9044dd3..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/tracefile.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LIBCFS_TRACEFILE_H__ -#define __LIBCFS_TRACEFILE_H__ - -#include "../../include/linux/libcfs/libcfs.h" - -enum cfs_trace_buf_type { - CFS_TCD_TYPE_PROC = 0, - CFS_TCD_TYPE_SOFTIRQ, - CFS_TCD_TYPE_IRQ, - CFS_TCD_TYPE_MAX -}; - -/* trace file lock routines */ - -#define TRACEFILE_NAME_SIZE 1024 -extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; -extern long long cfs_tracefile_size; - -void libcfs_run_debug_log_upcall(char *file); - -int cfs_tracefile_init_arch(void); -void cfs_tracefile_fini_arch(void); - -void cfs_tracefile_read_lock(void); -void cfs_tracefile_read_unlock(void); -void cfs_tracefile_write_lock(void); -void cfs_tracefile_write_unlock(void); - -int cfs_tracefile_dump_all_pages(char *filename); -void cfs_trace_debug_print(void); -void cfs_trace_flush_pages(void); -int cfs_trace_start_thread(void); -void cfs_trace_stop_thread(void); -int cfs_tracefile_init(int max_pages); -void cfs_tracefile_exit(void); - -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob); -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_str, char *append); -int cfs_trace_allocate_string_buffer(char **str, int nob); -int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int 
usr_str_nob); -int cfs_trace_daemon_command(char *str); -int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob); -int cfs_trace_set_debug_mb(int mb); -int cfs_trace_get_debug_mb(void); - -void libcfs_debug_dumplog_internal(void *arg); -void libcfs_register_panic_notifier(void); -void libcfs_unregister_panic_notifier(void); -extern int libcfs_panic_in_progress; -int cfs_trace_max_debug_mb(void); - -#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) -#define CFS_TRACEFILE_SIZE (500 << 20) - -#ifdef LUSTRE_TRACEFILE_PRIVATE - -/* - * Private declare for tracefile - */ -#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) - -#define CFS_TRACEFILE_SIZE (500 << 20) - -/* - * Size of a buffer for sprinting console messages if we can't get a page - * from system - */ -#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024 - -union cfs_trace_data_union { - struct cfs_trace_cpu_data { - /* - * Even though this structure is meant to be per-CPU, locking - * is needed because in some places the data may be accessed - * from other CPUs. This lock is directly used in trace_get_tcd - * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and - * tcd_for_each_type_lock - */ - spinlock_t tcd_lock; - unsigned long tcd_lock_flags; - - /* - * pages with trace records not yet processed by tracefiled. - */ - struct list_head tcd_pages; - /* number of pages on ->tcd_pages */ - unsigned long tcd_cur_pages; - - /* - * pages with trace records already processed by - * tracefiled. These pages are kept in memory, so that some - * portion of log can be written in the event of LBUG. This - * list is maintained in LRU order. - * - * Pages are moved to ->tcd_daemon_pages by tracefiled() - * (put_pages_on_daemon_list()). LRU pages from this list are - * discarded when list grows too large. 
- */ - struct list_head tcd_daemon_pages; - /* number of pages on ->tcd_daemon_pages */ - unsigned long tcd_cur_daemon_pages; - - /* - * Maximal number of pages allowed on ->tcd_pages and - * ->tcd_daemon_pages each. - * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current - * implementation. - */ - unsigned long tcd_max_pages; - - /* - * preallocated pages to write trace records into. Pages from - * ->tcd_stock_pages are moved to ->tcd_pages by - * portals_debug_msg(). - * - * This list is necessary, because on some platforms it's - * impossible to perform efficient atomic page allocation in a - * non-blockable context. - * - * Such platforms fill ->tcd_stock_pages "on occasion", when - * tracing code is entered in blockable context. - * - * trace_get_tage_try() tries to get a page from - * ->tcd_stock_pages first and resorts to atomic page - * allocation only if this queue is empty. ->tcd_stock_pages - * is replenished when tracing code is entered in blocking - * context (darwin-tracefile.c:trace_get_tcd()). We try to - * maintain TCD_STOCK_PAGES (40 by default) pages in this - * queue. Atomic allocation is only required if more than - * TCD_STOCK_PAGES pagesful are consumed by trace records all - * emitted in non-blocking contexts. Which is quite unlikely. - */ - struct list_head tcd_stock_pages; - /* number of pages on ->tcd_stock_pages */ - unsigned long tcd_cur_stock_pages; - - unsigned short tcd_shutting_down; - unsigned short tcd_cpu; - unsigned short tcd_type; - /* The factors to share debug memory. 
*/ - unsigned short tcd_pages_factor; - } tcd; - char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))]; -}; - -#define TCD_MAX_TYPES 8 -extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS]; - -#define cfs_tcd_for_each(tcd, i, j) \ - for (i = 0; cfs_trace_data[i]; i++) \ - for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \ - j < num_possible_cpus(); \ - j++, (tcd) = &(*cfs_trace_data[i])[j].tcd) - -#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \ - for (i = 0; cfs_trace_data[i] && \ - (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \ - cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++) - -void cfs_set_ptldebug_header(struct ptldebug_header *header, - struct libcfs_debug_msg_data *m, - unsigned long stack); -void cfs_print_to_console(struct ptldebug_header *hdr, int mask, - const char *buf, int len, const char *file, - const char *fn); - -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking); -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking); - -extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; -enum cfs_trace_buf_type cfs_trace_buf_idx_get(void); - -static inline char * -cfs_trace_get_console_buffer(void) -{ - unsigned int i = get_cpu(); - unsigned int j = cfs_trace_buf_idx_get(); - - return cfs_trace_console_buffers[i][j]; -} - -static inline struct cfs_trace_cpu_data * -cfs_trace_get_tcd(void) -{ - struct cfs_trace_cpu_data *tcd = - &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd; - - cfs_trace_lock_tcd(tcd, 0); - - return tcd; -} - -static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd) -{ - cfs_trace_unlock_tcd(tcd, 0); - - put_cpu(); -} - -int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, - struct list_head *stock); - -void cfs_trace_assertion_failed(const char *str, - struct libcfs_debug_msg_data *m); - -/* ASSERTION that is safe to use within the debug system */ -#define __LASSERT(cond) \ -do { \ - if (unlikely(!(cond))) { 
\ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ - cfs_trace_assertion_failed("ASSERTION("#cond") failed", \ - &msgdata); \ - } \ -} while (0) - -#define __LASSERT_TAGE_INVARIANT(tage) \ -do { \ - __LASSERT(tage); \ - __LASSERT(tage->page); \ - __LASSERT(tage->used <= PAGE_CACHE_SIZE); \ - __LASSERT(page_count(tage->page) > 0); \ -} while (0) - -#endif /* LUSTRE_TRACEFILE_PRIVATE */ - -#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/drivers/staging/lustre/lustre/libcfs/workitem.c b/drivers/staging/lustre/lustre/libcfs/workitem.c deleted file mode 100644 index f2ebed8e6ef5..000000000000 --- a/drivers/staging/lustre/lustre/libcfs/workitem.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/workitem.c - * - * Author: Isaac Huang - * Liang Zhen - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "../../include/linux/libcfs/libcfs.h" - -#define CFS_WS_NAME_LEN 16 - -struct cfs_wi_sched { - /* chain on global list */ - struct list_head ws_list; - /** serialised workitems */ - spinlock_t ws_lock; - /** where schedulers sleep */ - wait_queue_head_t ws_waitq; - /** concurrent workitems */ - struct list_head ws_runq; - /** - * rescheduled running-workitems, a workitem can be rescheduled - * while running in wi_action(), but we don't to execute it again - * unless it returns from wi_action(), so we put it on ws_rerunq - * while rescheduling, and move it to runq after it returns - * from wi_action() - */ - struct list_head ws_rerunq; - /** CPT-table for this scheduler */ - struct cfs_cpt_table *ws_cptab; - /** CPT id for affinity */ - int ws_cpt; - /** number of scheduled workitems */ - int ws_nscheduled; - /** started scheduler thread, protected by cfs_wi_data::wi_glock */ - unsigned int ws_nthreads:30; - /** shutting down, protected by cfs_wi_data::wi_glock */ - unsigned int ws_stopping:1; - /** serialize starting thread, protected by cfs_wi_data::wi_glock */ - unsigned int ws_starting:1; - /** scheduler name */ - char ws_name[CFS_WS_NAME_LEN]; -}; - -static struct cfs_workitem_data { - /** serialize */ - spinlock_t wi_glock; - /** list of all schedulers */ - struct list_head wi_scheds; - /** WI module is initialized */ - int wi_init; - /** shutting down the whole WI module */ - int wi_stopping; -} cfs_wi_data; - -static inline int -cfs_wi_sched_cansleep(struct cfs_wi_sched *sched) -{ - spin_lock(&sched->ws_lock); - if (sched->ws_stopping) { - spin_unlock(&sched->ws_lock); - return 0; - } - - if (!list_empty(&sched->ws_runq)) { - spin_unlock(&sched->ws_lock); - return 0; - } - spin_unlock(&sched->ws_lock); - return 1; -} - 
-/* XXX: - * 0. it only works when called from wi->wi_action. - * 1. when it returns no one shall try to schedule the workitem. - */ -void -cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi) -{ - LASSERT(!in_interrupt()); /* because we use plain spinlock */ - LASSERT(!sched->ws_stopping); - - spin_lock(&sched->ws_lock); - - LASSERT(wi->wi_running); - if (wi->wi_scheduled) { /* cancel pending schedules */ - LASSERT(!list_empty(&wi->wi_list)); - list_del_init(&wi->wi_list); - - LASSERT(sched->ws_nscheduled > 0); - sched->ws_nscheduled--; - } - - LASSERT(list_empty(&wi->wi_list)); - - wi->wi_scheduled = 1; /* LBUG future schedule attempts */ - spin_unlock(&sched->ws_lock); -} -EXPORT_SYMBOL(cfs_wi_exit); - -/** - * cancel schedule request of workitem \a wi - */ -int -cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi) -{ - int rc; - - LASSERT(!in_interrupt()); /* because we use plain spinlock */ - LASSERT(!sched->ws_stopping); - - /* - * return 0 if it's running already, otherwise return 1, which - * means the workitem will not be scheduled and will not have - * any race with wi_action. - */ - spin_lock(&sched->ws_lock); - - rc = !(wi->wi_running); - - if (wi->wi_scheduled) { /* cancel pending schedules */ - LASSERT(!list_empty(&wi->wi_list)); - list_del_init(&wi->wi_list); - - LASSERT(sched->ws_nscheduled > 0); - sched->ws_nscheduled--; - - wi->wi_scheduled = 0; - } - - LASSERT(list_empty(&wi->wi_list)); - - spin_unlock(&sched->ws_lock); - return rc; -} -EXPORT_SYMBOL(cfs_wi_deschedule); - -/* - * Workitem scheduled with (serial == 1) is strictly serialised not only with - * itself, but also with others scheduled this way. - * - * Now there's only one static serialised queue, but in the future more might - * be added, and even dynamic creation of serialised queues might be supported. 
- */ -void -cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi) -{ - LASSERT(!in_interrupt()); /* because we use plain spinlock */ - LASSERT(!sched->ws_stopping); - - spin_lock(&sched->ws_lock); - - if (!wi->wi_scheduled) { - LASSERT(list_empty(&wi->wi_list)); - - wi->wi_scheduled = 1; - sched->ws_nscheduled++; - if (!wi->wi_running) { - list_add_tail(&wi->wi_list, &sched->ws_runq); - wake_up(&sched->ws_waitq); - } else { - list_add(&wi->wi_list, &sched->ws_rerunq); - } - } - - LASSERT(!list_empty(&wi->wi_list)); - spin_unlock(&sched->ws_lock); -} -EXPORT_SYMBOL(cfs_wi_schedule); - -static int cfs_wi_scheduler(void *arg) -{ - struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg; - - cfs_block_allsigs(); - - /* CPT affinity scheduler? */ - if (sched->ws_cptab) - if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0) - CWARN("Failed to bind %s on CPT %d\n", - sched->ws_name, sched->ws_cpt); - - spin_lock(&cfs_wi_data.wi_glock); - - LASSERT(sched->ws_starting == 1); - sched->ws_starting--; - sched->ws_nthreads++; - - spin_unlock(&cfs_wi_data.wi_glock); - - spin_lock(&sched->ws_lock); - - while (!sched->ws_stopping) { - int nloops = 0; - int rc; - cfs_workitem_t *wi; - - while (!list_empty(&sched->ws_runq) && - nloops < CFS_WI_RESCHED) { - wi = list_entry(sched->ws_runq.next, cfs_workitem_t, - wi_list); - LASSERT(wi->wi_scheduled && !wi->wi_running); - - list_del_init(&wi->wi_list); - - LASSERT(sched->ws_nscheduled > 0); - sched->ws_nscheduled--; - - wi->wi_running = 1; - wi->wi_scheduled = 0; - - spin_unlock(&sched->ws_lock); - nloops++; - - rc = (*wi->wi_action) (wi); - - spin_lock(&sched->ws_lock); - if (rc != 0) /* WI should be dead, even be freed! 
*/ - continue; - - wi->wi_running = 0; - if (list_empty(&wi->wi_list)) - continue; - - LASSERT(wi->wi_scheduled); - /* wi is rescheduled, should be on rerunq now, we - * move it to runq so it can run action now - */ - list_move_tail(&wi->wi_list, &sched->ws_runq); - } - - if (!list_empty(&sched->ws_runq)) { - spin_unlock(&sched->ws_lock); - /* don't sleep because some workitems still - * expect me to come back soon - */ - cond_resched(); - spin_lock(&sched->ws_lock); - continue; - } - - spin_unlock(&sched->ws_lock); - rc = wait_event_interruptible_exclusive(sched->ws_waitq, - !cfs_wi_sched_cansleep(sched)); - spin_lock(&sched->ws_lock); - } - - spin_unlock(&sched->ws_lock); - - spin_lock(&cfs_wi_data.wi_glock); - sched->ws_nthreads--; - spin_unlock(&cfs_wi_data.wi_glock); - - return 0; -} - -void -cfs_wi_sched_destroy(struct cfs_wi_sched *sched) -{ - int i; - - LASSERT(cfs_wi_data.wi_init); - LASSERT(!cfs_wi_data.wi_stopping); - - spin_lock(&cfs_wi_data.wi_glock); - if (sched->ws_stopping) { - CDEBUG(D_INFO, "%s is in progress of stopping\n", - sched->ws_name); - spin_unlock(&cfs_wi_data.wi_glock); - return; - } - - LASSERT(!list_empty(&sched->ws_list)); - sched->ws_stopping = 1; - - spin_unlock(&cfs_wi_data.wi_glock); - - i = 2; - wake_up_all(&sched->ws_waitq); - - spin_lock(&cfs_wi_data.wi_glock); - while (sched->ws_nthreads > 0) { - CDEBUG(is_power_of_2(++i) ? 
D_WARNING : D_NET, - "waiting for %d threads of WI sched[%s] to terminate\n", - sched->ws_nthreads, sched->ws_name); - - spin_unlock(&cfs_wi_data.wi_glock); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1) / 20); - spin_lock(&cfs_wi_data.wi_glock); - } - - list_del(&sched->ws_list); - - spin_unlock(&cfs_wi_data.wi_glock); - LASSERT(sched->ws_nscheduled == 0); - - LIBCFS_FREE(sched, sizeof(*sched)); -} -EXPORT_SYMBOL(cfs_wi_sched_destroy); - -int -cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, - int cpt, int nthrs, struct cfs_wi_sched **sched_pp) -{ - struct cfs_wi_sched *sched; - int rc; - - LASSERT(cfs_wi_data.wi_init); - LASSERT(!cfs_wi_data.wi_stopping); - LASSERT(!cptab || cpt == CFS_CPT_ANY || - (cpt >= 0 && cpt < cfs_cpt_number(cptab))); - - LIBCFS_ALLOC(sched, sizeof(*sched)); - if (!sched) - return -ENOMEM; - - if (strlen(name) > sizeof(sched->ws_name) - 1) { - LIBCFS_FREE(sched, sizeof(*sched)); - return -E2BIG; - } - strncpy(sched->ws_name, name, sizeof(sched->ws_name)); - - sched->ws_cptab = cptab; - sched->ws_cpt = cpt; - - spin_lock_init(&sched->ws_lock); - init_waitqueue_head(&sched->ws_waitq); - INIT_LIST_HEAD(&sched->ws_runq); - INIT_LIST_HEAD(&sched->ws_rerunq); - INIT_LIST_HEAD(&sched->ws_list); - - rc = 0; - while (nthrs > 0) { - char name[16]; - struct task_struct *task; - - spin_lock(&cfs_wi_data.wi_glock); - while (sched->ws_starting > 0) { - spin_unlock(&cfs_wi_data.wi_glock); - schedule(); - spin_lock(&cfs_wi_data.wi_glock); - } - - sched->ws_starting++; - spin_unlock(&cfs_wi_data.wi_glock); - - if (sched->ws_cptab && sched->ws_cpt >= 0) { - snprintf(name, sizeof(name), "%s_%02d_%02u", - sched->ws_name, sched->ws_cpt, - sched->ws_nthreads); - } else { - snprintf(name, sizeof(name), "%s_%02u", - sched->ws_name, sched->ws_nthreads); - } - - task = kthread_run(cfs_wi_scheduler, sched, "%s", name); - if (!IS_ERR(task)) { - nthrs--; - continue; - } - rc = PTR_ERR(task); - - CERROR("Failed to create 
thread for WI scheduler %s: %d\n", - name, rc); - - spin_lock(&cfs_wi_data.wi_glock); - - /* make up for cfs_wi_sched_destroy */ - list_add(&sched->ws_list, &cfs_wi_data.wi_scheds); - sched->ws_starting--; - - spin_unlock(&cfs_wi_data.wi_glock); - - cfs_wi_sched_destroy(sched); - return rc; - } - spin_lock(&cfs_wi_data.wi_glock); - list_add(&sched->ws_list, &cfs_wi_data.wi_scheds); - spin_unlock(&cfs_wi_data.wi_glock); - - *sched_pp = sched; - return 0; -} -EXPORT_SYMBOL(cfs_wi_sched_create); - -int -cfs_wi_startup(void) -{ - memset(&cfs_wi_data, 0, sizeof(cfs_wi_data)); - - spin_lock_init(&cfs_wi_data.wi_glock); - INIT_LIST_HEAD(&cfs_wi_data.wi_scheds); - cfs_wi_data.wi_init = 1; - - return 0; -} - -void -cfs_wi_shutdown(void) -{ - struct cfs_wi_sched *sched; - - spin_lock(&cfs_wi_data.wi_glock); - cfs_wi_data.wi_stopping = 1; - spin_unlock(&cfs_wi_data.wi_glock); - - /* nobody should contend on this list */ - list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { - sched->ws_stopping = 1; - wake_up_all(&sched->ws_waitq); - } - - list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { - spin_lock(&cfs_wi_data.wi_glock); - - while (sched->ws_nthreads != 0) { - spin_unlock(&cfs_wi_data.wi_glock); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1) / 20); - spin_lock(&cfs_wi_data.wi_glock); - } - spin_unlock(&cfs_wi_data.wi_glock); - } - while (!list_empty(&cfs_wi_data.wi_scheds)) { - sched = list_entry(cfs_wi_data.wi_scheds.next, - struct cfs_wi_sched, ws_list); - list_del(&sched->ws_list); - LIBCFS_FREE(sched, sizeof(*sched)); - } - - cfs_wi_data.wi_stopping = 0; - cfs_wi_data.wi_init = 0; -}