From: Mike Marshall Date: Fri, 17 Jul 2015 14:38:11 +0000 (-0400) Subject: Orangefs: kernel client part 1 X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=f7ab093f74bf;p=GitHub%2Fmoto-9609%2Fandroid_kernel_motorola_exynos9610.git Orangefs: kernel client part 1 OrangeFS (formerly PVFS) is an lgpl licensed userspace networked parallel file system. OrangeFS can be accessed through included system utilities, user integration libraries, MPI-IO and can be used by the Hadoop ecosystem as an alternative to the HDFS filesystem. OrangeFS is used widely for parallel science, data analytics and engineering applications. While applications often don't require Orangefs to be mounted into the VFS, users do like to be able to access their files in the normal way. The Orangefs kernel client allows Orangefs filesystems to be mounted as a VFS. The kernel client communicates with a userspace daemon which in turn communicates with the Orangefs server daemons that implement the filesystem. The server daemons (there's almost always more than one) need not be running on the same host as the kernel client. Orangefs filesystems can also be mounted with FUSE, and we ship code and instructions to facilitate that, but most of our users report preferring to use our kernel module instead. Further, as an example of a problem we can't solve with fuse, we have in the works a not-yet-ready-for-prime-time version of a file_operations lock function that accounts for the server daemons being distributed across more than one running kernel. Many people and organizations, including Clemson University, Argonne National Laboratories and Acxiom Corporation have helped to create what has become Orangefs over more than twenty years. Some of the more recent contributors to the kernel client include: Mike Marshall Christoph Hellwig Randy Martin Becky Ligon Walt Ligon Michael Moore Rob Ross Phil Carnes Signed-off-by: Mike Marshall --- diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h new file mode 100644 index 000000000000..a79129f875f3 --- /dev/null +++ b/fs/orangefs/downcall.h @@ -0,0 +1,138 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * Definitions of downcalls used in Linux kernel module. + */ + +#ifndef __DOWNCALL_H +#define __DOWNCALL_H + +/* + * Sanitized the device-client core interaction + * for clean 32-64 bit usage + */ +struct pvfs2_io_response { + __s64 amt_complete; +}; + +struct pvfs2_iox_response { + __s64 amt_complete; +}; + +struct pvfs2_lookup_response { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_create_response { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_symlink_response { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_getattr_response { + struct PVFS_sys_attr_s attributes; + char link_target[PVFS2_NAME_LEN]; +}; + +struct pvfs2_mkdir_response { + struct pvfs2_object_kref refn; +}; + +/* + * duplication of some system interface structures so that I don't have + * to allocate extra memory + */ +struct pvfs2_dirent { + char *d_name; + int d_length; + struct pvfs2_khandle khandle; +}; + +struct pvfs2_statfs_response { + __s64 block_size; + __s64 blocks_total; + __s64 blocks_avail; + __s64 files_total; + __s64 files_avail; +}; + +struct pvfs2_fs_mount_response { + __s32 fs_id; + __s32 id; + struct pvfs2_khandle root_khandle; +}; + +/* the getxattr response is the attribute value */ +struct pvfs2_getxattr_response { + __s32 val_sz; + __s32 __pad1; + char val[PVFS_MAX_XATTR_VALUELEN]; +}; + +/* the listxattr response is an array of attribute names */ +struct pvfs2_listxattr_response { + __s32 returned_count; + __s32 __pad1; + __u64 token; + char key[PVFS_MAX_XATTR_LISTLEN * PVFS_MAX_XATTR_NAMELEN]; + __s32 keylen; + __s32 __pad2; + __s32 lengths[PVFS_MAX_XATTR_LISTLEN]; +}; + +struct pvfs2_param_response { + __s64 value; +}; + +#define PERF_COUNT_BUF_SIZE 4096 +struct pvfs2_perf_count_response { + char buffer[PERF_COUNT_BUF_SIZE]; +}; + +#define FS_KEY_BUF_SIZE 4096 +struct pvfs2_fs_key_response { + __s32 fs_keylen; + __s32 __pad1; + char fs_key[FS_KEY_BUF_SIZE]; +}; + +struct pvfs2_downcall_s { + __s32 type; + __s32 status; + /* currently trailer is used only by readdir */ + __s64 trailer_size; + char * trailer_buf; + + union { + struct pvfs2_io_response io; + struct pvfs2_iox_response iox; + struct pvfs2_lookup_response lookup; + struct pvfs2_create_response create; + struct pvfs2_symlink_response sym; + struct pvfs2_getattr_response getattr; + struct pvfs2_mkdir_response mkdir; + struct pvfs2_statfs_response statfs; + struct pvfs2_fs_mount_response fs_mount; + struct pvfs2_getxattr_response getxattr; + struct pvfs2_listxattr_response listxattr; + struct pvfs2_param_response param; + struct pvfs2_perf_count_response perf_count; + struct pvfs2_fs_key_response fs_key; + } resp; +}; + +struct pvfs2_readdir_response_s { + __u64 token; + __u64 directory_version; + __u32 __pad2; + __u32 pvfs_dirent_outcount; + struct pvfs2_dirent *dirent_array; +}; + +#endif /* __DOWNCALL_H */ diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h new file mode 100644 index 000000000000..2fb3a63ae9ab --- /dev/null +++ b/fs/orangefs/protocol.h @@ -0,0 +1,681 @@ +#include +#include +#include + +extern struct client_debug_mask *cdm_array; +extern char *debug_help_string; +extern int help_string_initialized; +extern struct dentry *debug_dir; +extern struct dentry *help_file_dentry; +extern struct dentry *client_debug_dentry; +extern const struct file_operations debug_help_fops; +extern int client_all_index; +extern int client_verbose_index; +extern int cdm_element_count; +#define DEBUG_HELP_STRING_SIZE 4096 +#define HELP_STRING_UNINITIALIZED \ + "Client Debug Keywords are unknown until the first time\n" \ + "the client is started after boot.\n" +#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help" +#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug" +#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug" +#define PVFS2_VERBOSE "verbose" +#define PVFS2_ALL "all" + +/* pvfs2-config.h ***********************************************************/ +#define PVFS2_VERSION_MAJOR 2 +#define PVFS2_VERSION_MINOR 9 +#define PVFS2_VERSION_SUB 0 + +/* khandle stuff ***********************************************************/ + +/* + * The 2.9 core will put 64 bit handles in here like this: + * 1234 0000 0000 5678 + * The 3.0 and beyond cores will put 128 bit handles in here like this: + * 1234 5678 90AB CDEF + * The kernel module will always use the first four bytes and + * the last four bytes as an inum. + */ +struct pvfs2_khandle { + unsigned char u[16]; +} __aligned(8); + +/* + * kernel version of an object ref. + */ +struct pvfs2_object_kref { + struct pvfs2_khandle khandle; + __s32 fs_id; + __s32 __pad1; +}; + +/* + * compare 2 khandles assumes little endian thus from large address to + * small address + */ +static inline int PVFS_khandle_cmp(const struct pvfs2_khandle *kh1, + const struct pvfs2_khandle *kh2) +{ + int i; + + for (i = 15; i >= 0; i--) { + if (kh1->u[i] > kh2->u[i]) + return 1; + if (kh1->u[i] < kh2->u[i]) + return -1; + } + + return 0; +} + +/* copy a khandle to a field of arbitrary size */ +static inline void PVFS_khandle_to(const struct pvfs2_khandle *kh, + void *p, int size) +{ + int i; + unsigned char *c = p; + + memset(p, 0, size); + + for (i = 0; i < 16 && i < size; i++) + c[i] = kh->u[i]; +} + +/* copy a khandle from a field of arbitrary size */ +static inline void PVFS_khandle_from(struct pvfs2_khandle *kh, + void *p, int size) +{ + int i; + unsigned char *c = p; + + memset(kh, 0, 16); + + for (i = 0; i < 16 && i < size; i++) + kh->u[i] = c[i]; +} + +/* pvfs2-types.h ************************************************************/ +typedef __u32 PVFS_uid; +typedef __u32 PVFS_gid; +typedef __s32 PVFS_fs_id; +typedef __u32 PVFS_permissions; +typedef __u64 PVFS_time; +typedef __s64 PVFS_size; +typedef __u64 PVFS_flags; +typedef __u64 PVFS_ds_position; +typedef __s32 PVFS_error; +typedef __s64 PVFS_offset; + +#define PVFS2_SUPER_MAGIC 0x20030528 +#define PVFS_ERROR_BIT (1 << 30) +#define PVFS_NON_ERRNO_ERROR_BIT (1 << 29) +#define IS_PVFS_ERROR(__error) ((__error)&(PVFS_ERROR_BIT)) +#define IS_PVFS_NON_ERRNO_ERROR(__error) \ +(((__error)&(PVFS_NON_ERRNO_ERROR_BIT)) && IS_PVFS_ERROR(__error)) +#define PVFS_ERROR_TO_ERRNO(__error) PVFS_get_errno_mapping(__error) + +/* 7 bits are used for the errno mapped error codes */ +#define PVFS_ERROR_CODE(__error) \ +((__error) & (__s32)(0x7f|PVFS_ERROR_BIT)) +#define PVFS_ERROR_CLASS(__error) \ +((__error) & ~((__s32)(0x7f|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT))) +#define PVFS_NON_ERRNO_ERROR_CODE(__error) \ +((__error) & (__s32)(127|PVFS_ERROR_BIT|PVFS_NON_ERRNO_ERROR_BIT)) + +/* PVFS2 error codes, compliments of asm/errno.h */ +#define PVFS_EPERM E(1) /* Operation not permitted */ +#define PVFS_ENOENT E(2) /* No such file or directory */ +#define PVFS_EINTR E(3) /* Interrupted system call */ +#define PVFS_EIO E(4) /* I/O error */ +#define PVFS_ENXIO E(5) /* No such device or address */ +#define PVFS_EBADF E(6) /* Bad file number */ +#define PVFS_EAGAIN E(7) /* Try again */ +#define PVFS_ENOMEM E(8) /* Out of memory */ +#define PVFS_EFAULT E(9) /* Bad address */ +#define PVFS_EBUSY E(10) /* Device or resource busy */ +#define PVFS_EEXIST E(11) /* File exists */ +#define PVFS_ENODEV E(12) /* No such device */ +#define PVFS_ENOTDIR E(13) /* Not a directory */ +#define PVFS_EISDIR E(14) /* Is a directory */ +#define PVFS_EINVAL E(15) /* Invalid argument */ +#define PVFS_EMFILE E(16) /* Too many open files */ +#define PVFS_EFBIG E(17) /* File too large */ +#define PVFS_ENOSPC E(18) /* No space left on device */ +#define PVFS_EROFS E(19) /* Read-only file system */ +#define PVFS_EMLINK E(20) /* Too many links */ +#define PVFS_EPIPE E(21) /* Broken pipe */ +#define PVFS_EDEADLK E(22) /* Resource deadlock would occur */ +#define PVFS_ENAMETOOLONG E(23) /* File name too long */ +#define PVFS_ENOLCK E(24) /* No record locks available */ +#define PVFS_ENOSYS E(25) /* Function not implemented */ +#define PVFS_ENOTEMPTY E(26) /* Directory not empty */ + /* +#define PVFS_ELOOP E(27) * Too many symbolic links encountered + */ +#define PVFS_EWOULDBLOCK E(28) /* Operation would block */ +#define PVFS_ENOMSG E(29) /* No message of desired type */ +#define PVFS_EUNATCH E(30) /* Protocol driver not attached */ +#define PVFS_EBADR E(31) /* Invalid request descriptor */ +#define PVFS_EDEADLOCK E(32) +#define PVFS_ENODATA E(33) /* No data available */ +#define PVFS_ETIME E(34) /* Timer expired */ +#define PVFS_ENONET E(35) /* Machine is not on the network */ +#define PVFS_EREMOTE E(36) /* Object is remote */ +#define PVFS_ECOMM E(37) /* Communication error on send */ +#define PVFS_EPROTO E(38) /* Protocol error */ +#define PVFS_EBADMSG E(39) /* Not a data message */ + /* +#define PVFS_EOVERFLOW E(40) * Value too large for defined data + * type + */ + /* +#define PVFS_ERESTART E(41) * Interrupted system call should be + * restarted + */ +#define PVFS_EMSGSIZE E(42) /* Message too long */ +#define PVFS_EPROTOTYPE E(43) /* Protocol wrong type for socket */ +#define PVFS_ENOPROTOOPT E(44) /* Protocol not available */ +#define PVFS_EPROTONOSUPPORT E(45) /* Protocol not supported */ + /* +#define PVFS_EOPNOTSUPP E(46) * Operation not supported on transport + * endpoint + */ +#define PVFS_EADDRINUSE E(47) /* Address already in use */ +#define PVFS_EADDRNOTAVAIL E(48) /* Cannot assign requested address */ +#define PVFS_ENETDOWN E(49) /* Network is down */ +#define PVFS_ENETUNREACH E(50) /* Network is unreachable */ + /* +#define PVFS_ENETRESET E(51) * Network dropped connection because + * of reset + */ +#define PVFS_ENOBUFS E(52) /* No buffer space available */ +#define PVFS_ETIMEDOUT E(53) /* Connection timed out */ +#define PVFS_ECONNREFUSED E(54) /* Connection refused */ +#define PVFS_EHOSTDOWN E(55) /* Host is down */ +#define PVFS_EHOSTUNREACH E(56) /* No route to host */ +#define PVFS_EALREADY E(57) /* Operation already in progress */ +#define PVFS_EACCES E(58) /* Access not allowed */ +#define PVFS_ECONNRESET E(59) /* Connection reset by peer */ +#define PVFS_ERANGE E(60) /* Math out of range or buf too small */ + +/***************** non-errno/pvfs2 specific error codes *****************/ +#define PVFS_ECANCEL (1|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EDEVINIT (2|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EDETAIL (3|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EHOSTNTFD (4|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_EADDRNTFD (5|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ENORECVR (6|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ETRYAGAIN (7|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ENOTPVFS (8|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) +#define PVFS_ESECURITY (9|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT)) + +/* + * NOTE: PLEASE DO NOT ARBITRARILY ADD NEW ERRNO ERROR CODES! + * + * IF YOU CHOOSE TO ADD A NEW ERROR CODE (DESPITE OUR PLEA), YOU ALSO + * NEED TO INCREMENT PVFS_ERRNO MAX (BELOW) AND ADD A MAPPING TO A + * UNIX ERRNO VALUE IN THE MACROS BELOW (USED IN + * src/common/misc/errno-mapping.c and the kernel module) + */ +#define PVFS_ERRNO_MAX 61 + +#define PVFS_ERROR_BMI (1 << 7) /* BMI-specific error */ +#define PVFS_ERROR_TROVE (2 << 7) /* Trove-specific error */ +#define PVFS_ERROR_FLOW (3 << 7) +#define PVFS_ERROR_SM (4 << 7) /* state machine specific error */ +#define PVFS_ERROR_SCHED (5 << 7) +#define PVFS_ERROR_CLIENT (6 << 7) +#define PVFS_ERROR_DEV (7 << 7) /* device file interaction */ + +#define PVFS_ERROR_CLASS_BITS \ + (PVFS_ERROR_BMI | \ + PVFS_ERROR_TROVE | \ + PVFS_ERROR_FLOW | \ + PVFS_ERROR_SM | \ + PVFS_ERROR_SCHED | \ + PVFS_ERROR_CLIENT | \ + PVFS_ERROR_DEV) + +#define DECLARE_ERRNO_MAPPING() \ +__s32 PINT_errno_mapping[PVFS_ERRNO_MAX + 1] = { \ + 0, /* leave this one empty */ \ + EPERM, /* 1 */ \ + ENOENT, \ + EINTR, \ + EIO, \ + ENXIO, \ + EBADF, \ + EAGAIN, \ + ENOMEM, \ + EFAULT, \ + EBUSY, /* 10 */ \ + EEXIST, \ + ENODEV, \ + ENOTDIR, \ + EISDIR, \ + EINVAL, \ + EMFILE, \ + EFBIG, \ + ENOSPC, \ + EROFS, \ + EMLINK, /* 20 */ \ + EPIPE, \ + EDEADLK, \ + ENAMETOOLONG, \ + ENOLCK, \ + ENOSYS, \ + ENOTEMPTY, \ + ELOOP, \ + EWOULDBLOCK, \ + ENOMSG, \ + EUNATCH, /* 30 */ \ + EBADR, \ + EDEADLOCK, \ + ENODATA, \ + ETIME, \ + ENONET, \ + EREMOTE, \ + ECOMM, \ + EPROTO, \ + EBADMSG, \ + EOVERFLOW, /* 40 */ \ + ERESTART, \ + EMSGSIZE, \ + EPROTOTYPE, \ + ENOPROTOOPT, \ + EPROTONOSUPPORT, \ + EOPNOTSUPP, \ + EADDRINUSE, \ + EADDRNOTAVAIL, \ + ENETDOWN, \ + ENETUNREACH, /* 50 */ \ + ENETRESET, \ + ENOBUFS, \ + ETIMEDOUT, \ + ECONNREFUSED, \ + EHOSTDOWN, \ + EHOSTUNREACH, \ + EALREADY, \ + EACCES, \ + ECONNRESET, /* 59 */ \ + ERANGE, \ + 0 /* PVFS_ERRNO_MAX */ \ +}; \ +const char *PINT_non_errno_strerror_mapping[] = { \ + "Success", /* 0 */ \ + "Operation cancelled (possibly due to timeout)", \ + "Device initialization failed", \ + "Detailed per-server errors are available", \ + "Unknown host", \ + "No address associated with name", \ + "Unknown server error", \ + "Host name lookup failure", \ + "Path contains non-PVFS elements", \ + "Security error", \ +}; \ +__s32 PINT_non_errno_mapping[] = { \ + 0, /* leave this one empty */ \ + PVFS_ECANCEL, /* 1 */ \ + PVFS_EDEVINIT, /* 2 */ \ + PVFS_EDETAIL, /* 3 */ \ + PVFS_EHOSTNTFD, /* 4 */ \ + PVFS_EADDRNTFD, /* 5 */ \ + PVFS_ENORECVR, /* 6 */ \ + PVFS_ETRYAGAIN, /* 7 */ \ + PVFS_ENOTPVFS, /* 8 */ \ + PVFS_ESECURITY, /* 9 */ \ +} + +/* + * NOTE: PVFS_get_errno_mapping will convert a PVFS_ERROR_CODE to an + * errno value. If the error code is a pvfs2 specific error code + * (i.e. a PVFS_NON_ERRNO_ERROR_CODE), PVFS_get_errno_mapping will + * return an index into the PINT_non_errno_strerror_mapping array which + * can be used for getting the pvfs2 specific strerror message given + * the error code. if the value is not a recognized error code, the + * passed in value will be returned unchanged. + */ +#define DECLARE_ERRNO_MAPPING_AND_FN() \ +extern __s32 PINT_errno_mapping[]; \ +extern __s32 PINT_non_errno_mapping[]; \ +extern const char *PINT_non_errno_strerror_mapping[]; \ +__s32 PVFS_get_errno_mapping(__s32 error) \ +{ \ + __s32 ret = error, mask = 0; \ + __s32 positive = ((error > -1) ? 1 : 0); \ + if (IS_PVFS_NON_ERRNO_ERROR((positive ? error : -error))) { \ + mask = (PVFS_NON_ERRNO_ERROR_BIT | \ + PVFS_ERROR_BIT | \ + PVFS_ERROR_CLASS_BITS); \ + ret = PVFS_NON_ERRNO_ERROR_CODE(((positive ? \ + error : \ + abs(error))) & \ + ~mask); \ + } \ + else if (IS_PVFS_ERROR((positive ? error : -error))) { \ + mask = (PVFS_ERROR_BIT | \ + PVFS_ERROR_CLASS_BITS); \ + ret = PINT_errno_mapping[PVFS_ERROR_CODE(((positive ? \ + error : \ + abs(error))) & \ + ~mask)]; \ + } \ + return ret; \ +} \ +__s32 PVFS_errno_to_error(int err) \ +{ \ + __s32 e = 0; \ + \ + for (; e < PVFS_ERRNO_MAX; ++e) \ + if (PINT_errno_mapping[e] == err) \ + return e | PVFS_ERROR_BIT; \ + \ + return err; \ +} \ +DECLARE_ERRNO_MAPPING() + +/* permission bits */ +#define PVFS_O_EXECUTE (1 << 0) +#define PVFS_O_WRITE (1 << 1) +#define PVFS_O_READ (1 << 2) +#define PVFS_G_EXECUTE (1 << 3) +#define PVFS_G_WRITE (1 << 4) +#define PVFS_G_READ (1 << 5) +#define PVFS_U_EXECUTE (1 << 6) +#define PVFS_U_WRITE (1 << 7) +#define PVFS_U_READ (1 << 8) +/* no PVFS_U_VTX (sticky bit) */ +#define PVFS_G_SGID (1 << 10) +#define PVFS_U_SUID (1 << 11) + +/* definition taken from stdint.h */ +#define INT32_MAX (2147483647) +#define PVFS_ITERATE_START (INT32_MAX - 1) +#define PVFS_ITERATE_END (INT32_MAX - 2) +#define PVFS_READDIR_START PVFS_ITERATE_START +#define PVFS_READDIR_END PVFS_ITERATE_END +#define PVFS_IMMUTABLE_FL FS_IMMUTABLE_FL +#define PVFS_APPEND_FL FS_APPEND_FL +#define PVFS_NOATIME_FL FS_NOATIME_FL +#define PVFS_MIRROR_FL 0x01000000ULL +#define PVFS_O_EXECUTE (1 << 0) +#define PVFS_FS_ID_NULL ((__s32)0) +#define PVFS_ATTR_SYS_UID (1 << 0) +#define PVFS_ATTR_SYS_GID (1 << 1) +#define PVFS_ATTR_SYS_PERM (1 << 2) +#define PVFS_ATTR_SYS_ATIME (1 << 3) +#define PVFS_ATTR_SYS_CTIME (1 << 4) +#define PVFS_ATTR_SYS_MTIME (1 << 5) +#define PVFS_ATTR_SYS_TYPE (1 << 6) +#define PVFS_ATTR_SYS_ATIME_SET (1 << 7) +#define PVFS_ATTR_SYS_MTIME_SET (1 << 8) +#define PVFS_ATTR_SYS_SIZE (1 << 20) +#define PVFS_ATTR_SYS_LNK_TARGET (1 << 24) +#define PVFS_ATTR_SYS_DFILE_COUNT (1 << 25) +#define PVFS_ATTR_SYS_DIRENT_COUNT (1 << 26) +#define PVFS_ATTR_SYS_BLKSIZE (1 << 28) +#define PVFS_ATTR_SYS_MIRROR_COPIES_COUNT (1 << 29) +#define PVFS_ATTR_SYS_COMMON_ALL \ + (PVFS_ATTR_SYS_UID | \ + PVFS_ATTR_SYS_GID | \ + PVFS_ATTR_SYS_PERM | \ + PVFS_ATTR_SYS_ATIME | \ + PVFS_ATTR_SYS_CTIME | \ + PVFS_ATTR_SYS_MTIME | \ + PVFS_ATTR_SYS_TYPE) + +#define PVFS_ATTR_SYS_ALL_SETABLE \ +(PVFS_ATTR_SYS_COMMON_ALL-PVFS_ATTR_SYS_TYPE) + +#define PVFS_ATTR_SYS_ALL_NOHINT \ + (PVFS_ATTR_SYS_COMMON_ALL | \ + PVFS_ATTR_SYS_SIZE | \ + PVFS_ATTR_SYS_LNK_TARGET | \ + PVFS_ATTR_SYS_DFILE_COUNT | \ + PVFS_ATTR_SYS_MIRROR_COPIES_COUNT | \ + PVFS_ATTR_SYS_DIRENT_COUNT | \ + PVFS_ATTR_SYS_BLKSIZE) +#define PVFS_XATTR_REPLACE 0x2 +#define PVFS_XATTR_CREATE 0x1 +#define PVFS_MAX_SERVER_ADDR_LEN 256 +#define PVFS_NAME_MAX 256 +/* + * max extended attribute name len as imposed by the VFS and exploited for the + * upcall request types. + * NOTE: Please retain them as multiples of 8 even if you wish to change them + * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit + * kernel. Due to implementation within DBPF, this really needs to be + * PVFS_NAME_MAX, which it was the same value as, but no reason to let it + * break if that changes in the future. + */ +#define PVFS_MAX_XATTR_NAMELEN PVFS_NAME_MAX /* Not the same as + * XATTR_NAME_MAX defined + * by + */ +#define PVFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX + * defined by + */ +#define PVFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX + * defined by + */ +/* + * PVFS I/O operation types, used in both system and server interfaces. + */ +enum PVFS_io_type { + PVFS_IO_READ = 1, + PVFS_IO_WRITE = 2 +}; + +/* + * If this enum is modified the server parameters related to the precreate pool + * batch and low threshold sizes may need to be modified to reflect this + * change. + */ +enum pvfs2_ds_type { + PVFS_TYPE_NONE = 0, + PVFS_TYPE_METAFILE = (1 << 0), + PVFS_TYPE_DATAFILE = (1 << 1), + PVFS_TYPE_DIRECTORY = (1 << 2), + PVFS_TYPE_SYMLINK = (1 << 3), + PVFS_TYPE_DIRDATA = (1 << 4), + PVFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */ +}; + +/* + * PVFS_certificate simply stores a buffer with the buffer size. + * The buffer can be converted to an OpenSSL X509 struct for use. + */ +struct PVFS_certificate { + __u32 buf_size; + unsigned char *buf; +}; + +/* + * A credential identifies a user and is signed by the client/user + * private key. + */ +struct PVFS_credential { + __u32 userid; /* user id */ + __u32 num_groups; /* length of group_array */ + __u32 *group_array; /* groups for which the user is a member */ + char *issuer; /* alias of the issuing server */ + __u64 timeout; /* seconds after epoch to time out */ + __u32 sig_size; /* length of the signature in bytes */ + unsigned char *signature; /* digital signature */ + struct PVFS_certificate certificate; /* user certificate buffer */ +}; +#define extra_size_PVFS_credential (PVFS_REQ_LIMIT_GROUPS * \ + sizeof(__u32) + \ + PVFS_REQ_LIMIT_ISSUER + \ + PVFS_REQ_LIMIT_SIGNATURE + \ + extra_size_PVFS_certificate) + +/* This structure is used by the VFS-client interaction alone */ +struct PVFS_keyval_pair { + char key[PVFS_MAX_XATTR_NAMELEN]; + __s32 key_sz; /* __s32 for portable, fixed-size structures */ + __s32 val_sz; + char val[PVFS_MAX_XATTR_VALUELEN]; +}; + +/* pvfs2-sysint.h ***********************************************************/ +/* Describes attributes for a file, directory, or symlink. */ +struct PVFS_sys_attr_s { + __u32 owner; + __u32 group; + __u32 perms; + __u64 atime; + __u64 mtime; + __u64 ctime; + __s64 size; + + /* NOTE: caller must free if valid */ + char *link_target; + + /* Changed to __s32 so that size of structure does not change */ + __s32 dfile_count; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_servers_initial; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_servers_max; + + /* Changed to __s32 so that size of structure does not change */ + __s32 distr_dir_split_size; + + __u32 mirror_copies_count; + + /* NOTE: caller must free if valid */ + char *dist_name; + + /* NOTE: caller must free if valid */ + char *dist_params; + + __s64 dirent_count; + enum pvfs2_ds_type objtype; + __u64 flags; + __u32 mask; + __s64 blksize; +}; + +#define PVFS2_LOOKUP_LINK_NO_FOLLOW 0 +#define PVFS2_LOOKUP_LINK_FOLLOW 1 + +/* pint-dev.h ***************************************************************/ + +/* parameter structure used in PVFS_DEV_DEBUG ioctl command */ +struct dev_mask_info_s { + enum { + KERNEL_MASK, + CLIENT_MASK, + } mask_type; + __u64 mask_value; +}; + +struct dev_mask2_info_s { + __u64 mask1_value; + __u64 mask2_value; +}; + +/* pvfs2-util.h *************************************************************/ +#define PVFS_util_min(x1, x2) (((x1) > (x2)) ? (x2) : (x1)) +__s32 PVFS_util_translate_mode(int mode); + +/* pvfs2-debug.h ************************************************************/ +#include "pvfs2-debug.h" + +/* pvfs2-internal.h *********************************************************/ +#define llu(x) (unsigned long long)(x) +#define lld(x) (long long)(x) + +/* pint-dev-shared.h ********************************************************/ +#define PVFS_DEV_MAGIC 'k' + +#define PVFS2_READDIR_DEFAULT_DESC_COUNT 5 + +#define DEV_GET_MAGIC 0x1 +#define DEV_GET_MAX_UPSIZE 0x2 +#define DEV_GET_MAX_DOWNSIZE 0x3 +#define DEV_MAP 0x4 +#define DEV_REMOUNT_ALL 0x5 +#define DEV_DEBUG 0x6 +#define DEV_UPSTREAM 0x7 +#define DEV_CLIENT_MASK 0x8 +#define DEV_CLIENT_STRING 0x9 +#define DEV_MAX_NR 0xa + +/* supported ioctls, codes are with respect to user-space */ +enum { + PVFS_DEV_GET_MAGIC = _IOW(PVFS_DEV_MAGIC, DEV_GET_MAGIC, __s32), + PVFS_DEV_GET_MAX_UPSIZE = + _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32), + PVFS_DEV_GET_MAX_DOWNSIZE = + _IOW(PVFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32), + PVFS_DEV_MAP = _IO(PVFS_DEV_MAGIC, DEV_MAP), + PVFS_DEV_REMOUNT_ALL = _IO(PVFS_DEV_MAGIC, DEV_REMOUNT_ALL), + PVFS_DEV_DEBUG = _IOR(PVFS_DEV_MAGIC, DEV_DEBUG, __s32), + PVFS_DEV_UPSTREAM = _IOW(PVFS_DEV_MAGIC, DEV_UPSTREAM, int), + PVFS_DEV_CLIENT_MASK = _IOW(PVFS_DEV_MAGIC, + DEV_CLIENT_MASK, + struct dev_mask2_info_s), + PVFS_DEV_CLIENT_STRING = _IOW(PVFS_DEV_MAGIC, + DEV_CLIENT_STRING, + char *), + PVFS_DEV_MAXNR = DEV_MAX_NR, +}; + +/* + * version number for use in communicating between kernel space and user + * space + */ +/* +#define PVFS_KERNEL_PROTO_VERSION \ + ((PVFS2_VERSION_MAJOR * 10000) + \ + (PVFS2_VERSION_MINOR * 100) + \ + PVFS2_VERSION_SUB) +*/ +#define PVFS_KERNEL_PROTO_VERSION 0 + +/* + * describes memory regions to map in the PVFS_DEV_MAP ioctl. + * NOTE: See devpvfs2-req.c for 32 bit compat structure. + * Since this structure has a variable-sized layout that is different + * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout + * on such systems before servicing ioctl calls from user-space binaries + * that may be 32 bit! + */ +struct PVFS_dev_map_desc { + void *ptr; + __s32 total_size; + __s32 size; + __s32 count; +}; + +/* gossip.h *****************************************************************/ + +#ifdef GOSSIP_DISABLE_DEBUG +#define gossip_debug(mask, format, f...) do {} while (0) +#else +extern __u64 gossip_debug_mask; +extern struct client_debug_mask client_debug_mask; + +/* try to avoid function call overhead by checking masks in macro */ +#define gossip_debug(mask, format, f...) \ +do { \ + if (gossip_debug_mask & mask) \ + printk(format, ##f); \ +} while (0) +#endif /* GOSSIP_DISABLE_DEBUG */ + +/* do file and line number printouts w/ the GNU preprocessor */ +#define gossip_ldebug(mask, format, f...) \ + gossip_debug(mask, "%s: " format, __func__, ##f) + +#define gossip_err printk +#define gossip_lerr(format, f...) \ + gossip_err("%s line %d: " format, \ + __FILE__, \ + __LINE__, \ + ##f) diff --git a/fs/orangefs/pvfs2-bufmap.h b/fs/orangefs/pvfs2-bufmap.h new file mode 100644 index 000000000000..e269deafbb74 --- /dev/null +++ b/fs/orangefs/pvfs2-bufmap.h @@ -0,0 +1,76 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef __PVFS2_BUFMAP_H +#define __PVFS2_BUFMAP_H + +/* used to describe mapped buffers */ +struct pvfs_bufmap_desc { + void *uaddr; /* user space address pointer */ + struct page **page_array; /* array of mapped pages */ + int array_count; /* size of above arrays */ + struct list_head list_link; +}; + +struct pvfs2_bufmap; + +struct pvfs2_bufmap *pvfs2_bufmap_ref(void); +void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap); + +/* + * pvfs_bufmap_size_query is now an inline function because buffer + * sizes are not hardcoded + */ +int pvfs_bufmap_size_query(void); + +int pvfs_bufmap_shift_query(void); + +int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc); + +int get_bufmap_init(void); + +void pvfs_bufmap_finalize(void); + +int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index); + +void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index); + +int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index); + +void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index); + +int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap, + int buffer_index, + const struct iovec *iov, + unsigned long nr_segs, + size_t size); + +size_t pvfs_bufmap_copy_to_user_task_iovec(struct task_struct *tsk, + struct iovec *iovec, + unsigned long nr_segs, + struct pvfs2_bufmap *bufmap, + int buffer_index, + size_t bytes_to_be_copied); + +#endif /* __PVFS2_BUFMAP_H */ diff --git a/fs/orangefs/pvfs2-debug.h b/fs/orangefs/pvfs2-debug.h new file mode 100644 index 000000000000..4c27ad77fa16 --- /dev/null +++ b/fs/orangefs/pvfs2-debug.h @@ -0,0 +1,290 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* This file just defines debugging masks to be used with the gossip + * logging utility. All debugging masks for PVFS2 are kept here to make + * sure we don't have collisions. + */ + +#ifndef __PVFS2_DEBUG_H +#define __PVFS2_DEBUG_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#define GOSSIP_NO_DEBUG (__u64)0 +#define GOSSIP_BMI_DEBUG_TCP ((__u64)1 << 0) +#define GOSSIP_BMI_DEBUG_CONTROL ((__u64)1 << 1) +#define GOSSIP_BMI_DEBUG_OFFSETS ((__u64)1 << 2) +#define GOSSIP_BMI_DEBUG_GM ((__u64)1 << 3) +#define GOSSIP_JOB_DEBUG ((__u64)1 << 4) +#define GOSSIP_SERVER_DEBUG ((__u64)1 << 5) +#define GOSSIP_STO_DEBUG_CTRL ((__u64)1 << 6) +#define GOSSIP_STO_DEBUG_DEFAULT ((__u64)1 << 7) +#define GOSSIP_FLOW_DEBUG ((__u64)1 << 8) +#define GOSSIP_BMI_DEBUG_GM_MEM ((__u64)1 << 9) +#define GOSSIP_REQUEST_DEBUG ((__u64)1 << 10) +#define GOSSIP_FLOW_PROTO_DEBUG ((__u64)1 << 11) +#define GOSSIP_NCACHE_DEBUG ((__u64)1 << 12) +#define GOSSIP_CLIENT_DEBUG ((__u64)1 << 13) +#define GOSSIP_REQ_SCHED_DEBUG ((__u64)1 << 14) +#define GOSSIP_ACACHE_DEBUG ((__u64)1 << 15) +#define GOSSIP_TROVE_DEBUG ((__u64)1 << 16) +#define GOSSIP_TROVE_OP_DEBUG ((__u64)1 << 17) +#define GOSSIP_DIST_DEBUG ((__u64)1 << 18) +#define GOSSIP_BMI_DEBUG_IB ((__u64)1 << 19) +#define GOSSIP_DBPF_ATTRCACHE_DEBUG ((__u64)1 << 20) +#define GOSSIP_MMAP_RCACHE_DEBUG ((__u64)1 << 21) +#define GOSSIP_LOOKUP_DEBUG ((__u64)1 << 22) +#define GOSSIP_REMOVE_DEBUG ((__u64)1 << 23) +#define GOSSIP_GETATTR_DEBUG ((__u64)1 << 24) +#define GOSSIP_READDIR_DEBUG ((__u64)1 << 25) +#define GOSSIP_IO_DEBUG ((__u64)1 << 26) +#define GOSSIP_DBPF_OPEN_CACHE_DEBUG ((__u64)1 << 27) +#define GOSSIP_PERMISSIONS_DEBUG ((__u64)1 << 28) +#define GOSSIP_CANCEL_DEBUG ((__u64)1 << 29) +#define GOSSIP_MSGPAIR_DEBUG ((__u64)1 << 30) +#define GOSSIP_CLIENTCORE_DEBUG ((__u64)1 << 31) +#define GOSSIP_CLIENTCORE_TIMING_DEBUG ((__u64)1 << 32) +#define GOSSIP_SETATTR_DEBUG ((__u64)1 << 33) +#define GOSSIP_MKDIR_DEBUG ((__u64)1 << 34) +#define GOSSIP_VARSTRIP_DEBUG ((__u64)1 << 35) +#define GOSSIP_GETEATTR_DEBUG ((__u64)1 << 36) +#define GOSSIP_SETEATTR_DEBUG ((__u64)1 << 37) +#define GOSSIP_ENDECODE_DEBUG ((__u64)1 << 38) +#define GOSSIP_DELEATTR_DEBUG ((__u64)1 << 39) +#define GOSSIP_ACCESS_DEBUG ((__u64)1 << 40) +#define GOSSIP_ACCESS_DETAIL_DEBUG ((__u64)1 << 41) +#define GOSSIP_LISTEATTR_DEBUG ((__u64)1 << 42) +#define GOSSIP_PERFCOUNTER_DEBUG ((__u64)1 << 43) +#define GOSSIP_STATE_MACHINE_DEBUG ((__u64)1 << 44) +#define GOSSIP_DBPF_KEYVAL_DEBUG ((__u64)1 << 45) +#define GOSSIP_LISTATTR_DEBUG ((__u64)1 << 46) +#define GOSSIP_DBPF_COALESCE_DEBUG ((__u64)1 << 47) +#define GOSSIP_ACCESS_HOSTNAMES ((__u64)1 << 48) +#define GOSSIP_FSCK_DEBUG ((__u64)1 << 49) +#define GOSSIP_BMI_DEBUG_MX ((__u64)1 << 50) +#define GOSSIP_BSTREAM_DEBUG ((__u64)1 << 51) +#define GOSSIP_BMI_DEBUG_PORTALS ((__u64)1 << 52) +#define GOSSIP_USER_DEV_DEBUG ((__u64)1 << 53) +#define GOSSIP_DIRECTIO_DEBUG ((__u64)1 << 54) +#define GOSSIP_MGMT_DEBUG ((__u64)1 << 55) +#define GOSSIP_MIRROR_DEBUG ((__u64)1 << 56) +#define GOSSIP_WIN_CLIENT_DEBUG ((__u64)1 << 57) +#define GOSSIP_SECURITY_DEBUG ((__u64)1 << 58) +#define GOSSIP_USRINT_DEBUG ((__u64)1 << 59) +#define GOSSIP_RCACHE_DEBUG ((__u64)1 << 60) +#define GOSSIP_SECCACHE_DEBUG ((__u64)1 << 61) + +#define GOSSIP_BMI_DEBUG_ALL ((__u64) (GOSSIP_BMI_DEBUG_TCP + \ + GOSSIP_BMI_DEBUG_CONTROL + \ + GOSSIP_BMI_DEBUG_GM + \ + GOSSIP_BMI_DEBUG_OFFSETS + \ + GOSSIP_BMI_DEBUG_IB + \ + GOSSIP_BMI_DEBUG_MX + \ + GOSSIP_BMI_DEBUG_PORTALS)) + +const char *PVFS_debug_get_next_debug_keyword(int position); + +#define GOSSIP_SUPER_DEBUG ((__u64)1 << 0) +#define GOSSIP_INODE_DEBUG ((__u64)1 << 1) +#define GOSSIP_FILE_DEBUG ((__u64)1 << 2) +#define GOSSIP_DIR_DEBUG ((__u64)1 << 3) +#define GOSSIP_UTILS_DEBUG ((__u64)1 << 4) +#define GOSSIP_WAIT_DEBUG ((__u64)1 << 5) +#define GOSSIP_ACL_DEBUG ((__u64)1 << 6) +#define GOSSIP_DCACHE_DEBUG ((__u64)1 << 7) +#define GOSSIP_DEV_DEBUG ((__u64)1 << 8) +#define GOSSIP_NAME_DEBUG ((__u64)1 << 9) +#define GOSSIP_BUFMAP_DEBUG ((__u64)1 << 10) +#define GOSSIP_CACHE_DEBUG ((__u64)1 << 11) +#define GOSSIP_DEBUGFS_DEBUG ((__u64)1 << 12) +#define GOSSIP_XATTR_DEBUG ((__u64)1 << 13) +#define GOSSIP_INIT_DEBUG ((__u64)1 << 14) +#define GOSSIP_SYSFS_DEBUG ((__u64)1 << 15) + +#define GOSSIP_MAX_NR 16 +#define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1) + +/*function prototypes*/ +__u64 PVFS_kmod_eventlog_to_mask(const char *event_logging); +__u64 PVFS_debug_eventlog_to_mask(const char *event_logging); +char *PVFS_debug_mask_to_eventlog(__u64 mask); +char *PVFS_kmod_mask_to_eventlog(__u64 mask); + +/* a private internal type */ +struct __keyword_mask_s { + const char *keyword; + __u64 mask_val; +}; + +#define __DEBUG_ALL ((__u64) -1) + +/* map all config keywords to pvfs2 debug masks here */ +static struct __keyword_mask_s s_keyword_mask_map[] = { + /* Log trove debugging info. Same as 'trove'. */ + {"storage", GOSSIP_TROVE_DEBUG}, + /* Log trove debugging info. Same as 'storage'. */ + {"trove", GOSSIP_TROVE_DEBUG}, + /* Log trove operations. */ + {"trove_op", GOSSIP_TROVE_OP_DEBUG}, + /* Log network debug info. */ + {"network", GOSSIP_BMI_DEBUG_ALL}, + /* Log server info, including new operations. */ + {"server", GOSSIP_SERVER_DEBUG}, + /* Log client sysint info. This is only useful for the client. */ + {"client", GOSSIP_CLIENT_DEBUG}, + /* Debug the varstrip distribution */ + {"varstrip", GOSSIP_VARSTRIP_DEBUG}, + /* Log job info */ + {"job", GOSSIP_JOB_DEBUG}, + /* Debug PINT_process_request calls. EXTREMELY verbose! */ + {"request", GOSSIP_REQUEST_DEBUG}, + /* Log request scheduler events */ + {"reqsched", GOSSIP_REQ_SCHED_DEBUG}, + /* Log the flow protocol events, including flowproto_multiqueue */ + {"flowproto", GOSSIP_FLOW_PROTO_DEBUG}, + /* Log flow calls */ + {"flow", GOSSIP_FLOW_DEBUG}, + /* Debug the client name cache. Only useful on the client. */ + {"ncache", GOSSIP_NCACHE_DEBUG}, + /* Debug read-ahead cache events. Only useful on the client. */ + {"mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG}, + /* Debug the attribute cache. Only useful on the client. */ + {"acache", GOSSIP_ACACHE_DEBUG}, + /* Log/Debug distribution calls */ + {"distribution", GOSSIP_DIST_DEBUG}, + /* Debug the server-side dbpf attribute cache */ + {"dbpfattrcache", GOSSIP_DBPF_ATTRCACHE_DEBUG}, + /* Debug the client lookup state machine. */ + {"lookup", GOSSIP_LOOKUP_DEBUG}, + /* Debug the client remove state macine. */ + {"remove", GOSSIP_REMOVE_DEBUG}, + /* Debug the server getattr state machine. */ + {"getattr", GOSSIP_GETATTR_DEBUG}, + /* Debug the server setattr state machine. */ + {"setattr", GOSSIP_SETATTR_DEBUG}, + /* vectored getattr server state machine */ + {"listattr", GOSSIP_LISTATTR_DEBUG}, + /* Debug the client and server get ext attributes SM. */ + {"geteattr", GOSSIP_GETEATTR_DEBUG}, + /* Debug the client and server set ext attributes SM. */ + {"seteattr", GOSSIP_SETEATTR_DEBUG}, + /* Debug the readdir operation (client and server) */ + {"readdir", GOSSIP_READDIR_DEBUG}, + /* Debug the mkdir operation (server only) */ + {"mkdir", GOSSIP_MKDIR_DEBUG}, + /* Debug the io operation (reads and writes) + * for both the client and server */ + {"io", GOSSIP_IO_DEBUG}, + /* Debug the server's open file descriptor cache */ + {"open_cache", GOSSIP_DBPF_OPEN_CACHE_DEBUG}, + /* Debug permissions checking on the server */ + {"permissions", GOSSIP_PERMISSIONS_DEBUG}, + /* Debug the cancel operation */ + {"cancel", GOSSIP_CANCEL_DEBUG}, + /* Debug the msgpair state machine */ + {"msgpair", GOSSIP_MSGPAIR_DEBUG}, + /* Debug the client core app */ + {"clientcore", GOSSIP_CLIENTCORE_DEBUG}, + /* Debug the client timing state machines (job timeout, etc.) */ + {"clientcore_timing", GOSSIP_CLIENTCORE_TIMING_DEBUG}, + /* network encoding */ + {"endecode", GOSSIP_ENDECODE_DEBUG}, + /* Show server file (metadata) accesses (both modify and read-only). */ + {"access", GOSSIP_ACCESS_DEBUG}, + /* Show more detailed server file accesses */ + {"access_detail", GOSSIP_ACCESS_DETAIL_DEBUG}, + /* Debug the listeattr operation */ + {"listeattr", GOSSIP_LISTEATTR_DEBUG}, + /* Debug the state machine management code */ + {"sm", GOSSIP_STATE_MACHINE_DEBUG}, + /* Debug the metadata dbpf keyval functions */ + {"keyval", GOSSIP_DBPF_KEYVAL_DEBUG}, + /* Debug the metadata sync coalescing code */ + {"coalesce", GOSSIP_DBPF_COALESCE_DEBUG}, + /* Display the hostnames instead of IP addrs in debug output */ + {"access_hostnames", GOSSIP_ACCESS_HOSTNAMES}, + /* Show the client device events */ + {"user_dev", GOSSIP_USER_DEV_DEBUG}, + /* Debug the fsck tool */ + {"fsck", GOSSIP_FSCK_DEBUG}, + /* Debug the bstream code */ + {"bstream", GOSSIP_BSTREAM_DEBUG}, + /* Debug trove in direct io mode */ + {"directio", GOSSIP_DIRECTIO_DEBUG}, + /* Debug direct io thread management */ + {"mgmt", GOSSIP_MGMT_DEBUG}, + /* Debug mirroring process */ + {"mirror", GOSSIP_MIRROR_DEBUG}, + /* Windows client */ + {"win_client", GOSSIP_WIN_CLIENT_DEBUG}, + /* Debug robust security code */ + {"security", GOSSIP_SECURITY_DEBUG}, + /* Capability Cache */ + {"seccache", GOSSIP_SECCACHE_DEBUG}, + /* Client User Interface */ + {"usrint", GOSSIP_USRINT_DEBUG}, + /* rcache */ + {"rcache", GOSSIP_RCACHE_DEBUG}, + /* Everything except the periodic events. Useful for debugging */ + {"verbose", + (__DEBUG_ALL & + ~(GOSSIP_PERFCOUNTER_DEBUG | GOSSIP_STATE_MACHINE_DEBUG | + GOSSIP_ENDECODE_DEBUG | GOSSIP_USER_DEV_DEBUG)) + }, + /* No debug output */ + {"none", GOSSIP_NO_DEBUG}, + /* Everything */ + {"all", __DEBUG_ALL} +}; + +#undef __DEBUG_ALL + +/* + * Map all kmod keywords to kmod debug masks here. Keep this + * structure "packed": + * + * "all" is always last... + * + * keyword mask_val index + * foo 1 0 + * bar 2 1 + * baz 4 2 + * qux 8 3 + * . . . + */ +static struct __keyword_mask_s s_kmod_keyword_mask_map[] = { + {"super", GOSSIP_SUPER_DEBUG}, + {"inode", GOSSIP_INODE_DEBUG}, + {"file", GOSSIP_FILE_DEBUG}, + {"dir", GOSSIP_DIR_DEBUG}, + {"utils", GOSSIP_UTILS_DEBUG}, + {"wait", GOSSIP_WAIT_DEBUG}, + {"acl", GOSSIP_ACL_DEBUG}, + {"dcache", GOSSIP_DCACHE_DEBUG}, + {"dev", GOSSIP_DEV_DEBUG}, + {"name", GOSSIP_NAME_DEBUG}, + {"bufmap", GOSSIP_BUFMAP_DEBUG}, + {"cache", GOSSIP_CACHE_DEBUG}, + {"debugfs", GOSSIP_DEBUGFS_DEBUG}, + {"xattr", GOSSIP_XATTR_DEBUG}, + {"init", GOSSIP_INIT_DEBUG}, + {"sysfs", GOSSIP_SYSFS_DEBUG}, + {"none", GOSSIP_NO_DEBUG}, + {"all", GOSSIP_MAX_DEBUG} +}; + +static const int num_kmod_keyword_mask_map = (int) + (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + +static const int num_keyword_mask_map = (int) + (sizeof(s_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + +#endif /* __PVFS2_DEBUG_H */ diff --git a/fs/orangefs/pvfs2-debugfs.h b/fs/orangefs/pvfs2-debugfs.h new file mode 100644 index 000000000000..a66b7d08c14d --- /dev/null +++ b/fs/orangefs/pvfs2-debugfs.h @@ -0,0 +1,3 @@ +int pvfs2_debugfs_init(void); +int pvfs2_kernel_debug_init(void); +void pvfs2_debugfs_cleanup(void); diff --git a/fs/orangefs/pvfs2-dev-proto.h b/fs/orangefs/pvfs2-dev-proto.h new file mode 100644 index 000000000000..9c82e6e651f3 --- /dev/null +++ b/fs/orangefs/pvfs2-dev-proto.h @@ -0,0 +1,102 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef _PVFS2_DEV_PROTO_H +#define _PVFS2_DEV_PROTO_H + +/* + * types and constants shared between user space and kernel space for + * device interaction using a common protocol + */ + +/* + * valid pvfs2 kernel operation types + */ +#define PVFS2_VFS_OP_INVALID 0xFF000000 +#define PVFS2_VFS_OP_FILE_IO 0xFF000001 +#define PVFS2_VFS_OP_LOOKUP 0xFF000002 +#define PVFS2_VFS_OP_CREATE 0xFF000003 +#define PVFS2_VFS_OP_GETATTR 0xFF000004 +#define PVFS2_VFS_OP_REMOVE 0xFF000005 +#define PVFS2_VFS_OP_MKDIR 0xFF000006 +#define PVFS2_VFS_OP_READDIR 0xFF000007 +#define PVFS2_VFS_OP_SETATTR 0xFF000008 +#define PVFS2_VFS_OP_SYMLINK 0xFF000009 +#define PVFS2_VFS_OP_RENAME 0xFF00000A +#define PVFS2_VFS_OP_STATFS 0xFF00000B +#define PVFS2_VFS_OP_TRUNCATE 0xFF00000C +#define PVFS2_VFS_OP_MMAP_RA_FLUSH 0xFF00000D +#define PVFS2_VFS_OP_FS_MOUNT 0xFF00000E +#define PVFS2_VFS_OP_FS_UMOUNT 0xFF00000F +#define PVFS2_VFS_OP_GETXATTR 0xFF000010 +#define PVFS2_VFS_OP_SETXATTR 0xFF000011 +#define PVFS2_VFS_OP_LISTXATTR 0xFF000012 +#define PVFS2_VFS_OP_REMOVEXATTR 0xFF000013 +#define PVFS2_VFS_OP_PARAM 0xFF000014 +#define PVFS2_VFS_OP_PERF_COUNT 0xFF000015 +#define PVFS2_VFS_OP_CANCEL 0xFF00EE00 +#define PVFS2_VFS_OP_FSYNC 0xFF00EE01 +#define PVFS2_VFS_OP_FSKEY 0xFF00EE02 +#define PVFS2_VFS_OP_READDIRPLUS 0xFF00EE03 +#define PVFS2_VFS_OP_FILE_IOX 0xFF00EE04 + +/* + * Misc constants. Please retain them as multiples of 8! + * Otherwise 32-64 bit interactions will be messed up :) + */ +#define PVFS2_NAME_LEN 0x00000100 +#define PVFS2_MAX_DEBUG_STRING_LEN 0x00000400 +#define PVFS2_MAX_DEBUG_ARRAY_LEN 0x00000800 + +/* + * MAX_DIRENT_COUNT cannot be larger than PVFS_REQ_LIMIT_LISTATTR. + * The value of PVFS_REQ_LIMIT_LISTATTR has been changed from 113 to 60 + * to accomodate an attribute object with mirrored handles. + * MAX_DIRENT_COUNT is replaced by MAX_DIRENT_COUNT_READDIR and + * MAX_DIRENT_COUNT_READDIRPLUS, since readdir doesn't trigger a listattr + * but readdirplus might. +*/ +#define MAX_DIRENT_COUNT_READDIR 0x00000060 +#define MAX_DIRENT_COUNT_READDIRPLUS 0x0000003C + +#include "upcall.h" +#include "downcall.h" + +/* + * These macros differ from proto macros in that they don't do any + * byte-swappings and are used to ensure that kernel-clientcore interactions + * don't cause any unaligned accesses etc on 64 bit machines + */ +#ifndef roundup4 +#define roundup4(x) (((x)+3) & ~3) +#endif + +#ifndef roundup8 +#define roundup8(x) (((x)+7) & ~7) +#endif + +/* strings; decoding just points into existing character data */ +#define enc_string(pptr, pbuf) do { \ + __u32 len = strlen(*pbuf); \ + *(__u32 *) *(pptr) = (len); \ + memcpy(*(pptr)+4, *pbuf, len+1); \ + *(pptr) += roundup8(4 + len + 1); \ +} while (0) + +#define dec_string(pptr, pbuf, plen) do { \ + __u32 len = (*(__u32 *) *(pptr)); \ + *pbuf = *(pptr) + 4; \ + *(pptr) += roundup8(4 + len + 1); \ + if (plen) \ + *plen = len;\ +} while (0) + +struct read_write_x { + __s64 off; + __s64 len; +}; + +#endif diff --git a/fs/orangefs/pvfs2-kernel.h b/fs/orangefs/pvfs2-kernel.h new file mode 100644 index 000000000000..6c787c4797d0 --- /dev/null +++ b/fs/orangefs/pvfs2-kernel.h @@ -0,0 +1,864 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + * The PVFS2 Linux kernel support allows PVFS2 volumes to be mounted and + * accessed through the Linux VFS (i.e. using standard I/O system calls). + * This support is only needed on clients that wish to mount the file system. + * + */ + +/* + * Declarations and macros for the PVFS2 Linux kernel support. + */ + +#ifndef __PVFS2KERNEL_H +#define __PVFS2KERNEL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pvfs2-dev-proto.h" + +#ifdef PVFS2_KERNEL_DEBUG +#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 10 +#else +#define PVFS2_DEFAULT_OP_TIMEOUT_SECS 20 +#endif + +#define PVFS2_BUFMAP_WAIT_TIMEOUT_SECS 30 + +#define PVFS2_DEFAULT_SLOT_TIMEOUT_SECS 900 /* 15 minutes */ + +#define PVFS2_REQDEVICE_NAME "pvfs2-req" + +#define PVFS2_DEVREQ_MAGIC 0x20030529 +#define PVFS2_LINK_MAX 0x000000FF +#define PVFS2_PURGE_RETRY_COUNT 0x00000005 +#define PVFS2_SEEK_END 0x00000002 +#define PVFS2_MAX_NUM_OPTIONS 0x00000004 +#define PVFS2_MAX_MOUNT_OPT_LEN 0x00000080 +#define PVFS2_MAX_FSKEY_LEN 64 + +#define MAX_DEV_REQ_UPSIZE (2*sizeof(__s32) + \ +sizeof(__u64) + sizeof(struct pvfs2_upcall_s)) +#define MAX_DEV_REQ_DOWNSIZE (2*sizeof(__s32) + \ +sizeof(__u64) + sizeof(struct pvfs2_downcall_s)) + +#define BITS_PER_LONG_DIV_8 (BITS_PER_LONG >> 3) + +/* borrowed from irda.h */ +#ifndef MSECS_TO_JIFFIES +#define MSECS_TO_JIFFIES(ms) (((ms)*HZ+999)/1000) +#endif + +#define MAX_ALIGNED_DEV_REQ_UPSIZE \ + (MAX_DEV_REQ_UPSIZE + \ + ((((MAX_DEV_REQ_UPSIZE / \ + (BITS_PER_LONG_DIV_8)) * \ + (BITS_PER_LONG_DIV_8)) + \ + (BITS_PER_LONG_DIV_8)) - \ + MAX_DEV_REQ_UPSIZE)) + +#define MAX_ALIGNED_DEV_REQ_DOWNSIZE \ + (MAX_DEV_REQ_DOWNSIZE + \ + ((((MAX_DEV_REQ_DOWNSIZE / \ + (BITS_PER_LONG_DIV_8)) * \ + (BITS_PER_LONG_DIV_8)) + \ + (BITS_PER_LONG_DIV_8)) - \ + MAX_DEV_REQ_DOWNSIZE)) + +/* + * valid pvfs2 kernel operation states + * + * unknown - op was just initialized + * waiting - op is on request_list (upward bound) + * inprogr - op is in progress (waiting for downcall) + * serviced - op has matching downcall; ok + * purged - op has to start a timer since client-core + * exited uncleanly before servicing op + */ +enum pvfs2_vfs_op_states { + OP_VFS_STATE_UNKNOWN = 0, + OP_VFS_STATE_WAITING = 1, + OP_VFS_STATE_INPROGR = 2, + OP_VFS_STATE_SERVICED = 4, + OP_VFS_STATE_PURGED = 8, +}; + +#define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING) +#define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR) +#define set_op_state_serviced(op) ((op)->op_state = OP_VFS_STATE_SERVICED) +#define set_op_state_purged(op) ((op)->op_state |= OP_VFS_STATE_PURGED) + +#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) +#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) +#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) +#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) + +#define get_op(op) \ + do { \ + atomic_inc(&(op)->aio_ref_count); \ + gossip_debug(GOSSIP_DEV_DEBUG, \ + "(get) Alloced OP (%p:%llu)\n", \ + op, \ + llu((op)->tag)); \ + } while (0) + +#define put_op(op) \ + do { \ + if (atomic_sub_and_test(1, &(op)->aio_ref_count) == 1) { \ + gossip_debug(GOSSIP_DEV_DEBUG, \ + "(put) Releasing OP (%p:%llu)\n", \ + op, \ + llu((op)->tag)); \ + op_release(op); \ + } \ + } while (0) + +#define op_wait(op) (atomic_read(&(op)->aio_ref_count) <= 2 ? 0 : 1) + +/* + * Defines for controlling whether I/O upcalls are for async or sync operations + */ +enum PVFS_async_io_type { + PVFS_VFS_SYNC_IO = 0, + PVFS_VFS_ASYNC_IO = 1, +}; + +/* + * An array of client_debug_mask will be built to hold debug keyword/mask + * values fetched from userspace. + */ +struct client_debug_mask { + char *keyword; + __u64 mask1; + __u64 mask2; +}; + +/* + * pvfs2 kernel memory related flags + */ + +#if ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) +#define PVFS2_CACHE_CREATE_FLAGS SLAB_RED_ZONE +#else +#define PVFS2_CACHE_CREATE_FLAGS 0 +#endif /* ((defined PVFS2_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ + +#define PVFS2_CACHE_ALLOC_FLAGS (GFP_KERNEL) +#define PVFS2_GFP_FLAGS (GFP_KERNEL) +#define PVFS2_BUFMAP_GFP_FLAGS (GFP_KERNEL) + +#define pvfs2_kmap(page) kmap(page) +#define pvfs2_kunmap(page) kunmap(page) + +/* pvfs2 xattr and acl related defines */ +#define PVFS2_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define PVFS2_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define PVFS2_XATTR_INDEX_TRUSTED 3 +#define PVFS2_XATTR_INDEX_DEFAULT 4 + +#if 0 +#ifndef POSIX_ACL_XATTR_ACCESS +#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access" +#endif +#ifndef POSIX_ACL_XATTR_DEFAULT +#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default" +#endif +#endif + +#define PVFS2_XATTR_NAME_ACL_ACCESS POSIX_ACL_XATTR_ACCESS +#define PVFS2_XATTR_NAME_ACL_DEFAULT POSIX_ACL_XATTR_DEFAULT +#define PVFS2_XATTR_NAME_TRUSTED_PREFIX "trusted." +#define PVFS2_XATTR_NAME_DEFAULT_PREFIX "" + +/* these functions are defined in pvfs2-utils.c */ +int orangefs_prepare_cdm_array(char *debug_array_string); +int orangefs_prepare_debugfs_help_string(int); + +/* defined in pvfs2-debugfs.c */ +int pvfs2_client_debug_init(void); + +void debug_string_to_mask(char *, void *, int); +void do_c_mask(int, char *, struct client_debug_mask **); +void do_k_mask(int, char *, __u64 **); + +void debug_mask_to_string(void *, int); +void do_k_string(void *, int); +void do_c_string(void *, int); +int check_amalgam_keyword(void *, int); +int keyword_is_amalgam(char *); + +/*these variables are defined in pvfs2-mod.c */ +extern char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN]; +extern char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN]; +/* HELLO +extern struct client_debug_mask current_client_mask; +*/ +extern unsigned int kernel_mask_set_mod_init; + +extern int pvfs2_init_acl(struct inode *inode, struct inode *dir); +extern const struct xattr_handler *pvfs2_xattr_handlers[]; + +extern struct posix_acl *pvfs2_get_acl(struct inode *inode, int type); +extern int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); + +int pvfs2_xattr_set_default(struct dentry *dentry, + const char *name, + const void *buffer, + size_t size, + int flags, + int handler_flags); + +int pvfs2_xattr_get_default(struct dentry *dentry, + const char *name, + void *buffer, + size_t size, + int handler_flags); + +/* + * Redefine xtvec structure so that we could move helper functions out of + * the define + */ +struct xtvec { + __kernel_off_t xtv_off; /* must be off_t */ + __kernel_size_t xtv_len; /* must be size_t */ +}; + +/* + * pvfs2 data structures + */ +struct pvfs2_kernel_op_s { + enum pvfs2_vfs_op_states op_state; + __u64 tag; + + /* + * Set uses_shared_memory to 1 if this operation uses shared memory. + * If true, then a retry on the op must also get a new shared memory + * buffer and re-populate it. + */ + int uses_shared_memory; + + struct pvfs2_upcall_s upcall; + struct pvfs2_downcall_s downcall; + + wait_queue_head_t waitq; + spinlock_t lock; + + int io_completed; + wait_queue_head_t io_completion_waitq; + + /* + * upcalls requiring variable length trailers require that this struct + * be in the request list even after client-core does a read() on the + * device to dequeue the upcall. + * if op_linger field goes to 0, we dequeue this op off the list. + * else we let it stay. What gets passed to the read() is + * a) if op_linger field is = 1, pvfs2_kernel_op_s itself + * b) else if = 0, we pass ->upcall.trailer_buf + * We expect to have only a single upcall trailer buffer, + * so we expect callers with trailers + * to set this field to 2 and others to set it to 1. + */ + __s32 op_linger, op_linger_tmp; + /* VFS aio fields */ + + /* used by the async I/O code to stash the pvfs2_kiocb_s structure */ + void *priv; + + /* used again for the async I/O code for deallocation */ + atomic_t aio_ref_count; + + int attempts; + + struct list_head list; +}; + +/* per inode private pvfs2 info */ +struct pvfs2_inode_s { + struct pvfs2_object_kref refn; + char link_target[PVFS_NAME_MAX]; + __s64 blksize; + /* + * Reading/Writing Extended attributes need to acquire the appropriate + * reader/writer semaphore on the pvfs2_inode_s structure. + */ + struct rw_semaphore xattr_sem; + + struct inode vfs_inode; + sector_t last_failed_block_index_read; + + /* + * State of in-memory attributes not yet flushed to disk associated + * with this object + */ + unsigned long pinode_flags; + + /* All allocated pvfs2_inode_s objects are chained to a list */ + struct list_head list; +}; + +#define P_ATIME_FLAG 0 +#define P_MTIME_FLAG 1 +#define P_CTIME_FLAG 2 +#define P_MODE_FLAG 3 + +#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) +#define SetAtimeFlag(pinode) set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) +#define AtimeFlag(pinode) test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) + +#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) +#define SetMtimeFlag(pinode) set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) +#define MtimeFlag(pinode) test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) + +#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) +#define SetCtimeFlag(pinode) set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) +#define CtimeFlag(pinode) test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) + +#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags) +#define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags) +#define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags) + +/* per superblock private pvfs2 info */ +struct pvfs2_sb_info_s { + struct pvfs2_khandle root_khandle; + __s32 fs_id; + int id; + int flags; +#define PVFS2_OPT_INTR 0x01 +#define PVFS2_OPT_LOCAL_LOCK 0x02 + char devname[PVFS_MAX_SERVER_ADDR_LEN]; + struct super_block *sb; + int mount_pending; + struct list_head list; +}; + +/* + * a temporary structure used only for sb mount time that groups the + * mount time data provided along with a private superblock structure + * that is allocated before a 'kernel' superblock is allocated. +*/ +struct pvfs2_mount_sb_info_s { + void *data; + struct pvfs2_khandle root_khandle; + __s32 fs_id; + int id; +}; + +/* + * structure that holds the state of any async I/O operation issued + * through the VFS. Needed especially to handle cancellation requests + * or even completion notification so that the VFS client-side daemon + * can free up its vfs_request slots. + */ +struct pvfs2_kiocb_s { + /* the pointer to the task that initiated the AIO */ + struct task_struct *tsk; + + /* pointer to the kiocb that kicked this operation */ + struct kiocb *kiocb; + + /* buffer index that was used for the I/O */ + struct pvfs2_bufmap *bufmap; + int buffer_index; + + /* pvfs2 kernel operation type */ + struct pvfs2_kernel_op_s *op; + + /* The user space buffers from/to which I/O is being staged */ + struct iovec *iov; + + /* number of elements in the iovector */ + unsigned long nr_segs; + + /* set to indicate the type of the operation */ + int rw; + + /* file offset */ + loff_t offset; + + /* and the count in bytes */ + size_t bytes_to_be_copied; + + ssize_t bytes_copied; + int needs_cleanup; +}; + +struct pvfs2_stats { + unsigned long cache_hits; + unsigned long cache_misses; + unsigned long reads; + unsigned long writes; +}; + +extern struct pvfs2_stats g_pvfs2_stats; + +/* + NOTE: See Documentation/filesystems/porting for information + on implementing FOO_I and properly accessing fs private data +*/ +static inline struct pvfs2_inode_s *PVFS2_I(struct inode *inode) +{ + return container_of(inode, struct pvfs2_inode_s, vfs_inode); +} + +static inline struct pvfs2_sb_info_s *PVFS2_SB(struct super_block *sb) +{ + return (struct pvfs2_sb_info_s *) sb->s_fs_info; +} + +/* ino_t descends from "unsigned long", 8 bytes, 64 bits. */ +static inline ino_t pvfs2_khandle_to_ino(struct pvfs2_khandle *khandle) +{ + union { + unsigned char u[8]; + __u64 ino; + } ihandle; + + ihandle.u[0] = khandle->u[0] ^ khandle->u[4]; + ihandle.u[1] = khandle->u[1] ^ khandle->u[5]; + ihandle.u[2] = khandle->u[2] ^ khandle->u[6]; + ihandle.u[3] = khandle->u[3] ^ khandle->u[7]; + ihandle.u[4] = khandle->u[12] ^ khandle->u[8]; + ihandle.u[5] = khandle->u[13] ^ khandle->u[9]; + ihandle.u[6] = khandle->u[14] ^ khandle->u[10]; + ihandle.u[7] = khandle->u[15] ^ khandle->u[11]; + + return ihandle.ino; +} + +static inline struct pvfs2_khandle *get_khandle_from_ino(struct inode *inode) +{ + return &(PVFS2_I(inode)->refn.khandle); +} + +static inline __s32 get_fsid_from_ino(struct inode *inode) +{ + return PVFS2_I(inode)->refn.fs_id; +} + +static inline ino_t get_ino_from_khandle(struct inode *inode) +{ + struct pvfs2_khandle *khandle; + ino_t ino; + + khandle = get_khandle_from_ino(inode); + ino = pvfs2_khandle_to_ino(khandle); + return ino; +} + +static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry) +{ + return get_ino_from_khandle(dentry->d_parent->d_inode); +} + +static inline int is_root_handle(struct inode *inode) +{ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: root handle: %pU, this handle: %pU:\n", + __func__, + &PVFS2_SB(inode->i_sb)->root_khandle, + get_khandle_from_ino(inode)); + + if (PVFS_khandle_cmp(&(PVFS2_SB(inode->i_sb)->root_khandle), + get_khandle_from_ino(inode))) + return 0; + else + return 1; +} + +static inline int match_handle(struct pvfs2_khandle resp_handle, + struct inode *inode) +{ + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: one handle: %pU, another handle:%pU:\n", + __func__, + &resp_handle, + get_khandle_from_ino(inode)); + + if (PVFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode))) + return 0; + else + return 1; +} + +/* + * defined in pvfs2-cache.c + */ +int op_cache_initialize(void); +int op_cache_finalize(void); +struct pvfs2_kernel_op_s *op_alloc(__s32 type); +struct pvfs2_kernel_op_s *op_alloc_trailer(__s32 type); +char *get_opname_string(struct pvfs2_kernel_op_s *new_op); +void op_release(struct pvfs2_kernel_op_s *op); + +int dev_req_cache_initialize(void); +int dev_req_cache_finalize(void); +void *dev_req_alloc(void); +void dev_req_release(void *); + +int pvfs2_inode_cache_initialize(void); +int pvfs2_inode_cache_finalize(void); + +int kiocb_cache_initialize(void); +int kiocb_cache_finalize(void); +struct pvfs2_kiocb_s *kiocb_alloc(void); +void kiocb_release(struct pvfs2_kiocb_s *ptr); + +/* + * defined in pvfs2-mod.c + */ +void purge_inprogress_ops(void); + +/* + * defined in waitqueue.c + */ +int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op); +int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op); +void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op); +void purge_waiting_ops(void); + +/* + * defined in super.c + */ +struct dentry *pvfs2_mount(struct file_system_type *fst, + int flags, + const char *devname, + void *data); + +void pvfs2_kill_sb(struct super_block *sb); +int pvfs2_remount(struct super_block *sb); + +int fsid_key_table_initialize(void); +void fsid_key_table_finalize(void); + +/* + * defined in inode.c + */ +__u32 convert_to_pvfs2_mask(unsigned long lite_mask); +struct inode *pvfs2_new_inode(struct super_block *sb, + struct inode *dir, + int mode, + dev_t dev, + struct pvfs2_object_kref *ref); + +int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr); + +int pvfs2_getattr(struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *kstat); + +/* + * defined in xattr.c + */ +int pvfs2_setxattr(struct dentry *dentry, + const char *name, + const void *value, + size_t size, + int flags); + +ssize_t pvfs2_getxattr(struct dentry *dentry, + const char *name, + void *buffer, + size_t size); + +ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); + +/* + * defined in namei.c + */ +struct inode *pvfs2_iget(struct super_block *sb, + struct pvfs2_object_kref *ref); + +ssize_t pvfs2_inode_read(struct inode *inode, + char *buf, + size_t count, + loff_t *offset, + loff_t readahead_size); + +/* + * defined in devpvfs2-req.c + */ +int pvfs2_dev_init(void); +void pvfs2_dev_cleanup(void); +int is_daemon_in_service(void); +int fs_mount_pending(__s32 fsid); + +/* + * defined in pvfs2-utils.c + */ +__s32 fsid_of_op(struct pvfs2_kernel_op_s *op); + +int pvfs2_flush_inode(struct inode *inode); + +ssize_t pvfs2_inode_getxattr(struct inode *inode, + const char *prefix, + const char *name, + void *buffer, + size_t size); + +int pvfs2_inode_setxattr(struct inode *inode, + const char *prefix, + const char *name, + const void *value, + size_t size, + int flags); + +int pvfs2_inode_getattr(struct inode *inode, __u32 mask); + +int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr); + +void pvfs2_op_initialize(struct pvfs2_kernel_op_s *op); + +void pvfs2_make_bad_inode(struct inode *inode); + +void mask_blocked_signals(sigset_t *orig_sigset); + +void unmask_blocked_signals(sigset_t *orig_sigset); + +int pvfs2_unmount_sb(struct super_block *sb); + +int pvfs2_cancel_op_in_progress(__u64 tag); + +__u64 pvfs2_convert_time_field(void *time_ptr); + +int pvfs2_normalize_to_errno(__s32 error_code); + +extern struct mutex devreq_mutex; +extern struct mutex request_mutex; +extern int debug; +extern int op_timeout_secs; +extern int slot_timeout_secs; +extern struct list_head pvfs2_superblocks; +extern spinlock_t pvfs2_superblocks_lock; +extern struct list_head pvfs2_request_list; +extern spinlock_t pvfs2_request_list_lock; +extern wait_queue_head_t pvfs2_request_list_waitq; +extern struct list_head *htable_ops_in_progress; +extern spinlock_t htable_ops_in_progress_lock; +extern int hash_table_size; + +extern const struct address_space_operations pvfs2_address_operations; +extern struct backing_dev_info pvfs2_backing_dev_info; +extern struct inode_operations pvfs2_file_inode_operations; +extern const struct file_operations pvfs2_file_operations; +extern struct inode_operations pvfs2_symlink_inode_operations; +extern struct inode_operations pvfs2_dir_inode_operations; +extern const struct file_operations pvfs2_dir_operations; +extern const struct dentry_operations pvfs2_dentry_operations; +extern const struct file_operations pvfs2_devreq_file_operations; + +extern wait_queue_head_t pvfs2_bufmap_init_waitq; + +/* + * misc convenience macros + */ +#define add_op_to_request_list(op) \ +do { \ + spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&op->lock); \ + set_op_state_waiting(op); \ + list_add_tail(&op->list, &pvfs2_request_list); \ + spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&op->lock); \ + wake_up_interruptible(&pvfs2_request_list_waitq); \ +} while (0) + +#define add_priority_op_to_request_list(op) \ + do { \ + spin_lock(&pvfs2_request_list_lock); \ + spin_lock(&op->lock); \ + set_op_state_waiting(op); \ + \ + list_add(&op->list, &pvfs2_request_list); \ + spin_unlock(&pvfs2_request_list_lock); \ + spin_unlock(&op->lock); \ + wake_up_interruptible(&pvfs2_request_list_waitq); \ +} while (0) + +#define remove_op_from_request_list(op) \ + do { \ + struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ + struct pvfs2_kernel_op_s *tmp_op = NULL; \ + \ + spin_lock(&pvfs2_request_list_lock); \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_request_list) { \ + tmp_op = list_entry(tmp, \ + struct pvfs2_kernel_op_s, \ + list); \ + if (tmp_op && (tmp_op == op)) { \ + list_del(&tmp_op->list); \ + break; \ + } \ + } \ + spin_unlock(&pvfs2_request_list_lock); \ + } while (0) + +#define PVFS2_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */ +#define PVFS2_OP_PRIORITY 2 /* service_operation() is high priority */ +#define PVFS2_OP_CANCELLATION 4 /* this is a cancellation */ +#define PVFS2_OP_NO_SEMAPHORE 8 /* don't acquire semaphore */ +#define PVFS2_OP_ASYNC 16 /* Queue it, but don't wait */ + +int service_operation(struct pvfs2_kernel_op_s *op, + const char *op_name, + int flags); + +/* + * handles two possible error cases, depending on context. + * + * by design, our vfs i/o errors need to be handled in one of two ways, + * depending on where the error occured. + * + * if the error happens in the waitqueue code because we either timed + * out or a signal was raised while waiting, we need to cancel the + * userspace i/o operation and free the op manually. this is done to + * avoid having the device start writing application data to our shared + * bufmap pages without us expecting it. + * + * FIXME: POSSIBLE OPTIMIZATION: + * However, if we timed out or if we got a signal AND our upcall was never + * picked off the queue (i.e. we were in OP_VFS_STATE_WAITING), then we don't + * need to send a cancellation upcall. The way we can handle this is + * set error_exit to 2 in such cases and 1 whenever cancellation has to be + * sent and have handle_error + * take care of this situation as well.. + * + * if a pvfs2 sysint level error occured and i/o has been completed, + * there is no need to cancel the operation, as the user has finished + * using the bufmap page and so there is no danger in this case. in + * this case, we wake up the device normally so that it may free the + * op, as normal. + * + * note the only reason this is a macro is because both read and write + * cases need the exact same handling code. + */ +#define handle_io_error() \ +do { \ + if (!op_state_serviced(new_op)) { \ + pvfs2_cancel_op_in_progress(new_op->tag); \ + op_release(new_op); \ + } else { \ + wake_up_daemon_for_return(new_op); \ + } \ + new_op = NULL; \ + pvfs_bufmap_put(bufmap, buffer_index); \ + buffer_index = -1; \ +} while (0) + +#define get_interruptible_flag(inode) \ + ((PVFS2_SB(inode->i_sb)->flags & PVFS2_OPT_INTR) ? \ + PVFS2_OP_INTERRUPTIBLE : 0) + +#define add_pvfs2_sb(sb) \ +do { \ + gossip_debug(GOSSIP_SUPER_DEBUG, \ + "Adding SB %p to pvfs2 superblocks\n", \ + PVFS2_SB(sb)); \ + spin_lock(&pvfs2_superblocks_lock); \ + list_add_tail(&PVFS2_SB(sb)->list, &pvfs2_superblocks); \ + spin_unlock(&pvfs2_superblocks_lock); \ +} while (0) + +#define remove_pvfs2_sb(sb) \ +do { \ + struct list_head *tmp = NULL; \ + struct list_head *tmp_safe = NULL; \ + struct pvfs2_sb_info_s *pvfs2_sb = NULL; \ + \ + spin_lock(&pvfs2_superblocks_lock); \ + list_for_each_safe(tmp, tmp_safe, &pvfs2_superblocks) { \ + pvfs2_sb = list_entry(tmp, \ + struct pvfs2_sb_info_s, \ + list); \ + if (pvfs2_sb && (pvfs2_sb->sb == sb)) { \ + gossip_debug(GOSSIP_SUPER_DEBUG, \ + "Removing SB %p from pvfs2 superblocks\n", \ + pvfs2_sb); \ + list_del(&pvfs2_sb->list); \ + break; \ + } \ + } \ + spin_unlock(&pvfs2_superblocks_lock); \ +} while (0) + +#define pvfs2_lock_inode(inode) spin_lock(&inode->i_lock) +#define pvfs2_unlock_inode(inode) spin_unlock(&inode->i_lock) +#define pvfs2_current_signal_lock current->sighand->siglock +#define pvfs2_current_sigaction current->sighand->action + +#define fill_default_sys_attrs(sys_attr, type, mode) \ +do { \ + sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \ + sys_attr.group = from_kgid(current_user_ns(), current_fsgid()); \ + sys_attr.size = 0; \ + sys_attr.perms = PVFS_util_translate_mode(mode); \ + sys_attr.objtype = type; \ + sys_attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; \ +} while (0) + +#define pvfs2_inode_lock(__i) mutex_lock(&(__i)->i_mutex) + +#define pvfs2_inode_unlock(__i) mutex_unlock(&(__i)->i_mutex) + +static inline void pvfs2_i_size_write(struct inode *inode, loff_t i_size) +{ +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + pvfs2_inode_lock(inode); +#endif + i_size_write(inode, i_size); +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) + pvfs2_inode_unlock(inode); +#endif +} + +static inline unsigned int diff(struct timeval *end, struct timeval *begin) +{ + if (end->tv_usec < begin->tv_usec) { + end->tv_usec += 1000000; + end->tv_sec--; + } + end->tv_sec -= begin->tv_sec; + end->tv_usec -= begin->tv_usec; + return (end->tv_sec * 1000000) + end->tv_usec; +} + +#endif /* __PVFS2KERNEL_H */ diff --git a/fs/orangefs/pvfs2-sysfs.h b/fs/orangefs/pvfs2-sysfs.h new file mode 100644 index 000000000000..f0b76382db02 --- /dev/null +++ b/fs/orangefs/pvfs2-sysfs.h @@ -0,0 +1,2 @@ +extern int orangefs_sysfs_init(void); +extern void orangefs_sysfs_exit(void); diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h new file mode 100644 index 000000000000..1e07f626aac6 --- /dev/null +++ b/fs/orangefs/upcall.h @@ -0,0 +1,255 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#ifndef __UPCALL_H +#define __UPCALL_H + +/* + * Sanitized this header file to fix + * 32-64 bit interaction issues between + * client-core and device + */ +struct pvfs2_io_request_s { + __s32 async_vfs_io; + __s32 buf_index; + __s32 count; + __s32 __pad1; + __s64 offset; + struct pvfs2_object_kref refn; + enum PVFS_io_type io_type; + __s32 readahead_size; +}; + +struct pvfs2_iox_request_s { + __s32 buf_index; + __s32 count; + struct pvfs2_object_kref refn; + enum PVFS_io_type io_type; + __s32 __pad1; +}; + +struct pvfs2_lookup_request_s { + __s32 sym_follow; + __s32 __pad1; + struct pvfs2_object_kref parent_refn; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_create_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_symlink_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char entry_name[PVFS2_NAME_LEN]; + char target[PVFS2_NAME_LEN]; +}; + +struct pvfs2_getattr_request_s { + struct pvfs2_object_kref refn; + __u32 mask; + __u32 __pad1; +}; + +struct pvfs2_setattr_request_s { + struct pvfs2_object_kref refn; + struct PVFS_sys_attr_s attributes; +}; + +struct pvfs2_remove_request_s { + struct pvfs2_object_kref parent_refn; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_mkdir_request_s { + struct pvfs2_object_kref parent_refn; + struct PVFS_sys_attr_s attributes; + char d_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_readdir_request_s { + struct pvfs2_object_kref refn; + __u64 token; + __s32 max_dirent_count; + __s32 buf_index; +}; + +struct pvfs2_readdirplus_request_s { + struct pvfs2_object_kref refn; + __u64 token; + __s32 max_dirent_count; + __u32 mask; + __s32 buf_index; + __s32 __pad1; +}; + +struct pvfs2_rename_request_s { + struct pvfs2_object_kref old_parent_refn; + struct pvfs2_object_kref new_parent_refn; + char d_old_name[PVFS2_NAME_LEN]; + char d_new_name[PVFS2_NAME_LEN]; +}; + +struct pvfs2_statfs_request_s { + __s32 fs_id; + __s32 __pad1; +}; + +struct pvfs2_truncate_request_s { + struct pvfs2_object_kref refn; + __s64 size; +}; + +struct pvfs2_mmap_ra_cache_flush_request_s { + struct pvfs2_object_kref refn; +}; + +struct pvfs2_fs_mount_request_s { + char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN]; +}; + +struct pvfs2_fs_umount_request_s { + __s32 id; + __s32 fs_id; + char pvfs2_config_server[PVFS_MAX_SERVER_ADDR_LEN]; +}; + +struct pvfs2_getxattr_request_s { + struct pvfs2_object_kref refn; + __s32 key_sz; + __s32 __pad1; + char key[PVFS_MAX_XATTR_NAMELEN]; +}; + +struct pvfs2_setxattr_request_s { + struct pvfs2_object_kref refn; + struct PVFS_keyval_pair keyval; + __s32 flags; + __s32 __pad1; +}; + +struct pvfs2_listxattr_request_s { + struct pvfs2_object_kref refn; + __s32 requested_count; + __s32 __pad1; + __u64 token; +}; + +struct pvfs2_removexattr_request_s { + struct pvfs2_object_kref refn; + __s32 key_sz; + __s32 __pad1; + char key[PVFS_MAX_XATTR_NAMELEN]; +}; + +struct pvfs2_op_cancel_s { + __u64 op_tag; +}; + +struct pvfs2_fsync_request_s { + struct pvfs2_object_kref refn; +}; + +enum pvfs2_param_request_type { + PVFS2_PARAM_REQUEST_SET = 1, + PVFS2_PARAM_REQUEST_GET = 2 +}; + +enum pvfs2_param_request_op { + PVFS2_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1, + PVFS2_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2, + PVFS2_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3, + PVFS2_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4, + PVFS2_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5, + PVFS2_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6, + PVFS2_PARAM_REQUEST_OP_PERF_RESET = 7, + PVFS2_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8, + PVFS2_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9, + PVFS2_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10, + PVFS2_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14, + PVFS2_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15, + PVFS2_PARAM_REQUEST_OP_CLIENT_DEBUG = 16, + PVFS2_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17, + PVFS2_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18, + PVFS2_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19, + PVFS2_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23, + PVFS2_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24, + PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25, +}; + +struct pvfs2_param_request_s { + enum pvfs2_param_request_type type; + enum pvfs2_param_request_op op; + __s64 value; + char s_value[PVFS2_MAX_DEBUG_STRING_LEN]; +}; + +enum pvfs2_perf_count_request_type { + PVFS2_PERF_COUNT_REQUEST_ACACHE = 1, + PVFS2_PERF_COUNT_REQUEST_NCACHE = 2, + PVFS2_PERF_COUNT_REQUEST_CAPCACHE = 3, +}; + +struct pvfs2_perf_count_request_s { + enum pvfs2_perf_count_request_type type; + __s32 __pad1; +}; + +struct pvfs2_fs_key_request_s { + __s32 fsid; + __s32 __pad1; +}; + +struct pvfs2_upcall_s { + __s32 type; + __u32 uid; + __u32 gid; + int pid; + int tgid; + /* currently trailer is used only by readx/writex (iox) */ + __s64 trailer_size; + char *trailer_buf; + + union { + struct pvfs2_io_request_s io; + struct pvfs2_iox_request_s iox; + struct pvfs2_lookup_request_s lookup; + struct pvfs2_create_request_s create; + struct pvfs2_symlink_request_s sym; + struct pvfs2_getattr_request_s getattr; + struct pvfs2_setattr_request_s setattr; + struct pvfs2_remove_request_s remove; + struct pvfs2_mkdir_request_s mkdir; + struct pvfs2_readdir_request_s readdir; + struct pvfs2_readdirplus_request_s readdirplus; + struct pvfs2_rename_request_s rename; + struct pvfs2_statfs_request_s statfs; + struct pvfs2_truncate_request_s truncate; + struct pvfs2_mmap_ra_cache_flush_request_s ra_cache_flush; + struct pvfs2_fs_mount_request_s fs_mount; + struct pvfs2_fs_umount_request_s fs_umount; + struct pvfs2_getxattr_request_s getxattr; + struct pvfs2_setxattr_request_s setxattr; + struct pvfs2_listxattr_request_s listxattr; + struct pvfs2_removexattr_request_s removexattr; + struct pvfs2_op_cancel_s cancel; + struct pvfs2_fsync_request_s fsync; + struct pvfs2_param_request_s param; + struct pvfs2_perf_count_request_s perf_count; + struct pvfs2_fs_key_request_s fs_key; + } req; +}; + +#endif /* __UPCALL_H */