preadv/pwritev: Add preadv and pwritev system calls.
author Gerd Hoffmann <kraxel@redhat.com>
Thu, 2 Apr 2009 23:59:23 +0000 (16:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Apr 2009 02:05:08 +0000 (19:05 -0700)
This patch adds preadv and pwritev system calls.  These syscalls are a
pretty straightforward combination of pread and readv (same for write).
They are quite useful for doing vectored I/O in threaded applications.
Using lseek+readv instead opens race windows you'll have to plug with
locking.

Other systems have such system calls too, for example NetBSD, check
here: http://www.daemon-systems.org/man/preadv.2.html

The application-visible interface provided by glibc should look like this
to be compatible with the existing implementations in the *BSD family:

  ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset);
  ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset);

This prototype has one problem though: On 32-bit archs the (64-bit)
offset argument is unaligned, which the syscall ABI of several archs
doesn't allow.  At least s390 needs a wrapper in glibc to handle this.  As
we'll need wrappers in glibc anyway, I've decided to push the problem to
glibc entirely and use a syscall prototype which works without
arch-specific wrappers inside the kernel: The offset argument is
explicitly split into two 32-bit values.

The patch sports the actual system call implementation and the wire-up in
the x86 system call tables.  Other archs follow as separate patches.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/unistd_32.h
arch/x86/include/asm/unistd_64.h
arch/x86/kernel/syscall_table_32.S
fs/compat.c
fs/read_write.c
include/linux/compat.h
include/linux/syscalls.h

index db0c803170ab9e925dbfe2a22c17d0be23824d6e..a505202086e8741a916e696ccfb664d3f3e0f8ef 100644 (file)
@@ -828,4 +828,6 @@ ia32_sys_call_table:
        .quad sys_dup3                  /* 330 */
        .quad sys_pipe2
        .quad sys_inotify_init1
+       .quad compat_sys_preadv
+       .quad compat_sys_pwritev
 ia32_syscall_end:
index f2bba78430a4d2628ca83203e193dcb6ee78a247..6e72d74cf8dc74b7720f5cb79ba355a926e7fa6e 100644 (file)
 #define __NR_dup3              330
 #define __NR_pipe2             331
 #define __NR_inotify_init1     332
+#define __NR_preadv            333
+#define __NR_pwritev           334
 
 #ifdef __KERNEL__
 
index d2e415e6666f63d314270ef57a26dae73e3fac01..f81829462325f6328a6e6d9c3667da02e9f616d0 100644 (file)
@@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
 __SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1                     294
 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv                            295
+__SYSCALL(__NR_preadv, sys_preadv)
+#define __NR_pwritev                           296
+__SYSCALL(__NR_pwritev, sys_pwritev)
 
 
 #ifndef __NO_STUBS
index 3bdb64829b82718f42bafed884f55d1dbe9e65a2..ff5c8736b491b8ff2c4835c5a19a51ad63633de1 100644 (file)
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
        .long sys_dup3                  /* 330 */
        .long sys_pipe2
        .long sys_inotify_init1
+       .long sys_preadv
+       .long sys_pwritev
index e04b4660db846527f81f995de39a5e52f16b2e7f..7c1615183d1ec6b6e7f94562c03c83501b770195 100644 (file)
@@ -1232,6 +1232,24 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
        return ret;
 }
 
+asmlinkage ssize_t
+compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
+                 unsigned long vlen, u32 pos_high, u32 pos_low)
+{
+       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+       struct file *file;
+       ssize_t ret;
+
+       if (pos < 0)
+               return -EINVAL;
+       file = fget(fd);
+       if (!file)
+               return -EBADF;
+       ret = compat_readv(file, vec, vlen, &pos);
+       fput(file);
+       return ret;
+}
+
 static size_t compat_writev(struct file *file,
                            const struct compat_iovec __user *vec,
                            unsigned long vlen, loff_t *pos)
@@ -1269,6 +1287,24 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
        return ret;
 }
 
+asmlinkage ssize_t
+compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
+                  unsigned long vlen, u32 pos_high, u32 pos_low)
+{
+       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+       struct file *file;
+       ssize_t ret;
+
+       if (pos < 0)
+               return -EINVAL;
+       file = fget(fd);
+       if (!file)
+               return -EBADF;
+       ret = compat_writev(file, vec, vlen, &pos);
+       fput(file);
+       return ret;
+}
+
 asmlinkage long
 compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
                    unsigned int nr_segs, unsigned int flags)
index 400fe81c973e90cccdb94c814325bc3ce4e39d19..6d5d8ff238aa50cdce17c4126f775d9af9b00b8f 100644 (file)
@@ -731,6 +731,56 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
        return ret;
 }
 
+SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
+               unsigned long, vlen, u32, pos_high, u32, pos_low)
+{
+       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+       struct file *file;
+       ssize_t ret = -EBADF;
+       int fput_needed;
+
+       if (pos < 0)
+               return -EINVAL;
+
+       file = fget_light(fd, &fput_needed);
+       if (file) {
+               ret = -ESPIPE;
+               if (file->f_mode & FMODE_PREAD)
+                       ret = vfs_readv(file, vec, vlen, &pos);
+               fput_light(file, fput_needed);
+       }
+
+       if (ret > 0)
+               add_rchar(current, ret);
+       inc_syscr(current);
+       return ret;
+}
+
+SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
+               unsigned long, vlen, u32, pos_high, u32, pos_low)
+{
+       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+       struct file *file;
+       ssize_t ret = -EBADF;
+       int fput_needed;
+
+       if (pos < 0)
+               return -EINVAL;
+
+       file = fget_light(fd, &fput_needed);
+       if (file) {
+               ret = -ESPIPE;
+               if (file->f_mode & FMODE_PWRITE)
+                       ret = vfs_writev(file, vec, vlen, &pos);
+               fput_light(file, fput_needed);
+       }
+
+       if (ret > 0)
+               add_wchar(current, ret);
+       inc_syscw(current);
+       return ret;
+}
+
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                           size_t count, loff_t max)
 {
index b880864672de75e09cfd522f1f6df1fded3ba56b..9723edd6455cb8d612923f8a5706fb691e2a1525 100644 (file)
@@ -191,6 +191,12 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd,
                const struct compat_iovec __user *vec, unsigned long vlen);
 asmlinkage ssize_t compat_sys_writev(unsigned long fd,
                const struct compat_iovec __user *vec, unsigned long vlen);
+asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
+               const struct compat_iovec __user *vec,
+               unsigned long vlen, u32 pos_high, u32 pos_low);
+asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
+               const struct compat_iovec __user *vec,
+               unsigned long vlen, u32 pos_high, u32 pos_low);
 
 int compat_do_execve(char * filename, compat_uptr_t __user *argv,
                compat_uptr_t __user *envp, struct pt_regs * regs);
index f9f900cfd066f1ad19ac979dba0c48f2384b95e2..b299a82a05e7abd369f6b30d9b7182238112b685 100644 (file)
@@ -461,6 +461,10 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf,
                            size_t count, loff_t pos);
 asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
                             size_t count, loff_t pos);
+asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
+                          unsigned long vlen, u32 pos_high, u32 pos_low);
+asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
+                           unsigned long vlen, u32 pos_high, u32 pos_low);
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
 asmlinkage long sys_mkdir(const char __user *pathname, int mode);
 asmlinkage long sys_chdir(const char __user *filename);