uml: start fixing os_read_file and os_write_file
author Jeff Dike <jdike@addtoit.com>
Sun, 6 May 2007 21:51:32 +0000 (14:51 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Mon, 7 May 2007 19:13:03 +0000 (12:13 -0700)
This patch starts the removal of a very old, very broken piece of code.  This
stems from the problem of passing a userspace buffer into read() or write() on
the host.  If that buffer had not yet been faulted in, read and write will
return -EFAULT.

To avoid this problem, the solution was to fault the buffer in before the
system call by touching the pages that hold the buffer by doing a copy-user of
a byte to each page.  This is obviously bogus, but it does usually work, in tt
mode, since the kernel and process are in the same address space and userspace
addresses can be accessed directly in the kernel.

In skas mode, where the kernel and process are in separate address spaces, it
is completely bogus because the userspace address, which is invalid in the
kernel, is passed into the system call instead of the corresponding physical
address, which would be valid.  Here, it appears that this code, on every host
read() or write(), tries to fault in a random process page.  This doesn't seem
to cause any correctness problems, but there is a performance impact.  This
patch, and the ones following, result in a 10-15% performance gain on a kernel
build.

This code can't be immediately tossed out because when it is, you can't log
in.  Apparently, there is some code in the console driver which depends on
this somehow.

However, we can start removing it by switching the code which does I/O using
kernel addresses to using plain read() and write().  This patch introduces
os_read_file_k and os_write_file_k for use with kernel buffers and converts
all call locations which use obvious kernel buffers to use them.  These
include I/O using buffers which are either local variables on the stack or
kmalloc-ed.  Later patches will handle the less obvious cases, followed by a
mass conversion back to the original interface.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
19 files changed:
arch/um/drivers/chan_user.c
arch/um/drivers/daemon_user.c
arch/um/drivers/harddog_user.c
arch/um/drivers/hostaudio_kern.c
arch/um/drivers/net_user.c
arch/um/drivers/port_kern.c
arch/um/drivers/random.c
arch/um/drivers/ubd_kern.c
arch/um/include/os.h
arch/um/kernel/ksyms.c
arch/um/kernel/physmem.c
arch/um/kernel/sigio.c
arch/um/kernel/smp.c
arch/um/kernel/tt/process_kern.c
arch/um/kernel/tt/ptproxy/proxy.c
arch/um/kernel/tt/tracer.c
arch/um/os-Linux/file.c
arch/um/sys-i386/bugs.c
arch/um/sys-i386/ldt.c

index ee53cf882f42d79a8c1f3753a7c61a48bb0101d9..d226f103462e37a7a9fa2af677d86fb541a0a857 100644 (file)
@@ -85,7 +85,7 @@ static int winch_thread(void *arg)
 
        pty_fd = data->pty_fd;
        pipe_fd = data->pipe_fd;
-       count = os_write_file(pipe_fd, &c, sizeof(c));
+       count = os_write_file_k(pipe_fd, &c, sizeof(c));
        if(count != sizeof(c))
                printk("winch_thread : failed to write synchronization "
                       "byte, err = %d\n", -count);
@@ -120,7 +120,7 @@ static int winch_thread(void *arg)
         * host - since they are not different kernel threads, we cannot use
         * kernel semaphores. We don't use SysV semaphores because they are
         * persistent. */
-       count = os_read_file(pipe_fd, &c, sizeof(c));
+       count = os_read_file_k(pipe_fd, &c, sizeof(c));
        if(count != sizeof(c))
                printk("winch_thread : failed to read synchronization byte, "
                       "err = %d\n", -count);
@@ -130,7 +130,7 @@ static int winch_thread(void *arg)
                 * are blocked.*/
                sigsuspend(&sigs);
 
-               count = os_write_file(pipe_fd, &c, sizeof(c));
+               count = os_write_file_k(pipe_fd, &c, sizeof(c));
                if(count != sizeof(c))
                        printk("winch_thread : write failed, err = %d\n",
                               -count);
@@ -162,7 +162,7 @@ static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out)
        }
 
        *fd_out = fds[0];
-       n = os_read_file(fds[0], &c, sizeof(c));
+       n = os_read_file_k(fds[0], &c, sizeof(c));
        if(n != sizeof(c)){
                printk("winch_tramp : failed to read synchronization byte\n");
                printk("read failed, err = %d\n", -n);
@@ -195,7 +195,7 @@ void register_winch(int fd, struct tty_struct *tty)
                if(thread > 0){
                        register_winch_irq(thread_fd, fd, thread, tty);
 
-                       count = os_write_file(thread_fd, &c, sizeof(c));
+                       count = os_write_file_k(thread_fd, &c, sizeof(c));
                        if(count != sizeof(c))
                                printk("register_winch : failed to write "
                                       "synchronization byte, err = %d\n",
index b869e3899683e548d6f98e17ec9959e801ef8a6b..e1fd26c1b728ffa096c72fb174311e389d1336db 100644 (file)
@@ -94,7 +94,7 @@ static int connect_to_switch(struct daemon_data *pri)
        req.version = SWITCH_VERSION;
        req.type = REQ_NEW_CONTROL;
        req.sock = *local_addr;
-       n = os_write_file(pri->control, &req, sizeof(req));
+       n = os_write_file_k(pri->control, &req, sizeof(req));
        if(n != sizeof(req)){
                printk("daemon_open : control setup request failed, err = %d\n",
                       -n);
@@ -102,7 +102,7 @@ static int connect_to_switch(struct daemon_data *pri)
                goto out_free;
        }
 
-       n = os_read_file(pri->control, sun, sizeof(*sun));
+       n = os_read_file_k(pri->control, sun, sizeof(*sun));
        if(n != sizeof(*sun)){
                printk("daemon_open : read of data socket failed, err = %d\n",
                       -n);
index 5eeecf8917c3158b31a6ef960a14a7d9312f4d1b..0fbb1615171ee8af032b2cd1a9b35c04e87f901c 100644 (file)
@@ -79,7 +79,7 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
                goto out_close_out;
        }
 
-       n = os_read_file(in_fds[0], &c, sizeof(c));
+       n = os_read_file_k(in_fds[0], &c, sizeof(c));
        if(n == 0){
                printk("harddog_open - EOF on watchdog pipe\n");
                helper_wait(pid);
@@ -118,7 +118,7 @@ int ping_watchdog(int fd)
        int n;
        char c = '\n';
 
-       n = os_write_file(fd, &c, sizeof(c));
+       n = os_write_file_k(fd, &c, sizeof(c));
        if(n != sizeof(c)){
                printk("ping_watchdog - write failed, err = %d\n", -n);
                if(n < 0)
index 10e08a8c17c3149cc346529a1c61bb6f6b241cff..bd6688ea96de28b36ab1d842556e2c87e8e24d1e 100644 (file)
@@ -84,7 +84,7 @@ static ssize_t hostaudio_read(struct file *file, char __user *buffer,
        if(kbuf == NULL)
                return(-ENOMEM);
 
-       err = os_read_file(state->fd, kbuf, count);
+       err = os_read_file_k(state->fd, kbuf, count);
        if(err < 0)
                goto out;
 
@@ -115,7 +115,7 @@ static ssize_t hostaudio_write(struct file *file, const char __user *buffer,
        if(copy_from_user(kbuf, buffer, count))
                goto out;
 
-       err = os_write_file(state->fd, kbuf, count);
+       err = os_write_file_k(state->fd, kbuf, count);
        if(err < 0)
                goto out;
        *ppos += err;
index 3503cff867c34d1166466c5da5acc6a14de3f46d..2dc57a37e4b713f04e9b25e28ff156f7aa23cb2e 100644 (file)
@@ -63,7 +63,7 @@ void read_output(int fd, char *output, int len)
        }
                
        *output = '\0';
-       ret = os_read_file(fd, &remain, sizeof(remain));
+       ret = os_read_file_k(fd, &remain, sizeof(remain));
 
        if (ret != sizeof(remain)) {
                expected = sizeof(remain);
index 1c8efd95c421946ab5e7e56db518395c1847378b..75bb40126c2eb05b41ec9345252bcf2a9213947d 100644 (file)
@@ -113,7 +113,7 @@ static int port_accept(struct port_list *port)
        }
 
        if(atomic_read(&port->wait_count) == 0){
-               os_write_file(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
+               os_write_file_k(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
                printk("No one waiting for port\n");
        }
        list_add(&conn->list, &port->pending);
index e942e836f9954335651fc3c2b6587c2bd7d7e888..94838f4c16455dde04dea72b2ff2d59e8bc9f66f 100644 (file)
@@ -44,7 +44,7 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
         int n, ret = 0, have_data;
 
         while(size){
-                n = os_read_file(random_fd, &data, sizeof(data));
+                n = os_read_file_k(random_fd, &data, sizeof(data));
                 if(n > 0){
                         have_data = n;
                         while (have_data && size) {
index 83189e188c3f21f6f3eabc011f6dfb91cd38b74e..6d163c9e28857773bc0c0fc05c36e262a04fbb36 100644 (file)
@@ -504,7 +504,7 @@ static void ubd_handler(void)
        struct ubd *dev;
        int n;
 
-       n = os_read_file(thread_fd, &req, sizeof(req));
+       n = os_read_file_k(thread_fd, &req, sizeof(req));
        if(n != sizeof(req)){
                printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
                       "err = %d\n", os_getpid(), -n);
@@ -1092,8 +1092,7 @@ static void do_ubd_request(request_queue_t *q)
                err = prepare_request(req, &io_req);
                if(!err){
                        dev->active = 1;
-                       n = os_write_file(thread_fd, (char *) &io_req,
-                                        sizeof(io_req));
+                       n = os_write_file_k(thread_fd, &io_req, sizeof(io_req));
                        if(n != sizeof(io_req))
                                printk("write to io thread failed, "
                                       "errno = %d\n", -n);
@@ -1336,8 +1335,8 @@ static int update_bitmap(struct io_thread_req *req)
                return(1);
        }
 
-       n = os_write_file(req->fds[1], &req->bitmap_words,
-                         sizeof(req->bitmap_words));
+       n = os_write_file_k(req->fds[1], &req->bitmap_words,
+                           sizeof(req->bitmap_words));
        if(n != sizeof(req->bitmap_words)){
                printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
                       req->fds[1]);
@@ -1381,7 +1380,7 @@ void do_io(struct io_thread_req *req)
                        do {
                                buf = &buf[n];
                                len -= n;
-                               n = os_read_file(req->fds[bit], buf, len);
+                               n = os_read_file_k(req->fds[bit], buf, len);
                                if (n < 0) {
                                        printk("do_io - read failed, err = %d "
                                               "fd = %d\n", -n, req->fds[bit]);
@@ -1391,7 +1390,7 @@ void do_io(struct io_thread_req *req)
                        } while((n < len) && (n != 0));
                        if (n < len) memset(&buf[n], 0, len - n);
                } else {
-                       n = os_write_file(req->fds[bit], buf, len);
+                       n = os_write_file_k(req->fds[bit], buf, len);
                        if(n != len){
                                printk("do_io - write failed err = %d "
                                       "fd = %d\n", -n, req->fds[bit]);
@@ -1421,7 +1420,7 @@ int io_thread(void *arg)
 
        ignore_sigwinch_sig();
        while(1){
-               n = os_read_file(kernel_fd, &req, sizeof(req));
+               n = os_read_file_k(kernel_fd, &req, sizeof(req));
                if(n != sizeof(req)){
                        if(n < 0)
                                printk("io_thread - read failed, fd = %d, "
@@ -1434,7 +1433,7 @@ int io_thread(void *arg)
                }
                io_count++;
                do_io(&req);
-               n = os_write_file(kernel_fd, &req, sizeof(req));
+               n = os_write_file_k(kernel_fd, &req, sizeof(req));
                if(n != sizeof(req))
                        printk("io_thread - write failed, fd = %d, err = %d\n",
                               kernel_fd, -n);
index 394adcded0bfa356c10236a3824fe36d50a9a2aa..b463170a5308474228174ab0bfc51c71f2d8bf00 100644 (file)
@@ -144,7 +144,9 @@ extern int os_mode_fd(int fd, int mode);
 extern int os_seek_file(int fd, __u64 offset);
 extern int os_open_file(char *file, struct openflags flags, int mode);
 extern int os_read_file(int fd, void *buf, int len);
+extern int os_read_file_k(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
+extern int os_write_file_k(int fd, const void *buf, int len);
 extern int os_file_size(char *file, unsigned long long *size_out);
 extern int os_file_modtime(char *file, unsigned long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
index 7b3e53fb80703f4ced9b1dc5ba0004faa288e44c..7c158448b9fdd4898f35fdb803e8196886a14284 100644 (file)
@@ -62,7 +62,9 @@ EXPORT_SYMBOL(os_get_exec_close);
 EXPORT_SYMBOL(os_set_exec_close);
 EXPORT_SYMBOL(os_getpid);
 EXPORT_SYMBOL(os_open_file);
+EXPORT_SYMBOL(os_read_file_k);
 EXPORT_SYMBOL(os_read_file);
+EXPORT_SYMBOL(os_write_file_k);
 EXPORT_SYMBOL(os_write_file);
 EXPORT_SYMBOL(os_seek_file);
 EXPORT_SYMBOL(os_lock_file);
index df1ad3ba130cd308f7760481466bc552b3fca694..a9856209006b85c2e504080493bd5ed94d03afb3 100644 (file)
@@ -341,7 +341,7 @@ void setup_physmem(unsigned long start, unsigned long reserve_end,
         * from physmem_fd, so it needs to be written out there.
         */
        os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
-       os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
+       os_write_file_k(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
 
        bootmap_size = init_bootmem(pfn, pfn + delta);
        free_bootmem(__pa(reserve_end) + bootmap_size,
index 89f9866a1354b8073cf5f015bd4b80e276b6e9a4..f756e78085e4ca1ee152252d4c0ed55cbbbe2a70 100644 (file)
@@ -21,7 +21,7 @@ static irqreturn_t sigio_interrupt(int irq, void *data)
 {
        char c;
 
-       os_read_file(sigio_irq_fd, &c, sizeof(c));
+       os_read_file_k(sigio_irq_fd, &c, sizeof(c));
        reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
        return IRQ_HANDLED;
 }
index 62dd093cbcd764f587d0f00a43d356fabbda0200..47b690893c06bd0d6c3518bdb1d847902eae2c42 100644 (file)
@@ -47,7 +47,7 @@ struct task_struct *idle_threads[NR_CPUS];
 
 void smp_send_reschedule(int cpu)
 {
-       os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1);
+       os_write_file_k(cpu_data[cpu].ipi_pipe[1], "R", 1);
        num_reschedules_sent++;
 }
 
@@ -59,7 +59,7 @@ void smp_send_stop(void)
        for(i = 0; i < num_online_cpus(); i++){
                if(i == current_thread->cpu)
                        continue;
-               os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
+               os_write_file_k(cpu_data[i].ipi_pipe[1], "S", 1);
        }
        printk("done\n");
 }
@@ -108,8 +108,8 @@ static struct task_struct *idle_thread(int cpu)
                          { .pid =      new_task->thread.mode.tt.extern_pid,
                            .task =     new_task } );
        idle_threads[cpu] = new_task;
-       CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c,
-                         sizeof(c)),
+       CHOOSE_MODE(os_write_file_k(new_task->thread.mode.tt.switch_pipe[1], &c,
+                                   sizeof(c)),
                    ({ panic("skas mode doesn't support SMP"); }));
        return(new_task);
 }
@@ -179,7 +179,7 @@ void IPI_handler(int cpu)
        int fd;
 
        fd = cpu_data[cpu].ipi_pipe[0];
-       while (os_read_file(fd, &c, 1) == 1) {
+       while (os_read_file_k(fd, &c, 1) == 1) {
                switch (c) {
                case 'C':
                        smp_call_function_slave(cpu);
@@ -239,7 +239,7 @@ int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic,
        info = _info;
 
        for_each_online_cpu(i)
-               os_write_file(cpu_data[i].ipi_pipe[1], "C", 1);
+               os_write_file_k(cpu_data[i].ipi_pipe[1], "C", 1);
 
        while (atomic_read(&scf_started) != cpus)
                barrier();
index 8029f72afaa7ae48bf739c995a9a6ea19986fb22..c81bd2074930e71429369826530cd79687326a82 100644 (file)
@@ -57,14 +57,15 @@ void switch_to_tt(void *prev, void *next)
         * nor the value in "to" (since it was the task which stole us the CPU,
         * which we don't care about). */
 
-       err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
+       err = os_write_file_k(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
        if(err != sizeof(c))
                panic("write of switch_pipe failed, err = %d", -err);
 
        if(from->thread.mode.tt.switch_pipe[0] == -1)
                os_kill_process(os_getpid(), 0);
 
-       err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
+       err = os_read_file_k(from->thread.mode.tt.switch_pipe[0], &c,
+                            sizeof(c));
        if(err != sizeof(c))
                panic("read of switch_pipe failed, errno = %d", -err);
 
@@ -113,7 +114,7 @@ void suspend_new_thread(int fd)
        char c;
 
        os_stop_process(os_getpid());
-       err = os_read_file(fd, &c, sizeof(c));
+       err = os_read_file_k(fd, &c, sizeof(c));
        if(err != sizeof(c))
                panic("read failed in suspend_new_thread, err = %d", -err);
 }
index c88e7b5d8a769d5327a449ea4fb869fb786b1680..007beb6b7c00e36a112d549896372d74f3b638a6 100644 (file)
@@ -338,13 +338,14 @@ int start_debugger(char *prog, int startup, int stop, int *fd_out)
                               "err = %d\n", -fd);
                        exit(1);
                }
-               os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1);
+               os_write_file_k(fd, gdb_init_string,
+                               sizeof(gdb_init_string) - 1);
                if(startup){
                        if(stop){
-                               os_write_file(fd, "b start_kernel\n",
-                                     strlen("b start_kernel\n"));
+                               os_write_file_k(fd, "b start_kernel\n",
+                                               strlen("b start_kernel\n"));
                        }
-                       os_write_file(fd, "c\n", strlen("c\n"));
+                       os_write_file_k(fd, "c\n", strlen("c\n"));
                }
                if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
                        printk("start_debugger :  PTRACE_TRACEME failed, "
index c23588393f6e9843281c6458f6280a0f52ea15bf..264da6c5a5c380d6d862326d56d56a0f861e11d6 100644 (file)
@@ -44,7 +44,7 @@ static void tracer_winch_handler(int sig)
        int n;
        char c = 1;
 
-       n = os_write_file(tracer_winch[1], &c, sizeof(c));
+       n = os_write_file_k(tracer_winch[1], &c, sizeof(c));
        if(n != sizeof(c))
                printk("tracer_winch_handler - write failed, err = %d\n", -n);
 }
index 4a9510c67622bcdd24b7b5543082d3b1bbe08d9e..5e9b8dcf34d4748acf06416ea22e3b278a3835ff 100644 (file)
@@ -334,12 +334,30 @@ int os_read_file(int fd, void *buf, int len)
                       copy_from_user_proc);
 }
 
+int os_read_file_k(int fd, void *buf, int len)
+{
+       int n = read(fd, buf, len);     /* direct host read(); buf is passed through as-is */
+
+       if(n < 0)
+               return -errno;          /* failure is reported as the negated errno */
+       return n;                       /* otherwise the value read() returned */
+}
+
 int os_write_file(int fd, const void *buf, int len)
 {
        return file_io(fd, (void *) buf, len,
                       (int (*)(int, void *, int)) write, copy_to_user_proc);
 }
 
+int os_write_file_k(int fd, const void *buf, int len)
+{
+       int n = write(fd, (void *) buf, len);   /* direct host write(); the cast drops const */
+
+       if(n < 0)
+               return -errno;          /* failure is reported as the negated errno */
+       return n;                       /* otherwise the value write() returned */
+}
+
 int os_file_size(char *file, unsigned long long *size_out)
 {
        struct uml_stat buf;
index 0393e44813e775e1adf6e4274789caabb3d09493..74fd062b201b8ebe8e4717f2a00a7573de93a993 100644 (file)
@@ -67,7 +67,7 @@ static int find_cpuinfo_line(int fd, char *key, char *scratch, int len)
                        return 1;
 
                do {
-                       n = os_read_file(fd, &c, sizeof(c));
+                       n = os_read_file_k(fd, &c, sizeof(c));
                        if(n != sizeof(c)){
                                printk("Failed to find newline in "
                                       "/proc/cpuinfo, err = %d\n", -n);
index a939a7ef02274eff96f5e3ca46d3e14772b80975..d031a13bd9694304189114c3d40770c2ab1cb4fa 100644 (file)
@@ -517,7 +517,7 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
                                              .u        =
                                              { .copy_segments =
                                                        from_mm->id.u.mm_fd } } );
-               i = os_write_file(new_mm->id.u.mm_fd, &copy, sizeof(copy));
+               i = os_write_file_k(new_mm->id.u.mm_fd, &copy, sizeof(copy));
                if(i != sizeof(copy))
                        printk("new_mm : /proc/mm copy_segments failed, "
                               "err = %d\n", -i);