[PATCH] per-task-delay-accounting: setup
authorShailabh Nagar <nagar@watson.ibm.com>
Fri, 14 Jul 2006 07:24:36 +0000 (00:24 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Sat, 15 Jul 2006 04:53:56 +0000 (21:53 -0700)
Initialization code related to collection of per-task "delay" statistics which
measure how long it had to wait for cpu, sync block io, swapping etc.  The
collection of statistics and the interface are in other patches.  This patch
sets up the data structures and allows the statistics collection to be
disabled through a kernel boot parameter.

Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Documentation/kernel-parameters.txt
include/linux/delayacct.h [new file with mode: 0644]
include/linux/sched.h
include/linux/time.h
init/Kconfig
init/main.c
kernel/Makefile
kernel/delayacct.c [new file with mode: 0644]
kernel/exit.c
kernel/fork.c

index 149f62ba14a5023a59a1cbaa53550b053950edb2..e11f7728ec6f0691b940daa3d2ec5c7b182417fc 100644 (file)
@@ -448,6 +448,8 @@ running once the system is up.
                        Format: <area>[,<node>]
                        See also Documentation/networking/decnet.txt.
 
+       delayacct       [KNL] Enable per-task delay accounting
+
        dhash_entries=  [KNL]
                        Set number of hash buckets for dentry cache.
 
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
new file mode 100644 (file)
index 0000000..9572cfa
--- /dev/null
@@ -0,0 +1,69 @@
+/* delayacct.h - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_DELAYACCT_H
+#define _LINUX_DELAYACCT_H
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_TASK_DELAY_ACCT
+
+extern int delayacct_on;       /* Delay accounting turned on/off */
+extern kmem_cache_t *delayacct_cache;
+extern void delayacct_init(void);
+extern void __delayacct_tsk_init(struct task_struct *);
+extern void __delayacct_tsk_exit(struct task_struct *);
+
+static inline void delayacct_set_flag(int flag)
+{
+       if (current->delays)
+               current->delays->flags |= flag;
+}
+
+static inline void delayacct_clear_flag(int flag)
+{
+       if (current->delays)
+               current->delays->flags &= ~flag;
+}
+
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{
+       /* reinitialize in case parent's non-null pointer was dup'ed*/
+       tsk->delays = NULL;
+       if (unlikely(delayacct_on))
+               __delayacct_tsk_init(tsk);
+}
+
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{
+       if (tsk->delays)
+               __delayacct_tsk_exit(tsk);
+}
+
+#else
+static inline void delayacct_set_flag(int flag)
+{}
+static inline void delayacct_clear_flag(int flag)
+{}
+static inline void delayacct_init(void)
+{}
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{}
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{}
+#endif /* CONFIG_TASK_DELAY_ACCT */
+
+#endif
index 1c876e27ff936dfe375694bc3cb6b812f1ef33a2..7a54e62763c53375b0e1ccc86a5066c87c95cf13 100644 (file)
@@ -552,6 +552,23 @@ struct sched_info {
 extern struct file_operations proc_schedstat_operations;
 #endif
 
+#ifdef CONFIG_TASK_DELAY_ACCT
+struct task_delay_info {
+       spinlock_t      lock;
+       unsigned int    flags;  /* Private per-task flags */
+
+       /* For each stat XXX, add following, aligned appropriately
+        *
+        * struct timespec XXX_start, XXX_end;
+        * u64 XXX_delay;
+        * u32 XXX_count;
+        *
+        * Atomicity of updates to XXX_delay, XXX_count protected by
+        * single lock above (split into XXX_lock if contention is an issue).
+        */
+};
+#endif
+
 enum idle_type
 {
        SCHED_IDLE,
@@ -945,6 +962,9 @@ struct task_struct {
         * cache last used pipe for splice
         */
        struct pipe_inode_info *splice_pipe;
+#ifdef CONFIG_TASK_DELAY_ACCT
+       struct task_delay_info *delays;
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
index c05f8bb9a323d575853350cc0b8b5e9baf1d939d..a5b739967b74f46f745526952b1c947c236cf75a 100644 (file)
@@ -70,6 +70,18 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon,
 
 extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
 
+/*
+ * sub = lhs - rhs, in normalized form
+ */
+static inline struct timespec timespec_sub(struct timespec lhs,
+                                               struct timespec rhs)
+{
+       struct timespec ts_delta;
+       set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec,
+                               lhs.tv_nsec - rhs.tv_nsec);
+       return ts_delta;
+}
+
 /*
  * Returns true if the timespec is norm, false if denorm:
  */
index a5b073a103e7cf9cd5a3438765a97e6468c23c6f..90498a3e53da674315ee19a1a5e01cf289a17f83 100644 (file)
@@ -158,6 +158,16 @@ config BSD_PROCESS_ACCT_V3
          for processing it. A preliminary version of these tools is available
          at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
 
+config TASK_DELAY_ACCT
+       bool "Enable per-task delay accounting (EXPERIMENTAL)"
+       help
+         Collect information on time spent by a task waiting for system
+         resources like cpu, synchronous block I/O completion and swapping
+         in pages. Such statistics can help in setting a task's priorities
+         relative to other tasks for cpu, io, rss limits etc.
+
+         Say N if unsure.
+
 config SYSCTL
        bool "Sysctl support" if EMBEDDED
        default y
index 628b8e9e841ae9ed86fedd98cdd850a5af2815f6..9e8e8c152142f3c1273e7cc7f2963971f717ab2c 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/efi.h>
+#include <linux/delayacct.h>
 #include <linux/unistd.h>
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
@@ -574,6 +575,7 @@ asmlinkage void __init start_kernel(void)
        proc_root_init();
 #endif
        cpuset_init();
+       delayacct_init();
 
        check_bugs();
 
index 47dbcd570cd8d67599cea355a33fa704614dd297..87bb34cc893821264e7a71d3047a004463358cc2 100644 (file)
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
new file mode 100644 (file)
index 0000000..fbf7f22
--- /dev/null
@@ -0,0 +1,87 @@
+/* delayacct.c - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sysctl.h>
+#include <linux/delayacct.h>
+
+int delayacct_on __read_mostly;        /* Delay accounting turned on/off */
+kmem_cache_t *delayacct_cache;
+
+static int __init delayacct_setup_enable(char *str)
+{
+       delayacct_on = 1;
+       return 1;
+}
+__setup("delayacct", delayacct_setup_enable);
+
+void delayacct_init(void)
+{
+       delayacct_cache = kmem_cache_create("delayacct_cache",
+                                       sizeof(struct task_delay_info),
+                                       0,
+                                       SLAB_PANIC,
+                                       NULL, NULL);
+       delayacct_tsk_init(&init_task);
+}
+
+void __delayacct_tsk_init(struct task_struct *tsk)
+{
+       tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
+       if (tsk->delays)
+               spin_lock_init(&tsk->delays->lock);
+}
+
+void __delayacct_tsk_exit(struct task_struct *tsk)
+{
+       kmem_cache_free(delayacct_cache, tsk->delays);
+       tsk->delays = NULL;
+}
+
+/*
+ * Start accounting for a delay statistic using
+ * its starting timestamp (@start)
+ */
+
+static inline void delayacct_start(struct timespec *start)
+{
+       do_posix_clock_monotonic_gettime(start);
+}
+
+/*
+ * Finish delay accounting for a statistic using
+ * its timestamps (@start, @end), accumalator (@total) and @count
+ */
+
+static void delayacct_end(struct timespec *start, struct timespec *end,
+                               u64 *total, u32 *count)
+{
+       struct timespec ts;
+       s64 ns;
+
+       do_posix_clock_monotonic_gettime(end);
+       ts = timespec_sub(*end, *start);
+       ns = timespec_to_ns(&ts);
+       if (ns < 0)
+               return;
+
+       spin_lock(&current->delays->lock);
+       *total += ns;
+       (*count)++;
+       spin_unlock(&current->delays->lock);
+}
+
index 6664c084783d49c0a80faf22f5b84fad6015fe0b..3c2cf91defa764842939405aac8e1cfae29c191b 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/mempolicy.h>
+#include <linux/delayacct.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
@@ -900,6 +901,7 @@ fastcall NORET_TYPE void do_exit(long code)
 #endif
        if (unlikely(tsk->audit_context))
                audit_free(tsk);
+       delayacct_tsk_exit(tsk);
        exit_mm(tsk);
 
        if (group_dead)
index 926e5a68ea9e7b2d901f189f33a8da204f8cfec2..451cfd35bf22cdccfdd3f93871ee3712bc3d688a 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/rmap.h>
 #include <linux/acct.h>
 #include <linux/cn_proc.h>
+#include <linux/delayacct.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1000,6 +1001,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_cleanup_put_domain;
 
        p->did_exec = 0;
+       delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
        copy_flags(clone_flags, p);
        p->pid = pid;
        retval = -EFAULT;