dm stats: support precise timestamps
authorMikulas Patocka <mpatocka@redhat.com>
Tue, 9 Jun 2015 21:21:39 +0000 (17:21 -0400)
committerMike Snitzer <snitzer@redhat.com>
Wed, 17 Jun 2015 16:40:40 +0000 (12:40 -0400)
Make it possible to use precise timestamps with nanosecond granularity
in dm statistics.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Documentation/device-mapper/statistics.txt
drivers/md/dm-stats.c
drivers/md/dm-stats.h

index 2a1673adc2004beae034f0ee5d5ae3ef3bceccf8..ff6baeaa71f7f7e42b52fd6d8fad9f85a0268718 100644 (file)
@@ -13,9 +13,13 @@ the range specified.
 The I/O statistics counters for each step-sized area of a region are
 in the same format as /sys/block/*/stat or /proc/diskstats (see:
 Documentation/iostats.txt).  But two extra counters (12 and 13) are
-provided: total time spent reading and writing in milliseconds.         All
-these counters may be accessed by sending the @stats_print message to
-the appropriate DM device via dmsetup.
+provided: total time spent reading and writing.  All these counters may
+be accessed by sending the @stats_print message to the appropriate DM
+device via dmsetup.
+
+The reported times are in milliseconds and the granularity depends on
+the kernel ticks.  When the option precise_timestamps is used, the
+reported times are in nanoseconds.
 
 Each region has a corresponding unique identifier, which we call a
 region_id, that is assigned when the region is created.         The region_id
@@ -33,7 +37,9 @@ memory is used by reading
 Messages
 ========
 
-    @stats_create <range> <step> [<program_id> [<aux_data>]]
+    @stats_create <range> <step>
+               [<number_of_optional_arguments> <optional_arguments>...]
+               [<program_id> [<aux_data>]]
 
        Create a new region and return the region_id.
 
@@ -48,6 +54,17 @@ Messages
          "/<number_of_areas>" - the range is subdivided into the specified
                                 number of areas.
 
+       <number_of_optional_arguments>
+         The number of optional arguments
+
+       <optional_arguments>
+         The following optional arguments are supported
+         precise_timestamps - use precise timer with nanosecond resolution
+               instead of the "jiffies" variable.  When this argument is
+               used, the resulting times are in nanoseconds instead of
+               milliseconds.  Precise timestamps are a little bit slower
+               to obtain than jiffies-based timestamps.
+
        <program_id>
          An optional parameter.  A name that uniquely identifies
          the userspace owner of the range.  This groups ranges together
@@ -55,6 +72,9 @@ Messages
          created and ignore those created by others.
          The kernel returns this string back in the output of
          @stats_list message, but it doesn't use it for anything else.
+         If we omit the number of optional arguments, program id must not
+         be a number, otherwise it would be interpreted as the number of
+         optional arguments.
 
        <aux_data>
          An optional parameter.  A word that provides auxiliary data
index d1fd31a6dd1ab50112d80d48cd1328841b65ccdd..4bfd84ab1d4a04b95c2ec2ff95d1add5b30d61f7 100644 (file)
@@ -33,13 +33,14 @@ struct dm_stat_percpu {
 
 struct dm_stat_shared {
        atomic_t in_flight[2];
-       unsigned long stamp;
+       unsigned long long stamp;
        struct dm_stat_percpu tmp;
 };
 
 struct dm_stat {
        struct list_head list_entry;
        int id;
+       unsigned stat_flags;
        size_t n_entries;
        sector_t start;
        sector_t end;
@@ -53,6 +54,8 @@ struct dm_stat {
        struct dm_stat_shared stat_shared[0];
 };
 
+#define STAT_PRECISE_TIMESTAMPS                1
+
 struct dm_stats_last_position {
        sector_t last_sector;
        unsigned last_rw;
@@ -224,7 +227,8 @@ void dm_stats_cleanup(struct dm_stats *stats)
 }
 
 static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
-                          sector_t step, const char *program_id, const char *aux_data,
+                          sector_t step, unsigned stat_flags,
+                          const char *program_id, const char *aux_data,
                           void (*suspend_callback)(struct mapped_device *),
                           void (*resume_callback)(struct mapped_device *),
                           struct mapped_device *md)
@@ -265,6 +269,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
        if (!s)
                return -ENOMEM;
 
+       s->stat_flags = stat_flags;
        s->n_entries = n_entries;
        s->start = start;
        s->end = end;
@@ -414,18 +419,24 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
        return 1;
 }
 
-static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p)
+static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
+                         struct dm_stat_percpu *p)
 {
        /*
         * This is racy, but so is part_round_stats_single.
         */
-       unsigned long now = jiffies;
-       unsigned in_flight_read;
-       unsigned in_flight_write;
-       unsigned long difference = now - shared->stamp;
+       unsigned long long now, difference;
+       unsigned in_flight_read, in_flight_write;
+
+       if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
+               now = jiffies;
+       else
+               now = ktime_to_ns(ktime_get());
 
+       difference = now - shared->stamp;
        if (!difference)
                return;
+
        in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
        in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
        if (in_flight_read)
@@ -440,8 +451,9 @@ static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *
 }
 
 static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
-                             unsigned long bi_rw, sector_t len, bool merged,
-                             bool end, unsigned long duration)
+                             unsigned long bi_rw, sector_t len,
+                             struct dm_stats_aux *stats_aux, bool end,
+                             unsigned long duration_jiffies)
 {
        unsigned long idx = bi_rw & REQ_WRITE;
        struct dm_stat_shared *shared = &s->stat_shared[entry];
@@ -471,15 +483,18 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
        p = &s->stat_percpu[smp_processor_id()][entry];
 
        if (!end) {
-               dm_stat_round(shared, p);
+               dm_stat_round(s, shared, p);
                atomic_inc(&shared->in_flight[idx]);
        } else {
-               dm_stat_round(shared, p);
+               dm_stat_round(s, shared, p);
                atomic_dec(&shared->in_flight[idx]);
                p->sectors[idx] += len;
                p->ios[idx] += 1;
-               p->merges[idx] += merged;
-               p->ticks[idx] += duration;
+               p->merges[idx] += stats_aux->merged;
+               if (!(s->stat_flags & STAT_PRECISE_TIMESTAMPS))
+                       p->ticks[idx] += duration_jiffies;
+               else
+                       p->ticks[idx] += stats_aux->duration_ns;
        }
 
 #if BITS_PER_LONG == 32
@@ -491,7 +506,7 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
 
 static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
                          sector_t bi_sector, sector_t end_sector,
-                         bool end, unsigned long duration,
+                         bool end, unsigned long duration_jiffies,
                          struct dm_stats_aux *stats_aux)
 {
        sector_t rel_sector, offset, todo, fragment_len;
@@ -520,7 +535,7 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
                if (fragment_len > s->step - offset)
                        fragment_len = s->step - offset;
                dm_stat_for_entry(s, entry, bi_rw, fragment_len,
-                                 stats_aux->merged, end, duration);
+                                 stats_aux, end, duration_jiffies);
                todo -= fragment_len;
                entry++;
                offset = 0;
@@ -529,11 +544,13 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
 
 void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
                         sector_t bi_sector, unsigned bi_sectors, bool end,
-                        unsigned long duration, struct dm_stats_aux *stats_aux)
+                        unsigned long duration_jiffies,
+                        struct dm_stats_aux *stats_aux)
 {
        struct dm_stat *s;
        sector_t end_sector;
        struct dm_stats_last_position *last;
+       bool got_precise_time;
 
        if (unlikely(!bi_sectors))
                return;
@@ -557,8 +574,17 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
 
        rcu_read_lock();
 
-       list_for_each_entry_rcu(s, &stats->list, list_entry)
-               __dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux);
+       got_precise_time = false;
+       list_for_each_entry_rcu(s, &stats->list, list_entry) {
+               if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
+                       if (!end)
+                               stats_aux->duration_ns = ktime_to_ns(ktime_get());
+                       else
+                               stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
+                       got_precise_time = true;
+               }
+               __dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
+       }
 
        rcu_read_unlock();
 }
@@ -571,7 +597,7 @@ static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared
 
        local_irq_disable();
        p = &s->stat_percpu[smp_processor_id()][x];
-       dm_stat_round(shared, p);
+       dm_stat_round(s, shared, p);
        local_irq_enable();
 
        memset(&shared->tmp, 0, sizeof(shared->tmp));
@@ -643,11 +669,15 @@ static int dm_stats_clear(struct dm_stats *stats, int id)
 /*
  * This is like jiffies_to_msec, but works for 64-bit values.
  */
-static unsigned long long dm_jiffies_to_msec64(unsigned long long j)
+static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
 {
-       unsigned long long result = 0;
+       unsigned long long result;
        unsigned mult;
 
+       if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
+               return j;
+
+       result = 0;
        if (j)
                result = jiffies_to_msecs(j & 0x3fffff);
        if (j >= 1 << 22) {
@@ -709,16 +739,16 @@ static int dm_stats_print(struct dm_stats *stats, int id,
                       shared->tmp.ios[READ],
                       shared->tmp.merges[READ],
                       shared->tmp.sectors[READ],
-                      dm_jiffies_to_msec64(shared->tmp.ticks[READ]),
+                      dm_jiffies_to_msec64(s, shared->tmp.ticks[READ]),
                       shared->tmp.ios[WRITE],
                       shared->tmp.merges[WRITE],
                       shared->tmp.sectors[WRITE],
-                      dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]),
+                      dm_jiffies_to_msec64(s, shared->tmp.ticks[WRITE]),
                       dm_stat_in_flight(shared),
-                      dm_jiffies_to_msec64(shared->tmp.io_ticks_total),
-                      dm_jiffies_to_msec64(shared->tmp.time_in_queue),
-                      dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]),
-                      dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE]));
+                      dm_jiffies_to_msec64(s, shared->tmp.io_ticks_total),
+                      dm_jiffies_to_msec64(s, shared->tmp.time_in_queue),
+                      dm_jiffies_to_msec64(s, shared->tmp.io_ticks[READ]),
+                      dm_jiffies_to_msec64(s, shared->tmp.io_ticks[WRITE]));
 
                if (unlikely(sz + 1 >= maxlen))
                        goto buffer_overflow;
@@ -769,21 +799,31 @@ static int message_stats_create(struct mapped_device *md,
        unsigned long long start, end, len, step;
        unsigned divisor;
        const char *program_id, *aux_data;
+       unsigned stat_flags = 0;
+
+       struct dm_arg_set as, as_backup;
+       const char *a;
+       unsigned feature_args;
 
        /*
         * Input format:
-        *   <range> <step> [<program_id> [<aux_data>]]
+        *   <range> <step> [<extra_parameters> <parameters>] [<program_id> [<aux_data>]]
         */
 
-       if (argc < 3 || argc > 5)
+       if (argc < 3)
                return -EINVAL;
 
-       if (!strcmp(argv[1], "-")) {
+       as.argc = argc;
+       as.argv = argv;
+       dm_consume_args(&as, 1);
+
+       a = dm_shift_arg(&as);
+       if (!strcmp(a, "-")) {
                start = 0;
                len = dm_get_size(md);
                if (!len)
                        len = 1;
-       } else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 ||
+       } else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
                   start != (sector_t)start || len != (sector_t)len)
                return -EINVAL;
 
@@ -791,7 +831,8 @@ static int message_stats_create(struct mapped_device *md,
        if (start >= end)
                return -EINVAL;
 
-       if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) {
+       a = dm_shift_arg(&as);
+       if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
                if (!divisor)
                        return -EINVAL;
                step = end - start;
@@ -799,18 +840,39 @@ static int message_stats_create(struct mapped_device *md,
                        step++;
                if (!step)
                        step = 1;
-       } else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 ||
+       } else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
                   step != (sector_t)step || !step)
                return -EINVAL;
 
+       as_backup = as;
+       a = dm_shift_arg(&as);
+       if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
+               while (feature_args--) {
+                       a = dm_shift_arg(&as);
+                       if (!a)
+                               return -EINVAL;
+                       if (!strcasecmp(a, "precise_timestamps"))
+                               stat_flags |= STAT_PRECISE_TIMESTAMPS;
+                       else
+                               return -EINVAL;
+               }
+       } else {
+               as = as_backup;
+       }
+
        program_id = "-";
        aux_data = "-";
 
-       if (argc > 3)
-               program_id = argv[3];
+       a = dm_shift_arg(&as);
+       if (a)
+               program_id = a;
+
+       a = dm_shift_arg(&as);
+       if (a)
+               aux_data = a;
 
-       if (argc > 4)
-               aux_data = argv[4];
+       if (as.argc)
+               return -EINVAL;
 
        /*
         * If a buffer overflow happens after we created the region,
@@ -822,7 +884,7 @@ static int message_stats_create(struct mapped_device *md,
        if (dm_message_test_buffer_overflow(result, maxlen))
                return 1;
 
-       id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
+       id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags, program_id, aux_data,
                             dm_internal_suspend_fast, dm_internal_resume_fast, md);
        if (id < 0)
                return id;
index e7c4984bf2355403d3f9ded3bbcccd951d40e810..f1c0956e3843f51e40dc2da23a68fc54d7a141c3 100644 (file)
@@ -18,6 +18,7 @@ struct dm_stats {
 
 struct dm_stats_aux {
        bool merged;
+       unsigned long long duration_ns;
 };
 
 void dm_stats_init(struct dm_stats *st);
@@ -30,7 +31,8 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
 
 void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
                         sector_t bi_sector, unsigned bi_sectors, bool end,
-                        unsigned long duration, struct dm_stats_aux *aux);
+                        unsigned long duration_jiffies,
+                        struct dm_stats_aux *aux);
 
 static inline bool dm_stats_used(struct dm_stats *st)
 {