writeback: Add tracing to balance_dirty_pages
authorDave Chinner <dchinner@redhat.com>
Wed, 7 Jul 2010 03:24:07 +0000 (13:24 +1000)
committerJens Axboe <jaxboe@fusionio.com>
Sat, 7 Aug 2010 16:24:25 +0000 (18:24 +0200)
Tracing high level background writeback events is good, but it doesn't
give the entire picture. Add visibility into write throttling to catch IO
dispatched by foreground throttling of processing dirtying lots of pages.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
fs/fs-writeback.c
include/trace/events/writeback.h
mm/page-writeback.c

index 73acab4dc2b7aabf8a30d8ef98dc537a12dd50b1..bf10cbf379dd01c4f5a16629aa35c140fbd7213b 100644 (file)
@@ -656,10 +656,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                wbc.more_io = 0;
                wbc.nr_to_write = MAX_WRITEBACK_PAGES;
                wbc.pages_skipped = 0;
+
+               trace_wbc_writeback_start(&wbc, wb->bdi);
                if (work->sb)
                        __writeback_inodes_sb(work->sb, wb, &wbc);
                else
                        writeback_inodes_wb(wb, &wbc);
+               trace_wbc_writeback_written(&wbc, wb->bdi);
+
                work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -687,6 +691,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (!list_empty(&wb->b_more_io))  {
                        inode = list_entry(wb->b_more_io.prev,
                                                struct inode, i_list);
+                       trace_wbc_writeback_wait(&wbc, wb->bdi);
                        inode_wait_for_writeback(inode);
                }
                spin_unlock(&inode_lock);
index 562fcae10d9d65aef27b5fe8181114f770fc2fb2..0be26acae064c7088288f6b9c63e5a5a8f340e53 100644 (file)
@@ -85,6 +85,70 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
 DEFINE_WRITEBACK_EVENT(writeback_thread_start);
 DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
 
+DECLARE_EVENT_CLASS(wbc_class,
+       TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
+       TP_ARGS(wbc, bdi),
+       TP_STRUCT__entry(
+               __array(char, name, 32)
+               __field(long, nr_to_write)
+               __field(long, pages_skipped)
+               __field(int, sync_mode)
+               __field(int, nonblocking)
+               __field(int, encountered_congestion)
+               __field(int, for_kupdate)
+               __field(int, for_background)
+               __field(int, for_reclaim)
+               __field(int, range_cyclic)
+               __field(int, more_io)
+               __field(unsigned long, older_than_this)
+               __field(long, range_start)
+               __field(long, range_end)
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               __entry->nr_to_write    = wbc->nr_to_write;
+               __entry->pages_skipped  = wbc->pages_skipped;
+               __entry->sync_mode      = wbc->sync_mode;
+               __entry->for_kupdate    = wbc->for_kupdate;
+               __entry->for_background = wbc->for_background;
+               __entry->for_reclaim    = wbc->for_reclaim;
+               __entry->range_cyclic   = wbc->range_cyclic;
+               __entry->more_io        = wbc->more_io;
+               __entry->older_than_this = wbc->older_than_this ?
+                                               *wbc->older_than_this : 0;
+               __entry->range_start    = (long)wbc->range_start;
+               __entry->range_end      = (long)wbc->range_end;
+       ),
+
+       TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
+               "bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
+               "start=0x%lx end=0x%lx",
+               __entry->name,
+               __entry->nr_to_write,
+               __entry->pages_skipped,
+               __entry->sync_mode,
+               __entry->for_kupdate,
+               __entry->for_background,
+               __entry->for_reclaim,
+               __entry->range_cyclic,
+               __entry->more_io,
+               __entry->older_than_this,
+               __entry->range_start,
+               __entry->range_end)
+)
+
+#define DEFINE_WBC_EVENT(name) \
+DEFINE_EVENT(wbc_class, name, \
+       TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
+       TP_ARGS(wbc, bdi))
+DEFINE_WBC_EVENT(wbc_writeback_start);
+DEFINE_WBC_EVENT(wbc_writeback_written);
+DEFINE_WBC_EVENT(wbc_writeback_wait);
+DEFINE_WBC_EVENT(wbc_balance_dirty_start);
+DEFINE_WBC_EVENT(wbc_balance_dirty_written);
+DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+
 #endif /* _TRACE_WRITEBACK_H */
 
 /* This part must be outside protection */
index 37498ef6154836943f3478304bf7440ab255669a..d556cd829af66ab6431334bafb2d5b1e8a583334 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -535,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * threshold otherwise wait until the disk writes catch
                 * up.
                 */
+               trace_wbc_balance_dirty_start(&wbc, bdi);
                if (bdi_nr_reclaimable > bdi_thresh) {
                        writeback_inodes_wb(&bdi->wb, &wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
+                       trace_wbc_balance_dirty_written(&wbc, bdi);
                }
 
                /*
@@ -565,6 +568,7 @@ static void balance_dirty_pages(struct address_space *mapping,
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
+               trace_wbc_balance_dirty_wait(&wbc, bdi);
                __set_current_state(TASK_INTERRUPTIBLE);
                io_schedule_timeout(pause);