[patch 2/3] OCFS2 Configurable timeouts
author Jeff Mahoney <jeffm@suse.de>
Mon, 4 Dec 2006 13:04:54 +0000 (14:04 +0100)
committer Mark Fasheh <mark.fasheh@oracle.com>
Fri, 8 Dec 2006 02:13:20 +0000 (18:13 -0800)
Allow configuration of OCFS2 timeouts from userspace via configfs

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
fs/ocfs2/cluster/nodemanager.c
fs/ocfs2/cluster/nodemanager.h
fs/ocfs2/cluster/tcp.c
fs/ocfs2/cluster/tcp.h
fs/ocfs2/cluster/tcp_internal.h

index dd4aefa11b3db906b956a9efb06a72b038f26066..234f83f2897fd0cc120419512617e2c78c93be2e 100644 (file)
@@ -532,6 +532,161 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group)
 }
 #endif
 
+struct o2nm_cluster_attribute {        /* configfs attribute paired with cluster-typed handlers */
+       struct configfs_attribute attr; /* embedded; container_of() on this member recovers the wrapper */
+       ssize_t (*show)(struct o2nm_cluster *, char *); /* writes the value into the page buffer; may be NULL */
+       ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); /* parses a written value; may be NULL */
+};
+
+/*
+ * Parse an unsigned number out of a configfs write buffer.
+ *
+ * The text is converted with simple_strtoul() using base 0.  The first
+ * character after the number must be either the end of the string or a
+ * newline, otherwise -EINVAL is returned.  A value of zero is also
+ * rejected with -EINVAL, and anything at or above (u32)-1 is rejected
+ * with -ERANGE.
+ *
+ * On success the parsed value is stored in *val and count is returned.
+ * On failure *val is not written.
+ */
+static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count,
+                                       unsigned int *val)
+{
+       /* simple_strtoul() wants a non-const end pointer */
+       char *end = (char *)page;
+       unsigned long parsed = simple_strtoul(end, &end, 0);
+
+       if (!end || (*end != '\0' && *end != '\n'))
+               return -EINVAL;
+       if (parsed == 0)
+               return -EINVAL;
+       if (parsed >= (u32)-1)
+               return -ERANGE;
+
+       *val = parsed;
+       return count;
+}
+
+/*
+ * Show handler for idle_timeout_ms: prints the cluster's idle timeout
+ * as an unsigned decimal followed by a newline and returns the number
+ * of characters written by sprintf().
+ */
+static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(
+       struct o2nm_cluster *cluster, char *page)
+{
+       unsigned int timeout_ms = cluster->cl_idle_timeout_ms;
+
+       return sprintf(page, "%u\n", timeout_ms);
+}
+
+/*
+ * Store handler for idle_timeout_ms.
+ *
+ * The written text is parsed by o2nm_cluster_attr_write(); its error
+ * (or zero) result is passed straight back.  A successfully parsed
+ * value is accepted only if it is strictly larger than the cluster's
+ * current keepalive delay, otherwise a notice is logged and -EINVAL is
+ * returned without touching the stored timeout.
+ */
+static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(
+       struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+       unsigned int timeout_ms;
+       ssize_t rc = o2nm_cluster_attr_write(page, count, &timeout_ms);
+
+       /* parse failed (or nothing consumed): timeout_ms is not valid */
+       if (rc <= 0)
+               return rc;
+
+       if (timeout_ms <= cluster->cl_keepalive_delay_ms) {
+               mlog(ML_NOTICE, "o2net: idle timeout must be larger "
+                    "than keepalive delay\n");
+               return -EINVAL;
+       }
+
+       cluster->cl_idle_timeout_ms = timeout_ms;
+       return rc;
+}
+
+/*
+ * Show handler for keepalive_delay_ms: prints the cluster's keepalive
+ * delay as an unsigned decimal followed by a newline and returns the
+ * number of characters written by sprintf().
+ */
+static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(
+       struct o2nm_cluster *cluster, char *page)
+{
+       unsigned int delay_ms = cluster->cl_keepalive_delay_ms;
+
+       return sprintf(page, "%u\n", delay_ms);
+}
+
+/*
+ * Store handler for keepalive_delay_ms.
+ *
+ * The written text is parsed by o2nm_cluster_attr_write(); its error
+ * (or zero) result is passed straight back.  A successfully parsed
+ * value is accepted only if it is strictly smaller than the cluster's
+ * current idle timeout, otherwise a notice is logged and -EINVAL is
+ * returned without touching the stored delay.
+ */
+static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(
+       struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+       unsigned int delay_ms;
+       ssize_t rc = o2nm_cluster_attr_write(page, count, &delay_ms);
+
+       /* parse failed (or nothing consumed): delay_ms is not valid */
+       if (rc <= 0)
+               return rc;
+
+       if (delay_ms >= cluster->cl_idle_timeout_ms) {
+               mlog(ML_NOTICE, "o2net: keepalive delay must be "
+                    "smaller than idle timeout\n");
+               return -EINVAL;
+       }
+
+       cluster->cl_keepalive_delay_ms = delay_ms;
+       return rc;
+}
+
+/*
+ * Show handler for reconnect_delay_ms: prints the cluster's reconnect
+ * delay as an unsigned decimal followed by a newline and returns the
+ * number of characters written by sprintf().
+ */
+static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read(
+       struct o2nm_cluster *cluster, char *page)
+{
+       unsigned int delay_ms = cluster->cl_reconnect_delay_ms;
+
+       return sprintf(page, "%u\n", delay_ms);
+}
+
+/*
+ * Store handler for reconnect_delay_ms.  Unlike the idle timeout and
+ * keepalive delay handlers there is no cross-check against another
+ * setting: the value is parsed directly into the cluster field, which
+ * o2nm_cluster_attr_write() only updates when parsing succeeds.
+ */
+static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
+       struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+       unsigned int *delay_ms = &cluster->cl_reconnect_delay_ms;
+
+       return o2nm_cluster_attr_write(page, count, delay_ms);
+}
+static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {     /* "idle_timeout_ms" cluster attribute */
+       .attr   = { .ca_owner = THIS_MODULE,
+                   .ca_name = "idle_timeout_ms",
+                   .ca_mode = S_IRUGO | S_IWUSR },     /* readable by everyone, writable by the owner only */
+       .show   = o2nm_cluster_attr_idle_timeout_ms_read,
+       .store  = o2nm_cluster_attr_idle_timeout_ms_write,      /* rejects values <= keepalive delay */
+};
+
+static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = {  /* "keepalive_delay_ms" cluster attribute */
+       .attr   = { .ca_owner = THIS_MODULE,
+                   .ca_name = "keepalive_delay_ms",
+                   .ca_mode = S_IRUGO | S_IWUSR },     /* readable by everyone, writable by the owner only */
+       .show   = o2nm_cluster_attr_keepalive_delay_ms_read,
+       .store  = o2nm_cluster_attr_keepalive_delay_ms_write,   /* rejects values >= idle timeout */
+};
+
+static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {  /* "reconnect_delay_ms" cluster attribute */
+       .attr   = { .ca_owner = THIS_MODULE,
+                   .ca_name = "reconnect_delay_ms",
+                   .ca_mode = S_IRUGO | S_IWUSR },     /* readable by everyone, writable by the owner only */
+       .show   = o2nm_cluster_attr_reconnect_delay_ms_read,
+       .store  = o2nm_cluster_attr_reconnect_delay_ms_write,   /* no cross-check against other settings */
+};
+
+static struct configfs_attribute *o2nm_cluster_attrs[] = {     /* installed as o2nm_cluster_type.ct_attrs */
+       &o2nm_cluster_attr_idle_timeout_ms.attr,
+       &o2nm_cluster_attr_keepalive_delay_ms.attr,
+       &o2nm_cluster_attr_reconnect_delay_ms.attr,
+       NULL,   /* end-of-list marker */
+};
+/*
+ * show_attribute callback for cluster items.
+ *
+ * Recovers the o2nm_cluster_attribute wrapping attr and forwards to its
+ * show handler with the cluster that owns item.  Returns the handler's
+ * result, or 0 when the attribute has no show handler.
+ */
+static ssize_t o2nm_cluster_show(struct config_item *item,
+                                 struct configfs_attribute *attr,
+                                 char *page)
+{
+       struct o2nm_cluster *cluster = to_o2nm_cluster(item);
+       struct o2nm_cluster_attribute *cattr =
+               container_of(attr, struct o2nm_cluster_attribute, attr);
+
+       if (!cattr->show)
+               return 0;
+
+       return cattr->show(cluster, page);
+}
+
+/*
+ * store_attribute callback for cluster items.
+ *
+ * Recovers the o2nm_cluster_attribute wrapping attr and forwards the
+ * written buffer to its store handler with the cluster that owns item.
+ * Returns the handler's result unchanged, or -EINVAL when the attribute
+ * has no store handler.
+ *
+ * The handler's return value used to be compared against count before
+ * jumping to a label that was the next statement anyway; that branch
+ * had no effect (and mixed a signed ssize_t with an unsigned size_t),
+ * so it has been dropped.
+ */
+static ssize_t o2nm_cluster_store(struct config_item *item,
+                                  struct configfs_attribute *attr,
+                                  const char *page, size_t count)
+{
+       struct o2nm_cluster *cluster = to_o2nm_cluster(item);
+       struct o2nm_cluster_attribute *o2nm_cluster_attr =
+               container_of(attr, struct o2nm_cluster_attribute, attr);
+
+       if (o2nm_cluster_attr->store == NULL)
+               return -EINVAL;
+
+       return o2nm_cluster_attr->store(cluster, page, count);
+}
+
 static struct config_item *o2nm_node_group_make_item(struct config_group *group,
                                                     const char *name)
 {
@@ -613,10 +768,13 @@ static void o2nm_cluster_release(struct config_item *item)
 
 static struct configfs_item_operations o2nm_cluster_item_ops = {
        .release        = o2nm_cluster_release,
+       .show_attribute         = o2nm_cluster_show,    /* dispatches to the per-attribute show handler */
+       .store_attribute        = o2nm_cluster_store,   /* dispatches to the per-attribute store handler */
 };
 
 static struct config_item_type o2nm_cluster_type = {
        .ct_item_ops    = &o2nm_cluster_item_ops,
+       .ct_attrs       = o2nm_cluster_attrs,   /* idle_timeout_ms, keepalive_delay_ms, reconnect_delay_ms */
        .ct_owner       = THIS_MODULE,
 };
 
@@ -667,6 +825,9 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
        cluster->cl_group.default_groups[2] = NULL;
        rwlock_init(&cluster->cl_nodes_lock);
        cluster->cl_node_ip_tree = RB_ROOT;
+       cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
+       cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
+       cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
 
        ret = &cluster->cl_group;
        o2nm_single_cluster = cluster;
index b571cda9fbb72809abf790f1bcd542f824781129..8fb23cacc2f5f15308b7288321a503eb5322fa0d 100644 (file)
@@ -60,6 +60,9 @@ struct o2nm_cluster {
        rwlock_t                cl_nodes_lock;
        struct o2nm_node        *cl_nodes[O2NM_MAX_NODES];
        struct rb_root          cl_node_ip_tree;
+       unsigned int            cl_idle_timeout_ms;
+       unsigned int            cl_keepalive_delay_ms;
+       unsigned int            cl_reconnect_delay_ms;
 
        /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
        unsigned long   cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
index 9b3209dc0b16a147e8b81f0e1eddbfcb2714e27e..ebbaee664c667d26c4e9562c49697c45885a0fe0 100644 (file)
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes);
 static void o2net_sc_send_keep_req(struct work_struct *work);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
+
+/*
+ * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
+ * losing our parent link to the cluster during shutdown. This can be
+ * solved by adding a pre-removal callback to configfs, or passing
+ * around the cluster with the node. -jeffm
+ */
+/*
+ * Reconnect delay in milliseconds.  The node argument is not consulted;
+ * the value is read from the global o2nm_single_cluster.
+ * NOTE(review): returned as int although the field is unsigned int.
+ */
+static inline int o2net_reconnect_delay(struct o2nm_node *node)
+{
+       struct o2nm_cluster *cluster = o2nm_single_cluster;
+
+       return cluster->cl_reconnect_delay_ms;
+}
+
+/*
+ * Keepalive delay in milliseconds.  The node argument is not consulted;
+ * the value is read from the global o2nm_single_cluster.
+ * NOTE(review): returned as int although the field is unsigned int.
+ */
+static inline int o2net_keepalive_delay(struct o2nm_node *node)
+{
+       struct o2nm_cluster *cluster = o2nm_single_cluster;
+
+       return cluster->cl_keepalive_delay_ms;
+}
+
+/*
+ * Idle timeout in milliseconds.  The node argument is not consulted;
+ * the value is read from the global o2nm_single_cluster.
+ * NOTE(review): returned as int although the field is unsigned int.
+ */
+static inline int o2net_idle_timeout(struct o2nm_node *node)
+{
+       struct o2nm_cluster *cluster = o2nm_single_cluster;
+
+       return cluster->cl_idle_timeout_ms;
+}
 
 static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
 {
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref)
 {
        struct o2net_sock_container *sc = container_of(kref,
                                        struct o2net_sock_container, sc_kref);
+       BUG_ON(timer_pending(&sc->sc_idle_timeout));
+
        sclog(sc, "releasing\n");
 
        if (sc->sc_sock) {
@@ -424,9 +448,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
                /* delay if we're withing a RECONNECT_DELAY of the
                 * last attempt */
                delay = (nn->nn_last_connect_attempt +
-                        msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+                        msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
                        - jiffies;
-               if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+               if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
                        delay = 0;
                mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
                queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1105,7 +1129,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
        /* set valid and queue the idle timers only if it hasn't been
         * shut down already */
        if (nn->nn_sc == sc) {
-               o2net_sc_postpone_idle(sc);
+               o2net_sc_reset_idle_timer(sc);
                o2net_set_nn_state(nn, sc, 1, 0);
        }
        spin_unlock(&nn->nn_lock);
@@ -1287,8 +1311,10 @@ static void o2net_idle_timer(unsigned long data)
 
        do_gettimeofday(&now);
 
-       printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 "
-            "seconds, shutting it down.\n", SC_NODEF_ARGS(sc));
+       printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
+            "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
+                    o2net_idle_timeout(sc->sc_node) / 1000,
+                    o2net_idle_timeout(sc->sc_node) % 1000);
        mlog(ML_NOTICE, "here are some times that might help debug the "
             "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
             "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1306,14 +1332,21 @@ static void o2net_idle_timer(unsigned long data)
        o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
 }
 
-static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
 {
        o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
        o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
-                                   O2NET_KEEPALIVE_DELAY_SECS * HZ);
+                     msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
        do_gettimeofday(&sc->sc_tv_timer);
        mod_timer(&sc->sc_idle_timeout,
-                 jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ));
+              jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
+}
+
+static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+{
+       /*
+        * Only push out an existing timer: if no idle timer is pending
+        * on this socket container, nothing is rearmed here and the
+        * keepalive work is left alone.  Arming is done by calling
+        * o2net_sc_reset_idle_timer() directly.
+        */
+       if (timer_pending(&sc->sc_idle_timeout))
+               o2net_sc_reset_idle_timer(sc);
 }
 
 /* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1435,9 +1468,12 @@ static void o2net_connect_expired(struct work_struct *work)
 
        spin_lock(&nn->nn_lock);
        if (!nn->nn_sc_valid) {
+               struct o2nm_node *node = nn->nn_sc->sc_node;
                mlog(ML_ERROR, "no connection established with node %u after "
-                    "%u seconds, giving up and returning errors.\n",
-                    o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS);
+                    "%u.%u seconds, giving up and returning errors.\n",
+                    o2net_num_from_nn(nn),
+                    o2net_idle_timeout(node) / 1000,
+                    o2net_idle_timeout(node) % 1000);
 
                o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
        }
@@ -1489,14 +1525,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 
        /* ensure an immediate connect attempt */
        nn->nn_last_connect_attempt = jiffies -
-               (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1);
+               (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
 
        if (node_num != o2nm_this_node()) {
                /* heartbeat doesn't work unless a local node number is
                 * configured and doing so brings up the o2net_wq, so we can
                 * use it.. */
                queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
-                                  O2NET_IDLE_TIMEOUT_SECS * HZ);
+                                  msecs_to_jiffies(o2net_idle_timeout(node)));
 
                /* believe it or not, accept and node hearbeating testing
                 * can succeed for this node before we got here.. so
index 616ff2b8434ad2fb33c77f702c08789ef031ac01..2e08976050fb0d448cd4e2968cf0672af3e4caa4 100644 (file)
@@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data)
 
 #define O2NET_MAX_PAYLOAD_BYTES  (4096 - sizeof(struct o2net_msg))
 
+/* same as hb delay, we're waiting for another node to recognize our hb */
+#define O2NET_RECONNECT_DELAY_MS_DEFAULT       2000    /* ms; was O2HB_REGION_TIMEOUT_MS - presumably the same value, TODO confirm */
+
+#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT       5000    /* ms; replaces O2NET_KEEPALIVE_DELAY_SECS (5) */
+#define O2NET_IDLE_TIMEOUT_MS_DEFAULT          10000   /* ms; replaces O2NET_IDLE_TIMEOUT_SECS (10) */
+
+
 /* TODO: figure this out.... */
 static inline int o2net_link_down(int err, struct socket *sock)
 {
index daebbd3a2c8ceb630404566223e2189d96bb0ed6..56f7ee1d254737a917cfa1ef175d2b93cac719e3 100644 (file)
 #define O2NET_MSG_KEEP_REQ_MAGIC  ((u16)0xfa57)
 #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58)
 
-/* same as hb delay, we're waiting for another node to recognize our hb */
-#define O2NET_RECONNECT_DELAY_MS       O2HB_REGION_TIMEOUT_MS
-
 /* we're delaying our quorum decision so that heartbeat will have timed
  * out truly dead nodes by the time we come around to making decisions
  * on their number */
 #define O2NET_QUORUM_DELAY_MS  ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
 
-#define O2NET_KEEPALIVE_DELAY_SECS     5
-#define O2NET_IDLE_TIMEOUT_SECS                10
-
 /* 
  * This version number represents quite a lot, unfortunately.  It not
  * only represents the raw network message protocol on the wire but also