drbd: New packet for Ahead/Behind mode: P_OUT_OF_SYNC
authorPhilipp Reisner <philipp.reisner@linbit.com>
Wed, 27 Oct 2010 12:33:00 +0000 (14:33 +0200)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Thu, 10 Mar 2011 10:34:48 +0000 (11:34 +0100)
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_req.h
drivers/block/drbd/drbd_worker.c
include/linux/drbd.h

index b4adb58c7472c6317404cbb64fa922fed30e649d..33f6cc537d08eaf9c4a52bfb58ac98770ce39a96 100644 (file)
@@ -1007,22 +1007,22 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
  * called by tl_clear and drbd_send_dblock (==drbd_make_request).
  * so this can be _any_ process.
  */
-void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
                            const char *file, const unsigned int line)
 {
        unsigned long sbnr, ebnr, lbnr, flags;
        sector_t esector, nr_sectors;
-       unsigned int enr, count;
+       unsigned int enr, count = 0;
        struct lc_element *e;
 
        if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
                dev_err(DEV, "sector: %llus, size: %d\n",
                        (unsigned long long)sector, size);
-               return;
+               return 0;
        }
 
        if (!get_ldev(mdev))
-               return; /* no disk, no metadata, no bitmap to set bits in */
+               return 0; /* no disk, no metadata, no bitmap to set bits in */
 
        nr_sectors = drbd_get_capacity(mdev->this_bdev);
        esector = sector + (size >> 9) - 1;
@@ -1052,6 +1052,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
 
 out:
        put_ldev(mdev);
+
+       return count;
 }
 
 static
index 21b7439438cd79bd111a345cdf44f2937cecc6d3..471331236826d5e0af66c1650165188d1500f25b 100644 (file)
@@ -212,6 +212,7 @@ enum drbd_packets {
        /* P_CKPT_FENCE_REQ      = 0x25, * currently reserved for protocol D */
        /* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
        P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
+       P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
 
        P_MAX_CMD             = 0x28,
        P_MAY_IGNORE          = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
@@ -269,6 +270,7 @@ static inline const char *cmdname(enum drbd_packets cmd)
                [P_RS_IS_IN_SYNC]       = "CsumRSIsInSync",
                [P_COMPRESSED_BITMAP]   = "CBitmap",
                [P_DELAY_PROBE]         = "DelayProbe",
+               [P_OUT_OF_SYNC]         = "OutOfSync",
                [P_MAX_CMD]             = NULL,
        };
 
@@ -550,6 +552,13 @@ struct p_discard {
        u32         pad;
 } __packed;
 
+struct p_block_desc {
+       struct p_header80 head;
+       u64 sector;
+       u32 blksize;
+       u32 pad;        /* to multiple of 8 Byte */
+} __packed;
+
 /* Valid values for the encoding field.
  * Bump proto version when changing this. */
 enum drbd_bitmap_code {
@@ -647,6 +656,7 @@ union p_polymorph {
         struct p_block_req       block_req;
        struct p_delay_probe93   delay_probe93;
        struct p_rs_uuid         rs_uuid;
+       struct p_block_desc      block_desc;
 } __packed;
 
 /**********************************************************************/
@@ -1221,6 +1231,7 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
                        struct p_data *dp, int data_size);
 extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
                            sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
                           struct drbd_epoch_entry *e);
 extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
@@ -1534,6 +1545,7 @@ extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
 extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
 
 extern void resync_timer_fn(unsigned long data);
 
@@ -1626,7 +1638,7 @@ extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
                int size, const char *file, const unsigned int line);
 #define drbd_set_in_sync(mdev, sector, size) \
        __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
                int size, const char *file, const unsigned int line);
 #define drbd_set_out_of_sync(mdev, sector, size) \
        __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
index 46f27d6c0b21cc4954ae92225a554ec7905ca6e6..0dc93f43a4760b18aceae473782c7df77dae6314 100644 (file)
@@ -2634,6 +2634,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
        return ok;
 }
 
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+       struct p_block_desc p;
+
+       p.sector  = cpu_to_be64(req->sector);
+       p.blksize = cpu_to_be32(req->size);
+
+       return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
 /*
   drbd_send distinguishes two cases:
 
index b19e8b2c4ce5b12f29882fb76c10e48ee728e607..04a08e7541cceb895aa4ea7b83a0ae2e83076512 100644 (file)
@@ -3562,6 +3562,15 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u
        return TRUE;
 }
 
+static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+       struct p_block_desc *p = &mdev->data.rbuf.block_desc;
+
+       drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
+
+       return TRUE;
+}
+
 typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
 
 struct data_cmd {
@@ -3592,6 +3601,7 @@ static struct data_cmd drbd_cmd_handler[] = {
        [P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
        [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
        [P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
+       [P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
        /* anything missing from this table is in
         * the asender_tbl, see get_asender_cmd */
        [P_MAX_CMD]         = { 0, 0, NULL },
index 60288fb3c4d71ade3008b9271887a0dcdc5ffdfb..a8d1ff2bda278defbddb68909929eeea94a4f887 100644 (file)
@@ -142,7 +142,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
 
        /* before we can signal completion to the upper layers,
         * we may need to close the current epoch */
-       if (mdev->state.conn >= C_CONNECTED &&
+       if (mdev->state.conn >= C_CONNECTED && mdev->state.conn < C_AHEAD &&
            req->epoch == mdev->newest_tle->br_number)
                queue_barrier(mdev);
 
@@ -545,6 +545,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
                break;
 
+       case queue_for_send_oos:
+               req->rq_state |= RQ_NET_QUEUED;
+               req->w.cb =  w_send_oos;
+               drbd_queue_work(&mdev->data.work, &req->w);
+               break;
+
+       case oos_handed_to_network:
+               /* actually the same */
        case send_canceled:
                /* treat it the same */
        case send_failed:
@@ -756,7 +764,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
        const sector_t sector = bio->bi_sector;
        struct drbd_tl_epoch *b = NULL;
        struct drbd_request *req;
-       int local, remote;
+       int local, remote, send_oos = 0;
        int err = -EIO;
        int ret = 0;
 
@@ -820,8 +828,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
        }
 
        remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
-                           (mdev->state.pdsk == D_INCONSISTENT &&
-                            mdev->state.conn >= C_CONNECTED));
+                           (mdev->state.pdsk >= D_INCONSISTENT &&
+                            mdev->state.conn >= C_CONNECTED &&
+                            mdev->state.conn < C_AHEAD));
+       send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD &&
+                   mdev->state.pdsk >= D_INCONSISTENT);
 
        if (!(local || remote) && !is_susp(mdev->state)) {
                if (__ratelimit(&drbd_ratelimit_state))
@@ -835,7 +846,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
         * but there is a race between testing the bit and pointer outside the
         * spinlock, and grabbing the spinlock.
         * if we lost that race, we retry.  */
-       if (rw == WRITE && remote &&
+       if (rw == WRITE && (remote || send_oos) &&
            mdev->unused_spare_tle == NULL &&
            test_bit(CREATE_BARRIER, &mdev->flags)) {
 allocate_barrier:
@@ -860,11 +871,15 @@ allocate_barrier:
                goto fail_free_complete;
        }
 
-       if (remote) {
+       if (remote || send_oos) {
                remote = (mdev->state.pdsk == D_UP_TO_DATE ||
-                           (mdev->state.pdsk == D_INCONSISTENT &&
-                            mdev->state.conn >= C_CONNECTED));
-               if (!remote)
+                           (mdev->state.pdsk >= D_INCONSISTENT &&
+                            mdev->state.conn >= C_CONNECTED &&
+                            mdev->state.conn < C_AHEAD));
+               send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD &&
+                           mdev->state.pdsk >= D_INCONSISTENT);
+
+               if (!(remote || send_oos))
                        dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
                if (!(local || remote)) {
                        dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
@@ -877,7 +892,7 @@ allocate_barrier:
                mdev->unused_spare_tle = b;
                b = NULL;
        }
-       if (rw == WRITE && remote &&
+       if (rw == WRITE && (remote || send_oos) &&
            mdev->unused_spare_tle == NULL &&
            test_bit(CREATE_BARRIER, &mdev->flags)) {
                /* someone closed the current epoch
@@ -900,7 +915,7 @@ allocate_barrier:
         * barrier packet.  To get the write ordering right, we only have to
         * make sure that, if this is a write request and it triggered a
         * barrier packet, this request is queued within the same spinlock. */
-       if (remote && mdev->unused_spare_tle &&
+       if ((remote || send_oos) && mdev->unused_spare_tle &&
            test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
                _tl_add_barrier(mdev, mdev->unused_spare_tle);
                mdev->unused_spare_tle = NULL;
@@ -948,8 +963,11 @@ allocate_barrier:
                                ? queue_for_net_write
                                : queue_for_net_read);
        }
+       if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
+               _req_mod(req, queue_for_send_oos);
 
-       if (remote && mdev->net_conf->on_congestion != OC_BLOCK) {
+       if (remote &&
+           mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
                int congested = 0;
 
                if (mdev->net_conf->cong_fill &&
@@ -964,6 +982,8 @@ allocate_barrier:
                }
 
                if (congested) {
+                       queue_barrier(mdev);
+
                        if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
                                _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
                        else  /*mdev->net_conf->on_congestion == OC_DISCONNECT */
index 69d350fe7c1e6ef1e42d4426ddc1c298d16428e5..40d3dcd8fc815cf2b7927c66b9720fe17836c739 100644 (file)
@@ -82,14 +82,16 @@ enum drbd_req_event {
        to_be_submitted,
 
        /* XXX yes, now I am inconsistent...
-        * these two are not "events" but "actions"
+        * these are not "events" but "actions"
         * oh, well... */
        queue_for_net_write,
        queue_for_net_read,
+       queue_for_send_oos,
 
        send_canceled,
        send_failed,
        handed_over_to_network,
+       oos_handed_to_network,
        connection_lost_while_pending,
        read_retry_remote_canceled,
        recv_acked_by_peer,
index 782d87237cb47533c12c18807293d0115dffd231..67499077c4828de1cb2ba9e163232959e4452aa9 100644 (file)
@@ -1237,6 +1237,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
 }
 
+int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+       struct drbd_request *req = container_of(w, struct drbd_request, w);
+       int ok;
+
+       if (unlikely(cancel)) {
+               req_mod(req, send_canceled);
+               return 1;
+       }
+
+       ok = drbd_send_oos(mdev, req);
+       req_mod(req, oos_handed_to_network);
+
+       return ok;
+}
+
 /**
  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
  * @mdev:      DRBD device.
index 23f31be6f00d0f689c03b32d56219a9e086adc88..41da654cc0b184ca1bf2d04531b2ac6fec352304 100644 (file)
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.9"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 95
+#define PRO_VERSION_MAX 96
 
 
 enum drbd_io_error_p {