if (device) {
dma_addr_t dma_dest, dma_src;
- unsigned long dma_prep_flags;
+ unsigned long dma_prep_flags = 0;
- dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);
if (device) {
dma_addr_t dma_dest;
- unsigned long dma_prep_flags;
+ unsigned long dma_prep_flags = 0;
- dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
*/
if (src_cnt > pq_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
if (cb_fn_orig)
dma_flags |= DMA_PREP_INTERRUPT;
}
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward
dma_flags |= DMA_PREP_PQ_DISABLE_P;
if (!Q(blocks, disks))
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
for (i = 0; i < disks; i++)
if (likely(blocks[i])) {
BUG_ON(is_raid6_zero_block(blocks[i]));
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
srcs[1] = q;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(b, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
dp = blocks[faila];
dq = blocks[failb];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_memcpy(dp, g, 0, 0, bytes, submit);
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
blocks[failb] = (void *)raid6_empty_zero_page;
blocks[disks-1] = dq;
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
/* Restore pointer table */
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
int good = faila == 0 ? 1 : 0;
struct page *g = blocks[good];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
tx = async_memcpy(p, g, 0, 0, bytes, submit);
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
} else {
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
}
srcs[0] = dq;
srcs[1] = q;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_mult(dq, dq, coef, bytes, submit);
srcs[0] = p;
*/
if (src_cnt > xor_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
}
if (submit->cb_fn)
dma_flags |= DMA_PREP_INTERRUPT;
-
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
dma_src = (dma_addr_t *) src_list;
if (dma_src && device && src_cnt <= device->max_xor) {
- unsigned long dma_prep_flags;
+ unsigned long dma_prep_flags = 0;
int i;
pr_debug("%s: (async) len: %zu\n", __func__, len);
- dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
int i;
int page_offset;
struct async_submit_ctl submit;
+ enum async_tx_flags flags = 0;
if (bio->bi_sector >= sector)
page_offset = (signed)(bio->bi_sector - sector) * 512;
else
page_offset = (signed)(sector - bio->bi_sector) * -512;
- init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
+ if (frombio)
+ flags |= ASYNC_TX_FENCE;
+ init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
bio_for_each_segment(bvl, bio, i) {
int len = bio_iovec_idx(bio, i)->bv_len;
int clen;
atomic_inc(&sh->count);
- init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute, sh, to_addr_conv(sh, percpu));
if (unlikely(count == 1))
tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
count = set_syndrome_sources(blocks, sh);
blocks[count] = NULL; /* regenerating p is not necessary */
BUG_ON(blocks[count+1] != dest); /* q should already be set */
- init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
to_addr_conv(sh, percpu));
tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
} else {
blocks[count++] = sh->dev[i].page;
}
- init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
- ops_complete_compute, sh,
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ NULL, ops_complete_compute, sh,
to_addr_conv(sh, percpu));
tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
}
/* Q disk is one of the missing disks */
if (faila == syndrome_disks) {
/* Missing P+Q, just recompute */
- init_async_submit(&submit, 0, NULL, ops_complete_compute,
- sh, to_addr_conv(sh, percpu));
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
return async_gen_syndrome(blocks, 0, count+2,
STRIPE_SIZE, &submit);
} else {
blocks[count++] = sh->dev[i].page;
}
dest = sh->dev[data_target].page;
- init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
- NULL, NULL, to_addr_conv(sh, percpu));
+ init_async_submit(&submit,
+ ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+ NULL, NULL, NULL,
+ to_addr_conv(sh, percpu));
tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
&submit);
count = set_syndrome_sources(blocks, sh);
- init_async_submit(&submit, 0, tx, ops_complete_compute,
- sh, to_addr_conv(sh, percpu));
+ init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+ ops_complete_compute, sh,
+ to_addr_conv(sh, percpu));
return async_gen_syndrome(blocks, 0, count+2,
STRIPE_SIZE, &submit);
}
}
- init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
- to_addr_conv(sh, percpu));
+ init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute,
+ sh, to_addr_conv(sh, percpu));
if (failb == syndrome_disks) {
/* We're missing D+P. */
return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
xor_srcs[count++] = dev->page;
}
- init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
+ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
ops_complete_prexor, sh, to_addr_conv(sh, percpu));
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
* array.
* @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
* dependency chain
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
*/
enum async_tx_flags {
ASYNC_TX_XOR_ZERO_DST = (1 << 0),
ASYNC_TX_XOR_DROP_DST = (1 << 1),
ASYNC_TX_ACK = (1 << 2),
+ ASYNC_TX_FENCE = (1 << 3),
};
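
For reference, a minimal consumer-side sketch (not part of this patch) of how ASYNC_TX_FENCE is meant to be used: set it on every operation whose output is read by the next operation in the chain, and leave it clear on the final step. The helper name copy_of_xor and its parameters are invented for illustration; the calls themselves (init_async_submit, async_xor, async_memcpy) are the existing async_tx API shown above.

#include <linux/async_tx.h>

/*
 * Illustrative consumer (hypothetical helper, not from this patch): xor
 * src_cnt pages into 'parity', then copy the result into 'dest'.  The
 * copy reads the xor output, so the xor is submitted with
 * ASYNC_TX_FENCE; the final operation has no consumer and is not fenced.
 */
static struct dma_async_tx_descriptor *
copy_of_xor(struct page *dest, struct page *parity, struct page **srcs,
	    int src_cnt, size_t len,
	    struct dma_async_tx_descriptor *depend_tx, addr_conv_t *scribble)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* the next operation consumes 'parity', so fence this one */
	init_async_submit(&submit, ASYNC_TX_FENCE | ASYNC_TX_XOR_ZERO_DST,
			  depend_tx, NULL, NULL, scribble);
	tx = async_xor(parity, srcs, 0, src_cnt, len, &submit);

	/* last link in this chain: nothing here depends on it, no fence */
	init_async_submit(&submit, 0, tx, NULL, NULL, scribble);
	return async_memcpy(dest, parity, 0, 0, len, &submit);
}
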
/**
* @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
* sources that were the result of a previous operation, in the case of a PQ
* operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ * on the result of this operation
*/
enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
DMA_PREP_PQ_DISABLE_P = (1 << 6),
DMA_PREP_PQ_DISABLE_Q = (1 << 7),
DMA_PREP_CONTINUE = (1 << 8),
+ DMA_PREP_FENCE = (1 << 9),
};
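
On the provider side, DMA_PREP_FENCE is consumed by each driver's prep routine. The sketch below is not from this patch and is not any in-tree driver: every foo_* identifier, the FOO_CTL_* bits, and the descriptor layout are invented for illustration; only the ->device_prep_dma_xor() prototype and the flags are real.

#include <linux/dmaengine.h>

/* All foo_* identifiers below are hypothetical stand-ins for a real
 * driver's descriptor handling. */
#define FOO_CTL_INT_EN	(1 << 0)	/* raise interrupt on completion */
#define FOO_CTL_FENCE	(1 << 1)	/* order result before successors */

struct foo_hw_desc {
	u32 ctl;
	/* ... addresses, transfer size, next-descriptor link, ... */
};

struct foo_desc {
	struct foo_hw_desc hw;
	struct dma_async_tx_descriptor txd;
};

struct foo_desc *foo_alloc_desc(struct dma_chan *chan);		/* hypothetical */
void foo_fill_xor(struct foo_desc *desc, dma_addr_t dest,
		  dma_addr_t *src, unsigned int src_cnt, size_t len);

static struct dma_async_tx_descriptor *
foo_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
		 unsigned int src_cnt, size_t len, unsigned long flags)
{
	struct foo_desc *desc = foo_alloc_desc(chan);

	if (!desc)
		return NULL;

	foo_fill_xor(desc, dest, src, src_cnt, len);

	if (flags & DMA_PREP_INTERRUPT)
		desc->hw.ctl |= FOO_CTL_INT_EN;

	/* DMA_PREP_FENCE: a later descriptor reads this result, so the
	 * engine must complete (and flush) this operation before it
	 * starts processing anything that depends on it. */
	if (flags & DMA_PREP_FENCE)
		desc->hw.ctl |= FOO_CTL_FENCE;

	return &desc->txd;
}
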
/**