source "drivers/staging/dream/Kconfig"
-source "drivers/staging/dst/Kconfig"
-
source "drivers/staging/pohmelfs/Kconfig"
source "drivers/staging/b3dfg/Kconfig"
obj-$(CONFIG_INPUT_MIMIO) += mimio/
obj-$(CONFIG_TRANZPORT) += frontier/
obj-$(CONFIG_DREAM) += dream/
-obj-$(CONFIG_DST) += dst/
obj-$(CONFIG_POHMELFS) += pohmelfs/
obj-$(CONFIG_B3DFG) += b3dfg/
obj-$(CONFIG_IDE_PHISON) += phison/
+++ /dev/null
-config DST
- tristate "Distributed storage"
- depends on NET && CRYPTO && SYSFS && BLK_DEV
- select CONNECTOR
- ---help---
- DST is a network block device storage, which can be used to organize
- exported storage on the remote nodes into the local block device.
-
- DST works on top of any network media and protocol; it is just a matter
- of configuration utility to understand the correct addresses. The most
- common example is TCP over IP, which allows to pass through firewalls and
- create remote backup storage in a different datacenter. DST requires
- single port to be enabled on the exporting node and outgoing connections
- on the local node.
-
- DST works with in-kernel client and server, which improves performance by
- eliminating unneded data copies and by not depending on the version
- of the external IO components. It requires userspace configuration utility
- though.
-
- DST uses transaction model, when each store has to be explicitly acked
- from the remote node to be considered as successfully written. There
- may be lots of in-flight transactions. When remote host does not ack
- the transaction it will be resent predefined number of times with specified
- timeouts between them. All those parameters are configurable. Transactions
- are marked as failed after all resends complete unsuccessfully; having
- long enough resend timeout and/or large number of resends allows not to
- return error to the higher (FS usually) layer in case of short network
- problems or remote node outages. In case of network RAID setup this means
- that storage will not degrade until transactions are marked as failed, and
- thus will not force checksum recalculation and data rebuild. In case of
- connection failure DST will try to reconnect to the remote node automatically.
- DST sends ping commands at idle time to detect if remote node is alive.
-
- Because of transactional model it is possible to use zero-copy sending
- without worry of data corruption (which in turn could be detected by the
- strong checksums though).
-
- DST may fully encrypt the data channel in case of untrusted channel and implement
- strong checksum of the transferred data. It is possible to configure algorithms
- and crypto keys; they should match on both sides of the network channel.
- Crypto processing does not introduce noticeble performance overhead, since DST
- uses configurable pool of threads to perform crypto processing.
-
- DST utilizes memory pool model of all its transaction allocations (it is the
- only additional allocation on the client) and server allocations (bio pools,
- while pages are allocated from the slab).
-
- At startup DST performs a simple negotiation with the export node to determine
- access permissions and size of the exported storage. It can be extended if
- new parameters should be autonegotiated.
-
- DST carries block IO flags in the protocol, which allows to transparently implement
- barriers and sync/flush operations. Those flags are used in the export node where
- IO against the local storage is performed, which means that sync write will be sync
- on the remote node too, which in turn improves data integrity and improved resistance
- to errors and data corruption during power outages or storage damages.
-
- Homepage: http://www.ioremap.net/projects/dst
- Userspace configuration utility and the latest releases: http://www.ioremap.net/archive/dst/
-
-config DST_DEBUG
- bool "DST debug"
- depends on DST
- ---help---
- This option will enable HEAVY debugging of the DST.
- Turn it on ONLY if you have to debug some really obscure problem.
+++ /dev/null
-obj-$(CONFIG_DST) += nst.o
-
-nst-y := dcore.o state.o export.o thread_pool.o crypto.o trans.o
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bio.h>
-#include <linux/crypto.h>
-#include <linux/dst.h>
-#include <linux/kernel.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-
-/*
- * Tricky bastard, but IV can be more complex with time...
- */
-static inline u64 dst_gen_iv(struct dst_trans *t)
-{
- return t->gen;
-}
-
-/*
- * Crypto machinery: hash/cipher support for the given crypto controls.
- */
-static struct crypto_hash *dst_init_hash(struct dst_crypto_ctl *ctl, u8 *key)
-{
- int err;
- struct crypto_hash *hash;
-
- hash = crypto_alloc_hash(ctl->hash_algo, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(hash)) {
- err = PTR_ERR(hash);
- dprintk("%s: failed to allocate hash '%s', err: %d.\n",
- __func__, ctl->hash_algo, err);
- goto err_out_exit;
- }
-
- ctl->crypto_attached_size = crypto_hash_digestsize(hash);
-
- if (!ctl->hash_keysize)
- return hash;
-
- err = crypto_hash_setkey(hash, key, ctl->hash_keysize);
- if (err) {
- dprintk("%s: failed to set key for hash '%s', err: %d.\n",
- __func__, ctl->hash_algo, err);
- goto err_out_free;
- }
-
- return hash;
-
-err_out_free:
- crypto_free_hash(hash);
-err_out_exit:
- return ERR_PTR(err);
-}
-
-static struct crypto_ablkcipher *dst_init_cipher(struct dst_crypto_ctl *ctl,
- u8 *key)
-{
- int err = -EINVAL;
- struct crypto_ablkcipher *cipher;
-
- if (!ctl->cipher_keysize)
- goto err_out_exit;
-
- cipher = crypto_alloc_ablkcipher(ctl->cipher_algo, 0, 0);
- if (IS_ERR(cipher)) {
- err = PTR_ERR(cipher);
- dprintk("%s: failed to allocate cipher '%s', err: %d.\n",
- __func__, ctl->cipher_algo, err);
- goto err_out_exit;
- }
-
- crypto_ablkcipher_clear_flags(cipher, ~0);
-
- err = crypto_ablkcipher_setkey(cipher, key, ctl->cipher_keysize);
- if (err) {
- dprintk("%s: failed to set key for cipher '%s', err: %d.\n",
- __func__, ctl->cipher_algo, err);
- goto err_out_free;
- }
-
- return cipher;
-
-err_out_free:
- crypto_free_ablkcipher(cipher);
-err_out_exit:
- return ERR_PTR(err);
-}
-
-/*
- * Crypto engine has a pool of pages to encrypt data into before sending
- * it over the network. This pool is freed/allocated here.
- */
-static void dst_crypto_pages_free(struct dst_crypto_engine *e)
-{
- unsigned int i;
-
- for (i = 0; i < e->page_num; ++i)
- __free_page(e->pages[i]);
- kfree(e->pages);
-}
-
-static int dst_crypto_pages_alloc(struct dst_crypto_engine *e, int num)
-{
- int i;
-
- e->pages = kmalloc(num * sizeof(struct page **), GFP_KERNEL);
- if (!e->pages)
- return -ENOMEM;
-
- for (i = 0; i < num; ++i) {
- e->pages[i] = alloc_page(GFP_KERNEL);
- if (!e->pages[i])
- goto err_out_free_pages;
- }
-
- e->page_num = num;
- return 0;
-
-err_out_free_pages:
- while (--i >= 0)
- __free_page(e->pages[i]);
-
- kfree(e->pages);
- return -ENOMEM;
-}
-
-/*
- * Initialize crypto engine for given node.
- * Setup cipher/hash, keys, pool of threads and private data.
- */
-static int dst_crypto_engine_init(struct dst_crypto_engine *e,
- struct dst_node *n)
-{
- int err;
- struct dst_crypto_ctl *ctl = &n->crypto;
-
- err = dst_crypto_pages_alloc(e, n->max_pages);
- if (err)
- goto err_out_exit;
-
- e->size = PAGE_SIZE;
- e->data = kmalloc(e->size, GFP_KERNEL);
- if (!e->data) {
- err = -ENOMEM;
- goto err_out_free_pages;
- }
-
- if (ctl->hash_algo[0]) {
- e->hash = dst_init_hash(ctl, n->hash_key);
- if (IS_ERR(e->hash)) {
- err = PTR_ERR(e->hash);
- e->hash = NULL;
- goto err_out_free;
- }
- }
-
- if (ctl->cipher_algo[0]) {
- e->cipher = dst_init_cipher(ctl, n->cipher_key);
- if (IS_ERR(e->cipher)) {
- err = PTR_ERR(e->cipher);
- e->cipher = NULL;
- goto err_out_free_hash;
- }
- }
-
- return 0;
-
-err_out_free_hash:
- crypto_free_hash(e->hash);
-err_out_free:
- kfree(e->data);
-err_out_free_pages:
- dst_crypto_pages_free(e);
-err_out_exit:
- return err;
-}
-
-static void dst_crypto_engine_exit(struct dst_crypto_engine *e)
-{
- if (e->hash)
- crypto_free_hash(e->hash);
- if (e->cipher)
- crypto_free_ablkcipher(e->cipher);
- dst_crypto_pages_free(e);
- kfree(e->data);
-}
-
-/*
- * Waiting for cipher processing to be completed.
- */
-struct dst_crypto_completion {
- struct completion complete;
- int error;
-};
-
-static void dst_crypto_complete(struct crypto_async_request *req, int err)
-{
- struct dst_crypto_completion *c = req->data;
-
- if (err == -EINPROGRESS)
- return;
-
- dprintk("%s: req: %p, err: %d.\n", __func__, req, err);
- c->error = err;
- complete(&c->complete);
-}
-
-static int dst_crypto_process(struct ablkcipher_request *req,
- struct scatterlist *sg_dst, struct scatterlist *sg_src,
- void *iv, int enc, unsigned long timeout)
-{
- struct dst_crypto_completion c;
- int err;
-
- init_completion(&c.complete);
- c.error = -EINPROGRESS;
-
- ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- dst_crypto_complete, &c);
-
- ablkcipher_request_set_crypt(req, sg_src, sg_dst, sg_src->length, iv);
-
- if (enc)
- err = crypto_ablkcipher_encrypt(req);
- else
- err = crypto_ablkcipher_decrypt(req);
-
- switch (err) {
- case -EINPROGRESS:
- case -EBUSY:
- err = wait_for_completion_interruptible_timeout(&c.complete,
- timeout);
- if (!err)
- err = -ETIMEDOUT;
- else
- err = c.error;
- break;
- default:
- break;
- }
-
- return err;
-}
-
-/*
- * DST uses generic iteration approach for data crypto processing.
- * Single block IO request is switched into array of scatterlists,
- * which are submitted to the crypto processing iterator.
- *
- * Input and output iterator initialization are different, since
- * in output case we can not encrypt data in-place and need a
- * temporary storage, which is then being sent to the remote peer.
- */
-static int dst_trans_iter_out(struct bio *bio, struct dst_crypto_engine *e,
- int (*iterator) (struct dst_crypto_engine *e,
- struct scatterlist *dst,
- struct scatterlist *src))
-{
- struct bio_vec *bv;
- int err, i;
-
- sg_init_table(e->src, bio->bi_vcnt);
- sg_init_table(e->dst, bio->bi_vcnt);
-
- bio_for_each_segment(bv, bio, i) {
- sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
- sg_set_page(&e->dst[i], e->pages[i], bv->bv_len, bv->bv_offset);
-
- err = iterator(e, &e->dst[i], &e->src[i]);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int dst_trans_iter_in(struct bio *bio, struct dst_crypto_engine *e,
- int (*iterator) (struct dst_crypto_engine *e,
- struct scatterlist *dst,
- struct scatterlist *src))
-{
- struct bio_vec *bv;
- int err, i;
-
- sg_init_table(e->src, bio->bi_vcnt);
- sg_init_table(e->dst, bio->bi_vcnt);
-
- bio_for_each_segment(bv, bio, i) {
- sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
- sg_set_page(&e->dst[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-
- err = iterator(e, &e->dst[i], &e->src[i]);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int dst_crypt_iterator(struct dst_crypto_engine *e,
- struct scatterlist *sg_dst, struct scatterlist *sg_src)
-{
- struct ablkcipher_request *req = e->data;
- u8 iv[32];
-
- memset(iv, 0, sizeof(iv));
-
- memcpy(iv, &e->iv, sizeof(e->iv));
-
- return dst_crypto_process(req, sg_dst, sg_src, iv, e->enc, e->timeout);
-}
-
-static int dst_crypt(struct dst_crypto_engine *e, struct bio *bio)
-{
- struct ablkcipher_request *req = e->data;
-
- memset(req, 0, sizeof(struct ablkcipher_request));
- ablkcipher_request_set_tfm(req, e->cipher);
-
- if (e->enc)
- return dst_trans_iter_out(bio, e, dst_crypt_iterator);
- else
- return dst_trans_iter_in(bio, e, dst_crypt_iterator);
-}
-
-static int dst_hash_iterator(struct dst_crypto_engine *e,
- struct scatterlist *sg_dst, struct scatterlist *sg_src)
-{
- return crypto_hash_update(e->data, sg_src, sg_src->length);
-}
-
-static int dst_hash(struct dst_crypto_engine *e, struct bio *bio, void *dst)
-{
- struct hash_desc *desc = e->data;
- int err;
-
- desc->tfm = e->hash;
- desc->flags = 0;
-
- err = crypto_hash_init(desc);
- if (err)
- return err;
-
- err = dst_trans_iter_in(bio, e, dst_hash_iterator);
- if (err)
- return err;
-
- err = crypto_hash_final(desc, dst);
- if (err)
- return err;
-
- return 0;
-}
-
-/*
- * Initialize/cleanup a crypto thread. The only thing it should
- * do is to allocate a pool of pages as temporary storage.
- * And to setup cipher and/or hash.
- */
-static void *dst_crypto_thread_init(void *data)
-{
- struct dst_node *n = data;
- struct dst_crypto_engine *e;
- int err = -ENOMEM;
-
- e = kzalloc(sizeof(struct dst_crypto_engine), GFP_KERNEL);
- if (!e)
- goto err_out_exit;
- e->src = kcalloc(2 * n->max_pages, sizeof(struct scatterlist),
- GFP_KERNEL);
- if (!e->src)
- goto err_out_free;
-
- e->dst = e->src + n->max_pages;
-
- err = dst_crypto_engine_init(e, n);
- if (err)
- goto err_out_free_all;
-
- return e;
-
-err_out_free_all:
- kfree(e->src);
-err_out_free:
- kfree(e);
-err_out_exit:
- return ERR_PTR(err);
-}
-
-static void dst_crypto_thread_cleanup(void *private)
-{
- struct dst_crypto_engine *e = private;
-
- dst_crypto_engine_exit(e);
- kfree(e->src);
- kfree(e);
-}
-
-/*
- * Initialize crypto engine for given node: store keys, create pool
- * of threads, initialize each one.
- *
- * Each thread has unique ID, but 0 and 1 are reserved for receiving and
- * accepting threads (if export node), so IDs could start from 2, but starting
- * them from 10 allows easily understand what this thread is for.
- */
-int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl)
-{
- void *key = (ctl + 1);
- int err = -ENOMEM, i;
- char name[32];
-
- if (ctl->hash_keysize) {
- n->hash_key = kmalloc(ctl->hash_keysize, GFP_KERNEL);
- if (!n->hash_key)
- goto err_out_exit;
- memcpy(n->hash_key, key, ctl->hash_keysize);
- }
-
- if (ctl->cipher_keysize) {
- n->cipher_key = kmalloc(ctl->cipher_keysize, GFP_KERNEL);
- if (!n->cipher_key)
- goto err_out_free_hash;
- memcpy(n->cipher_key, key, ctl->cipher_keysize);
- }
- memcpy(&n->crypto, ctl, sizeof(struct dst_crypto_ctl));
-
- for (i = 0; i < ctl->thread_num; ++i) {
- snprintf(name, sizeof(name), "%s-crypto-%d", n->name, i);
- /* Unique ids... */
- err = thread_pool_add_worker(n->pool, name, i + 10,
- dst_crypto_thread_init, dst_crypto_thread_cleanup, n);
- if (err)
- goto err_out_free_threads;
- }
-
- return 0;
-
-err_out_free_threads:
- while (--i >= 0)
- thread_pool_del_worker_id(n->pool, i+10);
-
- if (ctl->cipher_keysize)
- kfree(n->cipher_key);
- ctl->cipher_keysize = 0;
-err_out_free_hash:
- if (ctl->hash_keysize)
- kfree(n->hash_key);
- ctl->hash_keysize = 0;
-err_out_exit:
- return err;
-}
-
-void dst_node_crypto_exit(struct dst_node *n)
-{
- struct dst_crypto_ctl *ctl = &n->crypto;
-
- if (ctl->cipher_algo[0] || ctl->hash_algo[0]) {
- kfree(n->hash_key);
- kfree(n->cipher_key);
- }
-}
-
-/*
- * Thrad pool setup callback. Just stores a transaction in private data.
- */
-static int dst_trans_crypto_setup(void *crypto_engine, void *trans)
-{
- struct dst_crypto_engine *e = crypto_engine;
-
- e->private = trans;
- return 0;
-}
-
-#if 0
-static void dst_dump_bio(struct bio *bio)
-{
- u8 *p;
- struct bio_vec *bv;
- int i;
-
- bio_for_each_segment(bv, bio, i) {
- dprintk("%s: %llu/%u: size: %u, offset: %u, data: ",
- __func__, bio->bi_sector, bio->bi_size,
- bv->bv_len, bv->bv_offset);
-
- p = kmap(bv->bv_page) + bv->bv_offset;
- for (i = 0; i < bv->bv_len; ++i)
- printk(KERN_DEBUG "%02x ", p[i]);
- kunmap(bv->bv_page);
- printk("\n");
- }
-}
-#endif
-
-/*
- * Encrypt/hash data and send it to the network.
- */
-static int dst_crypto_process_sending(struct dst_crypto_engine *e,
- struct bio *bio, u8 *hash)
-{
- int err;
-
- if (e->cipher) {
- err = dst_crypt(e, bio);
- if (err)
- goto err_out_exit;
- }
-
- if (e->hash) {
- err = dst_hash(e, bio, hash);
- if (err)
- goto err_out_exit;
-
-#ifdef CONFIG_DST_DEBUG
- {
- unsigned int i;
-
- /* dst_dump_bio(bio); */
-
- printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash: ",
- __func__, (u64)bio->bi_sector,
- bio->bi_size, bio_data_dir(bio));
- for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
- printk("%02x ", hash[i]);
- printk("\n");
- }
-#endif
- }
-
- return 0;
-
-err_out_exit:
- return err;
-}
-
-/*
- * Check if received data is valid. Decipher if it is.
- */
-static int dst_crypto_process_receiving(struct dst_crypto_engine *e,
- struct bio *bio, u8 *hash, u8 *recv_hash)
-{
- int err;
-
- if (e->hash) {
- int mismatch;
-
- err = dst_hash(e, bio, hash);
- if (err)
- goto err_out_exit;
-
- mismatch = !!memcmp(recv_hash, hash,
- crypto_hash_digestsize(e->hash));
-#ifdef CONFIG_DST_DEBUG
- /* dst_dump_bio(bio); */
-
- printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash mismatch: %d",
- __func__, (u64)bio->bi_sector, bio->bi_size,
- bio_data_dir(bio), mismatch);
- if (mismatch) {
- unsigned int i;
-
- printk(", recv/calc: ");
- for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
- printk("%02x/%02x ", recv_hash[i], hash[i]);
-
- }
- printk("\n");
-#endif
- err = -1;
- if (mismatch)
- goto err_out_exit;
- }
-
- if (e->cipher) {
- err = dst_crypt(e, bio);
- if (err)
- goto err_out_exit;
- }
-
- return 0;
-
-err_out_exit:
- return err;
-}
-
-/*
- * Thread pool callback to encrypt data and send it to the netowork.
- */
-static int dst_trans_crypto_action(void *crypto_engine, void *schedule_data)
-{
- struct dst_crypto_engine *e = crypto_engine;
- struct dst_trans *t = schedule_data;
- struct bio *bio = t->bio;
- int err;
-
- dprintk("%s: t: %p, gen: %llu, cipher: %p, hash: %p.\n",
- __func__, t, t->gen, e->cipher, e->hash);
-
- e->enc = t->enc;
- e->iv = dst_gen_iv(t);
-
- if (bio_data_dir(bio) == WRITE) {
- err = dst_crypto_process_sending(e, bio, t->cmd.hash);
- if (err)
- goto err_out_exit;
-
- if (e->hash) {
- t->cmd.csize = crypto_hash_digestsize(e->hash);
- t->cmd.size += t->cmd.csize;
- }
-
- return dst_trans_send(t);
- } else {
- u8 *hash = e->data + e->size/2;
-
- err = dst_crypto_process_receiving(e, bio, hash, t->cmd.hash);
- if (err)
- goto err_out_exit;
-
- dst_trans_remove(t);
- dst_trans_put(t);
- }
-
- return 0;
-
-err_out_exit:
- t->error = err;
- dst_trans_put(t);
- return err;
-}
-
-/*
- * Schedule crypto processing for given transaction.
- */
-int dst_trans_crypto(struct dst_trans *t)
-{
- struct dst_node *n = t->n;
- int err;
-
- err = thread_pool_schedule(n->pool,
- dst_trans_crypto_setup, dst_trans_crypto_action,
- t, MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_out_exit;
-
- return 0;
-
-err_out_exit:
- dst_trans_put(t);
- return err;
-}
-
-/*
- * Crypto machinery for the export node.
- */
-static int dst_export_crypto_setup(void *crypto_engine, void *bio)
-{
- struct dst_crypto_engine *e = crypto_engine;
-
- e->private = bio;
- return 0;
-}
-
-static int dst_export_crypto_action(void *crypto_engine, void *schedule_data)
-{
- struct dst_crypto_engine *e = crypto_engine;
- struct bio *bio = schedule_data;
- struct dst_export_priv *p = bio->bi_private;
- int err;
-
- dprintk("%s: e: %p, data: %p, bio: %llu/%u, dir: %lu.\n",
- __func__, e, e->data, (u64)bio->bi_sector,
- bio->bi_size, bio_data_dir(bio));
-
- e->enc = (bio_data_dir(bio) == READ);
- e->iv = p->cmd.id;
-
- if (bio_data_dir(bio) == WRITE) {
- u8 *hash = e->data + e->size/2;
-
- err = dst_crypto_process_receiving(e, bio, hash, p->cmd.hash);
- if (err)
- goto err_out_exit;
-
- generic_make_request(bio);
- } else {
- err = dst_crypto_process_sending(e, bio, p->cmd.hash);
- if (err)
- goto err_out_exit;
-
- if (e->hash) {
- p->cmd.csize = crypto_hash_digestsize(e->hash);
- p->cmd.size += p->cmd.csize;
- }
-
- err = dst_export_send_bio(bio);
- }
- return 0;
-
-err_out_exit:
- bio_put(bio);
- return err;
-}
-
-int dst_export_crypto(struct dst_node *n, struct bio *bio)
-{
- int err;
-
- err = thread_pool_schedule(n->pool,
- dst_export_crypto_setup, dst_export_crypto_action,
- bio, MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_out_exit;
-
- return 0;
-
-err_out_exit:
- bio_put(bio);
- return err;
-}
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/connector.h>
-#include <linux/dst.h>
-#include <linux/device.h>
-#include <linux/jhash.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-
-#include <linux/in.h>
-#include <linux/in6.h>
-
-#include <net/sock.h>
-
-static int dst_major;
-
-static DEFINE_MUTEX(dst_hash_lock);
-static struct list_head *dst_hashtable;
-static unsigned int dst_hashtable_size = 128;
-module_param(dst_hashtable_size, uint, 0644);
-
-static char dst_name[] = "Dementianting goldfish";
-
-static DEFINE_IDR(dst_index_idr);
-static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL };
-
-/*
- * DST sysfs tree for device called 'storage':
- *
- * /sys/bus/dst/devices/storage/
- * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025
- * /sys/bus/dst/devices/storage/size : 800
- * /sys/bus/dst/devices/storage/name : storage
- */
-
-static int dst_dev_match(struct device *dev, struct device_driver *drv)
-{
- return 1;
-}
-
-static struct bus_type dst_dev_bus_type = {
- .name = "dst",
- .match = &dst_dev_match,
-};
-
-static void dst_node_release(struct device *dev)
-{
- struct dst_info *info = container_of(dev, struct dst_info, device);
-
- kfree(info);
-}
-
-static struct device dst_node_dev = {
- .bus = &dst_dev_bus_type,
- .release = &dst_node_release
-};
-
-/*
- * Setting size of the node after it was changed.
- */
-static void dst_node_set_size(struct dst_node *n)
-{
- struct block_device *bdev;
-
- set_capacity(n->disk, n->size >> 9);
-
- bdev = bdget_disk(n->disk, 0);
- if (bdev) {
- mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode, n->size);
- mutex_unlock(&bdev->bd_inode->i_mutex);
- bdput(bdev);
- }
-}
-
-/*
- * Distributed storage request processing function.
- */
-static int dst_request(struct request_queue *q, struct bio *bio)
-{
- struct dst_node *n = q->queuedata;
- int err = -EIO;
-
- if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
- /*
- * This is a dirty^Wnice hack, but if we complete this
- * operation with -EOPNOTSUPP like intended, XFS
- * will stuck and freeze the machine. This may be
- * not particulary XFS problem though, but it is the
- * only FS which sends empty barrier at umount time
- * I worked with.
- *
- * Empty barriers are not allowed anyway, see 51fd77bd9f512
- * for example, although later it was changed to
- * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
- * work in this case.
- */
- /* err = -EOPNOTSUPP; */
- err = 0;
- goto end_io;
- }
-
- bio_get(bio);
-
- return dst_process_bio(n, bio);
-
-end_io:
- bio_endio(bio, err);
- return err;
-}
-
-/*
- * Open/close callbacks for appropriate block device.
- */
-static int dst_bdev_open(struct block_device *bdev, fmode_t mode)
-{
- struct dst_node *n = bdev->bd_disk->private_data;
-
- dst_node_get(n);
- return 0;
-}
-
-static int dst_bdev_release(struct gendisk *disk, fmode_t mode)
-{
- struct dst_node *n = disk->private_data;
-
- dst_node_put(n);
- return 0;
-}
-
-static struct block_device_operations dst_blk_ops = {
- .open = dst_bdev_open,
- .release = dst_bdev_release,
- .owner = THIS_MODULE,
-};
-
-/*
- * Block layer binding - disk is created when array is fully configured
- * by userspace request.
- */
-static int dst_node_create_disk(struct dst_node *n)
-{
- int err = -ENOMEM;
- u32 index = 0;
-
- n->queue = blk_init_queue(NULL, NULL);
- if (!n->queue)
- goto err_out_exit;
-
- n->queue->queuedata = n;
- blk_queue_make_request(n->queue, dst_request);
- blk_queue_max_phys_segments(n->queue, n->max_pages);
- blk_queue_max_hw_segments(n->queue, n->max_pages);
-
- err = -ENOMEM;
- n->disk = alloc_disk(1);
- if (!n->disk)
- goto err_out_free_queue;
-
- if (!(n->state->permissions & DST_PERM_WRITE)) {
- printk(KERN_INFO "DST node %s attached read-only.\n", n->name);
- set_disk_ro(n->disk, 1);
- }
-
- if (!idr_pre_get(&dst_index_idr, GFP_KERNEL))
- goto err_out_put;
-
- mutex_lock(&dst_hash_lock);
- err = idr_get_new(&dst_index_idr, NULL, &index);
- mutex_unlock(&dst_hash_lock);
- if (err)
- goto err_out_put;
-
- n->disk->major = dst_major;
- n->disk->first_minor = index;
- n->disk->fops = &dst_blk_ops;
- n->disk->queue = n->queue;
- n->disk->private_data = n;
- snprintf(n->disk->disk_name, sizeof(n->disk->disk_name),
- "dst-%s", n->name);
-
- return 0;
-
-err_out_put:
- put_disk(n->disk);
-err_out_free_queue:
- blk_cleanup_queue(n->queue);
-err_out_exit:
- return err;
-}
-
-/*
- * Sysfs machinery: show device's size.
- */
-static ssize_t dst_show_size(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dst_info *info = container_of(dev, struct dst_info, device);
-
- return sprintf(buf, "%llu\n", info->size);
-}
-
-/*
- * Show local exported device.
- */
-static ssize_t dst_show_local(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dst_info *info = container_of(dev, struct dst_info, device);
-
- return sprintf(buf, "%s\n", info->local);
-}
-
-/*
- * Shows type of the remote node - device major/minor number
- * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes.
- */
-static ssize_t dst_show_type(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct dst_info *info = container_of(dev, struct dst_info, device);
- int family = info->net.addr.sa_family;
-
- if (family == AF_INET) {
- struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr;
- return sprintf(buf, "%u.%u.%u.%u:%d\n",
- NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
- } else if (family == AF_INET6) {
- struct sockaddr_in6 *sin = (struct sockaddr_in6 *)
- &info->net.addr;
- return sprintf(buf,
- "%pi6:%d\n",
- &sin->sin6_addr, ntohs(sin->sin6_port));
- } else {
- int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */
- int size, addrlen = info->net.addr.sa_data_len;
- unsigned char *a = (unsigned char *)&info->net.addr.sa_data;
- char *buf_orig = buf;
-
- size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ",
- family, addrlen);
- sz -= size;
- buf += size;
-
- for (i = 0; i < addrlen; ++i) {
- if (sz < 3)
- break;
-
- size = snprintf(buf, sz, "%02x ", a[i]);
- sz -= size;
- buf += size;
- }
- buf += sprintf(buf, "\n");
-
- return buf - buf_orig;
- }
- return 0;
-}
-
-static struct device_attribute dst_node_attrs[] = {
- __ATTR(size, 0444, dst_show_size, NULL),
- __ATTR(type, 0444, dst_show_type, NULL),
- __ATTR(local, 0444, dst_show_local, NULL),
-};
-
-static int dst_create_node_attributes(struct dst_node *n)
-{
- int err, i;
-
- for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) {
- err = device_create_file(&n->info->device,
- &dst_node_attrs[i]);
- if (err)
- goto err_out_remove_all;
- }
- return 0;
-
-err_out_remove_all:
- while (--i >= 0)
- device_remove_file(&n->info->device,
- &dst_node_attrs[i]);
-
- return err;
-}
-
-static void dst_remove_node_attributes(struct dst_node *n)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i)
- device_remove_file(&n->info->device,
- &dst_node_attrs[i]);
-}
-
-/*
- * Sysfs cleanup and initialization.
- * Shows number of useful parameters.
- */
-static void dst_node_sysfs_exit(struct dst_node *n)
-{
- if (n->info) {
- dst_remove_node_attributes(n);
- device_unregister(&n->info->device);
- n->info = NULL;
- }
-}
-
-static int dst_node_sysfs_init(struct dst_node *n)
-{
- int err;
-
- n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL);
- if (!n->info)
- return -ENOMEM;
-
- memcpy(&n->info->device, &dst_node_dev, sizeof(struct device));
- n->info->size = n->size;
-
- dev_set_name(&n->info->device, "dst-%s", n->name);
- err = device_register(&n->info->device);
- if (err) {
- dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n",
- n->name, err);
- goto err_out_exit;
- }
-
- dst_create_node_attributes(n);
-
- return 0;
-
-err_out_exit:
- kfree(n->info);
- n->info = NULL;
- return err;
-}
-
-/*
- * DST node hash tables machinery.
- */
-static inline unsigned int dst_hash(char *str, unsigned int size)
-{
- return jhash(str, size, 0) % dst_hashtable_size;
-}
-
-static void dst_node_remove(struct dst_node *n)
-{
- mutex_lock(&dst_hash_lock);
- list_del_init(&n->node_entry);
- mutex_unlock(&dst_hash_lock);
-}
-
-static void dst_node_add(struct dst_node *n)
-{
- unsigned hash = dst_hash(n->name, sizeof(n->name));
-
- mutex_lock(&dst_hash_lock);
- list_add_tail(&n->node_entry, &dst_hashtable[hash]);
- mutex_unlock(&dst_hash_lock);
-}
-
-/*
- * Cleaning node when it is about to be freed.
- * There are still users of the socket though,
- * so connection cleanup should be protected.
- */
-static void dst_node_cleanup(struct dst_node *n)
-{
- struct dst_state *st = n->state;
-
- if (!st)
- return;
-
- if (n->queue) {
- blk_cleanup_queue(n->queue);
-
- mutex_lock(&dst_hash_lock);
- idr_remove(&dst_index_idr, n->disk->first_minor);
- mutex_unlock(&dst_hash_lock);
-
- put_disk(n->disk);
- }
-
- if (n->bdev) {
- sync_blockdev(n->bdev);
- close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
- }
-
- dst_state_lock(st);
- st->need_exit = 1;
- dst_state_exit_connected(st);
- dst_state_unlock(st);
-
- wake_up(&st->thread_wait);
-
- dst_state_put(st);
- n->state = NULL;
-}
-
-/*
- * Free security attributes attached to given node.
- */
-static void dst_security_exit(struct dst_node *n)
-{
- struct dst_secure *s, *tmp;
-
- list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) {
- list_del(&s->sec_entry);
- kfree(s);
- }
-}
-
-/*
- * Free node when there are no more users.
- * Actually node has to be freed on behalf od userspace process,
- * since there are number of threads, which are embedded in the
- * node, so they can not exit and free node from there, that is
- * why there is a wakeup if reference counter is not equal to zero.
- */
-void dst_node_put(struct dst_node *n)
-{
- if (unlikely(!n))
- return;
-
- dprintk("%s: n: %p, refcnt: %d.\n",
- __func__, n, atomic_read(&n->refcnt));
-
- if (atomic_dec_and_test(&n->refcnt)) {
- dst_node_remove(n);
- n->trans_scan_timeout = 0;
- dst_node_cleanup(n);
- thread_pool_destroy(n->pool);
- dst_node_sysfs_exit(n);
- dst_node_crypto_exit(n);
- dst_security_exit(n);
- dst_node_trans_exit(n);
-
- kfree(n);
-
- dprintk("%s: freed n: %p.\n", __func__, n);
- } else {
- wake_up(&n->wait);
- }
-}
-
-/*
- * Setting up export device: lookup by the name, get its size
- * and setup listening socket, which will accept clients, which
- * will submit IO for given storage.
- */
-static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl,
- struct dst_export_ctl *le)
-{
- int err;
-
- snprintf(n->info->local, sizeof(n->info->local), "%s", le->device);
-
- n->bdev = open_bdev_exclusive(le->device, FMODE_READ|FMODE_WRITE, NULL);
- if (IS_ERR(n->bdev))
- return PTR_ERR(n->bdev);
-
- if (n->size != 0)
- n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size);
- else
- n->size = n->bdev->bd_inode->i_size;
-
- n->info->size = n->size;
- err = dst_node_init_listened(n, le);
- if (err)
- goto err_out_cleanup;
-
- return 0;
-
-err_out_cleanup:
- close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
- n->bdev = NULL;
-
- return err;
-}
-
-/* Empty thread pool callbacks for the network processing threads. */
-static inline void *dst_thread_network_init(void *data)
-{
- dprintk("%s: data: %p.\n", __func__, data);
- return data;
-}
-
-static inline void dst_thread_network_cleanup(void *data)
-{
- dprintk("%s: data: %p.\n", __func__, data);
-}
-
-/*
- * Allocate DST node and initialize some of its parameters.
- */
-static struct dst_node *dst_alloc_node(struct dst_ctl *ctl,
- int (*start)(struct dst_node *),
- int num)
-{
- struct dst_node *n;
- int err;
-
- n = kzalloc(sizeof(struct dst_node), GFP_KERNEL);
- if (!n)
- return NULL;
-
- INIT_LIST_HEAD(&n->node_entry);
-
- INIT_LIST_HEAD(&n->security_list);
- mutex_init(&n->security_lock);
-
- init_waitqueue_head(&n->wait);
-
- n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout);
- if (!n->trans_scan_timeout)
- n->trans_scan_timeout = HZ;
-
- n->trans_max_retries = ctl->trans_max_retries;
- if (!n->trans_max_retries)
- n->trans_max_retries = 10;
-
- /*
- * Pretty much arbitrary default numbers.
- * 32 matches maximum number of pages in bio originated from ext3 (31).
- */
- n->max_pages = ctl->max_pages;
- if (!n->max_pages)
- n->max_pages = 32;
-
- if (n->max_pages > 1024)
- n->max_pages = 1024;
-
- n->start = start;
- n->size = ctl->size;
-
- atomic_set(&n->refcnt, 1);
- atomic_long_set(&n->gen, 0);
- snprintf(n->name, sizeof(n->name), "%s", ctl->name);
-
- err = dst_node_sysfs_init(n);
- if (err)
- goto err_out_free;
-
- n->pool = thread_pool_create(num, n->name, dst_thread_network_init,
- dst_thread_network_cleanup, n);
- if (IS_ERR(n->pool)) {
- err = PTR_ERR(n->pool);
- goto err_out_sysfs_exit;
- }
-
- dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name);
-
- return n;
-
-err_out_sysfs_exit:
- dst_node_sysfs_exit(n);
-err_out_free:
- kfree(n);
- return NULL;
-}
-
-/*
- * Starting a node, connected to the remote server:
- * register block device and initialize transaction mechanism.
- * In revers order though.
- *
- * It will autonegotiate some parameters with the remote node
- * and update local if needed.
- *
- * Transaction initialization should be the last thing before
- * starting the node, since transaction should include not only
- * block IO, but also crypto related data (if any), which are
- * initialized separately.
- */
-static int dst_start_remote(struct dst_node *n)
-{
- int err;
-
- err = dst_node_trans_init(n, sizeof(struct dst_trans));
- if (err)
- return err;
-
- err = dst_node_create_disk(n);
- if (err)
- return err;
-
- dst_node_set_size(n);
- add_disk(n->disk);
-
- dprintk("DST: started remote node '%s', minor: %d.\n",
- n->name, n->disk->first_minor);
-
- return 0;
-}
-
-/*
- * Adding remote node and initialize connection.
- */
-static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size)
-{
- int err;
- struct dst_network_ctl *rctl = data;
-
- if (n)
- return -EEXIST;
-
- if (size != sizeof(struct dst_network_ctl))
- return -EINVAL;
-
- n = dst_alloc_node(ctl, dst_start_remote, 1);
- if (!n)
- return -ENOMEM;
-
- memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl));
- err = dst_node_init_connected(n, rctl);
- if (err)
- goto err_out_free;
-
- dst_node_add(n);
-
- return 0;
-
-err_out_free:
- dst_node_put(n);
- return err;
-}
-
-/*
- * Adding export node: initializing block device and listening socket.
- */
-static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size)
-{
- int err;
- struct dst_export_ctl *le = data;
-
- if (n)
- return -EEXIST;
-
- if (size != sizeof(struct dst_export_ctl))
- return -EINVAL;
-
- n = dst_alloc_node(ctl, dst_start_export, 2);
- if (!n)
- return -EINVAL;
-
- err = dst_setup_export(n, ctl, le);
- if (err)
- goto err_out_free;
-
- dst_node_add(n);
-
- return 0;
-
-err_out_free:
- dst_node_put(n);
- return err;
-}
-
-static int dst_node_remove_unload(struct dst_node *n)
-{
- printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n",
- n->name, n->size);
-
- if (n->disk)
- del_gendisk(n->disk);
-
- dst_node_remove(n);
- dst_node_sysfs_exit(n);
-
- /*
- * This is not a hack. Really.
- * Node's reference counter allows to implement fine grained
- * node freeing, but since all transactions (which hold node's
- * reference counter) are processed in the dedicated thread,
- * it is possible that reference will hit zero in that thread,
- * so we will not be able to exit thread and cleanup the node.
- *
- * So, we remove disk, so no new activity is possible, and
- * wait until all pending transaction are completed (either
- * in receiving thread or by timeout in workqueue), in this
- * case reference counter will be less or equal to 2 (once set in
- * dst_alloc_node() and then in connector message parser;
- * or when we force module unloading, and connector message
- * parser does not hold a reference, in this case reference
- * counter will be equal to 1),
- * and subsequent dst_node_put() calls will free the node.
- */
- dprintk("%s: going to sleep with %d refcnt.\n",
- __func__, atomic_read(&n->refcnt));
- wait_event(n->wait, atomic_read(&n->refcnt) <= 2);
-
- dst_node_put(n);
- return 0;
-}
-
-/*
- * Remove node from the hash table.
- */
-static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size)
-{
- if (!n)
- return -ENODEV;
-
- return dst_node_remove_unload(n);
-}
-
-/*
- * Initialize crypto processing for given node.
- */
-static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size)
-{
- struct dst_crypto_ctl *crypto = data;
-
- if (!n)
- return -ENODEV;
-
- if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize +
- crypto->cipher_keysize)
- return -EINVAL;
-
- if (n->trans_cache)
- return -EEXIST;
-
- return dst_node_crypto_init(n, crypto);
-}
-
-/*
- * Security attributes for given node.
- */
-static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size)
-{
- struct dst_secure *s;
-
- if (!n)
- return -ENODEV;
-
- if (size != sizeof(struct dst_secure_user))
- return -EINVAL;
-
- s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
-
- memcpy(&s->sec, data, size);
-
- mutex_lock(&n->security_lock);
- list_add_tail(&s->sec_entry, &n->security_list);
- mutex_unlock(&n->security_lock);
-
- return 0;
-}
-
-/*
- * Kill'em all!
- */
-static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size)
-{
- int err;
-
- if (!n)
- return -ENODEV;
-
- if (n->trans_cache)
- return 0;
-
- err = n->start(n);
- if (err)
- return err;
-
- printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size);
- return 0;
-}
-
-typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl,
- void *data, unsigned int size);
-
-/*
- * List of userspace commands.
- */
-static dst_command_func dst_commands[] = {
- [DST_ADD_REMOTE] = &dst_add_remote,
- [DST_ADD_EXPORT] = &dst_add_export,
- [DST_DEL_NODE] = &dst_del_node,
- [DST_CRYPTO] = &dst_crypto_init,
- [DST_SECURITY] = &dst_security_init,
- [DST_START] = &dst_start_node,
-};
-
-/*
- * Configuration parser.
- */
-static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
-{
- struct dst_ctl *ctl;
- int err;
- struct dst_ctl_ack ack;
- struct dst_node *n = NULL, *tmp;
- unsigned int hash;
-
- if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
- err = -EPERM;
- goto out;
- }
-
- if (msg->len < sizeof(struct dst_ctl)) {
- err = -EBADMSG;
- goto out;
- }
-
- ctl = (struct dst_ctl *)msg->data;
-
- if (ctl->cmd >= DST_CMD_MAX) {
- err = -EINVAL;
- goto out;
- }
- hash = dst_hash(ctl->name, sizeof(ctl->name));
-
- mutex_lock(&dst_hash_lock);
- list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) {
- if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) {
- n = tmp;
- dst_node_get(n);
- break;
- }
- }
- mutex_unlock(&dst_hash_lock);
-
- err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl),
- msg->len - sizeof(struct dst_ctl));
-
- dst_node_put(n);
-out:
- memcpy(&ack.msg, msg, sizeof(struct cn_msg));
-
- ack.msg.ack = msg->ack + 1;
- ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg);
-
- ack.error = err;
-
- cn_netlink_send(&ack.msg, 0, GFP_KERNEL);
-}
-
-/*
- * Global initialization: sysfs, hash table, block device registration,
- * connector and various caches.
- */
-static int __init dst_sysfs_init(void)
-{
- return bus_register(&dst_dev_bus_type);
-}
-
-static void dst_sysfs_exit(void)
-{
- bus_unregister(&dst_dev_bus_type);
-}
-
-static int __init dst_hashtable_init(void)
-{
- unsigned int i;
-
- dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head),
- GFP_KERNEL);
- if (!dst_hashtable)
- return -ENOMEM;
-
- for (i = 0; i < dst_hashtable_size; ++i)
- INIT_LIST_HEAD(&dst_hashtable[i]);
-
- return 0;
-}
-
-static void dst_hashtable_exit(void)
-{
- unsigned int i;
- struct dst_node *n, *tmp;
-
- for (i = 0; i < dst_hashtable_size; ++i) {
- list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) {
- dst_node_remove_unload(n);
- }
- }
-
- kfree(dst_hashtable);
-}
-
-static int __init dst_sys_init(void)
-{
- int err = -ENOMEM;
-
- err = dst_hashtable_init();
- if (err)
- goto err_out_exit;
-
- err = dst_export_init();
- if (err)
- goto err_out_hashtable_exit;
-
- err = register_blkdev(dst_major, DST_NAME);
- if (err < 0)
- goto err_out_export_exit;
- if (err)
- dst_major = err;
-
- err = dst_sysfs_init();
- if (err)
- goto err_out_unregister;
-
- err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback);
- if (err)
- goto err_out_sysfs_exit;
-
- printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name);
-
- return 0;
-
-err_out_sysfs_exit:
- dst_sysfs_exit();
-err_out_unregister:
- unregister_blkdev(dst_major, DST_NAME);
-err_out_export_exit:
- dst_export_exit();
-err_out_hashtable_exit:
- dst_hashtable_exit();
-err_out_exit:
- return err;
-}
-
-static void __exit dst_sys_exit(void)
-{
- cn_del_callback(&cn_dst_id);
- unregister_blkdev(dst_major, DST_NAME);
- dst_hashtable_exit();
- dst_sysfs_exit();
- dst_export_exit();
-}
-
-module_init(dst_sys_init);
-module_exit(dst_sys_exit);
-
-MODULE_DESCRIPTION("Distributed storage");
-MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
-MODULE_LICENSE("GPL");
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/dst.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-
-#include <net/sock.h>
-
-/*
- * Export bioset is used for server block IO requests.
- */
-static struct bio_set *dst_bio_set;
-
-int __init dst_export_init(void)
-{
- int err = -ENOMEM;
-
- dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv));
- if (!dst_bio_set)
- goto err_out_exit;
-
- return 0;
-
-err_out_exit:
- return err;
-}
-
-void dst_export_exit(void)
-{
- bioset_free(dst_bio_set);
-}
-
-/*
- * When client connects and autonegotiates with the server node,
- * its permissions are checked in a security attributes and sent
- * back.
- */
-static unsigned int dst_check_permissions(struct dst_state *main,
- struct dst_state *st)
-{
- struct dst_node *n = main->node;
- struct dst_secure *sentry;
- struct dst_secure_user *s;
- struct saddr *sa = &st->ctl.addr;
- unsigned int perm = 0;
-
- mutex_lock(&n->security_lock);
- list_for_each_entry(sentry, &n->security_list, sec_entry) {
- s = &sentry->sec;
-
- if (s->addr.sa_family != sa->sa_family)
- continue;
-
- if (s->addr.sa_data_len != sa->sa_data_len)
- continue;
-
- /*
- * This '2' below is a port field. This may be very wrong to do
- * in atalk for example though. If there will be any need
- * to extent protocol to something else, I can create
- * per-family helpers and use them instead of this memcmp.
- */
- if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2,
- sa->sa_data_len - 2))
- continue;
-
- perm = s->permissions;
- }
- mutex_unlock(&n->security_lock);
-
- return perm;
-}
-
-/*
- * Accept new client: allocate appropriate network state and check permissions.
- */
-static struct dst_state *dst_accept_client(struct dst_state *st)
-{
- unsigned int revents = 0;
- unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
- unsigned int mask = err_mask | POLLIN;
- struct dst_node *n = st->node;
- int err = 0;
- struct socket *sock = NULL;
- struct dst_state *new;
-
- while (!err && !sock) {
- revents = dst_state_poll(st);
-
- if (!(revents & mask)) {
- DEFINE_WAIT(wait);
-
- for (;;) {
- prepare_to_wait(&st->thread_wait,
- &wait, TASK_INTERRUPTIBLE);
- if (!n->trans_scan_timeout || st->need_exit)
- break;
-
- revents = dst_state_poll(st);
-
- if (revents & mask)
- break;
-
- if (signal_pending(current))
- break;
-
- /*
- * Magic HZ? Polling check above is not safe in
- * all cases (like socket reset in BH context),
- * so it is simpler just to postpone it to the
- * process context instead of implementing
- * special locking there.
- */
- schedule_timeout(HZ);
- }
- finish_wait(&st->thread_wait, &wait);
- }
-
- err = -ECONNRESET;
- dst_state_lock(st);
-
- dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
- __func__, st, revents, revents & err_mask,
- revents & POLLIN);
-
- if (revents & err_mask) {
- dprintk("%s: revents: %x, socket: %p, err: %d.\n",
- __func__, revents, st->socket, err);
- err = -ECONNRESET;
- }
-
- if (!n->trans_scan_timeout || st->need_exit)
- err = -ENODEV;
-
- if (st->socket && (revents & POLLIN))
- err = kernel_accept(st->socket, &sock, 0);
-
- dst_state_unlock(st);
- }
-
- if (err)
- goto err_out_exit;
-
- new = dst_state_alloc(st->node);
- if (IS_ERR(new)) {
- err = -ENOMEM;
- goto err_out_release;
- }
- new->socket = sock;
-
- new->ctl.addr.sa_data_len = sizeof(struct sockaddr);
- err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr,
- (int *)&new->ctl.addr.sa_data_len);
- if (err)
- goto err_out_put;
-
- new->permissions = dst_check_permissions(st, new);
- if (new->permissions == 0) {
- err = -EPERM;
- dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
- "Client is not allowed to connect");
- goto err_out_put;
- }
-
- err = dst_poll_init(new);
- if (err)
- goto err_out_put;
-
- dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
- "Connected client");
-
- return new;
-
-err_out_put:
- dst_state_put(new);
-err_out_release:
- sock_release(sock);
-err_out_exit:
- return ERR_PTR(err);
-}
-
-/*
- * Each server's block request sometime finishes.
- * Usually it happens in hard irq context of the appropriate controller,
- * so to play good with all cases we just queue BIO into the queue
- * and wake up processing thread, which gets completed request and
- * send (encrypting if needed) it back to the client (if it was a read
- * request), or sends back reply that writing successfully completed.
- */
-static int dst_export_process_request_queue(struct dst_state *st)
-{
- unsigned long flags;
- struct dst_export_priv *p = NULL;
- struct bio *bio;
- int err = 0;
-
- while (!list_empty(&st->request_list)) {
- spin_lock_irqsave(&st->request_lock, flags);
- if (!list_empty(&st->request_list)) {
- p = list_first_entry(&st->request_list,
- struct dst_export_priv, request_entry);
- list_del(&p->request_entry);
- }
- spin_unlock_irqrestore(&st->request_lock, flags);
-
- if (!p)
- break;
-
- bio = p->bio;
-
- if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ))
- err = dst_export_crypto(st->node, bio);
- else
- err = dst_export_send_bio(bio);
-
- if (err)
- break;
- }
-
- return err;
-}
-
-/*
- * Cleanup export state.
- * It has to wait until all requests are finished,
- * and then free them all.
- */
-static void dst_state_cleanup_export(struct dst_state *st)
-{
- struct dst_export_priv *p;
- unsigned long flags;
-
- /*
- * This loop waits for all pending bios to be completed and freed.
- */
- while (atomic_read(&st->refcnt) > 1) {
- dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
- __func__, st, atomic_read(&st->refcnt),
- list_empty(&st->request_list));
- wait_event_timeout(st->thread_wait,
- (atomic_read(&st->refcnt) == 1) ||
- !list_empty(&st->request_list),
- HZ/2);
-
- while (!list_empty(&st->request_list)) {
- p = NULL;
- spin_lock_irqsave(&st->request_lock, flags);
- if (!list_empty(&st->request_list)) {
- p = list_first_entry(&st->request_list,
- struct dst_export_priv, request_entry);
- list_del(&p->request_entry);
- }
- spin_unlock_irqrestore(&st->request_lock, flags);
-
- if (p)
- bio_put(p->bio);
-
- dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: "
- "%p.\n", __func__, st, atomic_read(&st->refcnt),
- list_empty(&st->request_list), p);
- }
- }
-
- dst_state_put(st);
-}
-
-/*
- * Client accepting thread.
- * Not only accepts new connection, but also schedules receiving thread
- * and performs request completion described above.
- */
-static int dst_accept(void *init_data, void *schedule_data)
-{
- struct dst_state *main_st = schedule_data;
- struct dst_node *n = init_data;
- struct dst_state *st;
- int err;
-
- while (n->trans_scan_timeout && !main_st->need_exit) {
- dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n);
- st = dst_accept_client(main_st);
- if (IS_ERR(st))
- continue;
-
- err = dst_state_schedule_receiver(st);
- if (!err) {
- while (n->trans_scan_timeout) {
- err = wait_event_interruptible_timeout(st->thread_wait,
- !list_empty(&st->request_list) ||
- !n->trans_scan_timeout ||
- st->need_exit,
- HZ);
-
- if (!n->trans_scan_timeout || st->need_exit)
- break;
-
- if (list_empty(&st->request_list))
- continue;
-
- err = dst_export_process_request_queue(st);
- if (err)
- break;
- }
-
- st->need_exit = 1;
- wake_up(&st->thread_wait);
- }
-
- dst_state_cleanup_export(st);
- }
-
- dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st);
-
- dst_state_lock(main_st);
- dst_poll_exit(main_st);
- dst_state_socket_release(main_st);
- dst_state_unlock(main_st);
- dst_state_put(main_st);
- dprintk("%s: freed listening socket st: %p.\n", __func__, main_st);
-
- return 0;
-}
-
-int dst_start_export(struct dst_node *n)
-{
- if (list_empty(&n->security_list)) {
- printk(KERN_ERR "You are trying to export node '%s' "
- "without security attributes.\nNo clients will "
- "be allowed to connect. Exiting.\n", n->name);
- return -EINVAL;
- }
- return dst_node_trans_init(n, sizeof(struct dst_export_priv));
-}
-
-/*
- * Initialize listening state and schedule accepting thread.
- */
-int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le)
-{
- struct dst_state *st;
- int err = -ENOMEM;
- struct dst_network_ctl *ctl = &le->ctl;
-
- memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl));
-
- st = dst_state_alloc(n);
- if (IS_ERR(st)) {
- err = PTR_ERR(st);
- goto err_out_exit;
- }
- memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl));
-
- err = dst_state_socket_create(st);
- if (err)
- goto err_out_put;
-
- st->socket->sk->sk_reuse = 1;
-
- err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr,
- ctl->addr.sa_data_len);
- if (err)
- goto err_out_socket_release;
-
- err = kernel_listen(st->socket, 1024);
- if (err)
- goto err_out_socket_release;
- n->state = st;
-
- err = dst_poll_init(st);
- if (err)
- goto err_out_socket_release;
-
- dst_state_get(st);
-
- err = thread_pool_schedule(n->pool, dst_thread_setup,
- dst_accept, st, MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_out_poll_exit;
-
- return 0;
-
-err_out_poll_exit:
- dst_poll_exit(st);
-err_out_socket_release:
- dst_state_socket_release(st);
-err_out_put:
- dst_state_put(st);
-err_out_exit:
- n->state = NULL;
- return err;
-}
-
-/*
- * Free bio and related private data.
- * Also drop a reference counter for appropriate state,
- * which waits when there are no more block IOs in-flight.
- */
-static void dst_bio_destructor(struct bio *bio)
-{
- struct bio_vec *bv;
- struct dst_export_priv *priv = bio->bi_private;
- int i;
-
- bio_for_each_segment(bv, bio, i) {
- if (!bv->bv_page)
- break;
-
- __free_page(bv->bv_page);
- }
-
- if (priv)
- dst_state_put(priv->state);
- bio_free(bio, dst_bio_set);
-}
-
-/*
- * Block IO completion. Queue request to be sent back to
- * the client (or just confirmation).
- */
-static void dst_bio_end_io(struct bio *bio, int err)
-{
- struct dst_export_priv *p = bio->bi_private;
- struct dst_state *st = p->state;
- unsigned long flags;
-
- spin_lock_irqsave(&st->request_lock, flags);
- list_add_tail(&p->request_entry, &st->request_list);
- spin_unlock_irqrestore(&st->request_lock, flags);
-
- wake_up(&st->thread_wait);
-}
-
-/*
- * Allocate read request for the server.
- */
-static int dst_export_read_request(struct bio *bio, unsigned int total_size)
-{
- unsigned int size;
- struct page *page;
- int err;
-
- while (total_size) {
- err = -ENOMEM;
- page = alloc_page(GFP_KERNEL);
- if (!page)
- goto err_out_exit;
-
- size = min_t(unsigned int, PAGE_SIZE, total_size);
-
- err = bio_add_page(bio, page, size, 0);
- dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
- __func__, (u64)bio->bi_sector, bio->bi_size,
- size, err);
- if (err <= 0)
- goto err_out_free_page;
-
- total_size -= size;
- }
-
- return 0;
-
-err_out_free_page:
- __free_page(page);
-err_out_exit:
- return err;
-}
-
-/*
- * Allocate write request for the server.
- * Should not only get pages, but also read data from the network.
- */
-static int dst_export_write_request(struct dst_state *st,
- struct bio *bio, unsigned int total_size)
-{
- unsigned int size;
- struct page *page;
- void *data;
- int err;
-
- while (total_size) {
- err = -ENOMEM;
- page = alloc_page(GFP_KERNEL);
- if (!page)
- goto err_out_exit;
-
- data = kmap(page);
- if (!data)
- goto err_out_free_page;
-
- size = min_t(unsigned int, PAGE_SIZE, total_size);
-
- err = dst_data_recv(st, data, size);
- if (err)
- goto err_out_unmap_page;
-
- err = bio_add_page(bio, page, size, 0);
- if (err <= 0)
- goto err_out_unmap_page;
-
- kunmap(page);
-
- total_size -= size;
- }
-
- return 0;
-
-err_out_unmap_page:
- kunmap(page);
-err_out_free_page:
- __free_page(page);
-err_out_exit:
- return err;
-}
-
-/*
- * Groovy, we've gotten an IO request from the client.
- * Allocate BIO from the bioset, private data from the mempool
- * and lots of pages for IO.
- */
-int dst_process_io(struct dst_state *st)
-{
- struct dst_node *n = st->node;
- struct dst_cmd *cmd = st->data;
- struct bio *bio;
- struct dst_export_priv *priv;
- int err = -ENOMEM;
-
- if (unlikely(!n->bdev)) {
- err = -EINVAL;
- goto err_out_exit;
- }
-
- bio = bio_alloc_bioset(GFP_KERNEL,
- PAGE_ALIGN(cmd->size) >> PAGE_SHIFT,
- dst_bio_set);
- if (!bio)
- goto err_out_exit;
-
- priv = (struct dst_export_priv *)(((void *)bio) -
- sizeof (struct dst_export_priv));
-
- priv->state = dst_state_get(st);
- priv->bio = bio;
-
- bio->bi_private = priv;
- bio->bi_end_io = dst_bio_end_io;
- bio->bi_destructor = dst_bio_destructor;
- bio->bi_bdev = n->bdev;
-
- /*
- * Server side is only interested in two low bits:
- * uptodate (set by itself actually) and rw block
- */
- bio->bi_flags |= cmd->flags & 3;
-
- bio->bi_rw = cmd->rw;
- bio->bi_size = 0;
- bio->bi_sector = cmd->sector;
-
- dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id);
-
- priv->cmd.flags = 0;
- priv->cmd.size = cmd->size;
-
- if (bio_data_dir(bio) == WRITE) {
- err = dst_recv_cdata(st, priv->cmd.hash);
- if (err)
- goto err_out_free;
-
- err = dst_export_write_request(st, bio, cmd->size);
- if (err)
- goto err_out_free;
-
- if (dst_need_crypto(n))
- return dst_export_crypto(n, bio);
- } else {
- err = dst_export_read_request(bio, cmd->size);
- if (err)
- goto err_out_free;
- }
-
- dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
- __func__, (u64)bio->bi_sector, bio->bi_size,
- bio->bi_rw, bio_data_dir(bio),
- bio->bi_flags, bio->bi_phys_segments);
-
- generic_make_request(bio);
-
- return 0;
-
-err_out_free:
- bio_put(bio);
-err_out_exit:
- return err;
-}
-
-/*
- * Ok, block IO is ready, let's send it back to the client...
- */
-int dst_export_send_bio(struct bio *bio)
-{
- struct dst_export_priv *p = bio->bi_private;
- struct dst_state *st = p->state;
- struct dst_cmd *cmd = &p->cmd;
- int err;
-
- dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
- __func__, cmd->id, (u64)bio->bi_sector, bio->bi_size,
- cmd->csize, bio->bi_flags, bio->bi_rw);
-
- dst_convert_cmd(cmd);
-
- dst_state_lock(st);
- if (!st->socket) {
- err = -ECONNRESET;
- goto err_out_unlock;
- }
-
- if (bio_data_dir(bio) == WRITE) {
- /* ... or just confirmation that writing has completed. */
- cmd->size = cmd->csize = 0;
- err = dst_data_send_header(st->socket, cmd,
- sizeof(struct dst_cmd), 0);
- if (err)
- goto err_out_unlock;
- } else {
- err = dst_send_bio(st, cmd, bio);
- if (err)
- goto err_out_unlock;
- }
-
- dst_state_unlock(st);
-
- bio_put(bio);
- return 0;
-
-err_out_unlock:
- dst_state_unlock(st);
-
- bio_put(bio);
- return err;
-}
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/buffer_head.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/connector.h>
-#include <linux/dst.h>
-#include <linux/device.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-
-#include <net/sock.h>
-
-/*
- * Polling machinery.
- */
-
-struct dst_poll_helper {
- poll_table pt;
- struct dst_state *st;
-};
-
-static int dst_queue_wake(wait_queue_t *wait, unsigned mode,
- int sync, void *key)
-{
- struct dst_state *st = container_of(wait, struct dst_state, wait);
-
- wake_up(&st->thread_wait);
- return 1;
-}
-
-static void dst_queue_func(struct file *file, wait_queue_head_t *whead,
- poll_table *pt)
-{
- struct dst_state *st = container_of(pt, struct dst_poll_helper, pt)->st;
-
- st->whead = whead;
- init_waitqueue_func_entry(&st->wait, dst_queue_wake);
- add_wait_queue(whead, &st->wait);
-}
-
-void dst_poll_exit(struct dst_state *st)
-{
- if (st->whead) {
- remove_wait_queue(st->whead, &st->wait);
- st->whead = NULL;
- }
-}
-
-int dst_poll_init(struct dst_state *st)
-{
- struct dst_poll_helper ph;
-
- ph.st = st;
- init_poll_funcptr(&ph.pt, &dst_queue_func);
-
- st->socket->ops->poll(NULL, st->socket, &ph.pt);
- return 0;
-}
-
-/*
- * Header receiving function - may block.
- */
-static int dst_data_recv_header(struct socket *sock,
- void *data, unsigned int size, int block)
-{
- struct msghdr msg;
- struct kvec iov;
- int err;
-
- iov.iov_base = data;
- iov.iov_len = size;
-
- msg.msg_iov = (struct iovec *)&iov;
- msg.msg_iovlen = 1;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = (block) ? MSG_WAITALL : MSG_DONTWAIT;
-
- err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len,
- msg.msg_flags);
- if (err != size)
- return -1;
-
- return 0;
-}
-
-/*
- * Header sending function - may block.
- */
-int dst_data_send_header(struct socket *sock,
- void *data, unsigned int size, int more)
-{
- struct msghdr msg;
- struct kvec iov;
- int err;
-
- iov.iov_base = data;
- iov.iov_len = size;
-
- msg.msg_iov = (struct iovec *)&iov;
- msg.msg_iovlen = 1;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = MSG_WAITALL | (more ? MSG_MORE : 0);
-
- err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
- if (err != size) {
- dprintk("%s: size: %u, more: %d, err: %d.\n",
- __func__, size, more, err);
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Block autoconfiguration: request size of the storage and permissions.
- */
-static int dst_request_remote_config(struct dst_state *st)
-{
- struct dst_node *n = st->node;
- int err = -EINVAL;
- struct dst_cmd *cmd = st->data;
-
- memset(cmd, 0, sizeof(struct dst_cmd));
- cmd->cmd = DST_CFG;
-
- dst_convert_cmd(cmd);
-
- err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
- if (err)
- goto out;
-
- err = dst_data_recv_header(st->socket, cmd, sizeof(struct dst_cmd), 1);
- if (err)
- goto out;
-
- dst_convert_cmd(cmd);
-
- if (cmd->cmd != DST_CFG) {
- err = -EINVAL;
- dprintk("%s: checking result: cmd: %d, size reported: %llu.\n",
- __func__, cmd->cmd, cmd->sector);
- goto out;
- }
-
- if (n->size != 0)
- n->size = min_t(loff_t, n->size, cmd->sector);
- else
- n->size = cmd->sector;
-
- n->info->size = n->size;
- st->permissions = cmd->rw;
-
-out:
- dprintk("%s: n: %p, err: %d, size: %llu, permission: %x.\n",
- __func__, n, err, n->size, st->permissions);
- return err;
-}
-
-/*
- * Socket machinery.
- */
-
-#define DST_DEFAULT_TIMEO 20000
-
-int dst_state_socket_create(struct dst_state *st)
-{
- int err;
- struct socket *sock;
- struct dst_network_ctl *ctl = &st->ctl;
-
- err = sock_create(ctl->addr.sa_family, ctl->type, ctl->proto, &sock);
- if (err < 0)
- return err;
-
- sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo =
- msecs_to_jiffies(DST_DEFAULT_TIMEO);
- sock->sk->sk_allocation = GFP_NOIO;
-
- st->socket = st->read_socket = sock;
- return 0;
-}
-
-void dst_state_socket_release(struct dst_state *st)
-{
- dprintk("%s: st: %p, socket: %p, n: %p.\n",
- __func__, st, st->socket, st->node);
- if (st->socket) {
- sock_release(st->socket);
- st->socket = NULL;
- st->read_socket = NULL;
- }
-}
-
-void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str)
-{
- if (sk->ops->family == AF_INET) {
- struct sockaddr_in *sin = (struct sockaddr_in *)sa;
- printk(KERN_INFO "%s %u.%u.%u.%u:%d.\n", str,
- NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
- } else if (sk->ops->family == AF_INET6) {
- struct sockaddr_in6 *sin = (struct sockaddr_in6 *)sa;
- printk(KERN_INFO "%s %pi6:%d",
- str, &sin->sin6_addr, ntohs(sin->sin6_port));
- }
-}
-
-void dst_state_exit_connected(struct dst_state *st)
-{
- if (st->socket) {
- dst_poll_exit(st);
- st->socket->ops->shutdown(st->socket, 2);
-
- dst_dump_addr(st->socket, (struct sockaddr *)&st->ctl.addr,
- "Disconnected peer");
- dst_state_socket_release(st);
- }
-}
-
-static int dst_state_init_connected(struct dst_state *st)
-{
- int err;
- struct dst_network_ctl *ctl = &st->ctl;
-
- err = dst_state_socket_create(st);
- if (err)
- goto err_out_exit;
-
- err = kernel_connect(st->socket, (struct sockaddr *)&st->ctl.addr,
- st->ctl.addr.sa_data_len, 0);
- if (err)
- goto err_out_release;
-
- err = dst_poll_init(st);
- if (err)
- goto err_out_release;
-
- dst_dump_addr(st->socket, (struct sockaddr *)&ctl->addr,
- "Connected to peer");
-
- return 0;
-
-err_out_release:
- dst_state_socket_release(st);
-err_out_exit:
- return err;
-}
-
-/*
- * State reset is used to reconnect to the remote peer.
- * May fail, but who cares, we will try again later.
- */
-static inline void dst_state_reset_nolock(struct dst_state *st)
-{
- dst_state_exit_connected(st);
- dst_state_init_connected(st);
-}
-
-static inline void dst_state_reset(struct dst_state *st)
-{
- dst_state_lock(st);
- dst_state_reset_nolock(st);
- dst_state_unlock(st);
-}
-
-/*
- * Basic network sending/receiving functions.
- * Blocked mode is used.
- */
-static int dst_data_recv_raw(struct dst_state *st, void *buf, u64 size)
-{
- struct msghdr msg;
- struct kvec iov;
- int err;
-
- BUG_ON(!size);
-
- iov.iov_base = buf;
- iov.iov_len = size;
-
- msg.msg_iov = (struct iovec *)&iov;
- msg.msg_iovlen = 1;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = MSG_DONTWAIT;
-
- err = kernel_recvmsg(st->socket, &msg, &iov, 1, iov.iov_len,
- msg.msg_flags);
- if (err <= 0) {
- dprintk("%s: failed to recv data: size: %llu, err: %d.\n",
- __func__, size, err);
- if (err == 0)
- err = -ECONNRESET;
-
- dst_state_exit_connected(st);
- }
-
- return err;
-}
-
-/*
- * Ping command to early detect failed nodes.
- */
-static int dst_send_ping(struct dst_state *st)
-{
- struct dst_cmd *cmd = st->data;
- int err = -ECONNRESET;
-
- dst_state_lock(st);
- if (st->socket) {
- memset(cmd, 0, sizeof(struct dst_cmd));
-
- cmd->cmd = __cpu_to_be32(DST_PING);
-
- err = dst_data_send_header(st->socket, cmd,
- sizeof(struct dst_cmd), 0);
- }
- dprintk("%s: st: %p, socket: %p, err: %d.\n", __func__,
- st, st->socket, err);
- dst_state_unlock(st);
-
- return err;
-}
-
-/*
- * Receiving function, which should either return error or read
- * whole block request. If there was no traffic for a one second,
- * send a ping, since remote node may die.
- */
-int dst_data_recv(struct dst_state *st, void *data, unsigned int size)
-{
- unsigned int revents = 0;
- unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
- unsigned int mask = err_mask | POLLIN;
- struct dst_node *n = st->node;
- int err = 0;
-
- while (size && !err) {
- revents = dst_state_poll(st);
-
- if (!(revents & mask)) {
- DEFINE_WAIT(wait);
-
- for (;;) {
- prepare_to_wait(&st->thread_wait, &wait,
- TASK_INTERRUPTIBLE);
- if (!n->trans_scan_timeout || st->need_exit)
- break;
-
- revents = dst_state_poll(st);
-
- if (revents & mask)
- break;
-
- if (signal_pending(current))
- break;
-
- if (!schedule_timeout(HZ)) {
- err = dst_send_ping(st);
- if (err)
- return err;
- }
-
- continue;
- }
- finish_wait(&st->thread_wait, &wait);
- }
-
- err = -ECONNRESET;
- dst_state_lock(st);
-
- if (st->socket && (st->read_socket == st->socket) &&
- (revents & POLLIN)) {
- err = dst_data_recv_raw(st, data, size);
- if (err > 0) {
- data += err;
- size -= err;
- err = 0;
- }
- }
-
- if (revents & err_mask || !st->socket) {
- dprintk("%s: revents: %x, socket: %p, size: %u, "
- "err: %d.\n", __func__, revents,
- st->socket, size, err);
- err = -ECONNRESET;
- }
-
- dst_state_unlock(st);
-
- if (!n->trans_scan_timeout)
- err = -ENODEV;
- }
-
- return err;
-}
-
-/*
- * Send block autoconf reply.
- */
-static int dst_process_cfg(struct dst_state *st)
-{
- struct dst_node *n = st->node;
- struct dst_cmd *cmd = st->data;
- int err;
-
- cmd->sector = n->size;
- cmd->rw = st->permissions;
-
- dst_convert_cmd(cmd);
-
- dst_state_lock(st);
- err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
- dst_state_unlock(st);
-
- return err;
-}
-
-/*
- * Receive block IO from the network.
- */
-static int dst_recv_bio(struct dst_state *st, struct bio *bio,
- unsigned int total_size)
-{
- struct bio_vec *bv;
- int i, err;
- void *data;
- unsigned int sz;
-
- bio_for_each_segment(bv, bio, i) {
- sz = min(total_size, bv->bv_len);
-
- dprintk("%s: bio: %llu/%u, total: %u, len: %u, sz: %u, "
- "off: %u.\n", __func__, (u64)bio->bi_sector,
- bio->bi_size, total_size, bv->bv_len, sz,
- bv->bv_offset);
-
- data = kmap(bv->bv_page) + bv->bv_offset;
- err = dst_data_recv(st, data, sz);
- kunmap(bv->bv_page);
-
- bv->bv_len = sz;
-
- if (err)
- return err;
-
- total_size -= sz;
- if (total_size == 0)
- break;
- }
-
- return 0;
-}
-
-/*
- * Our block IO has just completed and arrived: get it.
- */
-static int dst_process_io_response(struct dst_state *st)
-{
- struct dst_node *n = st->node;
- struct dst_cmd *cmd = st->data;
- struct dst_trans *t;
- int err = 0;
- struct bio *bio;
-
- mutex_lock(&n->trans_lock);
- t = dst_trans_search(n, cmd->id);
- mutex_unlock(&n->trans_lock);
-
- if (!t)
- goto err_out_exit;
-
- bio = t->bio;
-
- dprintk("%s: bio: %llu/%u, cmd_size: %u, csize: %u, dir: %lu.\n",
- __func__, (u64)bio->bi_sector, bio->bi_size, cmd->size,
- cmd->csize, bio_data_dir(bio));
-
- if (bio_data_dir(bio) == READ) {
- if (bio->bi_size != cmd->size - cmd->csize)
- goto err_out_exit;
-
- if (dst_need_crypto(n)) {
- err = dst_recv_cdata(st, t->cmd.hash);
- if (err)
- goto err_out_exit;
- }
-
- err = dst_recv_bio(st, t->bio, bio->bi_size);
- if (err)
- goto err_out_exit;
-
- if (dst_need_crypto(n))
- return dst_trans_crypto(t);
- } else {
- err = -EBADMSG;
- if (cmd->size || cmd->csize)
- goto err_out_exit;
- }
-
- dst_trans_remove(t);
- dst_trans_put(t);
-
- return 0;
-
-err_out_exit:
- return err;
-}
-
-/*
- * Receive crypto data.
- */
-int dst_recv_cdata(struct dst_state *st, void *cdata)
-{
- struct dst_cmd *cmd = st->data;
- struct dst_node *n = st->node;
- struct dst_crypto_ctl *c = &n->crypto;
- int err;
-
- if (cmd->csize != c->crypto_attached_size) {
- dprintk("%s: cmd: cmd: %u, sector: %llu, size: %u, "
- "csize: %u != digest size %u.\n",
- __func__, cmd->cmd, cmd->sector, cmd->size,
- cmd->csize, c->crypto_attached_size);
- err = -EINVAL;
- goto err_out_exit;
- }
-
- err = dst_data_recv(st, cdata, cmd->csize);
- if (err)
- goto err_out_exit;
-
- cmd->size -= cmd->csize;
- return 0;
-
-err_out_exit:
- return err;
-}
-
-/*
- * Receive the command and start its processing.
- */
-static int dst_recv_processing(struct dst_state *st)
-{
- int err = -EINTR;
- struct dst_cmd *cmd = st->data;
-
- /*
- * If socket will be reset after this statement, then
- * dst_data_recv() will just fail and loop will
- * start again, so it can be done without any locks.
- *
- * st->read_socket is needed to prevents state machine
- * breaking between this data reading and subsequent one
- * in protocol specific functions during connection reset.
- * In case of reset we have to read next command and do
- * not expect data for old command to magically appear in
- * new connection.
- */
- st->read_socket = st->socket;
- err = dst_data_recv(st, cmd, sizeof(struct dst_cmd));
- if (err)
- goto out_exit;
-
- dst_convert_cmd(cmd);
-
- dprintk("%s: cmd: %u, size: %u, csize: %u, id: %llu, "
- "sector: %llu, flags: %llx, rw: %llx.\n",
- __func__, cmd->cmd, cmd->size,
- cmd->csize, cmd->id, cmd->sector,
- cmd->flags, cmd->rw);
-
- /*
- * This should catch protocol breakage and random garbage
- * instead of commands.
- */
- if (unlikely(cmd->csize > st->size - sizeof(struct dst_cmd))) {
- err = -EBADMSG;
- goto out_exit;
- }
-
- err = -EPROTO;
- switch (cmd->cmd) {
- case DST_IO_RESPONSE:
- err = dst_process_io_response(st);
- break;
- case DST_IO:
- err = dst_process_io(st);
- break;
- case DST_CFG:
- err = dst_process_cfg(st);
- break;
- case DST_PING:
- err = 0;
- break;
- default:
- break;
- }
-
-out_exit:
- return err;
-}
-
-/*
- * Receiving thread. For the client node we should try to reconnect,
- * for accepted client we just drop the state and expect it to reconnect.
- */
-static int dst_recv(void *init_data, void *schedule_data)
-{
- struct dst_state *st = schedule_data;
- struct dst_node *n = init_data;
- int err = 0;
-
- dprintk("%s: start st: %p, n: %p, scan: %lu, need_exit: %d.\n",
- __func__, st, n, n->trans_scan_timeout, st->need_exit);
-
- while (n->trans_scan_timeout && !st->need_exit) {
- err = dst_recv_processing(st);
- if (err < 0) {
- if (!st->ctl.type)
- break;
-
- if (!n->trans_scan_timeout || st->need_exit)
- break;
-
- dst_state_reset(st);
- msleep(1000);
- }
- }
-
- st->need_exit = 1;
- wake_up(&st->thread_wait);
-
- dprintk("%s: freeing receiving socket st: %p.\n", __func__, st);
- dst_state_lock(st);
- dst_state_exit_connected(st);
- dst_state_unlock(st);
- dst_state_put(st);
-
- dprintk("%s: freed receiving socket st: %p.\n", __func__, st);
-
- return err;
-}
-
-/*
- * Network state dies here and borns couple of lines below.
- * This object is the main network state processing engine:
- * sending, receiving, reconnections, all network related
- * tasks are handled on behalf of the state.
- */
-static void dst_state_free(struct dst_state *st)
-{
- dprintk("%s: st: %p.\n", __func__, st);
- if (st->cleanup)
- st->cleanup(st);
- kfree(st->data);
- kfree(st);
-}
-
-struct dst_state *dst_state_alloc(struct dst_node *n)
-{
- struct dst_state *st;
- int err = -ENOMEM;
-
- st = kzalloc(sizeof(struct dst_state), GFP_KERNEL);
- if (!st)
- goto err_out_exit;
-
- st->node = n;
- st->need_exit = 0;
-
- st->size = PAGE_SIZE;
- st->data = kmalloc(st->size, GFP_KERNEL);
- if (!st->data)
- goto err_out_free;
-
- spin_lock_init(&st->request_lock);
- INIT_LIST_HEAD(&st->request_list);
-
- mutex_init(&st->state_lock);
- init_waitqueue_head(&st->thread_wait);
-
- /*
- * One for processing thread, another one for node itself.
- */
- atomic_set(&st->refcnt, 2);
-
- dprintk("%s: st: %p, n: %p.\n", __func__, st, st->node);
-
- return st;
-
-err_out_free:
- kfree(st);
-err_out_exit:
- return ERR_PTR(err);
-}
-
-int dst_state_schedule_receiver(struct dst_state *st)
-{
- return thread_pool_schedule_private(st->node->pool, dst_thread_setup,
- dst_recv, st, MAX_SCHEDULE_TIMEOUT, st->node);
-}
-
-/*
- * Initialize client's connection to the remote peer: allocate state,
- * connect and perform block IO autoconfiguration.
- */
-int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r)
-{
- struct dst_state *st;
- int err = -ENOMEM;
-
- st = dst_state_alloc(n);
- if (IS_ERR(st)) {
- err = PTR_ERR(st);
- goto err_out_exit;
- }
- memcpy(&st->ctl, r, sizeof(struct dst_network_ctl));
-
- err = dst_state_init_connected(st);
- if (err)
- goto err_out_free_data;
-
- err = dst_request_remote_config(st);
- if (err)
- goto err_out_exit_connected;
- n->state = st;
-
- err = dst_state_schedule_receiver(st);
- if (err)
- goto err_out_exit_connected;
-
- return 0;
-
-err_out_exit_connected:
- dst_state_exit_connected(st);
-err_out_free_data:
- dst_state_free(st);
-err_out_exit:
- n->state = NULL;
- return err;
-}
-
-void dst_state_put(struct dst_state *st)
-{
- dprintk("%s: st: %p, refcnt: %d.\n",
- __func__, st, atomic_read(&st->refcnt));
- if (atomic_dec_and_test(&st->refcnt))
- dst_state_free(st);
-}
-
-/*
- * Send block IO to the network one by one using zero-copy ->sendpage().
- */
-int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio)
-{
- struct bio_vec *bv;
- struct dst_crypto_ctl *c = &st->node->crypto;
- int err, i = 0;
- int flags = MSG_WAITALL;
-
- err = dst_data_send_header(st->socket, cmd,
- sizeof(struct dst_cmd) + c->crypto_attached_size, bio->bi_vcnt);
- if (err)
- goto err_out_exit;
-
- bio_for_each_segment(bv, bio, i) {
- if (i < bio->bi_vcnt - 1)
- flags |= MSG_MORE;
-
- err = kernel_sendpage(st->socket, bv->bv_page, bv->bv_offset,
- bv->bv_len, flags);
- if (err <= 0)
- goto err_out_exit;
- }
-
- return 0;
-
-err_out_exit:
- dprintk("%s: %d/%d, flags: %x, err: %d.\n",
- __func__, i, bio->bi_vcnt, flags, err);
- return err;
-}
-
-/*
- * Send transaction to the remote peer.
- */
-int dst_trans_send(struct dst_trans *t)
-{
- int err;
- struct dst_state *st = t->n->state;
- struct bio *bio = t->bio;
-
- dst_convert_cmd(&t->cmd);
-
- dst_state_lock(st);
- if (!st->socket) {
- err = dst_state_init_connected(st);
- if (err)
- goto err_out_unlock;
- }
-
- if (bio_data_dir(bio) == WRITE) {
- err = dst_send_bio(st, &t->cmd, t->bio);
- } else {
- err = dst_data_send_header(st->socket, &t->cmd,
- sizeof(struct dst_cmd), 0);
- }
- if (err)
- goto err_out_reset;
-
- dst_state_unlock(st);
- return 0;
-
-err_out_reset:
- dst_state_reset_nolock(st);
-err_out_unlock:
- dst_state_unlock(st);
-
- return err;
-}
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/dst.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-
-/*
- * Thread pool abstraction allows to schedule a work to be performed
- * on behalf of kernel thread. One does not operate with threads itself,
- * instead user provides setup and cleanup callbacks for thread pool itself,
- * and action and cleanup callbacks for each submitted work.
- *
- * Each worker has private data initialized at creation time and data,
- * provided by user at scheduling time.
- *
- * When action is being performed, thread can not be used by other users,
- * instead they will sleep until there is free thread to pick their work.
- */
-struct thread_pool_worker {
- struct list_head worker_entry;
-
- struct task_struct *thread;
-
- struct thread_pool *pool;
-
- int error;
- int has_data;
- int need_exit;
- unsigned int id;
-
- wait_queue_head_t wait;
-
- void *private;
- void *schedule_data;
-
- int (*action)(void *private, void *schedule_data);
- void (*cleanup)(void *private);
-};
-
-static void thread_pool_exit_worker(struct thread_pool_worker *w)
-{
- kthread_stop(w->thread);
-
- w->cleanup(w->private);
- kfree(w);
-}
-
-/*
- * Called to mark thread as ready and allow users to schedule new work.
- */
-static void thread_pool_worker_make_ready(struct thread_pool_worker *w)
-{
- struct thread_pool *p = w->pool;
-
- mutex_lock(&p->thread_lock);
-
- if (!w->need_exit) {
- list_move_tail(&w->worker_entry, &p->ready_list);
- w->has_data = 0;
- mutex_unlock(&p->thread_lock);
-
- wake_up(&p->wait);
- } else {
- p->thread_num--;
- list_del(&w->worker_entry);
- mutex_unlock(&p->thread_lock);
-
- thread_pool_exit_worker(w);
- }
-}
-
-/*
- * Thread action loop: waits until there is new work.
- */
-static int thread_pool_worker_func(void *data)
-{
- struct thread_pool_worker *w = data;
-
- while (!kthread_should_stop()) {
- wait_event_interruptible(w->wait,
- kthread_should_stop() || w->has_data);
-
- if (kthread_should_stop())
- break;
-
- if (!w->has_data)
- continue;
-
- w->action(w->private, w->schedule_data);
- thread_pool_worker_make_ready(w);
- }
-
- return 0;
-}
-
-/*
- * Remove single worker without specifying which one.
- */
-void thread_pool_del_worker(struct thread_pool *p)
-{
- struct thread_pool_worker *w = NULL;
-
- while (!w && p->thread_num) {
- wait_event(p->wait, !list_empty(&p->ready_list) ||
- !p->thread_num);
-
- dprintk("%s: locking list_empty: %d, thread_num: %d.\n",
- __func__, list_empty(&p->ready_list),
- p->thread_num);
-
- mutex_lock(&p->thread_lock);
- if (!list_empty(&p->ready_list)) {
- w = list_first_entry(&p->ready_list,
- struct thread_pool_worker,
- worker_entry);
-
- dprintk("%s: deleting w: %p, thread_num: %d, "
- "list: %p [%p.%p].\n", __func__,
- w, p->thread_num, &p->ready_list,
- p->ready_list.prev, p->ready_list.next);
-
- p->thread_num--;
- list_del(&w->worker_entry);
- }
- mutex_unlock(&p->thread_lock);
- }
-
- if (w)
- thread_pool_exit_worker(w);
- dprintk("%s: deleted w: %p, thread_num: %d.\n",
- __func__, w, p->thread_num);
-}
-
-/*
- * Remove a worker with given ID.
- */
-void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id)
-{
- struct thread_pool_worker *w;
- int found = 0;
-
- mutex_lock(&p->thread_lock);
- list_for_each_entry(w, &p->ready_list, worker_entry) {
- if (w->id == id) {
- found = 1;
- p->thread_num--;
- list_del(&w->worker_entry);
- break;
- }
- }
-
- if (!found) {
- list_for_each_entry(w, &p->active_list, worker_entry) {
- if (w->id == id) {
- w->need_exit = 1;
- break;
- }
- }
- }
- mutex_unlock(&p->thread_lock);
-
- if (found)
- thread_pool_exit_worker(w);
-}
-
-/*
- * Add new worker thread with given parameters.
- * If initialization callback fails, return error.
- */
-int thread_pool_add_worker(struct thread_pool *p,
- char *name,
- unsigned int id,
- void *(*init)(void *private),
- void (*cleanup)(void *private),
- void *private)
-{
- struct thread_pool_worker *w;
- int err = -ENOMEM;
-
- w = kzalloc(sizeof(struct thread_pool_worker), GFP_KERNEL);
- if (!w)
- goto err_out_exit;
-
- w->pool = p;
- init_waitqueue_head(&w->wait);
- w->cleanup = cleanup;
- w->id = id;
-
- w->thread = kthread_run(thread_pool_worker_func, w, "%s", name);
- if (IS_ERR(w->thread)) {
- err = PTR_ERR(w->thread);
- goto err_out_free;
- }
-
- w->private = init(private);
- if (IS_ERR(w->private)) {
- err = PTR_ERR(w->private);
- goto err_out_stop_thread;
- }
-
- mutex_lock(&p->thread_lock);
- list_add_tail(&w->worker_entry, &p->ready_list);
- p->thread_num++;
- mutex_unlock(&p->thread_lock);
-
- return 0;
-
-err_out_stop_thread:
- kthread_stop(w->thread);
-err_out_free:
- kfree(w);
-err_out_exit:
- return err;
-}
-
-/*
- * Destroy the whole pool.
- */
-void thread_pool_destroy(struct thread_pool *p)
-{
- while (p->thread_num) {
- dprintk("%s: num: %d.\n", __func__, p->thread_num);
- thread_pool_del_worker(p);
- }
-
- kfree(p);
-}
-
-/*
- * Create a pool with given number of threads.
- * They will have sequential IDs started from zero.
- */
-struct thread_pool *thread_pool_create(int num, char *name,
- void *(*init)(void *private),
- void (*cleanup)(void *private),
- void *private)
-{
- struct thread_pool_worker *w, *tmp;
- struct thread_pool *p;
- int err = -ENOMEM;
- int i;
-
- p = kzalloc(sizeof(struct thread_pool), GFP_KERNEL);
- if (!p)
- goto err_out_exit;
-
- init_waitqueue_head(&p->wait);
- mutex_init(&p->thread_lock);
- INIT_LIST_HEAD(&p->ready_list);
- INIT_LIST_HEAD(&p->active_list);
- p->thread_num = 0;
-
- for (i = 0; i < num; ++i) {
- err = thread_pool_add_worker(p, name, i, init,
- cleanup, private);
- if (err)
- goto err_out_free_all;
- }
-
- return p;
-
-err_out_free_all:
- list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
- list_del(&w->worker_entry);
- thread_pool_exit_worker(w);
- }
- kfree(p);
-err_out_exit:
- return ERR_PTR(err);
-}
-
-/*
- * Schedule execution of the action on a given thread,
- * provided ID pointer has to match previously stored
- * private data.
- */
-int thread_pool_schedule_private(struct thread_pool *p,
- int (*setup)(void *private, void *data),
- int (*action)(void *private, void *data),
- void *data, long timeout, void *id)
-{
- struct thread_pool_worker *w, *tmp, *worker = NULL;
- int err = 0;
-
- while (!worker && !err) {
- timeout = wait_event_interruptible_timeout(p->wait,
- !list_empty(&p->ready_list),
- timeout);
-
- if (!timeout) {
- err = -ETIMEDOUT;
- break;
- }
-
- worker = NULL;
- mutex_lock(&p->thread_lock);
- list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
- if (id && id != w->private)
- continue;
-
- worker = w;
-
- list_move_tail(&w->worker_entry, &p->active_list);
-
- err = setup(w->private, data);
- if (!err) {
- w->schedule_data = data;
- w->action = action;
- w->has_data = 1;
- wake_up(&w->wait);
- } else {
- list_move_tail(&w->worker_entry,
- &p->ready_list);
- }
-
- break;
- }
- mutex_unlock(&p->thread_lock);
- }
-
- return err;
-}
-
-/*
- * Schedule execution on arbitrary thread from the pool.
- */
-int thread_pool_schedule(struct thread_pool *p,
- int (*setup)(void *private, void *data),
- int (*action)(void *private, void *data),
- void *data, long timeout)
-{
- return thread_pool_schedule_private(p, setup,
- action, data, timeout, NULL);
-}
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bio.h>
-#include <linux/dst.h>
-#include <linux/slab.h>
-#include <linux/mempool.h>
-
-/*
- * Transaction memory pool size.
- */
-static int dst_mempool_num = 32;
-module_param(dst_mempool_num, int, 0644);
-
-/*
- * Transaction tree management.
- */
-static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new)
-{
- if (gen < new)
- return 1;
- if (gen > new)
- return -1;
- return 0;
-}
-
-struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen)
-{
- struct rb_root *root = &node->trans_root;
- struct rb_node *n = root->rb_node;
- struct dst_trans *t, *ret = NULL;
- int cmp;
-
- while (n) {
- t = rb_entry(n, struct dst_trans, trans_entry);
-
- cmp = dst_trans_cmp(t->gen, gen);
- if (cmp < 0)
- n = n->rb_left;
- else if (cmp > 0)
- n = n->rb_right;
- else {
- ret = t;
- break;
- }
- }
-
- dprintk("%s: %s transaction: id: %llu.\n", __func__,
- (ret) ? "found" : "not found", gen);
-
- return ret;
-}
-
-static int dst_trans_insert(struct dst_trans *new)
-{
- struct rb_root *root = &new->n->trans_root;
- struct rb_node **n = &root->rb_node, *parent = NULL;
- struct dst_trans *ret = NULL, *t;
- int cmp;
-
- while (*n) {
- parent = *n;
-
- t = rb_entry(parent, struct dst_trans, trans_entry);
-
- cmp = dst_trans_cmp(t->gen, new->gen);
- if (cmp < 0)
- n = &parent->rb_left;
- else if (cmp > 0)
- n = &parent->rb_right;
- else {
- ret = t;
- break;
- }
- }
-
- new->send_time = jiffies;
- if (ret) {
- printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, "
- "send_time: %lu, new: gen: %llu, bio: %llu/%u, "
- "send_time: %lu.\n", __func__,
- ret->gen, (u64)ret->bio->bi_sector,
- ret->bio->bi_size, ret->send_time,
- new->gen, (u64)new->bio->bi_sector,
- new->bio->bi_size, new->send_time);
- return -EEXIST;
- }
-
- rb_link_node(&new->trans_entry, parent, n);
- rb_insert_color(&new->trans_entry, root);
-
- dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n",
- __func__, new->gen, (u64)new->bio->bi_sector,
- new->bio->bi_size, new->send_time);
-
- return 0;
-}
-
-int dst_trans_remove_nolock(struct dst_trans *t)
-{
- struct dst_node *n = t->n;
-
- if (t->trans_entry.rb_parent_color) {
- rb_erase(&t->trans_entry, &n->trans_root);
- t->trans_entry.rb_parent_color = 0;
- }
- return 0;
-}
-
-int dst_trans_remove(struct dst_trans *t)
-{
- int ret;
- struct dst_node *n = t->n;
-
- mutex_lock(&n->trans_lock);
- ret = dst_trans_remove_nolock(t);
- mutex_unlock(&n->trans_lock);
-
- return ret;
-}
-
-/*
- * When transaction is completed and there are no more users,
- * we complete appriate block IO request with given error status.
- */
-void dst_trans_put(struct dst_trans *t)
-{
- if (atomic_dec_and_test(&t->refcnt)) {
- struct bio *bio = t->bio;
-
- dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n",
- __func__, t, t->gen, bio);
-
- bio_endio(bio, t->error);
- bio_put(bio);
-
- dst_node_put(t->n);
- mempool_free(t, t->n->trans_pool);
- }
-}
-
-/*
- * Process given block IO request: allocate transaction, insert it into the tree
- * and send/schedule crypto processing.
- */
-int dst_process_bio(struct dst_node *n, struct bio *bio)
-{
- struct dst_trans *t;
- int err = -ENOMEM;
-
- t = mempool_alloc(n->trans_pool, GFP_NOFS);
- if (!t)
- goto err_out_exit;
-
- t->n = dst_node_get(n);
- t->bio = bio;
- t->error = 0;
- t->retries = 0;
- atomic_set(&t->refcnt, 1);
- t->gen = atomic_long_inc_return(&n->gen);
-
- t->enc = bio_data_dir(bio);
- dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen);
-
- mutex_lock(&n->trans_lock);
- err = dst_trans_insert(t);
- mutex_unlock(&n->trans_lock);
- if (err)
- goto err_out_free;
-
- dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n",
- __func__, t->gen, (u64)bio->bi_sector,
- bio->bi_size, t->enc, dst_need_crypto(n));
-
- if (dst_need_crypto(n) && t->enc)
- dst_trans_crypto(t);
- else
- dst_trans_send(t);
-
- return 0;
-
-err_out_free:
- dst_node_put(n);
- mempool_free(t, n->trans_pool);
-err_out_exit:
- bio_endio(bio, err);
- bio_put(bio);
- return err;
-}
-
-/*
- * Scan for timeout/stale transactions.
- * Each transaction is being resent multiple times before error completion.
- */
-static void dst_trans_scan(struct work_struct *work)
-{
- struct dst_node *n = container_of(work, struct dst_node,
- trans_work.work);
- struct rb_node *rb_node;
- struct dst_trans *t;
- unsigned long timeout = n->trans_scan_timeout;
- int num = 10 * n->trans_max_retries;
-
- mutex_lock(&n->trans_lock);
-
- for (rb_node = rb_first(&n->trans_root); rb_node; ) {
- t = rb_entry(rb_node, struct dst_trans, trans_entry);
-
- if (timeout && time_after(t->send_time + timeout, jiffies)
- && t->retries == 0)
- break;
-#if 0
- dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n",
- __func__, t, t->gen, n->name,
- t->retries, n->trans_max_retries);
-#endif
- if (--num == 0)
- break;
-
- dst_trans_get(t);
-
- rb_node = rb_next(rb_node);
-
- if (timeout && (++t->retries < n->trans_max_retries)) {
- dst_trans_send(t);
- } else {
- t->error = -ETIMEDOUT;
- dst_trans_remove_nolock(t);
- dst_trans_put(t);
- }
-
- dst_trans_put(t);
- }
-
- mutex_unlock(&n->trans_lock);
-
- /*
- * If no timeout specified then system is in the middle of exiting
- * process, so no need to reschedule scanning process again.
- */
- if (timeout) {
- if (!num)
- timeout = HZ;
- schedule_delayed_work(&n->trans_work, timeout);
- }
-}
-
-/*
- * Flush all transactions and mark them as timed out.
- * Destroy transaction pools.
- */
-void dst_node_trans_exit(struct dst_node *n)
-{
- struct dst_trans *t;
- struct rb_node *rb_node;
-
- if (!n->trans_cache)
- return;
-
- dprintk("%s: n: %p, cancelling the work.\n", __func__, n);
- cancel_delayed_work_sync(&n->trans_work);
- flush_scheduled_work();
- dprintk("%s: n: %p, work has been cancelled.\n", __func__, n);
-
- for (rb_node = rb_first(&n->trans_root); rb_node; ) {
- t = rb_entry(rb_node, struct dst_trans, trans_entry);
-
- dprintk("%s: t: %p, gen: %llu, n: %s.\n",
- __func__, t, t->gen, n->name);
-
- rb_node = rb_next(rb_node);
-
- t->error = -ETIMEDOUT;
- dst_trans_remove_nolock(t);
- dst_trans_put(t);
- }
-
- mempool_destroy(n->trans_pool);
- kmem_cache_destroy(n->trans_cache);
-}
-
-/*
- * Initialize transaction storage for given node.
- * Transaction stores not only control information,
- * but also network command and crypto data (if needed)
- * to reduce number of allocations. Thus transaction size
- * differs from node to node.
- */
-int dst_node_trans_init(struct dst_node *n, unsigned int size)
-{
- /*
- * We need this, since node with given name can be dropped from the
- * hash table, but be still alive, so subsequent creation of the node
- * with the same name may collide with existing cache name.
- */
-
- snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n);
-
- n->trans_cache = kmem_cache_create(n->cache_name,
- size + n->crypto.crypto_attached_size,
- 0, 0, NULL);
- if (!n->trans_cache)
- goto err_out_exit;
-
- n->trans_pool = mempool_create_slab_pool(dst_mempool_num,
- n->trans_cache);
- if (!n->trans_pool)
- goto err_out_cache_destroy;
-
- mutex_init(&n->trans_lock);
- n->trans_root = RB_ROOT;
-
- INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan);
- schedule_delayed_work(&n->trans_work, n->trans_scan_timeout);
-
- dprintk("%s: n: %p, size: %u, crypto: %u.\n",
- __func__, n, size, n->crypto.crypto_attached_size);
-
- return 0;
-
-err_out_cache_destroy:
- kmem_cache_destroy(n->trans_cache);
-err_out_exit:
- return -ENOMEM;
-}
+++ /dev/null
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#ifndef __DST_H
-#define __DST_H
-
-#include <linux/types.h>
-#include <linux/connector.h>
-
-#define DST_NAMELEN 32
-#define DST_NAME "dst"
-
-enum {
- /* Remove node with given id from storage */
- DST_DEL_NODE = 0,
- /* Add remote node with given id to the storage */
- DST_ADD_REMOTE,
- /* Add local node with given id to the storage to be exported and used by remote peers */
- DST_ADD_EXPORT,
- /* Crypto initialization command (hash/cipher used to protect the connection) */
- DST_CRYPTO,
- /* Security attributes for given connection (permissions for example) */
- DST_SECURITY,
- /* Register given node in the block layer subsystem */
- DST_START,
- DST_CMD_MAX
-};
-
-struct dst_ctl
-{
- /* Storage name */
- char name[DST_NAMELEN];
- /* Command flags */
- __u32 flags;
- /* Command itself (see above) */
- __u32 cmd;
- /* Maximum number of pages per single request in this device */
- __u32 max_pages;
- /* Stale/error transaction scanning timeout in milliseconds */
- __u32 trans_scan_timeout;
- /* Maximum number of retry sends before completing transaction as broken */
- __u32 trans_max_retries;
- /* Storage size */
- __u64 size;
-};
-
-/* Reply command carries completion status */
-struct dst_ctl_ack
-{
- struct cn_msg msg;
- int error;
- int unused[3];
-};
-
-/*
- * Unfortunaltely socket address structure is not exported to userspace
- * and is redefined there.
- */
-#define SADDR_MAX_DATA 128
-
-struct saddr {
- /* address family, AF_xxx */
- unsigned short sa_family;
- /* 14 bytes of protocol address */
- char sa_data[SADDR_MAX_DATA];
- /* Number of bytes used in sa_data */
- unsigned short sa_data_len;
-};
-
-/* Address structure */
-struct dst_network_ctl
-{
- /* Socket type: datagram, stream...*/
- unsigned int type;
- /* Let me guess, is it a Jupiter diameter? */
- unsigned int proto;
- /* Peer's address */
- struct saddr addr;
-};
-
-struct dst_crypto_ctl
-{
- /* Cipher and hash names */
- char cipher_algo[DST_NAMELEN];
- char hash_algo[DST_NAMELEN];
-
- /* Key sizes. Can be zero for digest for example */
- unsigned int cipher_keysize, hash_keysize;
- /* Alignment. Calculated by the DST itself. */
- unsigned int crypto_attached_size;
- /* Number of threads to perform crypto operations */
- int thread_num;
-};
-
-/* Export security attributes have this bits checked in when client connects */
-#define DST_PERM_READ (1<<0)
-#define DST_PERM_WRITE (1<<1)
-
-/*
- * Right now it is simple model, where each remote address
- * is assigned to set of permissions it is allowed to perform.
- * In real world block device does not know anything but
- * reading and writing, so it should be more than enough.
- */
-struct dst_secure_user
-{
- unsigned int permissions;
- struct saddr addr;
-};
-
-/*
- * Export control command: device to export and network address to accept
- * clients to work with given device
- */
-struct dst_export_ctl
-{
- char device[DST_NAMELEN];
- struct dst_network_ctl ctl;
-};
-
-enum {
- DST_CFG = 1, /* Request remote configuration */
- DST_IO, /* IO command */
- DST_IO_RESPONSE, /* IO response */
- DST_PING, /* Keepalive message */
- DST_NCMD_MAX,
-};
-
-struct dst_cmd
-{
- /* Network command itself, see above */
- __u32 cmd;
- /*
- * Size of the attached data
- * (in most cases, for READ command it means how many bytes were requested)
- */
- __u32 size;
- /* Crypto size: number of attached bytes with digest/hmac */
- __u32 csize;
- /* Here we can carry secret data */
- __u32 reserved;
- /* Read/write bits, see how they are encoded in bio structure */
- __u64 rw;
- /* BIO flags */
- __u64 flags;
- /* Unique command id (like transaction ID) */
- __u64 id;
- /* Sector to start IO from */
- __u64 sector;
- /* Hash data is placed after this header */
- __u8 hash[0];
-};
-
-/*
- * Convert command to/from network byte order.
- * We do not use hton*() functions, since there is
- * no 64-bit implementation.
- */
-static inline void dst_convert_cmd(struct dst_cmd *c)
-{
- c->cmd = __cpu_to_be32(c->cmd);
- c->csize = __cpu_to_be32(c->csize);
- c->size = __cpu_to_be32(c->size);
- c->sector = __cpu_to_be64(c->sector);
- c->id = __cpu_to_be64(c->id);
- c->flags = __cpu_to_be64(c->flags);
- c->rw = __cpu_to_be64(c->rw);
-}
-
-/* Transaction id */
-typedef __u64 dst_gen_t;
-
-#ifdef __KERNEL__
-
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/device.h>
-#include <linux/mempool.h>
-#include <linux/net.h>
-#include <linux/poll.h>
-#include <linux/rbtree.h>
-
-#ifdef CONFIG_DST_DEBUG
-#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
-#else
-static inline void __attribute__ ((format (printf, 1, 2)))
- dprintk(const char *fmt, ...) {}
-#endif
-
-struct dst_node;
-
-struct dst_trans
-{
- /* DST node we are working with */
- struct dst_node *n;
-
- /* Entry inside transaction tree */
- struct rb_node trans_entry;
-
- /* Merlin kills this transaction when this memory cell equals zero */
- atomic_t refcnt;
-
- /* How this transaction should be processed by crypto engine */
- short enc;
- /* How many times this transaction was resent */
- short retries;
- /* Completion status */
- int error;
-
- /* When did we send it to the remote peer */
- long send_time;
-
- /* My name is...
- * Well, computers does not speak, they have unique id instead */
- dst_gen_t gen;
-
- /* Block IO we are working with */
- struct bio *bio;
-
- /* Network command for above block IO request */
- struct dst_cmd cmd;
-};
-
-struct dst_crypto_engine
-{
- /* What should we do with all block requests */
- struct crypto_hash *hash;
- struct crypto_ablkcipher *cipher;
-
- /* Pool of pages used to encrypt data into before sending */
- int page_num;
- struct page **pages;
-
- /* What to do with current request */
- int enc;
- /* Who we are and where do we go */
- struct scatterlist *src, *dst;
-
- /* Maximum timeout waiting for encryption to be completed */
- long timeout;
- /* IV is a 64-bit sequential counter */
- u64 iv;
-
- /* Secret data */
- void *private;
-
- /* Cached temporary data lives here */
- int size;
- void *data;
-};
-
-struct dst_state
-{
- /* The main state protection */
- struct mutex state_lock;
-
- /* Polling machinery for sockets */
- wait_queue_t wait;
- wait_queue_head_t *whead;
- /* Most of events are being waited here */
- wait_queue_head_t thread_wait;
-
- /* Who owns this? */
- struct dst_node *node;
-
- /* Network address for this state */
- struct dst_network_ctl ctl;
-
- /* Permissions to work with: read-only or rw connection */
- u32 permissions;
-
- /* Called when we need to clean private data */
- void (* cleanup)(struct dst_state *st);
-
- /* Used by the server: BIO completion queues BIOs here */
- struct list_head request_list;
- spinlock_t request_lock;
-
- /* Guess what? No, it is not number of planets */
- atomic_t refcnt;
-
- /* This flags is set when connection should be dropped */
- int need_exit;
-
- /*
- * Socket to work with. Second pointer is used for
- * lockless check if socket was changed before performing
- * next action (like working with cached polling result)
- */
- struct socket *socket, *read_socket;
-
- /* Cached preallocated data */
- void *data;
- unsigned int size;
-
- /* Currently processed command */
- struct dst_cmd cmd;
-};
-
-struct dst_info
-{
- /* Device size */
- u64 size;
-
- /* Local device name for export devices */
- char local[DST_NAMELEN];
-
- /* Network setup */
- struct dst_network_ctl net;
-
- /* Sysfs bits use this */
- struct device device;
-};
-
-struct dst_node
-{
- struct list_head node_entry;
-
- /* Hi, my name is stored here */
- char name[DST_NAMELEN];
- /* My cache name is stored here */
- char cache_name[DST_NAMELEN];
-
- /* Block device attached to given node.
- * Only valid for exporting nodes */
- struct block_device *bdev;
- /* Network state machine for given peer */
- struct dst_state *state;
-
- /* Block IO machinery */
- struct request_queue *queue;
- struct gendisk *disk;
-
- /* Number of threads in processing pool */
- int thread_num;
- /* Maximum number of pages in single IO */
- int max_pages;
-
- /* I'm that big in bytes */
- loff_t size;
-
- /* Exported to userspace node information */
- struct dst_info *info;
-
- /*
- * Security attribute list.
- * Used only by exporting node currently.
- */
- struct list_head security_list;
- struct mutex security_lock;
-
- /*
- * When this unerflows below zero, university collapses.
- * But this will not happen, since node will be freed,
- * when reference counter reaches zero.
- */
- atomic_t refcnt;
-
- /* How precisely should I be started? */
- int (*start)(struct dst_node *);
-
- /* Crypto capabilities */
- struct dst_crypto_ctl crypto;
- u8 *hash_key;
- u8 *cipher_key;
-
- /* Pool of processing thread */
- struct thread_pool *pool;
-
- /* Transaction IDs live here */
- atomic_long_t gen;
-
- /*
- * How frequently and how many times transaction
- * tree should be scanned to drop stale objects.
- */
- long trans_scan_timeout;
- int trans_max_retries;
-
- /* Small gnomes live here */
- struct rb_root trans_root;
- struct mutex trans_lock;
-
- /*
- * Transaction cache/memory pool.
- * It is big enough to contain not only transaction
- * itself, but additional crypto data (digest/hmac).
- */
- struct kmem_cache *trans_cache;
- mempool_t *trans_pool;
-
- /* This entity scans transaction tree */
- struct delayed_work trans_work;
-
- wait_queue_head_t wait;
-};
-
-/* Kernel representation of the security attribute */
-struct dst_secure
-{
- struct list_head sec_entry;
- struct dst_secure_user sec;
-};
-
-int dst_process_bio(struct dst_node *n, struct bio *bio);
-
-int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
-int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
-
-static inline struct dst_state *dst_state_get(struct dst_state *st)
-{
- BUG_ON(atomic_read(&st->refcnt) == 0);
- atomic_inc(&st->refcnt);
- return st;
-}
-
-void dst_state_put(struct dst_state *st);
-
-struct dst_state *dst_state_alloc(struct dst_node *n);
-int dst_state_socket_create(struct dst_state *st);
-void dst_state_socket_release(struct dst_state *st);
-
-void dst_state_exit_connected(struct dst_state *st);
-
-int dst_state_schedule_receiver(struct dst_state *st);
-
-void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
-
-static inline void dst_state_lock(struct dst_state *st)
-{
- mutex_lock(&st->state_lock);
-}
-
-static inline void dst_state_unlock(struct dst_state *st)
-{
- mutex_unlock(&st->state_lock);
-}
-
-void dst_poll_exit(struct dst_state *st);
-int dst_poll_init(struct dst_state *st);
-
-static inline unsigned int dst_state_poll(struct dst_state *st)
-{
- unsigned int revents = POLLHUP | POLLERR;
-
- dst_state_lock(st);
- if (st->socket)
- revents = st->socket->ops->poll(NULL, st->socket, NULL);
- dst_state_unlock(st);
-
- return revents;
-}
-
-static inline int dst_thread_setup(void *private, void *data)
-{
- return 0;
-}
-
-void dst_node_put(struct dst_node *n);
-
-static inline struct dst_node *dst_node_get(struct dst_node *n)
-{
- atomic_inc(&n->refcnt);
- return n;
-}
-
-int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
-int dst_recv_cdata(struct dst_state *st, void *cdata);
-int dst_data_send_header(struct socket *sock,
- void *data, unsigned int size, int more);
-
-int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
-
-int dst_process_io(struct dst_state *st);
-int dst_export_crypto(struct dst_node *n, struct bio *bio);
-int dst_export_send_bio(struct bio *bio);
-int dst_start_export(struct dst_node *n);
-
-int __init dst_export_init(void);
-void dst_export_exit(void);
-
-/* Private structure for export block IO requests */
-struct dst_export_priv
-{
- struct list_head request_entry;
- struct dst_state *state;
- struct bio *bio;
- struct dst_cmd cmd;
-};
-
-static inline void dst_trans_get(struct dst_trans *t)
-{
- atomic_inc(&t->refcnt);
-}
-
-struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
-int dst_trans_remove(struct dst_trans *t);
-int dst_trans_remove_nolock(struct dst_trans *t);
-void dst_trans_put(struct dst_trans *t);
-
-/*
- * Convert bio into network command.
- */
-static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
- u32 command, u64 id)
-{
- cmd->cmd = command;
- cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
- cmd->rw = bio->bi_rw;
- cmd->size = bio->bi_size;
- cmd->csize = 0;
- cmd->id = id;
- cmd->sector = bio->bi_sector;
-};
-
-int dst_trans_send(struct dst_trans *t);
-int dst_trans_crypto(struct dst_trans *t);
-
-int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
-void dst_node_crypto_exit(struct dst_node *n);
-
-static inline int dst_need_crypto(struct dst_node *n)
-{
- struct dst_crypto_ctl *c = &n->crypto;
- /*
- * Logical OR is appropriate here, but boolean one produces
- * more optimal code, so it is used instead.
- */
- return (c->hash_algo[0] | c->cipher_algo[0]);
-}
-
-int dst_node_trans_init(struct dst_node *n, unsigned int size);
-void dst_node_trans_exit(struct dst_node *n);
-
-/*
- * Pool of threads.
- * Ready list contains threads currently free to be used,
- * active one contains threads with some work scheduled for them.
- * Caller can wait in given queue when thread is ready.
- */
-struct thread_pool
-{
- int thread_num;
- struct mutex thread_lock;
- struct list_head ready_list, active_list;
-
- wait_queue_head_t wait;
-};
-
-void thread_pool_del_worker(struct thread_pool *p);
-void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
-int thread_pool_add_worker(struct thread_pool *p,
- char *name,
- unsigned int id,
- void *(* init)(void *data),
- void (* cleanup)(void *data),
- void *data);
-
-void thread_pool_destroy(struct thread_pool *p);
-struct thread_pool *thread_pool_create(int num, char *name,
- void *(* init)(void *data),
- void (* cleanup)(void *data),
- void *data);
-
-int thread_pool_schedule(struct thread_pool *p,
- int (* setup)(void *stored_private, void *setup_data),
- int (* action)(void *stored_private, void *setup_data),
- void *setup_data, long timeout);
-int thread_pool_schedule_private(struct thread_pool *p,
- int (* setup)(void *private, void *data),
- int (* action)(void *private, void *data),
- void *data, long timeout, void *id);
-
-#endif /* __KERNEL__ */
-#endif /* __DST_H */