return NULL;
}
- e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL) {
IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
return NULL;
e->dest = dest;
/* link it to the list */
- write_lock(&set->lock);
e->next = set->list;
set->list = e;
atomic_inc(&set->size);
- write_unlock(&set->lock);
set->lastmod = jiffies;
return e;
{
struct ip_vs_dest_list *e, **ep;
- write_lock(&set->lock);
for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
if (e->dest == dest) {
/* HIT */
}
ep = &e->next;
}
- write_unlock(&set->lock);
}
static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
if (set == NULL)
return NULL;
- read_lock(&set->lock);
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
least = e->dest;
goto nextstage;
}
}
- read_unlock(&set->lock);
return NULL;
/* find the destination with the weighted least load */
loh = doh;
}
}
- read_unlock(&set->lock);
IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
if (set == NULL)
return NULL;
- read_lock(&set->lock);
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
most = e->dest;
goto nextstage;
}
}
- read_unlock(&set->lock);
return NULL;
/* find the destination with the weighted most load */
moh = doh;
}
}
- read_unlock(&set->lock);
IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
* IPVS lblcr hash table
*/
struct ip_vs_lblcr_table {
- rwlock_t lock; /* lock for this table */
struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
static struct ctl_table_header * sysctl_header;
-/*
- * new/free a ip_vs_lblcr_entry, which is a mapping of a destination
- * IP address to a server.
- */
-static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr)
-{
- struct ip_vs_lblcr_entry *en;
-
- en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
- if (en == NULL) {
- IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
- return NULL;
- }
-
- INIT_LIST_HEAD(&en->list);
- en->addr = daddr;
-
- /* initilize its dest set */
- atomic_set(&(en->set.size), 0);
- en->set.list = NULL;
- rwlock_init(&en->set.lock);
-
- return en;
-}
-
-
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
list_del(&en->list);
* Hash an entry in the ip_vs_lblcr_table.
* returns bool success.
*/
-static int
+static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
- unsigned hash;
-
- if (!list_empty(&en->list)) {
- IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
- "called from %p\n", __builtin_return_address(0));
- return 0;
- }
+ unsigned hash = ip_vs_lblcr_hashkey(en->addr);
- /*
- * Hash by destination IP address
- */
- hash = ip_vs_lblcr_hashkey(en->addr);
-
- write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
- write_unlock(&tbl->lock);
-
- return 1;
}
/*
- * Get ip_vs_lblcr_entry associated with supplied parameters.
+ * Get ip_vs_lblcr_entry associated with supplied parameters. Called under
+ * read lock.
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
{
- unsigned hash;
+ unsigned hash = ip_vs_lblcr_hashkey(addr);
struct ip_vs_lblcr_entry *en;
- hash = ip_vs_lblcr_hashkey(addr);
+ list_for_each_entry(en, &tbl->bucket[hash], list)
+ if (en->addr == addr)
+ return en;
+
+ return NULL;
+}
- read_lock(&tbl->lock);
- list_for_each_entry(en, &tbl->bucket[hash], list) {
- if (en->addr == addr) {
- /* HIT */
- read_unlock(&tbl->lock);
- return en;
+/*
+ * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
+ * IP address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_lblcr_entry *en;
+
+ en = ip_vs_lblcr_get(tbl, daddr);
+ if (!en) {
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en) {
+ IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+ return NULL;
}
+
+ en->addr = daddr;
+ en->lastuse = jiffies;
+
+ /* initilize its dest set */
+ atomic_set(&(en->set.size), 0);
+ en->set.list = NULL;
+ rwlock_init(&en->set.lock);
+
+ ip_vs_lblcr_hash(tbl, en);
}
- read_unlock(&tbl->lock);
+ write_lock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest);
+ write_unlock(&en->set.lock);
- return NULL;
+ return en;
}
int i;
struct ip_vs_lblcr_entry *en, *nxt;
+ /* No locking required, only called during cleanup. */
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
- atomic_dec(&tbl->entries);
}
- write_unlock(&tbl->lock);
}
}
-static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&tbl->lock);
+ write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
now))
ip_vs_lblcr_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&tbl->lock);
+ write_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
*/
static void ip_vs_lblcr_check_expire(unsigned long data)
{
- struct ip_vs_lblcr_table *tbl;
+ struct ip_vs_service *svc = (struct ip_vs_service *) data;
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int goal;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
- tbl = (struct ip_vs_lblcr_table *)data;
-
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
- ip_vs_lblcr_full_check(tbl);
+ ip_vs_lblcr_full_check(svc);
tbl->counter = 1;
goto out;
}
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&tbl->lock);
+ write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
continue;
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&tbl->lock);
+ write_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
/*
* Allocate the ip_vs_lblcr_table for this service
*/
- tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
+ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
if (tbl == NULL) {
IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
return -ENOMEM;
}
svc->sched_data = tbl;
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
- "current service\n",
- sizeof(struct ip_vs_lblcr_table));
+ "current service\n", sizeof(*tbl));
/*
* Initialize the hash buckets
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_LIST_HEAD(&tbl->bucket[i]);
}
- rwlock_init(&tbl->lock);
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
* Hook periodic timer for garbage collection
*/
setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
- (unsigned long)tbl);
- tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
- add_timer(&tbl->periodic_timer);
+ (unsigned long)svc);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
}
ip_vs_lblcr_flush(tbl);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree(tbl);
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
- sizeof(struct ip_vs_lblcr_table));
+ sizeof(*tbl));
return 0;
}
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct ip_vs_dest *dest;
- struct ip_vs_lblcr_table *tbl;
- struct ip_vs_lblcr_entry *en;
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_dest *dest = NULL;
+ struct ip_vs_lblcr_entry *en;
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
- tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
+ /* First look in our cache */
+ read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(tbl, iph->daddr);
- if (en == NULL) {
- dest = __ip_vs_lblcr_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- en = ip_vs_lblcr_new(iph->daddr);
- if (en == NULL) {
- return NULL;
- }
- ip_vs_dest_set_insert(&en->set, dest);
- ip_vs_lblcr_hash(tbl, en);
- } else {
+ if (en) {
+ /* We only hold a read lock, but this is atomic */
+ en->lastuse = jiffies;
+
+ /* Get the least loaded destination */
+ read_lock(&en->set.lock);
dest = ip_vs_dest_set_min(&en->set);
- if (!dest || is_overloaded(dest, svc)) {
- dest = __ip_vs_lblcr_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- ip_vs_dest_set_insert(&en->set, dest);
- }
+ read_unlock(&en->set.lock);
+
+ /* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
- jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
+ time_after(jiffies, en->set.lastmod +
+ sysctl_ip_vs_lblcr_expiration)) {
struct ip_vs_dest *m;
+
+ write_lock(&en->set.lock);
m = ip_vs_dest_set_max(&en->set);
if (m)
ip_vs_dest_set_erase(&en->set, m);
+ write_unlock(&en->set.lock);
+ }
+
+ /* If the destination is not overloaded, use it */
+ if (dest && !is_overloaded(dest, svc)) {
+ read_unlock(&svc->sched_lock);
+ goto out;
+ }
+
+ /* The cache entry is invalid, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ read_unlock(&svc->sched_lock);
+ return NULL;
}
+
+ /* Update our cache entry */
+ write_lock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest);
+ write_unlock(&en->set.lock);
+ }
+ read_unlock(&svc->sched_lock);
+
+ if (dest)
+ goto out;
+
+ /* No cache entry, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ return NULL;
}
- en->lastuse = jiffies;
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ write_lock(&svc->sched_lock);
+ ip_vs_lblcr_new(tbl, iph->daddr, dest);
+ write_unlock(&svc->sched_lock);
+
+out:
IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
- NIPQUAD(en->addr),
+ NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
ntohs(dest->port));