[INET_SOCK]: Move struct inet_sock & helper functions to net/inet_sock.h
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg>
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 * Changes:
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/fs.h>
27#include <linux/sysctl.h>
28#include <linux/proc_fs.h>
29#include <linux/workqueue.h>
30#include <linux/swap.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33
34#include <linux/netfilter.h>
35#include <linux/netfilter_ipv4.h>
36
37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4
LT
39#include <net/sock.h>
40
41#include <asm/uaccess.h>
42
43#include <net/ip_vs.h>
44
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DECLARE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

/* 1/rate drop and drop-entry variables; updated by update_defense_level(),
   read from the packet-processing path (non-static: shared with ip_vs_core) */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/* sysctl variables (the non-static ones are read by other IPVS files) */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
static int sysctl_ip_vs_amemthresh = 1024;	/* available-memory threshold, in pages */
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
83
#ifdef CONFIG_IP_VS_DEBUG
/* runtime debug verbosity, tunable via sysctl when debugging is built in */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level (used by the IP_VS_DBG macros). */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
92
93/*
af9debd4
JA
94 * update_defense_level is called from keventd and from sysctl,
95 * so it needs to protect itself from softirqs
1da177e4
LT
96 */
97static void update_defense_level(void)
98{
99 struct sysinfo i;
100 static int old_secure_tcp = 0;
101 int availmem;
102 int nomem;
103 int to_change = -1;
104
105 /* we only count free and buffered memory (in pages) */
106 si_meminfo(&i);
107 availmem = i.freeram + i.bufferram;
108 /* however in linux 2.5 the i.bufferram is total page cache size,
109 we need adjust it */
110 /* si_swapinfo(&i); */
111 /* availmem = availmem - (i.totalswap - i.freeswap); */
112
113 nomem = (availmem < sysctl_ip_vs_amemthresh);
114
af9debd4
JA
115 local_bh_disable();
116
1da177e4
LT
117 /* drop_entry */
118 spin_lock(&__ip_vs_dropentry_lock);
119 switch (sysctl_ip_vs_drop_entry) {
120 case 0:
121 atomic_set(&ip_vs_dropentry, 0);
122 break;
123 case 1:
124 if (nomem) {
125 atomic_set(&ip_vs_dropentry, 1);
126 sysctl_ip_vs_drop_entry = 2;
127 } else {
128 atomic_set(&ip_vs_dropentry, 0);
129 }
130 break;
131 case 2:
132 if (nomem) {
133 atomic_set(&ip_vs_dropentry, 1);
134 } else {
135 atomic_set(&ip_vs_dropentry, 0);
136 sysctl_ip_vs_drop_entry = 1;
137 };
138 break;
139 case 3:
140 atomic_set(&ip_vs_dropentry, 1);
141 break;
142 }
143 spin_unlock(&__ip_vs_dropentry_lock);
144
145 /* drop_packet */
146 spin_lock(&__ip_vs_droppacket_lock);
147 switch (sysctl_ip_vs_drop_packet) {
148 case 0:
149 ip_vs_drop_rate = 0;
150 break;
151 case 1:
152 if (nomem) {
153 ip_vs_drop_rate = ip_vs_drop_counter
154 = sysctl_ip_vs_amemthresh /
155 (sysctl_ip_vs_amemthresh-availmem);
156 sysctl_ip_vs_drop_packet = 2;
157 } else {
158 ip_vs_drop_rate = 0;
159 }
160 break;
161 case 2:
162 if (nomem) {
163 ip_vs_drop_rate = ip_vs_drop_counter
164 = sysctl_ip_vs_amemthresh /
165 (sysctl_ip_vs_amemthresh-availmem);
166 } else {
167 ip_vs_drop_rate = 0;
168 sysctl_ip_vs_drop_packet = 1;
169 }
170 break;
171 case 3:
172 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
173 break;
174 }
175 spin_unlock(&__ip_vs_droppacket_lock);
176
177 /* secure_tcp */
178 write_lock(&__ip_vs_securetcp_lock);
179 switch (sysctl_ip_vs_secure_tcp) {
180 case 0:
181 if (old_secure_tcp >= 2)
182 to_change = 0;
183 break;
184 case 1:
185 if (nomem) {
186 if (old_secure_tcp < 2)
187 to_change = 1;
188 sysctl_ip_vs_secure_tcp = 2;
189 } else {
190 if (old_secure_tcp >= 2)
191 to_change = 0;
192 }
193 break;
194 case 2:
195 if (nomem) {
196 if (old_secure_tcp < 2)
197 to_change = 1;
198 } else {
199 if (old_secure_tcp >= 2)
200 to_change = 0;
201 sysctl_ip_vs_secure_tcp = 1;
202 }
203 break;
204 case 3:
205 if (old_secure_tcp < 2)
206 to_change = 1;
207 break;
208 }
209 old_secure_tcp = sysctl_ip_vs_secure_tcp;
210 if (to_change >= 0)
211 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
212 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
213
214 local_bh_enable();
1da177e4
LT
215}
216
217
218/*
219 * Timer for checking the defense
220 */
221#define DEFENSE_TIMER_PERIOD 1*HZ
222static void defense_work_handler(void *data);
223static DECLARE_WORK(defense_work, defense_work_handler, NULL);
224
225static void defense_work_handler(void *data)
226{
227 update_defense_level();
228 if (atomic_read(&ip_vs_dropentry))
229 ip_vs_random_dropentry();
230
231 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
232}
233
/*
 * Pin the IPVS module in memory while user-space holds state that
 * depends on it.  Returns non-zero on success, 0 if the module is
 * already going away (try_module_get() failed).
 */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}

/* Drop a reference taken with ip_vs_use_count_inc(). */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
245
246
/*
 *	Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)	/* 256 buckets */
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *	Hash table: for real service lookups
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)	/* 16 buckets */
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *	Trash for destinations: removed dests that are still referenced
 *	by connection entries are parked here (see ip_vs_trash_get_dest).
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *	FTP & NULL virtual service counters: fast checks for whether the
 *	FTP helper / catch-all (port 0) lookups in ip_vs_service_get are
 *	worth doing at all.
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
278
279
280/*
281 * Returns hash value for virtual service
282 */
283static __inline__ unsigned
284ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
285{
286 register unsigned porth = ntohs(port);
287
288 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
289 & IP_VS_SVC_TAB_MASK;
290}
291
292/*
293 * Returns hash value of fwmark for virtual service lookup
294 */
295static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
296{
297 return fwmark & IP_VS_SVC_TAB_MASK;
298}
299
300/*
301 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
302 * or in the ip_vs_svc_fwm_table by fwmark.
303 * Should be called with locked tables.
304 */
305static int ip_vs_svc_hash(struct ip_vs_service *svc)
306{
307 unsigned hash;
308
309 if (svc->flags & IP_VS_SVC_F_HASHED) {
310 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
311 "called from %p\n", __builtin_return_address(0));
312 return 0;
313 }
314
315 if (svc->fwmark == 0) {
316 /*
317 * Hash it by <protocol,addr,port> in ip_vs_svc_table
318 */
319 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
320 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
321 } else {
322 /*
323 * Hash it by fwmark in ip_vs_svc_fwm_table
324 */
325 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
326 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
327 }
328
329 svc->flags |= IP_VS_SVC_F_HASHED;
330 /* increase its refcnt because it is referenced by the svc table */
331 atomic_inc(&svc->refcnt);
332 return 1;
333}
334
335
336/*
337 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
338 * Should be called with locked tables.
339 */
340static int ip_vs_svc_unhash(struct ip_vs_service *svc)
341{
342 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
343 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349 /* Remove it from the ip_vs_svc_table table */
350 list_del(&svc->s_list);
351 } else {
352 /* Remove it from the ip_vs_svc_fwm_table table */
353 list_del(&svc->f_list);
354 }
355
356 svc->flags &= ~IP_VS_SVC_F_HASHED;
357 atomic_dec(&svc->refcnt);
358 return 1;
359}
360
361
362/*
363 * Get service by {proto,addr,port} in the service table.
364 */
365static __inline__ struct ip_vs_service *
366__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
367{
368 unsigned hash;
369 struct ip_vs_service *svc;
370
371 /* Check for "full" addressed entries */
372 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
373
374 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
375 if ((svc->addr == vaddr)
376 && (svc->port == vport)
377 && (svc->protocol == protocol)) {
378 /* HIT */
379 atomic_inc(&svc->usecnt);
380 return svc;
381 }
382 }
383
384 return NULL;
385}
386
387
388/*
389 * Get service by {fwmark} in the service table.
390 */
391static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
392{
393 unsigned hash;
394 struct ip_vs_service *svc;
395
396 /* Check for fwmark addressed entries */
397 hash = ip_vs_svc_fwm_hashkey(fwmark);
398
399 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
400 if (svc->fwmark == fwmark) {
401 /* HIT */
402 atomic_inc(&svc->usecnt);
403 return svc;
404 }
405 }
406
407 return NULL;
408}
409
410struct ip_vs_service *
411ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
412{
413 struct ip_vs_service *svc;
414
415 read_lock(&__ip_vs_svc_lock);
416
417 /*
418 * Check the table hashed by fwmark first
419 */
420 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
421 goto out;
422
423 /*
424 * Check the table hashed by <protocol,addr,port>
425 * for "full" addressed entries
426 */
427 svc = __ip_vs_service_get(protocol, vaddr, vport);
428
429 if (svc == NULL
430 && protocol == IPPROTO_TCP
431 && atomic_read(&ip_vs_ftpsvc_counter)
432 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
433 /*
434 * Check if ftp service entry exists, the packet
435 * might belong to FTP data connections.
436 */
437 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
438 }
439
440 if (svc == NULL
441 && atomic_read(&ip_vs_nullsvc_counter)) {
442 /*
443 * Check if the catch-all port (port zero) exists
444 */
445 svc = __ip_vs_service_get(protocol, vaddr, 0);
446 }
447
448 out:
449 read_unlock(&__ip_vs_svc_lock);
450
451 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
452 fwmark, ip_vs_proto_name(protocol),
453 NIPQUAD(vaddr), ntohs(vport),
454 svc?"hit":"not hit");
455
456 return svc;
457}
458
459
460static inline void
461__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
462{
463 atomic_inc(&svc->refcnt);
464 dest->svc = svc;
465}
466
467static inline void
468__ip_vs_unbind_svc(struct ip_vs_dest *dest)
469{
470 struct ip_vs_service *svc = dest->svc;
471
472 dest->svc = NULL;
473 if (atomic_dec_and_test(&svc->refcnt))
474 kfree(svc);
475}
476
477
478/*
479 * Returns hash value for real service
480 */
481static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
482{
483 register unsigned porth = ntohs(port);
484
485 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
486 & IP_VS_RTAB_MASK;
487}
488
489/*
490 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
491 * should be called with locked tables.
492 */
493static int ip_vs_rs_hash(struct ip_vs_dest *dest)
494{
495 unsigned hash;
496
497 if (!list_empty(&dest->d_list)) {
498 return 0;
499 }
500
501 /*
502 * Hash by proto,addr,port,
503 * which are the parameters of the real service.
504 */
505 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
506 list_add(&dest->d_list, &ip_vs_rtable[hash]);
507
508 return 1;
509}
510
511/*
512 * UNhashes ip_vs_dest from ip_vs_rtable.
513 * should be called with locked tables.
514 */
515static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
516{
517 /*
518 * Remove it from the ip_vs_rtable table.
519 */
520 if (!list_empty(&dest->d_list)) {
521 list_del(&dest->d_list);
522 INIT_LIST_HEAD(&dest->d_list);
523 }
524
525 return 1;
526}
527
528/*
529 * Lookup real service by <proto,addr,port> in the real service table.
530 */
531struct ip_vs_dest *
532ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
533{
534 unsigned hash;
535 struct ip_vs_dest *dest;
536
537 /*
538 * Check for "full" addressed entries
539 * Return the first found entry
540 */
541 hash = ip_vs_rs_hashkey(daddr, dport);
542
543 read_lock(&__ip_vs_rs_lock);
544 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
545 if ((dest->addr == daddr)
546 && (dest->port == dport)
547 && ((dest->protocol == protocol) ||
548 dest->vfwmark)) {
549 /* HIT */
550 read_unlock(&__ip_vs_rs_lock);
551 return dest;
552 }
553 }
554 read_unlock(&__ip_vs_rs_lock);
555
556 return NULL;
557}
558
559/*
560 * Lookup destination by {addr,port} in the given service
561 */
562static struct ip_vs_dest *
563ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
564{
565 struct ip_vs_dest *dest;
566
567 /*
568 * Find the destination for the given service
569 */
570 list_for_each_entry(dest, &svc->destinations, n_list) {
571 if ((dest->addr == daddr) && (dest->port == dport)) {
572 /* HIT */
573 return dest;
574 }
575 }
576
577 return NULL;
578}
579
580
581/*
582 * Lookup dest by {svc,addr,port} in the destination trash.
583 * The destination trash is used to hold the destinations that are removed
584 * from the service table but are still referenced by some conn entries.
585 * The reason to add the destination trash is when the dest is temporary
586 * down (either by administrator or by monitor program), the dest can be
587 * picked back from the trash, the remaining connections to the dest can
588 * continue, and the counting information of the dest is also useful for
589 * scheduling.
590 */
591static struct ip_vs_dest *
592ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
593{
594 struct ip_vs_dest *dest, *nxt;
595
596 /*
597 * Find the destination in trash
598 */
599 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
600 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
601 "refcnt=%d\n",
602 dest->vfwmark,
603 NIPQUAD(dest->addr), ntohs(dest->port),
604 atomic_read(&dest->refcnt));
605 if (dest->addr == daddr &&
606 dest->port == dport &&
607 dest->vfwmark == svc->fwmark &&
608 dest->protocol == svc->protocol &&
609 (svc->fwmark ||
610 (dest->vaddr == svc->addr &&
611 dest->vport == svc->port))) {
612 /* HIT */
613 return dest;
614 }
615
616 /*
617 * Try to purge the destination from trash if not referenced
618 */
619 if (atomic_read(&dest->refcnt) == 1) {
620 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
621 "from trash\n",
622 dest->vfwmark,
623 NIPQUAD(dest->addr), ntohs(dest->port));
624 list_del(&dest->n_list);
625 ip_vs_dst_reset(dest);
626 __ip_vs_unbind_svc(dest);
627 kfree(dest);
628 }
629 }
630
631 return NULL;
632}
633
634
635/*
636 * Clean up all the destinations in the trash
637 * Called by the ip_vs_control_cleanup()
638 *
639 * When the ip_vs_control_clearup is activated by ipvs module exit,
640 * the service tables must have been flushed and all the connections
641 * are expired, and the refcnt of each destination in the trash must
642 * be 1, so we simply release them here.
643 */
644static void ip_vs_trash_cleanup(void)
645{
646 struct ip_vs_dest *dest, *nxt;
647
648 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
649 list_del(&dest->n_list);
650 ip_vs_dst_reset(dest);
651 __ip_vs_unbind_svc(dest);
652 kfree(dest);
653 }
654}
655
656
/*
 * Zero an ip_vs_stats block under its own lock.  The memset wipes only
 * the counter fields that precede the embedded spinlock (relies on the
 * lock being laid out after the counters in struct ip_vs_stats), then
 * the rate estimator attached to these stats is reset as well.
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);
	memset(stats, 0, (char *)&stats->lock - (char *)stats);
	spin_unlock_bh(&stats->lock);
	ip_vs_zero_estimator(stats);
}
665
666/*
667 * Update a destination in the given service
668 */
669static void
670__ip_vs_update_dest(struct ip_vs_service *svc,
671 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
672{
673 int conn_flags;
674
675 /* set the weight and the flags */
676 atomic_set(&dest->weight, udest->weight);
677 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
678
679 /* check if local node and update the flags */
680 if (inet_addr_type(udest->addr) == RTN_LOCAL) {
681 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
682 | IP_VS_CONN_F_LOCALNODE;
683 }
684
685 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
686 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
687 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
688 } else {
689 /*
690 * Put the real service in ip_vs_rtable if not present.
691 * For now only for NAT!
692 */
693 write_lock_bh(&__ip_vs_rs_lock);
694 ip_vs_rs_hash(dest);
695 write_unlock_bh(&__ip_vs_rs_lock);
696 }
697 atomic_set(&dest->conn_flags, conn_flags);
698
699 /* bind the service */
700 if (!dest->svc) {
701 __ip_vs_bind_svc(dest, svc);
702 } else {
703 if (dest->svc != svc) {
704 __ip_vs_unbind_svc(dest);
705 ip_vs_zero_stats(&dest->stats);
706 __ip_vs_bind_svc(dest, svc);
707 }
708 }
709
710 /* set the dest status flags */
711 dest->flags |= IP_VS_DEST_F_AVAILABLE;
712
713 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
714 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
715 dest->u_threshold = udest->u_threshold;
716 dest->l_threshold = udest->l_threshold;
717}
718
719
720/*
721 * Create a destination for the given service
722 */
723static int
724ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
725 struct ip_vs_dest **dest_p)
726{
727 struct ip_vs_dest *dest;
728 unsigned atype;
729
730 EnterFunction(2);
731
732 atype = inet_addr_type(udest->addr);
733 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
734 return -EINVAL;
735
736 dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
737 if (dest == NULL) {
738 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
739 return -ENOMEM;
740 }
741 memset(dest, 0, sizeof(struct ip_vs_dest));
742
743 dest->protocol = svc->protocol;
744 dest->vaddr = svc->addr;
745 dest->vport = svc->port;
746 dest->vfwmark = svc->fwmark;
747 dest->addr = udest->addr;
748 dest->port = udest->port;
749
750 atomic_set(&dest->activeconns, 0);
751 atomic_set(&dest->inactconns, 0);
752 atomic_set(&dest->persistconns, 0);
753 atomic_set(&dest->refcnt, 0);
754
755 INIT_LIST_HEAD(&dest->d_list);
756 spin_lock_init(&dest->dst_lock);
757 spin_lock_init(&dest->stats.lock);
758 __ip_vs_update_dest(svc, dest, udest);
759 ip_vs_new_estimator(&dest->stats);
760
761 *dest_p = dest;
762
763 LeaveFunction(2);
764 return 0;
765}
766
767
768/*
769 * Add a destination into an existing service
770 */
771static int
772ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
773{
774 struct ip_vs_dest *dest;
775 __u32 daddr = udest->addr;
776 __u16 dport = udest->port;
777 int ret;
778
779 EnterFunction(2);
780
781 if (udest->weight < 0) {
782 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
783 return -ERANGE;
784 }
785
786 if (udest->l_threshold > udest->u_threshold) {
787 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
788 "upper threshold\n");
789 return -ERANGE;
790 }
791
792 /*
793 * Check if the dest already exists in the list
794 */
795 dest = ip_vs_lookup_dest(svc, daddr, dport);
796 if (dest != NULL) {
797 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
798 return -EEXIST;
799 }
800
801 /*
802 * Check if the dest already exists in the trash and
803 * is from the same service
804 */
805 dest = ip_vs_trash_get_dest(svc, daddr, dport);
806 if (dest != NULL) {
807 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
808 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
809 NIPQUAD(daddr), ntohs(dport),
810 atomic_read(&dest->refcnt),
811 dest->vfwmark,
812 NIPQUAD(dest->vaddr),
813 ntohs(dest->vport));
814 __ip_vs_update_dest(svc, dest, udest);
815
816 /*
817 * Get the destination from the trash
818 */
819 list_del(&dest->n_list);
820
821 ip_vs_new_estimator(&dest->stats);
822
823 write_lock_bh(&__ip_vs_svc_lock);
824
825 /*
826 * Wait until all other svc users go away.
827 */
828 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
829
830 list_add(&dest->n_list, &svc->destinations);
831 svc->num_dests++;
832
833 /* call the update_service function of its scheduler */
834 svc->scheduler->update_service(svc);
835
836 write_unlock_bh(&__ip_vs_svc_lock);
837 return 0;
838 }
839
840 /*
841 * Allocate and initialize the dest structure
842 */
843 ret = ip_vs_new_dest(svc, udest, &dest);
844 if (ret) {
845 return ret;
846 }
847
848 /*
849 * Add the dest entry into the list
850 */
851 atomic_inc(&dest->refcnt);
852
853 write_lock_bh(&__ip_vs_svc_lock);
854
855 /*
856 * Wait until all other svc users go away.
857 */
858 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
859
860 list_add(&dest->n_list, &svc->destinations);
861 svc->num_dests++;
862
863 /* call the update_service function of its scheduler */
864 svc->scheduler->update_service(svc);
865
866 write_unlock_bh(&__ip_vs_svc_lock);
867
868 LeaveFunction(2);
869
870 return 0;
871}
872
873
874/*
875 * Edit a destination in the given service
876 */
877static int
878ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
879{
880 struct ip_vs_dest *dest;
881 __u32 daddr = udest->addr;
882 __u16 dport = udest->port;
883
884 EnterFunction(2);
885
886 if (udest->weight < 0) {
887 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
888 return -ERANGE;
889 }
890
891 if (udest->l_threshold > udest->u_threshold) {
892 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
893 "upper threshold\n");
894 return -ERANGE;
895 }
896
897 /*
898 * Lookup the destination list
899 */
900 dest = ip_vs_lookup_dest(svc, daddr, dport);
901 if (dest == NULL) {
902 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
903 return -ENOENT;
904 }
905
906 __ip_vs_update_dest(svc, dest, udest);
907
908 write_lock_bh(&__ip_vs_svc_lock);
909
910 /* Wait until all other svc users go away */
911 while (atomic_read(&svc->usecnt) > 1) {};
912
913 /* call the update_service, because server weight may be changed */
914 svc->scheduler->update_service(svc);
915
916 write_unlock_bh(&__ip_vs_svc_lock);
917
918 LeaveFunction(2);
919
920 return 0;
921}
922
923
924/*
925 * Delete a destination (must be already unlinked from the service)
926 */
927static void __ip_vs_del_dest(struct ip_vs_dest *dest)
928{
929 ip_vs_kill_estimator(&dest->stats);
930
931 /*
932 * Remove it from the d-linked list with the real services.
933 */
934 write_lock_bh(&__ip_vs_rs_lock);
935 ip_vs_rs_unhash(dest);
936 write_unlock_bh(&__ip_vs_rs_lock);
937
938 /*
939 * Decrease the refcnt of the dest, and free the dest
940 * if nobody refers to it (refcnt=0). Otherwise, throw
941 * the destination into the trash.
942 */
943 if (atomic_dec_and_test(&dest->refcnt)) {
944 ip_vs_dst_reset(dest);
945 /* simply decrease svc->refcnt here, let the caller check
946 and release the service if nobody refers to it.
947 Only user context can release destination and service,
948 and only one user context can update virtual service at a
949 time, so the operation here is OK */
950 atomic_dec(&dest->svc->refcnt);
951 kfree(dest);
952 } else {
953 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
954 NIPQUAD(dest->addr), ntohs(dest->port),
955 atomic_read(&dest->refcnt));
956 list_add(&dest->n_list, &ip_vs_dest_trash);
957 atomic_inc(&dest->refcnt);
958 }
959}
960
961
962/*
963 * Unlink a destination from the given service
964 */
965static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
966 struct ip_vs_dest *dest,
967 int svcupd)
968{
969 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
970
971 /*
972 * Remove it from the d-linked destination list.
973 */
974 list_del(&dest->n_list);
975 svc->num_dests--;
976 if (svcupd) {
977 /*
978 * Call the update_service function of its scheduler
979 */
980 svc->scheduler->update_service(svc);
981 }
982}
983
984
985/*
986 * Delete a destination server in the given service
987 */
988static int
989ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
990{
991 struct ip_vs_dest *dest;
992 __u32 daddr = udest->addr;
993 __u16 dport = udest->port;
994
995 EnterFunction(2);
996
997 dest = ip_vs_lookup_dest(svc, daddr, dport);
998 if (dest == NULL) {
999 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1000 return -ENOENT;
1001 }
1002
1003 write_lock_bh(&__ip_vs_svc_lock);
1004
1005 /*
1006 * Wait until all other svc users go away.
1007 */
1008 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1009
1010 /*
1011 * Unlink dest from the service
1012 */
1013 __ip_vs_unlink_dest(svc, dest, 1);
1014
1015 write_unlock_bh(&__ip_vs_svc_lock);
1016
1017 /*
1018 * Delete the destination
1019 */
1020 __ip_vs_del_dest(dest);
1021
1022 LeaveFunction(2);
1023
1024 return 0;
1025}
1026
1027
1028/*
1029 * Add a service into the service hash table
1030 */
1031static int
1032ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1033{
1034 int ret = 0;
1035 struct ip_vs_scheduler *sched = NULL;
1036 struct ip_vs_service *svc = NULL;
1037
1038 /* increase the module use count */
1039 ip_vs_use_count_inc();
1040
1041 /* Lookup the scheduler by 'u->sched_name' */
1042 sched = ip_vs_scheduler_get(u->sched_name);
1043 if (sched == NULL) {
1044 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1045 u->sched_name);
1046 ret = -ENOENT;
1047 goto out_mod_dec;
1048 }
1049
1050 svc = (struct ip_vs_service *)
1051 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1052 if (svc == NULL) {
1053 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1054 ret = -ENOMEM;
1055 goto out_err;
1056 }
1057 memset(svc, 0, sizeof(struct ip_vs_service));
1058
1059 /* I'm the first user of the service */
1060 atomic_set(&svc->usecnt, 1);
1061 atomic_set(&svc->refcnt, 0);
1062
1063 svc->protocol = u->protocol;
1064 svc->addr = u->addr;
1065 svc->port = u->port;
1066 svc->fwmark = u->fwmark;
1067 svc->flags = u->flags;
1068 svc->timeout = u->timeout * HZ;
1069 svc->netmask = u->netmask;
1070
1071 INIT_LIST_HEAD(&svc->destinations);
1072 rwlock_init(&svc->sched_lock);
1073 spin_lock_init(&svc->stats.lock);
1074
1075 /* Bind the scheduler */
1076 ret = ip_vs_bind_scheduler(svc, sched);
1077 if (ret)
1078 goto out_err;
1079 sched = NULL;
1080
1081 /* Update the virtual service counters */
1082 if (svc->port == FTPPORT)
1083 atomic_inc(&ip_vs_ftpsvc_counter);
1084 else if (svc->port == 0)
1085 atomic_inc(&ip_vs_nullsvc_counter);
1086
1087 ip_vs_new_estimator(&svc->stats);
1088 ip_vs_num_services++;
1089
1090 /* Hash the service into the service table */
1091 write_lock_bh(&__ip_vs_svc_lock);
1092 ip_vs_svc_hash(svc);
1093 write_unlock_bh(&__ip_vs_svc_lock);
1094
1095 *svc_p = svc;
1096 return 0;
1097
1098 out_err:
1099 if (svc != NULL) {
1100 if (svc->scheduler)
1101 ip_vs_unbind_scheduler(svc);
1102 if (svc->inc) {
1103 local_bh_disable();
1104 ip_vs_app_inc_put(svc->inc);
1105 local_bh_enable();
1106 }
1107 kfree(svc);
1108 }
1109 ip_vs_scheduler_put(sched);
1110
1111 out_mod_dec:
1112 /* decrease the module use count */
1113 ip_vs_use_count_dec();
1114
1115 return ret;
1116}
1117
1118
/*
 *	Edit a service and bind it with a new scheduler
 *
 *	Updates the flags/timeout/netmask and, if the requested scheduler
 *	differs from the current one, swaps schedulers under the service
 *	lock.  Returns 0 on success, -ENOENT for an unknown scheduler, or
 *	the error from ip_vs_unbind/bind_scheduler.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
			   u->sched_name);
		return -ENOENT;
	}
	/* default: on exit, put back the reference scheduler_get() took
	   (old_sched is whatever must be released at "out") */
	old_sched = sched;

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			/* failed: release the new scheduler instead */
			old_sched = sched;
			goto out;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out;
		}
	}

  out:
	write_unlock_bh(&__ip_vs_svc_lock);

	/* releases either the new scheduler's extra reference (no change
	   or failure) or the replaced old scheduler (success) */
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
1191
1192
1193/*
1194 * Delete a service from the service list
1195 * - The service must be unlinked, unlocked and not referenced!
1196 * - We are called under _bh lock
1197 */
1198static void __ip_vs_del_service(struct ip_vs_service *svc)
1199{
1200 struct ip_vs_dest *dest, *nxt;
1201 struct ip_vs_scheduler *old_sched;
1202
1203 ip_vs_num_services--;
1204 ip_vs_kill_estimator(&svc->stats);
1205
1206 /* Unbind scheduler */
1207 old_sched = svc->scheduler;
1208 ip_vs_unbind_scheduler(svc);
1209 if (old_sched)
1210 ip_vs_scheduler_put(old_sched);
1211
1212 /* Unbind app inc */
1213 if (svc->inc) {
1214 ip_vs_app_inc_put(svc->inc);
1215 svc->inc = NULL;
1216 }
1217
1218 /*
1219 * Unlink the whole destination list
1220 */
1221 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1222 __ip_vs_unlink_dest(svc, dest, 0);
1223 __ip_vs_del_dest(dest);
1224 }
1225
1226 /*
1227 * Update the virtual service counters
1228 */
1229 if (svc->port == FTPPORT)
1230 atomic_dec(&ip_vs_ftpsvc_counter);
1231 else if (svc->port == 0)
1232 atomic_dec(&ip_vs_nullsvc_counter);
1233
1234 /*
1235 * Free the service if nobody refers to it
1236 */
1237 if (atomic_read(&svc->refcnt) == 0)
1238 kfree(svc);
1239
1240 /* decrease the module use count */
1241 ip_vs_use_count_dec();
1242}
1243
1244/*
1245 * Delete a service from the service list
1246 */
/*
 * Unhash a service, wait for all other users to drop their reference
 * (the caller's own lookup reference accounts for usecnt == 1), then
 * destroy it.  Returns 0 on success.
 *
 * NOTE(review): returns -EEXIST (not -ESRCH) for a NULL service; this
 * looks historical and userspace may depend on it -- confirm before
 * changing.
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;

	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	return 0;
}
1270
1271
1272/*
1273 * Flush all the virtual services
1274 */
/*
 * Delete every virtual service from both hash tables.  Unlike
 * ip_vs_del_service() we hold no lookup reference here, so we wait
 * for usecnt to drop to 0 (not 1) before destroying each service.
 * Always returns 0.
 */
static int ip_vs_flush(void)
{
	int idx;
	struct ip_vs_service *svc, *nxt;

	/*
	 * Flush the service table hashed by <protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
			 */
			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			__ip_vs_del_service(svc);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	/*
	 * Flush the service table hashed by fwmark
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt,
					 &ip_vs_svc_fwm_table[idx], f_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
			 */
			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			__ip_vs_del_service(svc);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	return 0;
}
1315
1316
1317/*
1318 * Zero counters in a service or all services
1319 */
1320static int ip_vs_zero_service(struct ip_vs_service *svc)
1321{
1322 struct ip_vs_dest *dest;
1323
1324 write_lock_bh(&__ip_vs_svc_lock);
1325 list_for_each_entry(dest, &svc->destinations, n_list) {
1326 ip_vs_zero_stats(&dest->stats);
1327 }
1328 ip_vs_zero_stats(&svc->stats);
1329 write_unlock_bh(&__ip_vs_svc_lock);
1330 return 0;
1331}
1332
1333static int ip_vs_zero_all(void)
1334{
1335 int idx;
1336 struct ip_vs_service *svc;
1337
1338 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1339 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1340 ip_vs_zero_service(svc);
1341 }
1342 }
1343
1344 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1345 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1346 ip_vs_zero_service(svc);
1347 }
1348 }
1349
1350 ip_vs_zero_stats(&ip_vs_stats);
1351 return 0;
1352}
1353
1354
1355static int
1356proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1357 void __user *buffer, size_t *lenp, loff_t *ppos)
1358{
1359 int *valp = table->data;
1360 int val = *valp;
1361 int rc;
1362
1363 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1364 if (write && (*valp != val)) {
1365 if ((*valp < 0) || (*valp > 3)) {
1366 /* Restore the correct value */
1367 *valp = val;
1368 } else {
1da177e4 1369 update_defense_level();
1da177e4
LT
1370 }
1371 }
1372 return rc;
1373}
1374
1375
1376static int
1377proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1378 void __user *buffer, size_t *lenp, loff_t *ppos)
1379{
1380 int *valp = table->data;
1381 int val[2];
1382 int rc;
1383
1384 /* backup the value first */
1385 memcpy(val, valp, sizeof(val));
1386
1387 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1388 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1389 /* Restore the correct value */
1390 memcpy(valp, val, sizeof(val));
1391 }
1392 return rc;
1393}
1394
1395
1396/*
1397 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1398 */
1399
1400static struct ctl_table vs_vars[] = {
1401 {
1402 .ctl_name = NET_IPV4_VS_AMEMTHRESH,
1403 .procname = "amemthresh",
1404 .data = &sysctl_ip_vs_amemthresh,
1405 .maxlen = sizeof(int),
1406 .mode = 0644,
1407 .proc_handler = &proc_dointvec,
1408 },
1409#ifdef CONFIG_IP_VS_DEBUG
1410 {
1411 .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
1412 .procname = "debug_level",
1413 .data = &sysctl_ip_vs_debug_level,
1414 .maxlen = sizeof(int),
1415 .mode = 0644,
1416 .proc_handler = &proc_dointvec,
1417 },
1418#endif
1419 {
1420 .ctl_name = NET_IPV4_VS_AMDROPRATE,
1421 .procname = "am_droprate",
1422 .data = &sysctl_ip_vs_am_droprate,
1423 .maxlen = sizeof(int),
1424 .mode = 0644,
1425 .proc_handler = &proc_dointvec,
1426 },
1427 {
1428 .ctl_name = NET_IPV4_VS_DROP_ENTRY,
1429 .procname = "drop_entry",
1430 .data = &sysctl_ip_vs_drop_entry,
1431 .maxlen = sizeof(int),
1432 .mode = 0644,
1433 .proc_handler = &proc_do_defense_mode,
1434 },
1435 {
1436 .ctl_name = NET_IPV4_VS_DROP_PACKET,
1437 .procname = "drop_packet",
1438 .data = &sysctl_ip_vs_drop_packet,
1439 .maxlen = sizeof(int),
1440 .mode = 0644,
1441 .proc_handler = &proc_do_defense_mode,
1442 },
1443 {
1444 .ctl_name = NET_IPV4_VS_SECURE_TCP,
1445 .procname = "secure_tcp",
1446 .data = &sysctl_ip_vs_secure_tcp,
1447 .maxlen = sizeof(int),
1448 .mode = 0644,
1449 .proc_handler = &proc_do_defense_mode,
1450 },
1451#if 0
1452 {
1453 .ctl_name = NET_IPV4_VS_TO_ES,
1454 .procname = "timeout_established",
1455 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1456 .maxlen = sizeof(int),
1457 .mode = 0644,
1458 .proc_handler = &proc_dointvec_jiffies,
1459 },
1460 {
1461 .ctl_name = NET_IPV4_VS_TO_SS,
1462 .procname = "timeout_synsent",
1463 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1464 .maxlen = sizeof(int),
1465 .mode = 0644,
1466 .proc_handler = &proc_dointvec_jiffies,
1467 },
1468 {
1469 .ctl_name = NET_IPV4_VS_TO_SR,
1470 .procname = "timeout_synrecv",
1471 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1472 .maxlen = sizeof(int),
1473 .mode = 0644,
1474 .proc_handler = &proc_dointvec_jiffies,
1475 },
1476 {
1477 .ctl_name = NET_IPV4_VS_TO_FW,
1478 .procname = "timeout_finwait",
1479 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1480 .maxlen = sizeof(int),
1481 .mode = 0644,
1482 .proc_handler = &proc_dointvec_jiffies,
1483 },
1484 {
1485 .ctl_name = NET_IPV4_VS_TO_TW,
1486 .procname = "timeout_timewait",
1487 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1488 .maxlen = sizeof(int),
1489 .mode = 0644,
1490 .proc_handler = &proc_dointvec_jiffies,
1491 },
1492 {
1493 .ctl_name = NET_IPV4_VS_TO_CL,
1494 .procname = "timeout_close",
1495 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1496 .maxlen = sizeof(int),
1497 .mode = 0644,
1498 .proc_handler = &proc_dointvec_jiffies,
1499 },
1500 {
1501 .ctl_name = NET_IPV4_VS_TO_CW,
1502 .procname = "timeout_closewait",
1503 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1504 .maxlen = sizeof(int),
1505 .mode = 0644,
1506 .proc_handler = &proc_dointvec_jiffies,
1507 },
1508 {
1509 .ctl_name = NET_IPV4_VS_TO_LA,
1510 .procname = "timeout_lastack",
1511 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1512 .maxlen = sizeof(int),
1513 .mode = 0644,
1514 .proc_handler = &proc_dointvec_jiffies,
1515 },
1516 {
1517 .ctl_name = NET_IPV4_VS_TO_LI,
1518 .procname = "timeout_listen",
1519 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1520 .maxlen = sizeof(int),
1521 .mode = 0644,
1522 .proc_handler = &proc_dointvec_jiffies,
1523 },
1524 {
1525 .ctl_name = NET_IPV4_VS_TO_SA,
1526 .procname = "timeout_synack",
1527 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1528 .maxlen = sizeof(int),
1529 .mode = 0644,
1530 .proc_handler = &proc_dointvec_jiffies,
1531 },
1532 {
1533 .ctl_name = NET_IPV4_VS_TO_UDP,
1534 .procname = "timeout_udp",
1535 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1536 .maxlen = sizeof(int),
1537 .mode = 0644,
1538 .proc_handler = &proc_dointvec_jiffies,
1539 },
1540 {
1541 .ctl_name = NET_IPV4_VS_TO_ICMP,
1542 .procname = "timeout_icmp",
1543 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1544 .maxlen = sizeof(int),
1545 .mode = 0644,
1546 .proc_handler = &proc_dointvec_jiffies,
1547 },
1548#endif
1549 {
1550 .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
1551 .procname = "cache_bypass",
1552 .data = &sysctl_ip_vs_cache_bypass,
1553 .maxlen = sizeof(int),
1554 .mode = 0644,
1555 .proc_handler = &proc_dointvec,
1556 },
1557 {
1558 .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
1559 .procname = "expire_nodest_conn",
1560 .data = &sysctl_ip_vs_expire_nodest_conn,
1561 .maxlen = sizeof(int),
1562 .mode = 0644,
1563 .proc_handler = &proc_dointvec,
1564 },
1565 {
1566 .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
1567 .procname = "expire_quiescent_template",
1568 .data = &sysctl_ip_vs_expire_quiescent_template,
1569 .maxlen = sizeof(int),
1570 .mode = 0644,
1571 .proc_handler = &proc_dointvec,
1572 },
1573 {
1574 .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
1575 .procname = "sync_threshold",
1576 .data = &sysctl_ip_vs_sync_threshold,
1577 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1578 .mode = 0644,
1579 .proc_handler = &proc_do_sync_threshold,
1580 },
1581 {
1582 .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
1583 .procname = "nat_icmp_send",
1584 .data = &sysctl_ip_vs_nat_icmp_send,
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = &proc_dointvec,
1588 },
1589 { .ctl_name = 0 }
1590};
1591
1592static ctl_table vs_table[] = {
1593 {
1594 .ctl_name = NET_IPV4_VS,
1595 .procname = "vs",
1596 .mode = 0555,
1597 .child = vs_vars
1598 },
1599 { .ctl_name = 0 }
1600};
1601
bf0ff9e5 1602static ctl_table ipvs_ipv4_table[] = {
1da177e4
LT
1603 {
1604 .ctl_name = NET_IPV4,
1605 .procname = "ipv4",
1606 .mode = 0555,
1607 .child = vs_table,
1608 },
1609 { .ctl_name = 0 }
1610};
1611
1612static ctl_table vs_root_table[] = {
1613 {
1614 .ctl_name = CTL_NET,
1615 .procname = "net",
1616 .mode = 0555,
bf0ff9e5 1617 .child = ipvs_ipv4_table,
1da177e4
LT
1618 },
1619 { .ctl_name = 0 }
1620};
1621
1622static struct ctl_table_header * sysctl_header;
1623
1624#ifdef CONFIG_PROC_FS
1625
/* Per-reader cursor for the /proc/net/ip_vs seq_file traversal. */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* current hash bucket in that table */
};
1630
1631/*
1632 * Write the contents of the VS rule table to a PROCfs file.
1633 * (It is kept just for backward compatibility)
1634 */
1635static inline const char *ip_vs_fwd_name(unsigned flags)
1636{
1637 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1638 case IP_VS_CONN_F_LOCALNODE:
1639 return "Local";
1640 case IP_VS_CONN_F_TUNNEL:
1641 return "Tunnel";
1642 case IP_VS_CONN_F_DROUTE:
1643 return "Route";
1644 default:
1645 return "Masq";
1646 }
1647}
1648
1649
1650/* Get the Nth entry in the two lists */
/* Get the Nth entry in the two lists: first the <proto,addr,port>
 * table, then the fwmark table.  Records which table/bucket the hit
 * came from in the iterator so _seq_next can resume, and returns the
 * service, or NULL if pos is past the end. */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (pos-- == 0){
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}
1681
/* seq_file .start: take the service-table read lock for the whole
 * traversal (released in ip_vs_info_seq_stop) and return the entry at
 * *pos, or SEQ_START_TOKEN so .show emits the header first. */
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
{

	read_lock_bh(&__ip_vs_svc_lock);
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
1688
1689
/* seq_file .next: advance the cursor.  Continues within the current
 * bucket, then scans later buckets of the same table, and when the
 * <proto,addr,port> table is exhausted falls through to the fwmark
 * table.  Returns the next service or NULL at the end. */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
			return list_entry(e, struct ip_vs_service, s_list);


		/* bucket exhausted: return the first entry of the next
		 * non-empty bucket (the inner loop body runs at most once) */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				return svc;
			}
		}

		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
		return list_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list)
			return svc;
	}

	return NULL;
}
1734
/* seq_file .stop: drop the lock taken in ip_vs_info_seq_start(). */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1739
1740
/* seq_file .show: print the header (for SEQ_START_TOKEN) or one
 * service line followed by one line per destination, in the legacy
 * /proc/net/ip_vs format. */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		/* fwmark services print "FWM <mark>" instead of proto/addr */
		if (iter->table == ip_vs_svc_table)
			seq_printf(seq, "%s  %08X:%04X %s ",
				   ip_vs_proto_name(svc->protocol),
				   ntohl(svc->addr),
				   ntohs(svc->port),
				   svc->scheduler->name);
		else
			seq_printf(seq, "FWM  %08X %s ",
				   svc->fwmark, svc->scheduler->name);

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry(dest, &svc->destinations, n_list) {
			seq_printf(seq,
				   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
				   ntohl(dest->addr), ntohs(dest->port),
				   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
				   atomic_read(&dest->weight),
				   atomic_read(&dest->activeconns),
				   atomic_read(&dest->inactconns));
		}
	}
	return 0;
}
1785
/* Iterator callbacks for /proc/net/ip_vs. */
static struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1792
/* Open /proc/net/ip_vs: allocate the per-reader iterator, attach it
 * to the seq_file, and zero it so the traversal starts from scratch.
 * The iterator is freed here on seq_open() failure, otherwise by
 * seq_release_private() on close. */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	int rc = -ENOMEM;
	struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		goto out;

	rc = seq_open(file, &ip_vs_info_seq_ops);
	if (rc)
		goto out_kfree;

	seq	     = file->private_data;
	seq->private = s;
	memset(s, 0, sizeof(*s));
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}
1815
/* File operations for /proc/net/ip_vs. */
static struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
1823
1824#endif
1825
1826struct ip_vs_stats ip_vs_stats;
1827
1828#ifdef CONFIG_PROC_FS
/* Render /proc/net/ip_vs_stats: global totals then per-second rates,
 * sampled atomically under the stats lock. */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
	seq_printf(seq,
		   "   Conns  Packets  Packets            Bytes            Bytes\n");

	spin_lock_bh(&ip_vs_stats.lock);
	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
		   ip_vs_stats.inpkts, ip_vs_stats.outpkts,
		   (unsigned long long) ip_vs_stats.inbytes,
		   (unsigned long long) ip_vs_stats.outbytes);

/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
			ip_vs_stats.cps,
			ip_vs_stats.inpps,
			ip_vs_stats.outpps,
			ip_vs_stats.inbps,
			ip_vs_stats.outbps);
	spin_unlock_bh(&ip_vs_stats.lock);

	return 0;
}
1857
/* Open /proc/net/ip_vs_stats as a one-shot seq_file. */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_vs_stats_show, NULL);
}
1862
/* File operations for /proc/net/ip_vs_stats. */
static struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1870
1871#endif
1872
1873/*
1874 * Set timeout values for tcp tcpfin udp in the timeout_table.
1875 */
/* Apply user-supplied protocol timeouts.  Values arrive in seconds;
 * the protocol tables store jiffies.  A value of 0 leaves the current
 * setting untouched.  Always returns 0. */
static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
{
	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}
1903
1904
1905#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1906#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1907#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1908 sizeof(struct ip_vs_dest_user))
1909#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1910#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1911#define MAX_ARG_LEN SVCDEST_ARG_LEN
1912
9b5b5cff 1913static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1914 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1915 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1916 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1917 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1918 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1919 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1921 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1922 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1923 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1924 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1925};
1926
/*
 * [SET] sockopt handler: all configuration mutations (add/edit/delete
 * services and destinations, flush, zero counters, set timeouts,
 * start/stop sync daemons) funnel through here, serialized by
 * __ip_vs_mutex.  Requires CAP_NET_ADMIN and an exact argument length.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;
	unsigned char arg[MAX_ARG_LEN];
	struct ip_vs_service_user *usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* each command has a fixed argument size; reject anything else */
	if (len != set_arglen[SET_CMDID(cmd)]) {
		IP_VS_ERR("set_ctl: len %u != %u\n",
			  len, set_arglen[SET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	if (down_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	/* commands that take no service argument are dispatched first */
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = stop_sync_thread(dm->state);
		goto out_unlock;
	}

	/* remaining commands carry a service spec, optionally followed
	 * by a destination spec */
	usvc = (struct ip_vs_service_user *)arg;
	udest = (struct ip_vs_dest_user *)(usvc + 1);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc->fwmark && !usvc->addr && !usvc->port) {
			ret = ip_vs_zero_all();
			goto out_unlock;
		}
	}

	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
	if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
			  usvc->protocol, NIPQUAD(usvc->addr),
			  ntohs(usvc->port), usvc->sched_name);
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	if (usvc->fwmark == 0)
		svc = __ip_vs_service_get(usvc->protocol,
					  usvc->addr, usvc->port);
	else
		svc = __ip_vs_svc_fwm_get(usvc->fwmark);

	/* every command except ADD requires an existing matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc->protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			/* service destroyed: skip the put below */
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, udest);
		break;
	default:
		ret = -EINVAL;
	}

	/* drop the reference taken by the lookup above */
	if (svc)
		ip_vs_service_put(svc);

  out_unlock:
	up(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
2049
2050
/*
 * Snapshot the counters of @src into the userspace stats structure,
 * atomically under the stats lock.
 * NOTE(review): the memcpy copies everything up to the embedded
 * spinlock, which assumes the counter fields precede the lock in
 * struct ip_vs_stats and mirror struct ip_vs_stats_user -- confirm
 * against the struct definitions if either is changed.
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, src, (char*)&src->lock - (char*)src);
	spin_unlock_bh(&src->lock);
}
2058
2059static void
2060ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2061{
2062 dst->protocol = src->protocol;
2063 dst->addr = src->addr;
2064 dst->port = src->port;
2065 dst->fwmark = src->fwmark;
4da62fc7 2066 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2067 dst->flags = src->flags;
2068 dst->timeout = src->timeout / HZ;
2069 dst->netmask = src->netmask;
2070 dst->num_dests = src->num_dests;
2071 ip_vs_copy_stats(&dst->stats, &src->stats);
2072}
2073
2074static inline int
2075__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2076 struct ip_vs_get_services __user *uptr)
2077{
2078 int idx, count=0;
2079 struct ip_vs_service *svc;
2080 struct ip_vs_service_entry entry;
2081 int ret = 0;
2082
2083 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2084 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2085 if (count >= get->num_services)
2086 goto out;
4da62fc7 2087 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2088 ip_vs_copy_service(&entry, svc);
2089 if (copy_to_user(&uptr->entrytable[count],
2090 &entry, sizeof(entry))) {
2091 ret = -EFAULT;
2092 goto out;
2093 }
2094 count++;
2095 }
2096 }
2097
2098 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2099 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2100 if (count >= get->num_services)
2101 goto out;
4da62fc7 2102 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2103 ip_vs_copy_service(&entry, svc);
2104 if (copy_to_user(&uptr->entrytable[count],
2105 &entry, sizeof(entry))) {
2106 ret = -EFAULT;
2107 goto out;
2108 }
2109 count++;
2110 }
2111 }
2112 out:
2113 return ret;
2114}
2115
2116static inline int
2117__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2118 struct ip_vs_get_dests __user *uptr)
2119{
2120 struct ip_vs_service *svc;
2121 int ret = 0;
2122
2123 if (get->fwmark)
2124 svc = __ip_vs_svc_fwm_get(get->fwmark);
2125 else
2126 svc = __ip_vs_service_get(get->protocol,
2127 get->addr, get->port);
2128 if (svc) {
2129 int count = 0;
2130 struct ip_vs_dest *dest;
2131 struct ip_vs_dest_entry entry;
2132
2133 list_for_each_entry(dest, &svc->destinations, n_list) {
2134 if (count >= get->num_dests)
2135 break;
2136
2137 entry.addr = dest->addr;
2138 entry.port = dest->port;
2139 entry.conn_flags = atomic_read(&dest->conn_flags);
2140 entry.weight = atomic_read(&dest->weight);
2141 entry.u_threshold = dest->u_threshold;
2142 entry.l_threshold = dest->l_threshold;
2143 entry.activeconns = atomic_read(&dest->activeconns);
2144 entry.inactconns = atomic_read(&dest->inactconns);
2145 entry.persistconns = atomic_read(&dest->persistconns);
2146 ip_vs_copy_stats(&entry.stats, &dest->stats);
2147 if (copy_to_user(&uptr->entrytable[count],
2148 &entry, sizeof(entry))) {
2149 ret = -EFAULT;
2150 break;
2151 }
2152 count++;
2153 }
2154 ip_vs_service_put(svc);
2155 } else
2156 ret = -ESRCH;
2157 return ret;
2158}
2159
/* Report the current protocol timeouts, converted from jiffies back
 * to seconds; fields for protocols compiled out are left untouched. */
static inline void
__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
{
#ifdef CONFIG_IP_VS_PROTO_TCP
	u->tcp_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	u->udp_timeout =
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
2174
2175
2176#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2177#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2178#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2179#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2180#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2181#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2182#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2183
9b5b5cff 2184static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2185 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2186 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2187 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2188 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2189 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2190 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2191 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2192};
2193
2194static int
2195do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2196{
2197 unsigned char arg[128];
2198 int ret = 0;
2199
2200 if (!capable(CAP_NET_ADMIN))
2201 return -EPERM;
2202
2203 if (*len < get_arglen[GET_CMDID(cmd)]) {
2204 IP_VS_ERR("get_ctl: len %u < %u\n",
2205 *len, get_arglen[GET_CMDID(cmd)]);
2206 return -EINVAL;
2207 }
2208
2209 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2210 return -EFAULT;
2211
2212 if (down_interruptible(&__ip_vs_mutex))
2213 return -ERESTARTSYS;
2214
2215 switch (cmd) {
2216 case IP_VS_SO_GET_VERSION:
2217 {
2218 char buf[64];
2219
2220 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2221 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2222 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2223 ret = -EFAULT;
2224 goto out;
2225 }
2226 *len = strlen(buf)+1;
2227 }
2228 break;
2229
2230 case IP_VS_SO_GET_INFO:
2231 {
2232 struct ip_vs_getinfo info;
2233 info.version = IP_VS_VERSION_CODE;
2234 info.size = IP_VS_CONN_TAB_SIZE;
2235 info.num_services = ip_vs_num_services;
2236 if (copy_to_user(user, &info, sizeof(info)) != 0)
2237 ret = -EFAULT;
2238 }
2239 break;
2240
2241 case IP_VS_SO_GET_SERVICES:
2242 {
2243 struct ip_vs_get_services *get;
2244 int size;
2245
2246 get = (struct ip_vs_get_services *)arg;
2247 size = sizeof(*get) +
2248 sizeof(struct ip_vs_service_entry) * get->num_services;
2249 if (*len != size) {
2250 IP_VS_ERR("length: %u != %u\n", *len, size);
2251 ret = -EINVAL;
2252 goto out;
2253 }
2254 ret = __ip_vs_get_service_entries(get, user);
2255 }
2256 break;
2257
2258 case IP_VS_SO_GET_SERVICE:
2259 {
2260 struct ip_vs_service_entry *entry;
2261 struct ip_vs_service *svc;
2262
2263 entry = (struct ip_vs_service_entry *)arg;
2264 if (entry->fwmark)
2265 svc = __ip_vs_svc_fwm_get(entry->fwmark);
2266 else
2267 svc = __ip_vs_service_get(entry->protocol,
2268 entry->addr, entry->port);
2269 if (svc) {
2270 ip_vs_copy_service(entry, svc);
2271 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2272 ret = -EFAULT;
2273 ip_vs_service_put(svc);
2274 } else
2275 ret = -ESRCH;
2276 }
2277 break;
2278
2279 case IP_VS_SO_GET_DESTS:
2280 {
2281 struct ip_vs_get_dests *get;
2282 int size;
2283
2284 get = (struct ip_vs_get_dests *)arg;
2285 size = sizeof(*get) +
2286 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2287 if (*len != size) {
2288 IP_VS_ERR("length: %u != %u\n", *len, size);
2289 ret = -EINVAL;
2290 goto out;
2291 }
2292 ret = __ip_vs_get_dest_entries(get, user);
2293 }
2294 break;
2295
2296 case IP_VS_SO_GET_TIMEOUT:
2297 {
2298 struct ip_vs_timeout_user t;
2299
2300 __ip_vs_get_timeouts(&t);
2301 if (copy_to_user(user, &t, sizeof(t)) != 0)
2302 ret = -EFAULT;
2303 }
2304 break;
2305
2306 case IP_VS_SO_GET_DAEMON:
2307 {
2308 struct ip_vs_daemon_user d[2];
2309
2310 memset(&d, 0, sizeof(d));
2311 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2312 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2313 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2314 d[0].syncid = ip_vs_master_syncid;
2315 }
2316 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2317 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2318 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2319 d[1].syncid = ip_vs_backup_syncid;
2320 }
2321 if (copy_to_user(user, &d, sizeof(d)) != 0)
2322 ret = -EFAULT;
2323 }
2324 break;
2325
2326 default:
2327 ret = -EINVAL;
2328 }
2329
2330 out:
2331 up(&__ip_vs_mutex);
2332 return ret;
2333}
2334
2335
/* Netfilter sockopt registration: routes the IP_VS_SO_SET_*/GET_*
 * command ranges to the two handlers above. */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
};
2345
2346
/*
 * Initialize the IPVS control plane: register the sockopt interface,
 * create the /proc files and sysctl tree, set up the service hash
 * tables and global statistics, and arm the periodic defense timer.
 * Returns 0 on success or the nf_register_sockopt() error code.
 */
int ip_vs_control_init(void)
{
	int ret;
	int idx;

	EnterFunction(2);

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		IP_VS_ERR("cannot register sockopt.\n");
		return ret;
	}

	proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
	proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);

	sysctl_header = register_sysctl_table(vs_root_table, 0);

	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}
	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
	}

	memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
	spin_lock_init(&ip_vs_stats.lock);
	/* start rate estimation for the global counters */
	ip_vs_new_estimator(&ip_vs_stats);

	/* Hook the defense timer */
	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);

	LeaveFunction(2);
	return 0;
}
2384
2385
/* Tear down the control plane in reverse order of ip_vs_control_init():
 * stop the defense timer and estimator, then unregister the sysctl
 * tree, /proc files and sockopt interface. */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	cancel_rearming_delayed_work(&defense_work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove("ip_vs_stats");
	proc_net_remove("ip_vs");
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}