2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
40 #include <net/net_namespace.h>
42 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ip6_route.h>
46 #include <net/route.h>
48 #include <net/genetlink.h>
50 #include <asm/uaccess.h>
52 #include <net/ip_vs.h>
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_SPINLOCK(ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
72 /* 1/rate drop and drop-entry variables */
73 int ip_vs_drop_rate
= 0;
74 int ip_vs_drop_counter
= 0;
75 static atomic_t ip_vs_dropentry
= ATOMIC_INIT(0);
77 /* number of virtual services */
78 static int ip_vs_num_services
= 0;
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry
= 0;
82 static int sysctl_ip_vs_drop_packet
= 0;
83 static int sysctl_ip_vs_secure_tcp
= 0;
84 static int sysctl_ip_vs_amemthresh
= 1024;
85 static int sysctl_ip_vs_am_droprate
= 10;
86 int sysctl_ip_vs_cache_bypass
= 0;
87 int sysctl_ip_vs_expire_nodest_conn
= 0;
88 int sysctl_ip_vs_expire_quiescent_template
= 0;
89 int sysctl_ip_vs_sync_threshold
[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send
= 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack
;
94 int sysctl_ip_vs_snat_reroute
= 1;
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Return the current debug level (read by the IP_VS_DBG* macros). */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/*
 * Returns nonzero when @addr routes to a loopback device in init_net,
 * i.e. the address is local to this host.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
		return 1;

	return 0;
}
#endif
127 * update_defense_level is called from keventd and from sysctl,
128 * so it needs to protect itself from softirqs
130 static void update_defense_level(void)
133 static int old_secure_tcp
= 0;
138 /* we only count free and buffered memory (in pages) */
140 availmem
= i
.freeram
+ i
.bufferram
;
141 /* however in linux 2.5 the i.bufferram is total page cache size,
143 /* si_swapinfo(&i); */
144 /* availmem = availmem - (i.totalswap - i.freeswap); */
146 nomem
= (availmem
< sysctl_ip_vs_amemthresh
);
151 spin_lock(&__ip_vs_dropentry_lock
);
152 switch (sysctl_ip_vs_drop_entry
) {
154 atomic_set(&ip_vs_dropentry
, 0);
158 atomic_set(&ip_vs_dropentry
, 1);
159 sysctl_ip_vs_drop_entry
= 2;
161 atomic_set(&ip_vs_dropentry
, 0);
166 atomic_set(&ip_vs_dropentry
, 1);
168 atomic_set(&ip_vs_dropentry
, 0);
169 sysctl_ip_vs_drop_entry
= 1;
173 atomic_set(&ip_vs_dropentry
, 1);
176 spin_unlock(&__ip_vs_dropentry_lock
);
179 spin_lock(&__ip_vs_droppacket_lock
);
180 switch (sysctl_ip_vs_drop_packet
) {
186 ip_vs_drop_rate
= ip_vs_drop_counter
187 = sysctl_ip_vs_amemthresh
/
188 (sysctl_ip_vs_amemthresh
-availmem
);
189 sysctl_ip_vs_drop_packet
= 2;
196 ip_vs_drop_rate
= ip_vs_drop_counter
197 = sysctl_ip_vs_amemthresh
/
198 (sysctl_ip_vs_amemthresh
-availmem
);
201 sysctl_ip_vs_drop_packet
= 1;
205 ip_vs_drop_rate
= sysctl_ip_vs_am_droprate
;
208 spin_unlock(&__ip_vs_droppacket_lock
);
211 spin_lock(&ip_vs_securetcp_lock
);
212 switch (sysctl_ip_vs_secure_tcp
) {
214 if (old_secure_tcp
>= 2)
219 if (old_secure_tcp
< 2)
221 sysctl_ip_vs_secure_tcp
= 2;
223 if (old_secure_tcp
>= 2)
229 if (old_secure_tcp
< 2)
232 if (old_secure_tcp
>= 2)
234 sysctl_ip_vs_secure_tcp
= 1;
238 if (old_secure_tcp
< 2)
242 old_secure_tcp
= sysctl_ip_vs_secure_tcp
;
244 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp
>1);
245 spin_unlock(&ip_vs_securetcp_lock
);
/*
 *	Timer for checking the defense
 */
#define DEFENSE_TIMER_PERIOD	1*HZ
static void defense_work_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
258 static void defense_work_handler(struct work_struct
*work
)
260 update_defense_level();
261 if (atomic_read(&ip_vs_dropentry
))
262 ip_vs_random_dropentry();
264 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
268 ip_vs_use_count_inc(void)
270 return try_module_get(THIS_MODULE
);
274 ip_vs_use_count_dec(void)
276 module_put(THIS_MODULE
);
281 * Hash table: for virtual service lookups
283 #define IP_VS_SVC_TAB_BITS 8
284 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
285 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
287 /* the service table hashed by <protocol, addr, port> */
288 static struct list_head ip_vs_svc_table
[IP_VS_SVC_TAB_SIZE
];
289 /* the service table hashed by fwmark */
290 static struct list_head ip_vs_svc_fwm_table
[IP_VS_SVC_TAB_SIZE
];
293 * Hash table: for real service lookups
295 #define IP_VS_RTAB_BITS 4
296 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
297 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
299 static struct list_head ip_vs_rtable
[IP_VS_RTAB_SIZE
];
302 * Trash for destinations
304 static LIST_HEAD(ip_vs_dest_trash
);
307 * FTP & NULL virtual service counters
309 static atomic_t ip_vs_ftpsvc_counter
= ATOMIC_INIT(0);
310 static atomic_t ip_vs_nullsvc_counter
= ATOMIC_INIT(0);
314 * Returns hash value for virtual service
316 static __inline__
unsigned
317 ip_vs_svc_hashkey(int af
, unsigned proto
, const union nf_inet_addr
*addr
,
320 register unsigned porth
= ntohs(port
);
321 __be32 addr_fold
= addr
->ip
;
323 #ifdef CONFIG_IP_VS_IPV6
325 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
326 addr
->ip6
[2]^addr
->ip6
[3];
329 return (proto
^ntohl(addr_fold
)^(porth
>>IP_VS_SVC_TAB_BITS
)^porth
)
330 & IP_VS_SVC_TAB_MASK
;
334 * Returns hash value of fwmark for virtual service lookup
336 static __inline__
unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark
)
338 return fwmark
& IP_VS_SVC_TAB_MASK
;
342 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
343 * or in the ip_vs_svc_fwm_table by fwmark.
344 * Should be called with locked tables.
346 static int ip_vs_svc_hash(struct ip_vs_service
*svc
)
350 if (svc
->flags
& IP_VS_SVC_F_HASHED
) {
351 pr_err("%s(): request for already hashed, called from %pF\n",
352 __func__
, __builtin_return_address(0));
356 if (svc
->fwmark
== 0) {
358 * Hash it by <protocol,addr,port> in ip_vs_svc_table
360 hash
= ip_vs_svc_hashkey(svc
->af
, svc
->protocol
, &svc
->addr
,
362 list_add(&svc
->s_list
, &ip_vs_svc_table
[hash
]);
365 * Hash it by fwmark in ip_vs_svc_fwm_table
367 hash
= ip_vs_svc_fwm_hashkey(svc
->fwmark
);
368 list_add(&svc
->f_list
, &ip_vs_svc_fwm_table
[hash
]);
371 svc
->flags
|= IP_VS_SVC_F_HASHED
;
372 /* increase its refcnt because it is referenced by the svc table */
373 atomic_inc(&svc
->refcnt
);
379 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
380 * Should be called with locked tables.
382 static int ip_vs_svc_unhash(struct ip_vs_service
*svc
)
384 if (!(svc
->flags
& IP_VS_SVC_F_HASHED
)) {
385 pr_err("%s(): request for unhash flagged, called from %pF\n",
386 __func__
, __builtin_return_address(0));
390 if (svc
->fwmark
== 0) {
391 /* Remove it from the ip_vs_svc_table table */
392 list_del(&svc
->s_list
);
394 /* Remove it from the ip_vs_svc_fwm_table table */
395 list_del(&svc
->f_list
);
398 svc
->flags
&= ~IP_VS_SVC_F_HASHED
;
399 atomic_dec(&svc
->refcnt
);
405 * Get service by {proto,addr,port} in the service table.
407 static inline struct ip_vs_service
*
408 __ip_vs_service_get(int af
, __u16 protocol
, const union nf_inet_addr
*vaddr
,
412 struct ip_vs_service
*svc
;
414 /* Check for "full" addressed entries */
415 hash
= ip_vs_svc_hashkey(af
, protocol
, vaddr
, vport
);
417 list_for_each_entry(svc
, &ip_vs_svc_table
[hash
], s_list
){
419 && ip_vs_addr_equal(af
, &svc
->addr
, vaddr
)
420 && (svc
->port
== vport
)
421 && (svc
->protocol
== protocol
)) {
423 atomic_inc(&svc
->usecnt
);
433 * Get service by {fwmark} in the service table.
435 static inline struct ip_vs_service
*
436 __ip_vs_svc_fwm_get(int af
, __u32 fwmark
)
439 struct ip_vs_service
*svc
;
441 /* Check for fwmark addressed entries */
442 hash
= ip_vs_svc_fwm_hashkey(fwmark
);
444 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[hash
], f_list
) {
445 if (svc
->fwmark
== fwmark
&& svc
->af
== af
) {
447 atomic_inc(&svc
->usecnt
);
455 struct ip_vs_service
*
456 ip_vs_service_get(int af
, __u32 fwmark
, __u16 protocol
,
457 const union nf_inet_addr
*vaddr
, __be16 vport
)
459 struct ip_vs_service
*svc
;
461 read_lock(&__ip_vs_svc_lock
);
464 * Check the table hashed by fwmark first
466 if (fwmark
&& (svc
= __ip_vs_svc_fwm_get(af
, fwmark
)))
470 * Check the table hashed by <protocol,addr,port>
471 * for "full" addressed entries
473 svc
= __ip_vs_service_get(af
, protocol
, vaddr
, vport
);
476 && protocol
== IPPROTO_TCP
477 && atomic_read(&ip_vs_ftpsvc_counter
)
478 && (vport
== FTPDATA
|| ntohs(vport
) >= PROT_SOCK
)) {
480 * Check if ftp service entry exists, the packet
481 * might belong to FTP data connections.
483 svc
= __ip_vs_service_get(af
, protocol
, vaddr
, FTPPORT
);
487 && atomic_read(&ip_vs_nullsvc_counter
)) {
489 * Check if the catch-all port (port zero) exists
491 svc
= __ip_vs_service_get(af
, protocol
, vaddr
, 0);
495 read_unlock(&__ip_vs_svc_lock
);
497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
498 fwmark
, ip_vs_proto_name(protocol
),
499 IP_VS_DBG_ADDR(af
, vaddr
), ntohs(vport
),
500 svc
? "hit" : "not hit");
507 __ip_vs_bind_svc(struct ip_vs_dest
*dest
, struct ip_vs_service
*svc
)
509 atomic_inc(&svc
->refcnt
);
514 __ip_vs_unbind_svc(struct ip_vs_dest
*dest
)
516 struct ip_vs_service
*svc
= dest
->svc
;
519 if (atomic_dec_and_test(&svc
->refcnt
))
525 * Returns hash value for real service
527 static inline unsigned ip_vs_rs_hashkey(int af
,
528 const union nf_inet_addr
*addr
,
531 register unsigned porth
= ntohs(port
);
532 __be32 addr_fold
= addr
->ip
;
534 #ifdef CONFIG_IP_VS_IPV6
536 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
537 addr
->ip6
[2]^addr
->ip6
[3];
540 return (ntohl(addr_fold
)^(porth
>>IP_VS_RTAB_BITS
)^porth
)
545 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
546 * should be called with locked tables.
548 static int ip_vs_rs_hash(struct ip_vs_dest
*dest
)
552 if (!list_empty(&dest
->d_list
)) {
557 * Hash by proto,addr,port,
558 * which are the parameters of the real service.
560 hash
= ip_vs_rs_hashkey(dest
->af
, &dest
->addr
, dest
->port
);
562 list_add(&dest
->d_list
, &ip_vs_rtable
[hash
]);
568 * UNhashes ip_vs_dest from ip_vs_rtable.
569 * should be called with locked tables.
571 static int ip_vs_rs_unhash(struct ip_vs_dest
*dest
)
574 * Remove it from the ip_vs_rtable table.
576 if (!list_empty(&dest
->d_list
)) {
577 list_del(&dest
->d_list
);
578 INIT_LIST_HEAD(&dest
->d_list
);
585 * Lookup real service by <proto,addr,port> in the real service table.
588 ip_vs_lookup_real_service(int af
, __u16 protocol
,
589 const union nf_inet_addr
*daddr
,
593 struct ip_vs_dest
*dest
;
596 * Check for "full" addressed entries
597 * Return the first found entry
599 hash
= ip_vs_rs_hashkey(af
, daddr
, dport
);
601 read_lock(&__ip_vs_rs_lock
);
602 list_for_each_entry(dest
, &ip_vs_rtable
[hash
], d_list
) {
604 && ip_vs_addr_equal(af
, &dest
->addr
, daddr
)
605 && (dest
->port
== dport
)
606 && ((dest
->protocol
== protocol
) ||
609 read_unlock(&__ip_vs_rs_lock
);
613 read_unlock(&__ip_vs_rs_lock
);
619 * Lookup destination by {addr,port} in the given service
621 static struct ip_vs_dest
*
622 ip_vs_lookup_dest(struct ip_vs_service
*svc
, const union nf_inet_addr
*daddr
,
625 struct ip_vs_dest
*dest
;
628 * Find the destination for the given service
630 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
631 if ((dest
->af
== svc
->af
)
632 && ip_vs_addr_equal(svc
->af
, &dest
->addr
, daddr
)
633 && (dest
->port
== dport
)) {
643 * Find destination by {daddr,dport,vaddr,protocol}
644 * Cretaed to be used in ip_vs_process_message() in
645 * the backup synchronization daemon. It finds the
646 * destination to be bound to the received connection
649 * ip_vs_lookup_real_service() looked promissing, but
650 * seems not working as expected.
652 struct ip_vs_dest
*ip_vs_find_dest(int af
, const union nf_inet_addr
*daddr
,
654 const union nf_inet_addr
*vaddr
,
655 __be16 vport
, __u16 protocol
)
657 struct ip_vs_dest
*dest
;
658 struct ip_vs_service
*svc
;
660 svc
= ip_vs_service_get(af
, 0, protocol
, vaddr
, vport
);
663 dest
= ip_vs_lookup_dest(svc
, daddr
, dport
);
665 atomic_inc(&dest
->refcnt
);
666 ip_vs_service_put(svc
);
671 * Lookup dest by {svc,addr,port} in the destination trash.
672 * The destination trash is used to hold the destinations that are removed
673 * from the service table but are still referenced by some conn entries.
674 * The reason to add the destination trash is when the dest is temporary
675 * down (either by administrator or by monitor program), the dest can be
676 * picked back from the trash, the remaining connections to the dest can
677 * continue, and the counting information of the dest is also useful for
680 static struct ip_vs_dest
*
681 ip_vs_trash_get_dest(struct ip_vs_service
*svc
, const union nf_inet_addr
*daddr
,
684 struct ip_vs_dest
*dest
, *nxt
;
687 * Find the destination in trash
689 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
690 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
693 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
695 atomic_read(&dest
->refcnt
));
696 if (dest
->af
== svc
->af
&&
697 ip_vs_addr_equal(svc
->af
, &dest
->addr
, daddr
) &&
698 dest
->port
== dport
&&
699 dest
->vfwmark
== svc
->fwmark
&&
700 dest
->protocol
== svc
->protocol
&&
702 (ip_vs_addr_equal(svc
->af
, &dest
->vaddr
, &svc
->addr
) &&
703 dest
->vport
== svc
->port
))) {
709 * Try to purge the destination from trash if not referenced
711 if (atomic_read(&dest
->refcnt
) == 1) {
712 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
715 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
717 list_del(&dest
->n_list
);
718 ip_vs_dst_reset(dest
);
719 __ip_vs_unbind_svc(dest
);
729 * Clean up all the destinations in the trash
730 * Called by the ip_vs_control_cleanup()
732 * When the ip_vs_control_clearup is activated by ipvs module exit,
733 * the service tables must have been flushed and all the connections
734 * are expired, and the refcnt of each destination in the trash must
735 * be 1, so we simply release them here.
737 static void ip_vs_trash_cleanup(void)
739 struct ip_vs_dest
*dest
, *nxt
;
741 list_for_each_entry_safe(dest
, nxt
, &ip_vs_dest_trash
, n_list
) {
742 list_del(&dest
->n_list
);
743 ip_vs_dst_reset(dest
);
744 __ip_vs_unbind_svc(dest
);
751 ip_vs_zero_stats(struct ip_vs_stats
*stats
)
753 spin_lock_bh(&stats
->lock
);
755 memset(&stats
->ustats
, 0, sizeof(stats
->ustats
));
756 ip_vs_zero_estimator(stats
);
758 spin_unlock_bh(&stats
->lock
);
762 * Update a destination in the given service
765 __ip_vs_update_dest(struct ip_vs_service
*svc
,
766 struct ip_vs_dest
*dest
, struct ip_vs_dest_user_kern
*udest
)
770 /* set the weight and the flags */
771 atomic_set(&dest
->weight
, udest
->weight
);
772 conn_flags
= udest
->conn_flags
& IP_VS_CONN_F_DEST_MASK
;
773 conn_flags
|= IP_VS_CONN_F_INACTIVE
;
775 /* check if local node and update the flags */
776 #ifdef CONFIG_IP_VS_IPV6
777 if (svc
->af
== AF_INET6
) {
778 if (__ip_vs_addr_is_local_v6(&udest
->addr
.in6
)) {
779 conn_flags
= (conn_flags
& ~IP_VS_CONN_F_FWD_MASK
)
780 | IP_VS_CONN_F_LOCALNODE
;
784 if (inet_addr_type(&init_net
, udest
->addr
.ip
) == RTN_LOCAL
) {
785 conn_flags
= (conn_flags
& ~IP_VS_CONN_F_FWD_MASK
)
786 | IP_VS_CONN_F_LOCALNODE
;
789 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
790 if ((conn_flags
& IP_VS_CONN_F_FWD_MASK
) != IP_VS_CONN_F_MASQ
) {
791 conn_flags
|= IP_VS_CONN_F_NOOUTPUT
;
794 * Put the real service in ip_vs_rtable if not present.
795 * For now only for NAT!
797 write_lock_bh(&__ip_vs_rs_lock
);
799 write_unlock_bh(&__ip_vs_rs_lock
);
801 atomic_set(&dest
->conn_flags
, conn_flags
);
803 /* bind the service */
805 __ip_vs_bind_svc(dest
, svc
);
807 if (dest
->svc
!= svc
) {
808 __ip_vs_unbind_svc(dest
);
809 ip_vs_zero_stats(&dest
->stats
);
810 __ip_vs_bind_svc(dest
, svc
);
814 /* set the dest status flags */
815 dest
->flags
|= IP_VS_DEST_F_AVAILABLE
;
817 if (udest
->u_threshold
== 0 || udest
->u_threshold
> dest
->u_threshold
)
818 dest
->flags
&= ~IP_VS_DEST_F_OVERLOAD
;
819 dest
->u_threshold
= udest
->u_threshold
;
820 dest
->l_threshold
= udest
->l_threshold
;
825 * Create a destination for the given service
828 ip_vs_new_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
,
829 struct ip_vs_dest
**dest_p
)
831 struct ip_vs_dest
*dest
;
836 #ifdef CONFIG_IP_VS_IPV6
837 if (svc
->af
== AF_INET6
) {
838 atype
= ipv6_addr_type(&udest
->addr
.in6
);
839 if ((!(atype
& IPV6_ADDR_UNICAST
) ||
840 atype
& IPV6_ADDR_LINKLOCAL
) &&
841 !__ip_vs_addr_is_local_v6(&udest
->addr
.in6
))
846 atype
= inet_addr_type(&init_net
, udest
->addr
.ip
);
847 if (atype
!= RTN_LOCAL
&& atype
!= RTN_UNICAST
)
851 dest
= kzalloc(sizeof(struct ip_vs_dest
), GFP_KERNEL
);
853 pr_err("%s(): no memory.\n", __func__
);
858 dest
->protocol
= svc
->protocol
;
859 dest
->vaddr
= svc
->addr
;
860 dest
->vport
= svc
->port
;
861 dest
->vfwmark
= svc
->fwmark
;
862 ip_vs_addr_copy(svc
->af
, &dest
->addr
, &udest
->addr
);
863 dest
->port
= udest
->port
;
865 atomic_set(&dest
->activeconns
, 0);
866 atomic_set(&dest
->inactconns
, 0);
867 atomic_set(&dest
->persistconns
, 0);
868 atomic_set(&dest
->refcnt
, 0);
870 INIT_LIST_HEAD(&dest
->d_list
);
871 spin_lock_init(&dest
->dst_lock
);
872 spin_lock_init(&dest
->stats
.lock
);
873 __ip_vs_update_dest(svc
, dest
, udest
);
874 ip_vs_new_estimator(&dest
->stats
);
884 * Add a destination into an existing service
887 ip_vs_add_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
889 struct ip_vs_dest
*dest
;
890 union nf_inet_addr daddr
;
891 __be16 dport
= udest
->port
;
896 if (udest
->weight
< 0) {
897 pr_err("%s(): server weight less than zero\n", __func__
);
901 if (udest
->l_threshold
> udest
->u_threshold
) {
902 pr_err("%s(): lower threshold is higher than upper threshold\n",
907 ip_vs_addr_copy(svc
->af
, &daddr
, &udest
->addr
);
910 * Check if the dest already exists in the list
912 dest
= ip_vs_lookup_dest(svc
, &daddr
, dport
);
915 IP_VS_DBG(1, "%s(): dest already exists\n", __func__
);
920 * Check if the dest already exists in the trash and
921 * is from the same service
923 dest
= ip_vs_trash_get_dest(svc
, &daddr
, dport
);
926 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
927 "dest->refcnt=%d, service %u/%s:%u\n",
928 IP_VS_DBG_ADDR(svc
->af
, &daddr
), ntohs(dport
),
929 atomic_read(&dest
->refcnt
),
931 IP_VS_DBG_ADDR(svc
->af
, &dest
->vaddr
),
934 __ip_vs_update_dest(svc
, dest
, udest
);
937 * Get the destination from the trash
939 list_del(&dest
->n_list
);
941 ip_vs_new_estimator(&dest
->stats
);
943 write_lock_bh(&__ip_vs_svc_lock
);
946 * Wait until all other svc users go away.
948 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
950 list_add(&dest
->n_list
, &svc
->destinations
);
953 /* call the update_service function of its scheduler */
954 if (svc
->scheduler
->update_service
)
955 svc
->scheduler
->update_service(svc
);
957 write_unlock_bh(&__ip_vs_svc_lock
);
962 * Allocate and initialize the dest structure
964 ret
= ip_vs_new_dest(svc
, udest
, &dest
);
970 * Add the dest entry into the list
972 atomic_inc(&dest
->refcnt
);
974 write_lock_bh(&__ip_vs_svc_lock
);
977 * Wait until all other svc users go away.
979 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
981 list_add(&dest
->n_list
, &svc
->destinations
);
984 /* call the update_service function of its scheduler */
985 if (svc
->scheduler
->update_service
)
986 svc
->scheduler
->update_service(svc
);
988 write_unlock_bh(&__ip_vs_svc_lock
);
997 * Edit a destination in the given service
1000 ip_vs_edit_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
1002 struct ip_vs_dest
*dest
;
1003 union nf_inet_addr daddr
;
1004 __be16 dport
= udest
->port
;
1008 if (udest
->weight
< 0) {
1009 pr_err("%s(): server weight less than zero\n", __func__
);
1013 if (udest
->l_threshold
> udest
->u_threshold
) {
1014 pr_err("%s(): lower threshold is higher than upper threshold\n",
1019 ip_vs_addr_copy(svc
->af
, &daddr
, &udest
->addr
);
1022 * Lookup the destination list
1024 dest
= ip_vs_lookup_dest(svc
, &daddr
, dport
);
1027 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__
);
1031 __ip_vs_update_dest(svc
, dest
, udest
);
1033 write_lock_bh(&__ip_vs_svc_lock
);
1035 /* Wait until all other svc users go away */
1036 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1038 /* call the update_service, because server weight may be changed */
1039 if (svc
->scheduler
->update_service
)
1040 svc
->scheduler
->update_service(svc
);
1042 write_unlock_bh(&__ip_vs_svc_lock
);
1051 * Delete a destination (must be already unlinked from the service)
1053 static void __ip_vs_del_dest(struct ip_vs_dest
*dest
)
1055 ip_vs_kill_estimator(&dest
->stats
);
1058 * Remove it from the d-linked list with the real services.
1060 write_lock_bh(&__ip_vs_rs_lock
);
1061 ip_vs_rs_unhash(dest
);
1062 write_unlock_bh(&__ip_vs_rs_lock
);
1065 * Decrease the refcnt of the dest, and free the dest
1066 * if nobody refers to it (refcnt=0). Otherwise, throw
1067 * the destination into the trash.
1069 if (atomic_dec_and_test(&dest
->refcnt
)) {
1070 ip_vs_dst_reset(dest
);
1071 /* simply decrease svc->refcnt here, let the caller check
1072 and release the service if nobody refers to it.
1073 Only user context can release destination and service,
1074 and only one user context can update virtual service at a
1075 time, so the operation here is OK */
1076 atomic_dec(&dest
->svc
->refcnt
);
1079 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1080 "dest->refcnt=%d\n",
1081 IP_VS_DBG_ADDR(dest
->af
, &dest
->addr
),
1083 atomic_read(&dest
->refcnt
));
1084 list_add(&dest
->n_list
, &ip_vs_dest_trash
);
1085 atomic_inc(&dest
->refcnt
);
1091 * Unlink a destination from the given service
1093 static void __ip_vs_unlink_dest(struct ip_vs_service
*svc
,
1094 struct ip_vs_dest
*dest
,
1097 dest
->flags
&= ~IP_VS_DEST_F_AVAILABLE
;
1100 * Remove it from the d-linked destination list.
1102 list_del(&dest
->n_list
);
1106 * Call the update_service function of its scheduler
1108 if (svcupd
&& svc
->scheduler
->update_service
)
1109 svc
->scheduler
->update_service(svc
);
1114 * Delete a destination server in the given service
1117 ip_vs_del_dest(struct ip_vs_service
*svc
, struct ip_vs_dest_user_kern
*udest
)
1119 struct ip_vs_dest
*dest
;
1120 __be16 dport
= udest
->port
;
1124 dest
= ip_vs_lookup_dest(svc
, &udest
->addr
, dport
);
1127 IP_VS_DBG(1, "%s(): destination not found!\n", __func__
);
1131 write_lock_bh(&__ip_vs_svc_lock
);
1134 * Wait until all other svc users go away.
1136 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1139 * Unlink dest from the service
1141 __ip_vs_unlink_dest(svc
, dest
, 1);
1143 write_unlock_bh(&__ip_vs_svc_lock
);
1146 * Delete the destination
1148 __ip_vs_del_dest(dest
);
1157 * Add a service into the service hash table
1160 ip_vs_add_service(struct ip_vs_service_user_kern
*u
,
1161 struct ip_vs_service
**svc_p
)
1164 struct ip_vs_scheduler
*sched
= NULL
;
1165 struct ip_vs_service
*svc
= NULL
;
1167 /* increase the module use count */
1168 ip_vs_use_count_inc();
1170 /* Lookup the scheduler by 'u->sched_name' */
1171 sched
= ip_vs_scheduler_get(u
->sched_name
);
1172 if (sched
== NULL
) {
1173 pr_info("Scheduler module ip_vs_%s not found\n", u
->sched_name
);
1178 #ifdef CONFIG_IP_VS_IPV6
1179 if (u
->af
== AF_INET6
&& (u
->netmask
< 1 || u
->netmask
> 128)) {
1185 svc
= kzalloc(sizeof(struct ip_vs_service
), GFP_KERNEL
);
1187 IP_VS_DBG(1, "%s(): no memory\n", __func__
);
1192 /* I'm the first user of the service */
1193 atomic_set(&svc
->usecnt
, 1);
1194 atomic_set(&svc
->refcnt
, 0);
1197 svc
->protocol
= u
->protocol
;
1198 ip_vs_addr_copy(svc
->af
, &svc
->addr
, &u
->addr
);
1199 svc
->port
= u
->port
;
1200 svc
->fwmark
= u
->fwmark
;
1201 svc
->flags
= u
->flags
;
1202 svc
->timeout
= u
->timeout
* HZ
;
1203 svc
->netmask
= u
->netmask
;
1205 INIT_LIST_HEAD(&svc
->destinations
);
1206 rwlock_init(&svc
->sched_lock
);
1207 spin_lock_init(&svc
->stats
.lock
);
1209 /* Bind the scheduler */
1210 ret
= ip_vs_bind_scheduler(svc
, sched
);
1215 /* Update the virtual service counters */
1216 if (svc
->port
== FTPPORT
)
1217 atomic_inc(&ip_vs_ftpsvc_counter
);
1218 else if (svc
->port
== 0)
1219 atomic_inc(&ip_vs_nullsvc_counter
);
1221 ip_vs_new_estimator(&svc
->stats
);
1223 /* Count only IPv4 services for old get/setsockopt interface */
1224 if (svc
->af
== AF_INET
)
1225 ip_vs_num_services
++;
1227 /* Hash the service into the service table */
1228 write_lock_bh(&__ip_vs_svc_lock
);
1229 ip_vs_svc_hash(svc
);
1230 write_unlock_bh(&__ip_vs_svc_lock
);
1238 ip_vs_unbind_scheduler(svc
);
1241 ip_vs_app_inc_put(svc
->inc
);
1246 ip_vs_scheduler_put(sched
);
1249 /* decrease the module use count */
1250 ip_vs_use_count_dec();
1257 * Edit a service and bind it with a new scheduler
1260 ip_vs_edit_service(struct ip_vs_service
*svc
, struct ip_vs_service_user_kern
*u
)
1262 struct ip_vs_scheduler
*sched
, *old_sched
;
1266 * Lookup the scheduler, by 'u->sched_name'
1268 sched
= ip_vs_scheduler_get(u
->sched_name
);
1269 if (sched
== NULL
) {
1270 pr_info("Scheduler module ip_vs_%s not found\n", u
->sched_name
);
1275 #ifdef CONFIG_IP_VS_IPV6
1276 if (u
->af
== AF_INET6
&& (u
->netmask
< 1 || u
->netmask
> 128)) {
1282 write_lock_bh(&__ip_vs_svc_lock
);
1285 * Wait until all other svc users go away.
1287 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1290 * Set the flags and timeout value
1292 svc
->flags
= u
->flags
| IP_VS_SVC_F_HASHED
;
1293 svc
->timeout
= u
->timeout
* HZ
;
1294 svc
->netmask
= u
->netmask
;
1296 old_sched
= svc
->scheduler
;
1297 if (sched
!= old_sched
) {
1299 * Unbind the old scheduler
1301 if ((ret
= ip_vs_unbind_scheduler(svc
))) {
1307 * Bind the new scheduler
1309 if ((ret
= ip_vs_bind_scheduler(svc
, sched
))) {
1311 * If ip_vs_bind_scheduler fails, restore the old
1313 * The main reason of failure is out of memory.
1315 * The question is if the old scheduler can be
1316 * restored all the time. TODO: if it cannot be
1317 * restored some time, we must delete the service,
1318 * otherwise the system may crash.
1320 ip_vs_bind_scheduler(svc
, old_sched
);
1327 write_unlock_bh(&__ip_vs_svc_lock
);
1328 #ifdef CONFIG_IP_VS_IPV6
1333 ip_vs_scheduler_put(old_sched
);
1340 * Delete a service from the service list
1341 * - The service must be unlinked, unlocked and not referenced!
1342 * - We are called under _bh lock
1344 static void __ip_vs_del_service(struct ip_vs_service
*svc
)
1346 struct ip_vs_dest
*dest
, *nxt
;
1347 struct ip_vs_scheduler
*old_sched
;
1349 /* Count only IPv4 services for old get/setsockopt interface */
1350 if (svc
->af
== AF_INET
)
1351 ip_vs_num_services
--;
1353 ip_vs_kill_estimator(&svc
->stats
);
1355 /* Unbind scheduler */
1356 old_sched
= svc
->scheduler
;
1357 ip_vs_unbind_scheduler(svc
);
1359 ip_vs_scheduler_put(old_sched
);
1361 /* Unbind app inc */
1363 ip_vs_app_inc_put(svc
->inc
);
1368 * Unlink the whole destination list
1370 list_for_each_entry_safe(dest
, nxt
, &svc
->destinations
, n_list
) {
1371 __ip_vs_unlink_dest(svc
, dest
, 0);
1372 __ip_vs_del_dest(dest
);
1376 * Update the virtual service counters
1378 if (svc
->port
== FTPPORT
)
1379 atomic_dec(&ip_vs_ftpsvc_counter
);
1380 else if (svc
->port
== 0)
1381 atomic_dec(&ip_vs_nullsvc_counter
);
1384 * Free the service if nobody refers to it
1386 if (atomic_read(&svc
->refcnt
) == 0)
1389 /* decrease the module use count */
1390 ip_vs_use_count_dec();
1394 * Delete a service from the service list
1396 static int ip_vs_del_service(struct ip_vs_service
*svc
)
1402 * Unhash it from the service table
1404 write_lock_bh(&__ip_vs_svc_lock
);
1406 ip_vs_svc_unhash(svc
);
1409 * Wait until all the svc users go away.
1411 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 1);
1413 __ip_vs_del_service(svc
);
1415 write_unlock_bh(&__ip_vs_svc_lock
);
1422 * Flush all the virtual services
1424 static int ip_vs_flush(void)
1427 struct ip_vs_service
*svc
, *nxt
;
1430 * Flush the service table hashed by <protocol,addr,port>
1432 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1433 list_for_each_entry_safe(svc
, nxt
, &ip_vs_svc_table
[idx
], s_list
) {
1434 write_lock_bh(&__ip_vs_svc_lock
);
1435 ip_vs_svc_unhash(svc
);
1437 * Wait until all the svc users go away.
1439 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1440 __ip_vs_del_service(svc
);
1441 write_unlock_bh(&__ip_vs_svc_lock
);
1446 * Flush the service table hashed by fwmark
1448 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1449 list_for_each_entry_safe(svc
, nxt
,
1450 &ip_vs_svc_fwm_table
[idx
], f_list
) {
1451 write_lock_bh(&__ip_vs_svc_lock
);
1452 ip_vs_svc_unhash(svc
);
1454 * Wait until all the svc users go away.
1456 IP_VS_WAIT_WHILE(atomic_read(&svc
->usecnt
) > 0);
1457 __ip_vs_del_service(svc
);
1458 write_unlock_bh(&__ip_vs_svc_lock
);
1467 * Zero counters in a service or all services
1469 static int ip_vs_zero_service(struct ip_vs_service
*svc
)
1471 struct ip_vs_dest
*dest
;
1473 write_lock_bh(&__ip_vs_svc_lock
);
1474 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1475 ip_vs_zero_stats(&dest
->stats
);
1477 ip_vs_zero_stats(&svc
->stats
);
1478 write_unlock_bh(&__ip_vs_svc_lock
);
1482 static int ip_vs_zero_all(void)
1485 struct ip_vs_service
*svc
;
1487 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1488 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1489 ip_vs_zero_service(svc
);
1493 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1494 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1495 ip_vs_zero_service(svc
);
1499 ip_vs_zero_stats(&ip_vs_stats
);
1505 proc_do_defense_mode(ctl_table
*table
, int write
,
1506 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1508 int *valp
= table
->data
;
1512 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
1513 if (write
&& (*valp
!= val
)) {
1514 if ((*valp
< 0) || (*valp
> 3)) {
1515 /* Restore the correct value */
1518 update_defense_level();
1526 proc_do_sync_threshold(ctl_table
*table
, int write
,
1527 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1529 int *valp
= table
->data
;
1533 /* backup the value first */
1534 memcpy(val
, valp
, sizeof(val
));
1536 rc
= proc_dointvec(table
, write
, buffer
, lenp
, ppos
);
1537 if (write
&& (valp
[0] < 0 || valp
[1] < 0 || valp
[0] >= valp
[1])) {
1538 /* Restore the correct value */
1539 memcpy(valp
, val
, sizeof(val
));
1546 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1549 static struct ctl_table vs_vars
[] = {
1551 .procname
= "amemthresh",
1552 .data
= &sysctl_ip_vs_amemthresh
,
1553 .maxlen
= sizeof(int),
1555 .proc_handler
= proc_dointvec
,
1557 #ifdef CONFIG_IP_VS_DEBUG
1559 .procname
= "debug_level",
1560 .data
= &sysctl_ip_vs_debug_level
,
1561 .maxlen
= sizeof(int),
1563 .proc_handler
= proc_dointvec
,
1567 .procname
= "am_droprate",
1568 .data
= &sysctl_ip_vs_am_droprate
,
1569 .maxlen
= sizeof(int),
1571 .proc_handler
= proc_dointvec
,
1574 .procname
= "drop_entry",
1575 .data
= &sysctl_ip_vs_drop_entry
,
1576 .maxlen
= sizeof(int),
1578 .proc_handler
= proc_do_defense_mode
,
1581 .procname
= "drop_packet",
1582 .data
= &sysctl_ip_vs_drop_packet
,
1583 .maxlen
= sizeof(int),
1585 .proc_handler
= proc_do_defense_mode
,
1587 #ifdef CONFIG_IP_VS_NFCT
1589 .procname
= "conntrack",
1590 .data
= &sysctl_ip_vs_conntrack
,
1591 .maxlen
= sizeof(int),
1593 .proc_handler
= &proc_dointvec
,
1597 .procname
= "secure_tcp",
1598 .data
= &sysctl_ip_vs_secure_tcp
,
1599 .maxlen
= sizeof(int),
1601 .proc_handler
= proc_do_defense_mode
,
1604 .procname
= "snat_reroute",
1605 .data
= &sysctl_ip_vs_snat_reroute
,
1606 .maxlen
= sizeof(int),
1608 .proc_handler
= &proc_dointvec
,
1612 .procname
= "timeout_established",
1613 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ESTABLISHED
],
1614 .maxlen
= sizeof(int),
1616 .proc_handler
= proc_dointvec_jiffies
,
1619 .procname
= "timeout_synsent",
1620 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_SENT
],
1621 .maxlen
= sizeof(int),
1623 .proc_handler
= proc_dointvec_jiffies
,
1626 .procname
= "timeout_synrecv",
1627 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYN_RECV
],
1628 .maxlen
= sizeof(int),
1630 .proc_handler
= proc_dointvec_jiffies
,
1633 .procname
= "timeout_finwait",
1634 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_FIN_WAIT
],
1635 .maxlen
= sizeof(int),
1637 .proc_handler
= proc_dointvec_jiffies
,
1640 .procname
= "timeout_timewait",
1641 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_TIME_WAIT
],
1642 .maxlen
= sizeof(int),
1644 .proc_handler
= proc_dointvec_jiffies
,
1647 .procname
= "timeout_close",
1648 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE
],
1649 .maxlen
= sizeof(int),
1651 .proc_handler
= proc_dointvec_jiffies
,
1654 .procname
= "timeout_closewait",
1655 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_CLOSE_WAIT
],
1656 .maxlen
= sizeof(int),
1658 .proc_handler
= proc_dointvec_jiffies
,
1661 .procname
= "timeout_lastack",
1662 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LAST_ACK
],
1663 .maxlen
= sizeof(int),
1665 .proc_handler
= proc_dointvec_jiffies
,
1668 .procname
= "timeout_listen",
1669 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_LISTEN
],
1670 .maxlen
= sizeof(int),
1672 .proc_handler
= proc_dointvec_jiffies
,
1675 .procname
= "timeout_synack",
1676 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_SYNACK
],
1677 .maxlen
= sizeof(int),
1679 .proc_handler
= proc_dointvec_jiffies
,
1682 .procname
= "timeout_udp",
1683 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_UDP
],
1684 .maxlen
= sizeof(int),
1686 .proc_handler
= proc_dointvec_jiffies
,
1689 .procname
= "timeout_icmp",
1690 .data
= &vs_timeout_table_dos
.timeout
[IP_VS_S_ICMP
],
1691 .maxlen
= sizeof(int),
1693 .proc_handler
= proc_dointvec_jiffies
,
1697 .procname
= "cache_bypass",
1698 .data
= &sysctl_ip_vs_cache_bypass
,
1699 .maxlen
= sizeof(int),
1701 .proc_handler
= proc_dointvec
,
1704 .procname
= "expire_nodest_conn",
1705 .data
= &sysctl_ip_vs_expire_nodest_conn
,
1706 .maxlen
= sizeof(int),
1708 .proc_handler
= proc_dointvec
,
1711 .procname
= "expire_quiescent_template",
1712 .data
= &sysctl_ip_vs_expire_quiescent_template
,
1713 .maxlen
= sizeof(int),
1715 .proc_handler
= proc_dointvec
,
1718 .procname
= "sync_threshold",
1719 .data
= &sysctl_ip_vs_sync_threshold
,
1720 .maxlen
= sizeof(sysctl_ip_vs_sync_threshold
),
1722 .proc_handler
= proc_do_sync_threshold
,
1725 .procname
= "nat_icmp_send",
1726 .data
= &sysctl_ip_vs_nat_icmp_send
,
1727 .maxlen
= sizeof(int),
1729 .proc_handler
= proc_dointvec
,
1734 const struct ctl_path net_vs_ctl_path
[] = {
1735 { .procname
= "net", },
1736 { .procname
= "ipv4", },
1737 { .procname
= "vs", },
1740 EXPORT_SYMBOL_GPL(net_vs_ctl_path
);
/* Handle of the registered sysctl table, kept for unregistration. */
static struct ctl_table_header *sysctl_header;
1744 #ifdef CONFIG_PROC_FS
/* Cursor for the /proc seq_file walk: which hash table and bucket. */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* current bucket index in that table */
};
1752 * Write the contents of the VS rule table to a PROCfs file.
1753 * (It is kept just for backward compatibility)
1755 static inline const char *ip_vs_fwd_name(unsigned flags
)
1757 switch (flags
& IP_VS_CONN_F_FWD_MASK
) {
1758 case IP_VS_CONN_F_LOCALNODE
:
1760 case IP_VS_CONN_F_TUNNEL
:
1762 case IP_VS_CONN_F_DROUTE
:
1770 /* Get the Nth entry in the two lists */
1771 static struct ip_vs_service
*ip_vs_info_array(struct seq_file
*seq
, loff_t pos
)
1773 struct ip_vs_iter
*iter
= seq
->private;
1775 struct ip_vs_service
*svc
;
1777 /* look in hash by protocol */
1778 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1779 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
1781 iter
->table
= ip_vs_svc_table
;
1788 /* keep looking in fwmark */
1789 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
1790 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
1792 iter
->table
= ip_vs_svc_fwm_table
;
1802 static void *ip_vs_info_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1803 __acquires(__ip_vs_svc_lock
)
1806 read_lock_bh(&__ip_vs_svc_lock
);
1807 return *pos
? ip_vs_info_array(seq
, *pos
- 1) : SEQ_START_TOKEN
;
1811 static void *ip_vs_info_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1813 struct list_head
*e
;
1814 struct ip_vs_iter
*iter
;
1815 struct ip_vs_service
*svc
;
1818 if (v
== SEQ_START_TOKEN
)
1819 return ip_vs_info_array(seq
,0);
1822 iter
= seq
->private;
1824 if (iter
->table
== ip_vs_svc_table
) {
1825 /* next service in table hashed by protocol */
1826 if ((e
= svc
->s_list
.next
) != &ip_vs_svc_table
[iter
->bucket
])
1827 return list_entry(e
, struct ip_vs_service
, s_list
);
1830 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1831 list_for_each_entry(svc
,&ip_vs_svc_table
[iter
->bucket
],
1837 iter
->table
= ip_vs_svc_fwm_table
;
1842 /* next service in hashed by fwmark */
1843 if ((e
= svc
->f_list
.next
) != &ip_vs_svc_fwm_table
[iter
->bucket
])
1844 return list_entry(e
, struct ip_vs_service
, f_list
);
1847 while (++iter
->bucket
< IP_VS_SVC_TAB_SIZE
) {
1848 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[iter
->bucket
],
1856 static void ip_vs_info_seq_stop(struct seq_file
*seq
, void *v
)
1857 __releases(__ip_vs_svc_lock
)
1859 read_unlock_bh(&__ip_vs_svc_lock
);
1863 static int ip_vs_info_seq_show(struct seq_file
*seq
, void *v
)
1865 if (v
== SEQ_START_TOKEN
) {
1867 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1868 NVERSION(IP_VS_VERSION_CODE
), ip_vs_conn_tab_size
);
1870 "Prot LocalAddress:Port Scheduler Flags\n");
1872 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1874 const struct ip_vs_service
*svc
= v
;
1875 const struct ip_vs_iter
*iter
= seq
->private;
1876 const struct ip_vs_dest
*dest
;
1878 if (iter
->table
== ip_vs_svc_table
) {
1879 #ifdef CONFIG_IP_VS_IPV6
1880 if (svc
->af
== AF_INET6
)
1881 seq_printf(seq
, "%s [%pI6]:%04X %s ",
1882 ip_vs_proto_name(svc
->protocol
),
1885 svc
->scheduler
->name
);
1888 seq_printf(seq
, "%s %08X:%04X %s %s ",
1889 ip_vs_proto_name(svc
->protocol
),
1890 ntohl(svc
->addr
.ip
),
1892 svc
->scheduler
->name
,
1893 (svc
->flags
& IP_VS_SVC_F_ONEPACKET
)?"ops ":"");
1895 seq_printf(seq
, "FWM %08X %s %s",
1896 svc
->fwmark
, svc
->scheduler
->name
,
1897 (svc
->flags
& IP_VS_SVC_F_ONEPACKET
)?"ops ":"");
1900 if (svc
->flags
& IP_VS_SVC_F_PERSISTENT
)
1901 seq_printf(seq
, "persistent %d %08X\n",
1903 ntohl(svc
->netmask
));
1905 seq_putc(seq
, '\n');
1907 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
1908 #ifdef CONFIG_IP_VS_IPV6
1909 if (dest
->af
== AF_INET6
)
1912 " %-7s %-6d %-10d %-10d\n",
1915 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
1916 atomic_read(&dest
->weight
),
1917 atomic_read(&dest
->activeconns
),
1918 atomic_read(&dest
->inactconns
));
1923 "%-7s %-6d %-10d %-10d\n",
1924 ntohl(dest
->addr
.ip
),
1926 ip_vs_fwd_name(atomic_read(&dest
->conn_flags
)),
1927 atomic_read(&dest
->weight
),
1928 atomic_read(&dest
->activeconns
),
1929 atomic_read(&dest
->inactconns
));
1936 static const struct seq_operations ip_vs_info_seq_ops
= {
1937 .start
= ip_vs_info_seq_start
,
1938 .next
= ip_vs_info_seq_next
,
1939 .stop
= ip_vs_info_seq_stop
,
1940 .show
= ip_vs_info_seq_show
,
1943 static int ip_vs_info_open(struct inode
*inode
, struct file
*file
)
1945 return seq_open_private(file
, &ip_vs_info_seq_ops
,
1946 sizeof(struct ip_vs_iter
));
1949 static const struct file_operations ip_vs_info_fops
= {
1950 .owner
= THIS_MODULE
,
1951 .open
= ip_vs_info_open
,
1953 .llseek
= seq_lseek
,
1954 .release
= seq_release_private
,
1959 struct ip_vs_stats ip_vs_stats
= {
1960 .lock
= __SPIN_LOCK_UNLOCKED(ip_vs_stats
.lock
),
1963 #ifdef CONFIG_PROC_FS
1964 static int ip_vs_stats_show(struct seq_file
*seq
, void *v
)
1967 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1969 " Total Incoming Outgoing Incoming Outgoing\n");
1971 " Conns Packets Packets Bytes Bytes\n");
1973 spin_lock_bh(&ip_vs_stats
.lock
);
1974 seq_printf(seq
, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats
.ustats
.conns
,
1975 ip_vs_stats
.ustats
.inpkts
, ip_vs_stats
.ustats
.outpkts
,
1976 (unsigned long long) ip_vs_stats
.ustats
.inbytes
,
1977 (unsigned long long) ip_vs_stats
.ustats
.outbytes
);
1979 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1981 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1982 seq_printf(seq
,"%8X %8X %8X %16X %16X\n",
1983 ip_vs_stats
.ustats
.cps
,
1984 ip_vs_stats
.ustats
.inpps
,
1985 ip_vs_stats
.ustats
.outpps
,
1986 ip_vs_stats
.ustats
.inbps
,
1987 ip_vs_stats
.ustats
.outbps
);
1988 spin_unlock_bh(&ip_vs_stats
.lock
);
1993 static int ip_vs_stats_seq_open(struct inode
*inode
, struct file
*file
)
1995 return single_open(file
, ip_vs_stats_show
, NULL
);
1998 static const struct file_operations ip_vs_stats_fops
= {
1999 .owner
= THIS_MODULE
,
2000 .open
= ip_vs_stats_seq_open
,
2002 .llseek
= seq_lseek
,
2003 .release
= single_release
,
2009 * Set timeout values for tcp tcpfin udp in the timeout_table.
2011 static int ip_vs_set_timeout(struct ip_vs_timeout_user
*u
)
2013 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2018 #ifdef CONFIG_IP_VS_PROTO_TCP
2019 if (u
->tcp_timeout
) {
2020 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_ESTABLISHED
]
2021 = u
->tcp_timeout
* HZ
;
2024 if (u
->tcp_fin_timeout
) {
2025 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_FIN_WAIT
]
2026 = u
->tcp_fin_timeout
* HZ
;
2030 #ifdef CONFIG_IP_VS_PROTO_UDP
2031 if (u
->udp_timeout
) {
2032 ip_vs_protocol_udp
.timeout_table
[IP_VS_UDP_S_NORMAL
]
2033 = u
->udp_timeout
* HZ
;
2040 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2041 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2042 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2043 sizeof(struct ip_vs_dest_user))
2044 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2045 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2046 #define MAX_ARG_LEN SVCDEST_ARG_LEN
2048 static const unsigned char set_arglen
[SET_CMDID(IP_VS_SO_SET_MAX
)+1] = {
2049 [SET_CMDID(IP_VS_SO_SET_ADD
)] = SERVICE_ARG_LEN
,
2050 [SET_CMDID(IP_VS_SO_SET_EDIT
)] = SERVICE_ARG_LEN
,
2051 [SET_CMDID(IP_VS_SO_SET_DEL
)] = SERVICE_ARG_LEN
,
2052 [SET_CMDID(IP_VS_SO_SET_FLUSH
)] = 0,
2053 [SET_CMDID(IP_VS_SO_SET_ADDDEST
)] = SVCDEST_ARG_LEN
,
2054 [SET_CMDID(IP_VS_SO_SET_DELDEST
)] = SVCDEST_ARG_LEN
,
2055 [SET_CMDID(IP_VS_SO_SET_EDITDEST
)] = SVCDEST_ARG_LEN
,
2056 [SET_CMDID(IP_VS_SO_SET_TIMEOUT
)] = TIMEOUT_ARG_LEN
,
2057 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON
)] = DAEMON_ARG_LEN
,
2058 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON
)] = DAEMON_ARG_LEN
,
2059 [SET_CMDID(IP_VS_SO_SET_ZERO
)] = SERVICE_ARG_LEN
,
2062 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern
*usvc
,
2063 struct ip_vs_service_user
*usvc_compat
)
2066 usvc
->protocol
= usvc_compat
->protocol
;
2067 usvc
->addr
.ip
= usvc_compat
->addr
;
2068 usvc
->port
= usvc_compat
->port
;
2069 usvc
->fwmark
= usvc_compat
->fwmark
;
2071 /* Deep copy of sched_name is not needed here */
2072 usvc
->sched_name
= usvc_compat
->sched_name
;
2074 usvc
->flags
= usvc_compat
->flags
;
2075 usvc
->timeout
= usvc_compat
->timeout
;
2076 usvc
->netmask
= usvc_compat
->netmask
;
2079 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern
*udest
,
2080 struct ip_vs_dest_user
*udest_compat
)
2082 udest
->addr
.ip
= udest_compat
->addr
;
2083 udest
->port
= udest_compat
->port
;
2084 udest
->conn_flags
= udest_compat
->conn_flags
;
2085 udest
->weight
= udest_compat
->weight
;
2086 udest
->u_threshold
= udest_compat
->u_threshold
;
2087 udest
->l_threshold
= udest_compat
->l_threshold
;
2091 do_ip_vs_set_ctl(struct sock
*sk
, int cmd
, void __user
*user
, unsigned int len
)
2094 unsigned char arg
[MAX_ARG_LEN
];
2095 struct ip_vs_service_user
*usvc_compat
;
2096 struct ip_vs_service_user_kern usvc
;
2097 struct ip_vs_service
*svc
;
2098 struct ip_vs_dest_user
*udest_compat
;
2099 struct ip_vs_dest_user_kern udest
;
2101 if (!capable(CAP_NET_ADMIN
))
2104 if (cmd
< IP_VS_BASE_CTL
|| cmd
> IP_VS_SO_SET_MAX
)
2106 if (len
< 0 || len
> MAX_ARG_LEN
)
2108 if (len
!= set_arglen
[SET_CMDID(cmd
)]) {
2109 pr_err("set_ctl: len %u != %u\n",
2110 len
, set_arglen
[SET_CMDID(cmd
)]);
2114 if (copy_from_user(arg
, user
, len
) != 0)
2117 /* increase the module use count */
2118 ip_vs_use_count_inc();
2120 if (mutex_lock_interruptible(&__ip_vs_mutex
)) {
2125 if (cmd
== IP_VS_SO_SET_FLUSH
) {
2126 /* Flush the virtual service */
2127 ret
= ip_vs_flush();
2129 } else if (cmd
== IP_VS_SO_SET_TIMEOUT
) {
2130 /* Set timeout values for (tcp tcpfin udp) */
2131 ret
= ip_vs_set_timeout((struct ip_vs_timeout_user
*)arg
);
2133 } else if (cmd
== IP_VS_SO_SET_STARTDAEMON
) {
2134 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
2135 ret
= start_sync_thread(dm
->state
, dm
->mcast_ifn
, dm
->syncid
);
2137 } else if (cmd
== IP_VS_SO_SET_STOPDAEMON
) {
2138 struct ip_vs_daemon_user
*dm
= (struct ip_vs_daemon_user
*)arg
;
2139 ret
= stop_sync_thread(dm
->state
);
2143 usvc_compat
= (struct ip_vs_service_user
*)arg
;
2144 udest_compat
= (struct ip_vs_dest_user
*)(usvc_compat
+ 1);
2146 /* We only use the new structs internally, so copy userspace compat
2147 * structs to extended internal versions */
2148 ip_vs_copy_usvc_compat(&usvc
, usvc_compat
);
2149 ip_vs_copy_udest_compat(&udest
, udest_compat
);
2151 if (cmd
== IP_VS_SO_SET_ZERO
) {
2152 /* if no service address is set, zero counters in all */
2153 if (!usvc
.fwmark
&& !usvc
.addr
.ip
&& !usvc
.port
) {
2154 ret
= ip_vs_zero_all();
2159 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2160 if (usvc
.protocol
!= IPPROTO_TCP
&& usvc
.protocol
!= IPPROTO_UDP
&&
2161 usvc
.protocol
!= IPPROTO_SCTP
) {
2162 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2163 usvc
.protocol
, &usvc
.addr
.ip
,
2164 ntohs(usvc
.port
), usvc
.sched_name
);
2169 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2170 if (usvc
.fwmark
== 0)
2171 svc
= __ip_vs_service_get(usvc
.af
, usvc
.protocol
,
2172 &usvc
.addr
, usvc
.port
);
2174 svc
= __ip_vs_svc_fwm_get(usvc
.af
, usvc
.fwmark
);
2176 if (cmd
!= IP_VS_SO_SET_ADD
2177 && (svc
== NULL
|| svc
->protocol
!= usvc
.protocol
)) {
2179 goto out_drop_service
;
2183 case IP_VS_SO_SET_ADD
:
2187 ret
= ip_vs_add_service(&usvc
, &svc
);
2189 case IP_VS_SO_SET_EDIT
:
2190 ret
= ip_vs_edit_service(svc
, &usvc
);
2192 case IP_VS_SO_SET_DEL
:
2193 ret
= ip_vs_del_service(svc
);
2197 case IP_VS_SO_SET_ZERO
:
2198 ret
= ip_vs_zero_service(svc
);
2200 case IP_VS_SO_SET_ADDDEST
:
2201 ret
= ip_vs_add_dest(svc
, &udest
);
2203 case IP_VS_SO_SET_EDITDEST
:
2204 ret
= ip_vs_edit_dest(svc
, &udest
);
2206 case IP_VS_SO_SET_DELDEST
:
2207 ret
= ip_vs_del_dest(svc
, &udest
);
2215 ip_vs_service_put(svc
);
2218 mutex_unlock(&__ip_vs_mutex
);
2220 /* decrease the module use count */
2221 ip_vs_use_count_dec();
2228 ip_vs_copy_stats(struct ip_vs_stats_user
*dst
, struct ip_vs_stats
*src
)
2230 spin_lock_bh(&src
->lock
);
2231 memcpy(dst
, &src
->ustats
, sizeof(*dst
));
2232 spin_unlock_bh(&src
->lock
);
2236 ip_vs_copy_service(struct ip_vs_service_entry
*dst
, struct ip_vs_service
*src
)
2238 dst
->protocol
= src
->protocol
;
2239 dst
->addr
= src
->addr
.ip
;
2240 dst
->port
= src
->port
;
2241 dst
->fwmark
= src
->fwmark
;
2242 strlcpy(dst
->sched_name
, src
->scheduler
->name
, sizeof(dst
->sched_name
));
2243 dst
->flags
= src
->flags
;
2244 dst
->timeout
= src
->timeout
/ HZ
;
2245 dst
->netmask
= src
->netmask
;
2246 dst
->num_dests
= src
->num_dests
;
2247 ip_vs_copy_stats(&dst
->stats
, &src
->stats
);
2251 __ip_vs_get_service_entries(const struct ip_vs_get_services
*get
,
2252 struct ip_vs_get_services __user
*uptr
)
2255 struct ip_vs_service
*svc
;
2256 struct ip_vs_service_entry entry
;
2259 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2260 list_for_each_entry(svc
, &ip_vs_svc_table
[idx
], s_list
) {
2261 /* Only expose IPv4 entries to old interface */
2262 if (svc
->af
!= AF_INET
)
2265 if (count
>= get
->num_services
)
2267 memset(&entry
, 0, sizeof(entry
));
2268 ip_vs_copy_service(&entry
, svc
);
2269 if (copy_to_user(&uptr
->entrytable
[count
],
2270 &entry
, sizeof(entry
))) {
2278 for (idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
2279 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[idx
], f_list
) {
2280 /* Only expose IPv4 entries to old interface */
2281 if (svc
->af
!= AF_INET
)
2284 if (count
>= get
->num_services
)
2286 memset(&entry
, 0, sizeof(entry
));
2287 ip_vs_copy_service(&entry
, svc
);
2288 if (copy_to_user(&uptr
->entrytable
[count
],
2289 &entry
, sizeof(entry
))) {
2301 __ip_vs_get_dest_entries(const struct ip_vs_get_dests
*get
,
2302 struct ip_vs_get_dests __user
*uptr
)
2304 struct ip_vs_service
*svc
;
2305 union nf_inet_addr addr
= { .ip
= get
->addr
};
2309 svc
= __ip_vs_svc_fwm_get(AF_INET
, get
->fwmark
);
2311 svc
= __ip_vs_service_get(AF_INET
, get
->protocol
, &addr
,
2316 struct ip_vs_dest
*dest
;
2317 struct ip_vs_dest_entry entry
;
2319 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2320 if (count
>= get
->num_dests
)
2323 entry
.addr
= dest
->addr
.ip
;
2324 entry
.port
= dest
->port
;
2325 entry
.conn_flags
= atomic_read(&dest
->conn_flags
);
2326 entry
.weight
= atomic_read(&dest
->weight
);
2327 entry
.u_threshold
= dest
->u_threshold
;
2328 entry
.l_threshold
= dest
->l_threshold
;
2329 entry
.activeconns
= atomic_read(&dest
->activeconns
);
2330 entry
.inactconns
= atomic_read(&dest
->inactconns
);
2331 entry
.persistconns
= atomic_read(&dest
->persistconns
);
2332 ip_vs_copy_stats(&entry
.stats
, &dest
->stats
);
2333 if (copy_to_user(&uptr
->entrytable
[count
],
2334 &entry
, sizeof(entry
))) {
2340 ip_vs_service_put(svc
);
2347 __ip_vs_get_timeouts(struct ip_vs_timeout_user
*u
)
2349 #ifdef CONFIG_IP_VS_PROTO_TCP
2351 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_ESTABLISHED
] / HZ
;
2352 u
->tcp_fin_timeout
=
2353 ip_vs_protocol_tcp
.timeout_table
[IP_VS_TCP_S_FIN_WAIT
] / HZ
;
2355 #ifdef CONFIG_IP_VS_PROTO_UDP
2357 ip_vs_protocol_udp
.timeout_table
[IP_VS_UDP_S_NORMAL
] / HZ
;
2362 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2363 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2364 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2365 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2366 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2367 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2368 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2370 static const unsigned char get_arglen
[GET_CMDID(IP_VS_SO_GET_MAX
)+1] = {
2371 [GET_CMDID(IP_VS_SO_GET_VERSION
)] = 64,
2372 [GET_CMDID(IP_VS_SO_GET_INFO
)] = GET_INFO_ARG_LEN
,
2373 [GET_CMDID(IP_VS_SO_GET_SERVICES
)] = GET_SERVICES_ARG_LEN
,
2374 [GET_CMDID(IP_VS_SO_GET_SERVICE
)] = GET_SERVICE_ARG_LEN
,
2375 [GET_CMDID(IP_VS_SO_GET_DESTS
)] = GET_DESTS_ARG_LEN
,
2376 [GET_CMDID(IP_VS_SO_GET_TIMEOUT
)] = GET_TIMEOUT_ARG_LEN
,
2377 [GET_CMDID(IP_VS_SO_GET_DAEMON
)] = GET_DAEMON_ARG_LEN
,
2381 do_ip_vs_get_ctl(struct sock
*sk
, int cmd
, void __user
*user
, int *len
)
2383 unsigned char arg
[128];
2385 unsigned int copylen
;
2387 if (!capable(CAP_NET_ADMIN
))
2390 if (cmd
< IP_VS_BASE_CTL
|| cmd
> IP_VS_SO_GET_MAX
)
2393 if (*len
< get_arglen
[GET_CMDID(cmd
)]) {
2394 pr_err("get_ctl: len %u < %u\n",
2395 *len
, get_arglen
[GET_CMDID(cmd
)]);
2399 copylen
= get_arglen
[GET_CMDID(cmd
)];
2403 if (copy_from_user(arg
, user
, copylen
) != 0)
2406 if (mutex_lock_interruptible(&__ip_vs_mutex
))
2407 return -ERESTARTSYS
;
2410 case IP_VS_SO_GET_VERSION
:
2414 sprintf(buf
, "IP Virtual Server version %d.%d.%d (size=%d)",
2415 NVERSION(IP_VS_VERSION_CODE
), ip_vs_conn_tab_size
);
2416 if (copy_to_user(user
, buf
, strlen(buf
)+1) != 0) {
2420 *len
= strlen(buf
)+1;
2424 case IP_VS_SO_GET_INFO
:
2426 struct ip_vs_getinfo info
;
2427 info
.version
= IP_VS_VERSION_CODE
;
2428 info
.size
= ip_vs_conn_tab_size
;
2429 info
.num_services
= ip_vs_num_services
;
2430 if (copy_to_user(user
, &info
, sizeof(info
)) != 0)
2435 case IP_VS_SO_GET_SERVICES
:
2437 struct ip_vs_get_services
*get
;
2440 get
= (struct ip_vs_get_services
*)arg
;
2441 size
= sizeof(*get
) +
2442 sizeof(struct ip_vs_service_entry
) * get
->num_services
;
2444 pr_err("length: %u != %u\n", *len
, size
);
2448 ret
= __ip_vs_get_service_entries(get
, user
);
2452 case IP_VS_SO_GET_SERVICE
:
2454 struct ip_vs_service_entry
*entry
;
2455 struct ip_vs_service
*svc
;
2456 union nf_inet_addr addr
;
2458 entry
= (struct ip_vs_service_entry
*)arg
;
2459 addr
.ip
= entry
->addr
;
2461 svc
= __ip_vs_svc_fwm_get(AF_INET
, entry
->fwmark
);
2463 svc
= __ip_vs_service_get(AF_INET
, entry
->protocol
,
2464 &addr
, entry
->port
);
2466 ip_vs_copy_service(entry
, svc
);
2467 if (copy_to_user(user
, entry
, sizeof(*entry
)) != 0)
2469 ip_vs_service_put(svc
);
2475 case IP_VS_SO_GET_DESTS
:
2477 struct ip_vs_get_dests
*get
;
2480 get
= (struct ip_vs_get_dests
*)arg
;
2481 size
= sizeof(*get
) +
2482 sizeof(struct ip_vs_dest_entry
) * get
->num_dests
;
2484 pr_err("length: %u != %u\n", *len
, size
);
2488 ret
= __ip_vs_get_dest_entries(get
, user
);
2492 case IP_VS_SO_GET_TIMEOUT
:
2494 struct ip_vs_timeout_user t
;
2496 __ip_vs_get_timeouts(&t
);
2497 if (copy_to_user(user
, &t
, sizeof(t
)) != 0)
2502 case IP_VS_SO_GET_DAEMON
:
2504 struct ip_vs_daemon_user d
[2];
2506 memset(&d
, 0, sizeof(d
));
2507 if (ip_vs_sync_state
& IP_VS_STATE_MASTER
) {
2508 d
[0].state
= IP_VS_STATE_MASTER
;
2509 strlcpy(d
[0].mcast_ifn
, ip_vs_master_mcast_ifn
, sizeof(d
[0].mcast_ifn
));
2510 d
[0].syncid
= ip_vs_master_syncid
;
2512 if (ip_vs_sync_state
& IP_VS_STATE_BACKUP
) {
2513 d
[1].state
= IP_VS_STATE_BACKUP
;
2514 strlcpy(d
[1].mcast_ifn
, ip_vs_backup_mcast_ifn
, sizeof(d
[1].mcast_ifn
));
2515 d
[1].syncid
= ip_vs_backup_syncid
;
2517 if (copy_to_user(user
, &d
, sizeof(d
)) != 0)
2527 mutex_unlock(&__ip_vs_mutex
);
2532 static struct nf_sockopt_ops ip_vs_sockopts
= {
2534 .set_optmin
= IP_VS_BASE_CTL
,
2535 .set_optmax
= IP_VS_SO_SET_MAX
+1,
2536 .set
= do_ip_vs_set_ctl
,
2537 .get_optmin
= IP_VS_BASE_CTL
,
2538 .get_optmax
= IP_VS_SO_GET_MAX
+1,
2539 .get
= do_ip_vs_get_ctl
,
2540 .owner
= THIS_MODULE
,
2544 * Generic Netlink interface
2547 /* IPVS genetlink family */
2548 static struct genl_family ip_vs_genl_family
= {
2549 .id
= GENL_ID_GENERATE
,
2551 .name
= IPVS_GENL_NAME
,
2552 .version
= IPVS_GENL_VERSION
,
2553 .maxattr
= IPVS_CMD_MAX
,
2556 /* Policy used for first-level command attributes */
2557 static const struct nla_policy ip_vs_cmd_policy
[IPVS_CMD_ATTR_MAX
+ 1] = {
2558 [IPVS_CMD_ATTR_SERVICE
] = { .type
= NLA_NESTED
},
2559 [IPVS_CMD_ATTR_DEST
] = { .type
= NLA_NESTED
},
2560 [IPVS_CMD_ATTR_DAEMON
] = { .type
= NLA_NESTED
},
2561 [IPVS_CMD_ATTR_TIMEOUT_TCP
] = { .type
= NLA_U32
},
2562 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
] = { .type
= NLA_U32
},
2563 [IPVS_CMD_ATTR_TIMEOUT_UDP
] = { .type
= NLA_U32
},
2566 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2567 static const struct nla_policy ip_vs_daemon_policy
[IPVS_DAEMON_ATTR_MAX
+ 1] = {
2568 [IPVS_DAEMON_ATTR_STATE
] = { .type
= NLA_U32
},
2569 [IPVS_DAEMON_ATTR_MCAST_IFN
] = { .type
= NLA_NUL_STRING
,
2570 .len
= IP_VS_IFNAME_MAXLEN
},
2571 [IPVS_DAEMON_ATTR_SYNC_ID
] = { .type
= NLA_U32
},
2574 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2575 static const struct nla_policy ip_vs_svc_policy
[IPVS_SVC_ATTR_MAX
+ 1] = {
2576 [IPVS_SVC_ATTR_AF
] = { .type
= NLA_U16
},
2577 [IPVS_SVC_ATTR_PROTOCOL
] = { .type
= NLA_U16
},
2578 [IPVS_SVC_ATTR_ADDR
] = { .type
= NLA_BINARY
,
2579 .len
= sizeof(union nf_inet_addr
) },
2580 [IPVS_SVC_ATTR_PORT
] = { .type
= NLA_U16
},
2581 [IPVS_SVC_ATTR_FWMARK
] = { .type
= NLA_U32
},
2582 [IPVS_SVC_ATTR_SCHED_NAME
] = { .type
= NLA_NUL_STRING
,
2583 .len
= IP_VS_SCHEDNAME_MAXLEN
},
2584 [IPVS_SVC_ATTR_FLAGS
] = { .type
= NLA_BINARY
,
2585 .len
= sizeof(struct ip_vs_flags
) },
2586 [IPVS_SVC_ATTR_TIMEOUT
] = { .type
= NLA_U32
},
2587 [IPVS_SVC_ATTR_NETMASK
] = { .type
= NLA_U32
},
2588 [IPVS_SVC_ATTR_STATS
] = { .type
= NLA_NESTED
},
2591 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2592 static const struct nla_policy ip_vs_dest_policy
[IPVS_DEST_ATTR_MAX
+ 1] = {
2593 [IPVS_DEST_ATTR_ADDR
] = { .type
= NLA_BINARY
,
2594 .len
= sizeof(union nf_inet_addr
) },
2595 [IPVS_DEST_ATTR_PORT
] = { .type
= NLA_U16
},
2596 [IPVS_DEST_ATTR_FWD_METHOD
] = { .type
= NLA_U32
},
2597 [IPVS_DEST_ATTR_WEIGHT
] = { .type
= NLA_U32
},
2598 [IPVS_DEST_ATTR_U_THRESH
] = { .type
= NLA_U32
},
2599 [IPVS_DEST_ATTR_L_THRESH
] = { .type
= NLA_U32
},
2600 [IPVS_DEST_ATTR_ACTIVE_CONNS
] = { .type
= NLA_U32
},
2601 [IPVS_DEST_ATTR_INACT_CONNS
] = { .type
= NLA_U32
},
2602 [IPVS_DEST_ATTR_PERSIST_CONNS
] = { .type
= NLA_U32
},
2603 [IPVS_DEST_ATTR_STATS
] = { .type
= NLA_NESTED
},
2606 static int ip_vs_genl_fill_stats(struct sk_buff
*skb
, int container_type
,
2607 struct ip_vs_stats
*stats
)
2609 struct nlattr
*nl_stats
= nla_nest_start(skb
, container_type
);
2613 spin_lock_bh(&stats
->lock
);
2615 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_CONNS
, stats
->ustats
.conns
);
2616 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_INPKTS
, stats
->ustats
.inpkts
);
2617 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_OUTPKTS
, stats
->ustats
.outpkts
);
2618 NLA_PUT_U64(skb
, IPVS_STATS_ATTR_INBYTES
, stats
->ustats
.inbytes
);
2619 NLA_PUT_U64(skb
, IPVS_STATS_ATTR_OUTBYTES
, stats
->ustats
.outbytes
);
2620 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_CPS
, stats
->ustats
.cps
);
2621 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_INPPS
, stats
->ustats
.inpps
);
2622 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_OUTPPS
, stats
->ustats
.outpps
);
2623 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_INBPS
, stats
->ustats
.inbps
);
2624 NLA_PUT_U32(skb
, IPVS_STATS_ATTR_OUTBPS
, stats
->ustats
.outbps
);
2626 spin_unlock_bh(&stats
->lock
);
2628 nla_nest_end(skb
, nl_stats
);
2633 spin_unlock_bh(&stats
->lock
);
2634 nla_nest_cancel(skb
, nl_stats
);
2638 static int ip_vs_genl_fill_service(struct sk_buff
*skb
,
2639 struct ip_vs_service
*svc
)
2641 struct nlattr
*nl_service
;
2642 struct ip_vs_flags flags
= { .flags
= svc
->flags
,
2645 nl_service
= nla_nest_start(skb
, IPVS_CMD_ATTR_SERVICE
);
2649 NLA_PUT_U16(skb
, IPVS_SVC_ATTR_AF
, svc
->af
);
2652 NLA_PUT_U32(skb
, IPVS_SVC_ATTR_FWMARK
, svc
->fwmark
);
2654 NLA_PUT_U16(skb
, IPVS_SVC_ATTR_PROTOCOL
, svc
->protocol
);
2655 NLA_PUT(skb
, IPVS_SVC_ATTR_ADDR
, sizeof(svc
->addr
), &svc
->addr
);
2656 NLA_PUT_U16(skb
, IPVS_SVC_ATTR_PORT
, svc
->port
);
2659 NLA_PUT_STRING(skb
, IPVS_SVC_ATTR_SCHED_NAME
, svc
->scheduler
->name
);
2660 NLA_PUT(skb
, IPVS_SVC_ATTR_FLAGS
, sizeof(flags
), &flags
);
2661 NLA_PUT_U32(skb
, IPVS_SVC_ATTR_TIMEOUT
, svc
->timeout
/ HZ
);
2662 NLA_PUT_U32(skb
, IPVS_SVC_ATTR_NETMASK
, svc
->netmask
);
2664 if (ip_vs_genl_fill_stats(skb
, IPVS_SVC_ATTR_STATS
, &svc
->stats
))
2665 goto nla_put_failure
;
2667 nla_nest_end(skb
, nl_service
);
2672 nla_nest_cancel(skb
, nl_service
);
2676 static int ip_vs_genl_dump_service(struct sk_buff
*skb
,
2677 struct ip_vs_service
*svc
,
2678 struct netlink_callback
*cb
)
2682 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).pid
, cb
->nlh
->nlmsg_seq
,
2683 &ip_vs_genl_family
, NLM_F_MULTI
,
2684 IPVS_CMD_NEW_SERVICE
);
2688 if (ip_vs_genl_fill_service(skb
, svc
) < 0)
2689 goto nla_put_failure
;
2691 return genlmsg_end(skb
, hdr
);
2694 genlmsg_cancel(skb
, hdr
);
2698 static int ip_vs_genl_dump_services(struct sk_buff
*skb
,
2699 struct netlink_callback
*cb
)
2702 int start
= cb
->args
[0];
2703 struct ip_vs_service
*svc
;
2705 mutex_lock(&__ip_vs_mutex
);
2706 for (i
= 0; i
< IP_VS_SVC_TAB_SIZE
; i
++) {
2707 list_for_each_entry(svc
, &ip_vs_svc_table
[i
], s_list
) {
2710 if (ip_vs_genl_dump_service(skb
, svc
, cb
) < 0) {
2712 goto nla_put_failure
;
2717 for (i
= 0; i
< IP_VS_SVC_TAB_SIZE
; i
++) {
2718 list_for_each_entry(svc
, &ip_vs_svc_fwm_table
[i
], f_list
) {
2721 if (ip_vs_genl_dump_service(skb
, svc
, cb
) < 0) {
2723 goto nla_put_failure
;
2729 mutex_unlock(&__ip_vs_mutex
);
2735 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern
*usvc
,
2736 struct nlattr
*nla
, int full_entry
)
2738 struct nlattr
*attrs
[IPVS_SVC_ATTR_MAX
+ 1];
2739 struct nlattr
*nla_af
, *nla_port
, *nla_fwmark
, *nla_protocol
, *nla_addr
;
2741 /* Parse mandatory identifying service fields first */
2743 nla_parse_nested(attrs
, IPVS_SVC_ATTR_MAX
, nla
, ip_vs_svc_policy
))
2746 nla_af
= attrs
[IPVS_SVC_ATTR_AF
];
2747 nla_protocol
= attrs
[IPVS_SVC_ATTR_PROTOCOL
];
2748 nla_addr
= attrs
[IPVS_SVC_ATTR_ADDR
];
2749 nla_port
= attrs
[IPVS_SVC_ATTR_PORT
];
2750 nla_fwmark
= attrs
[IPVS_SVC_ATTR_FWMARK
];
2752 if (!(nla_af
&& (nla_fwmark
|| (nla_port
&& nla_protocol
&& nla_addr
))))
2755 memset(usvc
, 0, sizeof(*usvc
));
2757 usvc
->af
= nla_get_u16(nla_af
);
2758 #ifdef CONFIG_IP_VS_IPV6
2759 if (usvc
->af
!= AF_INET
&& usvc
->af
!= AF_INET6
)
2761 if (usvc
->af
!= AF_INET
)
2763 return -EAFNOSUPPORT
;
2766 usvc
->protocol
= IPPROTO_TCP
;
2767 usvc
->fwmark
= nla_get_u32(nla_fwmark
);
2769 usvc
->protocol
= nla_get_u16(nla_protocol
);
2770 nla_memcpy(&usvc
->addr
, nla_addr
, sizeof(usvc
->addr
));
2771 usvc
->port
= nla_get_u16(nla_port
);
2775 /* If a full entry was requested, check for the additional fields */
2777 struct nlattr
*nla_sched
, *nla_flags
, *nla_timeout
,
2779 struct ip_vs_flags flags
;
2780 struct ip_vs_service
*svc
;
2782 nla_sched
= attrs
[IPVS_SVC_ATTR_SCHED_NAME
];
2783 nla_flags
= attrs
[IPVS_SVC_ATTR_FLAGS
];
2784 nla_timeout
= attrs
[IPVS_SVC_ATTR_TIMEOUT
];
2785 nla_netmask
= attrs
[IPVS_SVC_ATTR_NETMASK
];
2787 if (!(nla_sched
&& nla_flags
&& nla_timeout
&& nla_netmask
))
2790 nla_memcpy(&flags
, nla_flags
, sizeof(flags
));
2792 /* prefill flags from service if it already exists */
2794 svc
= __ip_vs_svc_fwm_get(usvc
->af
, usvc
->fwmark
);
2796 svc
= __ip_vs_service_get(usvc
->af
, usvc
->protocol
,
2797 &usvc
->addr
, usvc
->port
);
2799 usvc
->flags
= svc
->flags
;
2800 ip_vs_service_put(svc
);
2804 /* set new flags from userland */
2805 usvc
->flags
= (usvc
->flags
& ~flags
.mask
) |
2806 (flags
.flags
& flags
.mask
);
2807 usvc
->sched_name
= nla_data(nla_sched
);
2808 usvc
->timeout
= nla_get_u32(nla_timeout
);
2809 usvc
->netmask
= nla_get_u32(nla_netmask
);
2815 static struct ip_vs_service
*ip_vs_genl_find_service(struct nlattr
*nla
)
2817 struct ip_vs_service_user_kern usvc
;
2820 ret
= ip_vs_genl_parse_service(&usvc
, nla
, 0);
2822 return ERR_PTR(ret
);
2825 return __ip_vs_svc_fwm_get(usvc
.af
, usvc
.fwmark
);
2827 return __ip_vs_service_get(usvc
.af
, usvc
.protocol
,
2828 &usvc
.addr
, usvc
.port
);
2831 static int ip_vs_genl_fill_dest(struct sk_buff
*skb
, struct ip_vs_dest
*dest
)
2833 struct nlattr
*nl_dest
;
2835 nl_dest
= nla_nest_start(skb
, IPVS_CMD_ATTR_DEST
);
2839 NLA_PUT(skb
, IPVS_DEST_ATTR_ADDR
, sizeof(dest
->addr
), &dest
->addr
);
2840 NLA_PUT_U16(skb
, IPVS_DEST_ATTR_PORT
, dest
->port
);
2842 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_FWD_METHOD
,
2843 atomic_read(&dest
->conn_flags
) & IP_VS_CONN_F_FWD_MASK
);
2844 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_WEIGHT
, atomic_read(&dest
->weight
));
2845 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_U_THRESH
, dest
->u_threshold
);
2846 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_L_THRESH
, dest
->l_threshold
);
2847 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_ACTIVE_CONNS
,
2848 atomic_read(&dest
->activeconns
));
2849 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_INACT_CONNS
,
2850 atomic_read(&dest
->inactconns
));
2851 NLA_PUT_U32(skb
, IPVS_DEST_ATTR_PERSIST_CONNS
,
2852 atomic_read(&dest
->persistconns
));
2854 if (ip_vs_genl_fill_stats(skb
, IPVS_DEST_ATTR_STATS
, &dest
->stats
))
2855 goto nla_put_failure
;
2857 nla_nest_end(skb
, nl_dest
);
2862 nla_nest_cancel(skb
, nl_dest
);
2866 static int ip_vs_genl_dump_dest(struct sk_buff
*skb
, struct ip_vs_dest
*dest
,
2867 struct netlink_callback
*cb
)
2871 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).pid
, cb
->nlh
->nlmsg_seq
,
2872 &ip_vs_genl_family
, NLM_F_MULTI
,
2877 if (ip_vs_genl_fill_dest(skb
, dest
) < 0)
2878 goto nla_put_failure
;
2880 return genlmsg_end(skb
, hdr
);
2883 genlmsg_cancel(skb
, hdr
);
2887 static int ip_vs_genl_dump_dests(struct sk_buff
*skb
,
2888 struct netlink_callback
*cb
)
2891 int start
= cb
->args
[0];
2892 struct ip_vs_service
*svc
;
2893 struct ip_vs_dest
*dest
;
2894 struct nlattr
*attrs
[IPVS_CMD_ATTR_MAX
+ 1];
2896 mutex_lock(&__ip_vs_mutex
);
2898 /* Try to find the service for which to dump destinations */
2899 if (nlmsg_parse(cb
->nlh
, GENL_HDRLEN
, attrs
,
2900 IPVS_CMD_ATTR_MAX
, ip_vs_cmd_policy
))
2903 svc
= ip_vs_genl_find_service(attrs
[IPVS_CMD_ATTR_SERVICE
]);
2904 if (IS_ERR(svc
) || svc
== NULL
)
2907 /* Dump the destinations */
2908 list_for_each_entry(dest
, &svc
->destinations
, n_list
) {
2911 if (ip_vs_genl_dump_dest(skb
, dest
, cb
) < 0) {
2913 goto nla_put_failure
;
2919 ip_vs_service_put(svc
);
2922 mutex_unlock(&__ip_vs_mutex
);
2927 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern
*udest
,
2928 struct nlattr
*nla
, int full_entry
)
2930 struct nlattr
*attrs
[IPVS_DEST_ATTR_MAX
+ 1];
2931 struct nlattr
*nla_addr
, *nla_port
;
2933 /* Parse mandatory identifying destination fields first */
2935 nla_parse_nested(attrs
, IPVS_DEST_ATTR_MAX
, nla
, ip_vs_dest_policy
))
2938 nla_addr
= attrs
[IPVS_DEST_ATTR_ADDR
];
2939 nla_port
= attrs
[IPVS_DEST_ATTR_PORT
];
2941 if (!(nla_addr
&& nla_port
))
2944 memset(udest
, 0, sizeof(*udest
));
2946 nla_memcpy(&udest
->addr
, nla_addr
, sizeof(udest
->addr
));
2947 udest
->port
= nla_get_u16(nla_port
);
2949 /* If a full entry was requested, check for the additional fields */
2951 struct nlattr
*nla_fwd
, *nla_weight
, *nla_u_thresh
,
2954 nla_fwd
= attrs
[IPVS_DEST_ATTR_FWD_METHOD
];
2955 nla_weight
= attrs
[IPVS_DEST_ATTR_WEIGHT
];
2956 nla_u_thresh
= attrs
[IPVS_DEST_ATTR_U_THRESH
];
2957 nla_l_thresh
= attrs
[IPVS_DEST_ATTR_L_THRESH
];
2959 if (!(nla_fwd
&& nla_weight
&& nla_u_thresh
&& nla_l_thresh
))
2962 udest
->conn_flags
= nla_get_u32(nla_fwd
)
2963 & IP_VS_CONN_F_FWD_MASK
;
2964 udest
->weight
= nla_get_u32(nla_weight
);
2965 udest
->u_threshold
= nla_get_u32(nla_u_thresh
);
2966 udest
->l_threshold
= nla_get_u32(nla_l_thresh
);
2972 static int ip_vs_genl_fill_daemon(struct sk_buff
*skb
, __be32 state
,
2973 const char *mcast_ifn
, __be32 syncid
)
2975 struct nlattr
*nl_daemon
;
2977 nl_daemon
= nla_nest_start(skb
, IPVS_CMD_ATTR_DAEMON
);
2981 NLA_PUT_U32(skb
, IPVS_DAEMON_ATTR_STATE
, state
);
2982 NLA_PUT_STRING(skb
, IPVS_DAEMON_ATTR_MCAST_IFN
, mcast_ifn
);
2983 NLA_PUT_U32(skb
, IPVS_DAEMON_ATTR_SYNC_ID
, syncid
);
2985 nla_nest_end(skb
, nl_daemon
);
2990 nla_nest_cancel(skb
, nl_daemon
);
2994 static int ip_vs_genl_dump_daemon(struct sk_buff
*skb
, __be32 state
,
2995 const char *mcast_ifn
, __be32 syncid
,
2996 struct netlink_callback
*cb
)
2999 hdr
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).pid
, cb
->nlh
->nlmsg_seq
,
3000 &ip_vs_genl_family
, NLM_F_MULTI
,
3001 IPVS_CMD_NEW_DAEMON
);
3005 if (ip_vs_genl_fill_daemon(skb
, state
, mcast_ifn
, syncid
))
3006 goto nla_put_failure
;
3008 return genlmsg_end(skb
, hdr
);
3011 genlmsg_cancel(skb
, hdr
);
3015 static int ip_vs_genl_dump_daemons(struct sk_buff
*skb
,
3016 struct netlink_callback
*cb
)
3018 mutex_lock(&__ip_vs_mutex
);
3019 if ((ip_vs_sync_state
& IP_VS_STATE_MASTER
) && !cb
->args
[0]) {
3020 if (ip_vs_genl_dump_daemon(skb
, IP_VS_STATE_MASTER
,
3021 ip_vs_master_mcast_ifn
,
3022 ip_vs_master_syncid
, cb
) < 0)
3023 goto nla_put_failure
;
3028 if ((ip_vs_sync_state
& IP_VS_STATE_BACKUP
) && !cb
->args
[1]) {
3029 if (ip_vs_genl_dump_daemon(skb
, IP_VS_STATE_BACKUP
,
3030 ip_vs_backup_mcast_ifn
,
3031 ip_vs_backup_syncid
, cb
) < 0)
3032 goto nla_put_failure
;
3038 mutex_unlock(&__ip_vs_mutex
);
3043 static int ip_vs_genl_new_daemon(struct nlattr
**attrs
)
3045 if (!(attrs
[IPVS_DAEMON_ATTR_STATE
] &&
3046 attrs
[IPVS_DAEMON_ATTR_MCAST_IFN
] &&
3047 attrs
[IPVS_DAEMON_ATTR_SYNC_ID
]))
3050 return start_sync_thread(nla_get_u32(attrs
[IPVS_DAEMON_ATTR_STATE
]),
3051 nla_data(attrs
[IPVS_DAEMON_ATTR_MCAST_IFN
]),
3052 nla_get_u32(attrs
[IPVS_DAEMON_ATTR_SYNC_ID
]));
3055 static int ip_vs_genl_del_daemon(struct nlattr
**attrs
)
3057 if (!attrs
[IPVS_DAEMON_ATTR_STATE
])
3060 return stop_sync_thread(nla_get_u32(attrs
[IPVS_DAEMON_ATTR_STATE
]));
3063 static int ip_vs_genl_set_config(struct nlattr
**attrs
)
3065 struct ip_vs_timeout_user t
;
3067 __ip_vs_get_timeouts(&t
);
3069 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP
])
3070 t
.tcp_timeout
= nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP
]);
3072 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
])
3074 nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
]);
3076 if (attrs
[IPVS_CMD_ATTR_TIMEOUT_UDP
])
3077 t
.udp_timeout
= nla_get_u32(attrs
[IPVS_CMD_ATTR_TIMEOUT_UDP
]);
3079 return ip_vs_set_timeout(&t
);
3082 static int ip_vs_genl_set_cmd(struct sk_buff
*skb
, struct genl_info
*info
)
3084 struct ip_vs_service
*svc
= NULL
;
3085 struct ip_vs_service_user_kern usvc
;
3086 struct ip_vs_dest_user_kern udest
;
3088 int need_full_svc
= 0, need_full_dest
= 0;
3090 cmd
= info
->genlhdr
->cmd
;
3092 mutex_lock(&__ip_vs_mutex
);
3094 if (cmd
== IPVS_CMD_FLUSH
) {
3095 ret
= ip_vs_flush();
3097 } else if (cmd
== IPVS_CMD_SET_CONFIG
) {
3098 ret
= ip_vs_genl_set_config(info
->attrs
);
3100 } else if (cmd
== IPVS_CMD_NEW_DAEMON
||
3101 cmd
== IPVS_CMD_DEL_DAEMON
) {
3103 struct nlattr
*daemon_attrs
[IPVS_DAEMON_ATTR_MAX
+ 1];
3105 if (!info
->attrs
[IPVS_CMD_ATTR_DAEMON
] ||
3106 nla_parse_nested(daemon_attrs
, IPVS_DAEMON_ATTR_MAX
,
3107 info
->attrs
[IPVS_CMD_ATTR_DAEMON
],
3108 ip_vs_daemon_policy
)) {
3113 if (cmd
== IPVS_CMD_NEW_DAEMON
)
3114 ret
= ip_vs_genl_new_daemon(daemon_attrs
);
3116 ret
= ip_vs_genl_del_daemon(daemon_attrs
);
3118 } else if (cmd
== IPVS_CMD_ZERO
&&
3119 !info
->attrs
[IPVS_CMD_ATTR_SERVICE
]) {
3120 ret
= ip_vs_zero_all();
3124 /* All following commands require a service argument, so check if we
3125 * received a valid one. We need a full service specification when
3126 * adding / editing a service. Only identifying members otherwise. */
3127 if (cmd
== IPVS_CMD_NEW_SERVICE
|| cmd
== IPVS_CMD_SET_SERVICE
)
3130 ret
= ip_vs_genl_parse_service(&usvc
,
3131 info
->attrs
[IPVS_CMD_ATTR_SERVICE
],
3136 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3137 if (usvc
.fwmark
== 0)
3138 svc
= __ip_vs_service_get(usvc
.af
, usvc
.protocol
,
3139 &usvc
.addr
, usvc
.port
);
3141 svc
= __ip_vs_svc_fwm_get(usvc
.af
, usvc
.fwmark
);
3143 /* Unless we're adding a new service, the service must already exist */
3144 if ((cmd
!= IPVS_CMD_NEW_SERVICE
) && (svc
== NULL
)) {
3149 /* Destination commands require a valid destination argument. For
3150 * adding / editing a destination, we need a full destination
3152 if (cmd
== IPVS_CMD_NEW_DEST
|| cmd
== IPVS_CMD_SET_DEST
||
3153 cmd
== IPVS_CMD_DEL_DEST
) {
3154 if (cmd
!= IPVS_CMD_DEL_DEST
)
3157 ret
= ip_vs_genl_parse_dest(&udest
,
3158 info
->attrs
[IPVS_CMD_ATTR_DEST
],
3165 case IPVS_CMD_NEW_SERVICE
:
3167 ret
= ip_vs_add_service(&usvc
, &svc
);
3171 case IPVS_CMD_SET_SERVICE
:
3172 ret
= ip_vs_edit_service(svc
, &usvc
);
3174 case IPVS_CMD_DEL_SERVICE
:
3175 ret
= ip_vs_del_service(svc
);
3177 case IPVS_CMD_NEW_DEST
:
3178 ret
= ip_vs_add_dest(svc
, &udest
);
3180 case IPVS_CMD_SET_DEST
:
3181 ret
= ip_vs_edit_dest(svc
, &udest
);
3183 case IPVS_CMD_DEL_DEST
:
3184 ret
= ip_vs_del_dest(svc
, &udest
);
3187 ret
= ip_vs_zero_service(svc
);
3195 ip_vs_service_put(svc
);
3196 mutex_unlock(&__ip_vs_mutex
);
3201 static int ip_vs_genl_get_cmd(struct sk_buff
*skb
, struct genl_info
*info
)
3203 struct sk_buff
*msg
;
3205 int ret
, cmd
, reply_cmd
;
3207 cmd
= info
->genlhdr
->cmd
;
3209 if (cmd
== IPVS_CMD_GET_SERVICE
)
3210 reply_cmd
= IPVS_CMD_NEW_SERVICE
;
3211 else if (cmd
== IPVS_CMD_GET_INFO
)
3212 reply_cmd
= IPVS_CMD_SET_INFO
;
3213 else if (cmd
== IPVS_CMD_GET_CONFIG
)
3214 reply_cmd
= IPVS_CMD_SET_CONFIG
;
3216 pr_err("unknown Generic Netlink command\n");
3220 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
3224 mutex_lock(&__ip_vs_mutex
);
3226 reply
= genlmsg_put_reply(msg
, info
, &ip_vs_genl_family
, 0, reply_cmd
);
3228 goto nla_put_failure
;
3231 case IPVS_CMD_GET_SERVICE
:
3233 struct ip_vs_service
*svc
;
3235 svc
= ip_vs_genl_find_service(info
->attrs
[IPVS_CMD_ATTR_SERVICE
]);
3240 ret
= ip_vs_genl_fill_service(msg
, svc
);
3241 ip_vs_service_put(svc
);
3243 goto nla_put_failure
;
3252 case IPVS_CMD_GET_CONFIG
:
3254 struct ip_vs_timeout_user t
;
3256 __ip_vs_get_timeouts(&t
);
3257 #ifdef CONFIG_IP_VS_PROTO_TCP
3258 NLA_PUT_U32(msg
, IPVS_CMD_ATTR_TIMEOUT_TCP
, t
.tcp_timeout
);
3259 NLA_PUT_U32(msg
, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN
,
3262 #ifdef CONFIG_IP_VS_PROTO_UDP
3263 NLA_PUT_U32(msg
, IPVS_CMD_ATTR_TIMEOUT_UDP
, t
.udp_timeout
);
3269 case IPVS_CMD_GET_INFO
:
3270 NLA_PUT_U32(msg
, IPVS_INFO_ATTR_VERSION
, IP_VS_VERSION_CODE
);
3271 NLA_PUT_U32(msg
, IPVS_INFO_ATTR_CONN_TAB_SIZE
,
3272 ip_vs_conn_tab_size
);
3276 genlmsg_end(msg
, reply
);
3277 ret
= genlmsg_reply(msg
, info
);
3281 pr_err("not enough space in Netlink message\n");
3287 mutex_unlock(&__ip_vs_mutex
);
3293 static struct genl_ops ip_vs_genl_ops
[] __read_mostly
= {
3295 .cmd
= IPVS_CMD_NEW_SERVICE
,
3296 .flags
= GENL_ADMIN_PERM
,
3297 .policy
= ip_vs_cmd_policy
,
3298 .doit
= ip_vs_genl_set_cmd
,
3301 .cmd
= IPVS_CMD_SET_SERVICE
,
3302 .flags
= GENL_ADMIN_PERM
,
3303 .policy
= ip_vs_cmd_policy
,
3304 .doit
= ip_vs_genl_set_cmd
,
3307 .cmd
= IPVS_CMD_DEL_SERVICE
,
3308 .flags
= GENL_ADMIN_PERM
,
3309 .policy
= ip_vs_cmd_policy
,
3310 .doit
= ip_vs_genl_set_cmd
,
3313 .cmd
= IPVS_CMD_GET_SERVICE
,
3314 .flags
= GENL_ADMIN_PERM
,
3315 .doit
= ip_vs_genl_get_cmd
,
3316 .dumpit
= ip_vs_genl_dump_services
,
3317 .policy
= ip_vs_cmd_policy
,
3320 .cmd
= IPVS_CMD_NEW_DEST
,
3321 .flags
= GENL_ADMIN_PERM
,
3322 .policy
= ip_vs_cmd_policy
,
3323 .doit
= ip_vs_genl_set_cmd
,
3326 .cmd
= IPVS_CMD_SET_DEST
,
3327 .flags
= GENL_ADMIN_PERM
,
3328 .policy
= ip_vs_cmd_policy
,
3329 .doit
= ip_vs_genl_set_cmd
,
3332 .cmd
= IPVS_CMD_DEL_DEST
,
3333 .flags
= GENL_ADMIN_PERM
,
3334 .policy
= ip_vs_cmd_policy
,
3335 .doit
= ip_vs_genl_set_cmd
,
3338 .cmd
= IPVS_CMD_GET_DEST
,
3339 .flags
= GENL_ADMIN_PERM
,
3340 .policy
= ip_vs_cmd_policy
,
3341 .dumpit
= ip_vs_genl_dump_dests
,
3344 .cmd
= IPVS_CMD_NEW_DAEMON
,
3345 .flags
= GENL_ADMIN_PERM
,
3346 .policy
= ip_vs_cmd_policy
,
3347 .doit
= ip_vs_genl_set_cmd
,
3350 .cmd
= IPVS_CMD_DEL_DAEMON
,
3351 .flags
= GENL_ADMIN_PERM
,
3352 .policy
= ip_vs_cmd_policy
,
3353 .doit
= ip_vs_genl_set_cmd
,
3356 .cmd
= IPVS_CMD_GET_DAEMON
,
3357 .flags
= GENL_ADMIN_PERM
,
3358 .dumpit
= ip_vs_genl_dump_daemons
,
3361 .cmd
= IPVS_CMD_SET_CONFIG
,
3362 .flags
= GENL_ADMIN_PERM
,
3363 .policy
= ip_vs_cmd_policy
,
3364 .doit
= ip_vs_genl_set_cmd
,
3367 .cmd
= IPVS_CMD_GET_CONFIG
,
3368 .flags
= GENL_ADMIN_PERM
,
3369 .doit
= ip_vs_genl_get_cmd
,
3372 .cmd
= IPVS_CMD_GET_INFO
,
3373 .flags
= GENL_ADMIN_PERM
,
3374 .doit
= ip_vs_genl_get_cmd
,
3377 .cmd
= IPVS_CMD_ZERO
,
3378 .flags
= GENL_ADMIN_PERM
,
3379 .policy
= ip_vs_cmd_policy
,
3380 .doit
= ip_vs_genl_set_cmd
,
3383 .cmd
= IPVS_CMD_FLUSH
,
3384 .flags
= GENL_ADMIN_PERM
,
3385 .doit
= ip_vs_genl_set_cmd
,
3389 static int __init
ip_vs_genl_register(void)
3391 return genl_register_family_with_ops(&ip_vs_genl_family
,
3392 ip_vs_genl_ops
, ARRAY_SIZE(ip_vs_genl_ops
));
3395 static void ip_vs_genl_unregister(void)
3397 genl_unregister_family(&ip_vs_genl_family
);
3400 /* End of Generic Netlink interface definitions */
/*
 * Module-init entry for the IPVS control interface: registers the
 * netfilter sockopt handlers and the Generic Netlink family, creates
 * the /proc/net entries and sysctl table, initializes the service /
 * fwmark / real-server hash tables, hooks the stats estimator, and
 * starts the periodic defense timer.
 *
 * NOTE(review): the extraction dropped several lines from this
 * function (local declarations, error returns, closing braces) —
 * the surviving fragments below are kept byte-identical.
 */
3403 int __init
ip_vs_control_init(void)
/* Register [gs]etsockopt handlers; failure aborts init. */
3410 ret
= nf_register_sockopt(&ip_vs_sockopts
);
3412 pr_err("cannot register sockopt.\n");
/* Register Generic Netlink; on failure, roll back the sockopt
 * registration before returning. */
3416 ret
= ip_vs_genl_register();
3418 pr_err("cannot register Generic Netlink interface.\n");
3419 nf_unregister_sockopt(&ip_vs_sockopts
);
/* /proc/net/ip_vs and /proc/net/ip_vs_stats entries */
3423 proc_net_fops_create(&init_net
, "ip_vs", 0, &ip_vs_info_fops
);
3424 proc_net_fops_create(&init_net
, "ip_vs_stats",0, &ip_vs_stats_fops
);
/* sysctl table under net/ipv4/vs (unregistered in cleanup) */
3426 sysctl_header
= register_sysctl_paths(net_vs_ctl_path
, vs_vars
);
3428 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3429 for(idx
= 0; idx
< IP_VS_SVC_TAB_SIZE
; idx
++) {
3430 INIT_LIST_HEAD(&ip_vs_svc_table
[idx
]);
3431 INIT_LIST_HEAD(&ip_vs_svc_fwm_table
[idx
]);
3433 for(idx
= 0; idx
< IP_VS_RTAB_SIZE
; idx
++) {
3434 INIT_LIST_HEAD(&ip_vs_rtable
[idx
]);
/* Attach the rate estimator to the global stats object */
3437 ip_vs_new_estimator(&ip_vs_stats
);
3439 /* Hook the defense timer */
3440 schedule_delayed_work(&defense_work
, DEFENSE_TIMER_PERIOD
);
3447 void ip_vs_control_cleanup(void)
3450 ip_vs_trash_cleanup();
3451 cancel_rearming_delayed_work(&defense_work
);
3452 cancel_work_sync(&defense_work
.work
);
3453 ip_vs_kill_estimator(&ip_vs_stats
);
3454 unregister_sysctl_table(sysctl_header
);
3455 proc_net_remove(&init_net
, "ip_vs_stats");
3456 proc_net_remove(&init_net
, "ip_vs");
3457 ip_vs_genl_unregister();
3458 nf_unregister_sockopt(&ip_vs_sockopts
);