IPVS: netns, svc counters moved in ip_vs_ctl,c
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; file-scope statics start out as zero. */
static int sysctl_ip_vs_debug_level;

/* Report the IPVS debug level currently stored in the sysctl variable. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
70#endif
71
09571c7a
VB
72#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
74static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
75{
76 struct rt6_info *rt;
77 struct flowi fl = {
78 .oif = 0,
5811662b
CG
79 .fl6_dst = *addr,
80 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
09571c7a
VB
81 };
82
83 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
84 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
85 return 1;
86
87 return 0;
88}
89#endif
1da177e4 90/*
af9debd4
JA
91 * update_defense_level is called from keventd and from sysctl,
92 * so it needs to protect itself from softirqs
1da177e4 93 */
9330419d 94static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
95{
96 struct sysinfo i;
97 static int old_secure_tcp = 0;
98 int availmem;
99 int nomem;
100 int to_change = -1;
101
102 /* we only count free and buffered memory (in pages) */
103 si_meminfo(&i);
104 availmem = i.freeram + i.bufferram;
105 /* however in linux 2.5 the i.bufferram is total page cache size,
106 we need adjust it */
107 /* si_swapinfo(&i); */
108 /* availmem = availmem - (i.totalswap - i.freeswap); */
109
a0840e2e 110 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 111
af9debd4
JA
112 local_bh_disable();
113
1da177e4 114 /* drop_entry */
a0840e2e
HS
115 spin_lock(&ipvs->dropentry_lock);
116 switch (ipvs->sysctl_drop_entry) {
1da177e4 117 case 0:
a0840e2e 118 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
119 break;
120 case 1:
121 if (nomem) {
a0840e2e
HS
122 atomic_set(&ipvs->dropentry, 1);
123 ipvs->sysctl_drop_entry = 2;
1da177e4 124 } else {
a0840e2e 125 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
126 }
127 break;
128 case 2:
129 if (nomem) {
a0840e2e 130 atomic_set(&ipvs->dropentry, 1);
1da177e4 131 } else {
a0840e2e
HS
132 atomic_set(&ipvs->dropentry, 0);
133 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
134 };
135 break;
136 case 3:
a0840e2e 137 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
138 break;
139 }
a0840e2e 140 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
141
142 /* drop_packet */
a0840e2e
HS
143 spin_lock(&ipvs->droppacket_lock);
144 switch (ipvs->sysctl_drop_packet) {
1da177e4 145 case 0:
a0840e2e 146 ipvs->drop_rate = 0;
1da177e4
LT
147 break;
148 case 1:
149 if (nomem) {
a0840e2e
HS
150 ipvs->drop_rate = ipvs->drop_counter
151 = ipvs->sysctl_amemthresh /
152 (ipvs->sysctl_amemthresh-availmem);
153 ipvs->sysctl_drop_packet = 2;
1da177e4 154 } else {
a0840e2e 155 ipvs->drop_rate = 0;
1da177e4
LT
156 }
157 break;
158 case 2:
159 if (nomem) {
a0840e2e
HS
160 ipvs->drop_rate = ipvs->drop_counter
161 = ipvs->sysctl_amemthresh /
162 (ipvs->sysctl_amemthresh-availmem);
1da177e4 163 } else {
a0840e2e
HS
164 ipvs->drop_rate = 0;
165 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
166 }
167 break;
168 case 3:
a0840e2e 169 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
170 break;
171 }
a0840e2e 172 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
173
174 /* secure_tcp */
a0840e2e
HS
175 spin_lock(&ipvs->securetcp_lock);
176 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
177 case 0:
178 if (old_secure_tcp >= 2)
179 to_change = 0;
180 break;
181 case 1:
182 if (nomem) {
183 if (old_secure_tcp < 2)
184 to_change = 1;
a0840e2e 185 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
186 } else {
187 if (old_secure_tcp >= 2)
188 to_change = 0;
189 }
190 break;
191 case 2:
192 if (nomem) {
193 if (old_secure_tcp < 2)
194 to_change = 1;
195 } else {
196 if (old_secure_tcp >= 2)
197 to_change = 0;
a0840e2e 198 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
199 }
200 break;
201 case 3:
202 if (old_secure_tcp < 2)
203 to_change = 1;
204 break;
205 }
a0840e2e 206 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 207 if (to_change >= 0)
9330419d 208 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
209 ipvs->sysctl_secure_tcp > 1);
210 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
211
212 local_bh_enable();
1da177e4
LT
213}
214
215
216/*
217 * Timer for checking the defense
218 */
219#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 220
c4028958 221static void defense_work_handler(struct work_struct *work)
1da177e4 222{
f6340ee0
HS
223 struct netns_ipvs *ipvs =
224 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
225
226 update_defense_level(ipvs);
a0840e2e 227 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
228 ip_vs_random_dropentry(ipvs->net);
229 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4
LT
230}
231
232int
233ip_vs_use_count_inc(void)
234{
235 return try_module_get(THIS_MODULE);
236}
237
238void
239ip_vs_use_count_dec(void)
240{
241 module_put(THIS_MODULE);
242}
243
244
245/*
246 * Hash table: for virtual service lookups
247 */
248#define IP_VS_SVC_TAB_BITS 8
249#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
250#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
251
252/* the service table hashed by <protocol, addr, port> */
253static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
254/* the service table hashed by fwmark */
255static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
256
1da177e4
LT
257
258/*
259 * Returns hash value for virtual service
260 */
fc723250
HS
261static inline unsigned
262ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
263 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
264{
265 register unsigned porth = ntohs(port);
b18610de 266 __be32 addr_fold = addr->ip;
1da177e4 267
b18610de
JV
268#ifdef CONFIG_IP_VS_IPV6
269 if (af == AF_INET6)
270 addr_fold = addr->ip6[0]^addr->ip6[1]^
271 addr->ip6[2]^addr->ip6[3];
272#endif
fc723250 273 addr_fold ^= ((size_t)net>>8);
b18610de
JV
274
275 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
276 & IP_VS_SVC_TAB_MASK;
277}
278
279/*
280 * Returns hash value of fwmark for virtual service lookup
281 */
fc723250 282static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 283{
fc723250 284 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
285}
286
287/*
fc723250 288 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
289 * or in the ip_vs_svc_fwm_table by fwmark.
290 * Should be called with locked tables.
291 */
292static int ip_vs_svc_hash(struct ip_vs_service *svc)
293{
294 unsigned hash;
295
296 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
297 pr_err("%s(): request for already hashed, called from %pF\n",
298 __func__, __builtin_return_address(0));
1da177e4
LT
299 return 0;
300 }
301
302 if (svc->fwmark == 0) {
303 /*
fc723250 304 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 305 */
fc723250
HS
306 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
307 &svc->addr, svc->port);
1da177e4
LT
308 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
309 } else {
310 /*
fc723250 311 * Hash it by fwmark in svc_fwm_table
1da177e4 312 */
fc723250 313 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
314 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
315 }
316
317 svc->flags |= IP_VS_SVC_F_HASHED;
318 /* increase its refcnt because it is referenced by the svc table */
319 atomic_inc(&svc->refcnt);
320 return 1;
321}
322
323
324/*
fc723250 325 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
326 * Should be called with locked tables.
327 */
328static int ip_vs_svc_unhash(struct ip_vs_service *svc)
329{
330 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
331 pr_err("%s(): request for unhash flagged, called from %pF\n",
332 __func__, __builtin_return_address(0));
1da177e4
LT
333 return 0;
334 }
335
336 if (svc->fwmark == 0) {
fc723250 337 /* Remove it from the svc_table table */
1da177e4
LT
338 list_del(&svc->s_list);
339 } else {
fc723250 340 /* Remove it from the svc_fwm_table table */
1da177e4
LT
341 list_del(&svc->f_list);
342 }
343
344 svc->flags &= ~IP_VS_SVC_F_HASHED;
345 atomic_dec(&svc->refcnt);
346 return 1;
347}
348
349
350/*
fc723250 351 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 352 */
b18610de 353static inline struct ip_vs_service *
fc723250
HS
354__ip_vs_service_find(struct net *net, int af, __u16 protocol,
355 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
356{
357 unsigned hash;
358 struct ip_vs_service *svc;
359
360 /* Check for "full" addressed entries */
fc723250 361 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
362
363 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
364 if ((svc->af == af)
365 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 366 && (svc->port == vport)
fc723250
HS
367 && (svc->protocol == protocol)
368 && net_eq(svc->net, net)) {
1da177e4 369 /* HIT */
1da177e4
LT
370 return svc;
371 }
372 }
373
374 return NULL;
375}
376
377
378/*
379 * Get service by {fwmark} in the service table.
380 */
b18610de 381static inline struct ip_vs_service *
fc723250 382__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
383{
384 unsigned hash;
385 struct ip_vs_service *svc;
386
387 /* Check for fwmark addressed entries */
fc723250 388 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
389
390 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
391 if (svc->fwmark == fwmark && svc->af == af
392 && net_eq(svc->net, net)) {
1da177e4 393 /* HIT */
1da177e4
LT
394 return svc;
395 }
396 }
397
398 return NULL;
399}
400
401struct ip_vs_service *
fc723250 402ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 403 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
404{
405 struct ip_vs_service *svc;
763f8d0e 406 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 407
1da177e4
LT
408 read_lock(&__ip_vs_svc_lock);
409
410 /*
411 * Check the table hashed by fwmark first
412 */
fc723250
HS
413 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
414 if (fwmark && svc)
1da177e4
LT
415 goto out;
416
417 /*
418 * Check the table hashed by <protocol,addr,port>
419 * for "full" addressed entries
420 */
fc723250 421 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
422
423 if (svc == NULL
424 && protocol == IPPROTO_TCP
763f8d0e 425 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
426 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
427 /*
428 * Check if ftp service entry exists, the packet
429 * might belong to FTP data connections.
430 */
fc723250 431 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
432 }
433
434 if (svc == NULL
763f8d0e 435 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
436 /*
437 * Check if the catch-all port (port zero) exists
438 */
fc723250 439 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
440 }
441
442 out:
26c15cfd
JA
443 if (svc)
444 atomic_inc(&svc->usecnt);
1da177e4
LT
445 read_unlock(&__ip_vs_svc_lock);
446
3c2e0505
JV
447 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
448 fwmark, ip_vs_proto_name(protocol),
449 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
450 svc ? "hit" : "not hit");
1da177e4
LT
451
452 return svc;
453}
454
455
456static inline void
457__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
458{
459 atomic_inc(&svc->refcnt);
460 dest->svc = svc;
461}
462
26c15cfd 463static void
1da177e4
LT
464__ip_vs_unbind_svc(struct ip_vs_dest *dest)
465{
466 struct ip_vs_service *svc = dest->svc;
467
468 dest->svc = NULL;
26c15cfd
JA
469 if (atomic_dec_and_test(&svc->refcnt)) {
470 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
471 svc->fwmark,
472 IP_VS_DBG_ADDR(svc->af, &svc->addr),
473 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 474 free_percpu(svc->stats.cpustats);
1da177e4 475 kfree(svc);
26c15cfd 476 }
1da177e4
LT
477}
478
479
480/*
481 * Returns hash value for real service
482 */
7937df15
JV
483static inline unsigned ip_vs_rs_hashkey(int af,
484 const union nf_inet_addr *addr,
485 __be16 port)
1da177e4
LT
486{
487 register unsigned porth = ntohs(port);
7937df15
JV
488 __be32 addr_fold = addr->ip;
489
490#ifdef CONFIG_IP_VS_IPV6
491 if (af == AF_INET6)
492 addr_fold = addr->ip6[0]^addr->ip6[1]^
493 addr->ip6[2]^addr->ip6[3];
494#endif
1da177e4 495
7937df15 496 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
497 & IP_VS_RTAB_MASK;
498}
499
500/*
fc723250 501 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
502 * should be called with locked tables.
503 */
fc723250 504static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
505{
506 unsigned hash;
507
508 if (!list_empty(&dest->d_list)) {
509 return 0;
510 }
511
512 /*
513 * Hash by proto,addr,port,
514 * which are the parameters of the real service.
515 */
7937df15
JV
516 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
517
fc723250 518 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
519
520 return 1;
521}
522
523/*
fc723250 524 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
525 * should be called with locked tables.
526 */
527static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
528{
529 /*
fc723250 530 * Remove it from the rs_table table.
1da177e4
LT
531 */
532 if (!list_empty(&dest->d_list)) {
533 list_del(&dest->d_list);
534 INIT_LIST_HEAD(&dest->d_list);
535 }
536
537 return 1;
538}
539
540/*
541 * Lookup real service by <proto,addr,port> in the real service table.
542 */
543struct ip_vs_dest *
fc723250 544ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
545 const union nf_inet_addr *daddr,
546 __be16 dport)
1da177e4 547{
fc723250 548 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
549 unsigned hash;
550 struct ip_vs_dest *dest;
551
552 /*
553 * Check for "full" addressed entries
554 * Return the first found entry
555 */
7937df15 556 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 557
a0840e2e 558 read_lock(&ipvs->rs_lock);
fc723250 559 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
560 if ((dest->af == af)
561 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
562 && (dest->port == dport)
563 && ((dest->protocol == protocol) ||
564 dest->vfwmark)) {
565 /* HIT */
a0840e2e 566 read_unlock(&ipvs->rs_lock);
1da177e4
LT
567 return dest;
568 }
569 }
a0840e2e 570 read_unlock(&ipvs->rs_lock);
1da177e4
LT
571
572 return NULL;
573}
574
575/*
576 * Lookup destination by {addr,port} in the given service
577 */
578static struct ip_vs_dest *
7937df15
JV
579ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
580 __be16 dport)
1da177e4
LT
581{
582 struct ip_vs_dest *dest;
583
584 /*
585 * Find the destination for the given service
586 */
587 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
588 if ((dest->af == svc->af)
589 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
590 && (dest->port == dport)) {
1da177e4
LT
591 /* HIT */
592 return dest;
593 }
594 }
595
596 return NULL;
597}
598
1e356f9c
RB
599/*
600 * Find destination by {daddr,dport,vaddr,protocol}
601 * Cretaed to be used in ip_vs_process_message() in
602 * the backup synchronization daemon. It finds the
603 * destination to be bound to the received connection
604 * on the backup.
605 *
606 * ip_vs_lookup_real_service() looked promissing, but
607 * seems not working as expected.
608 */
fc723250
HS
609struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
610 const union nf_inet_addr *daddr,
7937df15
JV
611 __be16 dport,
612 const union nf_inet_addr *vaddr,
0e051e68 613 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
614{
615 struct ip_vs_dest *dest;
616 struct ip_vs_service *svc;
617
fc723250 618 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
619 if (!svc)
620 return NULL;
621 dest = ip_vs_lookup_dest(svc, daddr, dport);
622 if (dest)
623 atomic_inc(&dest->refcnt);
624 ip_vs_service_put(svc);
625 return dest;
626}
1da177e4
LT
627
628/*
629 * Lookup dest by {svc,addr,port} in the destination trash.
630 * The destination trash is used to hold the destinations that are removed
631 * from the service table but are still referenced by some conn entries.
632 * The reason to add the destination trash is when the dest is temporary
633 * down (either by administrator or by monitor program), the dest can be
634 * picked back from the trash, the remaining connections to the dest can
635 * continue, and the counting information of the dest is also useful for
636 * scheduling.
637 */
638static struct ip_vs_dest *
7937df15
JV
639ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
640 __be16 dport)
1da177e4
LT
641{
642 struct ip_vs_dest *dest, *nxt;
f2431e6e 643 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
644
645 /*
646 * Find the destination in trash
647 */
f2431e6e 648 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
649 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
650 "dest->refcnt=%d\n",
651 dest->vfwmark,
652 IP_VS_DBG_ADDR(svc->af, &dest->addr),
653 ntohs(dest->port),
654 atomic_read(&dest->refcnt));
655 if (dest->af == svc->af &&
656 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
657 dest->port == dport &&
658 dest->vfwmark == svc->fwmark &&
659 dest->protocol == svc->protocol &&
660 (svc->fwmark ||
7937df15 661 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
662 dest->vport == svc->port))) {
663 /* HIT */
664 return dest;
665 }
666
667 /*
668 * Try to purge the destination from trash if not referenced
669 */
670 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
671 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
672 "from trash\n",
673 dest->vfwmark,
674 IP_VS_DBG_ADDR(svc->af, &dest->addr),
675 ntohs(dest->port));
1da177e4
LT
676 list_del(&dest->n_list);
677 ip_vs_dst_reset(dest);
678 __ip_vs_unbind_svc(dest);
b17fc996 679 free_percpu(dest->stats.cpustats);
1da177e4
LT
680 kfree(dest);
681 }
682 }
683
684 return NULL;
685}
686
687
688/*
689 * Clean up all the destinations in the trash
690 * Called by the ip_vs_control_cleanup()
691 *
692 * When the ip_vs_control_clearup is activated by ipvs module exit,
693 * the service tables must have been flushed and all the connections
694 * are expired, and the refcnt of each destination in the trash must
695 * be 1, so we simply release them here.
696 */
f2431e6e 697static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
698{
699 struct ip_vs_dest *dest, *nxt;
f2431e6e 700 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 701
f2431e6e 702 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4
LT
703 list_del(&dest->n_list);
704 ip_vs_dst_reset(dest);
705 __ip_vs_unbind_svc(dest);
b17fc996 706 free_percpu(dest->stats.cpustats);
1da177e4
LT
707 kfree(dest);
708 }
709}
710
711
712static void
713ip_vs_zero_stats(struct ip_vs_stats *stats)
714{
715 spin_lock_bh(&stats->lock);
e93615d0 716
e9c0ce23 717 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 718 ip_vs_zero_estimator(stats);
e93615d0 719
3a14a313 720 spin_unlock_bh(&stats->lock);
1da177e4
LT
721}
722
723/*
724 * Update a destination in the given service
725 */
726static void
26c15cfd
JA
727__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
728 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 729{
fc723250 730 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
731 int conn_flags;
732
733 /* set the weight and the flags */
734 atomic_set(&dest->weight, udest->weight);
3575792e
JA
735 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
736 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 737
1da177e4 738 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 739 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
740 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
741 } else {
742 /*
fc723250 743 * Put the real service in rs_table if not present.
1da177e4
LT
744 * For now only for NAT!
745 */
a0840e2e 746 write_lock_bh(&ipvs->rs_lock);
fc723250 747 ip_vs_rs_hash(ipvs, dest);
a0840e2e 748 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
749 }
750 atomic_set(&dest->conn_flags, conn_flags);
751
752 /* bind the service */
753 if (!dest->svc) {
754 __ip_vs_bind_svc(dest, svc);
755 } else {
756 if (dest->svc != svc) {
757 __ip_vs_unbind_svc(dest);
758 ip_vs_zero_stats(&dest->stats);
759 __ip_vs_bind_svc(dest, svc);
760 }
761 }
762
763 /* set the dest status flags */
764 dest->flags |= IP_VS_DEST_F_AVAILABLE;
765
766 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
767 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
768 dest->u_threshold = udest->u_threshold;
769 dest->l_threshold = udest->l_threshold;
26c15cfd 770
fc604767
JA
771 spin_lock(&dest->dst_lock);
772 ip_vs_dst_reset(dest);
773 spin_unlock(&dest->dst_lock);
774
26c15cfd 775 if (add)
29c2026f 776 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
777
778 write_lock_bh(&__ip_vs_svc_lock);
779
780 /* Wait until all other svc users go away */
781 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
782
783 if (add) {
784 list_add(&dest->n_list, &svc->destinations);
785 svc->num_dests++;
786 }
787
788 /* call the update_service, because server weight may be changed */
789 if (svc->scheduler->update_service)
790 svc->scheduler->update_service(svc);
791
792 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
793}
794
795
796/*
797 * Create a destination for the given service
798 */
799static int
c860c6b1 800ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
801 struct ip_vs_dest **dest_p)
802{
803 struct ip_vs_dest *dest;
804 unsigned atype;
805
806 EnterFunction(2);
807
09571c7a
VB
808#ifdef CONFIG_IP_VS_IPV6
809 if (svc->af == AF_INET6) {
810 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
811 if ((!(atype & IPV6_ADDR_UNICAST) ||
812 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
813 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
814 return -EINVAL;
815 } else
816#endif
817 {
818 atype = inet_addr_type(&init_net, udest->addr.ip);
819 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
820 return -EINVAL;
821 }
1da177e4 822
dee06e47 823 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 824 if (dest == NULL) {
1e3e238e 825 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
826 return -ENOMEM;
827 }
b17fc996
HS
828 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
829 if (!dest->stats.cpustats) {
830 pr_err("%s() alloc_percpu failed\n", __func__);
831 goto err_alloc;
832 }
1da177e4 833
c860c6b1 834 dest->af = svc->af;
1da177e4 835 dest->protocol = svc->protocol;
c860c6b1 836 dest->vaddr = svc->addr;
1da177e4
LT
837 dest->vport = svc->port;
838 dest->vfwmark = svc->fwmark;
c860c6b1 839 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
840 dest->port = udest->port;
841
842 atomic_set(&dest->activeconns, 0);
843 atomic_set(&dest->inactconns, 0);
844 atomic_set(&dest->persistconns, 0);
26c15cfd 845 atomic_set(&dest->refcnt, 1);
1da177e4
LT
846
847 INIT_LIST_HEAD(&dest->d_list);
848 spin_lock_init(&dest->dst_lock);
849 spin_lock_init(&dest->stats.lock);
26c15cfd 850 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
851
852 *dest_p = dest;
853
854 LeaveFunction(2);
855 return 0;
b17fc996
HS
856
857err_alloc:
858 kfree(dest);
859 return -ENOMEM;
1da177e4
LT
860}
861
862
863/*
864 * Add a destination into an existing service
865 */
866static int
c860c6b1 867ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
868{
869 struct ip_vs_dest *dest;
c860c6b1 870 union nf_inet_addr daddr;
014d730d 871 __be16 dport = udest->port;
1da177e4
LT
872 int ret;
873
874 EnterFunction(2);
875
876 if (udest->weight < 0) {
1e3e238e 877 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
878 return -ERANGE;
879 }
880
881 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
882 pr_err("%s(): lower threshold is higher than upper threshold\n",
883 __func__);
1da177e4
LT
884 return -ERANGE;
885 }
886
c860c6b1
JV
887 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
888
1da177e4
LT
889 /*
890 * Check if the dest already exists in the list
891 */
7937df15
JV
892 dest = ip_vs_lookup_dest(svc, &daddr, dport);
893
1da177e4 894 if (dest != NULL) {
1e3e238e 895 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
896 return -EEXIST;
897 }
898
899 /*
900 * Check if the dest already exists in the trash and
901 * is from the same service
902 */
7937df15
JV
903 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
904
1da177e4 905 if (dest != NULL) {
cfc78c5a
JV
906 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
907 "dest->refcnt=%d, service %u/%s:%u\n",
908 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
909 atomic_read(&dest->refcnt),
910 dest->vfwmark,
911 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
912 ntohs(dest->vport));
913
1da177e4
LT
914 /*
915 * Get the destination from the trash
916 */
917 list_del(&dest->n_list);
918
26c15cfd
JA
919 __ip_vs_update_dest(svc, dest, udest, 1);
920 ret = 0;
921 } else {
1da177e4 922 /*
26c15cfd 923 * Allocate and initialize the dest structure
1da177e4 924 */
26c15cfd 925 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 926 }
1da177e4
LT
927 LeaveFunction(2);
928
26c15cfd 929 return ret;
1da177e4
LT
930}
931
932
933/*
934 * Edit a destination in the given service
935 */
936static int
c860c6b1 937ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
938{
939 struct ip_vs_dest *dest;
c860c6b1 940 union nf_inet_addr daddr;
014d730d 941 __be16 dport = udest->port;
1da177e4
LT
942
943 EnterFunction(2);
944
945 if (udest->weight < 0) {
1e3e238e 946 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
947 return -ERANGE;
948 }
949
950 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
951 pr_err("%s(): lower threshold is higher than upper threshold\n",
952 __func__);
1da177e4
LT
953 return -ERANGE;
954 }
955
c860c6b1
JV
956 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
957
1da177e4
LT
958 /*
959 * Lookup the destination list
960 */
7937df15
JV
961 dest = ip_vs_lookup_dest(svc, &daddr, dport);
962
1da177e4 963 if (dest == NULL) {
1e3e238e 964 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
965 return -ENOENT;
966 }
967
26c15cfd 968 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
969 LeaveFunction(2);
970
971 return 0;
972}
973
974
975/*
976 * Delete a destination (must be already unlinked from the service)
977 */
29c2026f 978static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 979{
a0840e2e
HS
980 struct netns_ipvs *ipvs = net_ipvs(net);
981
29c2026f 982 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
983
984 /*
985 * Remove it from the d-linked list with the real services.
986 */
a0840e2e 987 write_lock_bh(&ipvs->rs_lock);
1da177e4 988 ip_vs_rs_unhash(dest);
a0840e2e 989 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
990
991 /*
992 * Decrease the refcnt of the dest, and free the dest
993 * if nobody refers to it (refcnt=0). Otherwise, throw
994 * the destination into the trash.
995 */
996 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
997 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
998 dest->vfwmark,
999 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1000 ntohs(dest->port));
1da177e4
LT
1001 ip_vs_dst_reset(dest);
1002 /* simply decrease svc->refcnt here, let the caller check
1003 and release the service if nobody refers to it.
1004 Only user context can release destination and service,
1005 and only one user context can update virtual service at a
1006 time, so the operation here is OK */
1007 atomic_dec(&dest->svc->refcnt);
b17fc996 1008 free_percpu(dest->stats.cpustats);
1da177e4
LT
1009 kfree(dest);
1010 } else {
cfc78c5a
JV
1011 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1012 "dest->refcnt=%d\n",
1013 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1014 ntohs(dest->port),
1015 atomic_read(&dest->refcnt));
f2431e6e 1016 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1017 atomic_inc(&dest->refcnt);
1018 }
1019}
1020
1021
1022/*
1023 * Unlink a destination from the given service
1024 */
1025static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1026 struct ip_vs_dest *dest,
1027 int svcupd)
1028{
1029 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1030
1031 /*
1032 * Remove it from the d-linked destination list.
1033 */
1034 list_del(&dest->n_list);
1035 svc->num_dests--;
82dfb6f3
SW
1036
1037 /*
1038 * Call the update_service function of its scheduler
1039 */
1040 if (svcupd && svc->scheduler->update_service)
1041 svc->scheduler->update_service(svc);
1da177e4
LT
1042}
1043
1044
1045/*
1046 * Delete a destination server in the given service
1047 */
1048static int
c860c6b1 1049ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1050{
1051 struct ip_vs_dest *dest;
014d730d 1052 __be16 dport = udest->port;
1da177e4
LT
1053
1054 EnterFunction(2);
1055
7937df15 1056 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1057
1da177e4 1058 if (dest == NULL) {
1e3e238e 1059 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1060 return -ENOENT;
1061 }
1062
1063 write_lock_bh(&__ip_vs_svc_lock);
1064
1065 /*
1066 * Wait until all other svc users go away.
1067 */
26c15cfd 1068 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1069
1070 /*
1071 * Unlink dest from the service
1072 */
1073 __ip_vs_unlink_dest(svc, dest, 1);
1074
1075 write_unlock_bh(&__ip_vs_svc_lock);
1076
1077 /*
1078 * Delete the destination
1079 */
a0840e2e 1080 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1081
1082 LeaveFunction(2);
1083
1084 return 0;
1085}
1086
1087
1088/*
1089 * Add a service into the service hash table
1090 */
1091static int
fc723250 1092ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1093 struct ip_vs_service **svc_p)
1da177e4
LT
1094{
1095 int ret = 0;
1096 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1097 struct ip_vs_pe *pe = NULL;
1da177e4 1098 struct ip_vs_service *svc = NULL;
a0840e2e 1099 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1100
1101 /* increase the module use count */
1102 ip_vs_use_count_inc();
1103
1104 /* Lookup the scheduler by 'u->sched_name' */
1105 sched = ip_vs_scheduler_get(u->sched_name);
1106 if (sched == NULL) {
1e3e238e 1107 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1108 ret = -ENOENT;
6e08bfb8 1109 goto out_err;
1da177e4
LT
1110 }
1111
0d1e71b0 1112 if (u->pe_name && *u->pe_name) {
e9e5eee8 1113 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1114 if (pe == NULL) {
1115 pr_info("persistence engine module ip_vs_pe_%s "
1116 "not found\n", u->pe_name);
1117 ret = -ENOENT;
1118 goto out_err;
1119 }
1120 }
1121
f94fd041 1122#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1123 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1124 ret = -EINVAL;
1125 goto out_err;
f94fd041
JV
1126 }
1127#endif
1128
dee06e47 1129 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1130 if (svc == NULL) {
1e3e238e 1131 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1132 ret = -ENOMEM;
1133 goto out_err;
1134 }
b17fc996
HS
1135 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1136 if (!svc->stats.cpustats) {
1137 pr_err("%s() alloc_percpu failed\n", __func__);
1138 goto out_err;
1139 }
1da177e4
LT
1140
1141 /* I'm the first user of the service */
26c15cfd 1142 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1143 atomic_set(&svc->refcnt, 0);
1144
c860c6b1 1145 svc->af = u->af;
1da177e4 1146 svc->protocol = u->protocol;
c860c6b1 1147 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1148 svc->port = u->port;
1149 svc->fwmark = u->fwmark;
1150 svc->flags = u->flags;
1151 svc->timeout = u->timeout * HZ;
1152 svc->netmask = u->netmask;
fc723250 1153 svc->net = net;
1da177e4
LT
1154
1155 INIT_LIST_HEAD(&svc->destinations);
1156 rwlock_init(&svc->sched_lock);
1157 spin_lock_init(&svc->stats.lock);
1158
1159 /* Bind the scheduler */
1160 ret = ip_vs_bind_scheduler(svc, sched);
1161 if (ret)
1162 goto out_err;
1163 sched = NULL;
1164
0d1e71b0
SH
1165 /* Bind the ct retriever */
1166 ip_vs_bind_pe(svc, pe);
1167 pe = NULL;
1168
1da177e4
LT
1169 /* Update the virtual service counters */
1170 if (svc->port == FTPPORT)
763f8d0e 1171 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1172 else if (svc->port == 0)
763f8d0e 1173 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1174
29c2026f 1175 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1176
1177 /* Count only IPv4 services for old get/setsockopt interface */
1178 if (svc->af == AF_INET)
a0840e2e 1179 ipvs->num_services++;
1da177e4
LT
1180
1181 /* Hash the service into the service table */
1182 write_lock_bh(&__ip_vs_svc_lock);
1183 ip_vs_svc_hash(svc);
1184 write_unlock_bh(&__ip_vs_svc_lock);
1185
1186 *svc_p = svc;
1187 return 0;
1188
b17fc996 1189
6e08bfb8 1190 out_err:
1da177e4 1191 if (svc != NULL) {
2fabf35b 1192 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1193 if (svc->inc) {
1194 local_bh_disable();
1195 ip_vs_app_inc_put(svc->inc);
1196 local_bh_enable();
1197 }
b17fc996
HS
1198 if (svc->stats.cpustats)
1199 free_percpu(svc->stats.cpustats);
1da177e4
LT
1200 kfree(svc);
1201 }
1202 ip_vs_scheduler_put(sched);
0d1e71b0 1203 ip_vs_pe_put(pe);
1da177e4 1204
1da177e4
LT
1205 /* decrease the module use count */
1206 ip_vs_use_count_dec();
1207
1208 return ret;
1209}
1210
1211
1212/*
1213 * Edit a service and bind it with a new scheduler
1214 */
1215static int
c860c6b1 1216ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1217{
1218 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1219 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1220 int ret = 0;
1221
1222 /*
1223 * Lookup the scheduler, by 'u->sched_name'
1224 */
1225 sched = ip_vs_scheduler_get(u->sched_name);
1226 if (sched == NULL) {
1e3e238e 1227 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1228 return -ENOENT;
1229 }
1230 old_sched = sched;
1231
0d1e71b0 1232 if (u->pe_name && *u->pe_name) {
e9e5eee8 1233 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1234 if (pe == NULL) {
1235 pr_info("persistence engine module ip_vs_pe_%s "
1236 "not found\n", u->pe_name);
1237 ret = -ENOENT;
1238 goto out;
1239 }
1240 old_pe = pe;
1241 }
1242
f94fd041 1243#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1244 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1245 ret = -EINVAL;
1246 goto out;
f94fd041
JV
1247 }
1248#endif
1249
1da177e4
LT
1250 write_lock_bh(&__ip_vs_svc_lock);
1251
1252 /*
1253 * Wait until all other svc users go away.
1254 */
26c15cfd 1255 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1256
1257 /*
1258 * Set the flags and timeout value
1259 */
1260 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1261 svc->timeout = u->timeout * HZ;
1262 svc->netmask = u->netmask;
1263
1264 old_sched = svc->scheduler;
1265 if (sched != old_sched) {
1266 /*
1267 * Unbind the old scheduler
1268 */
1269 if ((ret = ip_vs_unbind_scheduler(svc))) {
1270 old_sched = sched;
9e691ed6 1271 goto out_unlock;
1da177e4
LT
1272 }
1273
1274 /*
1275 * Bind the new scheduler
1276 */
1277 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1278 /*
1279 * If ip_vs_bind_scheduler fails, restore the old
1280 * scheduler.
1281 * The main reason of failure is out of memory.
1282 *
1283 * The question is if the old scheduler can be
1284 * restored all the time. TODO: if it cannot be
1285 * restored some time, we must delete the service,
1286 * otherwise the system may crash.
1287 */
1288 ip_vs_bind_scheduler(svc, old_sched);
1289 old_sched = sched;
9e691ed6 1290 goto out_unlock;
1da177e4
LT
1291 }
1292 }
1293
0d1e71b0
SH
1294 old_pe = svc->pe;
1295 if (pe != old_pe) {
1296 ip_vs_unbind_pe(svc);
1297 ip_vs_bind_pe(svc, pe);
1298 }
1299
9e691ed6 1300 out_unlock:
1da177e4 1301 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1302 out:
6e08bfb8 1303 ip_vs_scheduler_put(old_sched);
0d1e71b0 1304 ip_vs_pe_put(old_pe);
1da177e4
LT
1305 return ret;
1306}
1307
1308
1309/*
1310 * Delete a service from the service list
1311 * - The service must be unlinked, unlocked and not referenced!
1312 * - We are called under _bh lock
1313 */
1314static void __ip_vs_del_service(struct ip_vs_service *svc)
1315{
1316 struct ip_vs_dest *dest, *nxt;
1317 struct ip_vs_scheduler *old_sched;
0d1e71b0 1318 struct ip_vs_pe *old_pe;
a0840e2e 1319 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1320
1321 pr_info("%s: enter\n", __func__);
1da177e4 1322
f94fd041
JV
1323 /* Count only IPv4 services for old get/setsockopt interface */
1324 if (svc->af == AF_INET)
a0840e2e 1325 ipvs->num_services--;
f94fd041 1326
29c2026f 1327 ip_vs_kill_estimator(svc->net, &svc->stats);
1da177e4
LT
1328
1329 /* Unbind scheduler */
1330 old_sched = svc->scheduler;
1331 ip_vs_unbind_scheduler(svc);
6e08bfb8 1332 ip_vs_scheduler_put(old_sched);
1da177e4 1333
0d1e71b0
SH
1334 /* Unbind persistence engine */
1335 old_pe = svc->pe;
1336 ip_vs_unbind_pe(svc);
1337 ip_vs_pe_put(old_pe);
1338
1da177e4
LT
1339 /* Unbind app inc */
1340 if (svc->inc) {
1341 ip_vs_app_inc_put(svc->inc);
1342 svc->inc = NULL;
1343 }
1344
1345 /*
1346 * Unlink the whole destination list
1347 */
1348 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1349 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1350 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1351 }
1352
1353 /*
1354 * Update the virtual service counters
1355 */
1356 if (svc->port == FTPPORT)
763f8d0e 1357 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1358 else if (svc->port == 0)
763f8d0e 1359 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1360
1361 /*
1362 * Free the service if nobody refers to it
1363 */
26c15cfd
JA
1364 if (atomic_read(&svc->refcnt) == 0) {
1365 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1366 svc->fwmark,
1367 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1368 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1369 free_percpu(svc->stats.cpustats);
1da177e4 1370 kfree(svc);
26c15cfd 1371 }
1da177e4
LT
1372
1373 /* decrease the module use count */
1374 ip_vs_use_count_dec();
1375}
1376
1377/*
26c15cfd 1378 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1379 */
26c15cfd 1380static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1381{
1da177e4
LT
1382 /*
1383 * Unhash it from the service table
1384 */
1385 write_lock_bh(&__ip_vs_svc_lock);
1386
1387 ip_vs_svc_unhash(svc);
1388
1389 /*
1390 * Wait until all the svc users go away.
1391 */
26c15cfd 1392 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1393
1394 __ip_vs_del_service(svc);
1395
1396 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1397}
1398
1399/*
1400 * Delete a service from the service list
1401 */
1402static int ip_vs_del_service(struct ip_vs_service *svc)
1403{
1404 if (svc == NULL)
1405 return -EEXIST;
1406 ip_vs_unlink_service(svc);
1da177e4
LT
1407
1408 return 0;
1409}
1410
1411
1412/*
1413 * Flush all the virtual services
1414 */
fc723250 1415static int ip_vs_flush(struct net *net)
1da177e4
LT
1416{
1417 int idx;
1418 struct ip_vs_service *svc, *nxt;
1419
1420 /*
fc723250 1421 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1422 */
1423 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1424 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1425 s_list) {
1426 if (net_eq(svc->net, net))
1427 ip_vs_unlink_service(svc);
1da177e4
LT
1428 }
1429 }
1430
1431 /*
1432 * Flush the service table hashed by fwmark
1433 */
1434 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1435 list_for_each_entry_safe(svc, nxt,
1436 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1437 if (net_eq(svc->net, net))
1438 ip_vs_unlink_service(svc);
1da177e4
LT
1439 }
1440 }
1441
1442 return 0;
1443}
1444
1445
1446/*
1447 * Zero counters in a service or all services
1448 */
1449static int ip_vs_zero_service(struct ip_vs_service *svc)
1450{
1451 struct ip_vs_dest *dest;
1452
1453 write_lock_bh(&__ip_vs_svc_lock);
1454 list_for_each_entry(dest, &svc->destinations, n_list) {
1455 ip_vs_zero_stats(&dest->stats);
1456 }
1457 ip_vs_zero_stats(&svc->stats);
1458 write_unlock_bh(&__ip_vs_svc_lock);
1459 return 0;
1460}
1461
fc723250 1462static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1463{
1464 int idx;
1465 struct ip_vs_service *svc;
1466
1467 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1468 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1469 if (net_eq(svc->net, net))
1470 ip_vs_zero_service(svc);
1da177e4
LT
1471 }
1472 }
1473
1474 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1475 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1476 if (net_eq(svc->net, net))
1477 ip_vs_zero_service(svc);
1da177e4
LT
1478 }
1479 }
1480
b17fc996 1481 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1482 return 0;
1483}
1484
1485
1486static int
8d65af78 1487proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1488 void __user *buffer, size_t *lenp, loff_t *ppos)
1489{
9330419d 1490 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1491 int *valp = table->data;
1492 int val = *valp;
1493 int rc;
1494
8d65af78 1495 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1496 if (write && (*valp != val)) {
1497 if ((*valp < 0) || (*valp > 3)) {
1498 /* Restore the correct value */
1499 *valp = val;
1500 } else {
9330419d 1501 update_defense_level(net_ipvs(net));
1da177e4
LT
1502 }
1503 }
1504 return rc;
1505}
1506
1507
1508static int
8d65af78 1509proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1510 void __user *buffer, size_t *lenp, loff_t *ppos)
1511{
1512 int *valp = table->data;
1513 int val[2];
1514 int rc;
1515
1516 /* backup the value first */
1517 memcpy(val, valp, sizeof(val));
1518
8d65af78 1519 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1520 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1521 /* Restore the correct value */
1522 memcpy(valp, val, sizeof(val));
1523 }
1524 return rc;
1525}
1526
b880c1f0
HS
1527static int
1528proc_do_sync_mode(ctl_table *table, int write,
1529 void __user *buffer, size_t *lenp, loff_t *ppos)
1530{
1531 int *valp = table->data;
1532 int val = *valp;
1533 int rc;
1534
1535 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1536 if (write && (*valp != val)) {
1537 if ((*valp < 0) || (*valp > 1)) {
1538 /* Restore the correct value */
1539 *valp = val;
1540 } else {
f131315f
HS
1541 struct net *net = current->nsproxy->net_ns;
1542 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1543 }
1544 }
1545 return rc;
1546}
1da177e4
LT
1547
1548/*
1549 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1550 * Do not change order or insert new entries without
1551 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1552 */
1553
1554static struct ctl_table vs_vars[] = {
1555 {
1da177e4 1556 .procname = "amemthresh",
1da177e4
LT
1557 .maxlen = sizeof(int),
1558 .mode = 0644,
6d9f239a 1559 .proc_handler = proc_dointvec,
1da177e4 1560 },
1da177e4 1561 {
1da177e4 1562 .procname = "am_droprate",
1da177e4
LT
1563 .maxlen = sizeof(int),
1564 .mode = 0644,
6d9f239a 1565 .proc_handler = proc_dointvec,
1da177e4
LT
1566 },
1567 {
1da177e4 1568 .procname = "drop_entry",
1da177e4
LT
1569 .maxlen = sizeof(int),
1570 .mode = 0644,
6d9f239a 1571 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1572 },
1573 {
1da177e4 1574 .procname = "drop_packet",
1da177e4
LT
1575 .maxlen = sizeof(int),
1576 .mode = 0644,
6d9f239a 1577 .proc_handler = proc_do_defense_mode,
1da177e4 1578 },
f4bc17cd
JA
1579#ifdef CONFIG_IP_VS_NFCT
1580 {
1581 .procname = "conntrack",
f4bc17cd
JA
1582 .maxlen = sizeof(int),
1583 .mode = 0644,
1584 .proc_handler = &proc_dointvec,
1585 },
1586#endif
1da177e4 1587 {
1da177e4 1588 .procname = "secure_tcp",
1da177e4
LT
1589 .maxlen = sizeof(int),
1590 .mode = 0644,
6d9f239a 1591 .proc_handler = proc_do_defense_mode,
1da177e4 1592 },
8a803040
JA
1593 {
1594 .procname = "snat_reroute",
8a803040
JA
1595 .maxlen = sizeof(int),
1596 .mode = 0644,
1597 .proc_handler = &proc_dointvec,
1598 },
b880c1f0
HS
1599 {
1600 .procname = "sync_version",
b880c1f0
HS
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_do_sync_mode,
1604 },
a0840e2e
HS
1605 {
1606 .procname = "cache_bypass",
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
1609 .proc_handler = proc_dointvec,
1610 },
1611 {
1612 .procname = "expire_nodest_conn",
1613 .maxlen = sizeof(int),
1614 .mode = 0644,
1615 .proc_handler = proc_dointvec,
1616 },
1617 {
1618 .procname = "expire_quiescent_template",
1619 .maxlen = sizeof(int),
1620 .mode = 0644,
1621 .proc_handler = proc_dointvec,
1622 },
1623 {
1624 .procname = "sync_threshold",
1625 .maxlen =
1626 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1627 .mode = 0644,
1628 .proc_handler = proc_do_sync_threshold,
1629 },
1630 {
1631 .procname = "nat_icmp_send",
1632 .maxlen = sizeof(int),
1633 .mode = 0644,
1634 .proc_handler = proc_dointvec,
1635 },
1636#ifdef CONFIG_IP_VS_DEBUG
1637 {
1638 .procname = "debug_level",
1639 .data = &sysctl_ip_vs_debug_level,
1640 .maxlen = sizeof(int),
1641 .mode = 0644,
1642 .proc_handler = proc_dointvec,
1643 },
1644#endif
1da177e4
LT
1645#if 0
1646 {
1da177e4
LT
1647 .procname = "timeout_established",
1648 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1649 .maxlen = sizeof(int),
1650 .mode = 0644,
6d9f239a 1651 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1652 },
1653 {
1da177e4
LT
1654 .procname = "timeout_synsent",
1655 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1656 .maxlen = sizeof(int),
1657 .mode = 0644,
6d9f239a 1658 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1659 },
1660 {
1da177e4
LT
1661 .procname = "timeout_synrecv",
1662 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1663 .maxlen = sizeof(int),
1664 .mode = 0644,
6d9f239a 1665 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1666 },
1667 {
1da177e4
LT
1668 .procname = "timeout_finwait",
1669 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1670 .maxlen = sizeof(int),
1671 .mode = 0644,
6d9f239a 1672 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1673 },
1674 {
1da177e4
LT
1675 .procname = "timeout_timewait",
1676 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1677 .maxlen = sizeof(int),
1678 .mode = 0644,
6d9f239a 1679 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1680 },
1681 {
1da177e4
LT
1682 .procname = "timeout_close",
1683 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1684 .maxlen = sizeof(int),
1685 .mode = 0644,
6d9f239a 1686 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1687 },
1688 {
1da177e4
LT
1689 .procname = "timeout_closewait",
1690 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1691 .maxlen = sizeof(int),
1692 .mode = 0644,
6d9f239a 1693 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1694 },
1695 {
1da177e4
LT
1696 .procname = "timeout_lastack",
1697 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1698 .maxlen = sizeof(int),
1699 .mode = 0644,
6d9f239a 1700 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1701 },
1702 {
1da177e4
LT
1703 .procname = "timeout_listen",
1704 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1705 .maxlen = sizeof(int),
1706 .mode = 0644,
6d9f239a 1707 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1708 },
1709 {
1da177e4
LT
1710 .procname = "timeout_synack",
1711 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1712 .maxlen = sizeof(int),
1713 .mode = 0644,
6d9f239a 1714 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1715 },
1716 {
1da177e4
LT
1717 .procname = "timeout_udp",
1718 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1719 .maxlen = sizeof(int),
1720 .mode = 0644,
6d9f239a 1721 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1722 },
1723 {
1da177e4
LT
1724 .procname = "timeout_icmp",
1725 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1726 .maxlen = sizeof(int),
1727 .mode = 0644,
6d9f239a 1728 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1729 },
1730#endif
f8572d8f 1731 { }
1da177e4
LT
1732};
1733
5587da55 1734const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1735 { .procname = "net", },
1736 { .procname = "ipv4", },
90754f8e
PE
1737 { .procname = "vs", },
1738 { }
1da177e4 1739};
90754f8e 1740EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4 1741
1da177e4
LT
1742#ifdef CONFIG_PROC_FS
1743
1744struct ip_vs_iter {
fc723250 1745 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1746 struct list_head *table;
1747 int bucket;
1748};
1749
1750/*
1751 * Write the contents of the VS rule table to a PROCfs file.
1752 * (It is kept just for backward compatibility)
1753 */
1754static inline const char *ip_vs_fwd_name(unsigned flags)
1755{
1756 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1757 case IP_VS_CONN_F_LOCALNODE:
1758 return "Local";
1759 case IP_VS_CONN_F_TUNNEL:
1760 return "Tunnel";
1761 case IP_VS_CONN_F_DROUTE:
1762 return "Route";
1763 default:
1764 return "Masq";
1765 }
1766}
1767
1768
1769/* Get the Nth entry in the two lists */
1770static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1771{
fc723250 1772 struct net *net = seq_file_net(seq);
1da177e4
LT
1773 struct ip_vs_iter *iter = seq->private;
1774 int idx;
1775 struct ip_vs_service *svc;
1776
1777 /* look in hash by protocol */
1778 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1779 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1780 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1781 iter->table = ip_vs_svc_table;
1782 iter->bucket = idx;
1783 return svc;
1784 }
1785 }
1786 }
1787
1788 /* keep looking in fwmark */
1789 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1790 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1791 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1792 iter->table = ip_vs_svc_fwm_table;
1793 iter->bucket = idx;
1794 return svc;
1795 }
1796 }
1797 }
1798
1799 return NULL;
1800}
1801
1802static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1803__acquires(__ip_vs_svc_lock)
1da177e4
LT
1804{
1805
1806 read_lock_bh(&__ip_vs_svc_lock);
1807 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1808}
1809
1810
1811static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1812{
1813 struct list_head *e;
1814 struct ip_vs_iter *iter;
1815 struct ip_vs_service *svc;
1816
1817 ++*pos;
1818 if (v == SEQ_START_TOKEN)
1819 return ip_vs_info_array(seq,0);
1820
1821 svc = v;
1822 iter = seq->private;
1823
1824 if (iter->table == ip_vs_svc_table) {
1825 /* next service in table hashed by protocol */
1826 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1827 return list_entry(e, struct ip_vs_service, s_list);
1828
1829
1830 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1831 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1832 s_list) {
1833 return svc;
1834 }
1835 }
1836
1837 iter->table = ip_vs_svc_fwm_table;
1838 iter->bucket = -1;
1839 goto scan_fwmark;
1840 }
1841
1842 /* next service in hashed by fwmark */
1843 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1844 return list_entry(e, struct ip_vs_service, f_list);
1845
1846 scan_fwmark:
1847 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1848 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1849 f_list)
1850 return svc;
1851 }
1852
1853 return NULL;
1854}
1855
1856static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1857__releases(__ip_vs_svc_lock)
1da177e4
LT
1858{
1859 read_unlock_bh(&__ip_vs_svc_lock);
1860}
1861
1862
1863static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1864{
1865 if (v == SEQ_START_TOKEN) {
1866 seq_printf(seq,
1867 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1868 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1869 seq_puts(seq,
1870 "Prot LocalAddress:Port Scheduler Flags\n");
1871 seq_puts(seq,
1872 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1873 } else {
1874 const struct ip_vs_service *svc = v;
1875 const struct ip_vs_iter *iter = seq->private;
1876 const struct ip_vs_dest *dest;
1877
667a5f18
VB
1878 if (iter->table == ip_vs_svc_table) {
1879#ifdef CONFIG_IP_VS_IPV6
1880 if (svc->af == AF_INET6)
5b095d98 1881 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1882 ip_vs_proto_name(svc->protocol),
38ff4fa4 1883 &svc->addr.in6,
667a5f18
VB
1884 ntohs(svc->port),
1885 svc->scheduler->name);
1886 else
1887#endif
26ec037f 1888 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1889 ip_vs_proto_name(svc->protocol),
1890 ntohl(svc->addr.ip),
1891 ntohs(svc->port),
26ec037f
NC
1892 svc->scheduler->name,
1893 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1894 } else {
26ec037f
NC
1895 seq_printf(seq, "FWM %08X %s %s",
1896 svc->fwmark, svc->scheduler->name,
1897 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1898 }
1da177e4
LT
1899
1900 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1901 seq_printf(seq, "persistent %d %08X\n",
1902 svc->timeout,
1903 ntohl(svc->netmask));
1904 else
1905 seq_putc(seq, '\n');
1906
1907 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1908#ifdef CONFIG_IP_VS_IPV6
1909 if (dest->af == AF_INET6)
1910 seq_printf(seq,
5b095d98 1911 " -> [%pI6]:%04X"
667a5f18 1912 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1913 &dest->addr.in6,
667a5f18
VB
1914 ntohs(dest->port),
1915 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1916 atomic_read(&dest->weight),
1917 atomic_read(&dest->activeconns),
1918 atomic_read(&dest->inactconns));
1919 else
1920#endif
1921 seq_printf(seq,
1922 " -> %08X:%04X "
1923 "%-7s %-6d %-10d %-10d\n",
1924 ntohl(dest->addr.ip),
1925 ntohs(dest->port),
1926 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1927 atomic_read(&dest->weight),
1928 atomic_read(&dest->activeconns),
1929 atomic_read(&dest->inactconns));
1930
1da177e4
LT
1931 }
1932 }
1933 return 0;
1934}
1935
56b3d975 1936static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1937 .start = ip_vs_info_seq_start,
1938 .next = ip_vs_info_seq_next,
1939 .stop = ip_vs_info_seq_stop,
1940 .show = ip_vs_info_seq_show,
1941};
1942
1943static int ip_vs_info_open(struct inode *inode, struct file *file)
1944{
fc723250 1945 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1946 sizeof(struct ip_vs_iter));
1da177e4
LT
1947}
1948
9a32144e 1949static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1950 .owner = THIS_MODULE,
1951 .open = ip_vs_info_open,
1952 .read = seq_read,
1953 .llseek = seq_lseek,
1954 .release = seq_release_private,
1955};
1956
1957#endif
1958
1da177e4
LT
1959#ifdef CONFIG_PROC_FS
1960static int ip_vs_stats_show(struct seq_file *seq, void *v)
1961{
b17fc996
HS
1962 struct net *net = seq_file_single_net(seq);
1963 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1da177e4
LT
1964
1965/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1966 seq_puts(seq,
1967 " Total Incoming Outgoing Incoming Outgoing\n");
1968 seq_printf(seq,
1969 " Conns Packets Packets Bytes Bytes\n");
1970
b17fc996
HS
1971 spin_lock_bh(&tot_stats->lock);
1972 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1973 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1974 (unsigned long long) tot_stats->ustats.inbytes,
1975 (unsigned long long) tot_stats->ustats.outbytes);
1da177e4
LT
1976
1977/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1978 seq_puts(seq,
1979 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1980 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
b17fc996
HS
1981 tot_stats->ustats.cps,
1982 tot_stats->ustats.inpps,
1983 tot_stats->ustats.outpps,
1984 tot_stats->ustats.inbps,
1985 tot_stats->ustats.outbps);
1986 spin_unlock_bh(&tot_stats->lock);
1da177e4
LT
1987
1988 return 0;
1989}
1990
1991static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1992{
fc723250 1993 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
1994}
1995
9a32144e 1996static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1997 .owner = THIS_MODULE,
1998 .open = ip_vs_stats_seq_open,
1999 .read = seq_read,
2000 .llseek = seq_lseek,
2001 .release = single_release,
2002};
2003
b17fc996
HS
2004static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2005{
2006 struct net *net = seq_file_single_net(seq);
2007 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2008 int i;
2009
2010/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2011 seq_puts(seq,
2012 " Total Incoming Outgoing Incoming Outgoing\n");
2013 seq_printf(seq,
2014 "CPU Conns Packets Packets Bytes Bytes\n");
2015
2016 for_each_possible_cpu(i) {
2017 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2018 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2019 i, u->ustats.conns, u->ustats.inpkts,
2020 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2021 (__u64)u->ustats.outbytes);
2022 }
2023
2024 spin_lock_bh(&tot_stats->lock);
2025 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2026 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2027 tot_stats->ustats.outpkts,
2028 (unsigned long long) tot_stats->ustats.inbytes,
2029 (unsigned long long) tot_stats->ustats.outbytes);
2030
2031/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2032 seq_puts(seq,
2033 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2034 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2035 tot_stats->ustats.cps,
2036 tot_stats->ustats.inpps,
2037 tot_stats->ustats.outpps,
2038 tot_stats->ustats.inbps,
2039 tot_stats->ustats.outbps);
2040 spin_unlock_bh(&tot_stats->lock);
2041
2042 return 0;
2043}
2044
2045static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2046{
2047 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2048}
2049
2050static const struct file_operations ip_vs_stats_percpu_fops = {
2051 .owner = THIS_MODULE,
2052 .open = ip_vs_stats_percpu_seq_open,
2053 .read = seq_read,
2054 .llseek = seq_lseek,
2055 .release = single_release,
2056};
1da177e4
LT
2057#endif
2058
2059/*
2060 * Set timeout values for tcp tcpfin udp in the timeout_table.
2061 */
9330419d 2062static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2063{
9330419d
HS
2064 struct ip_vs_proto_data *pd;
2065
1da177e4
LT
2066 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2067 u->tcp_timeout,
2068 u->tcp_fin_timeout,
2069 u->udp_timeout);
2070
2071#ifdef CONFIG_IP_VS_PROTO_TCP
2072 if (u->tcp_timeout) {
9330419d
HS
2073 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2074 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2075 = u->tcp_timeout * HZ;
2076 }
2077
2078 if (u->tcp_fin_timeout) {
9330419d
HS
2079 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2080 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2081 = u->tcp_fin_timeout * HZ;
2082 }
2083#endif
2084
2085#ifdef CONFIG_IP_VS_PROTO_UDP
2086 if (u->udp_timeout) {
9330419d
HS
2087 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2088 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2089 = u->udp_timeout * HZ;
2090 }
2091#endif
2092 return 0;
2093}
2094
2095
/*
 * Helpers for validating the argument length of "set" sockopt commands.
 * SET_CMDID() turns a command number into a zero-based table index; its
 * argument is parenthesized so that expression arguments expand correctly.
 * The *_ARG_LEN macros give the argument size for each command family and
 * MAX_ARG_LEN is the largest of them.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2103
9b5b5cff 2104static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2105 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2106 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2107 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2108 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2109 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2110 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2111 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2112 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2113 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2116};
2117
c860c6b1
JV
2118static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2119 struct ip_vs_service_user *usvc_compat)
2120{
0d1e71b0
SH
2121 memset(usvc, 0, sizeof(*usvc));
2122
c860c6b1
JV
2123 usvc->af = AF_INET;
2124 usvc->protocol = usvc_compat->protocol;
2125 usvc->addr.ip = usvc_compat->addr;
2126 usvc->port = usvc_compat->port;
2127 usvc->fwmark = usvc_compat->fwmark;
2128
2129 /* Deep copy of sched_name is not needed here */
2130 usvc->sched_name = usvc_compat->sched_name;
2131
2132 usvc->flags = usvc_compat->flags;
2133 usvc->timeout = usvc_compat->timeout;
2134 usvc->netmask = usvc_compat->netmask;
2135}
2136
2137static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2138 struct ip_vs_dest_user *udest_compat)
2139{
0d1e71b0
SH
2140 memset(udest, 0, sizeof(*udest));
2141
c860c6b1
JV
2142 udest->addr.ip = udest_compat->addr;
2143 udest->port = udest_compat->port;
2144 udest->conn_flags = udest_compat->conn_flags;
2145 udest->weight = udest_compat->weight;
2146 udest->u_threshold = udest_compat->u_threshold;
2147 udest->l_threshold = udest_compat->l_threshold;
2148}
2149
1da177e4
LT
2150static int
2151do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2152{
fc723250 2153 struct net *net = sock_net(sk);
1da177e4
LT
2154 int ret;
2155 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2156 struct ip_vs_service_user *usvc_compat;
2157 struct ip_vs_service_user_kern usvc;
1da177e4 2158 struct ip_vs_service *svc;
c860c6b1
JV
2159 struct ip_vs_dest_user *udest_compat;
2160 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2161
2162 if (!capable(CAP_NET_ADMIN))
2163 return -EPERM;
2164
04bcef2a
AV
2165 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2166 return -EINVAL;
2167 if (len < 0 || len > MAX_ARG_LEN)
2168 return -EINVAL;
1da177e4 2169 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2170 pr_err("set_ctl: len %u != %u\n",
2171 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2172 return -EINVAL;
2173 }
2174
2175 if (copy_from_user(arg, user, len) != 0)
2176 return -EFAULT;
2177
2178 /* increase the module use count */
2179 ip_vs_use_count_inc();
2180
14cc3e2b 2181 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2182 ret = -ERESTARTSYS;
2183 goto out_dec;
2184 }
2185
2186 if (cmd == IP_VS_SO_SET_FLUSH) {
2187 /* Flush the virtual service */
fc723250 2188 ret = ip_vs_flush(net);
1da177e4
LT
2189 goto out_unlock;
2190 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2191 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2192 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2193 goto out_unlock;
2194 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2195 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2196 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2197 dm->syncid);
1da177e4
LT
2198 goto out_unlock;
2199 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2200 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2201 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2202 goto out_unlock;
2203 }
2204
c860c6b1
JV
2205 usvc_compat = (struct ip_vs_service_user *)arg;
2206 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2207
2208 /* We only use the new structs internally, so copy userspace compat
2209 * structs to extended internal versions */
2210 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2211 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2212
2213 if (cmd == IP_VS_SO_SET_ZERO) {
2214 /* if no service address is set, zero counters in all */
c860c6b1 2215 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2216 ret = ip_vs_zero_all(net);
1da177e4
LT
2217 goto out_unlock;
2218 }
2219 }
2220
2906f66a
VMR
2221 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2222 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2223 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2224 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2225 usvc.protocol, &usvc.addr.ip,
2226 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2227 ret = -EFAULT;
2228 goto out_unlock;
2229 }
2230
2231 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2232 if (usvc.fwmark == 0)
fc723250 2233 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2234 &usvc.addr, usvc.port);
1da177e4 2235 else
fc723250 2236 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2237
2238 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2239 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2240 ret = -ESRCH;
26c15cfd 2241 goto out_unlock;
1da177e4
LT
2242 }
2243
2244 switch (cmd) {
2245 case IP_VS_SO_SET_ADD:
2246 if (svc != NULL)
2247 ret = -EEXIST;
2248 else
fc723250 2249 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2250 break;
2251 case IP_VS_SO_SET_EDIT:
c860c6b1 2252 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2253 break;
2254 case IP_VS_SO_SET_DEL:
2255 ret = ip_vs_del_service(svc);
2256 if (!ret)
2257 goto out_unlock;
2258 break;
2259 case IP_VS_SO_SET_ZERO:
2260 ret = ip_vs_zero_service(svc);
2261 break;
2262 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2263 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2264 break;
2265 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2266 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2267 break;
2268 case IP_VS_SO_SET_DELDEST:
c860c6b1 2269 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2270 break;
2271 default:
2272 ret = -EINVAL;
2273 }
2274
1da177e4 2275 out_unlock:
14cc3e2b 2276 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2277 out_dec:
2278 /* decrease the module use count */
2279 ip_vs_use_count_dec();
2280
2281 return ret;
2282}
2283
2284
2285static void
2286ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2287{
2288 spin_lock_bh(&src->lock);
e9c0ce23 2289 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2290 spin_unlock_bh(&src->lock);
2291}
2292
2293static void
2294ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2295{
2296 dst->protocol = src->protocol;
e7ade46a 2297 dst->addr = src->addr.ip;
1da177e4
LT
2298 dst->port = src->port;
2299 dst->fwmark = src->fwmark;
4da62fc7 2300 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2301 dst->flags = src->flags;
2302 dst->timeout = src->timeout / HZ;
2303 dst->netmask = src->netmask;
2304 dst->num_dests = src->num_dests;
2305 ip_vs_copy_stats(&dst->stats, &src->stats);
2306}
2307
2308static inline int
fc723250
HS
2309__ip_vs_get_service_entries(struct net *net,
2310 const struct ip_vs_get_services *get,
1da177e4
LT
2311 struct ip_vs_get_services __user *uptr)
2312{
2313 int idx, count=0;
2314 struct ip_vs_service *svc;
2315 struct ip_vs_service_entry entry;
2316 int ret = 0;
2317
2318 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2319 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2320 /* Only expose IPv4 entries to old interface */
fc723250 2321 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2322 continue;
2323
1da177e4
LT
2324 if (count >= get->num_services)
2325 goto out;
4da62fc7 2326 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2327 ip_vs_copy_service(&entry, svc);
2328 if (copy_to_user(&uptr->entrytable[count],
2329 &entry, sizeof(entry))) {
2330 ret = -EFAULT;
2331 goto out;
2332 }
2333 count++;
2334 }
2335 }
2336
2337 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2338 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2339 /* Only expose IPv4 entries to old interface */
fc723250 2340 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2341 continue;
2342
1da177e4
LT
2343 if (count >= get->num_services)
2344 goto out;
4da62fc7 2345 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2346 ip_vs_copy_service(&entry, svc);
2347 if (copy_to_user(&uptr->entrytable[count],
2348 &entry, sizeof(entry))) {
2349 ret = -EFAULT;
2350 goto out;
2351 }
2352 count++;
2353 }
2354 }
2355 out:
2356 return ret;
2357}
2358
2359static inline int
fc723250 2360__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2361 struct ip_vs_get_dests __user *uptr)
2362{
2363 struct ip_vs_service *svc;
b18610de 2364 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2365 int ret = 0;
2366
2367 if (get->fwmark)
fc723250 2368 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2369 else
fc723250 2370 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2371 get->port);
b18610de 2372
1da177e4
LT
2373 if (svc) {
2374 int count = 0;
2375 struct ip_vs_dest *dest;
2376 struct ip_vs_dest_entry entry;
2377
2378 list_for_each_entry(dest, &svc->destinations, n_list) {
2379 if (count >= get->num_dests)
2380 break;
2381
e7ade46a 2382 entry.addr = dest->addr.ip;
1da177e4
LT
2383 entry.port = dest->port;
2384 entry.conn_flags = atomic_read(&dest->conn_flags);
2385 entry.weight = atomic_read(&dest->weight);
2386 entry.u_threshold = dest->u_threshold;
2387 entry.l_threshold = dest->l_threshold;
2388 entry.activeconns = atomic_read(&dest->activeconns);
2389 entry.inactconns = atomic_read(&dest->inactconns);
2390 entry.persistconns = atomic_read(&dest->persistconns);
2391 ip_vs_copy_stats(&entry.stats, &dest->stats);
2392 if (copy_to_user(&uptr->entrytable[count],
2393 &entry, sizeof(entry))) {
2394 ret = -EFAULT;
2395 break;
2396 }
2397 count++;
2398 }
1da177e4
LT
2399 } else
2400 ret = -ESRCH;
2401 return ret;
2402}
2403
2404static inline void
9330419d 2405__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2406{
9330419d
HS
2407 struct ip_vs_proto_data *pd;
2408
1da177e4 2409#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2410 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2411 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2412 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2413#endif
2414#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2415 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2416 u->udp_timeout =
9330419d 2417 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2418#endif
2419}
2420
2421
2422#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2423#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2424#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2425#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2426#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2427#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2428#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2429
9b5b5cff 2430static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2431 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2432 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2433 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2434 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2435 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2436 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2437 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2438};
2439
2440static int
2441do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2442{
2443 unsigned char arg[128];
2444 int ret = 0;
04bcef2a 2445 unsigned int copylen;
fc723250 2446 struct net *net = sock_net(sk);
f131315f 2447 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2448
fc723250 2449 BUG_ON(!net);
1da177e4
LT
2450 if (!capable(CAP_NET_ADMIN))
2451 return -EPERM;
2452
04bcef2a
AV
2453 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2454 return -EINVAL;
2455
1da177e4 2456 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2457 pr_err("get_ctl: len %u < %u\n",
2458 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2459 return -EINVAL;
2460 }
2461
04bcef2a
AV
2462 copylen = get_arglen[GET_CMDID(cmd)];
2463 if (copylen > 128)
2464 return -EINVAL;
2465
2466 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2467 return -EFAULT;
2468
14cc3e2b 2469 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2470 return -ERESTARTSYS;
2471
2472 switch (cmd) {
2473 case IP_VS_SO_GET_VERSION:
2474 {
2475 char buf[64];
2476
2477 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2478 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2479 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2480 ret = -EFAULT;
2481 goto out;
2482 }
2483 *len = strlen(buf)+1;
2484 }
2485 break;
2486
2487 case IP_VS_SO_GET_INFO:
2488 {
2489 struct ip_vs_getinfo info;
2490 info.version = IP_VS_VERSION_CODE;
6f7edb48 2491 info.size = ip_vs_conn_tab_size;
a0840e2e 2492 info.num_services = ipvs->num_services;
1da177e4
LT
2493 if (copy_to_user(user, &info, sizeof(info)) != 0)
2494 ret = -EFAULT;
2495 }
2496 break;
2497
2498 case IP_VS_SO_GET_SERVICES:
2499 {
2500 struct ip_vs_get_services *get;
2501 int size;
2502
2503 get = (struct ip_vs_get_services *)arg;
2504 size = sizeof(*get) +
2505 sizeof(struct ip_vs_service_entry) * get->num_services;
2506 if (*len != size) {
1e3e238e 2507 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2508 ret = -EINVAL;
2509 goto out;
2510 }
fc723250 2511 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2512 }
2513 break;
2514
2515 case IP_VS_SO_GET_SERVICE:
2516 {
2517 struct ip_vs_service_entry *entry;
2518 struct ip_vs_service *svc;
b18610de 2519 union nf_inet_addr addr;
1da177e4
LT
2520
2521 entry = (struct ip_vs_service_entry *)arg;
b18610de 2522 addr.ip = entry->addr;
1da177e4 2523 if (entry->fwmark)
fc723250 2524 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2525 else
fc723250
HS
2526 svc = __ip_vs_service_find(net, AF_INET,
2527 entry->protocol, &addr,
2528 entry->port);
1da177e4
LT
2529 if (svc) {
2530 ip_vs_copy_service(entry, svc);
2531 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2532 ret = -EFAULT;
1da177e4
LT
2533 } else
2534 ret = -ESRCH;
2535 }
2536 break;
2537
2538 case IP_VS_SO_GET_DESTS:
2539 {
2540 struct ip_vs_get_dests *get;
2541 int size;
2542
2543 get = (struct ip_vs_get_dests *)arg;
2544 size = sizeof(*get) +
2545 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2546 if (*len != size) {
1e3e238e 2547 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2548 ret = -EINVAL;
2549 goto out;
2550 }
fc723250 2551 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2552 }
2553 break;
2554
2555 case IP_VS_SO_GET_TIMEOUT:
2556 {
2557 struct ip_vs_timeout_user t;
2558
9330419d 2559 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2560 if (copy_to_user(user, &t, sizeof(t)) != 0)
2561 ret = -EFAULT;
2562 }
2563 break;
2564
2565 case IP_VS_SO_GET_DAEMON:
2566 {
2567 struct ip_vs_daemon_user d[2];
2568
2569 memset(&d, 0, sizeof(d));
f131315f 2570 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2571 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2572 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2573 sizeof(d[0].mcast_ifn));
2574 d[0].syncid = ipvs->master_syncid;
1da177e4 2575 }
f131315f 2576 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2577 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2578 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2579 sizeof(d[1].mcast_ifn));
2580 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2581 }
2582 if (copy_to_user(user, &d, sizeof(d)) != 0)
2583 ret = -EFAULT;
2584 }
2585 break;
2586
2587 default:
2588 ret = -EINVAL;
2589 }
2590
2591 out:
14cc3e2b 2592 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2593 return ret;
2594}
2595
2596
2597static struct nf_sockopt_ops ip_vs_sockopts = {
2598 .pf = PF_INET,
2599 .set_optmin = IP_VS_BASE_CTL,
2600 .set_optmax = IP_VS_SO_SET_MAX+1,
2601 .set = do_ip_vs_set_ctl,
2602 .get_optmin = IP_VS_BASE_CTL,
2603 .get_optmax = IP_VS_SO_GET_MAX+1,
2604 .get = do_ip_vs_get_ctl,
16fcec35 2605 .owner = THIS_MODULE,
1da177e4
LT
2606};
2607
9a812198
JV
2608/*
2609 * Generic Netlink interface
2610 */
2611
2612/* IPVS genetlink family */
2613static struct genl_family ip_vs_genl_family = {
2614 .id = GENL_ID_GENERATE,
2615 .hdrsize = 0,
2616 .name = IPVS_GENL_NAME,
2617 .version = IPVS_GENL_VERSION,
2618 .maxattr = IPVS_CMD_MAX,
2619};
2620
2621/* Policy used for first-level command attributes */
2622static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2623 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2624 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2625 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2626 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2627 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2628 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2629};
2630
2631/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2632static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2633 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2634 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2635 .len = IP_VS_IFNAME_MAXLEN },
2636 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2637};
2638
2639/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2640static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2641 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2642 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2643 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2644 .len = sizeof(union nf_inet_addr) },
2645 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2646 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2647 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2648 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2649 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2650 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2651 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2652 .len = sizeof(struct ip_vs_flags) },
2653 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2654 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2655 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2656};
2657
2658/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2659static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2660 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2661 .len = sizeof(union nf_inet_addr) },
2662 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2663 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2664 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2665 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2666 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2667 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2668 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2669 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2670 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2671};
2672
2673static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2674 struct ip_vs_stats *stats)
2675{
2676 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2677 if (!nl_stats)
2678 return -EMSGSIZE;
2679
2680 spin_lock_bh(&stats->lock);
2681
e9c0ce23
SW
2682 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2683 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2684 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2685 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2686 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2687 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2689 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2692
2693 spin_unlock_bh(&stats->lock);
2694
2695 nla_nest_end(skb, nl_stats);
2696
2697 return 0;
2698
2699nla_put_failure:
2700 spin_unlock_bh(&stats->lock);
2701 nla_nest_cancel(skb, nl_stats);
2702 return -EMSGSIZE;
2703}
2704
2705static int ip_vs_genl_fill_service(struct sk_buff *skb,
2706 struct ip_vs_service *svc)
2707{
2708 struct nlattr *nl_service;
2709 struct ip_vs_flags flags = { .flags = svc->flags,
2710 .mask = ~0 };
2711
2712 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2713 if (!nl_service)
2714 return -EMSGSIZE;
2715
f94fd041 2716 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2717
2718 if (svc->fwmark) {
2719 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2720 } else {
2721 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2722 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2723 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2724 }
2725
2726 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2727 if (svc->pe)
2728 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2729 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2730 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2731 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2732
2733 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2734 goto nla_put_failure;
2735
2736 nla_nest_end(skb, nl_service);
2737
2738 return 0;
2739
2740nla_put_failure:
2741 nla_nest_cancel(skb, nl_service);
2742 return -EMSGSIZE;
2743}
2744
2745static int ip_vs_genl_dump_service(struct sk_buff *skb,
2746 struct ip_vs_service *svc,
2747 struct netlink_callback *cb)
2748{
2749 void *hdr;
2750
2751 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2752 &ip_vs_genl_family, NLM_F_MULTI,
2753 IPVS_CMD_NEW_SERVICE);
2754 if (!hdr)
2755 return -EMSGSIZE;
2756
2757 if (ip_vs_genl_fill_service(skb, svc) < 0)
2758 goto nla_put_failure;
2759
2760 return genlmsg_end(skb, hdr);
2761
2762nla_put_failure:
2763 genlmsg_cancel(skb, hdr);
2764 return -EMSGSIZE;
2765}
2766
2767static int ip_vs_genl_dump_services(struct sk_buff *skb,
2768 struct netlink_callback *cb)
2769{
2770 int idx = 0, i;
2771 int start = cb->args[0];
2772 struct ip_vs_service *svc;
fc723250 2773 struct net *net = skb_sknet(skb);
9a812198
JV
2774
2775 mutex_lock(&__ip_vs_mutex);
2776 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2777 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2778 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2779 continue;
2780 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2781 idx--;
2782 goto nla_put_failure;
2783 }
2784 }
2785 }
2786
2787 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2788 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2789 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2790 continue;
2791 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2792 idx--;
2793 goto nla_put_failure;
2794 }
2795 }
2796 }
2797
2798nla_put_failure:
2799 mutex_unlock(&__ip_vs_mutex);
2800 cb->args[0] = idx;
2801
2802 return skb->len;
2803}
2804
fc723250
HS
2805static int ip_vs_genl_parse_service(struct net *net,
2806 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2807 struct nlattr *nla, int full_entry,
2808 struct ip_vs_service **ret_svc)
9a812198
JV
2809{
2810 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2811 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2812 struct ip_vs_service *svc;
9a812198
JV
2813
2814 /* Parse mandatory identifying service fields first */
2815 if (nla == NULL ||
2816 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2817 return -EINVAL;
2818
2819 nla_af = attrs[IPVS_SVC_ATTR_AF];
2820 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2821 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2822 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2823 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2824
2825 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2826 return -EINVAL;
2827
258c8893
SH
2828 memset(usvc, 0, sizeof(*usvc));
2829
c860c6b1 2830 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2831#ifdef CONFIG_IP_VS_IPV6
2832 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2833#else
2834 if (usvc->af != AF_INET)
2835#endif
9a812198
JV
2836 return -EAFNOSUPPORT;
2837
2838 if (nla_fwmark) {
2839 usvc->protocol = IPPROTO_TCP;
2840 usvc->fwmark = nla_get_u32(nla_fwmark);
2841 } else {
2842 usvc->protocol = nla_get_u16(nla_protocol);
2843 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2844 usvc->port = nla_get_u16(nla_port);
2845 usvc->fwmark = 0;
2846 }
2847
26c15cfd 2848 if (usvc->fwmark)
fc723250 2849 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2850 else
fc723250 2851 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2852 &usvc->addr, usvc->port);
2853 *ret_svc = svc;
2854
9a812198
JV
2855 /* If a full entry was requested, check for the additional fields */
2856 if (full_entry) {
0d1e71b0 2857 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2858 *nla_netmask;
2859 struct ip_vs_flags flags;
9a812198
JV
2860
2861 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2862 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2863 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2864 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2865 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2866
2867 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2868 return -EINVAL;
2869
2870 nla_memcpy(&flags, nla_flags, sizeof(flags));
2871
2872 /* prefill flags from service if it already exists */
26c15cfd 2873 if (svc)
9a812198 2874 usvc->flags = svc->flags;
9a812198
JV
2875
2876 /* set new flags from userland */
2877 usvc->flags = (usvc->flags & ~flags.mask) |
2878 (flags.flags & flags.mask);
c860c6b1 2879 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2880 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2881 usvc->timeout = nla_get_u32(nla_timeout);
2882 usvc->netmask = nla_get_u32(nla_netmask);
2883 }
2884
2885 return 0;
2886}
2887
fc723250
HS
2888static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2889 struct nlattr *nla)
9a812198 2890{
c860c6b1 2891 struct ip_vs_service_user_kern usvc;
26c15cfd 2892 struct ip_vs_service *svc;
9a812198
JV
2893 int ret;
2894
fc723250 2895 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2896 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2897}
2898
2899static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2900{
2901 struct nlattr *nl_dest;
2902
2903 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2904 if (!nl_dest)
2905 return -EMSGSIZE;
2906
2907 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2908 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2909
2910 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2911 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2912 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2913 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2914 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2915 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2916 atomic_read(&dest->activeconns));
2917 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2918 atomic_read(&dest->inactconns));
2919 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2920 atomic_read(&dest->persistconns));
2921
2922 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2923 goto nla_put_failure;
2924
2925 nla_nest_end(skb, nl_dest);
2926
2927 return 0;
2928
2929nla_put_failure:
2930 nla_nest_cancel(skb, nl_dest);
2931 return -EMSGSIZE;
2932}
2933
2934static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2935 struct netlink_callback *cb)
2936{
2937 void *hdr;
2938
2939 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2940 &ip_vs_genl_family, NLM_F_MULTI,
2941 IPVS_CMD_NEW_DEST);
2942 if (!hdr)
2943 return -EMSGSIZE;
2944
2945 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2946 goto nla_put_failure;
2947
2948 return genlmsg_end(skb, hdr);
2949
2950nla_put_failure:
2951 genlmsg_cancel(skb, hdr);
2952 return -EMSGSIZE;
2953}
2954
2955static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2956 struct netlink_callback *cb)
2957{
2958 int idx = 0;
2959 int start = cb->args[0];
2960 struct ip_vs_service *svc;
2961 struct ip_vs_dest *dest;
2962 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2963 struct net *net = skb_sknet(skb);
9a812198
JV
2964
2965 mutex_lock(&__ip_vs_mutex);
2966
2967 /* Try to find the service for which to dump destinations */
2968 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2969 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2970 goto out_err;
2971
a0840e2e 2972
fc723250 2973 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
2974 if (IS_ERR(svc) || svc == NULL)
2975 goto out_err;
2976
2977 /* Dump the destinations */
2978 list_for_each_entry(dest, &svc->destinations, n_list) {
2979 if (++idx <= start)
2980 continue;
2981 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2982 idx--;
2983 goto nla_put_failure;
2984 }
2985 }
2986
2987nla_put_failure:
2988 cb->args[0] = idx;
9a812198
JV
2989
2990out_err:
2991 mutex_unlock(&__ip_vs_mutex);
2992
2993 return skb->len;
2994}
2995
c860c6b1 2996static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2997 struct nlattr *nla, int full_entry)
2998{
2999 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3000 struct nlattr *nla_addr, *nla_port;
3001
3002 /* Parse mandatory identifying destination fields first */
3003 if (nla == NULL ||
3004 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3005 return -EINVAL;
3006
3007 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3008 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3009
3010 if (!(nla_addr && nla_port))
3011 return -EINVAL;
3012
258c8893
SH
3013 memset(udest, 0, sizeof(*udest));
3014
9a812198
JV
3015 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3016 udest->port = nla_get_u16(nla_port);
3017
3018 /* If a full entry was requested, check for the additional fields */
3019 if (full_entry) {
3020 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3021 *nla_l_thresh;
3022
3023 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3024 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3025 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3026 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3027
3028 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3029 return -EINVAL;
3030
3031 udest->conn_flags = nla_get_u32(nla_fwd)
3032 & IP_VS_CONN_F_FWD_MASK;
3033 udest->weight = nla_get_u32(nla_weight);
3034 udest->u_threshold = nla_get_u32(nla_u_thresh);
3035 udest->l_threshold = nla_get_u32(nla_l_thresh);
3036 }
3037
3038 return 0;
3039}
3040
3041static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3042 const char *mcast_ifn, __be32 syncid)
3043{
3044 struct nlattr *nl_daemon;
3045
3046 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3047 if (!nl_daemon)
3048 return -EMSGSIZE;
3049
3050 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3051 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3052 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3053
3054 nla_nest_end(skb, nl_daemon);
3055
3056 return 0;
3057
3058nla_put_failure:
3059 nla_nest_cancel(skb, nl_daemon);
3060 return -EMSGSIZE;
3061}
3062
3063static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3064 const char *mcast_ifn, __be32 syncid,
3065 struct netlink_callback *cb)
3066{
3067 void *hdr;
3068 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3069 &ip_vs_genl_family, NLM_F_MULTI,
3070 IPVS_CMD_NEW_DAEMON);
3071 if (!hdr)
3072 return -EMSGSIZE;
3073
3074 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3075 goto nla_put_failure;
3076
3077 return genlmsg_end(skb, hdr);
3078
3079nla_put_failure:
3080 genlmsg_cancel(skb, hdr);
3081 return -EMSGSIZE;
3082}
3083
3084static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3085 struct netlink_callback *cb)
3086{
f131315f
HS
3087 struct net *net = skb_net(skb);
3088 struct netns_ipvs *ipvs = net_ipvs(net);
3089
9a812198 3090 mutex_lock(&__ip_vs_mutex);
f131315f 3091 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3092 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3093 ipvs->master_mcast_ifn,
3094 ipvs->master_syncid, cb) < 0)
9a812198
JV
3095 goto nla_put_failure;
3096
3097 cb->args[0] = 1;
3098 }
3099
f131315f 3100 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3101 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3102 ipvs->backup_mcast_ifn,
3103 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3104 goto nla_put_failure;
3105
3106 cb->args[1] = 1;
3107 }
3108
3109nla_put_failure:
3110 mutex_unlock(&__ip_vs_mutex);
3111
3112 return skb->len;
3113}
3114
f131315f 3115static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3116{
3117 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3118 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3119 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3120 return -EINVAL;
3121
f131315f
HS
3122 return start_sync_thread(net,
3123 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3124 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3125 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3126}
3127
f131315f 3128static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3129{
3130 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3131 return -EINVAL;
3132
f131315f
HS
3133 return stop_sync_thread(net,
3134 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3135}
3136
9330419d 3137static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3138{
3139 struct ip_vs_timeout_user t;
3140
9330419d 3141 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3142
3143 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3144 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3145
3146 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3147 t.tcp_fin_timeout =
3148 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3149
3150 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3151 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3152
9330419d 3153 return ip_vs_set_timeout(net, &t);
9a812198
JV
3154}
3155
3156static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3157{
3158 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3159 struct ip_vs_service_user_kern usvc;
3160 struct ip_vs_dest_user_kern udest;
9a812198
JV
3161 int ret = 0, cmd;
3162 int need_full_svc = 0, need_full_dest = 0;
fc723250 3163 struct net *net;
a0840e2e 3164 struct netns_ipvs *ipvs;
9a812198 3165
fc723250 3166 net = skb_sknet(skb);
a0840e2e 3167 ipvs = net_ipvs(net);
9a812198
JV
3168 cmd = info->genlhdr->cmd;
3169
3170 mutex_lock(&__ip_vs_mutex);
3171
3172 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3173 ret = ip_vs_flush(net);
9a812198
JV
3174 goto out;
3175 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3176 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3177 goto out;
3178 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3179 cmd == IPVS_CMD_DEL_DAEMON) {
3180
3181 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3182
3183 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3184 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3185 info->attrs[IPVS_CMD_ATTR_DAEMON],
3186 ip_vs_daemon_policy)) {
3187 ret = -EINVAL;
3188 goto out;
3189 }
3190
3191 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3192 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3193 else
f131315f 3194 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3195 goto out;
3196 } else if (cmd == IPVS_CMD_ZERO &&
3197 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3198 ret = ip_vs_zero_all(net);
9a812198
JV
3199 goto out;
3200 }
3201
3202 /* All following commands require a service argument, so check if we
3203 * received a valid one. We need a full service specification when
3204 * adding / editing a service. Only identifying members otherwise. */
3205 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3206 need_full_svc = 1;
3207
fc723250 3208 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3209 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3210 need_full_svc, &svc);
9a812198
JV
3211 if (ret)
3212 goto out;
3213
9a812198
JV
3214 /* Unless we're adding a new service, the service must already exist */
3215 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3216 ret = -ESRCH;
3217 goto out;
3218 }
3219
3220 /* Destination commands require a valid destination argument. For
3221 * adding / editing a destination, we need a full destination
3222 * specification. */
3223 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3224 cmd == IPVS_CMD_DEL_DEST) {
3225 if (cmd != IPVS_CMD_DEL_DEST)
3226 need_full_dest = 1;
3227
3228 ret = ip_vs_genl_parse_dest(&udest,
3229 info->attrs[IPVS_CMD_ATTR_DEST],
3230 need_full_dest);
3231 if (ret)
3232 goto out;
3233 }
3234
3235 switch (cmd) {
3236 case IPVS_CMD_NEW_SERVICE:
3237 if (svc == NULL)
fc723250 3238 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3239 else
3240 ret = -EEXIST;
3241 break;
3242 case IPVS_CMD_SET_SERVICE:
3243 ret = ip_vs_edit_service(svc, &usvc);
3244 break;
3245 case IPVS_CMD_DEL_SERVICE:
3246 ret = ip_vs_del_service(svc);
26c15cfd 3247 /* do not use svc, it can be freed */
9a812198
JV
3248 break;
3249 case IPVS_CMD_NEW_DEST:
3250 ret = ip_vs_add_dest(svc, &udest);
3251 break;
3252 case IPVS_CMD_SET_DEST:
3253 ret = ip_vs_edit_dest(svc, &udest);
3254 break;
3255 case IPVS_CMD_DEL_DEST:
3256 ret = ip_vs_del_dest(svc, &udest);
3257 break;
3258 case IPVS_CMD_ZERO:
3259 ret = ip_vs_zero_service(svc);
3260 break;
3261 default:
3262 ret = -EINVAL;
3263 }
3264
3265out:
9a812198
JV
3266 mutex_unlock(&__ip_vs_mutex);
3267
3268 return ret;
3269}
3270
3271static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3272{
3273 struct sk_buff *msg;
3274 void *reply;
3275 int ret, cmd, reply_cmd;
fc723250 3276 struct net *net;
a0840e2e 3277 struct netns_ipvs *ipvs;
9a812198 3278
fc723250 3279 net = skb_sknet(skb);
a0840e2e 3280 ipvs = net_ipvs(net);
9a812198
JV
3281 cmd = info->genlhdr->cmd;
3282
3283 if (cmd == IPVS_CMD_GET_SERVICE)
3284 reply_cmd = IPVS_CMD_NEW_SERVICE;
3285 else if (cmd == IPVS_CMD_GET_INFO)
3286 reply_cmd = IPVS_CMD_SET_INFO;
3287 else if (cmd == IPVS_CMD_GET_CONFIG)
3288 reply_cmd = IPVS_CMD_SET_CONFIG;
3289 else {
1e3e238e 3290 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3291 return -EINVAL;
3292 }
3293
3294 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3295 if (!msg)
3296 return -ENOMEM;
3297
3298 mutex_lock(&__ip_vs_mutex);
3299
3300 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3301 if (reply == NULL)
3302 goto nla_put_failure;
3303
3304 switch (cmd) {
3305 case IPVS_CMD_GET_SERVICE:
3306 {
3307 struct ip_vs_service *svc;
3308
fc723250
HS
3309 svc = ip_vs_genl_find_service(net,
3310 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3311 if (IS_ERR(svc)) {
3312 ret = PTR_ERR(svc);
3313 goto out_err;
3314 } else if (svc) {
3315 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3316 if (ret)
3317 goto nla_put_failure;
3318 } else {
3319 ret = -ESRCH;
3320 goto out_err;
3321 }
3322
3323 break;
3324 }
3325
3326 case IPVS_CMD_GET_CONFIG:
3327 {
3328 struct ip_vs_timeout_user t;
3329
9330419d 3330 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3331#ifdef CONFIG_IP_VS_PROTO_TCP
3332 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3333 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3334 t.tcp_fin_timeout);
3335#endif
3336#ifdef CONFIG_IP_VS_PROTO_UDP
3337 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3338#endif
3339
3340 break;
3341 }
3342
3343 case IPVS_CMD_GET_INFO:
3344 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3345 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3346 ip_vs_conn_tab_size);
9a812198
JV
3347 break;
3348 }
3349
3350 genlmsg_end(msg, reply);
134e6375 3351 ret = genlmsg_reply(msg, info);
9a812198
JV
3352 goto out;
3353
3354nla_put_failure:
1e3e238e 3355 pr_err("not enough space in Netlink message\n");
9a812198
JV
3356 ret = -EMSGSIZE;
3357
3358out_err:
3359 nlmsg_free(msg);
3360out:
3361 mutex_unlock(&__ip_vs_mutex);
3362
3363 return ret;
3364}
3365
3366
3367static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3368 {
3369 .cmd = IPVS_CMD_NEW_SERVICE,
3370 .flags = GENL_ADMIN_PERM,
3371 .policy = ip_vs_cmd_policy,
3372 .doit = ip_vs_genl_set_cmd,
3373 },
3374 {
3375 .cmd = IPVS_CMD_SET_SERVICE,
3376 .flags = GENL_ADMIN_PERM,
3377 .policy = ip_vs_cmd_policy,
3378 .doit = ip_vs_genl_set_cmd,
3379 },
3380 {
3381 .cmd = IPVS_CMD_DEL_SERVICE,
3382 .flags = GENL_ADMIN_PERM,
3383 .policy = ip_vs_cmd_policy,
3384 .doit = ip_vs_genl_set_cmd,
3385 },
3386 {
3387 .cmd = IPVS_CMD_GET_SERVICE,
3388 .flags = GENL_ADMIN_PERM,
3389 .doit = ip_vs_genl_get_cmd,
3390 .dumpit = ip_vs_genl_dump_services,
3391 .policy = ip_vs_cmd_policy,
3392 },
3393 {
3394 .cmd = IPVS_CMD_NEW_DEST,
3395 .flags = GENL_ADMIN_PERM,
3396 .policy = ip_vs_cmd_policy,
3397 .doit = ip_vs_genl_set_cmd,
3398 },
3399 {
3400 .cmd = IPVS_CMD_SET_DEST,
3401 .flags = GENL_ADMIN_PERM,
3402 .policy = ip_vs_cmd_policy,
3403 .doit = ip_vs_genl_set_cmd,
3404 },
3405 {
3406 .cmd = IPVS_CMD_DEL_DEST,
3407 .flags = GENL_ADMIN_PERM,
3408 .policy = ip_vs_cmd_policy,
3409 .doit = ip_vs_genl_set_cmd,
3410 },
3411 {
3412 .cmd = IPVS_CMD_GET_DEST,
3413 .flags = GENL_ADMIN_PERM,
3414 .policy = ip_vs_cmd_policy,
3415 .dumpit = ip_vs_genl_dump_dests,
3416 },
3417 {
3418 .cmd = IPVS_CMD_NEW_DAEMON,
3419 .flags = GENL_ADMIN_PERM,
3420 .policy = ip_vs_cmd_policy,
3421 .doit = ip_vs_genl_set_cmd,
3422 },
3423 {
3424 .cmd = IPVS_CMD_DEL_DAEMON,
3425 .flags = GENL_ADMIN_PERM,
3426 .policy = ip_vs_cmd_policy,
3427 .doit = ip_vs_genl_set_cmd,
3428 },
3429 {
3430 .cmd = IPVS_CMD_GET_DAEMON,
3431 .flags = GENL_ADMIN_PERM,
3432 .dumpit = ip_vs_genl_dump_daemons,
3433 },
3434 {
3435 .cmd = IPVS_CMD_SET_CONFIG,
3436 .flags = GENL_ADMIN_PERM,
3437 .policy = ip_vs_cmd_policy,
3438 .doit = ip_vs_genl_set_cmd,
3439 },
3440 {
3441 .cmd = IPVS_CMD_GET_CONFIG,
3442 .flags = GENL_ADMIN_PERM,
3443 .doit = ip_vs_genl_get_cmd,
3444 },
3445 {
3446 .cmd = IPVS_CMD_GET_INFO,
3447 .flags = GENL_ADMIN_PERM,
3448 .doit = ip_vs_genl_get_cmd,
3449 },
3450 {
3451 .cmd = IPVS_CMD_ZERO,
3452 .flags = GENL_ADMIN_PERM,
3453 .policy = ip_vs_cmd_policy,
3454 .doit = ip_vs_genl_set_cmd,
3455 },
3456 {
3457 .cmd = IPVS_CMD_FLUSH,
3458 .flags = GENL_ADMIN_PERM,
3459 .doit = ip_vs_genl_set_cmd,
3460 },
3461};
3462
3463static int __init ip_vs_genl_register(void)
3464{
8f698d54
MM
3465 return genl_register_family_with_ops(&ip_vs_genl_family,
3466 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3467}
3468
3469static void ip_vs_genl_unregister(void)
3470{
3471 genl_unregister_family(&ip_vs_genl_family);
3472}
3473
3474/* End of Generic Netlink interface definitions */
3475
61b1ab45
HS
3476/*
3477 * per netns init/exit func.
3478 */
3479int __net_init __ip_vs_control_init(struct net *net)
3480{
fc723250
HS
3481 int idx;
3482 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3483 struct ctl_table *tbl;
fc723250 3484
61b1ab45
HS
3485 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3486 return -EPERM;
a0840e2e
HS
3487
3488 atomic_set(&ipvs->dropentry, 0);
3489 spin_lock_init(&ipvs->dropentry_lock);
3490 spin_lock_init(&ipvs->droppacket_lock);
3491 spin_lock_init(&ipvs->securetcp_lock);
3492 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3493
3494 /* Initialize rs_table */
3495 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3496 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3497
f2431e6e 3498 INIT_LIST_HEAD(&ipvs->dest_trash);
763f8d0e
HS
3499 atomic_set(&ipvs->ftpsvc_counter, 0);
3500 atomic_set(&ipvs->nullsvc_counter, 0);
f2431e6e 3501
b17fc996
HS
3502 /* procfs stats */
3503 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3504 if (ipvs->tot_stats == NULL) {
3505 pr_err("%s(): no memory.\n", __func__);
3506 return -ENOMEM;
3507 }
3508 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3509 if (!ipvs->cpustats) {
3510 pr_err("%s() alloc_percpu failed\n", __func__);
3511 goto err_alloc;
3512 }
3513 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45 3514
fc723250
HS
3515 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3516 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3517
61b1ab45
HS
3518 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3519 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3520 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3521 &ip_vs_stats_percpu_fops);
a0840e2e
HS
3522
3523 if (!net_eq(net, &init_net)) {
3524 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3525 if (tbl == NULL)
3526 goto err_dup;
3527 } else
3528 tbl = vs_vars;
3529 /* Initialize sysctl defaults */
3530 idx = 0;
3531 ipvs->sysctl_amemthresh = 1024;
3532 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3533 ipvs->sysctl_am_droprate = 10;
3534 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3535 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3536 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3537#ifdef CONFIG_IP_VS_NFCT
3538 tbl[idx++].data = &ipvs->sysctl_conntrack;
3539#endif
3540 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3541 ipvs->sysctl_snat_reroute = 1;
3542 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3543 ipvs->sysctl_sync_ver = 1;
3544 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3545 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3546 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3547 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3548 ipvs->sysctl_sync_threshold[0] = 3;
3549 ipvs->sysctl_sync_threshold[1] = 50;
3550 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3551 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3552 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3553
3554
3555 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
61b1ab45 3556 vs_vars);
a0840e2e 3557 if (ipvs->sysctl_hdr == NULL)
61b1ab45 3558 goto err_reg;
b17fc996 3559 ip_vs_new_estimator(net, ipvs->tot_stats);
a0840e2e 3560 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3561 /* Schedule defense work */
3562 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3563 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45
HS
3564 return 0;
3565
3566err_reg:
a0840e2e
HS
3567 if (!net_eq(net, &init_net))
3568 kfree(tbl);
3569err_dup:
b17fc996
HS
3570 free_percpu(ipvs->cpustats);
3571err_alloc:
3572 kfree(ipvs->tot_stats);
61b1ab45
HS
3573 return -ENOMEM;
3574}
3575
3576static void __net_exit __ip_vs_control_cleanup(struct net *net)
3577{
b17fc996
HS
3578 struct netns_ipvs *ipvs = net_ipvs(net);
3579
61b1ab45
HS
3580 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3581 return;
3582
f2431e6e 3583 ip_vs_trash_cleanup(net);
b17fc996 3584 ip_vs_kill_estimator(net, ipvs->tot_stats);
f2431e6e
HS
3585 cancel_delayed_work_sync(&ipvs->defense_work);
3586 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3587 unregister_net_sysctl_table(ipvs->sysctl_hdr);
b17fc996 3588 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3589 proc_net_remove(net, "ip_vs_stats");
3590 proc_net_remove(net, "ip_vs");
b17fc996
HS
3591 free_percpu(ipvs->cpustats);
3592 kfree(ipvs->tot_stats);
61b1ab45
HS
3593}
3594
3595static struct pernet_operations ipvs_control_ops = {
3596 .init = __ip_vs_control_init,
3597 .exit = __ip_vs_control_cleanup,
3598};
1da177e4 3599
048cf48b 3600int __init ip_vs_control_init(void)
1da177e4 3601{
1da177e4 3602 int idx;
fc723250 3603 int ret;
1da177e4
LT
3604
3605 EnterFunction(2);
3606
fc723250 3607 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3608 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3609 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3610 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3611 }
fc723250
HS
3612
3613 ret = register_pernet_subsys(&ipvs_control_ops);
3614 if (ret) {
3615 pr_err("cannot register namespace.\n");
3616 goto err;
d86bef73 3617 }
fc723250
HS
3618
3619 smp_wmb(); /* Do we really need it now ? */
d86bef73 3620
1da177e4
LT
3621 ret = nf_register_sockopt(&ip_vs_sockopts);
3622 if (ret) {
1e3e238e 3623 pr_err("cannot register sockopt.\n");
fc723250 3624 goto err_net;
1da177e4
LT
3625 }
3626
9a812198
JV
3627 ret = ip_vs_genl_register();
3628 if (ret) {
1e3e238e 3629 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3630 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3631 goto err_net;
9a812198
JV
3632 }
3633
1da177e4
LT
3634 LeaveFunction(2);
3635 return 0;
fc723250
HS
3636
3637err_net:
3638 unregister_pernet_subsys(&ipvs_control_ops);
3639err:
3640 return ret;
1da177e4
LT
3641}
3642
3643
3644void ip_vs_control_cleanup(void)
3645{
3646 EnterFunction(2);
61b1ab45 3647 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3648 ip_vs_genl_unregister();
1da177e4
LT
3649 nf_unregister_sockopt(&ip_vs_sockopts);
3650 LeaveFunction(2);
3651}