IPVS: netns, defense work timer.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
/*
 * Mutex declared for the IPVS sockopt paths; [gs]etsockopt may sleep,
 * so a sleeping lock is used here.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Reader/writer lock guarding the virtual service tables. */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
64static int sysctl_ip_vs_debug_level = 0;
65
66int ip_vs_get_debug_level(void)
67{
68 return sysctl_ip_vs_debug_level;
69}
70#endif
71
09571c7a
VB
#ifdef CONFIG_IP_VS_IPV6
/*
 * Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 * Returns 1 when the route looked up for @addr in init_net goes through
 * a loopback device, 0 otherwise.
 *
 * The route returned by ip6_route_output() carries a reference that must
 * be dropped with dst_release(); the lookup result is also checked for
 * an error so that an error route is not mistaken for a local address.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.fl6_dst = *addr,
		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		return 0;

	rt = (struct rt6_info *)dst;
	is_local = !dst->error && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/* drop the reference taken by the route lookup */
	dst_release(dst);

	return is_local;
}
#endif
1da177e4 90/*
af9debd4
JA
91 * update_defense_level is called from keventd and from sysctl,
92 * so it needs to protect itself from softirqs
1da177e4 93 */
9330419d 94static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
95{
96 struct sysinfo i;
97 static int old_secure_tcp = 0;
98 int availmem;
99 int nomem;
100 int to_change = -1;
101
102 /* we only count free and buffered memory (in pages) */
103 si_meminfo(&i);
104 availmem = i.freeram + i.bufferram;
105 /* however in linux 2.5 the i.bufferram is total page cache size,
106 we need adjust it */
107 /* si_swapinfo(&i); */
108 /* availmem = availmem - (i.totalswap - i.freeswap); */
109
a0840e2e 110 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 111
af9debd4
JA
112 local_bh_disable();
113
1da177e4 114 /* drop_entry */
a0840e2e
HS
115 spin_lock(&ipvs->dropentry_lock);
116 switch (ipvs->sysctl_drop_entry) {
1da177e4 117 case 0:
a0840e2e 118 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
119 break;
120 case 1:
121 if (nomem) {
a0840e2e
HS
122 atomic_set(&ipvs->dropentry, 1);
123 ipvs->sysctl_drop_entry = 2;
1da177e4 124 } else {
a0840e2e 125 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
126 }
127 break;
128 case 2:
129 if (nomem) {
a0840e2e 130 atomic_set(&ipvs->dropentry, 1);
1da177e4 131 } else {
a0840e2e
HS
132 atomic_set(&ipvs->dropentry, 0);
133 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
134 };
135 break;
136 case 3:
a0840e2e 137 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
138 break;
139 }
a0840e2e 140 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
141
142 /* drop_packet */
a0840e2e
HS
143 spin_lock(&ipvs->droppacket_lock);
144 switch (ipvs->sysctl_drop_packet) {
1da177e4 145 case 0:
a0840e2e 146 ipvs->drop_rate = 0;
1da177e4
LT
147 break;
148 case 1:
149 if (nomem) {
a0840e2e
HS
150 ipvs->drop_rate = ipvs->drop_counter
151 = ipvs->sysctl_amemthresh /
152 (ipvs->sysctl_amemthresh-availmem);
153 ipvs->sysctl_drop_packet = 2;
1da177e4 154 } else {
a0840e2e 155 ipvs->drop_rate = 0;
1da177e4
LT
156 }
157 break;
158 case 2:
159 if (nomem) {
a0840e2e
HS
160 ipvs->drop_rate = ipvs->drop_counter
161 = ipvs->sysctl_amemthresh /
162 (ipvs->sysctl_amemthresh-availmem);
1da177e4 163 } else {
a0840e2e
HS
164 ipvs->drop_rate = 0;
165 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
166 }
167 break;
168 case 3:
a0840e2e 169 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
170 break;
171 }
a0840e2e 172 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
173
174 /* secure_tcp */
a0840e2e
HS
175 spin_lock(&ipvs->securetcp_lock);
176 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
177 case 0:
178 if (old_secure_tcp >= 2)
179 to_change = 0;
180 break;
181 case 1:
182 if (nomem) {
183 if (old_secure_tcp < 2)
184 to_change = 1;
a0840e2e 185 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
186 } else {
187 if (old_secure_tcp >= 2)
188 to_change = 0;
189 }
190 break;
191 case 2:
192 if (nomem) {
193 if (old_secure_tcp < 2)
194 to_change = 1;
195 } else {
196 if (old_secure_tcp >= 2)
197 to_change = 0;
a0840e2e 198 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
199 }
200 break;
201 case 3:
202 if (old_secure_tcp < 2)
203 to_change = 1;
204 break;
205 }
a0840e2e 206 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 207 if (to_change >= 0)
9330419d 208 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
209 ipvs->sysctl_secure_tcp > 1);
210 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
211
212 local_bh_enable();
1da177e4
LT
213}
214
215
216/*
217 * Timer for checking the defense
218 */
219#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 220
c4028958 221static void defense_work_handler(struct work_struct *work)
1da177e4 222{
f6340ee0
HS
223 struct netns_ipvs *ipvs =
224 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
225
226 update_defense_level(ipvs);
a0840e2e 227 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
228 ip_vs_random_dropentry(ipvs->net);
229 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4
LT
230}
231
232int
233ip_vs_use_count_inc(void)
234{
235 return try_module_get(THIS_MODULE);
236}
237
238void
239ip_vs_use_count_dec(void)
240{
241 module_put(THIS_MODULE);
242}
243
244
245/*
246 * Hash table: for virtual service lookups
247 */
248#define IP_VS_SVC_TAB_BITS 8
249#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
250#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
251
252/* the service table hashed by <protocol, addr, port> */
253static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
254/* the service table hashed by fwmark */
255static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
256
1da177e4
LT
257/*
258 * Trash for destinations
259 */
260static LIST_HEAD(ip_vs_dest_trash);
261
262/*
263 * FTP & NULL virtual service counters
264 */
265static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
266static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
267
268
269/*
270 * Returns hash value for virtual service
271 */
fc723250
HS
272static inline unsigned
273ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
274 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
275{
276 register unsigned porth = ntohs(port);
b18610de 277 __be32 addr_fold = addr->ip;
1da177e4 278
b18610de
JV
279#ifdef CONFIG_IP_VS_IPV6
280 if (af == AF_INET6)
281 addr_fold = addr->ip6[0]^addr->ip6[1]^
282 addr->ip6[2]^addr->ip6[3];
283#endif
fc723250 284 addr_fold ^= ((size_t)net>>8);
b18610de
JV
285
286 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
287 & IP_VS_SVC_TAB_MASK;
288}
289
290/*
291 * Returns hash value of fwmark for virtual service lookup
292 */
fc723250 293static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 294{
fc723250 295 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
296}
297
298/*
fc723250 299 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
300 * or in the ip_vs_svc_fwm_table by fwmark.
301 * Should be called with locked tables.
302 */
303static int ip_vs_svc_hash(struct ip_vs_service *svc)
304{
305 unsigned hash;
306
307 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
308 pr_err("%s(): request for already hashed, called from %pF\n",
309 __func__, __builtin_return_address(0));
1da177e4
LT
310 return 0;
311 }
312
313 if (svc->fwmark == 0) {
314 /*
fc723250 315 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 316 */
fc723250
HS
317 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
318 &svc->addr, svc->port);
1da177e4
LT
319 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
320 } else {
321 /*
fc723250 322 * Hash it by fwmark in svc_fwm_table
1da177e4 323 */
fc723250 324 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
325 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
326 }
327
328 svc->flags |= IP_VS_SVC_F_HASHED;
329 /* increase its refcnt because it is referenced by the svc table */
330 atomic_inc(&svc->refcnt);
331 return 1;
332}
333
334
335/*
fc723250 336 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
337 * Should be called with locked tables.
338 */
339static int ip_vs_svc_unhash(struct ip_vs_service *svc)
340{
341 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
342 pr_err("%s(): request for unhash flagged, called from %pF\n",
343 __func__, __builtin_return_address(0));
1da177e4
LT
344 return 0;
345 }
346
347 if (svc->fwmark == 0) {
fc723250 348 /* Remove it from the svc_table table */
1da177e4
LT
349 list_del(&svc->s_list);
350 } else {
fc723250 351 /* Remove it from the svc_fwm_table table */
1da177e4
LT
352 list_del(&svc->f_list);
353 }
354
355 svc->flags &= ~IP_VS_SVC_F_HASHED;
356 atomic_dec(&svc->refcnt);
357 return 1;
358}
359
360
361/*
fc723250 362 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 363 */
b18610de 364static inline struct ip_vs_service *
fc723250
HS
365__ip_vs_service_find(struct net *net, int af, __u16 protocol,
366 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
367{
368 unsigned hash;
369 struct ip_vs_service *svc;
370
371 /* Check for "full" addressed entries */
fc723250 372 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
373
374 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
375 if ((svc->af == af)
376 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 377 && (svc->port == vport)
fc723250
HS
378 && (svc->protocol == protocol)
379 && net_eq(svc->net, net)) {
1da177e4 380 /* HIT */
1da177e4
LT
381 return svc;
382 }
383 }
384
385 return NULL;
386}
387
388
389/*
390 * Get service by {fwmark} in the service table.
391 */
b18610de 392static inline struct ip_vs_service *
fc723250 393__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
394{
395 unsigned hash;
396 struct ip_vs_service *svc;
397
398 /* Check for fwmark addressed entries */
fc723250 399 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
400
401 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
402 if (svc->fwmark == fwmark && svc->af == af
403 && net_eq(svc->net, net)) {
1da177e4 404 /* HIT */
1da177e4
LT
405 return svc;
406 }
407 }
408
409 return NULL;
410}
411
412struct ip_vs_service *
fc723250 413ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 414 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
415{
416 struct ip_vs_service *svc;
3c2e0505 417
1da177e4
LT
418 read_lock(&__ip_vs_svc_lock);
419
420 /*
421 * Check the table hashed by fwmark first
422 */
fc723250
HS
423 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
424 if (fwmark && svc)
1da177e4
LT
425 goto out;
426
427 /*
428 * Check the table hashed by <protocol,addr,port>
429 * for "full" addressed entries
430 */
fc723250 431 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
432
433 if (svc == NULL
434 && protocol == IPPROTO_TCP
435 && atomic_read(&ip_vs_ftpsvc_counter)
436 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
437 /*
438 * Check if ftp service entry exists, the packet
439 * might belong to FTP data connections.
440 */
fc723250 441 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
442 }
443
444 if (svc == NULL
445 && atomic_read(&ip_vs_nullsvc_counter)) {
446 /*
447 * Check if the catch-all port (port zero) exists
448 */
fc723250 449 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
450 }
451
452 out:
26c15cfd
JA
453 if (svc)
454 atomic_inc(&svc->usecnt);
1da177e4
LT
455 read_unlock(&__ip_vs_svc_lock);
456
3c2e0505
JV
457 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
458 fwmark, ip_vs_proto_name(protocol),
459 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
460 svc ? "hit" : "not hit");
1da177e4
LT
461
462 return svc;
463}
464
465
466static inline void
467__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
468{
469 atomic_inc(&svc->refcnt);
470 dest->svc = svc;
471}
472
26c15cfd 473static void
1da177e4
LT
474__ip_vs_unbind_svc(struct ip_vs_dest *dest)
475{
476 struct ip_vs_service *svc = dest->svc;
477
478 dest->svc = NULL;
26c15cfd
JA
479 if (atomic_dec_and_test(&svc->refcnt)) {
480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
481 svc->fwmark,
482 IP_VS_DBG_ADDR(svc->af, &svc->addr),
483 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 484 free_percpu(svc->stats.cpustats);
1da177e4 485 kfree(svc);
26c15cfd 486 }
1da177e4
LT
487}
488
489
490/*
491 * Returns hash value for real service
492 */
7937df15
JV
493static inline unsigned ip_vs_rs_hashkey(int af,
494 const union nf_inet_addr *addr,
495 __be16 port)
1da177e4
LT
496{
497 register unsigned porth = ntohs(port);
7937df15
JV
498 __be32 addr_fold = addr->ip;
499
500#ifdef CONFIG_IP_VS_IPV6
501 if (af == AF_INET6)
502 addr_fold = addr->ip6[0]^addr->ip6[1]^
503 addr->ip6[2]^addr->ip6[3];
504#endif
1da177e4 505
7937df15 506 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
507 & IP_VS_RTAB_MASK;
508}
509
510/*
fc723250 511 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
512 * should be called with locked tables.
513 */
fc723250 514static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
515{
516 unsigned hash;
517
518 if (!list_empty(&dest->d_list)) {
519 return 0;
520 }
521
522 /*
523 * Hash by proto,addr,port,
524 * which are the parameters of the real service.
525 */
7937df15
JV
526 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
527
fc723250 528 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
529
530 return 1;
531}
532
533/*
fc723250 534 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
535 * should be called with locked tables.
536 */
537static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
538{
539 /*
fc723250 540 * Remove it from the rs_table table.
1da177e4
LT
541 */
542 if (!list_empty(&dest->d_list)) {
543 list_del(&dest->d_list);
544 INIT_LIST_HEAD(&dest->d_list);
545 }
546
547 return 1;
548}
549
550/*
551 * Lookup real service by <proto,addr,port> in the real service table.
552 */
553struct ip_vs_dest *
fc723250 554ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
555 const union nf_inet_addr *daddr,
556 __be16 dport)
1da177e4 557{
fc723250 558 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
559 unsigned hash;
560 struct ip_vs_dest *dest;
561
562 /*
563 * Check for "full" addressed entries
564 * Return the first found entry
565 */
7937df15 566 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 567
a0840e2e 568 read_lock(&ipvs->rs_lock);
fc723250 569 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
570 if ((dest->af == af)
571 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
572 && (dest->port == dport)
573 && ((dest->protocol == protocol) ||
574 dest->vfwmark)) {
575 /* HIT */
a0840e2e 576 read_unlock(&ipvs->rs_lock);
1da177e4
LT
577 return dest;
578 }
579 }
a0840e2e 580 read_unlock(&ipvs->rs_lock);
1da177e4
LT
581
582 return NULL;
583}
584
585/*
586 * Lookup destination by {addr,port} in the given service
587 */
588static struct ip_vs_dest *
7937df15
JV
589ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
590 __be16 dport)
1da177e4
LT
591{
592 struct ip_vs_dest *dest;
593
594 /*
595 * Find the destination for the given service
596 */
597 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
598 if ((dest->af == svc->af)
599 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
600 && (dest->port == dport)) {
1da177e4
LT
601 /* HIT */
602 return dest;
603 }
604 }
605
606 return NULL;
607}
608
1e356f9c
RB
609/*
610 * Find destination by {daddr,dport,vaddr,protocol}
611 * Cretaed to be used in ip_vs_process_message() in
612 * the backup synchronization daemon. It finds the
613 * destination to be bound to the received connection
614 * on the backup.
615 *
616 * ip_vs_lookup_real_service() looked promissing, but
617 * seems not working as expected.
618 */
fc723250
HS
619struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
620 const union nf_inet_addr *daddr,
7937df15
JV
621 __be16 dport,
622 const union nf_inet_addr *vaddr,
0e051e68 623 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
624{
625 struct ip_vs_dest *dest;
626 struct ip_vs_service *svc;
627
fc723250 628 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
629 if (!svc)
630 return NULL;
631 dest = ip_vs_lookup_dest(svc, daddr, dport);
632 if (dest)
633 atomic_inc(&dest->refcnt);
634 ip_vs_service_put(svc);
635 return dest;
636}
1da177e4
LT
637
638/*
639 * Lookup dest by {svc,addr,port} in the destination trash.
640 * The destination trash is used to hold the destinations that are removed
641 * from the service table but are still referenced by some conn entries.
642 * The reason to add the destination trash is when the dest is temporary
643 * down (either by administrator or by monitor program), the dest can be
644 * picked back from the trash, the remaining connections to the dest can
645 * continue, and the counting information of the dest is also useful for
646 * scheduling.
647 */
648static struct ip_vs_dest *
7937df15
JV
649ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
650 __be16 dport)
1da177e4
LT
651{
652 struct ip_vs_dest *dest, *nxt;
653
654 /*
655 * Find the destination in trash
656 */
657 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
658 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
659 "dest->refcnt=%d\n",
660 dest->vfwmark,
661 IP_VS_DBG_ADDR(svc->af, &dest->addr),
662 ntohs(dest->port),
663 atomic_read(&dest->refcnt));
664 if (dest->af == svc->af &&
665 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
666 dest->port == dport &&
667 dest->vfwmark == svc->fwmark &&
668 dest->protocol == svc->protocol &&
669 (svc->fwmark ||
7937df15 670 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
671 dest->vport == svc->port))) {
672 /* HIT */
673 return dest;
674 }
675
676 /*
677 * Try to purge the destination from trash if not referenced
678 */
679 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
680 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
681 "from trash\n",
682 dest->vfwmark,
683 IP_VS_DBG_ADDR(svc->af, &dest->addr),
684 ntohs(dest->port));
1da177e4
LT
685 list_del(&dest->n_list);
686 ip_vs_dst_reset(dest);
687 __ip_vs_unbind_svc(dest);
b17fc996 688 free_percpu(dest->stats.cpustats);
1da177e4
LT
689 kfree(dest);
690 }
691 }
692
693 return NULL;
694}
695
696
697/*
698 * Clean up all the destinations in the trash
699 * Called by the ip_vs_control_cleanup()
700 *
701 * When the ip_vs_control_clearup is activated by ipvs module exit,
702 * the service tables must have been flushed and all the connections
703 * are expired, and the refcnt of each destination in the trash must
704 * be 1, so we simply release them here.
705 */
706static void ip_vs_trash_cleanup(void)
707{
708 struct ip_vs_dest *dest, *nxt;
709
710 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
711 list_del(&dest->n_list);
712 ip_vs_dst_reset(dest);
713 __ip_vs_unbind_svc(dest);
b17fc996 714 free_percpu(dest->stats.cpustats);
1da177e4
LT
715 kfree(dest);
716 }
717}
718
719
720static void
721ip_vs_zero_stats(struct ip_vs_stats *stats)
722{
723 spin_lock_bh(&stats->lock);
e93615d0 724
e9c0ce23 725 memset(&stats->ustats, 0, sizeof(stats->ustats));
1da177e4 726 ip_vs_zero_estimator(stats);
e93615d0 727
3a14a313 728 spin_unlock_bh(&stats->lock);
1da177e4
LT
729}
730
731/*
732 * Update a destination in the given service
733 */
734static void
26c15cfd
JA
735__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
736 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 737{
fc723250 738 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
739 int conn_flags;
740
741 /* set the weight and the flags */
742 atomic_set(&dest->weight, udest->weight);
3575792e
JA
743 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
744 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 745
1da177e4 746 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 747 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
748 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
749 } else {
750 /*
fc723250 751 * Put the real service in rs_table if not present.
1da177e4
LT
752 * For now only for NAT!
753 */
a0840e2e 754 write_lock_bh(&ipvs->rs_lock);
fc723250 755 ip_vs_rs_hash(ipvs, dest);
a0840e2e 756 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
757 }
758 atomic_set(&dest->conn_flags, conn_flags);
759
760 /* bind the service */
761 if (!dest->svc) {
762 __ip_vs_bind_svc(dest, svc);
763 } else {
764 if (dest->svc != svc) {
765 __ip_vs_unbind_svc(dest);
766 ip_vs_zero_stats(&dest->stats);
767 __ip_vs_bind_svc(dest, svc);
768 }
769 }
770
771 /* set the dest status flags */
772 dest->flags |= IP_VS_DEST_F_AVAILABLE;
773
774 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
775 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
776 dest->u_threshold = udest->u_threshold;
777 dest->l_threshold = udest->l_threshold;
26c15cfd 778
fc604767
JA
779 spin_lock(&dest->dst_lock);
780 ip_vs_dst_reset(dest);
781 spin_unlock(&dest->dst_lock);
782
26c15cfd 783 if (add)
29c2026f 784 ip_vs_new_estimator(svc->net, &dest->stats);
26c15cfd
JA
785
786 write_lock_bh(&__ip_vs_svc_lock);
787
788 /* Wait until all other svc users go away */
789 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
790
791 if (add) {
792 list_add(&dest->n_list, &svc->destinations);
793 svc->num_dests++;
794 }
795
796 /* call the update_service, because server weight may be changed */
797 if (svc->scheduler->update_service)
798 svc->scheduler->update_service(svc);
799
800 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
801}
802
803
804/*
805 * Create a destination for the given service
806 */
807static int
c860c6b1 808ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
809 struct ip_vs_dest **dest_p)
810{
811 struct ip_vs_dest *dest;
812 unsigned atype;
813
814 EnterFunction(2);
815
09571c7a
VB
816#ifdef CONFIG_IP_VS_IPV6
817 if (svc->af == AF_INET6) {
818 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
819 if ((!(atype & IPV6_ADDR_UNICAST) ||
820 atype & IPV6_ADDR_LINKLOCAL) &&
09571c7a
VB
821 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
822 return -EINVAL;
823 } else
824#endif
825 {
826 atype = inet_addr_type(&init_net, udest->addr.ip);
827 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
828 return -EINVAL;
829 }
1da177e4 830
dee06e47 831 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 832 if (dest == NULL) {
1e3e238e 833 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
834 return -ENOMEM;
835 }
b17fc996
HS
836 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
837 if (!dest->stats.cpustats) {
838 pr_err("%s() alloc_percpu failed\n", __func__);
839 goto err_alloc;
840 }
1da177e4 841
c860c6b1 842 dest->af = svc->af;
1da177e4 843 dest->protocol = svc->protocol;
c860c6b1 844 dest->vaddr = svc->addr;
1da177e4
LT
845 dest->vport = svc->port;
846 dest->vfwmark = svc->fwmark;
c860c6b1 847 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
848 dest->port = udest->port;
849
850 atomic_set(&dest->activeconns, 0);
851 atomic_set(&dest->inactconns, 0);
852 atomic_set(&dest->persistconns, 0);
26c15cfd 853 atomic_set(&dest->refcnt, 1);
1da177e4
LT
854
855 INIT_LIST_HEAD(&dest->d_list);
856 spin_lock_init(&dest->dst_lock);
857 spin_lock_init(&dest->stats.lock);
26c15cfd 858 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
859
860 *dest_p = dest;
861
862 LeaveFunction(2);
863 return 0;
b17fc996
HS
864
865err_alloc:
866 kfree(dest);
867 return -ENOMEM;
1da177e4
LT
868}
869
870
871/*
872 * Add a destination into an existing service
873 */
874static int
c860c6b1 875ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
876{
877 struct ip_vs_dest *dest;
c860c6b1 878 union nf_inet_addr daddr;
014d730d 879 __be16 dport = udest->port;
1da177e4
LT
880 int ret;
881
882 EnterFunction(2);
883
884 if (udest->weight < 0) {
1e3e238e 885 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
886 return -ERANGE;
887 }
888
889 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
890 pr_err("%s(): lower threshold is higher than upper threshold\n",
891 __func__);
1da177e4
LT
892 return -ERANGE;
893 }
894
c860c6b1
JV
895 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
896
1da177e4
LT
897 /*
898 * Check if the dest already exists in the list
899 */
7937df15
JV
900 dest = ip_vs_lookup_dest(svc, &daddr, dport);
901
1da177e4 902 if (dest != NULL) {
1e3e238e 903 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
904 return -EEXIST;
905 }
906
907 /*
908 * Check if the dest already exists in the trash and
909 * is from the same service
910 */
7937df15
JV
911 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
912
1da177e4 913 if (dest != NULL) {
cfc78c5a
JV
914 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
915 "dest->refcnt=%d, service %u/%s:%u\n",
916 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
917 atomic_read(&dest->refcnt),
918 dest->vfwmark,
919 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
920 ntohs(dest->vport));
921
1da177e4
LT
922 /*
923 * Get the destination from the trash
924 */
925 list_del(&dest->n_list);
926
26c15cfd
JA
927 __ip_vs_update_dest(svc, dest, udest, 1);
928 ret = 0;
929 } else {
1da177e4 930 /*
26c15cfd 931 * Allocate and initialize the dest structure
1da177e4 932 */
26c15cfd 933 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 934 }
1da177e4
LT
935 LeaveFunction(2);
936
26c15cfd 937 return ret;
1da177e4
LT
938}
939
940
941/*
942 * Edit a destination in the given service
943 */
944static int
c860c6b1 945ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
946{
947 struct ip_vs_dest *dest;
c860c6b1 948 union nf_inet_addr daddr;
014d730d 949 __be16 dport = udest->port;
1da177e4
LT
950
951 EnterFunction(2);
952
953 if (udest->weight < 0) {
1e3e238e 954 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
955 return -ERANGE;
956 }
957
958 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
959 pr_err("%s(): lower threshold is higher than upper threshold\n",
960 __func__);
1da177e4
LT
961 return -ERANGE;
962 }
963
c860c6b1
JV
964 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
965
1da177e4
LT
966 /*
967 * Lookup the destination list
968 */
7937df15
JV
969 dest = ip_vs_lookup_dest(svc, &daddr, dport);
970
1da177e4 971 if (dest == NULL) {
1e3e238e 972 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
973 return -ENOENT;
974 }
975
26c15cfd 976 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
977 LeaveFunction(2);
978
979 return 0;
980}
981
982
983/*
984 * Delete a destination (must be already unlinked from the service)
985 */
29c2026f 986static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 987{
a0840e2e
HS
988 struct netns_ipvs *ipvs = net_ipvs(net);
989
29c2026f 990 ip_vs_kill_estimator(net, &dest->stats);
1da177e4
LT
991
992 /*
993 * Remove it from the d-linked list with the real services.
994 */
a0840e2e 995 write_lock_bh(&ipvs->rs_lock);
1da177e4 996 ip_vs_rs_unhash(dest);
a0840e2e 997 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
998
999 /*
1000 * Decrease the refcnt of the dest, and free the dest
1001 * if nobody refers to it (refcnt=0). Otherwise, throw
1002 * the destination into the trash.
1003 */
1004 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1005 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1006 dest->vfwmark,
1007 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1008 ntohs(dest->port));
1da177e4
LT
1009 ip_vs_dst_reset(dest);
1010 /* simply decrease svc->refcnt here, let the caller check
1011 and release the service if nobody refers to it.
1012 Only user context can release destination and service,
1013 and only one user context can update virtual service at a
1014 time, so the operation here is OK */
1015 atomic_dec(&dest->svc->refcnt);
b17fc996 1016 free_percpu(dest->stats.cpustats);
1da177e4
LT
1017 kfree(dest);
1018 } else {
cfc78c5a
JV
1019 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1020 "dest->refcnt=%d\n",
1021 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1022 ntohs(dest->port),
1023 atomic_read(&dest->refcnt));
1da177e4
LT
1024 list_add(&dest->n_list, &ip_vs_dest_trash);
1025 atomic_inc(&dest->refcnt);
1026 }
1027}
1028
1029
1030/*
1031 * Unlink a destination from the given service
1032 */
1033static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1034 struct ip_vs_dest *dest,
1035 int svcupd)
1036{
1037 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1038
1039 /*
1040 * Remove it from the d-linked destination list.
1041 */
1042 list_del(&dest->n_list);
1043 svc->num_dests--;
82dfb6f3
SW
1044
1045 /*
1046 * Call the update_service function of its scheduler
1047 */
1048 if (svcupd && svc->scheduler->update_service)
1049 svc->scheduler->update_service(svc);
1da177e4
LT
1050}
1051
1052
1053/*
1054 * Delete a destination server in the given service
1055 */
1056static int
c860c6b1 1057ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1058{
1059 struct ip_vs_dest *dest;
014d730d 1060 __be16 dport = udest->port;
1da177e4
LT
1061
1062 EnterFunction(2);
1063
7937df15 1064 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1065
1da177e4 1066 if (dest == NULL) {
1e3e238e 1067 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1068 return -ENOENT;
1069 }
1070
1071 write_lock_bh(&__ip_vs_svc_lock);
1072
1073 /*
1074 * Wait until all other svc users go away.
1075 */
26c15cfd 1076 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1077
1078 /*
1079 * Unlink dest from the service
1080 */
1081 __ip_vs_unlink_dest(svc, dest, 1);
1082
1083 write_unlock_bh(&__ip_vs_svc_lock);
1084
1085 /*
1086 * Delete the destination
1087 */
a0840e2e 1088 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1089
1090 LeaveFunction(2);
1091
1092 return 0;
1093}
1094
1095
1096/*
1097 * Add a service into the service hash table
1098 */
1099static int
fc723250 1100ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1101 struct ip_vs_service **svc_p)
1da177e4
LT
1102{
1103 int ret = 0;
1104 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1105 struct ip_vs_pe *pe = NULL;
1da177e4 1106 struct ip_vs_service *svc = NULL;
a0840e2e 1107 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1108
1109 /* increase the module use count */
1110 ip_vs_use_count_inc();
1111
1112 /* Lookup the scheduler by 'u->sched_name' */
1113 sched = ip_vs_scheduler_get(u->sched_name);
1114 if (sched == NULL) {
1e3e238e 1115 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1116 ret = -ENOENT;
6e08bfb8 1117 goto out_err;
1da177e4
LT
1118 }
1119
0d1e71b0 1120 if (u->pe_name && *u->pe_name) {
e9e5eee8 1121 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1122 if (pe == NULL) {
1123 pr_info("persistence engine module ip_vs_pe_%s "
1124 "not found\n", u->pe_name);
1125 ret = -ENOENT;
1126 goto out_err;
1127 }
1128 }
1129
f94fd041 1130#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1131 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1132 ret = -EINVAL;
1133 goto out_err;
f94fd041
JV
1134 }
1135#endif
1136
dee06e47 1137 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1138 if (svc == NULL) {
1e3e238e 1139 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1140 ret = -ENOMEM;
1141 goto out_err;
1142 }
b17fc996
HS
1143 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1144 if (!svc->stats.cpustats) {
1145 pr_err("%s() alloc_percpu failed\n", __func__);
1146 goto out_err;
1147 }
1da177e4
LT
1148
1149 /* I'm the first user of the service */
26c15cfd 1150 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1151 atomic_set(&svc->refcnt, 0);
1152
c860c6b1 1153 svc->af = u->af;
1da177e4 1154 svc->protocol = u->protocol;
c860c6b1 1155 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1156 svc->port = u->port;
1157 svc->fwmark = u->fwmark;
1158 svc->flags = u->flags;
1159 svc->timeout = u->timeout * HZ;
1160 svc->netmask = u->netmask;
fc723250 1161 svc->net = net;
1da177e4
LT
1162
1163 INIT_LIST_HEAD(&svc->destinations);
1164 rwlock_init(&svc->sched_lock);
1165 spin_lock_init(&svc->stats.lock);
1166
1167 /* Bind the scheduler */
1168 ret = ip_vs_bind_scheduler(svc, sched);
1169 if (ret)
1170 goto out_err;
1171 sched = NULL;
1172
0d1e71b0
SH
1173 /* Bind the ct retriever */
1174 ip_vs_bind_pe(svc, pe);
1175 pe = NULL;
1176
1da177e4
LT
1177 /* Update the virtual service counters */
1178 if (svc->port == FTPPORT)
1179 atomic_inc(&ip_vs_ftpsvc_counter);
1180 else if (svc->port == 0)
1181 atomic_inc(&ip_vs_nullsvc_counter);
1182
29c2026f 1183 ip_vs_new_estimator(net, &svc->stats);
f94fd041
JV
1184
1185 /* Count only IPv4 services for old get/setsockopt interface */
1186 if (svc->af == AF_INET)
a0840e2e 1187 ipvs->num_services++;
1da177e4
LT
1188
1189 /* Hash the service into the service table */
1190 write_lock_bh(&__ip_vs_svc_lock);
1191 ip_vs_svc_hash(svc);
1192 write_unlock_bh(&__ip_vs_svc_lock);
1193
1194 *svc_p = svc;
1195 return 0;
1196
b17fc996 1197
6e08bfb8 1198 out_err:
1da177e4 1199 if (svc != NULL) {
2fabf35b 1200 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1201 if (svc->inc) {
1202 local_bh_disable();
1203 ip_vs_app_inc_put(svc->inc);
1204 local_bh_enable();
1205 }
b17fc996
HS
1206 if (svc->stats.cpustats)
1207 free_percpu(svc->stats.cpustats);
1da177e4
LT
1208 kfree(svc);
1209 }
1210 ip_vs_scheduler_put(sched);
0d1e71b0 1211 ip_vs_pe_put(pe);
1da177e4 1212
1da177e4
LT
1213 /* decrease the module use count */
1214 ip_vs_use_count_dec();
1215
1216 return ret;
1217}
1218
1219
1220/*
1221 * Edit a service and bind it with a new scheduler
1222 */
1223static int
c860c6b1 1224ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1225{
1226 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1227 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1228 int ret = 0;
1229
1230 /*
1231 * Lookup the scheduler, by 'u->sched_name'
1232 */
1233 sched = ip_vs_scheduler_get(u->sched_name);
1234 if (sched == NULL) {
1e3e238e 1235 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1236 return -ENOENT;
1237 }
1238 old_sched = sched;
1239
0d1e71b0 1240 if (u->pe_name && *u->pe_name) {
e9e5eee8 1241 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1242 if (pe == NULL) {
1243 pr_info("persistence engine module ip_vs_pe_%s "
1244 "not found\n", u->pe_name);
1245 ret = -ENOENT;
1246 goto out;
1247 }
1248 old_pe = pe;
1249 }
1250
f94fd041 1251#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1252 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1253 ret = -EINVAL;
1254 goto out;
f94fd041
JV
1255 }
1256#endif
1257
1da177e4
LT
1258 write_lock_bh(&__ip_vs_svc_lock);
1259
1260 /*
1261 * Wait until all other svc users go away.
1262 */
26c15cfd 1263 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1264
1265 /*
1266 * Set the flags and timeout value
1267 */
1268 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1269 svc->timeout = u->timeout * HZ;
1270 svc->netmask = u->netmask;
1271
1272 old_sched = svc->scheduler;
1273 if (sched != old_sched) {
1274 /*
1275 * Unbind the old scheduler
1276 */
1277 if ((ret = ip_vs_unbind_scheduler(svc))) {
1278 old_sched = sched;
9e691ed6 1279 goto out_unlock;
1da177e4
LT
1280 }
1281
1282 /*
1283 * Bind the new scheduler
1284 */
1285 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1286 /*
1287 * If ip_vs_bind_scheduler fails, restore the old
1288 * scheduler.
1289 * The main reason of failure is out of memory.
1290 *
1291 * The question is if the old scheduler can be
1292 * restored all the time. TODO: if it cannot be
1293 * restored some time, we must delete the service,
1294 * otherwise the system may crash.
1295 */
1296 ip_vs_bind_scheduler(svc, old_sched);
1297 old_sched = sched;
9e691ed6 1298 goto out_unlock;
1da177e4
LT
1299 }
1300 }
1301
0d1e71b0
SH
1302 old_pe = svc->pe;
1303 if (pe != old_pe) {
1304 ip_vs_unbind_pe(svc);
1305 ip_vs_bind_pe(svc, pe);
1306 }
1307
9e691ed6 1308 out_unlock:
1da177e4 1309 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1310 out:
6e08bfb8 1311 ip_vs_scheduler_put(old_sched);
0d1e71b0 1312 ip_vs_pe_put(old_pe);
1da177e4
LT
1313 return ret;
1314}
1315
1316
1317/*
1318 * Delete a service from the service list
1319 * - The service must be unlinked, unlocked and not referenced!
1320 * - We are called under _bh lock
1321 */
1322static void __ip_vs_del_service(struct ip_vs_service *svc)
1323{
1324 struct ip_vs_dest *dest, *nxt;
1325 struct ip_vs_scheduler *old_sched;
0d1e71b0 1326 struct ip_vs_pe *old_pe;
a0840e2e 1327 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1328
1329 pr_info("%s: enter\n", __func__);
1da177e4 1330
f94fd041
JV
1331 /* Count only IPv4 services for old get/setsockopt interface */
1332 if (svc->af == AF_INET)
a0840e2e 1333 ipvs->num_services--;
f94fd041 1334
29c2026f 1335 ip_vs_kill_estimator(svc->net, &svc->stats);
1da177e4
LT
1336
1337 /* Unbind scheduler */
1338 old_sched = svc->scheduler;
1339 ip_vs_unbind_scheduler(svc);
6e08bfb8 1340 ip_vs_scheduler_put(old_sched);
1da177e4 1341
0d1e71b0
SH
1342 /* Unbind persistence engine */
1343 old_pe = svc->pe;
1344 ip_vs_unbind_pe(svc);
1345 ip_vs_pe_put(old_pe);
1346
1da177e4
LT
1347 /* Unbind app inc */
1348 if (svc->inc) {
1349 ip_vs_app_inc_put(svc->inc);
1350 svc->inc = NULL;
1351 }
1352
1353 /*
1354 * Unlink the whole destination list
1355 */
1356 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1357 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1358 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1359 }
1360
1361 /*
1362 * Update the virtual service counters
1363 */
1364 if (svc->port == FTPPORT)
1365 atomic_dec(&ip_vs_ftpsvc_counter);
1366 else if (svc->port == 0)
1367 atomic_dec(&ip_vs_nullsvc_counter);
1368
1369 /*
1370 * Free the service if nobody refers to it
1371 */
26c15cfd
JA
1372 if (atomic_read(&svc->refcnt) == 0) {
1373 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1374 svc->fwmark,
1375 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1376 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1377 free_percpu(svc->stats.cpustats);
1da177e4 1378 kfree(svc);
26c15cfd 1379 }
1da177e4
LT
1380
1381 /* decrease the module use count */
1382 ip_vs_use_count_dec();
1383}
1384
1385/*
26c15cfd 1386 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1387 */
26c15cfd 1388static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1389{
1da177e4
LT
1390 /*
1391 * Unhash it from the service table
1392 */
1393 write_lock_bh(&__ip_vs_svc_lock);
1394
1395 ip_vs_svc_unhash(svc);
1396
1397 /*
1398 * Wait until all the svc users go away.
1399 */
26c15cfd 1400 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1401
1402 __ip_vs_del_service(svc);
1403
1404 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1405}
1406
1407/*
1408 * Delete a service from the service list
1409 */
1410static int ip_vs_del_service(struct ip_vs_service *svc)
1411{
1412 if (svc == NULL)
1413 return -EEXIST;
1414 ip_vs_unlink_service(svc);
1da177e4
LT
1415
1416 return 0;
1417}
1418
1419
1420/*
1421 * Flush all the virtual services
1422 */
fc723250 1423static int ip_vs_flush(struct net *net)
1da177e4
LT
1424{
1425 int idx;
1426 struct ip_vs_service *svc, *nxt;
1427
1428 /*
fc723250 1429 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1430 */
1431 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1432 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1433 s_list) {
1434 if (net_eq(svc->net, net))
1435 ip_vs_unlink_service(svc);
1da177e4
LT
1436 }
1437 }
1438
1439 /*
1440 * Flush the service table hashed by fwmark
1441 */
1442 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1443 list_for_each_entry_safe(svc, nxt,
1444 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1445 if (net_eq(svc->net, net))
1446 ip_vs_unlink_service(svc);
1da177e4
LT
1447 }
1448 }
1449
1450 return 0;
1451}
1452
1453
1454/*
1455 * Zero counters in a service or all services
1456 */
1457static int ip_vs_zero_service(struct ip_vs_service *svc)
1458{
1459 struct ip_vs_dest *dest;
1460
1461 write_lock_bh(&__ip_vs_svc_lock);
1462 list_for_each_entry(dest, &svc->destinations, n_list) {
1463 ip_vs_zero_stats(&dest->stats);
1464 }
1465 ip_vs_zero_stats(&svc->stats);
1466 write_unlock_bh(&__ip_vs_svc_lock);
1467 return 0;
1468}
1469
fc723250 1470static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1471{
1472 int idx;
1473 struct ip_vs_service *svc;
1474
1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1477 if (net_eq(svc->net, net))
1478 ip_vs_zero_service(svc);
1da177e4
LT
1479 }
1480 }
1481
1482 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1484 if (net_eq(svc->net, net))
1485 ip_vs_zero_service(svc);
1da177e4
LT
1486 }
1487 }
1488
b17fc996 1489 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1da177e4
LT
1490 return 0;
1491}
1492
1493
1494static int
8d65af78 1495proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1496 void __user *buffer, size_t *lenp, loff_t *ppos)
1497{
9330419d 1498 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1499 int *valp = table->data;
1500 int val = *valp;
1501 int rc;
1502
8d65af78 1503 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1504 if (write && (*valp != val)) {
1505 if ((*valp < 0) || (*valp > 3)) {
1506 /* Restore the correct value */
1507 *valp = val;
1508 } else {
9330419d 1509 update_defense_level(net_ipvs(net));
1da177e4
LT
1510 }
1511 }
1512 return rc;
1513}
1514
1515
1516static int
8d65af78 1517proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1518 void __user *buffer, size_t *lenp, loff_t *ppos)
1519{
1520 int *valp = table->data;
1521 int val[2];
1522 int rc;
1523
1524 /* backup the value first */
1525 memcpy(val, valp, sizeof(val));
1526
8d65af78 1527 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1528 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1529 /* Restore the correct value */
1530 memcpy(valp, val, sizeof(val));
1531 }
1532 return rc;
1533}
1534
b880c1f0
HS
1535static int
1536proc_do_sync_mode(ctl_table *table, int write,
1537 void __user *buffer, size_t *lenp, loff_t *ppos)
1538{
1539 int *valp = table->data;
1540 int val = *valp;
1541 int rc;
1542
1543 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1544 if (write && (*valp != val)) {
1545 if ((*valp < 0) || (*valp > 1)) {
1546 /* Restore the correct value */
1547 *valp = val;
1548 } else {
f131315f
HS
1549 struct net *net = current->nsproxy->net_ns;
1550 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1551 }
1552 }
1553 return rc;
1554}
1da177e4
LT
1555
1556/*
1557 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1558 * Do not change order or insert new entries without
1559 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1560 */
1561
1562static struct ctl_table vs_vars[] = {
1563 {
1da177e4 1564 .procname = "amemthresh",
1da177e4
LT
1565 .maxlen = sizeof(int),
1566 .mode = 0644,
6d9f239a 1567 .proc_handler = proc_dointvec,
1da177e4 1568 },
1da177e4 1569 {
1da177e4 1570 .procname = "am_droprate",
1da177e4
LT
1571 .maxlen = sizeof(int),
1572 .mode = 0644,
6d9f239a 1573 .proc_handler = proc_dointvec,
1da177e4
LT
1574 },
1575 {
1da177e4 1576 .procname = "drop_entry",
1da177e4
LT
1577 .maxlen = sizeof(int),
1578 .mode = 0644,
6d9f239a 1579 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1580 },
1581 {
1da177e4 1582 .procname = "drop_packet",
1da177e4
LT
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
6d9f239a 1585 .proc_handler = proc_do_defense_mode,
1da177e4 1586 },
f4bc17cd
JA
1587#ifdef CONFIG_IP_VS_NFCT
1588 {
1589 .procname = "conntrack",
f4bc17cd
JA
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec,
1593 },
1594#endif
1da177e4 1595 {
1da177e4 1596 .procname = "secure_tcp",
1da177e4
LT
1597 .maxlen = sizeof(int),
1598 .mode = 0644,
6d9f239a 1599 .proc_handler = proc_do_defense_mode,
1da177e4 1600 },
8a803040
JA
1601 {
1602 .procname = "snat_reroute",
8a803040
JA
1603 .maxlen = sizeof(int),
1604 .mode = 0644,
1605 .proc_handler = &proc_dointvec,
1606 },
b880c1f0
HS
1607 {
1608 .procname = "sync_version",
b880c1f0
HS
1609 .maxlen = sizeof(int),
1610 .mode = 0644,
1611 .proc_handler = &proc_do_sync_mode,
1612 },
a0840e2e
HS
1613 {
1614 .procname = "cache_bypass",
1615 .maxlen = sizeof(int),
1616 .mode = 0644,
1617 .proc_handler = proc_dointvec,
1618 },
1619 {
1620 .procname = "expire_nodest_conn",
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
1623 .proc_handler = proc_dointvec,
1624 },
1625 {
1626 .procname = "expire_quiescent_template",
1627 .maxlen = sizeof(int),
1628 .mode = 0644,
1629 .proc_handler = proc_dointvec,
1630 },
1631 {
1632 .procname = "sync_threshold",
1633 .maxlen =
1634 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1635 .mode = 0644,
1636 .proc_handler = proc_do_sync_threshold,
1637 },
1638 {
1639 .procname = "nat_icmp_send",
1640 .maxlen = sizeof(int),
1641 .mode = 0644,
1642 .proc_handler = proc_dointvec,
1643 },
1644#ifdef CONFIG_IP_VS_DEBUG
1645 {
1646 .procname = "debug_level",
1647 .data = &sysctl_ip_vs_debug_level,
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec,
1651 },
1652#endif
1da177e4
LT
1653#if 0
1654 {
1da177e4
LT
1655 .procname = "timeout_established",
1656 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1657 .maxlen = sizeof(int),
1658 .mode = 0644,
6d9f239a 1659 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1660 },
1661 {
1da177e4
LT
1662 .procname = "timeout_synsent",
1663 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1664 .maxlen = sizeof(int),
1665 .mode = 0644,
6d9f239a 1666 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1667 },
1668 {
1da177e4
LT
1669 .procname = "timeout_synrecv",
1670 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1671 .maxlen = sizeof(int),
1672 .mode = 0644,
6d9f239a 1673 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1674 },
1675 {
1da177e4
LT
1676 .procname = "timeout_finwait",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
6d9f239a 1680 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1681 },
1682 {
1da177e4
LT
1683 .procname = "timeout_timewait",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
6d9f239a 1687 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1688 },
1689 {
1da177e4
LT
1690 .procname = "timeout_close",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
6d9f239a 1694 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1695 },
1696 {
1da177e4
LT
1697 .procname = "timeout_closewait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
6d9f239a 1701 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1702 },
1703 {
1da177e4
LT
1704 .procname = "timeout_lastack",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
6d9f239a 1708 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1709 },
1710 {
1da177e4
LT
1711 .procname = "timeout_listen",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
6d9f239a 1715 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1716 },
1717 {
1da177e4
LT
1718 .procname = "timeout_synack",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
6d9f239a 1722 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1723 },
1724 {
1da177e4
LT
1725 .procname = "timeout_udp",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
6d9f239a 1729 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1730 },
1731 {
1da177e4
LT
1732 .procname = "timeout_icmp",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1734 .maxlen = sizeof(int),
1735 .mode = 0644,
6d9f239a 1736 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1737 },
1738#endif
f8572d8f 1739 { }
1da177e4
LT
1740};
1741
5587da55 1742const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1743 { .procname = "net", },
1744 { .procname = "ipv4", },
90754f8e
PE
1745 { .procname = "vs", },
1746 { }
1da177e4 1747};
90754f8e 1748EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4 1749
1da177e4
LT
1750#ifdef CONFIG_PROC_FS
1751
1752struct ip_vs_iter {
fc723250 1753 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1754 struct list_head *table;
1755 int bucket;
1756};
1757
1758/*
1759 * Write the contents of the VS rule table to a PROCfs file.
1760 * (It is kept just for backward compatibility)
1761 */
1762static inline const char *ip_vs_fwd_name(unsigned flags)
1763{
1764 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1765 case IP_VS_CONN_F_LOCALNODE:
1766 return "Local";
1767 case IP_VS_CONN_F_TUNNEL:
1768 return "Tunnel";
1769 case IP_VS_CONN_F_DROUTE:
1770 return "Route";
1771 default:
1772 return "Masq";
1773 }
1774}
1775
1776
1777/* Get the Nth entry in the two lists */
1778static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1779{
fc723250 1780 struct net *net = seq_file_net(seq);
1da177e4
LT
1781 struct ip_vs_iter *iter = seq->private;
1782 int idx;
1783 struct ip_vs_service *svc;
1784
1785 /* look in hash by protocol */
1786 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1787 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1788 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1789 iter->table = ip_vs_svc_table;
1790 iter->bucket = idx;
1791 return svc;
1792 }
1793 }
1794 }
1795
1796 /* keep looking in fwmark */
1797 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1798 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1799 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1800 iter->table = ip_vs_svc_fwm_table;
1801 iter->bucket = idx;
1802 return svc;
1803 }
1804 }
1805 }
1806
1807 return NULL;
1808}
1809
1810static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1811__acquires(__ip_vs_svc_lock)
1da177e4
LT
1812{
1813
1814 read_lock_bh(&__ip_vs_svc_lock);
1815 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1816}
1817
1818
1819static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1820{
1821 struct list_head *e;
1822 struct ip_vs_iter *iter;
1823 struct ip_vs_service *svc;
1824
1825 ++*pos;
1826 if (v == SEQ_START_TOKEN)
1827 return ip_vs_info_array(seq,0);
1828
1829 svc = v;
1830 iter = seq->private;
1831
1832 if (iter->table == ip_vs_svc_table) {
1833 /* next service in table hashed by protocol */
1834 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1835 return list_entry(e, struct ip_vs_service, s_list);
1836
1837
1838 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1839 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1840 s_list) {
1841 return svc;
1842 }
1843 }
1844
1845 iter->table = ip_vs_svc_fwm_table;
1846 iter->bucket = -1;
1847 goto scan_fwmark;
1848 }
1849
1850 /* next service in hashed by fwmark */
1851 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1852 return list_entry(e, struct ip_vs_service, f_list);
1853
1854 scan_fwmark:
1855 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1856 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1857 f_list)
1858 return svc;
1859 }
1860
1861 return NULL;
1862}
1863
1864static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1865__releases(__ip_vs_svc_lock)
1da177e4
LT
1866{
1867 read_unlock_bh(&__ip_vs_svc_lock);
1868}
1869
1870
1871static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1872{
1873 if (v == SEQ_START_TOKEN) {
1874 seq_printf(seq,
1875 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1876 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1877 seq_puts(seq,
1878 "Prot LocalAddress:Port Scheduler Flags\n");
1879 seq_puts(seq,
1880 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1881 } else {
1882 const struct ip_vs_service *svc = v;
1883 const struct ip_vs_iter *iter = seq->private;
1884 const struct ip_vs_dest *dest;
1885
667a5f18
VB
1886 if (iter->table == ip_vs_svc_table) {
1887#ifdef CONFIG_IP_VS_IPV6
1888 if (svc->af == AF_INET6)
5b095d98 1889 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1890 ip_vs_proto_name(svc->protocol),
38ff4fa4 1891 &svc->addr.in6,
667a5f18
VB
1892 ntohs(svc->port),
1893 svc->scheduler->name);
1894 else
1895#endif
26ec037f 1896 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1897 ip_vs_proto_name(svc->protocol),
1898 ntohl(svc->addr.ip),
1899 ntohs(svc->port),
26ec037f
NC
1900 svc->scheduler->name,
1901 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1902 } else {
26ec037f
NC
1903 seq_printf(seq, "FWM %08X %s %s",
1904 svc->fwmark, svc->scheduler->name,
1905 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1906 }
1da177e4
LT
1907
1908 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1909 seq_printf(seq, "persistent %d %08X\n",
1910 svc->timeout,
1911 ntohl(svc->netmask));
1912 else
1913 seq_putc(seq, '\n');
1914
1915 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1916#ifdef CONFIG_IP_VS_IPV6
1917 if (dest->af == AF_INET6)
1918 seq_printf(seq,
5b095d98 1919 " -> [%pI6]:%04X"
667a5f18 1920 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1921 &dest->addr.in6,
667a5f18
VB
1922 ntohs(dest->port),
1923 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1924 atomic_read(&dest->weight),
1925 atomic_read(&dest->activeconns),
1926 atomic_read(&dest->inactconns));
1927 else
1928#endif
1929 seq_printf(seq,
1930 " -> %08X:%04X "
1931 "%-7s %-6d %-10d %-10d\n",
1932 ntohl(dest->addr.ip),
1933 ntohs(dest->port),
1934 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1935 atomic_read(&dest->weight),
1936 atomic_read(&dest->activeconns),
1937 atomic_read(&dest->inactconns));
1938
1da177e4
LT
1939 }
1940 }
1941 return 0;
1942}
1943
56b3d975 1944static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1945 .start = ip_vs_info_seq_start,
1946 .next = ip_vs_info_seq_next,
1947 .stop = ip_vs_info_seq_stop,
1948 .show = ip_vs_info_seq_show,
1949};
1950
1951static int ip_vs_info_open(struct inode *inode, struct file *file)
1952{
fc723250 1953 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1954 sizeof(struct ip_vs_iter));
1da177e4
LT
1955}
1956
9a32144e 1957static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1958 .owner = THIS_MODULE,
1959 .open = ip_vs_info_open,
1960 .read = seq_read,
1961 .llseek = seq_lseek,
1962 .release = seq_release_private,
1963};
1964
1965#endif
1966
1da177e4
LT
1967#ifdef CONFIG_PROC_FS
1968static int ip_vs_stats_show(struct seq_file *seq, void *v)
1969{
b17fc996
HS
1970 struct net *net = seq_file_single_net(seq);
1971 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1da177e4
LT
1972
1973/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1974 seq_puts(seq,
1975 " Total Incoming Outgoing Incoming Outgoing\n");
1976 seq_printf(seq,
1977 " Conns Packets Packets Bytes Bytes\n");
1978
b17fc996
HS
1979 spin_lock_bh(&tot_stats->lock);
1980 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1981 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1982 (unsigned long long) tot_stats->ustats.inbytes,
1983 (unsigned long long) tot_stats->ustats.outbytes);
1da177e4
LT
1984
1985/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1986 seq_puts(seq,
1987 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1988 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
b17fc996
HS
1989 tot_stats->ustats.cps,
1990 tot_stats->ustats.inpps,
1991 tot_stats->ustats.outpps,
1992 tot_stats->ustats.inbps,
1993 tot_stats->ustats.outbps);
1994 spin_unlock_bh(&tot_stats->lock);
1da177e4
LT
1995
1996 return 0;
1997}
1998
1999static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2000{
fc723250 2001 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
2002}
2003
9a32144e 2004static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2005 .owner = THIS_MODULE,
2006 .open = ip_vs_stats_seq_open,
2007 .read = seq_read,
2008 .llseek = seq_lseek,
2009 .release = single_release,
2010};
2011
b17fc996
HS
2012static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2013{
2014 struct net *net = seq_file_single_net(seq);
2015 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2016 int i;
2017
2018/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2019 seq_puts(seq,
2020 " Total Incoming Outgoing Incoming Outgoing\n");
2021 seq_printf(seq,
2022 "CPU Conns Packets Packets Bytes Bytes\n");
2023
2024 for_each_possible_cpu(i) {
2025 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2026 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2027 i, u->ustats.conns, u->ustats.inpkts,
2028 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2029 (__u64)u->ustats.outbytes);
2030 }
2031
2032 spin_lock_bh(&tot_stats->lock);
2033 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2034 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2035 tot_stats->ustats.outpkts,
2036 (unsigned long long) tot_stats->ustats.inbytes,
2037 (unsigned long long) tot_stats->ustats.outbytes);
2038
2039/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2040 seq_puts(seq,
2041 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2042 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2043 tot_stats->ustats.cps,
2044 tot_stats->ustats.inpps,
2045 tot_stats->ustats.outpps,
2046 tot_stats->ustats.inbps,
2047 tot_stats->ustats.outbps);
2048 spin_unlock_bh(&tot_stats->lock);
2049
2050 return 0;
2051}
2052
2053static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2054{
2055 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2056}
2057
2058static const struct file_operations ip_vs_stats_percpu_fops = {
2059 .owner = THIS_MODULE,
2060 .open = ip_vs_stats_percpu_seq_open,
2061 .read = seq_read,
2062 .llseek = seq_lseek,
2063 .release = single_release,
2064};
1da177e4
LT
2065#endif
2066
2067/*
2068 * Set timeout values for tcp tcpfin udp in the timeout_table.
2069 */
9330419d 2070static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2071{
9330419d
HS
2072 struct ip_vs_proto_data *pd;
2073
1da177e4
LT
2074 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2075 u->tcp_timeout,
2076 u->tcp_fin_timeout,
2077 u->udp_timeout);
2078
2079#ifdef CONFIG_IP_VS_PROTO_TCP
2080 if (u->tcp_timeout) {
9330419d
HS
2081 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2082 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2083 = u->tcp_timeout * HZ;
2084 }
2085
2086 if (u->tcp_fin_timeout) {
9330419d
HS
2087 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2088 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2089 = u->tcp_fin_timeout * HZ;
2090 }
2091#endif
2092
2093#ifdef CONFIG_IP_VS_PROTO_UDP
2094 if (u->udp_timeout) {
9330419d
HS
2095 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2096 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2097 = u->udp_timeout * HZ;
2098 }
2099#endif
2100 return 0;
2101}
2102
2103
/*
 * Index of a set-sockopt command relative to IP_VS_BASE_CTL.  The argument
 * is parenthesised so that an expression argument is subtracted as a whole.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)

/* Expected argument sizes of the set-sockopt commands. */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* The largest of the above: service plus destination. */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2111
9b5b5cff 2112static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2113 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2114 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2115 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2116 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2117 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2118 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2119 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2120 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2121 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2122 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2123 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2124};
2125
c860c6b1
JV
2126static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2127 struct ip_vs_service_user *usvc_compat)
2128{
0d1e71b0
SH
2129 memset(usvc, 0, sizeof(*usvc));
2130
c860c6b1
JV
2131 usvc->af = AF_INET;
2132 usvc->protocol = usvc_compat->protocol;
2133 usvc->addr.ip = usvc_compat->addr;
2134 usvc->port = usvc_compat->port;
2135 usvc->fwmark = usvc_compat->fwmark;
2136
2137 /* Deep copy of sched_name is not needed here */
2138 usvc->sched_name = usvc_compat->sched_name;
2139
2140 usvc->flags = usvc_compat->flags;
2141 usvc->timeout = usvc_compat->timeout;
2142 usvc->netmask = usvc_compat->netmask;
2143}
2144
2145static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2146 struct ip_vs_dest_user *udest_compat)
2147{
0d1e71b0
SH
2148 memset(udest, 0, sizeof(*udest));
2149
c860c6b1
JV
2150 udest->addr.ip = udest_compat->addr;
2151 udest->port = udest_compat->port;
2152 udest->conn_flags = udest_compat->conn_flags;
2153 udest->weight = udest_compat->weight;
2154 udest->u_threshold = udest_compat->u_threshold;
2155 udest->l_threshold = udest_compat->l_threshold;
2156}
2157
1da177e4
LT
2158static int
2159do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2160{
fc723250 2161 struct net *net = sock_net(sk);
1da177e4
LT
2162 int ret;
2163 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2164 struct ip_vs_service_user *usvc_compat;
2165 struct ip_vs_service_user_kern usvc;
1da177e4 2166 struct ip_vs_service *svc;
c860c6b1
JV
2167 struct ip_vs_dest_user *udest_compat;
2168 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2169
2170 if (!capable(CAP_NET_ADMIN))
2171 return -EPERM;
2172
04bcef2a
AV
2173 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2174 return -EINVAL;
2175 if (len < 0 || len > MAX_ARG_LEN)
2176 return -EINVAL;
1da177e4 2177 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2178 pr_err("set_ctl: len %u != %u\n",
2179 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2180 return -EINVAL;
2181 }
2182
2183 if (copy_from_user(arg, user, len) != 0)
2184 return -EFAULT;
2185
2186 /* increase the module use count */
2187 ip_vs_use_count_inc();
2188
14cc3e2b 2189 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2190 ret = -ERESTARTSYS;
2191 goto out_dec;
2192 }
2193
2194 if (cmd == IP_VS_SO_SET_FLUSH) {
2195 /* Flush the virtual service */
fc723250 2196 ret = ip_vs_flush(net);
1da177e4
LT
2197 goto out_unlock;
2198 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2199 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2200 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2201 goto out_unlock;
2202 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2203 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2204 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2205 dm->syncid);
1da177e4
LT
2206 goto out_unlock;
2207 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2208 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2209 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2210 goto out_unlock;
2211 }
2212
c860c6b1
JV
2213 usvc_compat = (struct ip_vs_service_user *)arg;
2214 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2215
2216 /* We only use the new structs internally, so copy userspace compat
2217 * structs to extended internal versions */
2218 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2219 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2220
2221 if (cmd == IP_VS_SO_SET_ZERO) {
2222 /* if no service address is set, zero counters in all */
c860c6b1 2223 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2224 ret = ip_vs_zero_all(net);
1da177e4
LT
2225 goto out_unlock;
2226 }
2227 }
2228
2906f66a
VMR
2229 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2230 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2231 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2232 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2233 usvc.protocol, &usvc.addr.ip,
2234 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2235 ret = -EFAULT;
2236 goto out_unlock;
2237 }
2238
2239 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2240 if (usvc.fwmark == 0)
fc723250 2241 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2242 &usvc.addr, usvc.port);
1da177e4 2243 else
fc723250 2244 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2245
2246 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2247 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2248 ret = -ESRCH;
26c15cfd 2249 goto out_unlock;
1da177e4
LT
2250 }
2251
2252 switch (cmd) {
2253 case IP_VS_SO_SET_ADD:
2254 if (svc != NULL)
2255 ret = -EEXIST;
2256 else
fc723250 2257 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2258 break;
2259 case IP_VS_SO_SET_EDIT:
c860c6b1 2260 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2261 break;
2262 case IP_VS_SO_SET_DEL:
2263 ret = ip_vs_del_service(svc);
2264 if (!ret)
2265 goto out_unlock;
2266 break;
2267 case IP_VS_SO_SET_ZERO:
2268 ret = ip_vs_zero_service(svc);
2269 break;
2270 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2271 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2272 break;
2273 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2274 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2275 break;
2276 case IP_VS_SO_SET_DELDEST:
c860c6b1 2277 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2278 break;
2279 default:
2280 ret = -EINVAL;
2281 }
2282
1da177e4 2283 out_unlock:
14cc3e2b 2284 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2285 out_dec:
2286 /* decrease the module use count */
2287 ip_vs_use_count_dec();
2288
2289 return ret;
2290}
2291
2292
2293static void
2294ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2295{
2296 spin_lock_bh(&src->lock);
e9c0ce23 2297 memcpy(dst, &src->ustats, sizeof(*dst));
1da177e4
LT
2298 spin_unlock_bh(&src->lock);
2299}
2300
2301static void
2302ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2303{
2304 dst->protocol = src->protocol;
e7ade46a 2305 dst->addr = src->addr.ip;
1da177e4
LT
2306 dst->port = src->port;
2307 dst->fwmark = src->fwmark;
4da62fc7 2308 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2309 dst->flags = src->flags;
2310 dst->timeout = src->timeout / HZ;
2311 dst->netmask = src->netmask;
2312 dst->num_dests = src->num_dests;
2313 ip_vs_copy_stats(&dst->stats, &src->stats);
2314}
2315
2316static inline int
fc723250
HS
2317__ip_vs_get_service_entries(struct net *net,
2318 const struct ip_vs_get_services *get,
1da177e4
LT
2319 struct ip_vs_get_services __user *uptr)
2320{
2321 int idx, count=0;
2322 struct ip_vs_service *svc;
2323 struct ip_vs_service_entry entry;
2324 int ret = 0;
2325
2326 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2327 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2328 /* Only expose IPv4 entries to old interface */
fc723250 2329 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2330 continue;
2331
1da177e4
LT
2332 if (count >= get->num_services)
2333 goto out;
4da62fc7 2334 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2335 ip_vs_copy_service(&entry, svc);
2336 if (copy_to_user(&uptr->entrytable[count],
2337 &entry, sizeof(entry))) {
2338 ret = -EFAULT;
2339 goto out;
2340 }
2341 count++;
2342 }
2343 }
2344
2345 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2346 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2347 /* Only expose IPv4 entries to old interface */
fc723250 2348 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2349 continue;
2350
1da177e4
LT
2351 if (count >= get->num_services)
2352 goto out;
4da62fc7 2353 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2354 ip_vs_copy_service(&entry, svc);
2355 if (copy_to_user(&uptr->entrytable[count],
2356 &entry, sizeof(entry))) {
2357 ret = -EFAULT;
2358 goto out;
2359 }
2360 count++;
2361 }
2362 }
2363 out:
2364 return ret;
2365}
2366
2367static inline int
fc723250 2368__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2369 struct ip_vs_get_dests __user *uptr)
2370{
2371 struct ip_vs_service *svc;
b18610de 2372 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2373 int ret = 0;
2374
2375 if (get->fwmark)
fc723250 2376 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2377 else
fc723250 2378 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2379 get->port);
b18610de 2380
1da177e4
LT
2381 if (svc) {
2382 int count = 0;
2383 struct ip_vs_dest *dest;
2384 struct ip_vs_dest_entry entry;
2385
2386 list_for_each_entry(dest, &svc->destinations, n_list) {
2387 if (count >= get->num_dests)
2388 break;
2389
e7ade46a 2390 entry.addr = dest->addr.ip;
1da177e4
LT
2391 entry.port = dest->port;
2392 entry.conn_flags = atomic_read(&dest->conn_flags);
2393 entry.weight = atomic_read(&dest->weight);
2394 entry.u_threshold = dest->u_threshold;
2395 entry.l_threshold = dest->l_threshold;
2396 entry.activeconns = atomic_read(&dest->activeconns);
2397 entry.inactconns = atomic_read(&dest->inactconns);
2398 entry.persistconns = atomic_read(&dest->persistconns);
2399 ip_vs_copy_stats(&entry.stats, &dest->stats);
2400 if (copy_to_user(&uptr->entrytable[count],
2401 &entry, sizeof(entry))) {
2402 ret = -EFAULT;
2403 break;
2404 }
2405 count++;
2406 }
1da177e4
LT
2407 } else
2408 ret = -ESRCH;
2409 return ret;
2410}
2411
2412static inline void
9330419d 2413__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2414{
9330419d
HS
2415 struct ip_vs_proto_data *pd;
2416
1da177e4 2417#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2418 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2419 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2420 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2421#endif
2422#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2423 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2424 u->udp_timeout =
9330419d 2425 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2426#endif
2427}
2428
2429
2430#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2431#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2432#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2433#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2434#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2435#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2436#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2437
9b5b5cff 2438static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2439 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2440 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2441 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2442 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2443 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2444 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2445 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2446};
2447
2448static int
2449do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2450{
2451 unsigned char arg[128];
2452 int ret = 0;
04bcef2a 2453 unsigned int copylen;
fc723250 2454 struct net *net = sock_net(sk);
f131315f 2455 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2456
fc723250 2457 BUG_ON(!net);
1da177e4
LT
2458 if (!capable(CAP_NET_ADMIN))
2459 return -EPERM;
2460
04bcef2a
AV
2461 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2462 return -EINVAL;
2463
1da177e4 2464 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2465 pr_err("get_ctl: len %u < %u\n",
2466 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2467 return -EINVAL;
2468 }
2469
04bcef2a
AV
2470 copylen = get_arglen[GET_CMDID(cmd)];
2471 if (copylen > 128)
2472 return -EINVAL;
2473
2474 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2475 return -EFAULT;
2476
14cc3e2b 2477 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2478 return -ERESTARTSYS;
2479
2480 switch (cmd) {
2481 case IP_VS_SO_GET_VERSION:
2482 {
2483 char buf[64];
2484
2485 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2486 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2487 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2488 ret = -EFAULT;
2489 goto out;
2490 }
2491 *len = strlen(buf)+1;
2492 }
2493 break;
2494
2495 case IP_VS_SO_GET_INFO:
2496 {
2497 struct ip_vs_getinfo info;
2498 info.version = IP_VS_VERSION_CODE;
6f7edb48 2499 info.size = ip_vs_conn_tab_size;
a0840e2e 2500 info.num_services = ipvs->num_services;
1da177e4
LT
2501 if (copy_to_user(user, &info, sizeof(info)) != 0)
2502 ret = -EFAULT;
2503 }
2504 break;
2505
2506 case IP_VS_SO_GET_SERVICES:
2507 {
2508 struct ip_vs_get_services *get;
2509 int size;
2510
2511 get = (struct ip_vs_get_services *)arg;
2512 size = sizeof(*get) +
2513 sizeof(struct ip_vs_service_entry) * get->num_services;
2514 if (*len != size) {
1e3e238e 2515 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2516 ret = -EINVAL;
2517 goto out;
2518 }
fc723250 2519 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2520 }
2521 break;
2522
2523 case IP_VS_SO_GET_SERVICE:
2524 {
2525 struct ip_vs_service_entry *entry;
2526 struct ip_vs_service *svc;
b18610de 2527 union nf_inet_addr addr;
1da177e4
LT
2528
2529 entry = (struct ip_vs_service_entry *)arg;
b18610de 2530 addr.ip = entry->addr;
1da177e4 2531 if (entry->fwmark)
fc723250 2532 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2533 else
fc723250
HS
2534 svc = __ip_vs_service_find(net, AF_INET,
2535 entry->protocol, &addr,
2536 entry->port);
1da177e4
LT
2537 if (svc) {
2538 ip_vs_copy_service(entry, svc);
2539 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2540 ret = -EFAULT;
1da177e4
LT
2541 } else
2542 ret = -ESRCH;
2543 }
2544 break;
2545
2546 case IP_VS_SO_GET_DESTS:
2547 {
2548 struct ip_vs_get_dests *get;
2549 int size;
2550
2551 get = (struct ip_vs_get_dests *)arg;
2552 size = sizeof(*get) +
2553 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2554 if (*len != size) {
1e3e238e 2555 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2556 ret = -EINVAL;
2557 goto out;
2558 }
fc723250 2559 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2560 }
2561 break;
2562
2563 case IP_VS_SO_GET_TIMEOUT:
2564 {
2565 struct ip_vs_timeout_user t;
2566
9330419d 2567 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2568 if (copy_to_user(user, &t, sizeof(t)) != 0)
2569 ret = -EFAULT;
2570 }
2571 break;
2572
2573 case IP_VS_SO_GET_DAEMON:
2574 {
2575 struct ip_vs_daemon_user d[2];
2576
2577 memset(&d, 0, sizeof(d));
f131315f 2578 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2579 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2580 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2581 sizeof(d[0].mcast_ifn));
2582 d[0].syncid = ipvs->master_syncid;
1da177e4 2583 }
f131315f 2584 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2585 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2586 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2587 sizeof(d[1].mcast_ifn));
2588 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2589 }
2590 if (copy_to_user(user, &d, sizeof(d)) != 0)
2591 ret = -EFAULT;
2592 }
2593 break;
2594
2595 default:
2596 ret = -EINVAL;
2597 }
2598
2599 out:
14cc3e2b 2600 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2601 return ret;
2602}
2603
2604
2605static struct nf_sockopt_ops ip_vs_sockopts = {
2606 .pf = PF_INET,
2607 .set_optmin = IP_VS_BASE_CTL,
2608 .set_optmax = IP_VS_SO_SET_MAX+1,
2609 .set = do_ip_vs_set_ctl,
2610 .get_optmin = IP_VS_BASE_CTL,
2611 .get_optmax = IP_VS_SO_GET_MAX+1,
2612 .get = do_ip_vs_get_ctl,
16fcec35 2613 .owner = THIS_MODULE,
1da177e4
LT
2614};
2615
9a812198
JV
2616/*
2617 * Generic Netlink interface
2618 */
2619
2620/* IPVS genetlink family */
2621static struct genl_family ip_vs_genl_family = {
2622 .id = GENL_ID_GENERATE,
2623 .hdrsize = 0,
2624 .name = IPVS_GENL_NAME,
2625 .version = IPVS_GENL_VERSION,
2626 .maxattr = IPVS_CMD_MAX,
2627};
2628
2629/* Policy used for first-level command attributes */
2630static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2631 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2632 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2633 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2634 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2635 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2636 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2637};
2638
2639/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2640static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2641 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2642 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2643 .len = IP_VS_IFNAME_MAXLEN },
2644 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2645};
2646
2647/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2648static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2649 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2650 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2651 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2652 .len = sizeof(union nf_inet_addr) },
2653 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2654 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2655 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2656 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2657 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2658 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2659 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2660 .len = sizeof(struct ip_vs_flags) },
2661 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2662 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2663 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2664};
2665
2666/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2667static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2668 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2669 .len = sizeof(union nf_inet_addr) },
2670 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2671 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2672 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2673 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2674 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2675 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2676 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2677 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2678 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2679};
2680
2681static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2682 struct ip_vs_stats *stats)
2683{
2684 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2685 if (!nl_stats)
2686 return -EMSGSIZE;
2687
2688 spin_lock_bh(&stats->lock);
2689
e9c0ce23
SW
2690 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2693 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2694 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2696 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2697 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2698 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2699 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
9a812198
JV
2700
2701 spin_unlock_bh(&stats->lock);
2702
2703 nla_nest_end(skb, nl_stats);
2704
2705 return 0;
2706
2707nla_put_failure:
2708 spin_unlock_bh(&stats->lock);
2709 nla_nest_cancel(skb, nl_stats);
2710 return -EMSGSIZE;
2711}
2712
2713static int ip_vs_genl_fill_service(struct sk_buff *skb,
2714 struct ip_vs_service *svc)
2715{
2716 struct nlattr *nl_service;
2717 struct ip_vs_flags flags = { .flags = svc->flags,
2718 .mask = ~0 };
2719
2720 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2721 if (!nl_service)
2722 return -EMSGSIZE;
2723
f94fd041 2724 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2725
2726 if (svc->fwmark) {
2727 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2728 } else {
2729 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2730 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2731 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2732 }
2733
2734 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2735 if (svc->pe)
2736 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2737 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2738 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2739 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2740
2741 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2742 goto nla_put_failure;
2743
2744 nla_nest_end(skb, nl_service);
2745
2746 return 0;
2747
2748nla_put_failure:
2749 nla_nest_cancel(skb, nl_service);
2750 return -EMSGSIZE;
2751}
2752
2753static int ip_vs_genl_dump_service(struct sk_buff *skb,
2754 struct ip_vs_service *svc,
2755 struct netlink_callback *cb)
2756{
2757 void *hdr;
2758
2759 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2760 &ip_vs_genl_family, NLM_F_MULTI,
2761 IPVS_CMD_NEW_SERVICE);
2762 if (!hdr)
2763 return -EMSGSIZE;
2764
2765 if (ip_vs_genl_fill_service(skb, svc) < 0)
2766 goto nla_put_failure;
2767
2768 return genlmsg_end(skb, hdr);
2769
2770nla_put_failure:
2771 genlmsg_cancel(skb, hdr);
2772 return -EMSGSIZE;
2773}
2774
2775static int ip_vs_genl_dump_services(struct sk_buff *skb,
2776 struct netlink_callback *cb)
2777{
2778 int idx = 0, i;
2779 int start = cb->args[0];
2780 struct ip_vs_service *svc;
fc723250 2781 struct net *net = skb_sknet(skb);
9a812198
JV
2782
2783 mutex_lock(&__ip_vs_mutex);
2784 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2785 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2786 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2787 continue;
2788 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2789 idx--;
2790 goto nla_put_failure;
2791 }
2792 }
2793 }
2794
2795 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2796 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2797 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2798 continue;
2799 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2800 idx--;
2801 goto nla_put_failure;
2802 }
2803 }
2804 }
2805
2806nla_put_failure:
2807 mutex_unlock(&__ip_vs_mutex);
2808 cb->args[0] = idx;
2809
2810 return skb->len;
2811}
2812
fc723250
HS
2813static int ip_vs_genl_parse_service(struct net *net,
2814 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2815 struct nlattr *nla, int full_entry,
2816 struct ip_vs_service **ret_svc)
9a812198
JV
2817{
2818 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2819 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2820 struct ip_vs_service *svc;
9a812198
JV
2821
2822 /* Parse mandatory identifying service fields first */
2823 if (nla == NULL ||
2824 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2825 return -EINVAL;
2826
2827 nla_af = attrs[IPVS_SVC_ATTR_AF];
2828 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2829 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2830 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2831 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2832
2833 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2834 return -EINVAL;
2835
258c8893
SH
2836 memset(usvc, 0, sizeof(*usvc));
2837
c860c6b1 2838 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2839#ifdef CONFIG_IP_VS_IPV6
2840 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2841#else
2842 if (usvc->af != AF_INET)
2843#endif
9a812198
JV
2844 return -EAFNOSUPPORT;
2845
2846 if (nla_fwmark) {
2847 usvc->protocol = IPPROTO_TCP;
2848 usvc->fwmark = nla_get_u32(nla_fwmark);
2849 } else {
2850 usvc->protocol = nla_get_u16(nla_protocol);
2851 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2852 usvc->port = nla_get_u16(nla_port);
2853 usvc->fwmark = 0;
2854 }
2855
26c15cfd 2856 if (usvc->fwmark)
fc723250 2857 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2858 else
fc723250 2859 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2860 &usvc->addr, usvc->port);
2861 *ret_svc = svc;
2862
9a812198
JV
2863 /* If a full entry was requested, check for the additional fields */
2864 if (full_entry) {
0d1e71b0 2865 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2866 *nla_netmask;
2867 struct ip_vs_flags flags;
9a812198
JV
2868
2869 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2870 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2871 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2872 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2873 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2874
2875 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2876 return -EINVAL;
2877
2878 nla_memcpy(&flags, nla_flags, sizeof(flags));
2879
2880 /* prefill flags from service if it already exists */
26c15cfd 2881 if (svc)
9a812198 2882 usvc->flags = svc->flags;
9a812198
JV
2883
2884 /* set new flags from userland */
2885 usvc->flags = (usvc->flags & ~flags.mask) |
2886 (flags.flags & flags.mask);
c860c6b1 2887 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2888 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2889 usvc->timeout = nla_get_u32(nla_timeout);
2890 usvc->netmask = nla_get_u32(nla_netmask);
2891 }
2892
2893 return 0;
2894}
2895
fc723250
HS
2896static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2897 struct nlattr *nla)
9a812198 2898{
c860c6b1 2899 struct ip_vs_service_user_kern usvc;
26c15cfd 2900 struct ip_vs_service *svc;
9a812198
JV
2901 int ret;
2902
fc723250 2903 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2904 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2905}
2906
2907static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2908{
2909 struct nlattr *nl_dest;
2910
2911 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2912 if (!nl_dest)
2913 return -EMSGSIZE;
2914
2915 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2916 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2917
2918 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2919 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2920 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2921 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2922 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2923 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2924 atomic_read(&dest->activeconns));
2925 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2926 atomic_read(&dest->inactconns));
2927 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2928 atomic_read(&dest->persistconns));
2929
2930 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2931 goto nla_put_failure;
2932
2933 nla_nest_end(skb, nl_dest);
2934
2935 return 0;
2936
2937nla_put_failure:
2938 nla_nest_cancel(skb, nl_dest);
2939 return -EMSGSIZE;
2940}
2941
2942static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2943 struct netlink_callback *cb)
2944{
2945 void *hdr;
2946
2947 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2948 &ip_vs_genl_family, NLM_F_MULTI,
2949 IPVS_CMD_NEW_DEST);
2950 if (!hdr)
2951 return -EMSGSIZE;
2952
2953 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2954 goto nla_put_failure;
2955
2956 return genlmsg_end(skb, hdr);
2957
2958nla_put_failure:
2959 genlmsg_cancel(skb, hdr);
2960 return -EMSGSIZE;
2961}
2962
2963static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2964 struct netlink_callback *cb)
2965{
2966 int idx = 0;
2967 int start = cb->args[0];
2968 struct ip_vs_service *svc;
2969 struct ip_vs_dest *dest;
2970 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2971 struct net *net = skb_sknet(skb);
9a812198
JV
2972
2973 mutex_lock(&__ip_vs_mutex);
2974
2975 /* Try to find the service for which to dump destinations */
2976 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2977 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2978 goto out_err;
2979
a0840e2e 2980
fc723250 2981 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
2982 if (IS_ERR(svc) || svc == NULL)
2983 goto out_err;
2984
2985 /* Dump the destinations */
2986 list_for_each_entry(dest, &svc->destinations, n_list) {
2987 if (++idx <= start)
2988 continue;
2989 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2990 idx--;
2991 goto nla_put_failure;
2992 }
2993 }
2994
2995nla_put_failure:
2996 cb->args[0] = idx;
9a812198
JV
2997
2998out_err:
2999 mutex_unlock(&__ip_vs_mutex);
3000
3001 return skb->len;
3002}
3003
c860c6b1 3004static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3005 struct nlattr *nla, int full_entry)
3006{
3007 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3008 struct nlattr *nla_addr, *nla_port;
3009
3010 /* Parse mandatory identifying destination fields first */
3011 if (nla == NULL ||
3012 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3013 return -EINVAL;
3014
3015 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3016 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3017
3018 if (!(nla_addr && nla_port))
3019 return -EINVAL;
3020
258c8893
SH
3021 memset(udest, 0, sizeof(*udest));
3022
9a812198
JV
3023 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3024 udest->port = nla_get_u16(nla_port);
3025
3026 /* If a full entry was requested, check for the additional fields */
3027 if (full_entry) {
3028 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3029 *nla_l_thresh;
3030
3031 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3032 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3033 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3034 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3035
3036 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3037 return -EINVAL;
3038
3039 udest->conn_flags = nla_get_u32(nla_fwd)
3040 & IP_VS_CONN_F_FWD_MASK;
3041 udest->weight = nla_get_u32(nla_weight);
3042 udest->u_threshold = nla_get_u32(nla_u_thresh);
3043 udest->l_threshold = nla_get_u32(nla_l_thresh);
3044 }
3045
3046 return 0;
3047}
3048
3049static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3050 const char *mcast_ifn, __be32 syncid)
3051{
3052 struct nlattr *nl_daemon;
3053
3054 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3055 if (!nl_daemon)
3056 return -EMSGSIZE;
3057
3058 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3059 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3060 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3061
3062 nla_nest_end(skb, nl_daemon);
3063
3064 return 0;
3065
3066nla_put_failure:
3067 nla_nest_cancel(skb, nl_daemon);
3068 return -EMSGSIZE;
3069}
3070
3071static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3072 const char *mcast_ifn, __be32 syncid,
3073 struct netlink_callback *cb)
3074{
3075 void *hdr;
3076 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3077 &ip_vs_genl_family, NLM_F_MULTI,
3078 IPVS_CMD_NEW_DAEMON);
3079 if (!hdr)
3080 return -EMSGSIZE;
3081
3082 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3083 goto nla_put_failure;
3084
3085 return genlmsg_end(skb, hdr);
3086
3087nla_put_failure:
3088 genlmsg_cancel(skb, hdr);
3089 return -EMSGSIZE;
3090}
3091
3092static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3093 struct netlink_callback *cb)
3094{
f131315f
HS
3095 struct net *net = skb_net(skb);
3096 struct netns_ipvs *ipvs = net_ipvs(net);
3097
9a812198 3098 mutex_lock(&__ip_vs_mutex);
f131315f 3099 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3100 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3101 ipvs->master_mcast_ifn,
3102 ipvs->master_syncid, cb) < 0)
9a812198
JV
3103 goto nla_put_failure;
3104
3105 cb->args[0] = 1;
3106 }
3107
f131315f 3108 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3109 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3110 ipvs->backup_mcast_ifn,
3111 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3112 goto nla_put_failure;
3113
3114 cb->args[1] = 1;
3115 }
3116
3117nla_put_failure:
3118 mutex_unlock(&__ip_vs_mutex);
3119
3120 return skb->len;
3121}
3122
f131315f 3123static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3124{
3125 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3126 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3127 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3128 return -EINVAL;
3129
f131315f
HS
3130 return start_sync_thread(net,
3131 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3132 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3133 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3134}
3135
f131315f 3136static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3137{
3138 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3139 return -EINVAL;
3140
f131315f
HS
3141 return stop_sync_thread(net,
3142 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3143}
3144
9330419d 3145static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3146{
3147 struct ip_vs_timeout_user t;
3148
9330419d 3149 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3150
3151 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3152 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3153
3154 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3155 t.tcp_fin_timeout =
3156 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3157
3158 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3159 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3160
9330419d 3161 return ip_vs_set_timeout(net, &t);
9a812198
JV
3162}
3163
3164static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3165{
3166 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3167 struct ip_vs_service_user_kern usvc;
3168 struct ip_vs_dest_user_kern udest;
9a812198
JV
3169 int ret = 0, cmd;
3170 int need_full_svc = 0, need_full_dest = 0;
fc723250 3171 struct net *net;
a0840e2e 3172 struct netns_ipvs *ipvs;
9a812198 3173
fc723250 3174 net = skb_sknet(skb);
a0840e2e 3175 ipvs = net_ipvs(net);
9a812198
JV
3176 cmd = info->genlhdr->cmd;
3177
3178 mutex_lock(&__ip_vs_mutex);
3179
3180 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3181 ret = ip_vs_flush(net);
9a812198
JV
3182 goto out;
3183 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3184 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3185 goto out;
3186 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3187 cmd == IPVS_CMD_DEL_DAEMON) {
3188
3189 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3190
3191 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3192 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3193 info->attrs[IPVS_CMD_ATTR_DAEMON],
3194 ip_vs_daemon_policy)) {
3195 ret = -EINVAL;
3196 goto out;
3197 }
3198
3199 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3200 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3201 else
f131315f 3202 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3203 goto out;
3204 } else if (cmd == IPVS_CMD_ZERO &&
3205 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3206 ret = ip_vs_zero_all(net);
9a812198
JV
3207 goto out;
3208 }
3209
3210 /* All following commands require a service argument, so check if we
3211 * received a valid one. We need a full service specification when
3212 * adding / editing a service. Only identifying members otherwise. */
3213 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3214 need_full_svc = 1;
3215
fc723250 3216 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3217 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3218 need_full_svc, &svc);
9a812198
JV
3219 if (ret)
3220 goto out;
3221
9a812198
JV
3222 /* Unless we're adding a new service, the service must already exist */
3223 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3224 ret = -ESRCH;
3225 goto out;
3226 }
3227
3228 /* Destination commands require a valid destination argument. For
3229 * adding / editing a destination, we need a full destination
3230 * specification. */
3231 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3232 cmd == IPVS_CMD_DEL_DEST) {
3233 if (cmd != IPVS_CMD_DEL_DEST)
3234 need_full_dest = 1;
3235
3236 ret = ip_vs_genl_parse_dest(&udest,
3237 info->attrs[IPVS_CMD_ATTR_DEST],
3238 need_full_dest);
3239 if (ret)
3240 goto out;
3241 }
3242
3243 switch (cmd) {
3244 case IPVS_CMD_NEW_SERVICE:
3245 if (svc == NULL)
fc723250 3246 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3247 else
3248 ret = -EEXIST;
3249 break;
3250 case IPVS_CMD_SET_SERVICE:
3251 ret = ip_vs_edit_service(svc, &usvc);
3252 break;
3253 case IPVS_CMD_DEL_SERVICE:
3254 ret = ip_vs_del_service(svc);
26c15cfd 3255 /* do not use svc, it can be freed */
9a812198
JV
3256 break;
3257 case IPVS_CMD_NEW_DEST:
3258 ret = ip_vs_add_dest(svc, &udest);
3259 break;
3260 case IPVS_CMD_SET_DEST:
3261 ret = ip_vs_edit_dest(svc, &udest);
3262 break;
3263 case IPVS_CMD_DEL_DEST:
3264 ret = ip_vs_del_dest(svc, &udest);
3265 break;
3266 case IPVS_CMD_ZERO:
3267 ret = ip_vs_zero_service(svc);
3268 break;
3269 default:
3270 ret = -EINVAL;
3271 }
3272
3273out:
9a812198
JV
3274 mutex_unlock(&__ip_vs_mutex);
3275
3276 return ret;
3277}
3278
3279static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3280{
3281 struct sk_buff *msg;
3282 void *reply;
3283 int ret, cmd, reply_cmd;
fc723250 3284 struct net *net;
a0840e2e 3285 struct netns_ipvs *ipvs;
9a812198 3286
fc723250 3287 net = skb_sknet(skb);
a0840e2e 3288 ipvs = net_ipvs(net);
9a812198
JV
3289 cmd = info->genlhdr->cmd;
3290
3291 if (cmd == IPVS_CMD_GET_SERVICE)
3292 reply_cmd = IPVS_CMD_NEW_SERVICE;
3293 else if (cmd == IPVS_CMD_GET_INFO)
3294 reply_cmd = IPVS_CMD_SET_INFO;
3295 else if (cmd == IPVS_CMD_GET_CONFIG)
3296 reply_cmd = IPVS_CMD_SET_CONFIG;
3297 else {
1e3e238e 3298 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3299 return -EINVAL;
3300 }
3301
3302 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3303 if (!msg)
3304 return -ENOMEM;
3305
3306 mutex_lock(&__ip_vs_mutex);
3307
3308 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3309 if (reply == NULL)
3310 goto nla_put_failure;
3311
3312 switch (cmd) {
3313 case IPVS_CMD_GET_SERVICE:
3314 {
3315 struct ip_vs_service *svc;
3316
fc723250
HS
3317 svc = ip_vs_genl_find_service(net,
3318 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3319 if (IS_ERR(svc)) {
3320 ret = PTR_ERR(svc);
3321 goto out_err;
3322 } else if (svc) {
3323 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3324 if (ret)
3325 goto nla_put_failure;
3326 } else {
3327 ret = -ESRCH;
3328 goto out_err;
3329 }
3330
3331 break;
3332 }
3333
3334 case IPVS_CMD_GET_CONFIG:
3335 {
3336 struct ip_vs_timeout_user t;
3337
9330419d 3338 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3339#ifdef CONFIG_IP_VS_PROTO_TCP
3340 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3341 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3342 t.tcp_fin_timeout);
3343#endif
3344#ifdef CONFIG_IP_VS_PROTO_UDP
3345 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3346#endif
3347
3348 break;
3349 }
3350
3351 case IPVS_CMD_GET_INFO:
3352 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3353 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3354 ip_vs_conn_tab_size);
9a812198
JV
3355 break;
3356 }
3357
3358 genlmsg_end(msg, reply);
134e6375 3359 ret = genlmsg_reply(msg, info);
9a812198
JV
3360 goto out;
3361
3362nla_put_failure:
1e3e238e 3363 pr_err("not enough space in Netlink message\n");
9a812198
JV
3364 ret = -EMSGSIZE;
3365
3366out_err:
3367 nlmsg_free(msg);
3368out:
3369 mutex_unlock(&__ip_vs_mutex);
3370
3371 return ret;
3372}
3373
3374
3375static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3376 {
3377 .cmd = IPVS_CMD_NEW_SERVICE,
3378 .flags = GENL_ADMIN_PERM,
3379 .policy = ip_vs_cmd_policy,
3380 .doit = ip_vs_genl_set_cmd,
3381 },
3382 {
3383 .cmd = IPVS_CMD_SET_SERVICE,
3384 .flags = GENL_ADMIN_PERM,
3385 .policy = ip_vs_cmd_policy,
3386 .doit = ip_vs_genl_set_cmd,
3387 },
3388 {
3389 .cmd = IPVS_CMD_DEL_SERVICE,
3390 .flags = GENL_ADMIN_PERM,
3391 .policy = ip_vs_cmd_policy,
3392 .doit = ip_vs_genl_set_cmd,
3393 },
3394 {
3395 .cmd = IPVS_CMD_GET_SERVICE,
3396 .flags = GENL_ADMIN_PERM,
3397 .doit = ip_vs_genl_get_cmd,
3398 .dumpit = ip_vs_genl_dump_services,
3399 .policy = ip_vs_cmd_policy,
3400 },
3401 {
3402 .cmd = IPVS_CMD_NEW_DEST,
3403 .flags = GENL_ADMIN_PERM,
3404 .policy = ip_vs_cmd_policy,
3405 .doit = ip_vs_genl_set_cmd,
3406 },
3407 {
3408 .cmd = IPVS_CMD_SET_DEST,
3409 .flags = GENL_ADMIN_PERM,
3410 .policy = ip_vs_cmd_policy,
3411 .doit = ip_vs_genl_set_cmd,
3412 },
3413 {
3414 .cmd = IPVS_CMD_DEL_DEST,
3415 .flags = GENL_ADMIN_PERM,
3416 .policy = ip_vs_cmd_policy,
3417 .doit = ip_vs_genl_set_cmd,
3418 },
3419 {
3420 .cmd = IPVS_CMD_GET_DEST,
3421 .flags = GENL_ADMIN_PERM,
3422 .policy = ip_vs_cmd_policy,
3423 .dumpit = ip_vs_genl_dump_dests,
3424 },
3425 {
3426 .cmd = IPVS_CMD_NEW_DAEMON,
3427 .flags = GENL_ADMIN_PERM,
3428 .policy = ip_vs_cmd_policy,
3429 .doit = ip_vs_genl_set_cmd,
3430 },
3431 {
3432 .cmd = IPVS_CMD_DEL_DAEMON,
3433 .flags = GENL_ADMIN_PERM,
3434 .policy = ip_vs_cmd_policy,
3435 .doit = ip_vs_genl_set_cmd,
3436 },
3437 {
3438 .cmd = IPVS_CMD_GET_DAEMON,
3439 .flags = GENL_ADMIN_PERM,
3440 .dumpit = ip_vs_genl_dump_daemons,
3441 },
3442 {
3443 .cmd = IPVS_CMD_SET_CONFIG,
3444 .flags = GENL_ADMIN_PERM,
3445 .policy = ip_vs_cmd_policy,
3446 .doit = ip_vs_genl_set_cmd,
3447 },
3448 {
3449 .cmd = IPVS_CMD_GET_CONFIG,
3450 .flags = GENL_ADMIN_PERM,
3451 .doit = ip_vs_genl_get_cmd,
3452 },
3453 {
3454 .cmd = IPVS_CMD_GET_INFO,
3455 .flags = GENL_ADMIN_PERM,
3456 .doit = ip_vs_genl_get_cmd,
3457 },
3458 {
3459 .cmd = IPVS_CMD_ZERO,
3460 .flags = GENL_ADMIN_PERM,
3461 .policy = ip_vs_cmd_policy,
3462 .doit = ip_vs_genl_set_cmd,
3463 },
3464 {
3465 .cmd = IPVS_CMD_FLUSH,
3466 .flags = GENL_ADMIN_PERM,
3467 .doit = ip_vs_genl_set_cmd,
3468 },
3469};
3470
3471static int __init ip_vs_genl_register(void)
3472{
8f698d54
MM
3473 return genl_register_family_with_ops(&ip_vs_genl_family,
3474 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3475}
3476
3477static void ip_vs_genl_unregister(void)
3478{
3479 genl_unregister_family(&ip_vs_genl_family);
3480}
3481
3482/* End of Generic Netlink interface definitions */
3483
61b1ab45
HS
/*
 * Per-netns init/exit functions.
 */
3487int __net_init __ip_vs_control_init(struct net *net)
3488{
fc723250
HS
3489 int idx;
3490 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3491 struct ctl_table *tbl;
fc723250 3492
61b1ab45
HS
3493 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3494 return -EPERM;
a0840e2e
HS
3495
3496 atomic_set(&ipvs->dropentry, 0);
3497 spin_lock_init(&ipvs->dropentry_lock);
3498 spin_lock_init(&ipvs->droppacket_lock);
3499 spin_lock_init(&ipvs->securetcp_lock);
3500 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3501
3502 /* Initialize rs_table */
3503 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3504 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3505
b17fc996
HS
3506 /* procfs stats */
3507 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3508 if (ipvs->tot_stats == NULL) {
3509 pr_err("%s(): no memory.\n", __func__);
3510 return -ENOMEM;
3511 }
3512 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3513 if (!ipvs->cpustats) {
3514 pr_err("%s() alloc_percpu failed\n", __func__);
3515 goto err_alloc;
3516 }
3517 spin_lock_init(&ipvs->tot_stats->lock);
61b1ab45 3518
fc723250
HS
3519 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3520 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3521
61b1ab45
HS
3522 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3523 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
b17fc996
HS
3524 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3525 &ip_vs_stats_percpu_fops);
a0840e2e
HS
3526
3527 if (!net_eq(net, &init_net)) {
3528 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3529 if (tbl == NULL)
3530 goto err_dup;
3531 } else
3532 tbl = vs_vars;
3533 /* Initialize sysctl defaults */
3534 idx = 0;
3535 ipvs->sysctl_amemthresh = 1024;
3536 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3537 ipvs->sysctl_am_droprate = 10;
3538 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3539 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3540 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3541#ifdef CONFIG_IP_VS_NFCT
3542 tbl[idx++].data = &ipvs->sysctl_conntrack;
3543#endif
3544 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3545 ipvs->sysctl_snat_reroute = 1;
3546 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3547 ipvs->sysctl_sync_ver = 1;
3548 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3549 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3550 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3551 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3552 ipvs->sysctl_sync_threshold[0] = 3;
3553 ipvs->sysctl_sync_threshold[1] = 50;
3554 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3555 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3556 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3557
3558
3559 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
61b1ab45 3560 vs_vars);
a0840e2e 3561 if (ipvs->sysctl_hdr == NULL)
61b1ab45 3562 goto err_reg;
b17fc996 3563 ip_vs_new_estimator(net, ipvs->tot_stats);
a0840e2e 3564 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3565 /* Schedule defense work */
3566 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3567 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45
HS
3568 return 0;
3569
3570err_reg:
a0840e2e
HS
3571 if (!net_eq(net, &init_net))
3572 kfree(tbl);
3573err_dup:
b17fc996
HS
3574 free_percpu(ipvs->cpustats);
3575err_alloc:
3576 kfree(ipvs->tot_stats);
61b1ab45
HS
3577 return -ENOMEM;
3578}
3579
3580static void __net_exit __ip_vs_control_cleanup(struct net *net)
3581{
b17fc996
HS
3582 struct netns_ipvs *ipvs = net_ipvs(net);
3583
61b1ab45
HS
3584 if (!net_eq(net, &init_net)) /* netns not enabled yet */
3585 return;
3586
b17fc996 3587 ip_vs_kill_estimator(net, ipvs->tot_stats);
a0840e2e 3588 unregister_net_sysctl_table(ipvs->sysctl_hdr);
b17fc996 3589 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3590 proc_net_remove(net, "ip_vs_stats");
3591 proc_net_remove(net, "ip_vs");
f6340ee0
HS
3592 cancel_delayed_work_sync(&ipvs->defense_work);
3593 cancel_work_sync(&ipvs->defense_work.work);
b17fc996
HS
3594 free_percpu(ipvs->cpustats);
3595 kfree(ipvs->tot_stats);
61b1ab45
HS
3596}
3597
3598static struct pernet_operations ipvs_control_ops = {
3599 .init = __ip_vs_control_init,
3600 .exit = __ip_vs_control_cleanup,
3601};
1da177e4 3602
048cf48b 3603int __init ip_vs_control_init(void)
1da177e4 3604{
1da177e4 3605 int idx;
fc723250 3606 int ret;
1da177e4
LT
3607
3608 EnterFunction(2);
3609
fc723250 3610 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3611 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3612 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3613 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3614 }
fc723250
HS
3615
3616 ret = register_pernet_subsys(&ipvs_control_ops);
3617 if (ret) {
3618 pr_err("cannot register namespace.\n");
3619 goto err;
d86bef73 3620 }
fc723250
HS
3621
3622 smp_wmb(); /* Do we really need it now ? */
d86bef73 3623
1da177e4
LT
3624 ret = nf_register_sockopt(&ip_vs_sockopts);
3625 if (ret) {
1e3e238e 3626 pr_err("cannot register sockopt.\n");
fc723250 3627 goto err_net;
1da177e4
LT
3628 }
3629
9a812198
JV
3630 ret = ip_vs_genl_register();
3631 if (ret) {
1e3e238e 3632 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3633 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3634 goto err_net;
9a812198
JV
3635 }
3636
1da177e4
LT
3637 LeaveFunction(2);
3638 return 0;
fc723250
HS
3639
3640err_net:
3641 unregister_pernet_subsys(&ipvs_control_ops);
3642err:
3643 return ret;
1da177e4
LT
3644}
3645
3646
3647void ip_vs_control_cleanup(void)
3648{
3649 EnterFunction(2);
3650 ip_vs_trash_cleanup();
61b1ab45 3651 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3652 ip_vs_genl_unregister();
1da177e4
LT
3653 nf_unregister_sockopt(&ip_vs_sockopts);
3654 LeaveFunction(2);
3655}