netfilter: h323: bug in parsing of ASN1 SEQOF field
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
64static int sysctl_ip_vs_debug_level = 0;
65
66int ip_vs_get_debug_level(void)
67{
68 return sysctl_ip_vs_debug_level;
69}
70#endif
71
09571c7a
VB
72#ifdef CONFIG_IP_VS_IPV6
73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
4a98480b
HS
74static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
09571c7a
VB
76{
77 struct rt6_info *rt;
4c9483b2
DM
78 struct flowi6 fl6 = {
79 .daddr = *addr,
09571c7a
VB
80 };
81
4c9483b2 82 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
09571c7a 83 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
4c9483b2 84 return 1;
09571c7a
VB
85
86 return 0;
87}
88#endif
14e40546
SH
89
90#ifdef CONFIG_SYSCTL
1da177e4 91/*
af9debd4
JA
92 * update_defense_level is called from keventd and from sysctl,
93 * so it needs to protect itself from softirqs
1da177e4 94 */
9330419d 95static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
96{
97 struct sysinfo i;
98 static int old_secure_tcp = 0;
99 int availmem;
100 int nomem;
101 int to_change = -1;
102
103 /* we only count free and buffered memory (in pages) */
104 si_meminfo(&i);
105 availmem = i.freeram + i.bufferram;
106 /* however in linux 2.5 the i.bufferram is total page cache size,
107 we need adjust it */
108 /* si_swapinfo(&i); */
109 /* availmem = availmem - (i.totalswap - i.freeswap); */
110
a0840e2e 111 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 112
af9debd4
JA
113 local_bh_disable();
114
1da177e4 115 /* drop_entry */
a0840e2e
HS
116 spin_lock(&ipvs->dropentry_lock);
117 switch (ipvs->sysctl_drop_entry) {
1da177e4 118 case 0:
a0840e2e 119 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
120 break;
121 case 1:
122 if (nomem) {
a0840e2e
HS
123 atomic_set(&ipvs->dropentry, 1);
124 ipvs->sysctl_drop_entry = 2;
1da177e4 125 } else {
a0840e2e 126 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
127 }
128 break;
129 case 2:
130 if (nomem) {
a0840e2e 131 atomic_set(&ipvs->dropentry, 1);
1da177e4 132 } else {
a0840e2e
HS
133 atomic_set(&ipvs->dropentry, 0);
134 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
135 };
136 break;
137 case 3:
a0840e2e 138 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
139 break;
140 }
a0840e2e 141 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
142
143 /* drop_packet */
a0840e2e
HS
144 spin_lock(&ipvs->droppacket_lock);
145 switch (ipvs->sysctl_drop_packet) {
1da177e4 146 case 0:
a0840e2e 147 ipvs->drop_rate = 0;
1da177e4
LT
148 break;
149 case 1:
150 if (nomem) {
a0840e2e
HS
151 ipvs->drop_rate = ipvs->drop_counter
152 = ipvs->sysctl_amemthresh /
153 (ipvs->sysctl_amemthresh-availmem);
154 ipvs->sysctl_drop_packet = 2;
1da177e4 155 } else {
a0840e2e 156 ipvs->drop_rate = 0;
1da177e4
LT
157 }
158 break;
159 case 2:
160 if (nomem) {
a0840e2e
HS
161 ipvs->drop_rate = ipvs->drop_counter
162 = ipvs->sysctl_amemthresh /
163 (ipvs->sysctl_amemthresh-availmem);
1da177e4 164 } else {
a0840e2e
HS
165 ipvs->drop_rate = 0;
166 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
167 }
168 break;
169 case 3:
a0840e2e 170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
171 break;
172 }
a0840e2e 173 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
174
175 /* secure_tcp */
a0840e2e
HS
176 spin_lock(&ipvs->securetcp_lock);
177 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
178 case 0:
179 if (old_secure_tcp >= 2)
180 to_change = 0;
181 break;
182 case 1:
183 if (nomem) {
184 if (old_secure_tcp < 2)
185 to_change = 1;
a0840e2e 186 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
187 } else {
188 if (old_secure_tcp >= 2)
189 to_change = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 if (old_secure_tcp < 2)
195 to_change = 1;
196 } else {
197 if (old_secure_tcp >= 2)
198 to_change = 0;
a0840e2e 199 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
200 }
201 break;
202 case 3:
203 if (old_secure_tcp < 2)
204 to_change = 1;
205 break;
206 }
a0840e2e 207 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 208 if (to_change >= 0)
9330419d 209 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
212
213 local_bh_enable();
1da177e4
LT
214}
215
216
217/*
218 * Timer for checking the defense
219 */
220#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 221
c4028958 222static void defense_work_handler(struct work_struct *work)
1da177e4 223{
f6340ee0
HS
224 struct netns_ipvs *ipvs =
225 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
226
227 update_defense_level(ipvs);
a0840e2e 228 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4 231}
14e40546 232#endif
1da177e4
LT
233
234int
235ip_vs_use_count_inc(void)
236{
237 return try_module_get(THIS_MODULE);
238}
239
240void
241ip_vs_use_count_dec(void)
242{
243 module_put(THIS_MODULE);
244}
245
246
247/*
248 * Hash table: for virtual service lookups
249 */
250#define IP_VS_SVC_TAB_BITS 8
251#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
252#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
253
254/* the service table hashed by <protocol, addr, port> */
255static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
256/* the service table hashed by fwmark */
257static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
258
1da177e4
LT
259
260/*
261 * Returns hash value for virtual service
262 */
fc723250
HS
263static inline unsigned
264ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
265 const union nf_inet_addr *addr, __be16 port)
1da177e4
LT
266{
267 register unsigned porth = ntohs(port);
b18610de 268 __be32 addr_fold = addr->ip;
1da177e4 269
b18610de
JV
270#ifdef CONFIG_IP_VS_IPV6
271 if (af == AF_INET6)
272 addr_fold = addr->ip6[0]^addr->ip6[1]^
273 addr->ip6[2]^addr->ip6[3];
274#endif
fc723250 275 addr_fold ^= ((size_t)net>>8);
b18610de
JV
276
277 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
278 & IP_VS_SVC_TAB_MASK;
279}
280
281/*
282 * Returns hash value of fwmark for virtual service lookup
283 */
fc723250 284static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 285{
fc723250 286 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
287}
288
289/*
fc723250 290 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
291 * or in the ip_vs_svc_fwm_table by fwmark.
292 * Should be called with locked tables.
293 */
294static int ip_vs_svc_hash(struct ip_vs_service *svc)
295{
296 unsigned hash;
297
298 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
299 pr_err("%s(): request for already hashed, called from %pF\n",
300 __func__, __builtin_return_address(0));
1da177e4
LT
301 return 0;
302 }
303
304 if (svc->fwmark == 0) {
305 /*
fc723250 306 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 307 */
fc723250
HS
308 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
309 &svc->addr, svc->port);
1da177e4
LT
310 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
311 } else {
312 /*
fc723250 313 * Hash it by fwmark in svc_fwm_table
1da177e4 314 */
fc723250 315 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
316 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
317 }
318
319 svc->flags |= IP_VS_SVC_F_HASHED;
320 /* increase its refcnt because it is referenced by the svc table */
321 atomic_inc(&svc->refcnt);
322 return 1;
323}
324
325
326/*
fc723250 327 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
328 * Should be called with locked tables.
329 */
330static int ip_vs_svc_unhash(struct ip_vs_service *svc)
331{
332 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
333 pr_err("%s(): request for unhash flagged, called from %pF\n",
334 __func__, __builtin_return_address(0));
1da177e4
LT
335 return 0;
336 }
337
338 if (svc->fwmark == 0) {
fc723250 339 /* Remove it from the svc_table table */
1da177e4
LT
340 list_del(&svc->s_list);
341 } else {
fc723250 342 /* Remove it from the svc_fwm_table table */
1da177e4
LT
343 list_del(&svc->f_list);
344 }
345
346 svc->flags &= ~IP_VS_SVC_F_HASHED;
347 atomic_dec(&svc->refcnt);
348 return 1;
349}
350
351
352/*
fc723250 353 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 354 */
b18610de 355static inline struct ip_vs_service *
fc723250
HS
356__ip_vs_service_find(struct net *net, int af, __u16 protocol,
357 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
358{
359 unsigned hash;
360 struct ip_vs_service *svc;
361
362 /* Check for "full" addressed entries */
fc723250 363 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
364
365 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
366 if ((svc->af == af)
367 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 368 && (svc->port == vport)
fc723250
HS
369 && (svc->protocol == protocol)
370 && net_eq(svc->net, net)) {
1da177e4 371 /* HIT */
1da177e4
LT
372 return svc;
373 }
374 }
375
376 return NULL;
377}
378
379
380/*
381 * Get service by {fwmark} in the service table.
382 */
b18610de 383static inline struct ip_vs_service *
fc723250 384__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4
LT
385{
386 unsigned hash;
387 struct ip_vs_service *svc;
388
389 /* Check for fwmark addressed entries */
fc723250 390 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
391
392 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
393 if (svc->fwmark == fwmark && svc->af == af
394 && net_eq(svc->net, net)) {
1da177e4 395 /* HIT */
1da177e4
LT
396 return svc;
397 }
398 }
399
400 return NULL;
401}
402
403struct ip_vs_service *
fc723250 404ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 405 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
406{
407 struct ip_vs_service *svc;
763f8d0e 408 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 409
1da177e4
LT
410 read_lock(&__ip_vs_svc_lock);
411
412 /*
413 * Check the table hashed by fwmark first
414 */
097fc76a
JA
415 if (fwmark) {
416 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
417 if (svc)
418 goto out;
419 }
1da177e4
LT
420
421 /*
422 * Check the table hashed by <protocol,addr,port>
423 * for "full" addressed entries
424 */
fc723250 425 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
426
427 if (svc == NULL
428 && protocol == IPPROTO_TCP
763f8d0e 429 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
430 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
431 /*
432 * Check if ftp service entry exists, the packet
433 * might belong to FTP data connections.
434 */
fc723250 435 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
436 }
437
438 if (svc == NULL
763f8d0e 439 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
440 /*
441 * Check if the catch-all port (port zero) exists
442 */
fc723250 443 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
444 }
445
446 out:
26c15cfd
JA
447 if (svc)
448 atomic_inc(&svc->usecnt);
1da177e4
LT
449 read_unlock(&__ip_vs_svc_lock);
450
3c2e0505
JV
451 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
452 fwmark, ip_vs_proto_name(protocol),
453 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
454 svc ? "hit" : "not hit");
1da177e4
LT
455
456 return svc;
457}
458
459
460static inline void
461__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
462{
463 atomic_inc(&svc->refcnt);
464 dest->svc = svc;
465}
466
26c15cfd 467static void
1da177e4
LT
468__ip_vs_unbind_svc(struct ip_vs_dest *dest)
469{
470 struct ip_vs_service *svc = dest->svc;
471
472 dest->svc = NULL;
26c15cfd
JA
473 if (atomic_dec_and_test(&svc->refcnt)) {
474 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
475 svc->fwmark,
476 IP_VS_DBG_ADDR(svc->af, &svc->addr),
477 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 478 free_percpu(svc->stats.cpustats);
1da177e4 479 kfree(svc);
26c15cfd 480 }
1da177e4
LT
481}
482
483
484/*
485 * Returns hash value for real service
486 */
7937df15
JV
487static inline unsigned ip_vs_rs_hashkey(int af,
488 const union nf_inet_addr *addr,
489 __be16 port)
1da177e4
LT
490{
491 register unsigned porth = ntohs(port);
7937df15
JV
492 __be32 addr_fold = addr->ip;
493
494#ifdef CONFIG_IP_VS_IPV6
495 if (af == AF_INET6)
496 addr_fold = addr->ip6[0]^addr->ip6[1]^
497 addr->ip6[2]^addr->ip6[3];
498#endif
1da177e4 499
7937df15 500 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
501 & IP_VS_RTAB_MASK;
502}
503
504/*
fc723250 505 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
506 * should be called with locked tables.
507 */
fc723250 508static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4
LT
509{
510 unsigned hash;
511
512 if (!list_empty(&dest->d_list)) {
513 return 0;
514 }
515
516 /*
517 * Hash by proto,addr,port,
518 * which are the parameters of the real service.
519 */
7937df15
JV
520 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
521
fc723250 522 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
523
524 return 1;
525}
526
527/*
fc723250 528 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
529 * should be called with locked tables.
530 */
531static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
532{
533 /*
fc723250 534 * Remove it from the rs_table table.
1da177e4
LT
535 */
536 if (!list_empty(&dest->d_list)) {
537 list_del(&dest->d_list);
538 INIT_LIST_HEAD(&dest->d_list);
539 }
540
541 return 1;
542}
543
544/*
545 * Lookup real service by <proto,addr,port> in the real service table.
546 */
547struct ip_vs_dest *
fc723250 548ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
549 const union nf_inet_addr *daddr,
550 __be16 dport)
1da177e4 551{
fc723250 552 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
553 unsigned hash;
554 struct ip_vs_dest *dest;
555
556 /*
557 * Check for "full" addressed entries
558 * Return the first found entry
559 */
7937df15 560 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 561
a0840e2e 562 read_lock(&ipvs->rs_lock);
fc723250 563 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
564 if ((dest->af == af)
565 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
566 && (dest->port == dport)
567 && ((dest->protocol == protocol) ||
568 dest->vfwmark)) {
569 /* HIT */
a0840e2e 570 read_unlock(&ipvs->rs_lock);
1da177e4
LT
571 return dest;
572 }
573 }
a0840e2e 574 read_unlock(&ipvs->rs_lock);
1da177e4
LT
575
576 return NULL;
577}
578
579/*
580 * Lookup destination by {addr,port} in the given service
581 */
582static struct ip_vs_dest *
7937df15
JV
583ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
584 __be16 dport)
1da177e4
LT
585{
586 struct ip_vs_dest *dest;
587
588 /*
589 * Find the destination for the given service
590 */
591 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
592 if ((dest->af == svc->af)
593 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
594 && (dest->port == dport)) {
1da177e4
LT
595 /* HIT */
596 return dest;
597 }
598 }
599
600 return NULL;
601}
602
1e356f9c
RB
603/*
604 * Find destination by {daddr,dport,vaddr,protocol}
605 * Cretaed to be used in ip_vs_process_message() in
606 * the backup synchronization daemon. It finds the
607 * destination to be bound to the received connection
608 * on the backup.
609 *
610 * ip_vs_lookup_real_service() looked promissing, but
611 * seems not working as expected.
612 */
fc723250
HS
613struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
614 const union nf_inet_addr *daddr,
7937df15
JV
615 __be16 dport,
616 const union nf_inet_addr *vaddr,
0e051e68 617 __be16 vport, __u16 protocol, __u32 fwmark)
1e356f9c
RB
618{
619 struct ip_vs_dest *dest;
620 struct ip_vs_service *svc;
621
fc723250 622 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
623 if (!svc)
624 return NULL;
625 dest = ip_vs_lookup_dest(svc, daddr, dport);
626 if (dest)
627 atomic_inc(&dest->refcnt);
628 ip_vs_service_put(svc);
629 return dest;
630}
1da177e4
LT
631
632/*
633 * Lookup dest by {svc,addr,port} in the destination trash.
634 * The destination trash is used to hold the destinations that are removed
635 * from the service table but are still referenced by some conn entries.
636 * The reason to add the destination trash is when the dest is temporary
637 * down (either by administrator or by monitor program), the dest can be
638 * picked back from the trash, the remaining connections to the dest can
639 * continue, and the counting information of the dest is also useful for
640 * scheduling.
641 */
642static struct ip_vs_dest *
7937df15
JV
643ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
644 __be16 dport)
1da177e4
LT
645{
646 struct ip_vs_dest *dest, *nxt;
f2431e6e 647 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
648
649 /*
650 * Find the destination in trash
651 */
f2431e6e 652 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
653 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
654 "dest->refcnt=%d\n",
655 dest->vfwmark,
656 IP_VS_DBG_ADDR(svc->af, &dest->addr),
657 ntohs(dest->port),
658 atomic_read(&dest->refcnt));
659 if (dest->af == svc->af &&
660 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
661 dest->port == dport &&
662 dest->vfwmark == svc->fwmark &&
663 dest->protocol == svc->protocol &&
664 (svc->fwmark ||
7937df15 665 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
666 dest->vport == svc->port))) {
667 /* HIT */
668 return dest;
669 }
670
671 /*
672 * Try to purge the destination from trash if not referenced
673 */
674 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
675 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
676 "from trash\n",
677 dest->vfwmark,
678 IP_VS_DBG_ADDR(svc->af, &dest->addr),
679 ntohs(dest->port));
1da177e4
LT
680 list_del(&dest->n_list);
681 ip_vs_dst_reset(dest);
682 __ip_vs_unbind_svc(dest);
b17fc996 683 free_percpu(dest->stats.cpustats);
1da177e4
LT
684 kfree(dest);
685 }
686 }
687
688 return NULL;
689}
690
691
692/*
693 * Clean up all the destinations in the trash
694 * Called by the ip_vs_control_cleanup()
695 *
696 * When the ip_vs_control_clearup is activated by ipvs module exit,
697 * the service tables must have been flushed and all the connections
698 * are expired, and the refcnt of each destination in the trash must
699 * be 1, so we simply release them here.
700 */
f2431e6e 701static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
702{
703 struct ip_vs_dest *dest, *nxt;
f2431e6e 704 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 705
f2431e6e 706 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4
LT
707 list_del(&dest->n_list);
708 ip_vs_dst_reset(dest);
709 __ip_vs_unbind_svc(dest);
b17fc996 710 free_percpu(dest->stats.cpustats);
1da177e4
LT
711 kfree(dest);
712 }
713}
714
55a3d4e1
JA
715static void
716ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
717{
718#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
55a3d4e1
JA
719
720 spin_lock_bh(&src->lock);
721
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
727
ea9f22cc 728 ip_vs_read_estimator(dst, src);
55a3d4e1
JA
729
730 spin_unlock_bh(&src->lock);
731}
1da177e4
LT
732
733static void
734ip_vs_zero_stats(struct ip_vs_stats *stats)
735{
736 spin_lock_bh(&stats->lock);
e93615d0 737
55a3d4e1
JA
738 /* get current counters as zero point, rates are zeroed */
739
740#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
55a3d4e1
JA
741
742 IP_VS_ZERO_STATS_COUNTER(conns);
743 IP_VS_ZERO_STATS_COUNTER(inpkts);
744 IP_VS_ZERO_STATS_COUNTER(outpkts);
745 IP_VS_ZERO_STATS_COUNTER(inbytes);
746 IP_VS_ZERO_STATS_COUNTER(outbytes);
747
1da177e4 748 ip_vs_zero_estimator(stats);
e93615d0 749
3a14a313 750 spin_unlock_bh(&stats->lock);
1da177e4
LT
751}
752
753/*
754 * Update a destination in the given service
755 */
756static void
26c15cfd
JA
757__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
758 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 759{
fc723250 760 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
761 int conn_flags;
762
763 /* set the weight and the flags */
764 atomic_set(&dest->weight, udest->weight);
3575792e
JA
765 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
766 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 767
1da177e4 768 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 769 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
770 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
771 } else {
772 /*
fc723250 773 * Put the real service in rs_table if not present.
1da177e4
LT
774 * For now only for NAT!
775 */
a0840e2e 776 write_lock_bh(&ipvs->rs_lock);
fc723250 777 ip_vs_rs_hash(ipvs, dest);
a0840e2e 778 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
779 }
780 atomic_set(&dest->conn_flags, conn_flags);
781
782 /* bind the service */
783 if (!dest->svc) {
784 __ip_vs_bind_svc(dest, svc);
785 } else {
786 if (dest->svc != svc) {
787 __ip_vs_unbind_svc(dest);
788 ip_vs_zero_stats(&dest->stats);
789 __ip_vs_bind_svc(dest, svc);
790 }
791 }
792
793 /* set the dest status flags */
794 dest->flags |= IP_VS_DEST_F_AVAILABLE;
795
796 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
797 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
798 dest->u_threshold = udest->u_threshold;
799 dest->l_threshold = udest->l_threshold;
26c15cfd 800
ff75f40f 801 spin_lock_bh(&dest->dst_lock);
fc604767 802 ip_vs_dst_reset(dest);
ff75f40f 803 spin_unlock_bh(&dest->dst_lock);
fc604767 804
26c15cfd 805 if (add)
6ef757f9 806 ip_vs_start_estimator(svc->net, &dest->stats);
26c15cfd
JA
807
808 write_lock_bh(&__ip_vs_svc_lock);
809
810 /* Wait until all other svc users go away */
811 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
812
813 if (add) {
814 list_add(&dest->n_list, &svc->destinations);
815 svc->num_dests++;
816 }
817
818 /* call the update_service, because server weight may be changed */
819 if (svc->scheduler->update_service)
820 svc->scheduler->update_service(svc);
821
822 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
823}
824
825
826/*
827 * Create a destination for the given service
828 */
829static int
c860c6b1 830ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
831 struct ip_vs_dest **dest_p)
832{
833 struct ip_vs_dest *dest;
834 unsigned atype;
835
836 EnterFunction(2);
837
09571c7a
VB
838#ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
841 if ((!(atype & IPV6_ADDR_UNICAST) ||
842 atype & IPV6_ADDR_LINKLOCAL) &&
4a98480b 843 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
09571c7a
VB
844 return -EINVAL;
845 } else
846#endif
847 {
4a98480b 848 atype = inet_addr_type(svc->net, udest->addr.ip);
09571c7a
VB
849 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
850 return -EINVAL;
851 }
1da177e4 852
dee06e47 853 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
1da177e4 854 if (dest == NULL) {
1e3e238e 855 pr_err("%s(): no memory.\n", __func__);
1da177e4
LT
856 return -ENOMEM;
857 }
b17fc996
HS
858 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
859 if (!dest->stats.cpustats) {
860 pr_err("%s() alloc_percpu failed\n", __func__);
861 goto err_alloc;
862 }
1da177e4 863
c860c6b1 864 dest->af = svc->af;
1da177e4 865 dest->protocol = svc->protocol;
c860c6b1 866 dest->vaddr = svc->addr;
1da177e4
LT
867 dest->vport = svc->port;
868 dest->vfwmark = svc->fwmark;
c860c6b1 869 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
870 dest->port = udest->port;
871
872 atomic_set(&dest->activeconns, 0);
873 atomic_set(&dest->inactconns, 0);
874 atomic_set(&dest->persistconns, 0);
26c15cfd 875 atomic_set(&dest->refcnt, 1);
1da177e4
LT
876
877 INIT_LIST_HEAD(&dest->d_list);
878 spin_lock_init(&dest->dst_lock);
879 spin_lock_init(&dest->stats.lock);
26c15cfd 880 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
881
882 *dest_p = dest;
883
884 LeaveFunction(2);
885 return 0;
b17fc996
HS
886
887err_alloc:
888 kfree(dest);
889 return -ENOMEM;
1da177e4
LT
890}
891
892
893/*
894 * Add a destination into an existing service
895 */
896static int
c860c6b1 897ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
898{
899 struct ip_vs_dest *dest;
c860c6b1 900 union nf_inet_addr daddr;
014d730d 901 __be16 dport = udest->port;
1da177e4
LT
902 int ret;
903
904 EnterFunction(2);
905
906 if (udest->weight < 0) {
1e3e238e 907 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
908 return -ERANGE;
909 }
910
911 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
912 pr_err("%s(): lower threshold is higher than upper threshold\n",
913 __func__);
1da177e4
LT
914 return -ERANGE;
915 }
916
c860c6b1
JV
917 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
918
1da177e4
LT
919 /*
920 * Check if the dest already exists in the list
921 */
7937df15
JV
922 dest = ip_vs_lookup_dest(svc, &daddr, dport);
923
1da177e4 924 if (dest != NULL) {
1e3e238e 925 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
926 return -EEXIST;
927 }
928
929 /*
930 * Check if the dest already exists in the trash and
931 * is from the same service
932 */
7937df15
JV
933 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
934
1da177e4 935 if (dest != NULL) {
cfc78c5a
JV
936 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
937 "dest->refcnt=%d, service %u/%s:%u\n",
938 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
939 atomic_read(&dest->refcnt),
940 dest->vfwmark,
941 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
942 ntohs(dest->vport));
943
1da177e4
LT
944 /*
945 * Get the destination from the trash
946 */
947 list_del(&dest->n_list);
948
26c15cfd
JA
949 __ip_vs_update_dest(svc, dest, udest, 1);
950 ret = 0;
951 } else {
1da177e4 952 /*
26c15cfd 953 * Allocate and initialize the dest structure
1da177e4 954 */
26c15cfd 955 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 956 }
1da177e4
LT
957 LeaveFunction(2);
958
26c15cfd 959 return ret;
1da177e4
LT
960}
961
962
963/*
964 * Edit a destination in the given service
965 */
966static int
c860c6b1 967ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
968{
969 struct ip_vs_dest *dest;
c860c6b1 970 union nf_inet_addr daddr;
014d730d 971 __be16 dport = udest->port;
1da177e4
LT
972
973 EnterFunction(2);
974
975 if (udest->weight < 0) {
1e3e238e 976 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
977 return -ERANGE;
978 }
979
980 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
981 pr_err("%s(): lower threshold is higher than upper threshold\n",
982 __func__);
1da177e4
LT
983 return -ERANGE;
984 }
985
c860c6b1
JV
986 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
987
1da177e4
LT
988 /*
989 * Lookup the destination list
990 */
7937df15
JV
991 dest = ip_vs_lookup_dest(svc, &daddr, dport);
992
1da177e4 993 if (dest == NULL) {
1e3e238e 994 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
995 return -ENOENT;
996 }
997
26c15cfd 998 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
999 LeaveFunction(2);
1000
1001 return 0;
1002}
1003
1004
1005/*
1006 * Delete a destination (must be already unlinked from the service)
1007 */
29c2026f 1008static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 1009{
a0840e2e
HS
1010 struct netns_ipvs *ipvs = net_ipvs(net);
1011
6ef757f9 1012 ip_vs_stop_estimator(net, &dest->stats);
1da177e4
LT
1013
1014 /*
1015 * Remove it from the d-linked list with the real services.
1016 */
a0840e2e 1017 write_lock_bh(&ipvs->rs_lock);
1da177e4 1018 ip_vs_rs_unhash(dest);
a0840e2e 1019 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
1020
1021 /*
1022 * Decrease the refcnt of the dest, and free the dest
1023 * if nobody refers to it (refcnt=0). Otherwise, throw
1024 * the destination into the trash.
1025 */
1026 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1027 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1028 dest->vfwmark,
1029 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1030 ntohs(dest->port));
1da177e4
LT
1031 ip_vs_dst_reset(dest);
1032 /* simply decrease svc->refcnt here, let the caller check
1033 and release the service if nobody refers to it.
1034 Only user context can release destination and service,
1035 and only one user context can update virtual service at a
1036 time, so the operation here is OK */
1037 atomic_dec(&dest->svc->refcnt);
b17fc996 1038 free_percpu(dest->stats.cpustats);
1da177e4
LT
1039 kfree(dest);
1040 } else {
cfc78c5a
JV
1041 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1042 "dest->refcnt=%d\n",
1043 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1044 ntohs(dest->port),
1045 atomic_read(&dest->refcnt));
f2431e6e 1046 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1047 atomic_inc(&dest->refcnt);
1048 }
1049}
1050
1051
1052/*
1053 * Unlink a destination from the given service
1054 */
1055static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1056 struct ip_vs_dest *dest,
1057 int svcupd)
1058{
1059 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1060
1061 /*
1062 * Remove it from the d-linked destination list.
1063 */
1064 list_del(&dest->n_list);
1065 svc->num_dests--;
82dfb6f3
SW
1066
1067 /*
1068 * Call the update_service function of its scheduler
1069 */
1070 if (svcupd && svc->scheduler->update_service)
1071 svc->scheduler->update_service(svc);
1da177e4
LT
1072}
1073
1074
1075/*
1076 * Delete a destination server in the given service
1077 */
1078static int
c860c6b1 1079ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1080{
1081 struct ip_vs_dest *dest;
014d730d 1082 __be16 dport = udest->port;
1da177e4
LT
1083
1084 EnterFunction(2);
1085
7937df15 1086 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1087
1da177e4 1088 if (dest == NULL) {
1e3e238e 1089 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1090 return -ENOENT;
1091 }
1092
1093 write_lock_bh(&__ip_vs_svc_lock);
1094
1095 /*
1096 * Wait until all other svc users go away.
1097 */
26c15cfd 1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1099
1100 /*
1101 * Unlink dest from the service
1102 */
1103 __ip_vs_unlink_dest(svc, dest, 1);
1104
1105 write_unlock_bh(&__ip_vs_svc_lock);
1106
1107 /*
1108 * Delete the destination
1109 */
a0840e2e 1110 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1111
1112 LeaveFunction(2);
1113
1114 return 0;
1115}
1116
1117
1118/*
1119 * Add a service into the service hash table
1120 */
1121static int
fc723250 1122ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1123 struct ip_vs_service **svc_p)
1da177e4
LT
1124{
1125 int ret = 0;
1126 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1127 struct ip_vs_pe *pe = NULL;
1da177e4 1128 struct ip_vs_service *svc = NULL;
a0840e2e 1129 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1130
1131 /* increase the module use count */
1132 ip_vs_use_count_inc();
1133
1134 /* Lookup the scheduler by 'u->sched_name' */
1135 sched = ip_vs_scheduler_get(u->sched_name);
1136 if (sched == NULL) {
1e3e238e 1137 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1138 ret = -ENOENT;
6e08bfb8 1139 goto out_err;
1da177e4
LT
1140 }
1141
0d1e71b0 1142 if (u->pe_name && *u->pe_name) {
e9e5eee8 1143 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1144 if (pe == NULL) {
1145 pr_info("persistence engine module ip_vs_pe_%s "
1146 "not found\n", u->pe_name);
1147 ret = -ENOENT;
1148 goto out_err;
1149 }
1150 }
1151
f94fd041 1152#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1153 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1154 ret = -EINVAL;
1155 goto out_err;
f94fd041
JV
1156 }
1157#endif
1158
dee06e47 1159 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1160 if (svc == NULL) {
1e3e238e 1161 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1162 ret = -ENOMEM;
1163 goto out_err;
1164 }
b17fc996
HS
1165 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1166 if (!svc->stats.cpustats) {
1167 pr_err("%s() alloc_percpu failed\n", __func__);
1168 goto out_err;
1169 }
1da177e4
LT
1170
1171 /* I'm the first user of the service */
26c15cfd 1172 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1173 atomic_set(&svc->refcnt, 0);
1174
c860c6b1 1175 svc->af = u->af;
1da177e4 1176 svc->protocol = u->protocol;
c860c6b1 1177 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1178 svc->port = u->port;
1179 svc->fwmark = u->fwmark;
1180 svc->flags = u->flags;
1181 svc->timeout = u->timeout * HZ;
1182 svc->netmask = u->netmask;
fc723250 1183 svc->net = net;
1da177e4
LT
1184
1185 INIT_LIST_HEAD(&svc->destinations);
1186 rwlock_init(&svc->sched_lock);
1187 spin_lock_init(&svc->stats.lock);
1188
1189 /* Bind the scheduler */
1190 ret = ip_vs_bind_scheduler(svc, sched);
1191 if (ret)
1192 goto out_err;
1193 sched = NULL;
1194
0d1e71b0
SH
1195 /* Bind the ct retriever */
1196 ip_vs_bind_pe(svc, pe);
1197 pe = NULL;
1198
1da177e4
LT
1199 /* Update the virtual service counters */
1200 if (svc->port == FTPPORT)
763f8d0e 1201 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1202 else if (svc->port == 0)
763f8d0e 1203 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1204
6ef757f9 1205 ip_vs_start_estimator(net, &svc->stats);
f94fd041
JV
1206
1207 /* Count only IPv4 services for old get/setsockopt interface */
1208 if (svc->af == AF_INET)
a0840e2e 1209 ipvs->num_services++;
1da177e4
LT
1210
1211 /* Hash the service into the service table */
1212 write_lock_bh(&__ip_vs_svc_lock);
1213 ip_vs_svc_hash(svc);
1214 write_unlock_bh(&__ip_vs_svc_lock);
1215
1216 *svc_p = svc;
1217 return 0;
1218
b17fc996 1219
6e08bfb8 1220 out_err:
1da177e4 1221 if (svc != NULL) {
2fabf35b 1222 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1223 if (svc->inc) {
1224 local_bh_disable();
1225 ip_vs_app_inc_put(svc->inc);
1226 local_bh_enable();
1227 }
b17fc996
HS
1228 if (svc->stats.cpustats)
1229 free_percpu(svc->stats.cpustats);
1da177e4
LT
1230 kfree(svc);
1231 }
1232 ip_vs_scheduler_put(sched);
0d1e71b0 1233 ip_vs_pe_put(pe);
1da177e4 1234
1da177e4
LT
1235 /* decrease the module use count */
1236 ip_vs_use_count_dec();
1237
1238 return ret;
1239}
1240
1241
1242/*
1243 * Edit a service and bind it with a new scheduler
1244 */
1245static int
c860c6b1 1246ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1247{
1248 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1249 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1250 int ret = 0;
1251
1252 /*
1253 * Lookup the scheduler, by 'u->sched_name'
1254 */
1255 sched = ip_vs_scheduler_get(u->sched_name);
1256 if (sched == NULL) {
1e3e238e 1257 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1258 return -ENOENT;
1259 }
1260 old_sched = sched;
1261
0d1e71b0 1262 if (u->pe_name && *u->pe_name) {
e9e5eee8 1263 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1264 if (pe == NULL) {
1265 pr_info("persistence engine module ip_vs_pe_%s "
1266 "not found\n", u->pe_name);
1267 ret = -ENOENT;
1268 goto out;
1269 }
1270 old_pe = pe;
1271 }
1272
f94fd041 1273#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1274 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1275 ret = -EINVAL;
1276 goto out;
f94fd041
JV
1277 }
1278#endif
1279
1da177e4
LT
1280 write_lock_bh(&__ip_vs_svc_lock);
1281
1282 /*
1283 * Wait until all other svc users go away.
1284 */
26c15cfd 1285 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1286
1287 /*
1288 * Set the flags and timeout value
1289 */
1290 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1291 svc->timeout = u->timeout * HZ;
1292 svc->netmask = u->netmask;
1293
1294 old_sched = svc->scheduler;
1295 if (sched != old_sched) {
1296 /*
1297 * Unbind the old scheduler
1298 */
1299 if ((ret = ip_vs_unbind_scheduler(svc))) {
1300 old_sched = sched;
9e691ed6 1301 goto out_unlock;
1da177e4
LT
1302 }
1303
1304 /*
1305 * Bind the new scheduler
1306 */
1307 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1308 /*
1309 * If ip_vs_bind_scheduler fails, restore the old
1310 * scheduler.
1311 * The main reason of failure is out of memory.
1312 *
1313 * The question is if the old scheduler can be
1314 * restored all the time. TODO: if it cannot be
1315 * restored some time, we must delete the service,
1316 * otherwise the system may crash.
1317 */
1318 ip_vs_bind_scheduler(svc, old_sched);
1319 old_sched = sched;
9e691ed6 1320 goto out_unlock;
1da177e4
LT
1321 }
1322 }
1323
0d1e71b0
SH
1324 old_pe = svc->pe;
1325 if (pe != old_pe) {
1326 ip_vs_unbind_pe(svc);
1327 ip_vs_bind_pe(svc, pe);
1328 }
1329
9e691ed6 1330 out_unlock:
1da177e4 1331 write_unlock_bh(&__ip_vs_svc_lock);
9e691ed6 1332 out:
6e08bfb8 1333 ip_vs_scheduler_put(old_sched);
0d1e71b0 1334 ip_vs_pe_put(old_pe);
1da177e4
LT
1335 return ret;
1336}
1337
1338
1339/*
1340 * Delete a service from the service list
1341 * - The service must be unlinked, unlocked and not referenced!
1342 * - We are called under _bh lock
1343 */
1344static void __ip_vs_del_service(struct ip_vs_service *svc)
1345{
1346 struct ip_vs_dest *dest, *nxt;
1347 struct ip_vs_scheduler *old_sched;
0d1e71b0 1348 struct ip_vs_pe *old_pe;
a0840e2e 1349 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1350
1351 pr_info("%s: enter\n", __func__);
1da177e4 1352
f94fd041
JV
1353 /* Count only IPv4 services for old get/setsockopt interface */
1354 if (svc->af == AF_INET)
a0840e2e 1355 ipvs->num_services--;
f94fd041 1356
6ef757f9 1357 ip_vs_stop_estimator(svc->net, &svc->stats);
1da177e4
LT
1358
1359 /* Unbind scheduler */
1360 old_sched = svc->scheduler;
1361 ip_vs_unbind_scheduler(svc);
6e08bfb8 1362 ip_vs_scheduler_put(old_sched);
1da177e4 1363
0d1e71b0
SH
1364 /* Unbind persistence engine */
1365 old_pe = svc->pe;
1366 ip_vs_unbind_pe(svc);
1367 ip_vs_pe_put(old_pe);
1368
1da177e4
LT
1369 /* Unbind app inc */
1370 if (svc->inc) {
1371 ip_vs_app_inc_put(svc->inc);
1372 svc->inc = NULL;
1373 }
1374
1375 /*
1376 * Unlink the whole destination list
1377 */
1378 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1379 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1380 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1381 }
1382
1383 /*
1384 * Update the virtual service counters
1385 */
1386 if (svc->port == FTPPORT)
763f8d0e 1387 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1388 else if (svc->port == 0)
763f8d0e 1389 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1390
1391 /*
1392 * Free the service if nobody refers to it
1393 */
26c15cfd
JA
1394 if (atomic_read(&svc->refcnt) == 0) {
1395 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1396 svc->fwmark,
1397 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1398 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1399 free_percpu(svc->stats.cpustats);
1da177e4 1400 kfree(svc);
26c15cfd 1401 }
1da177e4
LT
1402
1403 /* decrease the module use count */
1404 ip_vs_use_count_dec();
1405}
1406
1407/*
26c15cfd 1408 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1409 */
26c15cfd 1410static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1411{
1da177e4
LT
1412 /*
1413 * Unhash it from the service table
1414 */
1415 write_lock_bh(&__ip_vs_svc_lock);
1416
1417 ip_vs_svc_unhash(svc);
1418
1419 /*
1420 * Wait until all the svc users go away.
1421 */
26c15cfd 1422 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1423
1424 __ip_vs_del_service(svc);
1425
1426 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1427}
1428
1429/*
1430 * Delete a service from the service list
1431 */
1432static int ip_vs_del_service(struct ip_vs_service *svc)
1433{
1434 if (svc == NULL)
1435 return -EEXIST;
1436 ip_vs_unlink_service(svc);
1da177e4
LT
1437
1438 return 0;
1439}
1440
1441
1442/*
1443 * Flush all the virtual services
1444 */
fc723250 1445static int ip_vs_flush(struct net *net)
1da177e4
LT
1446{
1447 int idx;
1448 struct ip_vs_service *svc, *nxt;
1449
1450 /*
fc723250 1451 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1452 */
1453 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1454 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1455 s_list) {
1456 if (net_eq(svc->net, net))
1457 ip_vs_unlink_service(svc);
1da177e4
LT
1458 }
1459 }
1460
1461 /*
1462 * Flush the service table hashed by fwmark
1463 */
1464 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1465 list_for_each_entry_safe(svc, nxt,
1466 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1467 if (net_eq(svc->net, net))
1468 ip_vs_unlink_service(svc);
1da177e4
LT
1469 }
1470 }
1471
1472 return 0;
1473}
1474
1475
1476/*
1477 * Zero counters in a service or all services
1478 */
1479static int ip_vs_zero_service(struct ip_vs_service *svc)
1480{
1481 struct ip_vs_dest *dest;
1482
1483 write_lock_bh(&__ip_vs_svc_lock);
1484 list_for_each_entry(dest, &svc->destinations, n_list) {
1485 ip_vs_zero_stats(&dest->stats);
1486 }
1487 ip_vs_zero_stats(&svc->stats);
1488 write_unlock_bh(&__ip_vs_svc_lock);
1489 return 0;
1490}
1491
fc723250 1492static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1493{
1494 int idx;
1495 struct ip_vs_service *svc;
1496
1497 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1498 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1499 if (net_eq(svc->net, net))
1500 ip_vs_zero_service(svc);
1da177e4
LT
1501 }
1502 }
1503
1504 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1505 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1506 if (net_eq(svc->net, net))
1507 ip_vs_zero_service(svc);
1da177e4
LT
1508 }
1509 }
1510
2a0751af 1511 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1da177e4
LT
1512 return 0;
1513}
1514
14e40546 1515#ifdef CONFIG_SYSCTL
1da177e4 1516static int
8d65af78 1517proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1518 void __user *buffer, size_t *lenp, loff_t *ppos)
1519{
9330419d 1520 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1521 int *valp = table->data;
1522 int val = *valp;
1523 int rc;
1524
8d65af78 1525 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1526 if (write && (*valp != val)) {
1527 if ((*valp < 0) || (*valp > 3)) {
1528 /* Restore the correct value */
1529 *valp = val;
1530 } else {
9330419d 1531 update_defense_level(net_ipvs(net));
1da177e4
LT
1532 }
1533 }
1534 return rc;
1535}
1536
1da177e4 1537static int
8d65af78 1538proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1539 void __user *buffer, size_t *lenp, loff_t *ppos)
1540{
1541 int *valp = table->data;
1542 int val[2];
1543 int rc;
1544
1545 /* backup the value first */
1546 memcpy(val, valp, sizeof(val));
1547
8d65af78 1548 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1549 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550 /* Restore the correct value */
1551 memcpy(valp, val, sizeof(val));
1552 }
1553 return rc;
1554}
1555
b880c1f0
HS
1556static int
1557proc_do_sync_mode(ctl_table *table, int write,
1558 void __user *buffer, size_t *lenp, loff_t *ppos)
1559{
1560 int *valp = table->data;
1561 int val = *valp;
1562 int rc;
1563
1564 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1565 if (write && (*valp != val)) {
1566 if ((*valp < 0) || (*valp > 1)) {
1567 /* Restore the correct value */
1568 *valp = val;
1569 } else {
f131315f
HS
1570 struct net *net = current->nsproxy->net_ns;
1571 ip_vs_sync_switch_mode(net, val);
b880c1f0
HS
1572 }
1573 }
1574 return rc;
1575}
1da177e4
LT
1576
1577/*
1578 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e
HS
1579 * Do not change order or insert new entries without
1580 * align with netns init in __ip_vs_control_init()
1da177e4
LT
1581 */
1582
1583static struct ctl_table vs_vars[] = {
1584 {
1da177e4 1585 .procname = "amemthresh",
1da177e4
LT
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
6d9f239a 1588 .proc_handler = proc_dointvec,
1da177e4 1589 },
1da177e4 1590 {
1da177e4 1591 .procname = "am_droprate",
1da177e4
LT
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
6d9f239a 1594 .proc_handler = proc_dointvec,
1da177e4
LT
1595 },
1596 {
1da177e4 1597 .procname = "drop_entry",
1da177e4
LT
1598 .maxlen = sizeof(int),
1599 .mode = 0644,
6d9f239a 1600 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1601 },
1602 {
1da177e4 1603 .procname = "drop_packet",
1da177e4
LT
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
6d9f239a 1606 .proc_handler = proc_do_defense_mode,
1da177e4 1607 },
f4bc17cd
JA
1608#ifdef CONFIG_IP_VS_NFCT
1609 {
1610 .procname = "conntrack",
f4bc17cd
JA
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec,
1614 },
1615#endif
1da177e4 1616 {
1da177e4 1617 .procname = "secure_tcp",
1da177e4
LT
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
6d9f239a 1620 .proc_handler = proc_do_defense_mode,
1da177e4 1621 },
8a803040
JA
1622 {
1623 .procname = "snat_reroute",
8a803040
JA
1624 .maxlen = sizeof(int),
1625 .mode = 0644,
1626 .proc_handler = &proc_dointvec,
1627 },
b880c1f0
HS
1628 {
1629 .procname = "sync_version",
b880c1f0
HS
1630 .maxlen = sizeof(int),
1631 .mode = 0644,
1632 .proc_handler = &proc_do_sync_mode,
1633 },
a0840e2e
HS
1634 {
1635 .procname = "cache_bypass",
1636 .maxlen = sizeof(int),
1637 .mode = 0644,
1638 .proc_handler = proc_dointvec,
1639 },
1640 {
1641 .procname = "expire_nodest_conn",
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = proc_dointvec,
1645 },
1646 {
1647 .procname = "expire_quiescent_template",
1648 .maxlen = sizeof(int),
1649 .mode = 0644,
1650 .proc_handler = proc_dointvec,
1651 },
1652 {
1653 .procname = "sync_threshold",
1654 .maxlen =
1655 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1656 .mode = 0644,
1657 .proc_handler = proc_do_sync_threshold,
1658 },
1659 {
1660 .procname = "nat_icmp_send",
1661 .maxlen = sizeof(int),
1662 .mode = 0644,
1663 .proc_handler = proc_dointvec,
1664 },
1665#ifdef CONFIG_IP_VS_DEBUG
1666 {
1667 .procname = "debug_level",
1668 .data = &sysctl_ip_vs_debug_level,
1669 .maxlen = sizeof(int),
1670 .mode = 0644,
1671 .proc_handler = proc_dointvec,
1672 },
1673#endif
1da177e4
LT
1674#if 0
1675 {
1da177e4
LT
1676 .procname = "timeout_established",
1677 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1678 .maxlen = sizeof(int),
1679 .mode = 0644,
6d9f239a 1680 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1681 },
1682 {
1da177e4
LT
1683 .procname = "timeout_synsent",
1684 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
6d9f239a 1687 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1688 },
1689 {
1da177e4
LT
1690 .procname = "timeout_synrecv",
1691 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
6d9f239a 1694 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1695 },
1696 {
1da177e4
LT
1697 .procname = "timeout_finwait",
1698 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
6d9f239a 1701 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1702 },
1703 {
1da177e4
LT
1704 .procname = "timeout_timewait",
1705 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
6d9f239a 1708 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1709 },
1710 {
1da177e4
LT
1711 .procname = "timeout_close",
1712 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
6d9f239a 1715 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1716 },
1717 {
1da177e4
LT
1718 .procname = "timeout_closewait",
1719 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
6d9f239a 1722 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1723 },
1724 {
1da177e4
LT
1725 .procname = "timeout_lastack",
1726 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
6d9f239a 1729 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1730 },
1731 {
1da177e4
LT
1732 .procname = "timeout_listen",
1733 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1734 .maxlen = sizeof(int),
1735 .mode = 0644,
6d9f239a 1736 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1737 },
1738 {
1da177e4
LT
1739 .procname = "timeout_synack",
1740 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1741 .maxlen = sizeof(int),
1742 .mode = 0644,
6d9f239a 1743 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1744 },
1745 {
1da177e4
LT
1746 .procname = "timeout_udp",
1747 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1748 .maxlen = sizeof(int),
1749 .mode = 0644,
6d9f239a 1750 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1751 },
1752 {
1da177e4
LT
1753 .procname = "timeout_icmp",
1754 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1755 .maxlen = sizeof(int),
1756 .mode = 0644,
6d9f239a 1757 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1758 },
1759#endif
f8572d8f 1760 { }
1da177e4
LT
1761};
1762
5587da55 1763const struct ctl_path net_vs_ctl_path[] = {
f8572d8f
EB
1764 { .procname = "net", },
1765 { .procname = "ipv4", },
90754f8e
PE
1766 { .procname = "vs", },
1767 { }
1da177e4 1768};
90754f8e 1769EXPORT_SYMBOL_GPL(net_vs_ctl_path);
14e40546 1770#endif
1da177e4 1771
1da177e4
LT
1772#ifdef CONFIG_PROC_FS
1773
1774struct ip_vs_iter {
fc723250 1775 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1776 struct list_head *table;
1777 int bucket;
1778};
1779
1780/*
1781 * Write the contents of the VS rule table to a PROCfs file.
1782 * (It is kept just for backward compatibility)
1783 */
1784static inline const char *ip_vs_fwd_name(unsigned flags)
1785{
1786 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1787 case IP_VS_CONN_F_LOCALNODE:
1788 return "Local";
1789 case IP_VS_CONN_F_TUNNEL:
1790 return "Tunnel";
1791 case IP_VS_CONN_F_DROUTE:
1792 return "Route";
1793 default:
1794 return "Masq";
1795 }
1796}
1797
1798
1799/* Get the Nth entry in the two lists */
1800static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1801{
fc723250 1802 struct net *net = seq_file_net(seq);
1da177e4
LT
1803 struct ip_vs_iter *iter = seq->private;
1804 int idx;
1805 struct ip_vs_service *svc;
1806
1807 /* look in hash by protocol */
1808 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1809 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1810 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1811 iter->table = ip_vs_svc_table;
1812 iter->bucket = idx;
1813 return svc;
1814 }
1815 }
1816 }
1817
1818 /* keep looking in fwmark */
1819 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1820 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1821 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1822 iter->table = ip_vs_svc_fwm_table;
1823 iter->bucket = idx;
1824 return svc;
1825 }
1826 }
1827 }
1828
1829 return NULL;
1830}
1831
1832static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1833__acquires(__ip_vs_svc_lock)
1da177e4
LT
1834{
1835
1836 read_lock_bh(&__ip_vs_svc_lock);
1837 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1838}
1839
1840
1841static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1842{
1843 struct list_head *e;
1844 struct ip_vs_iter *iter;
1845 struct ip_vs_service *svc;
1846
1847 ++*pos;
1848 if (v == SEQ_START_TOKEN)
1849 return ip_vs_info_array(seq,0);
1850
1851 svc = v;
1852 iter = seq->private;
1853
1854 if (iter->table == ip_vs_svc_table) {
1855 /* next service in table hashed by protocol */
1856 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1857 return list_entry(e, struct ip_vs_service, s_list);
1858
1859
1860 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1861 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1862 s_list) {
1863 return svc;
1864 }
1865 }
1866
1867 iter->table = ip_vs_svc_fwm_table;
1868 iter->bucket = -1;
1869 goto scan_fwmark;
1870 }
1871
1872 /* next service in hashed by fwmark */
1873 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1874 return list_entry(e, struct ip_vs_service, f_list);
1875
1876 scan_fwmark:
1877 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1878 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1879 f_list)
1880 return svc;
1881 }
1882
1883 return NULL;
1884}
1885
1886static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 1887__releases(__ip_vs_svc_lock)
1da177e4
LT
1888{
1889 read_unlock_bh(&__ip_vs_svc_lock);
1890}
1891
1892
1893static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1894{
1895 if (v == SEQ_START_TOKEN) {
1896 seq_printf(seq,
1897 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 1898 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
1899 seq_puts(seq,
1900 "Prot LocalAddress:Port Scheduler Flags\n");
1901 seq_puts(seq,
1902 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1903 } else {
1904 const struct ip_vs_service *svc = v;
1905 const struct ip_vs_iter *iter = seq->private;
1906 const struct ip_vs_dest *dest;
1907
667a5f18
VB
1908 if (iter->table == ip_vs_svc_table) {
1909#ifdef CONFIG_IP_VS_IPV6
1910 if (svc->af == AF_INET6)
5b095d98 1911 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 1912 ip_vs_proto_name(svc->protocol),
38ff4fa4 1913 &svc->addr.in6,
667a5f18
VB
1914 ntohs(svc->port),
1915 svc->scheduler->name);
1916 else
1917#endif
26ec037f 1918 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
1919 ip_vs_proto_name(svc->protocol),
1920 ntohl(svc->addr.ip),
1921 ntohs(svc->port),
26ec037f
NC
1922 svc->scheduler->name,
1923 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1924 } else {
26ec037f
NC
1925 seq_printf(seq, "FWM %08X %s %s",
1926 svc->fwmark, svc->scheduler->name,
1927 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 1928 }
1da177e4
LT
1929
1930 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1931 seq_printf(seq, "persistent %d %08X\n",
1932 svc->timeout,
1933 ntohl(svc->netmask));
1934 else
1935 seq_putc(seq, '\n');
1936
1937 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
1938#ifdef CONFIG_IP_VS_IPV6
1939 if (dest->af == AF_INET6)
1940 seq_printf(seq,
5b095d98 1941 " -> [%pI6]:%04X"
667a5f18 1942 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 1943 &dest->addr.in6,
667a5f18
VB
1944 ntohs(dest->port),
1945 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1946 atomic_read(&dest->weight),
1947 atomic_read(&dest->activeconns),
1948 atomic_read(&dest->inactconns));
1949 else
1950#endif
1951 seq_printf(seq,
1952 " -> %08X:%04X "
1953 "%-7s %-6d %-10d %-10d\n",
1954 ntohl(dest->addr.ip),
1955 ntohs(dest->port),
1956 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1957 atomic_read(&dest->weight),
1958 atomic_read(&dest->activeconns),
1959 atomic_read(&dest->inactconns));
1960
1da177e4
LT
1961 }
1962 }
1963 return 0;
1964}
1965
56b3d975 1966static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1967 .start = ip_vs_info_seq_start,
1968 .next = ip_vs_info_seq_next,
1969 .stop = ip_vs_info_seq_stop,
1970 .show = ip_vs_info_seq_show,
1971};
1972
1973static int ip_vs_info_open(struct inode *inode, struct file *file)
1974{
fc723250 1975 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 1976 sizeof(struct ip_vs_iter));
1da177e4
LT
1977}
1978
9a32144e 1979static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1980 .owner = THIS_MODULE,
1981 .open = ip_vs_info_open,
1982 .read = seq_read,
1983 .llseek = seq_lseek,
1984 .release = seq_release_private,
1985};
1986
1987#endif
1988
1da177e4
LT
1989#ifdef CONFIG_PROC_FS
1990static int ip_vs_stats_show(struct seq_file *seq, void *v)
1991{
b17fc996 1992 struct net *net = seq_file_single_net(seq);
55a3d4e1 1993 struct ip_vs_stats_user show;
1da177e4
LT
1994
1995/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1996 seq_puts(seq,
1997 " Total Incoming Outgoing Incoming Outgoing\n");
1998 seq_printf(seq,
1999 " Conns Packets Packets Bytes Bytes\n");
2000
55a3d4e1
JA
2001 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2002 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2003 show.inpkts, show.outpkts,
2004 (unsigned long long) show.inbytes,
2005 (unsigned long long) show.outbytes);
1da177e4
LT
2006
2007/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2008 seq_puts(seq,
2009 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
55a3d4e1
JA
2010 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2011 show.cps, show.inpps, show.outpps,
2012 show.inbps, show.outbps);
1da177e4
LT
2013
2014 return 0;
2015}
2016
2017static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2018{
fc723250 2019 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
2020}
2021
9a32144e 2022static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2023 .owner = THIS_MODULE,
2024 .open = ip_vs_stats_seq_open,
2025 .read = seq_read,
2026 .llseek = seq_lseek,
2027 .release = single_release,
2028};
2029
b17fc996
HS
2030static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2031{
2032 struct net *net = seq_file_single_net(seq);
2a0751af
JA
2033 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2034 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
ea9f22cc 2035 struct ip_vs_stats_user rates;
b17fc996
HS
2036 int i;
2037
2038/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2039 seq_puts(seq,
2040 " Total Incoming Outgoing Incoming Outgoing\n");
2041 seq_printf(seq,
2042 "CPU Conns Packets Packets Bytes Bytes\n");
2043
2044 for_each_possible_cpu(i) {
2a0751af
JA
2045 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2046 unsigned int start;
2047 __u64 inbytes, outbytes;
2048
2049 do {
2050 start = u64_stats_fetch_begin_bh(&u->syncp);
2051 inbytes = u->ustats.inbytes;
2052 outbytes = u->ustats.outbytes;
2053 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2054
b17fc996 2055 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2a0751af
JA
2056 i, u->ustats.conns, u->ustats.inpkts,
2057 u->ustats.outpkts, (__u64)inbytes,
2058 (__u64)outbytes);
b17fc996
HS
2059 }
2060
2061 spin_lock_bh(&tot_stats->lock);
ea9f22cc 2062
b17fc996
HS
2063 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2064 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2065 tot_stats->ustats.outpkts,
2066 (unsigned long long) tot_stats->ustats.inbytes,
2067 (unsigned long long) tot_stats->ustats.outbytes);
2068
ea9f22cc
JA
2069 ip_vs_read_estimator(&rates, tot_stats);
2070
2071 spin_unlock_bh(&tot_stats->lock);
2072
b17fc996
HS
2073/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2074 seq_puts(seq,
2075 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2076 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
ea9f22cc
JA
2077 rates.cps,
2078 rates.inpps,
2079 rates.outpps,
2080 rates.inbps,
2081 rates.outbps);
b17fc996
HS
2082
2083 return 0;
2084}
2085
2086static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2087{
2088 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2089}
2090
2091static const struct file_operations ip_vs_stats_percpu_fops = {
2092 .owner = THIS_MODULE,
2093 .open = ip_vs_stats_percpu_seq_open,
2094 .read = seq_read,
2095 .llseek = seq_lseek,
2096 .release = single_release,
2097};
1da177e4
LT
2098#endif
2099
2100/*
2101 * Set timeout values for tcp tcpfin udp in the timeout_table.
2102 */
9330419d 2103static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2104{
091bb34c 2105#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2106 struct ip_vs_proto_data *pd;
091bb34c 2107#endif
9330419d 2108
1da177e4
LT
2109 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2110 u->tcp_timeout,
2111 u->tcp_fin_timeout,
2112 u->udp_timeout);
2113
2114#ifdef CONFIG_IP_VS_PROTO_TCP
2115 if (u->tcp_timeout) {
9330419d
HS
2116 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2117 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2118 = u->tcp_timeout * HZ;
2119 }
2120
2121 if (u->tcp_fin_timeout) {
9330419d
HS
2122 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2123 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2124 = u->tcp_fin_timeout * HZ;
2125 }
2126#endif
2127
2128#ifdef CONFIG_IP_VS_PROTO_UDP
2129 if (u->udp_timeout) {
9330419d
HS
2130 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2131 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2132 = u->udp_timeout * HZ;
2133 }
2134#endif
2135 return 0;
2136}
2137
2138
/*
 * SET_CMDID() turns a set-command number into an index relative to
 * IP_VS_BASE_CTL; the *_ARG_LEN macros give the argument size expected
 * for each class of command. The macro argument is parenthesised so that
 * any expression can be passed safely.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2146
9b5b5cff 2147static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2148 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2149 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2150 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2151 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2152 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2153 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2154 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2155 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2156 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2157 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2158 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2159};
2160
c860c6b1
JV
2161static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2162 struct ip_vs_service_user *usvc_compat)
2163{
0d1e71b0
SH
2164 memset(usvc, 0, sizeof(*usvc));
2165
c860c6b1
JV
2166 usvc->af = AF_INET;
2167 usvc->protocol = usvc_compat->protocol;
2168 usvc->addr.ip = usvc_compat->addr;
2169 usvc->port = usvc_compat->port;
2170 usvc->fwmark = usvc_compat->fwmark;
2171
2172 /* Deep copy of sched_name is not needed here */
2173 usvc->sched_name = usvc_compat->sched_name;
2174
2175 usvc->flags = usvc_compat->flags;
2176 usvc->timeout = usvc_compat->timeout;
2177 usvc->netmask = usvc_compat->netmask;
2178}
2179
2180static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2181 struct ip_vs_dest_user *udest_compat)
2182{
0d1e71b0
SH
2183 memset(udest, 0, sizeof(*udest));
2184
c860c6b1
JV
2185 udest->addr.ip = udest_compat->addr;
2186 udest->port = udest_compat->port;
2187 udest->conn_flags = udest_compat->conn_flags;
2188 udest->weight = udest_compat->weight;
2189 udest->u_threshold = udest_compat->u_threshold;
2190 udest->l_threshold = udest_compat->l_threshold;
2191}
2192
1da177e4
LT
2193static int
2194do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2195{
fc723250 2196 struct net *net = sock_net(sk);
1da177e4
LT
2197 int ret;
2198 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2199 struct ip_vs_service_user *usvc_compat;
2200 struct ip_vs_service_user_kern usvc;
1da177e4 2201 struct ip_vs_service *svc;
c860c6b1
JV
2202 struct ip_vs_dest_user *udest_compat;
2203 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2204
2205 if (!capable(CAP_NET_ADMIN))
2206 return -EPERM;
2207
04bcef2a
AV
2208 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2209 return -EINVAL;
2210 if (len < 0 || len > MAX_ARG_LEN)
2211 return -EINVAL;
1da177e4 2212 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2213 pr_err("set_ctl: len %u != %u\n",
2214 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2215 return -EINVAL;
2216 }
2217
2218 if (copy_from_user(arg, user, len) != 0)
2219 return -EFAULT;
2220
2221 /* increase the module use count */
2222 ip_vs_use_count_inc();
2223
14cc3e2b 2224 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2225 ret = -ERESTARTSYS;
2226 goto out_dec;
2227 }
2228
2229 if (cmd == IP_VS_SO_SET_FLUSH) {
2230 /* Flush the virtual service */
fc723250 2231 ret = ip_vs_flush(net);
1da177e4
LT
2232 goto out_unlock;
2233 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2234 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2235 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4
LT
2236 goto out_unlock;
2237 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2238 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f
HS
2239 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2240 dm->syncid);
1da177e4
LT
2241 goto out_unlock;
2242 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2243 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
f131315f 2244 ret = stop_sync_thread(net, dm->state);
1da177e4
LT
2245 goto out_unlock;
2246 }
2247
c860c6b1
JV
2248 usvc_compat = (struct ip_vs_service_user *)arg;
2249 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2250
2251 /* We only use the new structs internally, so copy userspace compat
2252 * structs to extended internal versions */
2253 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2254 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2255
2256 if (cmd == IP_VS_SO_SET_ZERO) {
2257 /* if no service address is set, zero counters in all */
c860c6b1 2258 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2259 ret = ip_vs_zero_all(net);
1da177e4
LT
2260 goto out_unlock;
2261 }
2262 }
2263
2906f66a
VMR
2264 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2265 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2266 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2267 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2268 usvc.protocol, &usvc.addr.ip,
2269 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2270 ret = -EFAULT;
2271 goto out_unlock;
2272 }
2273
2274 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2275 if (usvc.fwmark == 0)
fc723250 2276 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2277 &usvc.addr, usvc.port);
1da177e4 2278 else
fc723250 2279 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2280
2281 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2282 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2283 ret = -ESRCH;
26c15cfd 2284 goto out_unlock;
1da177e4
LT
2285 }
2286
2287 switch (cmd) {
2288 case IP_VS_SO_SET_ADD:
2289 if (svc != NULL)
2290 ret = -EEXIST;
2291 else
fc723250 2292 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2293 break;
2294 case IP_VS_SO_SET_EDIT:
c860c6b1 2295 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2296 break;
2297 case IP_VS_SO_SET_DEL:
2298 ret = ip_vs_del_service(svc);
2299 if (!ret)
2300 goto out_unlock;
2301 break;
2302 case IP_VS_SO_SET_ZERO:
2303 ret = ip_vs_zero_service(svc);
2304 break;
2305 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2306 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2307 break;
2308 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2309 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2310 break;
2311 case IP_VS_SO_SET_DELDEST:
c860c6b1 2312 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2313 break;
2314 default:
2315 ret = -EINVAL;
2316 }
2317
1da177e4 2318 out_unlock:
14cc3e2b 2319 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2320 out_dec:
2321 /* decrease the module use count */
2322 ip_vs_use_count_dec();
2323
2324 return ret;
2325}
2326
2327
1da177e4
LT
2328static void
2329ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2330{
2331 dst->protocol = src->protocol;
e7ade46a 2332 dst->addr = src->addr.ip;
1da177e4
LT
2333 dst->port = src->port;
2334 dst->fwmark = src->fwmark;
4da62fc7 2335 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2336 dst->flags = src->flags;
2337 dst->timeout = src->timeout / HZ;
2338 dst->netmask = src->netmask;
2339 dst->num_dests = src->num_dests;
2340 ip_vs_copy_stats(&dst->stats, &src->stats);
2341}
2342
2343static inline int
fc723250
HS
2344__ip_vs_get_service_entries(struct net *net,
2345 const struct ip_vs_get_services *get,
1da177e4
LT
2346 struct ip_vs_get_services __user *uptr)
2347{
2348 int idx, count=0;
2349 struct ip_vs_service *svc;
2350 struct ip_vs_service_entry entry;
2351 int ret = 0;
2352
2353 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2354 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2355 /* Only expose IPv4 entries to old interface */
fc723250 2356 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2357 continue;
2358
1da177e4
LT
2359 if (count >= get->num_services)
2360 goto out;
4da62fc7 2361 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2362 ip_vs_copy_service(&entry, svc);
2363 if (copy_to_user(&uptr->entrytable[count],
2364 &entry, sizeof(entry))) {
2365 ret = -EFAULT;
2366 goto out;
2367 }
2368 count++;
2369 }
2370 }
2371
2372 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2373 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2374 /* Only expose IPv4 entries to old interface */
fc723250 2375 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2376 continue;
2377
1da177e4
LT
2378 if (count >= get->num_services)
2379 goto out;
4da62fc7 2380 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2381 ip_vs_copy_service(&entry, svc);
2382 if (copy_to_user(&uptr->entrytable[count],
2383 &entry, sizeof(entry))) {
2384 ret = -EFAULT;
2385 goto out;
2386 }
2387 count++;
2388 }
2389 }
2390 out:
2391 return ret;
2392}
2393
2394static inline int
fc723250 2395__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2396 struct ip_vs_get_dests __user *uptr)
2397{
2398 struct ip_vs_service *svc;
b18610de 2399 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2400 int ret = 0;
2401
2402 if (get->fwmark)
fc723250 2403 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2404 else
fc723250 2405 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2406 get->port);
b18610de 2407
1da177e4
LT
2408 if (svc) {
2409 int count = 0;
2410 struct ip_vs_dest *dest;
2411 struct ip_vs_dest_entry entry;
2412
2413 list_for_each_entry(dest, &svc->destinations, n_list) {
2414 if (count >= get->num_dests)
2415 break;
2416
e7ade46a 2417 entry.addr = dest->addr.ip;
1da177e4
LT
2418 entry.port = dest->port;
2419 entry.conn_flags = atomic_read(&dest->conn_flags);
2420 entry.weight = atomic_read(&dest->weight);
2421 entry.u_threshold = dest->u_threshold;
2422 entry.l_threshold = dest->l_threshold;
2423 entry.activeconns = atomic_read(&dest->activeconns);
2424 entry.inactconns = atomic_read(&dest->inactconns);
2425 entry.persistconns = atomic_read(&dest->persistconns);
2426 ip_vs_copy_stats(&entry.stats, &dest->stats);
2427 if (copy_to_user(&uptr->entrytable[count],
2428 &entry, sizeof(entry))) {
2429 ret = -EFAULT;
2430 break;
2431 }
2432 count++;
2433 }
1da177e4
LT
2434 } else
2435 ret = -ESRCH;
2436 return ret;
2437}
2438
2439static inline void
9330419d 2440__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2441{
091bb34c 2442#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2443 struct ip_vs_proto_data *pd;
091bb34c 2444#endif
9330419d 2445
1da177e4 2446#ifdef CONFIG_IP_VS_PROTO_TCP
9330419d
HS
2447 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2448 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2449 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
1da177e4
LT
2450#endif
2451#ifdef CONFIG_IP_VS_PROTO_UDP
9330419d 2452 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
1da177e4 2453 u->udp_timeout =
9330419d 2454 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
1da177e4
LT
2455#endif
2456}
2457
2458
/* Map a getsockopt command number to its index in get_arglen[]. */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Minimum argument sizes expected from userspace, per command. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* Two entries: one per daemon state reported by IP_VS_SO_GET_DAEMON */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2466
9b5b5cff 2467static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2468 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2469 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2470 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2471 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2472 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2473 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2474 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2475};
2476
2477static int
2478do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2479{
2480 unsigned char arg[128];
2481 int ret = 0;
04bcef2a 2482 unsigned int copylen;
fc723250 2483 struct net *net = sock_net(sk);
f131315f 2484 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2485
fc723250 2486 BUG_ON(!net);
1da177e4
LT
2487 if (!capable(CAP_NET_ADMIN))
2488 return -EPERM;
2489
04bcef2a
AV
2490 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2491 return -EINVAL;
2492
1da177e4 2493 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2494 pr_err("get_ctl: len %u < %u\n",
2495 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2496 return -EINVAL;
2497 }
2498
04bcef2a
AV
2499 copylen = get_arglen[GET_CMDID(cmd)];
2500 if (copylen > 128)
2501 return -EINVAL;
2502
2503 if (copy_from_user(arg, user, copylen) != 0)
1da177e4
LT
2504 return -EFAULT;
2505
14cc3e2b 2506 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2507 return -ERESTARTSYS;
2508
2509 switch (cmd) {
2510 case IP_VS_SO_GET_VERSION:
2511 {
2512 char buf[64];
2513
2514 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2515 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2516 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2517 ret = -EFAULT;
2518 goto out;
2519 }
2520 *len = strlen(buf)+1;
2521 }
2522 break;
2523
2524 case IP_VS_SO_GET_INFO:
2525 {
2526 struct ip_vs_getinfo info;
2527 info.version = IP_VS_VERSION_CODE;
6f7edb48 2528 info.size = ip_vs_conn_tab_size;
a0840e2e 2529 info.num_services = ipvs->num_services;
1da177e4
LT
2530 if (copy_to_user(user, &info, sizeof(info)) != 0)
2531 ret = -EFAULT;
2532 }
2533 break;
2534
2535 case IP_VS_SO_GET_SERVICES:
2536 {
2537 struct ip_vs_get_services *get;
2538 int size;
2539
2540 get = (struct ip_vs_get_services *)arg;
2541 size = sizeof(*get) +
2542 sizeof(struct ip_vs_service_entry) * get->num_services;
2543 if (*len != size) {
1e3e238e 2544 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2545 ret = -EINVAL;
2546 goto out;
2547 }
fc723250 2548 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2549 }
2550 break;
2551
2552 case IP_VS_SO_GET_SERVICE:
2553 {
2554 struct ip_vs_service_entry *entry;
2555 struct ip_vs_service *svc;
b18610de 2556 union nf_inet_addr addr;
1da177e4
LT
2557
2558 entry = (struct ip_vs_service_entry *)arg;
b18610de 2559 addr.ip = entry->addr;
1da177e4 2560 if (entry->fwmark)
fc723250 2561 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2562 else
fc723250
HS
2563 svc = __ip_vs_service_find(net, AF_INET,
2564 entry->protocol, &addr,
2565 entry->port);
1da177e4
LT
2566 if (svc) {
2567 ip_vs_copy_service(entry, svc);
2568 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2569 ret = -EFAULT;
1da177e4
LT
2570 } else
2571 ret = -ESRCH;
2572 }
2573 break;
2574
2575 case IP_VS_SO_GET_DESTS:
2576 {
2577 struct ip_vs_get_dests *get;
2578 int size;
2579
2580 get = (struct ip_vs_get_dests *)arg;
2581 size = sizeof(*get) +
2582 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2583 if (*len != size) {
1e3e238e 2584 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2585 ret = -EINVAL;
2586 goto out;
2587 }
fc723250 2588 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2589 }
2590 break;
2591
2592 case IP_VS_SO_GET_TIMEOUT:
2593 {
2594 struct ip_vs_timeout_user t;
2595
9330419d 2596 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2597 if (copy_to_user(user, &t, sizeof(t)) != 0)
2598 ret = -EFAULT;
2599 }
2600 break;
2601
2602 case IP_VS_SO_GET_DAEMON:
2603 {
2604 struct ip_vs_daemon_user d[2];
2605
2606 memset(&d, 0, sizeof(d));
f131315f 2607 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
1da177e4 2608 d[0].state = IP_VS_STATE_MASTER;
f131315f
HS
2609 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2610 sizeof(d[0].mcast_ifn));
2611 d[0].syncid = ipvs->master_syncid;
1da177e4 2612 }
f131315f 2613 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
1da177e4 2614 d[1].state = IP_VS_STATE_BACKUP;
f131315f
HS
2615 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2616 sizeof(d[1].mcast_ifn));
2617 d[1].syncid = ipvs->backup_syncid;
1da177e4
LT
2618 }
2619 if (copy_to_user(user, &d, sizeof(d)) != 0)
2620 ret = -EFAULT;
2621 }
2622 break;
2623
2624 default:
2625 ret = -EINVAL;
2626 }
2627
2628 out:
14cc3e2b 2629 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2630 return ret;
2631}
2632
2633
2634static struct nf_sockopt_ops ip_vs_sockopts = {
2635 .pf = PF_INET,
2636 .set_optmin = IP_VS_BASE_CTL,
2637 .set_optmax = IP_VS_SO_SET_MAX+1,
2638 .set = do_ip_vs_set_ctl,
2639 .get_optmin = IP_VS_BASE_CTL,
2640 .get_optmax = IP_VS_SO_GET_MAX+1,
2641 .get = do_ip_vs_get_ctl,
16fcec35 2642 .owner = THIS_MODULE,
1da177e4
LT
2643};
2644
9a812198
JV
2645/*
2646 * Generic Netlink interface
2647 */
2648
2649/* IPVS genetlink family */
2650static struct genl_family ip_vs_genl_family = {
2651 .id = GENL_ID_GENERATE,
2652 .hdrsize = 0,
2653 .name = IPVS_GENL_NAME,
2654 .version = IPVS_GENL_VERSION,
2655 .maxattr = IPVS_CMD_MAX,
c6d2d445 2656 .netnsok = true, /* Make ipvsadm to work on netns */
9a812198
JV
2657};
2658
2659/* Policy used for first-level command attributes */
2660static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2661 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2662 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2663 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2664 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2665 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2666 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2667};
2668
2669/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2670static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2671 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2672 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2673 .len = IP_VS_IFNAME_MAXLEN },
2674 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2675};
2676
2677/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2678static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2679 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2680 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2681 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2682 .len = sizeof(union nf_inet_addr) },
2683 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2684 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2685 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2686 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2687 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2688 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2689 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2690 .len = sizeof(struct ip_vs_flags) },
2691 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2692 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2693 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2694};
2695
2696/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2697static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2698 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2699 .len = sizeof(union nf_inet_addr) },
2700 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2701 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2702 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2703 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2704 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2705 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2706 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2707 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2708 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2709};
2710
2711static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2712 struct ip_vs_stats *stats)
2713{
55a3d4e1 2714 struct ip_vs_stats_user ustats;
9a812198
JV
2715 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2716 if (!nl_stats)
2717 return -EMSGSIZE;
2718
55a3d4e1 2719 ip_vs_copy_stats(&ustats, stats);
9a812198 2720
55a3d4e1
JA
2721 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2722 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2723 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2724 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2725 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2730 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
9a812198
JV
2731
2732 nla_nest_end(skb, nl_stats);
2733
2734 return 0;
2735
2736nla_put_failure:
9a812198
JV
2737 nla_nest_cancel(skb, nl_stats);
2738 return -EMSGSIZE;
2739}
2740
2741static int ip_vs_genl_fill_service(struct sk_buff *skb,
2742 struct ip_vs_service *svc)
2743{
2744 struct nlattr *nl_service;
2745 struct ip_vs_flags flags = { .flags = svc->flags,
2746 .mask = ~0 };
2747
2748 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2749 if (!nl_service)
2750 return -EMSGSIZE;
2751
f94fd041 2752 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
9a812198
JV
2753
2754 if (svc->fwmark) {
2755 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2756 } else {
2757 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2758 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2759 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2760 }
2761
2762 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
0d1e71b0
SH
2763 if (svc->pe)
2764 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
9a812198
JV
2765 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2766 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2767 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2768
2769 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2770 goto nla_put_failure;
2771
2772 nla_nest_end(skb, nl_service);
2773
2774 return 0;
2775
2776nla_put_failure:
2777 nla_nest_cancel(skb, nl_service);
2778 return -EMSGSIZE;
2779}
2780
2781static int ip_vs_genl_dump_service(struct sk_buff *skb,
2782 struct ip_vs_service *svc,
2783 struct netlink_callback *cb)
2784{
2785 void *hdr;
2786
2787 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2788 &ip_vs_genl_family, NLM_F_MULTI,
2789 IPVS_CMD_NEW_SERVICE);
2790 if (!hdr)
2791 return -EMSGSIZE;
2792
2793 if (ip_vs_genl_fill_service(skb, svc) < 0)
2794 goto nla_put_failure;
2795
2796 return genlmsg_end(skb, hdr);
2797
2798nla_put_failure:
2799 genlmsg_cancel(skb, hdr);
2800 return -EMSGSIZE;
2801}
2802
2803static int ip_vs_genl_dump_services(struct sk_buff *skb,
2804 struct netlink_callback *cb)
2805{
2806 int idx = 0, i;
2807 int start = cb->args[0];
2808 struct ip_vs_service *svc;
fc723250 2809 struct net *net = skb_sknet(skb);
9a812198
JV
2810
2811 mutex_lock(&__ip_vs_mutex);
2812 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2813 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2814 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2815 continue;
2816 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2817 idx--;
2818 goto nla_put_failure;
2819 }
2820 }
2821 }
2822
2823 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2824 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2825 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2826 continue;
2827 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2828 idx--;
2829 goto nla_put_failure;
2830 }
2831 }
2832 }
2833
2834nla_put_failure:
2835 mutex_unlock(&__ip_vs_mutex);
2836 cb->args[0] = idx;
2837
2838 return skb->len;
2839}
2840
fc723250
HS
2841static int ip_vs_genl_parse_service(struct net *net,
2842 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2843 struct nlattr *nla, int full_entry,
2844 struct ip_vs_service **ret_svc)
9a812198
JV
2845{
2846 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2847 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 2848 struct ip_vs_service *svc;
9a812198
JV
2849
2850 /* Parse mandatory identifying service fields first */
2851 if (nla == NULL ||
2852 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2853 return -EINVAL;
2854
2855 nla_af = attrs[IPVS_SVC_ATTR_AF];
2856 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2857 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2858 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2859 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2860
2861 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2862 return -EINVAL;
2863
258c8893
SH
2864 memset(usvc, 0, sizeof(*usvc));
2865
c860c6b1 2866 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
2867#ifdef CONFIG_IP_VS_IPV6
2868 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2869#else
2870 if (usvc->af != AF_INET)
2871#endif
9a812198
JV
2872 return -EAFNOSUPPORT;
2873
2874 if (nla_fwmark) {
2875 usvc->protocol = IPPROTO_TCP;
2876 usvc->fwmark = nla_get_u32(nla_fwmark);
2877 } else {
2878 usvc->protocol = nla_get_u16(nla_protocol);
2879 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2880 usvc->port = nla_get_u16(nla_port);
2881 usvc->fwmark = 0;
2882 }
2883
26c15cfd 2884 if (usvc->fwmark)
fc723250 2885 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 2886 else
fc723250 2887 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
2888 &usvc->addr, usvc->port);
2889 *ret_svc = svc;
2890
9a812198
JV
2891 /* If a full entry was requested, check for the additional fields */
2892 if (full_entry) {
0d1e71b0 2893 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
2894 *nla_netmask;
2895 struct ip_vs_flags flags;
9a812198
JV
2896
2897 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 2898 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
2899 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2900 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2901 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2902
2903 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2904 return -EINVAL;
2905
2906 nla_memcpy(&flags, nla_flags, sizeof(flags));
2907
2908 /* prefill flags from service if it already exists */
26c15cfd 2909 if (svc)
9a812198 2910 usvc->flags = svc->flags;
9a812198
JV
2911
2912 /* set new flags from userland */
2913 usvc->flags = (usvc->flags & ~flags.mask) |
2914 (flags.flags & flags.mask);
c860c6b1 2915 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 2916 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
2917 usvc->timeout = nla_get_u32(nla_timeout);
2918 usvc->netmask = nla_get_u32(nla_netmask);
2919 }
2920
2921 return 0;
2922}
2923
fc723250
HS
2924static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2925 struct nlattr *nla)
9a812198 2926{
c860c6b1 2927 struct ip_vs_service_user_kern usvc;
26c15cfd 2928 struct ip_vs_service *svc;
9a812198
JV
2929 int ret;
2930
fc723250 2931 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 2932 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
2933}
2934
2935static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2936{
2937 struct nlattr *nl_dest;
2938
2939 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2940 if (!nl_dest)
2941 return -EMSGSIZE;
2942
2943 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2944 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2945
2946 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2947 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2948 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2949 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2950 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2951 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2952 atomic_read(&dest->activeconns));
2953 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2954 atomic_read(&dest->inactconns));
2955 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2956 atomic_read(&dest->persistconns));
2957
2958 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2959 goto nla_put_failure;
2960
2961 nla_nest_end(skb, nl_dest);
2962
2963 return 0;
2964
2965nla_put_failure:
2966 nla_nest_cancel(skb, nl_dest);
2967 return -EMSGSIZE;
2968}
2969
2970static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2971 struct netlink_callback *cb)
2972{
2973 void *hdr;
2974
2975 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2976 &ip_vs_genl_family, NLM_F_MULTI,
2977 IPVS_CMD_NEW_DEST);
2978 if (!hdr)
2979 return -EMSGSIZE;
2980
2981 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2982 goto nla_put_failure;
2983
2984 return genlmsg_end(skb, hdr);
2985
2986nla_put_failure:
2987 genlmsg_cancel(skb, hdr);
2988 return -EMSGSIZE;
2989}
2990
2991static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2992 struct netlink_callback *cb)
2993{
2994 int idx = 0;
2995 int start = cb->args[0];
2996 struct ip_vs_service *svc;
2997 struct ip_vs_dest *dest;
2998 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 2999 struct net *net = skb_sknet(skb);
9a812198
JV
3000
3001 mutex_lock(&__ip_vs_mutex);
3002
3003 /* Try to find the service for which to dump destinations */
3004 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3005 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3006 goto out_err;
3007
a0840e2e 3008
fc723250 3009 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3010 if (IS_ERR(svc) || svc == NULL)
3011 goto out_err;
3012
3013 /* Dump the destinations */
3014 list_for_each_entry(dest, &svc->destinations, n_list) {
3015 if (++idx <= start)
3016 continue;
3017 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3018 idx--;
3019 goto nla_put_failure;
3020 }
3021 }
3022
3023nla_put_failure:
3024 cb->args[0] = idx;
9a812198
JV
3025
3026out_err:
3027 mutex_unlock(&__ip_vs_mutex);
3028
3029 return skb->len;
3030}
3031
c860c6b1 3032static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3033 struct nlattr *nla, int full_entry)
3034{
3035 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3036 struct nlattr *nla_addr, *nla_port;
3037
3038 /* Parse mandatory identifying destination fields first */
3039 if (nla == NULL ||
3040 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3041 return -EINVAL;
3042
3043 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3044 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3045
3046 if (!(nla_addr && nla_port))
3047 return -EINVAL;
3048
258c8893
SH
3049 memset(udest, 0, sizeof(*udest));
3050
9a812198
JV
3051 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3052 udest->port = nla_get_u16(nla_port);
3053
3054 /* If a full entry was requested, check for the additional fields */
3055 if (full_entry) {
3056 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3057 *nla_l_thresh;
3058
3059 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3060 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3061 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3062 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3063
3064 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3065 return -EINVAL;
3066
3067 udest->conn_flags = nla_get_u32(nla_fwd)
3068 & IP_VS_CONN_F_FWD_MASK;
3069 udest->weight = nla_get_u32(nla_weight);
3070 udest->u_threshold = nla_get_u32(nla_u_thresh);
3071 udest->l_threshold = nla_get_u32(nla_l_thresh);
3072 }
3073
3074 return 0;
3075}
3076
3077static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3078 const char *mcast_ifn, __be32 syncid)
3079{
3080 struct nlattr *nl_daemon;
3081
3082 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3083 if (!nl_daemon)
3084 return -EMSGSIZE;
3085
3086 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
3087 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
3088 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
3089
3090 nla_nest_end(skb, nl_daemon);
3091
3092 return 0;
3093
3094nla_put_failure:
3095 nla_nest_cancel(skb, nl_daemon);
3096 return -EMSGSIZE;
3097}
3098
3099static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3100 const char *mcast_ifn, __be32 syncid,
3101 struct netlink_callback *cb)
3102{
3103 void *hdr;
3104 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3105 &ip_vs_genl_family, NLM_F_MULTI,
3106 IPVS_CMD_NEW_DAEMON);
3107 if (!hdr)
3108 return -EMSGSIZE;
3109
3110 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3111 goto nla_put_failure;
3112
3113 return genlmsg_end(skb, hdr);
3114
3115nla_put_failure:
3116 genlmsg_cancel(skb, hdr);
3117 return -EMSGSIZE;
3118}
3119
3120static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3121 struct netlink_callback *cb)
3122{
f131315f
HS
3123 struct net *net = skb_net(skb);
3124 struct netns_ipvs *ipvs = net_ipvs(net);
3125
9a812198 3126 mutex_lock(&__ip_vs_mutex);
f131315f 3127 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3128 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3129 ipvs->master_mcast_ifn,
3130 ipvs->master_syncid, cb) < 0)
9a812198
JV
3131 goto nla_put_failure;
3132
3133 cb->args[0] = 1;
3134 }
3135
f131315f 3136 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3137 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3138 ipvs->backup_mcast_ifn,
3139 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3140 goto nla_put_failure;
3141
3142 cb->args[1] = 1;
3143 }
3144
3145nla_put_failure:
3146 mutex_unlock(&__ip_vs_mutex);
3147
3148 return skb->len;
3149}
3150
f131315f 3151static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3152{
3153 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3154 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3155 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3156 return -EINVAL;
3157
f131315f
HS
3158 return start_sync_thread(net,
3159 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3160 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3161 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3162}
3163
f131315f 3164static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3165{
3166 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3167 return -EINVAL;
3168
f131315f
HS
3169 return stop_sync_thread(net,
3170 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3171}
3172
9330419d 3173static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3174{
3175 struct ip_vs_timeout_user t;
3176
9330419d 3177 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3178
3179 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3180 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3181
3182 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3183 t.tcp_fin_timeout =
3184 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3185
3186 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3187 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3188
9330419d 3189 return ip_vs_set_timeout(net, &t);
9a812198
JV
3190}
3191
3192static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3193{
3194 struct ip_vs_service *svc = NULL;
c860c6b1
JV
3195 struct ip_vs_service_user_kern usvc;
3196 struct ip_vs_dest_user_kern udest;
9a812198
JV
3197 int ret = 0, cmd;
3198 int need_full_svc = 0, need_full_dest = 0;
fc723250 3199 struct net *net;
a0840e2e 3200 struct netns_ipvs *ipvs;
9a812198 3201
fc723250 3202 net = skb_sknet(skb);
a0840e2e 3203 ipvs = net_ipvs(net);
9a812198
JV
3204 cmd = info->genlhdr->cmd;
3205
3206 mutex_lock(&__ip_vs_mutex);
3207
3208 if (cmd == IPVS_CMD_FLUSH) {
fc723250 3209 ret = ip_vs_flush(net);
9a812198
JV
3210 goto out;
3211 } else if (cmd == IPVS_CMD_SET_CONFIG) {
9330419d 3212 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3213 goto out;
3214 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3215 cmd == IPVS_CMD_DEL_DAEMON) {
3216
3217 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3218
3219 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3220 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3221 info->attrs[IPVS_CMD_ATTR_DAEMON],
3222 ip_vs_daemon_policy)) {
3223 ret = -EINVAL;
3224 goto out;
3225 }
3226
3227 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3228 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3229 else
f131315f 3230 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
9a812198
JV
3231 goto out;
3232 } else if (cmd == IPVS_CMD_ZERO &&
3233 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3234 ret = ip_vs_zero_all(net);
9a812198
JV
3235 goto out;
3236 }
3237
3238 /* All following commands require a service argument, so check if we
3239 * received a valid one. We need a full service specification when
3240 * adding / editing a service. Only identifying members otherwise. */
3241 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3242 need_full_svc = 1;
3243
fc723250 3244 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3245 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3246 need_full_svc, &svc);
9a812198
JV
3247 if (ret)
3248 goto out;
3249
9a812198
JV
3250 /* Unless we're adding a new service, the service must already exist */
3251 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3252 ret = -ESRCH;
3253 goto out;
3254 }
3255
3256 /* Destination commands require a valid destination argument. For
3257 * adding / editing a destination, we need a full destination
3258 * specification. */
3259 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3260 cmd == IPVS_CMD_DEL_DEST) {
3261 if (cmd != IPVS_CMD_DEL_DEST)
3262 need_full_dest = 1;
3263
3264 ret = ip_vs_genl_parse_dest(&udest,
3265 info->attrs[IPVS_CMD_ATTR_DEST],
3266 need_full_dest);
3267 if (ret)
3268 goto out;
3269 }
3270
3271 switch (cmd) {
3272 case IPVS_CMD_NEW_SERVICE:
3273 if (svc == NULL)
fc723250 3274 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3275 else
3276 ret = -EEXIST;
3277 break;
3278 case IPVS_CMD_SET_SERVICE:
3279 ret = ip_vs_edit_service(svc, &usvc);
3280 break;
3281 case IPVS_CMD_DEL_SERVICE:
3282 ret = ip_vs_del_service(svc);
26c15cfd 3283 /* do not use svc, it can be freed */
9a812198
JV
3284 break;
3285 case IPVS_CMD_NEW_DEST:
3286 ret = ip_vs_add_dest(svc, &udest);
3287 break;
3288 case IPVS_CMD_SET_DEST:
3289 ret = ip_vs_edit_dest(svc, &udest);
3290 break;
3291 case IPVS_CMD_DEL_DEST:
3292 ret = ip_vs_del_dest(svc, &udest);
3293 break;
3294 case IPVS_CMD_ZERO:
3295 ret = ip_vs_zero_service(svc);
3296 break;
3297 default:
3298 ret = -EINVAL;
3299 }
3300
3301out:
9a812198
JV
3302 mutex_unlock(&__ip_vs_mutex);
3303
3304 return ret;
3305}
3306
3307static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3308{
3309 struct sk_buff *msg;
3310 void *reply;
3311 int ret, cmd, reply_cmd;
fc723250 3312 struct net *net;
a0840e2e 3313 struct netns_ipvs *ipvs;
9a812198 3314
fc723250 3315 net = skb_sknet(skb);
a0840e2e 3316 ipvs = net_ipvs(net);
9a812198
JV
3317 cmd = info->genlhdr->cmd;
3318
3319 if (cmd == IPVS_CMD_GET_SERVICE)
3320 reply_cmd = IPVS_CMD_NEW_SERVICE;
3321 else if (cmd == IPVS_CMD_GET_INFO)
3322 reply_cmd = IPVS_CMD_SET_INFO;
3323 else if (cmd == IPVS_CMD_GET_CONFIG)
3324 reply_cmd = IPVS_CMD_SET_CONFIG;
3325 else {
1e3e238e 3326 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3327 return -EINVAL;
3328 }
3329
3330 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3331 if (!msg)
3332 return -ENOMEM;
3333
3334 mutex_lock(&__ip_vs_mutex);
3335
3336 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3337 if (reply == NULL)
3338 goto nla_put_failure;
3339
3340 switch (cmd) {
3341 case IPVS_CMD_GET_SERVICE:
3342 {
3343 struct ip_vs_service *svc;
3344
fc723250
HS
3345 svc = ip_vs_genl_find_service(net,
3346 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3347 if (IS_ERR(svc)) {
3348 ret = PTR_ERR(svc);
3349 goto out_err;
3350 } else if (svc) {
3351 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3352 if (ret)
3353 goto nla_put_failure;
3354 } else {
3355 ret = -ESRCH;
3356 goto out_err;
3357 }
3358
3359 break;
3360 }
3361
3362 case IPVS_CMD_GET_CONFIG:
3363 {
3364 struct ip_vs_timeout_user t;
3365
9330419d 3366 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3367#ifdef CONFIG_IP_VS_PROTO_TCP
3368 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3369 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3370 t.tcp_fin_timeout);
3371#endif
3372#ifdef CONFIG_IP_VS_PROTO_UDP
3373 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3374#endif
3375
3376 break;
3377 }
3378
3379 case IPVS_CMD_GET_INFO:
3380 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3381 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
6f7edb48 3382 ip_vs_conn_tab_size);
9a812198
JV
3383 break;
3384 }
3385
3386 genlmsg_end(msg, reply);
134e6375 3387 ret = genlmsg_reply(msg, info);
9a812198
JV
3388 goto out;
3389
3390nla_put_failure:
1e3e238e 3391 pr_err("not enough space in Netlink message\n");
9a812198
JV
3392 ret = -EMSGSIZE;
3393
3394out_err:
3395 nlmsg_free(msg);
3396out:
3397 mutex_unlock(&__ip_vs_mutex);
3398
3399 return ret;
3400}
3401
3402
3403static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3404 {
3405 .cmd = IPVS_CMD_NEW_SERVICE,
3406 .flags = GENL_ADMIN_PERM,
3407 .policy = ip_vs_cmd_policy,
3408 .doit = ip_vs_genl_set_cmd,
3409 },
3410 {
3411 .cmd = IPVS_CMD_SET_SERVICE,
3412 .flags = GENL_ADMIN_PERM,
3413 .policy = ip_vs_cmd_policy,
3414 .doit = ip_vs_genl_set_cmd,
3415 },
3416 {
3417 .cmd = IPVS_CMD_DEL_SERVICE,
3418 .flags = GENL_ADMIN_PERM,
3419 .policy = ip_vs_cmd_policy,
3420 .doit = ip_vs_genl_set_cmd,
3421 },
3422 {
3423 .cmd = IPVS_CMD_GET_SERVICE,
3424 .flags = GENL_ADMIN_PERM,
3425 .doit = ip_vs_genl_get_cmd,
3426 .dumpit = ip_vs_genl_dump_services,
3427 .policy = ip_vs_cmd_policy,
3428 },
3429 {
3430 .cmd = IPVS_CMD_NEW_DEST,
3431 .flags = GENL_ADMIN_PERM,
3432 .policy = ip_vs_cmd_policy,
3433 .doit = ip_vs_genl_set_cmd,
3434 },
3435 {
3436 .cmd = IPVS_CMD_SET_DEST,
3437 .flags = GENL_ADMIN_PERM,
3438 .policy = ip_vs_cmd_policy,
3439 .doit = ip_vs_genl_set_cmd,
3440 },
3441 {
3442 .cmd = IPVS_CMD_DEL_DEST,
3443 .flags = GENL_ADMIN_PERM,
3444 .policy = ip_vs_cmd_policy,
3445 .doit = ip_vs_genl_set_cmd,
3446 },
3447 {
3448 .cmd = IPVS_CMD_GET_DEST,
3449 .flags = GENL_ADMIN_PERM,
3450 .policy = ip_vs_cmd_policy,
3451 .dumpit = ip_vs_genl_dump_dests,
3452 },
3453 {
3454 .cmd = IPVS_CMD_NEW_DAEMON,
3455 .flags = GENL_ADMIN_PERM,
3456 .policy = ip_vs_cmd_policy,
3457 .doit = ip_vs_genl_set_cmd,
3458 },
3459 {
3460 .cmd = IPVS_CMD_DEL_DAEMON,
3461 .flags = GENL_ADMIN_PERM,
3462 .policy = ip_vs_cmd_policy,
3463 .doit = ip_vs_genl_set_cmd,
3464 },
3465 {
3466 .cmd = IPVS_CMD_GET_DAEMON,
3467 .flags = GENL_ADMIN_PERM,
3468 .dumpit = ip_vs_genl_dump_daemons,
3469 },
3470 {
3471 .cmd = IPVS_CMD_SET_CONFIG,
3472 .flags = GENL_ADMIN_PERM,
3473 .policy = ip_vs_cmd_policy,
3474 .doit = ip_vs_genl_set_cmd,
3475 },
3476 {
3477 .cmd = IPVS_CMD_GET_CONFIG,
3478 .flags = GENL_ADMIN_PERM,
3479 .doit = ip_vs_genl_get_cmd,
3480 },
3481 {
3482 .cmd = IPVS_CMD_GET_INFO,
3483 .flags = GENL_ADMIN_PERM,
3484 .doit = ip_vs_genl_get_cmd,
3485 },
3486 {
3487 .cmd = IPVS_CMD_ZERO,
3488 .flags = GENL_ADMIN_PERM,
3489 .policy = ip_vs_cmd_policy,
3490 .doit = ip_vs_genl_set_cmd,
3491 },
3492 {
3493 .cmd = IPVS_CMD_FLUSH,
3494 .flags = GENL_ADMIN_PERM,
3495 .doit = ip_vs_genl_set_cmd,
3496 },
3497};
3498
3499static int __init ip_vs_genl_register(void)
3500{
8f698d54
MM
3501 return genl_register_family_with_ops(&ip_vs_genl_family,
3502 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3503}
3504
3505static void ip_vs_genl_unregister(void)
3506{
3507 genl_unregister_family(&ip_vs_genl_family);
3508}
3509
3510/* End of Generic Netlink interface definitions */
3511
61b1ab45
HS
3512/*
3513 * per netns init/exit func.
3514 */
14e40546
SH
3515#ifdef CONFIG_SYSCTL
3516int __net_init __ip_vs_control_init_sysctl(struct net *net)
61b1ab45 3517{
fc723250
HS
3518 int idx;
3519 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3520 struct ctl_table *tbl;
fc723250 3521
a0840e2e
HS
3522 atomic_set(&ipvs->dropentry, 0);
3523 spin_lock_init(&ipvs->dropentry_lock);
3524 spin_lock_init(&ipvs->droppacket_lock);
3525 spin_lock_init(&ipvs->securetcp_lock);
a0840e2e
HS
3526
3527 if (!net_eq(net, &init_net)) {
3528 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3529 if (tbl == NULL)
14e40546 3530 return -ENOMEM;
a0840e2e
HS
3531 } else
3532 tbl = vs_vars;
3533 /* Initialize sysctl defaults */
3534 idx = 0;
3535 ipvs->sysctl_amemthresh = 1024;
3536 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3537 ipvs->sysctl_am_droprate = 10;
3538 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3539 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3540 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3541#ifdef CONFIG_IP_VS_NFCT
3542 tbl[idx++].data = &ipvs->sysctl_conntrack;
3543#endif
3544 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3545 ipvs->sysctl_snat_reroute = 1;
3546 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3547 ipvs->sysctl_sync_ver = 1;
3548 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3549 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3550 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3551 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
59e0350e
SH
3552 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3553 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
a0840e2e
HS
3554 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3555 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3556 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3557
3558
3559 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
07924709 3560 tbl);
0443929f
SH
3561 if (ipvs->sysctl_hdr == NULL) {
3562 if (!net_eq(net, &init_net))
3563 kfree(tbl);
14e40546 3564 return -ENOMEM;
0443929f 3565 }
6ef757f9 3566 ip_vs_start_estimator(net, &ipvs->tot_stats);
a0840e2e 3567 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3568 /* Schedule defense work */
3569 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3570 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45 3571
61b1ab45 3572 return 0;
61b1ab45
HS
3573}
3574
14e40546 3575void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
61b1ab45 3576{
b17fc996
HS
3577 struct netns_ipvs *ipvs = net_ipvs(net);
3578
f2431e6e
HS
3579 cancel_delayed_work_sync(&ipvs->defense_work);
3580 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3581 unregister_net_sysctl_table(ipvs->sysctl_hdr);
14e40546
SH
3582}
3583
3584#else
3585
3586int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3587void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3588
0443929f 3589#endif
14e40546
SH
3590
3591int __net_init __ip_vs_control_init(struct net *net)
3592{
3593 int idx;
3594 struct netns_ipvs *ipvs = net_ipvs(net);
3595
3596 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3597
3598 /* Initialize rs_table */
3599 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3600 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3601
3602 INIT_LIST_HEAD(&ipvs->dest_trash);
3603 atomic_set(&ipvs->ftpsvc_counter, 0);
3604 atomic_set(&ipvs->nullsvc_counter, 0);
3605
3606 /* procfs stats */
3607 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
f40f94fc 3608 if (!ipvs->tot_stats.cpustats) {
14e40546
SH
3609 pr_err("%s(): alloc_percpu.\n", __func__);
3610 return -ENOMEM;
3611 }
3612 spin_lock_init(&ipvs->tot_stats.lock);
3613
3614 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3615 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3616 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3617 &ip_vs_stats_percpu_fops);
3618
3619 if (__ip_vs_control_init_sysctl(net))
3620 goto err;
3621
3622 return 0;
3623
3624err:
2a0751af 3625 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3626 return -ENOMEM;
3627}
3628
3629static void __net_exit __ip_vs_control_cleanup(struct net *net)
3630{
b17fc996
HS
3631 struct netns_ipvs *ipvs = net_ipvs(net);
3632
f2431e6e 3633 ip_vs_trash_cleanup(net);
6ef757f9 3634 ip_vs_stop_estimator(net, &ipvs->tot_stats);
14e40546 3635 __ip_vs_control_cleanup_sysctl(net);
b17fc996 3636 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3637 proc_net_remove(net, "ip_vs_stats");
3638 proc_net_remove(net, "ip_vs");
2a0751af 3639 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3640}
3641
3642static struct pernet_operations ipvs_control_ops = {
3643 .init = __ip_vs_control_init,
3644 .exit = __ip_vs_control_cleanup,
3645};
1da177e4 3646
048cf48b 3647int __init ip_vs_control_init(void)
1da177e4 3648{
1da177e4 3649 int idx;
fc723250 3650 int ret;
1da177e4
LT
3651
3652 EnterFunction(2);
3653
fc723250 3654 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
d86bef73
EB
3655 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3656 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3657 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3658 }
fc723250
HS
3659
3660 ret = register_pernet_subsys(&ipvs_control_ops);
3661 if (ret) {
3662 pr_err("cannot register namespace.\n");
3663 goto err;
d86bef73 3664 }
fc723250
HS
3665
3666 smp_wmb(); /* Do we really need it now ? */
d86bef73 3667
1da177e4
LT
3668 ret = nf_register_sockopt(&ip_vs_sockopts);
3669 if (ret) {
1e3e238e 3670 pr_err("cannot register sockopt.\n");
fc723250 3671 goto err_net;
1da177e4
LT
3672 }
3673
9a812198
JV
3674 ret = ip_vs_genl_register();
3675 if (ret) {
1e3e238e 3676 pr_err("cannot register Generic Netlink interface.\n");
9a812198 3677 nf_unregister_sockopt(&ip_vs_sockopts);
fc723250 3678 goto err_net;
9a812198
JV
3679 }
3680
1da177e4
LT
3681 LeaveFunction(2);
3682 return 0;
fc723250
HS
3683
3684err_net:
3685 unregister_pernet_subsys(&ipvs_control_ops);
3686err:
3687 return ret;
1da177e4
LT
3688}
3689
3690
3691void ip_vs_control_cleanup(void)
3692{
3693 EnterFunction(2);
61b1ab45 3694 unregister_pernet_subsys(&ipvs_control_ops);
9a812198 3695 ip_vs_genl_unregister();
1da177e4
LT
3696 nf_unregister_sockopt(&ip_vs_sockopts);
3697 LeaveFunction(2);
3698}