netlink: Rename pid to portid to avoid confusion
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
9aada7ac
HE
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
1da177e4
LT
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
1da177e4 33#include <linux/seq_file.h>
5a0e3ad6 34#include <linux/slab.h>
1da177e4
LT
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
14cc3e2b 38#include <linux/mutex.h>
1da177e4 39
457c4cbc 40#include <net/net_namespace.h>
9330419d 41#include <linux/nsproxy.h>
1da177e4 42#include <net/ip.h>
09571c7a
VB
43#ifdef CONFIG_IP_VS_IPV6
44#include <net/ipv6.h>
45#include <net/ip6_route.h>
46#endif
14c85021 47#include <net/route.h>
1da177e4 48#include <net/sock.h>
9a812198 49#include <net/genetlink.h>
1da177e4
LT
50
51#include <asm/uaccess.h>
52
53#include <net/ip_vs.h>
54
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 56static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
1da177e4 61/* sysctl variables */
1da177e4
LT
62
63#ifdef CONFIG_IP_VS_DEBUG
64static int sysctl_ip_vs_debug_level = 0;
65
66int ip_vs_get_debug_level(void)
67{
68 return sysctl_ip_vs_debug_level;
69}
70#endif
71
7a4f0761
HS
72
73/* Protos */
74static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
09571c7a
VB
77#ifdef CONFIG_IP_VS_IPV6
78/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
c24584c0
ED
79static bool __ip_vs_addr_is_local_v6(struct net *net,
80 const struct in6_addr *addr)
09571c7a 81{
4c9483b2
DM
82 struct flowi6 fl6 = {
83 .daddr = *addr,
09571c7a 84 };
c24584c0
ED
85 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86 bool is_local;
09571c7a 87
c24584c0 88 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
09571c7a 89
c24584c0
ED
90 dst_release(dst);
91 return is_local;
09571c7a
VB
92}
93#endif
14e40546
SH
94
95#ifdef CONFIG_SYSCTL
1da177e4 96/*
af9debd4
JA
97 * update_defense_level is called from keventd and from sysctl,
98 * so it needs to protect itself from softirqs
1da177e4 99 */
9330419d 100static void update_defense_level(struct netns_ipvs *ipvs)
1da177e4
LT
101{
102 struct sysinfo i;
103 static int old_secure_tcp = 0;
104 int availmem;
105 int nomem;
106 int to_change = -1;
107
108 /* we only count free and buffered memory (in pages) */
109 si_meminfo(&i);
110 availmem = i.freeram + i.bufferram;
111 /* however in linux 2.5 the i.bufferram is total page cache size,
112 we need adjust it */
113 /* si_swapinfo(&i); */
114 /* availmem = availmem - (i.totalswap - i.freeswap); */
115
a0840e2e 116 nomem = (availmem < ipvs->sysctl_amemthresh);
1da177e4 117
af9debd4
JA
118 local_bh_disable();
119
1da177e4 120 /* drop_entry */
a0840e2e
HS
121 spin_lock(&ipvs->dropentry_lock);
122 switch (ipvs->sysctl_drop_entry) {
1da177e4 123 case 0:
a0840e2e 124 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
125 break;
126 case 1:
127 if (nomem) {
a0840e2e
HS
128 atomic_set(&ipvs->dropentry, 1);
129 ipvs->sysctl_drop_entry = 2;
1da177e4 130 } else {
a0840e2e 131 atomic_set(&ipvs->dropentry, 0);
1da177e4
LT
132 }
133 break;
134 case 2:
135 if (nomem) {
a0840e2e 136 atomic_set(&ipvs->dropentry, 1);
1da177e4 137 } else {
a0840e2e
HS
138 atomic_set(&ipvs->dropentry, 0);
139 ipvs->sysctl_drop_entry = 1;
1da177e4
LT
140 };
141 break;
142 case 3:
a0840e2e 143 atomic_set(&ipvs->dropentry, 1);
1da177e4
LT
144 break;
145 }
a0840e2e 146 spin_unlock(&ipvs->dropentry_lock);
1da177e4
LT
147
148 /* drop_packet */
a0840e2e
HS
149 spin_lock(&ipvs->droppacket_lock);
150 switch (ipvs->sysctl_drop_packet) {
1da177e4 151 case 0:
a0840e2e 152 ipvs->drop_rate = 0;
1da177e4
LT
153 break;
154 case 1:
155 if (nomem) {
a0840e2e
HS
156 ipvs->drop_rate = ipvs->drop_counter
157 = ipvs->sysctl_amemthresh /
158 (ipvs->sysctl_amemthresh-availmem);
159 ipvs->sysctl_drop_packet = 2;
1da177e4 160 } else {
a0840e2e 161 ipvs->drop_rate = 0;
1da177e4
LT
162 }
163 break;
164 case 2:
165 if (nomem) {
a0840e2e
HS
166 ipvs->drop_rate = ipvs->drop_counter
167 = ipvs->sysctl_amemthresh /
168 (ipvs->sysctl_amemthresh-availmem);
1da177e4 169 } else {
a0840e2e
HS
170 ipvs->drop_rate = 0;
171 ipvs->sysctl_drop_packet = 1;
1da177e4
LT
172 }
173 break;
174 case 3:
a0840e2e 175 ipvs->drop_rate = ipvs->sysctl_am_droprate;
1da177e4
LT
176 break;
177 }
a0840e2e 178 spin_unlock(&ipvs->droppacket_lock);
1da177e4
LT
179
180 /* secure_tcp */
a0840e2e
HS
181 spin_lock(&ipvs->securetcp_lock);
182 switch (ipvs->sysctl_secure_tcp) {
1da177e4
LT
183 case 0:
184 if (old_secure_tcp >= 2)
185 to_change = 0;
186 break;
187 case 1:
188 if (nomem) {
189 if (old_secure_tcp < 2)
190 to_change = 1;
a0840e2e 191 ipvs->sysctl_secure_tcp = 2;
1da177e4
LT
192 } else {
193 if (old_secure_tcp >= 2)
194 to_change = 0;
195 }
196 break;
197 case 2:
198 if (nomem) {
199 if (old_secure_tcp < 2)
200 to_change = 1;
201 } else {
202 if (old_secure_tcp >= 2)
203 to_change = 0;
a0840e2e 204 ipvs->sysctl_secure_tcp = 1;
1da177e4
LT
205 }
206 break;
207 case 3:
208 if (old_secure_tcp < 2)
209 to_change = 1;
210 break;
211 }
a0840e2e 212 old_secure_tcp = ipvs->sysctl_secure_tcp;
1da177e4 213 if (to_change >= 0)
9330419d 214 ip_vs_protocol_timeout_change(ipvs,
a0840e2e
HS
215 ipvs->sysctl_secure_tcp > 1);
216 spin_unlock(&ipvs->securetcp_lock);
af9debd4
JA
217
218 local_bh_enable();
1da177e4
LT
219}
220
221
222/*
223 * Timer for checking the defense
224 */
225#define DEFENSE_TIMER_PERIOD 1*HZ
1da177e4 226
c4028958 227static void defense_work_handler(struct work_struct *work)
1da177e4 228{
f6340ee0
HS
229 struct netns_ipvs *ipvs =
230 container_of(work, struct netns_ipvs, defense_work.work);
9330419d
HS
231
232 update_defense_level(ipvs);
a0840e2e 233 if (atomic_read(&ipvs->dropentry))
f6340ee0
HS
234 ip_vs_random_dropentry(ipvs->net);
235 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
1da177e4 236}
14e40546 237#endif
1da177e4
LT
238
239int
240ip_vs_use_count_inc(void)
241{
242 return try_module_get(THIS_MODULE);
243}
244
245void
246ip_vs_use_count_dec(void)
247{
248 module_put(THIS_MODULE);
249}
250
251
252/*
253 * Hash table: for virtual service lookups
254 */
255#define IP_VS_SVC_TAB_BITS 8
256#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259/* the service table hashed by <protocol, addr, port> */
260static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261/* the service table hashed by fwmark */
262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
1da177e4
LT
264
265/*
266 * Returns hash value for virtual service
267 */
95c96174
ED
268static inline unsigned int
269ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
fc723250 270 const union nf_inet_addr *addr, __be16 port)
1da177e4 271{
95c96174 272 register unsigned int porth = ntohs(port);
b18610de 273 __be32 addr_fold = addr->ip;
1da177e4 274
b18610de
JV
275#ifdef CONFIG_IP_VS_IPV6
276 if (af == AF_INET6)
277 addr_fold = addr->ip6[0]^addr->ip6[1]^
278 addr->ip6[2]^addr->ip6[3];
279#endif
fc723250 280 addr_fold ^= ((size_t)net>>8);
b18610de
JV
281
282 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
283 & IP_VS_SVC_TAB_MASK;
284}
285
286/*
287 * Returns hash value of fwmark for virtual service lookup
288 */
95c96174 289static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
1da177e4 290{
fc723250 291 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
1da177e4
LT
292}
293
294/*
fc723250 295 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
1da177e4
LT
296 * or in the ip_vs_svc_fwm_table by fwmark.
297 * Should be called with locked tables.
298 */
299static int ip_vs_svc_hash(struct ip_vs_service *svc)
300{
95c96174 301 unsigned int hash;
1da177e4
LT
302
303 if (svc->flags & IP_VS_SVC_F_HASHED) {
1e3e238e
HE
304 pr_err("%s(): request for already hashed, called from %pF\n",
305 __func__, __builtin_return_address(0));
1da177e4
LT
306 return 0;
307 }
308
309 if (svc->fwmark == 0) {
310 /*
fc723250 311 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
1da177e4 312 */
fc723250
HS
313 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314 &svc->addr, svc->port);
1da177e4
LT
315 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316 } else {
317 /*
fc723250 318 * Hash it by fwmark in svc_fwm_table
1da177e4 319 */
fc723250 320 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
1da177e4
LT
321 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322 }
323
324 svc->flags |= IP_VS_SVC_F_HASHED;
325 /* increase its refcnt because it is referenced by the svc table */
326 atomic_inc(&svc->refcnt);
327 return 1;
328}
329
330
331/*
fc723250 332 * Unhashes a service from svc_table / svc_fwm_table.
1da177e4
LT
333 * Should be called with locked tables.
334 */
335static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336{
337 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
1e3e238e
HE
338 pr_err("%s(): request for unhash flagged, called from %pF\n",
339 __func__, __builtin_return_address(0));
1da177e4
LT
340 return 0;
341 }
342
343 if (svc->fwmark == 0) {
fc723250 344 /* Remove it from the svc_table table */
1da177e4
LT
345 list_del(&svc->s_list);
346 } else {
fc723250 347 /* Remove it from the svc_fwm_table table */
1da177e4
LT
348 list_del(&svc->f_list);
349 }
350
351 svc->flags &= ~IP_VS_SVC_F_HASHED;
352 atomic_dec(&svc->refcnt);
353 return 1;
354}
355
356
357/*
fc723250 358 * Get service by {netns, proto,addr,port} in the service table.
1da177e4 359 */
b18610de 360static inline struct ip_vs_service *
fc723250
HS
361__ip_vs_service_find(struct net *net, int af, __u16 protocol,
362 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4 363{
95c96174 364 unsigned int hash;
1da177e4
LT
365 struct ip_vs_service *svc;
366
367 /* Check for "full" addressed entries */
fc723250 368 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
1da177e4
LT
369
370 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
371 if ((svc->af == af)
372 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4 373 && (svc->port == vport)
fc723250
HS
374 && (svc->protocol == protocol)
375 && net_eq(svc->net, net)) {
1da177e4 376 /* HIT */
1da177e4
LT
377 return svc;
378 }
379 }
380
381 return NULL;
382}
383
384
385/*
386 * Get service by {fwmark} in the service table.
387 */
b18610de 388static inline struct ip_vs_service *
fc723250 389__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
1da177e4 390{
95c96174 391 unsigned int hash;
1da177e4
LT
392 struct ip_vs_service *svc;
393
394 /* Check for fwmark addressed entries */
fc723250 395 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
1da177e4
LT
396
397 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
fc723250
HS
398 if (svc->fwmark == fwmark && svc->af == af
399 && net_eq(svc->net, net)) {
1da177e4 400 /* HIT */
1da177e4
LT
401 return svc;
402 }
403 }
404
405 return NULL;
406}
407
408struct ip_vs_service *
fc723250 409ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
3c2e0505 410 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
411{
412 struct ip_vs_service *svc;
763f8d0e 413 struct netns_ipvs *ipvs = net_ipvs(net);
3c2e0505 414
1da177e4
LT
415 read_lock(&__ip_vs_svc_lock);
416
417 /*
418 * Check the table hashed by fwmark first
419 */
097fc76a
JA
420 if (fwmark) {
421 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422 if (svc)
423 goto out;
424 }
1da177e4
LT
425
426 /*
427 * Check the table hashed by <protocol,addr,port>
428 * for "full" addressed entries
429 */
fc723250 430 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
1da177e4
LT
431
432 if (svc == NULL
433 && protocol == IPPROTO_TCP
763f8d0e 434 && atomic_read(&ipvs->ftpsvc_counter)
1da177e4
LT
435 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436 /*
437 * Check if ftp service entry exists, the packet
438 * might belong to FTP data connections.
439 */
fc723250 440 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
1da177e4
LT
441 }
442
443 if (svc == NULL
763f8d0e 444 && atomic_read(&ipvs->nullsvc_counter)) {
1da177e4
LT
445 /*
446 * Check if the catch-all port (port zero) exists
447 */
fc723250 448 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
1da177e4
LT
449 }
450
451 out:
26c15cfd
JA
452 if (svc)
453 atomic_inc(&svc->usecnt);
1da177e4
LT
454 read_unlock(&__ip_vs_svc_lock);
455
3c2e0505
JV
456 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457 fwmark, ip_vs_proto_name(protocol),
458 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459 svc ? "hit" : "not hit");
1da177e4
LT
460
461 return svc;
462}
463
464
465static inline void
466__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467{
468 atomic_inc(&svc->refcnt);
469 dest->svc = svc;
470}
471
26c15cfd 472static void
1da177e4
LT
473__ip_vs_unbind_svc(struct ip_vs_dest *dest)
474{
475 struct ip_vs_service *svc = dest->svc;
476
477 dest->svc = NULL;
26c15cfd
JA
478 if (atomic_dec_and_test(&svc->refcnt)) {
479 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480 svc->fwmark,
481 IP_VS_DBG_ADDR(svc->af, &svc->addr),
482 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 483 free_percpu(svc->stats.cpustats);
1da177e4 484 kfree(svc);
26c15cfd 485 }
1da177e4
LT
486}
487
488
489/*
490 * Returns hash value for real service
491 */
95c96174 492static inline unsigned int ip_vs_rs_hashkey(int af,
7937df15
JV
493 const union nf_inet_addr *addr,
494 __be16 port)
1da177e4 495{
95c96174 496 register unsigned int porth = ntohs(port);
7937df15
JV
497 __be32 addr_fold = addr->ip;
498
499#ifdef CONFIG_IP_VS_IPV6
500 if (af == AF_INET6)
501 addr_fold = addr->ip6[0]^addr->ip6[1]^
502 addr->ip6[2]^addr->ip6[3];
503#endif
1da177e4 504
7937df15 505 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
506 & IP_VS_RTAB_MASK;
507}
508
509/*
fc723250 510 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
1da177e4
LT
511 * should be called with locked tables.
512 */
fc723250 513static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
1da177e4 514{
95c96174 515 unsigned int hash;
1da177e4
LT
516
517 if (!list_empty(&dest->d_list)) {
518 return 0;
519 }
520
521 /*
522 * Hash by proto,addr,port,
523 * which are the parameters of the real service.
524 */
7937df15
JV
525 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
fc723250 527 list_add(&dest->d_list, &ipvs->rs_table[hash]);
1da177e4
LT
528
529 return 1;
530}
531
532/*
fc723250 533 * UNhashes ip_vs_dest from rs_table.
1da177e4
LT
534 * should be called with locked tables.
535 */
536static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537{
538 /*
fc723250 539 * Remove it from the rs_table table.
1da177e4
LT
540 */
541 if (!list_empty(&dest->d_list)) {
542 list_del(&dest->d_list);
543 INIT_LIST_HEAD(&dest->d_list);
544 }
545
546 return 1;
547}
548
549/*
550 * Lookup real service by <proto,addr,port> in the real service table.
551 */
552struct ip_vs_dest *
fc723250 553ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
7937df15
JV
554 const union nf_inet_addr *daddr,
555 __be16 dport)
1da177e4 556{
fc723250 557 struct netns_ipvs *ipvs = net_ipvs(net);
95c96174 558 unsigned int hash;
1da177e4
LT
559 struct ip_vs_dest *dest;
560
561 /*
562 * Check for "full" addressed entries
563 * Return the first found entry
564 */
7937df15 565 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4 566
a0840e2e 567 read_lock(&ipvs->rs_lock);
fc723250 568 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
7937df15
JV
569 if ((dest->af == af)
570 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
571 && (dest->port == dport)
572 && ((dest->protocol == protocol) ||
573 dest->vfwmark)) {
574 /* HIT */
a0840e2e 575 read_unlock(&ipvs->rs_lock);
1da177e4
LT
576 return dest;
577 }
578 }
a0840e2e 579 read_unlock(&ipvs->rs_lock);
1da177e4
LT
580
581 return NULL;
582}
583
584/*
585 * Lookup destination by {addr,port} in the given service
586 */
587static struct ip_vs_dest *
7937df15
JV
588ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589 __be16 dport)
1da177e4
LT
590{
591 struct ip_vs_dest *dest;
592
593 /*
594 * Find the destination for the given service
595 */
596 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
597 if ((dest->af == svc->af)
598 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599 && (dest->port == dport)) {
1da177e4
LT
600 /* HIT */
601 return dest;
602 }
603 }
604
605 return NULL;
606}
607
1e356f9c
RB
608/*
609 * Find destination by {daddr,dport,vaddr,protocol}
610 * Cretaed to be used in ip_vs_process_message() in
611 * the backup synchronization daemon. It finds the
612 * destination to be bound to the received connection
613 * on the backup.
614 *
615 * ip_vs_lookup_real_service() looked promissing, but
616 * seems not working as expected.
617 */
fc723250
HS
618struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
619 const union nf_inet_addr *daddr,
7937df15
JV
620 __be16 dport,
621 const union nf_inet_addr *vaddr,
52793dbe
JA
622 __be16 vport, __u16 protocol, __u32 fwmark,
623 __u32 flags)
1e356f9c
RB
624{
625 struct ip_vs_dest *dest;
626 struct ip_vs_service *svc;
52793dbe 627 __be16 port = dport;
1e356f9c 628
fc723250 629 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
1e356f9c
RB
630 if (!svc)
631 return NULL;
52793dbe
JA
632 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
633 port = 0;
634 dest = ip_vs_lookup_dest(svc, daddr, port);
635 if (!dest)
636 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
1e356f9c
RB
637 if (dest)
638 atomic_inc(&dest->refcnt);
639 ip_vs_service_put(svc);
640 return dest;
641}
1da177e4
LT
642
643/*
644 * Lookup dest by {svc,addr,port} in the destination trash.
645 * The destination trash is used to hold the destinations that are removed
646 * from the service table but are still referenced by some conn entries.
647 * The reason to add the destination trash is when the dest is temporary
648 * down (either by administrator or by monitor program), the dest can be
649 * picked back from the trash, the remaining connections to the dest can
650 * continue, and the counting information of the dest is also useful for
651 * scheduling.
652 */
653static struct ip_vs_dest *
7937df15
JV
654ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
655 __be16 dport)
1da177e4
LT
656{
657 struct ip_vs_dest *dest, *nxt;
f2431e6e 658 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
659
660 /*
661 * Find the destination in trash
662 */
f2431e6e 663 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
7937df15
JV
664 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
665 "dest->refcnt=%d\n",
666 dest->vfwmark,
667 IP_VS_DBG_ADDR(svc->af, &dest->addr),
668 ntohs(dest->port),
669 atomic_read(&dest->refcnt));
670 if (dest->af == svc->af &&
671 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
672 dest->port == dport &&
673 dest->vfwmark == svc->fwmark &&
674 dest->protocol == svc->protocol &&
675 (svc->fwmark ||
7937df15 676 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
677 dest->vport == svc->port))) {
678 /* HIT */
679 return dest;
680 }
681
682 /*
683 * Try to purge the destination from trash if not referenced
684 */
685 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
686 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
687 "from trash\n",
688 dest->vfwmark,
689 IP_VS_DBG_ADDR(svc->af, &dest->addr),
690 ntohs(dest->port));
1da177e4
LT
691 list_del(&dest->n_list);
692 ip_vs_dst_reset(dest);
693 __ip_vs_unbind_svc(dest);
b17fc996 694 free_percpu(dest->stats.cpustats);
1da177e4
LT
695 kfree(dest);
696 }
697 }
698
699 return NULL;
700}
701
702
703/*
704 * Clean up all the destinations in the trash
705 * Called by the ip_vs_control_cleanup()
706 *
707 * When the ip_vs_control_clearup is activated by ipvs module exit,
708 * the service tables must have been flushed and all the connections
709 * are expired, and the refcnt of each destination in the trash must
710 * be 1, so we simply release them here.
711 */
f2431e6e 712static void ip_vs_trash_cleanup(struct net *net)
1da177e4
LT
713{
714 struct ip_vs_dest *dest, *nxt;
f2431e6e 715 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 716
f2431e6e 717 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
1da177e4
LT
718 list_del(&dest->n_list);
719 ip_vs_dst_reset(dest);
720 __ip_vs_unbind_svc(dest);
b17fc996 721 free_percpu(dest->stats.cpustats);
1da177e4
LT
722 kfree(dest);
723 }
724}
725
55a3d4e1
JA
726static void
727ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
728{
729#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
55a3d4e1
JA
730
731 spin_lock_bh(&src->lock);
732
733 IP_VS_SHOW_STATS_COUNTER(conns);
734 IP_VS_SHOW_STATS_COUNTER(inpkts);
735 IP_VS_SHOW_STATS_COUNTER(outpkts);
736 IP_VS_SHOW_STATS_COUNTER(inbytes);
737 IP_VS_SHOW_STATS_COUNTER(outbytes);
738
ea9f22cc 739 ip_vs_read_estimator(dst, src);
55a3d4e1
JA
740
741 spin_unlock_bh(&src->lock);
742}
1da177e4
LT
743
744static void
745ip_vs_zero_stats(struct ip_vs_stats *stats)
746{
747 spin_lock_bh(&stats->lock);
e93615d0 748
55a3d4e1
JA
749 /* get current counters as zero point, rates are zeroed */
750
751#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
55a3d4e1
JA
752
753 IP_VS_ZERO_STATS_COUNTER(conns);
754 IP_VS_ZERO_STATS_COUNTER(inpkts);
755 IP_VS_ZERO_STATS_COUNTER(outpkts);
756 IP_VS_ZERO_STATS_COUNTER(inbytes);
757 IP_VS_ZERO_STATS_COUNTER(outbytes);
758
1da177e4 759 ip_vs_zero_estimator(stats);
e93615d0 760
3a14a313 761 spin_unlock_bh(&stats->lock);
1da177e4
LT
762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
26c15cfd
JA
768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add)
1da177e4 770{
fc723250 771 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1da177e4
LT
772 int conn_flags;
773
774 /* set the weight and the flags */
775 atomic_set(&dest->weight, udest->weight);
3575792e
JA
776 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
777 conn_flags |= IP_VS_CONN_F_INACTIVE;
1da177e4 778
1da177e4 779 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
3575792e 780 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1da177e4
LT
781 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
782 } else {
783 /*
fc723250 784 * Put the real service in rs_table if not present.
1da177e4
LT
785 * For now only for NAT!
786 */
a0840e2e 787 write_lock_bh(&ipvs->rs_lock);
fc723250 788 ip_vs_rs_hash(ipvs, dest);
a0840e2e 789 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
790 }
791 atomic_set(&dest->conn_flags, conn_flags);
792
793 /* bind the service */
794 if (!dest->svc) {
795 __ip_vs_bind_svc(dest, svc);
796 } else {
797 if (dest->svc != svc) {
798 __ip_vs_unbind_svc(dest);
799 ip_vs_zero_stats(&dest->stats);
800 __ip_vs_bind_svc(dest, svc);
801 }
802 }
803
804 /* set the dest status flags */
805 dest->flags |= IP_VS_DEST_F_AVAILABLE;
806
807 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
808 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
809 dest->u_threshold = udest->u_threshold;
810 dest->l_threshold = udest->l_threshold;
26c15cfd 811
ff75f40f 812 spin_lock_bh(&dest->dst_lock);
fc604767 813 ip_vs_dst_reset(dest);
ff75f40f 814 spin_unlock_bh(&dest->dst_lock);
fc604767 815
26c15cfd 816 if (add)
6ef757f9 817 ip_vs_start_estimator(svc->net, &dest->stats);
26c15cfd
JA
818
819 write_lock_bh(&__ip_vs_svc_lock);
820
821 /* Wait until all other svc users go away */
822 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
823
824 if (add) {
825 list_add(&dest->n_list, &svc->destinations);
826 svc->num_dests++;
827 }
828
829 /* call the update_service, because server weight may be changed */
830 if (svc->scheduler->update_service)
831 svc->scheduler->update_service(svc);
832
833 write_unlock_bh(&__ip_vs_svc_lock);
1da177e4
LT
834}
835
836
837/*
838 * Create a destination for the given service
839 */
840static int
c860c6b1 841ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
842 struct ip_vs_dest **dest_p)
843{
844 struct ip_vs_dest *dest;
95c96174 845 unsigned int atype;
1da177e4
LT
846
847 EnterFunction(2);
848
09571c7a
VB
849#ifdef CONFIG_IP_VS_IPV6
850 if (svc->af == AF_INET6) {
851 atype = ipv6_addr_type(&udest->addr.in6);
3bfb92f4
SW
852 if ((!(atype & IPV6_ADDR_UNICAST) ||
853 atype & IPV6_ADDR_LINKLOCAL) &&
4a98480b 854 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
09571c7a
VB
855 return -EINVAL;
856 } else
857#endif
858 {
4a98480b 859 atype = inet_addr_type(svc->net, udest->addr.ip);
09571c7a
VB
860 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
861 return -EINVAL;
862 }
1da177e4 863
dee06e47 864 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
0a9ee813 865 if (dest == NULL)
1da177e4 866 return -ENOMEM;
0a9ee813 867
b17fc996 868 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a9ee813 869 if (!dest->stats.cpustats)
b17fc996 870 goto err_alloc;
1da177e4 871
c860c6b1 872 dest->af = svc->af;
1da177e4 873 dest->protocol = svc->protocol;
c860c6b1 874 dest->vaddr = svc->addr;
1da177e4
LT
875 dest->vport = svc->port;
876 dest->vfwmark = svc->fwmark;
c860c6b1 877 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
878 dest->port = udest->port;
879
880 atomic_set(&dest->activeconns, 0);
881 atomic_set(&dest->inactconns, 0);
882 atomic_set(&dest->persistconns, 0);
26c15cfd 883 atomic_set(&dest->refcnt, 1);
1da177e4
LT
884
885 INIT_LIST_HEAD(&dest->d_list);
886 spin_lock_init(&dest->dst_lock);
887 spin_lock_init(&dest->stats.lock);
26c15cfd 888 __ip_vs_update_dest(svc, dest, udest, 1);
1da177e4
LT
889
890 *dest_p = dest;
891
892 LeaveFunction(2);
893 return 0;
b17fc996
HS
894
895err_alloc:
896 kfree(dest);
897 return -ENOMEM;
1da177e4
LT
898}
899
900
901/*
902 * Add a destination into an existing service
903 */
904static int
c860c6b1 905ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
906{
907 struct ip_vs_dest *dest;
c860c6b1 908 union nf_inet_addr daddr;
014d730d 909 __be16 dport = udest->port;
1da177e4
LT
910 int ret;
911
912 EnterFunction(2);
913
914 if (udest->weight < 0) {
1e3e238e 915 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
916 return -ERANGE;
917 }
918
919 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
920 pr_err("%s(): lower threshold is higher than upper threshold\n",
921 __func__);
1da177e4
LT
922 return -ERANGE;
923 }
924
c860c6b1
JV
925 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
926
1da177e4
LT
927 /*
928 * Check if the dest already exists in the list
929 */
7937df15
JV
930 dest = ip_vs_lookup_dest(svc, &daddr, dport);
931
1da177e4 932 if (dest != NULL) {
1e3e238e 933 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1da177e4
LT
934 return -EEXIST;
935 }
936
937 /*
938 * Check if the dest already exists in the trash and
939 * is from the same service
940 */
7937df15
JV
941 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
942
1da177e4 943 if (dest != NULL) {
cfc78c5a
JV
944 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
945 "dest->refcnt=%d, service %u/%s:%u\n",
946 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
947 atomic_read(&dest->refcnt),
948 dest->vfwmark,
949 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
950 ntohs(dest->vport));
951
1da177e4
LT
952 /*
953 * Get the destination from the trash
954 */
955 list_del(&dest->n_list);
956
26c15cfd
JA
957 __ip_vs_update_dest(svc, dest, udest, 1);
958 ret = 0;
959 } else {
1da177e4 960 /*
26c15cfd 961 * Allocate and initialize the dest structure
1da177e4 962 */
26c15cfd 963 ret = ip_vs_new_dest(svc, udest, &dest);
1da177e4 964 }
1da177e4
LT
965 LeaveFunction(2);
966
26c15cfd 967 return ret;
1da177e4
LT
968}
969
970
971/*
972 * Edit a destination in the given service
973 */
974static int
c860c6b1 975ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
976{
977 struct ip_vs_dest *dest;
c860c6b1 978 union nf_inet_addr daddr;
014d730d 979 __be16 dport = udest->port;
1da177e4
LT
980
981 EnterFunction(2);
982
983 if (udest->weight < 0) {
1e3e238e 984 pr_err("%s(): server weight less than zero\n", __func__);
1da177e4
LT
985 return -ERANGE;
986 }
987
988 if (udest->l_threshold > udest->u_threshold) {
1e3e238e
HE
989 pr_err("%s(): lower threshold is higher than upper threshold\n",
990 __func__);
1da177e4
LT
991 return -ERANGE;
992 }
993
c860c6b1
JV
994 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
995
1da177e4
LT
996 /*
997 * Lookup the destination list
998 */
7937df15
JV
999 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1000
1da177e4 1001 if (dest == NULL) {
1e3e238e 1002 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1da177e4
LT
1003 return -ENOENT;
1004 }
1005
26c15cfd 1006 __ip_vs_update_dest(svc, dest, udest, 0);
1da177e4
LT
1007 LeaveFunction(2);
1008
1009 return 0;
1010}
1011
1012
1013/*
1014 * Delete a destination (must be already unlinked from the service)
1015 */
29c2026f 1016static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1da177e4 1017{
a0840e2e
HS
1018 struct netns_ipvs *ipvs = net_ipvs(net);
1019
6ef757f9 1020 ip_vs_stop_estimator(net, &dest->stats);
1da177e4
LT
1021
1022 /*
1023 * Remove it from the d-linked list with the real services.
1024 */
a0840e2e 1025 write_lock_bh(&ipvs->rs_lock);
1da177e4 1026 ip_vs_rs_unhash(dest);
a0840e2e 1027 write_unlock_bh(&ipvs->rs_lock);
1da177e4
LT
1028
1029 /*
1030 * Decrease the refcnt of the dest, and free the dest
1031 * if nobody refers to it (refcnt=0). Otherwise, throw
1032 * the destination into the trash.
1033 */
1034 if (atomic_dec_and_test(&dest->refcnt)) {
26c15cfd
JA
1035 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1036 dest->vfwmark,
1037 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1038 ntohs(dest->port));
1da177e4
LT
1039 ip_vs_dst_reset(dest);
1040 /* simply decrease svc->refcnt here, let the caller check
1041 and release the service if nobody refers to it.
1042 Only user context can release destination and service,
1043 and only one user context can update virtual service at a
1044 time, so the operation here is OK */
1045 atomic_dec(&dest->svc->refcnt);
b17fc996 1046 free_percpu(dest->stats.cpustats);
1da177e4
LT
1047 kfree(dest);
1048 } else {
cfc78c5a
JV
1049 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1050 "dest->refcnt=%d\n",
1051 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1052 ntohs(dest->port),
1053 atomic_read(&dest->refcnt));
f2431e6e 1054 list_add(&dest->n_list, &ipvs->dest_trash);
1da177e4
LT
1055 atomic_inc(&dest->refcnt);
1056 }
1057}
1058
1059
1060/*
1061 * Unlink a destination from the given service
1062 */
1063static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064 struct ip_vs_dest *dest,
1065 int svcupd)
1066{
1067 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069 /*
1070 * Remove it from the d-linked destination list.
1071 */
1072 list_del(&dest->n_list);
1073 svc->num_dests--;
82dfb6f3
SW
1074
1075 /*
1076 * Call the update_service function of its scheduler
1077 */
1078 if (svcupd && svc->scheduler->update_service)
1079 svc->scheduler->update_service(svc);
1da177e4
LT
1080}
1081
1082
1083/*
1084 * Delete a destination server in the given service
1085 */
1086static int
c860c6b1 1087ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1088{
1089 struct ip_vs_dest *dest;
014d730d 1090 __be16 dport = udest->port;
1da177e4
LT
1091
1092 EnterFunction(2);
1093
7937df15 1094 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1095
1da177e4 1096 if (dest == NULL) {
1e3e238e 1097 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1da177e4
LT
1098 return -ENOENT;
1099 }
1100
1101 write_lock_bh(&__ip_vs_svc_lock);
1102
1103 /*
1104 * Wait until all other svc users go away.
1105 */
26c15cfd 1106 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1107
1108 /*
1109 * Unlink dest from the service
1110 */
1111 __ip_vs_unlink_dest(svc, dest, 1);
1112
1113 write_unlock_bh(&__ip_vs_svc_lock);
1114
1115 /*
1116 * Delete the destination
1117 */
a0840e2e 1118 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1119
1120 LeaveFunction(2);
1121
1122 return 0;
1123}
1124
1125
1126/*
1127 * Add a service into the service hash table
1128 */
1129static int
fc723250 1130ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
c860c6b1 1131 struct ip_vs_service **svc_p)
1da177e4
LT
1132{
1133 int ret = 0;
1134 struct ip_vs_scheduler *sched = NULL;
0d1e71b0 1135 struct ip_vs_pe *pe = NULL;
1da177e4 1136 struct ip_vs_service *svc = NULL;
a0840e2e 1137 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
1138
1139 /* increase the module use count */
1140 ip_vs_use_count_inc();
1141
1142 /* Lookup the scheduler by 'u->sched_name' */
1143 sched = ip_vs_scheduler_get(u->sched_name);
1144 if (sched == NULL) {
1e3e238e 1145 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4 1146 ret = -ENOENT;
6e08bfb8 1147 goto out_err;
1da177e4
LT
1148 }
1149
0d1e71b0 1150 if (u->pe_name && *u->pe_name) {
e9e5eee8 1151 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1152 if (pe == NULL) {
1153 pr_info("persistence engine module ip_vs_pe_%s "
1154 "not found\n", u->pe_name);
1155 ret = -ENOENT;
1156 goto out_err;
1157 }
1158 }
1159
f94fd041 1160#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1161 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1162 ret = -EINVAL;
1163 goto out_err;
f94fd041
JV
1164 }
1165#endif
1166
dee06e47 1167 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1da177e4 1168 if (svc == NULL) {
1e3e238e 1169 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1da177e4
LT
1170 ret = -ENOMEM;
1171 goto out_err;
1172 }
b17fc996 1173 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a54e939
JL
1174 if (!svc->stats.cpustats) {
1175 ret = -ENOMEM;
b17fc996 1176 goto out_err;
0a54e939 1177 }
1da177e4
LT
1178
1179 /* I'm the first user of the service */
26c15cfd 1180 atomic_set(&svc->usecnt, 0);
1da177e4
LT
1181 atomic_set(&svc->refcnt, 0);
1182
c860c6b1 1183 svc->af = u->af;
1da177e4 1184 svc->protocol = u->protocol;
c860c6b1 1185 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1186 svc->port = u->port;
1187 svc->fwmark = u->fwmark;
1188 svc->flags = u->flags;
1189 svc->timeout = u->timeout * HZ;
1190 svc->netmask = u->netmask;
fc723250 1191 svc->net = net;
1da177e4
LT
1192
1193 INIT_LIST_HEAD(&svc->destinations);
1194 rwlock_init(&svc->sched_lock);
1195 spin_lock_init(&svc->stats.lock);
1196
1197 /* Bind the scheduler */
1198 ret = ip_vs_bind_scheduler(svc, sched);
1199 if (ret)
1200 goto out_err;
1201 sched = NULL;
1202
0d1e71b0
SH
1203 /* Bind the ct retriever */
1204 ip_vs_bind_pe(svc, pe);
1205 pe = NULL;
1206
1da177e4
LT
1207 /* Update the virtual service counters */
1208 if (svc->port == FTPPORT)
763f8d0e 1209 atomic_inc(&ipvs->ftpsvc_counter);
1da177e4 1210 else if (svc->port == 0)
763f8d0e 1211 atomic_inc(&ipvs->nullsvc_counter);
1da177e4 1212
6ef757f9 1213 ip_vs_start_estimator(net, &svc->stats);
f94fd041
JV
1214
1215 /* Count only IPv4 services for old get/setsockopt interface */
1216 if (svc->af == AF_INET)
a0840e2e 1217 ipvs->num_services++;
1da177e4
LT
1218
1219 /* Hash the service into the service table */
1220 write_lock_bh(&__ip_vs_svc_lock);
1221 ip_vs_svc_hash(svc);
1222 write_unlock_bh(&__ip_vs_svc_lock);
1223
1224 *svc_p = svc;
7a4f0761
HS
1225 /* Now there is a service - full throttle */
1226 ipvs->enable = 1;
1da177e4
LT
1227 return 0;
1228
b17fc996 1229
6e08bfb8 1230 out_err:
1da177e4 1231 if (svc != NULL) {
2fabf35b 1232 ip_vs_unbind_scheduler(svc);
1da177e4
LT
1233 if (svc->inc) {
1234 local_bh_disable();
1235 ip_vs_app_inc_put(svc->inc);
1236 local_bh_enable();
1237 }
b17fc996
HS
1238 if (svc->stats.cpustats)
1239 free_percpu(svc->stats.cpustats);
1da177e4
LT
1240 kfree(svc);
1241 }
1242 ip_vs_scheduler_put(sched);
0d1e71b0 1243 ip_vs_pe_put(pe);
1da177e4 1244
1da177e4
LT
1245 /* decrease the module use count */
1246 ip_vs_use_count_dec();
1247
1248 return ret;
1249}
1250
1251
1252/*
1253 * Edit a service and bind it with a new scheduler
1254 */
1255static int
c860c6b1 1256ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1257{
1258 struct ip_vs_scheduler *sched, *old_sched;
0d1e71b0 1259 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1da177e4
LT
1260 int ret = 0;
1261
1262 /*
1263 * Lookup the scheduler, by 'u->sched_name'
1264 */
1265 sched = ip_vs_scheduler_get(u->sched_name);
1266 if (sched == NULL) {
1e3e238e 1267 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1da177e4
LT
1268 return -ENOENT;
1269 }
1270 old_sched = sched;
1271
0d1e71b0 1272 if (u->pe_name && *u->pe_name) {
e9e5eee8 1273 pe = ip_vs_pe_getbyname(u->pe_name);
0d1e71b0
SH
1274 if (pe == NULL) {
1275 pr_info("persistence engine module ip_vs_pe_%s "
1276 "not found\n", u->pe_name);
1277 ret = -ENOENT;
1278 goto out;
1279 }
1280 old_pe = pe;
1281 }
1282
f94fd041 1283#ifdef CONFIG_IP_VS_IPV6
48148938
JV
1284 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1285 ret = -EINVAL;
1286 goto out;
f94fd041
JV
1287 }
1288#endif
1289
1da177e4
LT
1290 write_lock_bh(&__ip_vs_svc_lock);
1291
1292 /*
1293 * Wait until all other svc users go away.
1294 */
26c15cfd 1295 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1296
1297 /*
1298 * Set the flags and timeout value
1299 */
1300 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1301 svc->timeout = u->timeout * HZ;
1302 svc->netmask = u->netmask;
1303
1304 old_sched = svc->scheduler;
1305 if (sched != old_sched) {
1306 /*
1307 * Unbind the old scheduler
1308 */
1309 if ((ret = ip_vs_unbind_scheduler(svc))) {
1310 old_sched = sched;
9e691ed6 1311 goto out_unlock;
1da177e4
LT
1312 }
1313
1314 /*
1315 * Bind the new scheduler
1316 */
1317 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1318 /*
1319 * If ip_vs_bind_scheduler fails, restore the old
1320 * scheduler.
1321 * The main reason of failure is out of memory.
1322 *
1323 * The question is if the old scheduler can be
1324 * restored all the time. TODO: if it cannot be
1325 * restored some time, we must delete the service,
1326 * otherwise the system may crash.
1327 */
1328 ip_vs_bind_scheduler(svc, old_sched);
1329 old_sched = sched;
9e691ed6 1330 goto out_unlock;
1da177e4
LT
1331 }
1332 }
1333
0d1e71b0
SH
1334 old_pe = svc->pe;
1335 if (pe != old_pe) {
1336 ip_vs_unbind_pe(svc);
1337 ip_vs_bind_pe(svc, pe);
1338 }
1339
552ad65a 1340out_unlock:
1da177e4 1341 write_unlock_bh(&__ip_vs_svc_lock);
552ad65a 1342out:
6e08bfb8 1343 ip_vs_scheduler_put(old_sched);
0d1e71b0 1344 ip_vs_pe_put(old_pe);
1da177e4
LT
1345 return ret;
1346}
1347
1348
1349/*
1350 * Delete a service from the service list
1351 * - The service must be unlinked, unlocked and not referenced!
1352 * - We are called under _bh lock
1353 */
1354static void __ip_vs_del_service(struct ip_vs_service *svc)
1355{
1356 struct ip_vs_dest *dest, *nxt;
1357 struct ip_vs_scheduler *old_sched;
0d1e71b0 1358 struct ip_vs_pe *old_pe;
a0840e2e 1359 struct netns_ipvs *ipvs = net_ipvs(svc->net);
0d1e71b0
SH
1360
1361 pr_info("%s: enter\n", __func__);
1da177e4 1362
f94fd041
JV
1363 /* Count only IPv4 services for old get/setsockopt interface */
1364 if (svc->af == AF_INET)
a0840e2e 1365 ipvs->num_services--;
f94fd041 1366
6ef757f9 1367 ip_vs_stop_estimator(svc->net, &svc->stats);
1da177e4
LT
1368
1369 /* Unbind scheduler */
1370 old_sched = svc->scheduler;
1371 ip_vs_unbind_scheduler(svc);
6e08bfb8 1372 ip_vs_scheduler_put(old_sched);
1da177e4 1373
0d1e71b0
SH
1374 /* Unbind persistence engine */
1375 old_pe = svc->pe;
1376 ip_vs_unbind_pe(svc);
1377 ip_vs_pe_put(old_pe);
1378
1da177e4
LT
1379 /* Unbind app inc */
1380 if (svc->inc) {
1381 ip_vs_app_inc_put(svc->inc);
1382 svc->inc = NULL;
1383 }
1384
1385 /*
1386 * Unlink the whole destination list
1387 */
1388 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1389 __ip_vs_unlink_dest(svc, dest, 0);
29c2026f 1390 __ip_vs_del_dest(svc->net, dest);
1da177e4
LT
1391 }
1392
1393 /*
1394 * Update the virtual service counters
1395 */
1396 if (svc->port == FTPPORT)
763f8d0e 1397 atomic_dec(&ipvs->ftpsvc_counter);
1da177e4 1398 else if (svc->port == 0)
763f8d0e 1399 atomic_dec(&ipvs->nullsvc_counter);
1da177e4
LT
1400
1401 /*
1402 * Free the service if nobody refers to it
1403 */
26c15cfd
JA
1404 if (atomic_read(&svc->refcnt) == 0) {
1405 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1406 svc->fwmark,
1407 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1408 ntohs(svc->port), atomic_read(&svc->usecnt));
b17fc996 1409 free_percpu(svc->stats.cpustats);
1da177e4 1410 kfree(svc);
26c15cfd 1411 }
1da177e4
LT
1412
1413 /* decrease the module use count */
1414 ip_vs_use_count_dec();
1415}
1416
1417/*
26c15cfd 1418 * Unlink a service from list and try to delete it if its refcnt reached 0
1da177e4 1419 */
26c15cfd 1420static void ip_vs_unlink_service(struct ip_vs_service *svc)
1da177e4 1421{
1da177e4
LT
1422 /*
1423 * Unhash it from the service table
1424 */
1425 write_lock_bh(&__ip_vs_svc_lock);
1426
1427 ip_vs_svc_unhash(svc);
1428
1429 /*
1430 * Wait until all the svc users go away.
1431 */
26c15cfd 1432 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1da177e4
LT
1433
1434 __ip_vs_del_service(svc);
1435
1436 write_unlock_bh(&__ip_vs_svc_lock);
26c15cfd
JA
1437}
1438
1439/*
1440 * Delete a service from the service list
1441 */
1442static int ip_vs_del_service(struct ip_vs_service *svc)
1443{
1444 if (svc == NULL)
1445 return -EEXIST;
1446 ip_vs_unlink_service(svc);
1da177e4
LT
1447
1448 return 0;
1449}
1450
1451
1452/*
1453 * Flush all the virtual services
1454 */
fc723250 1455static int ip_vs_flush(struct net *net)
1da177e4
LT
1456{
1457 int idx;
1458 struct ip_vs_service *svc, *nxt;
1459
1460 /*
fc723250 1461 * Flush the service table hashed by <netns,protocol,addr,port>
1da177e4
LT
1462 */
1463 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
fc723250
HS
1464 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1465 s_list) {
1466 if (net_eq(svc->net, net))
1467 ip_vs_unlink_service(svc);
1da177e4
LT
1468 }
1469 }
1470
1471 /*
1472 * Flush the service table hashed by fwmark
1473 */
1474 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1475 list_for_each_entry_safe(svc, nxt,
1476 &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1477 if (net_eq(svc->net, net))
1478 ip_vs_unlink_service(svc);
1da177e4
LT
1479 }
1480 }
1481
1482 return 0;
1483}
1484
7a4f0761
HS
1485/*
1486 * Delete service by {netns} in the service table.
1487 * Called by __ip_vs_cleanup()
1488 */
503cf15a 1489void ip_vs_service_net_cleanup(struct net *net)
7a4f0761
HS
1490{
1491 EnterFunction(2);
1492 /* Check for "full" addressed entries */
1493 mutex_lock(&__ip_vs_mutex);
1494 ip_vs_flush(net);
1495 mutex_unlock(&__ip_vs_mutex);
1496 LeaveFunction(2);
1497}
1498/*
1499 * Release dst hold by dst_cache
1500 */
1501static inline void
1502__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1503{
1504 spin_lock_bh(&dest->dst_lock);
1505 if (dest->dst_cache && dest->dst_cache->dev == dev) {
1506 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1507 dev->name,
1508 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1509 ntohs(dest->port),
1510 atomic_read(&dest->refcnt));
1511 ip_vs_dst_reset(dest);
1512 }
1513 spin_unlock_bh(&dest->dst_lock);
1514
1515}
1516/*
1517 * Netdev event receiver
1518 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1519 * a device that is "unregister" it must be released.
1520 */
1521static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1522 void *ptr)
1523{
1524 struct net_device *dev = ptr;
1525 struct net *net = dev_net(dev);
283283c4 1526 struct netns_ipvs *ipvs = net_ipvs(net);
7a4f0761
HS
1527 struct ip_vs_service *svc;
1528 struct ip_vs_dest *dest;
1529 unsigned int idx;
1530
283283c4 1531 if (event != NETDEV_UNREGISTER || !ipvs)
7a4f0761
HS
1532 return NOTIFY_DONE;
1533 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1534 EnterFunction(2);
1535 mutex_lock(&__ip_vs_mutex);
1536 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1537 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1538 if (net_eq(svc->net, net)) {
1539 list_for_each_entry(dest, &svc->destinations,
1540 n_list) {
1541 __ip_vs_dev_reset(dest, dev);
1542 }
1543 }
1544 }
1545
1546 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1547 if (net_eq(svc->net, net)) {
1548 list_for_each_entry(dest, &svc->destinations,
1549 n_list) {
1550 __ip_vs_dev_reset(dest, dev);
1551 }
1552 }
1553
1554 }
1555 }
1556
283283c4 1557 list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
7a4f0761
HS
1558 __ip_vs_dev_reset(dest, dev);
1559 }
1560 mutex_unlock(&__ip_vs_mutex);
1561 LeaveFunction(2);
1562 return NOTIFY_DONE;
1563}
1da177e4
LT
1564
1565/*
1566 * Zero counters in a service or all services
1567 */
1568static int ip_vs_zero_service(struct ip_vs_service *svc)
1569{
1570 struct ip_vs_dest *dest;
1571
1572 write_lock_bh(&__ip_vs_svc_lock);
1573 list_for_each_entry(dest, &svc->destinations, n_list) {
1574 ip_vs_zero_stats(&dest->stats);
1575 }
1576 ip_vs_zero_stats(&svc->stats);
1577 write_unlock_bh(&__ip_vs_svc_lock);
1578 return 0;
1579}
1580
fc723250 1581static int ip_vs_zero_all(struct net *net)
1da177e4
LT
1582{
1583 int idx;
1584 struct ip_vs_service *svc;
1585
1586 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1587 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250
HS
1588 if (net_eq(svc->net, net))
1589 ip_vs_zero_service(svc);
1da177e4
LT
1590 }
1591 }
1592
1593 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1594 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250
HS
1595 if (net_eq(svc->net, net))
1596 ip_vs_zero_service(svc);
1da177e4
LT
1597 }
1598 }
1599
2a0751af 1600 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1da177e4
LT
1601 return 0;
1602}
1603
14e40546 1604#ifdef CONFIG_SYSCTL
749c42b6
JA
1605
/* Lower and upper bound of the "sync_retries" sysctl (extra1/extra2). */
static int zero = 0;
static int three = 3;
1608
1da177e4 1609static int
8d65af78 1610proc_do_defense_mode(ctl_table *table, int write,
1da177e4
LT
1611 void __user *buffer, size_t *lenp, loff_t *ppos)
1612{
9330419d 1613 struct net *net = current->nsproxy->net_ns;
1da177e4
LT
1614 int *valp = table->data;
1615 int val = *valp;
1616 int rc;
1617
8d65af78 1618 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1da177e4
LT
1619 if (write && (*valp != val)) {
1620 if ((*valp < 0) || (*valp > 3)) {
1621 /* Restore the correct value */
1622 *valp = val;
1623 } else {
9330419d 1624 update_defense_level(net_ipvs(net));
1da177e4
LT
1625 }
1626 }
1627 return rc;
1628}
1629
1da177e4 1630static int
8d65af78 1631proc_do_sync_threshold(ctl_table *table, int write,
1da177e4
LT
1632 void __user *buffer, size_t *lenp, loff_t *ppos)
1633{
1634 int *valp = table->data;
1635 int val[2];
1636 int rc;
1637
1638 /* backup the value first */
1639 memcpy(val, valp, sizeof(val));
1640
8d65af78 1641 rc = proc_dointvec(table, write, buffer, lenp, ppos);
749c42b6
JA
1642 if (write && (valp[0] < 0 || valp[1] < 0 ||
1643 (valp[0] >= valp[1] && valp[1]))) {
1da177e4
LT
1644 /* Restore the correct value */
1645 memcpy(valp, val, sizeof(val));
1646 }
1647 return rc;
1648}
1649
b880c1f0
HS
1650static int
1651proc_do_sync_mode(ctl_table *table, int write,
1652 void __user *buffer, size_t *lenp, loff_t *ppos)
1653{
1654 int *valp = table->data;
1655 int val = *valp;
1656 int rc;
1657
1658 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1659 if (write && (*valp != val)) {
1660 if ((*valp < 0) || (*valp > 1)) {
1661 /* Restore the correct value */
1662 *valp = val;
f73181c8
PNA
1663 }
1664 }
1665 return rc;
1666}
1667
1668static int
1669proc_do_sync_ports(ctl_table *table, int write,
1670 void __user *buffer, size_t *lenp, loff_t *ppos)
1671{
1672 int *valp = table->data;
1673 int val = *valp;
1674 int rc;
1675
1676 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1677 if (write && (*valp != val)) {
1678 if (*valp < 1 || !is_power_of_2(*valp)) {
1679 /* Restore the correct value */
1680 *valp = val;
b880c1f0
HS
1681 }
1682 }
1683 return rc;
1684}
1da177e4
LT
1685
1686/*
1687 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
a0840e2e 1688 * Do not change order or insert new entries without
503cf15a 1689 * align with netns init in ip_vs_control_net_init()
1da177e4
LT
1690 */
1691
1692static struct ctl_table vs_vars[] = {
1693 {
1da177e4 1694 .procname = "amemthresh",
1da177e4
LT
1695 .maxlen = sizeof(int),
1696 .mode = 0644,
6d9f239a 1697 .proc_handler = proc_dointvec,
1da177e4 1698 },
1da177e4 1699 {
1da177e4 1700 .procname = "am_droprate",
1da177e4
LT
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
6d9f239a 1703 .proc_handler = proc_dointvec,
1da177e4
LT
1704 },
1705 {
1da177e4 1706 .procname = "drop_entry",
1da177e4
LT
1707 .maxlen = sizeof(int),
1708 .mode = 0644,
6d9f239a 1709 .proc_handler = proc_do_defense_mode,
1da177e4
LT
1710 },
1711 {
1da177e4 1712 .procname = "drop_packet",
1da177e4
LT
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
6d9f239a 1715 .proc_handler = proc_do_defense_mode,
1da177e4 1716 },
f4bc17cd
JA
1717#ifdef CONFIG_IP_VS_NFCT
1718 {
1719 .procname = "conntrack",
f4bc17cd
JA
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
1722 .proc_handler = &proc_dointvec,
1723 },
1724#endif
1da177e4 1725 {
1da177e4 1726 .procname = "secure_tcp",
1da177e4
LT
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
6d9f239a 1729 .proc_handler = proc_do_defense_mode,
1da177e4 1730 },
8a803040
JA
1731 {
1732 .procname = "snat_reroute",
8a803040
JA
1733 .maxlen = sizeof(int),
1734 .mode = 0644,
1735 .proc_handler = &proc_dointvec,
1736 },
b880c1f0
HS
1737 {
1738 .procname = "sync_version",
b880c1f0
HS
1739 .maxlen = sizeof(int),
1740 .mode = 0644,
1741 .proc_handler = &proc_do_sync_mode,
1742 },
f73181c8
PNA
1743 {
1744 .procname = "sync_ports",
1745 .maxlen = sizeof(int),
1746 .mode = 0644,
1747 .proc_handler = &proc_do_sync_ports,
1748 },
1c003b15
PNA
1749 {
1750 .procname = "sync_qlen_max",
1751 .maxlen = sizeof(int),
1752 .mode = 0644,
1753 .proc_handler = proc_dointvec,
1754 },
1755 {
1756 .procname = "sync_sock_size",
1757 .maxlen = sizeof(int),
1758 .mode = 0644,
1759 .proc_handler = proc_dointvec,
1760 },
a0840e2e
HS
1761 {
1762 .procname = "cache_bypass",
1763 .maxlen = sizeof(int),
1764 .mode = 0644,
1765 .proc_handler = proc_dointvec,
1766 },
1767 {
1768 .procname = "expire_nodest_conn",
1769 .maxlen = sizeof(int),
1770 .mode = 0644,
1771 .proc_handler = proc_dointvec,
1772 },
1773 {
1774 .procname = "expire_quiescent_template",
1775 .maxlen = sizeof(int),
1776 .mode = 0644,
1777 .proc_handler = proc_dointvec,
1778 },
1779 {
1780 .procname = "sync_threshold",
1781 .maxlen =
1782 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1783 .mode = 0644,
1784 .proc_handler = proc_do_sync_threshold,
1785 },
749c42b6
JA
1786 {
1787 .procname = "sync_refresh_period",
1788 .maxlen = sizeof(int),
1789 .mode = 0644,
1790 .proc_handler = proc_dointvec_jiffies,
1791 },
1792 {
1793 .procname = "sync_retries",
1794 .maxlen = sizeof(int),
1795 .mode = 0644,
1796 .proc_handler = proc_dointvec_minmax,
1797 .extra1 = &zero,
1798 .extra2 = &three,
1799 },
a0840e2e
HS
1800 {
1801 .procname = "nat_icmp_send",
1802 .maxlen = sizeof(int),
1803 .mode = 0644,
1804 .proc_handler = proc_dointvec,
1805 },
3654e611
JA
1806 {
1807 .procname = "pmtu_disc",
1808 .maxlen = sizeof(int),
1809 .mode = 0644,
1810 .proc_handler = proc_dointvec,
1811 },
a0840e2e
HS
1812#ifdef CONFIG_IP_VS_DEBUG
1813 {
1814 .procname = "debug_level",
1815 .data = &sysctl_ip_vs_debug_level,
1816 .maxlen = sizeof(int),
1817 .mode = 0644,
1818 .proc_handler = proc_dointvec,
1819 },
1820#endif
1da177e4
LT
1821#if 0
1822 {
1da177e4
LT
1823 .procname = "timeout_established",
1824 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1825 .maxlen = sizeof(int),
1826 .mode = 0644,
6d9f239a 1827 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1828 },
1829 {
1da177e4
LT
1830 .procname = "timeout_synsent",
1831 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1832 .maxlen = sizeof(int),
1833 .mode = 0644,
6d9f239a 1834 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1835 },
1836 {
1da177e4
LT
1837 .procname = "timeout_synrecv",
1838 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1839 .maxlen = sizeof(int),
1840 .mode = 0644,
6d9f239a 1841 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1842 },
1843 {
1da177e4
LT
1844 .procname = "timeout_finwait",
1845 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1846 .maxlen = sizeof(int),
1847 .mode = 0644,
6d9f239a 1848 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1849 },
1850 {
1da177e4
LT
1851 .procname = "timeout_timewait",
1852 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1853 .maxlen = sizeof(int),
1854 .mode = 0644,
6d9f239a 1855 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1856 },
1857 {
1da177e4
LT
1858 .procname = "timeout_close",
1859 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1860 .maxlen = sizeof(int),
1861 .mode = 0644,
6d9f239a 1862 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1863 },
1864 {
1da177e4
LT
1865 .procname = "timeout_closewait",
1866 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1867 .maxlen = sizeof(int),
1868 .mode = 0644,
6d9f239a 1869 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1870 },
1871 {
1da177e4
LT
1872 .procname = "timeout_lastack",
1873 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1874 .maxlen = sizeof(int),
1875 .mode = 0644,
6d9f239a 1876 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1877 },
1878 {
1da177e4
LT
1879 .procname = "timeout_listen",
1880 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1881 .maxlen = sizeof(int),
1882 .mode = 0644,
6d9f239a 1883 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1884 },
1885 {
1da177e4
LT
1886 .procname = "timeout_synack",
1887 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1888 .maxlen = sizeof(int),
1889 .mode = 0644,
6d9f239a 1890 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1891 },
1892 {
1da177e4
LT
1893 .procname = "timeout_udp",
1894 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1895 .maxlen = sizeof(int),
1896 .mode = 0644,
6d9f239a 1897 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1898 },
1899 {
1da177e4
LT
1900 .procname = "timeout_icmp",
1901 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1902 .maxlen = sizeof(int),
1903 .mode = 0644,
6d9f239a 1904 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1905 },
1906#endif
f8572d8f 1907 { }
1da177e4
LT
1908};
1909
14e40546 1910#endif
1da177e4 1911
1da177e4
LT
1912#ifdef CONFIG_PROC_FS
1913
1914struct ip_vs_iter {
fc723250 1915 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1da177e4
LT
1916 struct list_head *table;
1917 int bucket;
1918};
1919
1920/*
1921 * Write the contents of the VS rule table to a PROCfs file.
1922 * (It is kept just for backward compatibility)
1923 */
95c96174 1924static inline const char *ip_vs_fwd_name(unsigned int flags)
1da177e4
LT
1925{
1926 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1927 case IP_VS_CONN_F_LOCALNODE:
1928 return "Local";
1929 case IP_VS_CONN_F_TUNNEL:
1930 return "Tunnel";
1931 case IP_VS_CONN_F_DROUTE:
1932 return "Route";
1933 default:
1934 return "Masq";
1935 }
1936}
1937
1938
1939/* Get the Nth entry in the two lists */
1940static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1941{
fc723250 1942 struct net *net = seq_file_net(seq);
1da177e4
LT
1943 struct ip_vs_iter *iter = seq->private;
1944 int idx;
1945 struct ip_vs_service *svc;
1946
1947 /* look in hash by protocol */
1948 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1949 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
fc723250 1950 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1951 iter->table = ip_vs_svc_table;
1952 iter->bucket = idx;
1953 return svc;
1954 }
1955 }
1956 }
1957
1958 /* keep looking in fwmark */
1959 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1960 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
fc723250 1961 if (net_eq(svc->net, net) && pos-- == 0) {
1da177e4
LT
1962 iter->table = ip_vs_svc_fwm_table;
1963 iter->bucket = idx;
1964 return svc;
1965 }
1966 }
1967 }
1968
1969 return NULL;
1970}
1971
1972static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
563e94f0 1973__acquires(__ip_vs_svc_lock)
1da177e4
LT
1974{
1975
1976 read_lock_bh(&__ip_vs_svc_lock);
1977 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1978}
1979
1980
1981static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1982{
1983 struct list_head *e;
1984 struct ip_vs_iter *iter;
1985 struct ip_vs_service *svc;
1986
1987 ++*pos;
1988 if (v == SEQ_START_TOKEN)
1989 return ip_vs_info_array(seq,0);
1990
1991 svc = v;
1992 iter = seq->private;
1993
1994 if (iter->table == ip_vs_svc_table) {
1995 /* next service in table hashed by protocol */
1996 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1997 return list_entry(e, struct ip_vs_service, s_list);
1998
1999
2000 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2001 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2002 s_list) {
2003 return svc;
2004 }
2005 }
2006
2007 iter->table = ip_vs_svc_fwm_table;
2008 iter->bucket = -1;
2009 goto scan_fwmark;
2010 }
2011
2012 /* next service in hashed by fwmark */
2013 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2014 return list_entry(e, struct ip_vs_service, f_list);
2015
2016 scan_fwmark:
2017 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2018 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2019 f_list)
2020 return svc;
2021 }
2022
2023 return NULL;
2024}
2025
2026static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
563e94f0 2027__releases(__ip_vs_svc_lock)
1da177e4
LT
2028{
2029 read_unlock_bh(&__ip_vs_svc_lock);
2030}
2031
2032
2033static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2034{
2035 if (v == SEQ_START_TOKEN) {
2036 seq_printf(seq,
2037 "IP Virtual Server version %d.%d.%d (size=%d)\n",
6f7edb48 2038 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2039 seq_puts(seq,
2040 "Prot LocalAddress:Port Scheduler Flags\n");
2041 seq_puts(seq,
2042 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2043 } else {
2044 const struct ip_vs_service *svc = v;
2045 const struct ip_vs_iter *iter = seq->private;
2046 const struct ip_vs_dest *dest;
2047
667a5f18
VB
2048 if (iter->table == ip_vs_svc_table) {
2049#ifdef CONFIG_IP_VS_IPV6
2050 if (svc->af == AF_INET6)
5b095d98 2051 seq_printf(seq, "%s [%pI6]:%04X %s ",
667a5f18 2052 ip_vs_proto_name(svc->protocol),
38ff4fa4 2053 &svc->addr.in6,
667a5f18
VB
2054 ntohs(svc->port),
2055 svc->scheduler->name);
2056 else
2057#endif
26ec037f 2058 seq_printf(seq, "%s %08X:%04X %s %s ",
667a5f18
VB
2059 ip_vs_proto_name(svc->protocol),
2060 ntohl(svc->addr.ip),
2061 ntohs(svc->port),
26ec037f
NC
2062 svc->scheduler->name,
2063 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 2064 } else {
26ec037f
NC
2065 seq_printf(seq, "FWM %08X %s %s",
2066 svc->fwmark, svc->scheduler->name,
2067 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
667a5f18 2068 }
1da177e4
LT
2069
2070 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2071 seq_printf(seq, "persistent %d %08X\n",
2072 svc->timeout,
2073 ntohl(svc->netmask));
2074 else
2075 seq_putc(seq, '\n');
2076
2077 list_for_each_entry(dest, &svc->destinations, n_list) {
667a5f18
VB
2078#ifdef CONFIG_IP_VS_IPV6
2079 if (dest->af == AF_INET6)
2080 seq_printf(seq,
5b095d98 2081 " -> [%pI6]:%04X"
667a5f18 2082 " %-7s %-6d %-10d %-10d\n",
38ff4fa4 2083 &dest->addr.in6,
667a5f18
VB
2084 ntohs(dest->port),
2085 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2086 atomic_read(&dest->weight),
2087 atomic_read(&dest->activeconns),
2088 atomic_read(&dest->inactconns));
2089 else
2090#endif
2091 seq_printf(seq,
2092 " -> %08X:%04X "
2093 "%-7s %-6d %-10d %-10d\n",
2094 ntohl(dest->addr.ip),
2095 ntohs(dest->port),
2096 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2097 atomic_read(&dest->weight),
2098 atomic_read(&dest->activeconns),
2099 atomic_read(&dest->inactconns));
2100
1da177e4
LT
2101 }
2102 }
2103 return 0;
2104}
2105
56b3d975 2106static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
2107 .start = ip_vs_info_seq_start,
2108 .next = ip_vs_info_seq_next,
2109 .stop = ip_vs_info_seq_stop,
2110 .show = ip_vs_info_seq_show,
2111};
2112
2113static int ip_vs_info_open(struct inode *inode, struct file *file)
2114{
fc723250 2115 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
cf7732e4 2116 sizeof(struct ip_vs_iter));
1da177e4
LT
2117}
2118
9a32144e 2119static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
2120 .owner = THIS_MODULE,
2121 .open = ip_vs_info_open,
2122 .read = seq_read,
2123 .llseek = seq_lseek,
0f08190f 2124 .release = seq_release_net,
1da177e4
LT
2125};
2126
1da177e4
LT
2127static int ip_vs_stats_show(struct seq_file *seq, void *v)
2128{
b17fc996 2129 struct net *net = seq_file_single_net(seq);
55a3d4e1 2130 struct ip_vs_stats_user show;
1da177e4
LT
2131
2132/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2133 seq_puts(seq,
2134 " Total Incoming Outgoing Incoming Outgoing\n");
2135 seq_printf(seq,
2136 " Conns Packets Packets Bytes Bytes\n");
2137
55a3d4e1
JA
2138 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2139 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2140 show.inpkts, show.outpkts,
2141 (unsigned long long) show.inbytes,
2142 (unsigned long long) show.outbytes);
1da177e4
LT
2143
2144/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2145 seq_puts(seq,
2146 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
55a3d4e1
JA
2147 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2148 show.cps, show.inpps, show.outpps,
2149 show.inbps, show.outbps);
1da177e4
LT
2150
2151 return 0;
2152}
2153
2154static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2155{
fc723250 2156 return single_open_net(inode, file, ip_vs_stats_show);
1da177e4
LT
2157}
2158
9a32144e 2159static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
2160 .owner = THIS_MODULE,
2161 .open = ip_vs_stats_seq_open,
2162 .read = seq_read,
2163 .llseek = seq_lseek,
0f08190f 2164 .release = single_release_net,
1da177e4
LT
2165};
2166
b17fc996
HS
2167static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2168{
2169 struct net *net = seq_file_single_net(seq);
2a0751af
JA
2170 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2171 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
ea9f22cc 2172 struct ip_vs_stats_user rates;
b17fc996
HS
2173 int i;
2174
2175/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2176 seq_puts(seq,
2177 " Total Incoming Outgoing Incoming Outgoing\n");
2178 seq_printf(seq,
2179 "CPU Conns Packets Packets Bytes Bytes\n");
2180
2181 for_each_possible_cpu(i) {
2a0751af
JA
2182 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2183 unsigned int start;
2184 __u64 inbytes, outbytes;
2185
2186 do {
2187 start = u64_stats_fetch_begin_bh(&u->syncp);
2188 inbytes = u->ustats.inbytes;
2189 outbytes = u->ustats.outbytes;
2190 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2191
b17fc996 2192 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2a0751af
JA
2193 i, u->ustats.conns, u->ustats.inpkts,
2194 u->ustats.outpkts, (__u64)inbytes,
2195 (__u64)outbytes);
b17fc996
HS
2196 }
2197
2198 spin_lock_bh(&tot_stats->lock);
ea9f22cc 2199
b17fc996
HS
2200 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2201 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2202 tot_stats->ustats.outpkts,
2203 (unsigned long long) tot_stats->ustats.inbytes,
2204 (unsigned long long) tot_stats->ustats.outbytes);
2205
ea9f22cc
JA
2206 ip_vs_read_estimator(&rates, tot_stats);
2207
2208 spin_unlock_bh(&tot_stats->lock);
2209
b17fc996
HS
2210/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2211 seq_puts(seq,
2212 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2213 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
ea9f22cc
JA
2214 rates.cps,
2215 rates.inpps,
2216 rates.outpps,
2217 rates.inbps,
2218 rates.outbps);
b17fc996
HS
2219
2220 return 0;
2221}
2222
2223static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2224{
2225 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2226}
2227
2228static const struct file_operations ip_vs_stats_percpu_fops = {
2229 .owner = THIS_MODULE,
2230 .open = ip_vs_stats_percpu_seq_open,
2231 .read = seq_read,
2232 .llseek = seq_lseek,
0f08190f 2233 .release = single_release_net,
b17fc996 2234};
1da177e4
LT
2235#endif
2236
2237/*
2238 * Set timeout values for tcp tcpfin udp in the timeout_table.
2239 */
9330419d 2240static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
1da177e4 2241{
091bb34c 2242#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
9330419d 2243 struct ip_vs_proto_data *pd;
091bb34c 2244#endif
9330419d 2245
1da177e4
LT
2246 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2247 u->tcp_timeout,
2248 u->tcp_fin_timeout,
2249 u->udp_timeout);
2250
2251#ifdef CONFIG_IP_VS_PROTO_TCP
2252 if (u->tcp_timeout) {
9330419d
HS
2253 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2254 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
1da177e4
LT
2255 = u->tcp_timeout * HZ;
2256 }
2257
2258 if (u->tcp_fin_timeout) {
9330419d
HS
2259 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2260 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
1da177e4
LT
2261 = u->tcp_fin_timeout * HZ;
2262 }
2263#endif
2264
2265#ifdef CONFIG_IP_VS_PROTO_UDP
2266 if (u->udp_timeout) {
9330419d
HS
2267 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2268 pd->timeout_table[IP_VS_UDP_S_NORMAL]
1da177e4
LT
2269 = u->udp_timeout * HZ;
2270 }
2271#endif
2272 return 0;
2273}
2274
2275
/*
 * Helpers for the set_arglen[] table: SET_CMDID() turns a sockopt command
 * number into a zero-based table index, the *_ARG_LEN macros give the
 * argument size expected for each kind of command, and MAX_ARG_LEN is the
 * size used for the on-stack argument buffer.
 * The macro argument is parenthesized so that any expression may be passed.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2283
9b5b5cff 2284static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
2285 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2286 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2287 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2288 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2289 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2290 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2291 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2292 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2293 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2294 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2295 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2296};
2297
c860c6b1
JV
2298static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2299 struct ip_vs_service_user *usvc_compat)
2300{
0d1e71b0
SH
2301 memset(usvc, 0, sizeof(*usvc));
2302
c860c6b1
JV
2303 usvc->af = AF_INET;
2304 usvc->protocol = usvc_compat->protocol;
2305 usvc->addr.ip = usvc_compat->addr;
2306 usvc->port = usvc_compat->port;
2307 usvc->fwmark = usvc_compat->fwmark;
2308
2309 /* Deep copy of sched_name is not needed here */
2310 usvc->sched_name = usvc_compat->sched_name;
2311
2312 usvc->flags = usvc_compat->flags;
2313 usvc->timeout = usvc_compat->timeout;
2314 usvc->netmask = usvc_compat->netmask;
2315}
2316
2317static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2318 struct ip_vs_dest_user *udest_compat)
2319{
0d1e71b0
SH
2320 memset(udest, 0, sizeof(*udest));
2321
c860c6b1
JV
2322 udest->addr.ip = udest_compat->addr;
2323 udest->port = udest_compat->port;
2324 udest->conn_flags = udest_compat->conn_flags;
2325 udest->weight = udest_compat->weight;
2326 udest->u_threshold = udest_compat->u_threshold;
2327 udest->l_threshold = udest_compat->l_threshold;
2328}
2329
1da177e4
LT
2330static int
2331do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2332{
fc723250 2333 struct net *net = sock_net(sk);
1da177e4
LT
2334 int ret;
2335 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2336 struct ip_vs_service_user *usvc_compat;
2337 struct ip_vs_service_user_kern usvc;
1da177e4 2338 struct ip_vs_service *svc;
c860c6b1
JV
2339 struct ip_vs_dest_user *udest_compat;
2340 struct ip_vs_dest_user_kern udest;
ae1d48b2 2341 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4
LT
2342
2343 if (!capable(CAP_NET_ADMIN))
2344 return -EPERM;
2345
04bcef2a
AV
2346 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2347 return -EINVAL;
2348 if (len < 0 || len > MAX_ARG_LEN)
2349 return -EINVAL;
1da177e4 2350 if (len != set_arglen[SET_CMDID(cmd)]) {
1e3e238e
HE
2351 pr_err("set_ctl: len %u != %u\n",
2352 len, set_arglen[SET_CMDID(cmd)]);
1da177e4
LT
2353 return -EINVAL;
2354 }
2355
2356 if (copy_from_user(arg, user, len) != 0)
2357 return -EFAULT;
2358
2359 /* increase the module use count */
2360 ip_vs_use_count_inc();
2361
ae1d48b2
HS
2362 /* Handle daemons since they have another lock */
2363 if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2364 cmd == IP_VS_SO_SET_STOPDAEMON) {
2365 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2366
2367 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2368 ret = -ERESTARTSYS;
2369 goto out_dec;
2370 }
2371 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2372 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2373 dm->syncid);
2374 else
2375 ret = stop_sync_thread(net, dm->state);
2376 mutex_unlock(&ipvs->sync_mutex);
2377 goto out_dec;
2378 }
2379
14cc3e2b 2380 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2381 ret = -ERESTARTSYS;
2382 goto out_dec;
2383 }
2384
2385 if (cmd == IP_VS_SO_SET_FLUSH) {
2386 /* Flush the virtual service */
fc723250 2387 ret = ip_vs_flush(net);
1da177e4
LT
2388 goto out_unlock;
2389 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2390 /* Set timeout values for (tcp tcpfin udp) */
9330419d 2391 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
1da177e4 2392 goto out_unlock;
1da177e4
LT
2393 }
2394
c860c6b1
JV
2395 usvc_compat = (struct ip_vs_service_user *)arg;
2396 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2397
2398 /* We only use the new structs internally, so copy userspace compat
2399 * structs to extended internal versions */
2400 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2401 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2402
2403 if (cmd == IP_VS_SO_SET_ZERO) {
2404 /* if no service address is set, zero counters in all */
c860c6b1 2405 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
fc723250 2406 ret = ip_vs_zero_all(net);
1da177e4
LT
2407 goto out_unlock;
2408 }
2409 }
2410
2906f66a
VMR
2411 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2412 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2413 usvc.protocol != IPPROTO_SCTP) {
1e3e238e
HE
2414 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2415 usvc.protocol, &usvc.addr.ip,
2416 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2417 ret = -EFAULT;
2418 goto out_unlock;
2419 }
2420
2421 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2422 if (usvc.fwmark == 0)
fc723250 2423 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
26c15cfd 2424 &usvc.addr, usvc.port);
1da177e4 2425 else
fc723250 2426 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
1da177e4
LT
2427
2428 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2429 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4 2430 ret = -ESRCH;
26c15cfd 2431 goto out_unlock;
1da177e4
LT
2432 }
2433
2434 switch (cmd) {
2435 case IP_VS_SO_SET_ADD:
2436 if (svc != NULL)
2437 ret = -EEXIST;
2438 else
fc723250 2439 ret = ip_vs_add_service(net, &usvc, &svc);
1da177e4
LT
2440 break;
2441 case IP_VS_SO_SET_EDIT:
c860c6b1 2442 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2443 break;
2444 case IP_VS_SO_SET_DEL:
2445 ret = ip_vs_del_service(svc);
2446 if (!ret)
2447 goto out_unlock;
2448 break;
2449 case IP_VS_SO_SET_ZERO:
2450 ret = ip_vs_zero_service(svc);
2451 break;
2452 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2453 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2454 break;
2455 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2456 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2457 break;
2458 case IP_VS_SO_SET_DELDEST:
c860c6b1 2459 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2460 break;
2461 default:
2462 ret = -EINVAL;
2463 }
2464
1da177e4 2465 out_unlock:
14cc3e2b 2466 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2467 out_dec:
2468 /* decrease the module use count */
2469 ip_vs_use_count_dec();
2470
2471 return ret;
2472}
2473
2474
1da177e4
LT
2475static void
2476ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2477{
2478 dst->protocol = src->protocol;
e7ade46a 2479 dst->addr = src->addr.ip;
1da177e4
LT
2480 dst->port = src->port;
2481 dst->fwmark = src->fwmark;
4da62fc7 2482 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2483 dst->flags = src->flags;
2484 dst->timeout = src->timeout / HZ;
2485 dst->netmask = src->netmask;
2486 dst->num_dests = src->num_dests;
2487 ip_vs_copy_stats(&dst->stats, &src->stats);
2488}
2489
2490static inline int
fc723250
HS
2491__ip_vs_get_service_entries(struct net *net,
2492 const struct ip_vs_get_services *get,
1da177e4
LT
2493 struct ip_vs_get_services __user *uptr)
2494{
2495 int idx, count=0;
2496 struct ip_vs_service *svc;
2497 struct ip_vs_service_entry entry;
2498 int ret = 0;
2499
2500 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2501 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
f94fd041 2502 /* Only expose IPv4 entries to old interface */
fc723250 2503 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2504 continue;
2505
1da177e4
LT
2506 if (count >= get->num_services)
2507 goto out;
4da62fc7 2508 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2509 ip_vs_copy_service(&entry, svc);
2510 if (copy_to_user(&uptr->entrytable[count],
2511 &entry, sizeof(entry))) {
2512 ret = -EFAULT;
2513 goto out;
2514 }
2515 count++;
2516 }
2517 }
2518
2519 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2520 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
f94fd041 2521 /* Only expose IPv4 entries to old interface */
fc723250 2522 if (svc->af != AF_INET || !net_eq(svc->net, net))
f94fd041
JV
2523 continue;
2524
1da177e4
LT
2525 if (count >= get->num_services)
2526 goto out;
4da62fc7 2527 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2528 ip_vs_copy_service(&entry, svc);
2529 if (copy_to_user(&uptr->entrytable[count],
2530 &entry, sizeof(entry))) {
2531 ret = -EFAULT;
2532 goto out;
2533 }
2534 count++;
2535 }
2536 }
552ad65a 2537out:
1da177e4
LT
2538 return ret;
2539}
2540
2541static inline int
fc723250 2542__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
1da177e4
LT
2543 struct ip_vs_get_dests __user *uptr)
2544{
2545 struct ip_vs_service *svc;
b18610de 2546 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2547 int ret = 0;
2548
2549 if (get->fwmark)
fc723250 2550 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
1da177e4 2551 else
fc723250 2552 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
26c15cfd 2553 get->port);
b18610de 2554
1da177e4
LT
2555 if (svc) {
2556 int count = 0;
2557 struct ip_vs_dest *dest;
2558 struct ip_vs_dest_entry entry;
2559
2560 list_for_each_entry(dest, &svc->destinations, n_list) {
2561 if (count >= get->num_dests)
2562 break;
2563
e7ade46a 2564 entry.addr = dest->addr.ip;
1da177e4
LT
2565 entry.port = dest->port;
2566 entry.conn_flags = atomic_read(&dest->conn_flags);
2567 entry.weight = atomic_read(&dest->weight);
2568 entry.u_threshold = dest->u_threshold;
2569 entry.l_threshold = dest->l_threshold;
2570 entry.activeconns = atomic_read(&dest->activeconns);
2571 entry.inactconns = atomic_read(&dest->inactconns);
2572 entry.persistconns = atomic_read(&dest->persistconns);
2573 ip_vs_copy_stats(&entry.stats, &dest->stats);
2574 if (copy_to_user(&uptr->entrytable[count],
2575 &entry, sizeof(entry))) {
2576 ret = -EFAULT;
2577 break;
2578 }
2579 count++;
2580 }
1da177e4
LT
2581 } else
2582 ret = -ESRCH;
2583 return ret;
2584}
2585
/*
 * Read the tcp, tcpfin and udp timeouts of @net from the per-protocol
 * timeout tables into @u, dividing each stored value by HZ.  Members whose
 * protocol is not compiled in are left untouched.
 */
static inline void
__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

#ifdef CONFIG_IP_VS_PROTO_TCP
	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
	u->tcp_timeout		= pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout	= pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
	u->udp_timeout		= pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
2604
2605
/*
 * Helpers for the get_arglen[] table: GET_CMDID() turns a sockopt command
 * number into a zero-based table index, the GET_*_ARG_LEN macros give the
 * minimum argument size of each command.
 * The macro argument is parenthesized so that any expression may be passed.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2613
9b5b5cff 2614static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2615 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2616 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2617 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2618 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2619 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2620 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2621 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2622};
2623
2624static int
2625do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2626{
2627 unsigned char arg[128];
2628 int ret = 0;
04bcef2a 2629 unsigned int copylen;
fc723250 2630 struct net *net = sock_net(sk);
f131315f 2631 struct netns_ipvs *ipvs = net_ipvs(net);
1da177e4 2632
fc723250 2633 BUG_ON(!net);
1da177e4
LT
2634 if (!capable(CAP_NET_ADMIN))
2635 return -EPERM;
2636
04bcef2a
AV
2637 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2638 return -EINVAL;
2639
1da177e4 2640 if (*len < get_arglen[GET_CMDID(cmd)]) {
1e3e238e
HE
2641 pr_err("get_ctl: len %u < %u\n",
2642 *len, get_arglen[GET_CMDID(cmd)]);
1da177e4
LT
2643 return -EINVAL;
2644 }
2645
04bcef2a
AV
2646 copylen = get_arglen[GET_CMDID(cmd)];
2647 if (copylen > 128)
2648 return -EINVAL;
2649
2650 if (copy_from_user(arg, user, copylen) != 0)
1da177e4 2651 return -EFAULT;
ae1d48b2
HS
2652 /*
2653 * Handle daemons first since it has its own locking
2654 */
2655 if (cmd == IP_VS_SO_GET_DAEMON) {
2656 struct ip_vs_daemon_user d[2];
2657
2658 memset(&d, 0, sizeof(d));
2659 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2660 return -ERESTARTSYS;
2661
2662 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2663 d[0].state = IP_VS_STATE_MASTER;
2664 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2665 sizeof(d[0].mcast_ifn));
2666 d[0].syncid = ipvs->master_syncid;
2667 }
2668 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2669 d[1].state = IP_VS_STATE_BACKUP;
2670 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2671 sizeof(d[1].mcast_ifn));
2672 d[1].syncid = ipvs->backup_syncid;
2673 }
2674 if (copy_to_user(user, &d, sizeof(d)) != 0)
2675 ret = -EFAULT;
2676 mutex_unlock(&ipvs->sync_mutex);
2677 return ret;
2678 }
1da177e4 2679
14cc3e2b 2680 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2681 return -ERESTARTSYS;
2682
2683 switch (cmd) {
2684 case IP_VS_SO_GET_VERSION:
2685 {
2686 char buf[64];
2687
2688 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
6f7edb48 2689 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1da177e4
LT
2690 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2691 ret = -EFAULT;
2692 goto out;
2693 }
2694 *len = strlen(buf)+1;
2695 }
2696 break;
2697
2698 case IP_VS_SO_GET_INFO:
2699 {
2700 struct ip_vs_getinfo info;
2701 info.version = IP_VS_VERSION_CODE;
6f7edb48 2702 info.size = ip_vs_conn_tab_size;
a0840e2e 2703 info.num_services = ipvs->num_services;
1da177e4
LT
2704 if (copy_to_user(user, &info, sizeof(info)) != 0)
2705 ret = -EFAULT;
2706 }
2707 break;
2708
2709 case IP_VS_SO_GET_SERVICES:
2710 {
2711 struct ip_vs_get_services *get;
2712 int size;
2713
2714 get = (struct ip_vs_get_services *)arg;
2715 size = sizeof(*get) +
2716 sizeof(struct ip_vs_service_entry) * get->num_services;
2717 if (*len != size) {
1e3e238e 2718 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2719 ret = -EINVAL;
2720 goto out;
2721 }
fc723250 2722 ret = __ip_vs_get_service_entries(net, get, user);
1da177e4
LT
2723 }
2724 break;
2725
2726 case IP_VS_SO_GET_SERVICE:
2727 {
2728 struct ip_vs_service_entry *entry;
2729 struct ip_vs_service *svc;
b18610de 2730 union nf_inet_addr addr;
1da177e4
LT
2731
2732 entry = (struct ip_vs_service_entry *)arg;
b18610de 2733 addr.ip = entry->addr;
1da177e4 2734 if (entry->fwmark)
fc723250 2735 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
1da177e4 2736 else
fc723250
HS
2737 svc = __ip_vs_service_find(net, AF_INET,
2738 entry->protocol, &addr,
2739 entry->port);
1da177e4
LT
2740 if (svc) {
2741 ip_vs_copy_service(entry, svc);
2742 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2743 ret = -EFAULT;
1da177e4
LT
2744 } else
2745 ret = -ESRCH;
2746 }
2747 break;
2748
2749 case IP_VS_SO_GET_DESTS:
2750 {
2751 struct ip_vs_get_dests *get;
2752 int size;
2753
2754 get = (struct ip_vs_get_dests *)arg;
2755 size = sizeof(*get) +
2756 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2757 if (*len != size) {
1e3e238e 2758 pr_err("length: %u != %u\n", *len, size);
1da177e4
LT
2759 ret = -EINVAL;
2760 goto out;
2761 }
fc723250 2762 ret = __ip_vs_get_dest_entries(net, get, user);
1da177e4
LT
2763 }
2764 break;
2765
2766 case IP_VS_SO_GET_TIMEOUT:
2767 {
2768 struct ip_vs_timeout_user t;
2769
2d8a041b 2770 memset(&t, 0, sizeof(t));
9330419d 2771 __ip_vs_get_timeouts(net, &t);
1da177e4
LT
2772 if (copy_to_user(user, &t, sizeof(t)) != 0)
2773 ret = -EFAULT;
2774 }
2775 break;
2776
1da177e4
LT
2777 default:
2778 ret = -EINVAL;
2779 }
2780
552ad65a 2781out:
14cc3e2b 2782 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2783 return ret;
2784}
2785
2786
2787static struct nf_sockopt_ops ip_vs_sockopts = {
2788 .pf = PF_INET,
2789 .set_optmin = IP_VS_BASE_CTL,
2790 .set_optmax = IP_VS_SO_SET_MAX+1,
2791 .set = do_ip_vs_set_ctl,
2792 .get_optmin = IP_VS_BASE_CTL,
2793 .get_optmax = IP_VS_SO_GET_MAX+1,
2794 .get = do_ip_vs_get_ctl,
16fcec35 2795 .owner = THIS_MODULE,
1da177e4
LT
2796};
2797
9a812198
JV
2798/*
2799 * Generic Netlink interface
2800 */
2801
2802/* IPVS genetlink family */
2803static struct genl_family ip_vs_genl_family = {
2804 .id = GENL_ID_GENERATE,
2805 .hdrsize = 0,
2806 .name = IPVS_GENL_NAME,
2807 .version = IPVS_GENL_VERSION,
2808 .maxattr = IPVS_CMD_MAX,
c6d2d445 2809 .netnsok = true, /* Make ipvsadm to work on netns */
9a812198
JV
2810};
2811
2812/* Policy used for first-level command attributes */
2813static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2814 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2815 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2816 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2817 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2818 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2819 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2820};
2821
2822/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2823static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2824 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2825 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2826 .len = IP_VS_IFNAME_MAXLEN },
2827 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2828};
2829
2830/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2831static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2832 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2833 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2834 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2835 .len = sizeof(union nf_inet_addr) },
2836 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2837 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2838 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2839 .len = IP_VS_SCHEDNAME_MAXLEN },
0d1e71b0
SH
2840 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2841 .len = IP_VS_PENAME_MAXLEN },
9a812198
JV
2842 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2843 .len = sizeof(struct ip_vs_flags) },
2844 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2845 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2846 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2847};
2848
2849/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2850static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2851 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2852 .len = sizeof(union nf_inet_addr) },
2853 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2854 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2855 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2856 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2857 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2858 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2859 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2860 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2861 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2862};
2863
2864static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2865 struct ip_vs_stats *stats)
2866{
55a3d4e1 2867 struct ip_vs_stats_user ustats;
9a812198
JV
2868 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2869 if (!nl_stats)
2870 return -EMSGSIZE;
2871
55a3d4e1 2872 ip_vs_copy_stats(&ustats, stats);
9a812198 2873
969e8e25
DM
2874 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2875 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2876 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2877 nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2878 nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2879 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2880 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2881 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2882 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2883 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2884 goto nla_put_failure;
9a812198
JV
2885 nla_nest_end(skb, nl_stats);
2886
2887 return 0;
2888
2889nla_put_failure:
9a812198
JV
2890 nla_nest_cancel(skb, nl_stats);
2891 return -EMSGSIZE;
2892}
2893
2894static int ip_vs_genl_fill_service(struct sk_buff *skb,
2895 struct ip_vs_service *svc)
2896{
2897 struct nlattr *nl_service;
2898 struct ip_vs_flags flags = { .flags = svc->flags,
2899 .mask = ~0 };
2900
2901 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2902 if (!nl_service)
2903 return -EMSGSIZE;
2904
969e8e25
DM
2905 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2906 goto nla_put_failure;
9a812198 2907 if (svc->fwmark) {
969e8e25
DM
2908 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2909 goto nla_put_failure;
9a812198 2910 } else {
969e8e25
DM
2911 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2912 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2913 nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2914 goto nla_put_failure;
9a812198
JV
2915 }
2916
969e8e25
DM
2917 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2918 (svc->pe &&
2919 nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2920 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2921 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2922 nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2923 goto nla_put_failure;
9a812198
JV
2924 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2925 goto nla_put_failure;
2926
2927 nla_nest_end(skb, nl_service);
2928
2929 return 0;
2930
2931nla_put_failure:
2932 nla_nest_cancel(skb, nl_service);
2933 return -EMSGSIZE;
2934}
2935
2936static int ip_vs_genl_dump_service(struct sk_buff *skb,
2937 struct ip_vs_service *svc,
2938 struct netlink_callback *cb)
2939{
2940 void *hdr;
2941
15e47304 2942 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
2943 &ip_vs_genl_family, NLM_F_MULTI,
2944 IPVS_CMD_NEW_SERVICE);
2945 if (!hdr)
2946 return -EMSGSIZE;
2947
2948 if (ip_vs_genl_fill_service(skb, svc) < 0)
2949 goto nla_put_failure;
2950
2951 return genlmsg_end(skb, hdr);
2952
2953nla_put_failure:
2954 genlmsg_cancel(skb, hdr);
2955 return -EMSGSIZE;
2956}
2957
2958static int ip_vs_genl_dump_services(struct sk_buff *skb,
2959 struct netlink_callback *cb)
2960{
2961 int idx = 0, i;
2962 int start = cb->args[0];
2963 struct ip_vs_service *svc;
fc723250 2964 struct net *net = skb_sknet(skb);
9a812198
JV
2965
2966 mutex_lock(&__ip_vs_mutex);
2967 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2968 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
fc723250 2969 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2970 continue;
2971 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2972 idx--;
2973 goto nla_put_failure;
2974 }
2975 }
2976 }
2977
2978 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2979 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
fc723250 2980 if (++idx <= start || !net_eq(svc->net, net))
9a812198
JV
2981 continue;
2982 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2983 idx--;
2984 goto nla_put_failure;
2985 }
2986 }
2987 }
2988
2989nla_put_failure:
2990 mutex_unlock(&__ip_vs_mutex);
2991 cb->args[0] = idx;
2992
2993 return skb->len;
2994}
2995
fc723250
HS
2996static int ip_vs_genl_parse_service(struct net *net,
2997 struct ip_vs_service_user_kern *usvc,
26c15cfd
JA
2998 struct nlattr *nla, int full_entry,
2999 struct ip_vs_service **ret_svc)
9a812198
JV
3000{
3001 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3002 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
26c15cfd 3003 struct ip_vs_service *svc;
9a812198
JV
3004
3005 /* Parse mandatory identifying service fields first */
3006 if (nla == NULL ||
3007 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3008 return -EINVAL;
3009
3010 nla_af = attrs[IPVS_SVC_ATTR_AF];
3011 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
3012 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
3013 nla_port = attrs[IPVS_SVC_ATTR_PORT];
3014 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
3015
3016 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3017 return -EINVAL;
3018
258c8893
SH
3019 memset(usvc, 0, sizeof(*usvc));
3020
c860c6b1 3021 usvc->af = nla_get_u16(nla_af);
f94fd041
JV
3022#ifdef CONFIG_IP_VS_IPV6
3023 if (usvc->af != AF_INET && usvc->af != AF_INET6)
3024#else
3025 if (usvc->af != AF_INET)
3026#endif
9a812198
JV
3027 return -EAFNOSUPPORT;
3028
3029 if (nla_fwmark) {
3030 usvc->protocol = IPPROTO_TCP;
3031 usvc->fwmark = nla_get_u32(nla_fwmark);
3032 } else {
3033 usvc->protocol = nla_get_u16(nla_protocol);
3034 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3035 usvc->port = nla_get_u16(nla_port);
3036 usvc->fwmark = 0;
3037 }
3038
26c15cfd 3039 if (usvc->fwmark)
fc723250 3040 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
26c15cfd 3041 else
fc723250 3042 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
26c15cfd
JA
3043 &usvc->addr, usvc->port);
3044 *ret_svc = svc;
3045
9a812198
JV
3046 /* If a full entry was requested, check for the additional fields */
3047 if (full_entry) {
0d1e71b0 3048 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
9a812198
JV
3049 *nla_netmask;
3050 struct ip_vs_flags flags;
9a812198
JV
3051
3052 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
0d1e71b0 3053 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
9a812198
JV
3054 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3055 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3056 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3057
3058 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3059 return -EINVAL;
3060
3061 nla_memcpy(&flags, nla_flags, sizeof(flags));
3062
3063 /* prefill flags from service if it already exists */
26c15cfd 3064 if (svc)
9a812198 3065 usvc->flags = svc->flags;
9a812198
JV
3066
3067 /* set new flags from userland */
3068 usvc->flags = (usvc->flags & ~flags.mask) |
3069 (flags.flags & flags.mask);
c860c6b1 3070 usvc->sched_name = nla_data(nla_sched);
0d1e71b0 3071 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
9a812198
JV
3072 usvc->timeout = nla_get_u32(nla_timeout);
3073 usvc->netmask = nla_get_u32(nla_netmask);
3074 }
3075
3076 return 0;
3077}
3078
fc723250
HS
3079static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3080 struct nlattr *nla)
9a812198 3081{
c860c6b1 3082 struct ip_vs_service_user_kern usvc;
26c15cfd 3083 struct ip_vs_service *svc;
9a812198
JV
3084 int ret;
3085
fc723250 3086 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
26c15cfd 3087 return ret ? ERR_PTR(ret) : svc;
9a812198
JV
3088}
3089
3090static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3091{
3092 struct nlattr *nl_dest;
3093
3094 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3095 if (!nl_dest)
3096 return -EMSGSIZE;
3097
969e8e25
DM
3098 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3099 nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3100 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3101 (atomic_read(&dest->conn_flags) &
3102 IP_VS_CONN_F_FWD_MASK)) ||
3103 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3104 atomic_read(&dest->weight)) ||
3105 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3106 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3107 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3108 atomic_read(&dest->activeconns)) ||
3109 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3110 atomic_read(&dest->inactconns)) ||
3111 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3112 atomic_read(&dest->persistconns)))
3113 goto nla_put_failure;
9a812198
JV
3114 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3115 goto nla_put_failure;
3116
3117 nla_nest_end(skb, nl_dest);
3118
3119 return 0;
3120
3121nla_put_failure:
3122 nla_nest_cancel(skb, nl_dest);
3123 return -EMSGSIZE;
3124}
3125
3126static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3127 struct netlink_callback *cb)
3128{
3129 void *hdr;
3130
15e47304 3131 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
3132 &ip_vs_genl_family, NLM_F_MULTI,
3133 IPVS_CMD_NEW_DEST);
3134 if (!hdr)
3135 return -EMSGSIZE;
3136
3137 if (ip_vs_genl_fill_dest(skb, dest) < 0)
3138 goto nla_put_failure;
3139
3140 return genlmsg_end(skb, hdr);
3141
3142nla_put_failure:
3143 genlmsg_cancel(skb, hdr);
3144 return -EMSGSIZE;
3145}
3146
3147static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3148 struct netlink_callback *cb)
3149{
3150 int idx = 0;
3151 int start = cb->args[0];
3152 struct ip_vs_service *svc;
3153 struct ip_vs_dest *dest;
3154 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
a0840e2e 3155 struct net *net = skb_sknet(skb);
9a812198
JV
3156
3157 mutex_lock(&__ip_vs_mutex);
3158
3159 /* Try to find the service for which to dump destinations */
3160 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3161 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3162 goto out_err;
3163
a0840e2e 3164
fc723250 3165 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3166 if (IS_ERR(svc) || svc == NULL)
3167 goto out_err;
3168
3169 /* Dump the destinations */
3170 list_for_each_entry(dest, &svc->destinations, n_list) {
3171 if (++idx <= start)
3172 continue;
3173 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3174 idx--;
3175 goto nla_put_failure;
3176 }
3177 }
3178
3179nla_put_failure:
3180 cb->args[0] = idx;
9a812198
JV
3181
3182out_err:
3183 mutex_unlock(&__ip_vs_mutex);
3184
3185 return skb->len;
3186}
3187
c860c6b1 3188static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
3189 struct nlattr *nla, int full_entry)
3190{
3191 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3192 struct nlattr *nla_addr, *nla_port;
3193
3194 /* Parse mandatory identifying destination fields first */
3195 if (nla == NULL ||
3196 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3197 return -EINVAL;
3198
3199 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3200 nla_port = attrs[IPVS_DEST_ATTR_PORT];
3201
3202 if (!(nla_addr && nla_port))
3203 return -EINVAL;
3204
258c8893
SH
3205 memset(udest, 0, sizeof(*udest));
3206
9a812198
JV
3207 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3208 udest->port = nla_get_u16(nla_port);
3209
3210 /* If a full entry was requested, check for the additional fields */
3211 if (full_entry) {
3212 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3213 *nla_l_thresh;
3214
3215 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3216 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3217 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3218 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3219
3220 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3221 return -EINVAL;
3222
3223 udest->conn_flags = nla_get_u32(nla_fwd)
3224 & IP_VS_CONN_F_FWD_MASK;
3225 udest->weight = nla_get_u32(nla_weight);
3226 udest->u_threshold = nla_get_u32(nla_u_thresh);
3227 udest->l_threshold = nla_get_u32(nla_l_thresh);
3228 }
3229
3230 return 0;
3231}
3232
3233static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3234 const char *mcast_ifn, __be32 syncid)
3235{
3236 struct nlattr *nl_daemon;
3237
3238 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3239 if (!nl_daemon)
3240 return -EMSGSIZE;
3241
969e8e25
DM
3242 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3243 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3244 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3245 goto nla_put_failure;
9a812198
JV
3246 nla_nest_end(skb, nl_daemon);
3247
3248 return 0;
3249
3250nla_put_failure:
3251 nla_nest_cancel(skb, nl_daemon);
3252 return -EMSGSIZE;
3253}
3254
3255static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3256 const char *mcast_ifn, __be32 syncid,
3257 struct netlink_callback *cb)
3258{
3259 void *hdr;
15e47304 3260 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
9a812198
JV
3261 &ip_vs_genl_family, NLM_F_MULTI,
3262 IPVS_CMD_NEW_DAEMON);
3263 if (!hdr)
3264 return -EMSGSIZE;
3265
3266 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3267 goto nla_put_failure;
3268
3269 return genlmsg_end(skb, hdr);
3270
3271nla_put_failure:
3272 genlmsg_cancel(skb, hdr);
3273 return -EMSGSIZE;
3274}
3275
3276static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3277 struct netlink_callback *cb)
3278{
a09d1977 3279 struct net *net = skb_sknet(skb);
f131315f
HS
3280 struct netns_ipvs *ipvs = net_ipvs(net);
3281
ae1d48b2 3282 mutex_lock(&ipvs->sync_mutex);
f131315f 3283 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
9a812198 3284 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
f131315f
HS
3285 ipvs->master_mcast_ifn,
3286 ipvs->master_syncid, cb) < 0)
9a812198
JV
3287 goto nla_put_failure;
3288
3289 cb->args[0] = 1;
3290 }
3291
f131315f 3292 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
9a812198 3293 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
f131315f
HS
3294 ipvs->backup_mcast_ifn,
3295 ipvs->backup_syncid, cb) < 0)
9a812198
JV
3296 goto nla_put_failure;
3297
3298 cb->args[1] = 1;
3299 }
3300
3301nla_put_failure:
ae1d48b2 3302 mutex_unlock(&ipvs->sync_mutex);
9a812198
JV
3303
3304 return skb->len;
3305}
3306
f131315f 3307static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3308{
3309 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3310 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3311 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3312 return -EINVAL;
3313
f131315f
HS
3314 return start_sync_thread(net,
3315 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
9a812198
JV
3316 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3317 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3318}
3319
f131315f 3320static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
9a812198
JV
3321{
3322 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3323 return -EINVAL;
3324
f131315f
HS
3325 return stop_sync_thread(net,
3326 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
9a812198
JV
3327}
3328
9330419d 3329static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
9a812198
JV
3330{
3331 struct ip_vs_timeout_user t;
3332
9330419d 3333 __ip_vs_get_timeouts(net, &t);
9a812198
JV
3334
3335 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3336 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3337
3338 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3339 t.tcp_fin_timeout =
3340 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3341
3342 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3343 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3344
9330419d 3345 return ip_vs_set_timeout(net, &t);
9a812198
JV
3346}
3347
ae1d48b2 3348static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
9a812198 3349{
9a812198 3350 int ret = 0, cmd;
fc723250 3351 struct net *net;
a0840e2e 3352 struct netns_ipvs *ipvs;
9a812198 3353
fc723250 3354 net = skb_sknet(skb);
a0840e2e 3355 ipvs = net_ipvs(net);
9a812198
JV
3356 cmd = info->genlhdr->cmd;
3357
ae1d48b2 3358 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
9a812198
JV
3359 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3360
ae1d48b2 3361 mutex_lock(&ipvs->sync_mutex);
9a812198
JV
3362 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3363 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3364 info->attrs[IPVS_CMD_ATTR_DAEMON],
3365 ip_vs_daemon_policy)) {
3366 ret = -EINVAL;
3367 goto out;
3368 }
3369
3370 if (cmd == IPVS_CMD_NEW_DAEMON)
f131315f 3371 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
9a812198 3372 else
f131315f 3373 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
ae1d48b2
HS
3374out:
3375 mutex_unlock(&ipvs->sync_mutex);
3376 }
3377 return ret;
3378}
3379
3380static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3381{
3382 struct ip_vs_service *svc = NULL;
3383 struct ip_vs_service_user_kern usvc;
3384 struct ip_vs_dest_user_kern udest;
3385 int ret = 0, cmd;
3386 int need_full_svc = 0, need_full_dest = 0;
3387 struct net *net;
ae1d48b2
HS
3388
3389 net = skb_sknet(skb);
ae1d48b2
HS
3390 cmd = info->genlhdr->cmd;
3391
3392 mutex_lock(&__ip_vs_mutex);
3393
3394 if (cmd == IPVS_CMD_FLUSH) {
3395 ret = ip_vs_flush(net);
3396 goto out;
3397 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3398 ret = ip_vs_genl_set_config(net, info->attrs);
9a812198
JV
3399 goto out;
3400 } else if (cmd == IPVS_CMD_ZERO &&
3401 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
fc723250 3402 ret = ip_vs_zero_all(net);
9a812198
JV
3403 goto out;
3404 }
3405
3406 /* All following commands require a service argument, so check if we
3407 * received a valid one. We need a full service specification when
3408 * adding / editing a service. Only identifying members otherwise. */
3409 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3410 need_full_svc = 1;
3411
fc723250 3412 ret = ip_vs_genl_parse_service(net, &usvc,
9a812198 3413 info->attrs[IPVS_CMD_ATTR_SERVICE],
26c15cfd 3414 need_full_svc, &svc);
9a812198
JV
3415 if (ret)
3416 goto out;
3417
9a812198
JV
3418 /* Unless we're adding a new service, the service must already exist */
3419 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3420 ret = -ESRCH;
3421 goto out;
3422 }
3423
3424 /* Destination commands require a valid destination argument. For
3425 * adding / editing a destination, we need a full destination
3426 * specification. */
3427 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3428 cmd == IPVS_CMD_DEL_DEST) {
3429 if (cmd != IPVS_CMD_DEL_DEST)
3430 need_full_dest = 1;
3431
3432 ret = ip_vs_genl_parse_dest(&udest,
3433 info->attrs[IPVS_CMD_ATTR_DEST],
3434 need_full_dest);
3435 if (ret)
3436 goto out;
3437 }
3438
3439 switch (cmd) {
3440 case IPVS_CMD_NEW_SERVICE:
3441 if (svc == NULL)
fc723250 3442 ret = ip_vs_add_service(net, &usvc, &svc);
9a812198
JV
3443 else
3444 ret = -EEXIST;
3445 break;
3446 case IPVS_CMD_SET_SERVICE:
3447 ret = ip_vs_edit_service(svc, &usvc);
3448 break;
3449 case IPVS_CMD_DEL_SERVICE:
3450 ret = ip_vs_del_service(svc);
26c15cfd 3451 /* do not use svc, it can be freed */
9a812198
JV
3452 break;
3453 case IPVS_CMD_NEW_DEST:
3454 ret = ip_vs_add_dest(svc, &udest);
3455 break;
3456 case IPVS_CMD_SET_DEST:
3457 ret = ip_vs_edit_dest(svc, &udest);
3458 break;
3459 case IPVS_CMD_DEL_DEST:
3460 ret = ip_vs_del_dest(svc, &udest);
3461 break;
3462 case IPVS_CMD_ZERO:
3463 ret = ip_vs_zero_service(svc);
3464 break;
3465 default:
3466 ret = -EINVAL;
3467 }
3468
3469out:
9a812198
JV
3470 mutex_unlock(&__ip_vs_mutex);
3471
3472 return ret;
3473}
3474
3475static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3476{
3477 struct sk_buff *msg;
3478 void *reply;
3479 int ret, cmd, reply_cmd;
fc723250 3480 struct net *net;
9a812198 3481
fc723250 3482 net = skb_sknet(skb);
9a812198
JV
3483 cmd = info->genlhdr->cmd;
3484
3485 if (cmd == IPVS_CMD_GET_SERVICE)
3486 reply_cmd = IPVS_CMD_NEW_SERVICE;
3487 else if (cmd == IPVS_CMD_GET_INFO)
3488 reply_cmd = IPVS_CMD_SET_INFO;
3489 else if (cmd == IPVS_CMD_GET_CONFIG)
3490 reply_cmd = IPVS_CMD_SET_CONFIG;
3491 else {
1e3e238e 3492 pr_err("unknown Generic Netlink command\n");
9a812198
JV
3493 return -EINVAL;
3494 }
3495
3496 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3497 if (!msg)
3498 return -ENOMEM;
3499
3500 mutex_lock(&__ip_vs_mutex);
3501
3502 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3503 if (reply == NULL)
3504 goto nla_put_failure;
3505
3506 switch (cmd) {
3507 case IPVS_CMD_GET_SERVICE:
3508 {
3509 struct ip_vs_service *svc;
3510
fc723250
HS
3511 svc = ip_vs_genl_find_service(net,
3512 info->attrs[IPVS_CMD_ATTR_SERVICE]);
9a812198
JV
3513 if (IS_ERR(svc)) {
3514 ret = PTR_ERR(svc);
3515 goto out_err;
3516 } else if (svc) {
3517 ret = ip_vs_genl_fill_service(msg, svc);
9a812198
JV
3518 if (ret)
3519 goto nla_put_failure;
3520 } else {
3521 ret = -ESRCH;
3522 goto out_err;
3523 }
3524
3525 break;
3526 }
3527
3528 case IPVS_CMD_GET_CONFIG:
3529 {
3530 struct ip_vs_timeout_user t;
3531
9330419d 3532 __ip_vs_get_timeouts(net, &t);
9a812198 3533#ifdef CONFIG_IP_VS_PROTO_TCP
969e8e25
DM
3534 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3535 t.tcp_timeout) ||
3536 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3537 t.tcp_fin_timeout))
3538 goto nla_put_failure;
9a812198
JV
3539#endif
3540#ifdef CONFIG_IP_VS_PROTO_UDP
969e8e25
DM
3541 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3542 goto nla_put_failure;
9a812198
JV
3543#endif
3544
3545 break;
3546 }
3547
3548 case IPVS_CMD_GET_INFO:
969e8e25
DM
3549 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3550 IP_VS_VERSION_CODE) ||
3551 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3552 ip_vs_conn_tab_size))
3553 goto nla_put_failure;
9a812198
JV
3554 break;
3555 }
3556
3557 genlmsg_end(msg, reply);
134e6375 3558 ret = genlmsg_reply(msg, info);
9a812198
JV
3559 goto out;
3560
3561nla_put_failure:
1e3e238e 3562 pr_err("not enough space in Netlink message\n");
9a812198
JV
3563 ret = -EMSGSIZE;
3564
3565out_err:
3566 nlmsg_free(msg);
3567out:
3568 mutex_unlock(&__ip_vs_mutex);
3569
3570 return ret;
3571}
3572
3573
3574static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3575 {
3576 .cmd = IPVS_CMD_NEW_SERVICE,
3577 .flags = GENL_ADMIN_PERM,
3578 .policy = ip_vs_cmd_policy,
3579 .doit = ip_vs_genl_set_cmd,
3580 },
3581 {
3582 .cmd = IPVS_CMD_SET_SERVICE,
3583 .flags = GENL_ADMIN_PERM,
3584 .policy = ip_vs_cmd_policy,
3585 .doit = ip_vs_genl_set_cmd,
3586 },
3587 {
3588 .cmd = IPVS_CMD_DEL_SERVICE,
3589 .flags = GENL_ADMIN_PERM,
3590 .policy = ip_vs_cmd_policy,
3591 .doit = ip_vs_genl_set_cmd,
3592 },
3593 {
3594 .cmd = IPVS_CMD_GET_SERVICE,
3595 .flags = GENL_ADMIN_PERM,
3596 .doit = ip_vs_genl_get_cmd,
3597 .dumpit = ip_vs_genl_dump_services,
3598 .policy = ip_vs_cmd_policy,
3599 },
3600 {
3601 .cmd = IPVS_CMD_NEW_DEST,
3602 .flags = GENL_ADMIN_PERM,
3603 .policy = ip_vs_cmd_policy,
3604 .doit = ip_vs_genl_set_cmd,
3605 },
3606 {
3607 .cmd = IPVS_CMD_SET_DEST,
3608 .flags = GENL_ADMIN_PERM,
3609 .policy = ip_vs_cmd_policy,
3610 .doit = ip_vs_genl_set_cmd,
3611 },
3612 {
3613 .cmd = IPVS_CMD_DEL_DEST,
3614 .flags = GENL_ADMIN_PERM,
3615 .policy = ip_vs_cmd_policy,
3616 .doit = ip_vs_genl_set_cmd,
3617 },
3618 {
3619 .cmd = IPVS_CMD_GET_DEST,
3620 .flags = GENL_ADMIN_PERM,
3621 .policy = ip_vs_cmd_policy,
3622 .dumpit = ip_vs_genl_dump_dests,
3623 },
3624 {
3625 .cmd = IPVS_CMD_NEW_DAEMON,
3626 .flags = GENL_ADMIN_PERM,
3627 .policy = ip_vs_cmd_policy,
ae1d48b2 3628 .doit = ip_vs_genl_set_daemon,
9a812198
JV
3629 },
3630 {
3631 .cmd = IPVS_CMD_DEL_DAEMON,
3632 .flags = GENL_ADMIN_PERM,
3633 .policy = ip_vs_cmd_policy,
ae1d48b2 3634 .doit = ip_vs_genl_set_daemon,
9a812198
JV
3635 },
3636 {
3637 .cmd = IPVS_CMD_GET_DAEMON,
3638 .flags = GENL_ADMIN_PERM,
3639 .dumpit = ip_vs_genl_dump_daemons,
3640 },
3641 {
3642 .cmd = IPVS_CMD_SET_CONFIG,
3643 .flags = GENL_ADMIN_PERM,
3644 .policy = ip_vs_cmd_policy,
3645 .doit = ip_vs_genl_set_cmd,
3646 },
3647 {
3648 .cmd = IPVS_CMD_GET_CONFIG,
3649 .flags = GENL_ADMIN_PERM,
3650 .doit = ip_vs_genl_get_cmd,
3651 },
3652 {
3653 .cmd = IPVS_CMD_GET_INFO,
3654 .flags = GENL_ADMIN_PERM,
3655 .doit = ip_vs_genl_get_cmd,
3656 },
3657 {
3658 .cmd = IPVS_CMD_ZERO,
3659 .flags = GENL_ADMIN_PERM,
3660 .policy = ip_vs_cmd_policy,
3661 .doit = ip_vs_genl_set_cmd,
3662 },
3663 {
3664 .cmd = IPVS_CMD_FLUSH,
3665 .flags = GENL_ADMIN_PERM,
3666 .doit = ip_vs_genl_set_cmd,
3667 },
3668};
3669
3670static int __init ip_vs_genl_register(void)
3671{
8f698d54
MM
3672 return genl_register_family_with_ops(&ip_vs_genl_family,
3673 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
9a812198
JV
3674}
3675
3676static void ip_vs_genl_unregister(void)
3677{
3678 genl_unregister_family(&ip_vs_genl_family);
3679}
3680
3681/* End of Generic Netlink interface definitions */
3682
61b1ab45
HS
3683/*
3684 * per netns init/exit func.
3685 */
14e40546 3686#ifdef CONFIG_SYSCTL
2b2d2808 3687static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
61b1ab45 3688{
fc723250
HS
3689 int idx;
3690 struct netns_ipvs *ipvs = net_ipvs(net);
a0840e2e 3691 struct ctl_table *tbl;
fc723250 3692
a0840e2e
HS
3693 atomic_set(&ipvs->dropentry, 0);
3694 spin_lock_init(&ipvs->dropentry_lock);
3695 spin_lock_init(&ipvs->droppacket_lock);
3696 spin_lock_init(&ipvs->securetcp_lock);
a0840e2e
HS
3697
3698 if (!net_eq(net, &init_net)) {
3699 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3700 if (tbl == NULL)
14e40546 3701 return -ENOMEM;
a0840e2e
HS
3702 } else
3703 tbl = vs_vars;
3704 /* Initialize sysctl defaults */
3705 idx = 0;
3706 ipvs->sysctl_amemthresh = 1024;
3707 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3708 ipvs->sysctl_am_droprate = 10;
3709 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3710 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3711 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3712#ifdef CONFIG_IP_VS_NFCT
3713 tbl[idx++].data = &ipvs->sysctl_conntrack;
3714#endif
3715 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3716 ipvs->sysctl_snat_reroute = 1;
3717 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3718 ipvs->sysctl_sync_ver = 1;
3719 tbl[idx++].data = &ipvs->sysctl_sync_ver;
f73181c8
PNA
3720 ipvs->sysctl_sync_ports = 1;
3721 tbl[idx++].data = &ipvs->sysctl_sync_ports;
1c003b15
PNA
3722 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3723 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3724 ipvs->sysctl_sync_sock_size = 0;
3725 tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
a0840e2e
HS
3726 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3727 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3728 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
59e0350e
SH
3729 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3730 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
a0840e2e
HS
3731 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3732 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
749c42b6
JA
3733 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3734 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3735 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3736 tbl[idx++].data = &ipvs->sysctl_sync_retries;
a0840e2e 3737 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3654e611
JA
3738 ipvs->sysctl_pmtu_disc = 1;
3739 tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
a0840e2e
HS
3740
3741
ec8f23ce 3742 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
0443929f
SH
3743 if (ipvs->sysctl_hdr == NULL) {
3744 if (!net_eq(net, &init_net))
3745 kfree(tbl);
14e40546 3746 return -ENOMEM;
0443929f 3747 }
6ef757f9 3748 ip_vs_start_estimator(net, &ipvs->tot_stats);
a0840e2e 3749 ipvs->sysctl_tbl = tbl;
f6340ee0
HS
3750 /* Schedule defense work */
3751 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3752 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
61b1ab45 3753
61b1ab45 3754 return 0;
61b1ab45
HS
3755}
3756
2b2d2808 3757static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
61b1ab45 3758{
b17fc996
HS
3759 struct netns_ipvs *ipvs = net_ipvs(net);
3760
f2431e6e
HS
3761 cancel_delayed_work_sync(&ipvs->defense_work);
3762 cancel_work_sync(&ipvs->defense_work.work);
a0840e2e 3763 unregister_net_sysctl_table(ipvs->sysctl_hdr);
14e40546
SH
3764}
3765
3766#else
3767
2b2d2808
CG
3768static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3769static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
14e40546 3770
0443929f 3771#endif
14e40546 3772
7a4f0761
HS
3773static struct notifier_block ip_vs_dst_notifier = {
3774 .notifier_call = ip_vs_dst_event,
3775};
3776
503cf15a 3777int __net_init ip_vs_control_net_init(struct net *net)
14e40546
SH
3778{
3779 int idx;
3780 struct netns_ipvs *ipvs = net_ipvs(net);
3781
3458e21c 3782 rwlock_init(&ipvs->rs_lock);
14e40546
SH
3783
3784 /* Initialize rs_table */
3785 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3786 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3787
3788 INIT_LIST_HEAD(&ipvs->dest_trash);
3789 atomic_set(&ipvs->ftpsvc_counter, 0);
3790 atomic_set(&ipvs->nullsvc_counter, 0);
3791
3792 /* procfs stats */
3793 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
0a9ee813 3794 if (!ipvs->tot_stats.cpustats)
14e40546 3795 return -ENOMEM;
0a9ee813 3796
14e40546
SH
3797 spin_lock_init(&ipvs->tot_stats.lock);
3798
3799 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3800 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3801 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3802 &ip_vs_stats_percpu_fops);
3803
503cf15a 3804 if (ip_vs_control_net_init_sysctl(net))
14e40546
SH
3805 goto err;
3806
3807 return 0;
3808
3809err:
2a0751af 3810 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3811 return -ENOMEM;
3812}
3813
503cf15a 3814void __net_exit ip_vs_control_net_cleanup(struct net *net)
61b1ab45 3815{
b17fc996
HS
3816 struct netns_ipvs *ipvs = net_ipvs(net);
3817
f2431e6e 3818 ip_vs_trash_cleanup(net);
6ef757f9 3819 ip_vs_stop_estimator(net, &ipvs->tot_stats);
503cf15a 3820 ip_vs_control_net_cleanup_sysctl(net);
b17fc996 3821 proc_net_remove(net, "ip_vs_stats_percpu");
61b1ab45
HS
3822 proc_net_remove(net, "ip_vs_stats");
3823 proc_net_remove(net, "ip_vs");
2a0751af 3824 free_percpu(ipvs->tot_stats.cpustats);
61b1ab45
HS
3825}
3826
8537de8a 3827int __init ip_vs_register_nl_ioctl(void)
1da177e4 3828{
fc723250 3829 int ret;
1da177e4 3830
1da177e4
LT
3831 ret = nf_register_sockopt(&ip_vs_sockopts);
3832 if (ret) {
1e3e238e 3833 pr_err("cannot register sockopt.\n");
7a4f0761 3834 goto err_sock;
1da177e4
LT
3835 }
3836
9a812198
JV
3837 ret = ip_vs_genl_register();
3838 if (ret) {
1e3e238e 3839 pr_err("cannot register Generic Netlink interface.\n");
7a4f0761 3840 goto err_genl;
9a812198 3841 }
1da177e4 3842 return 0;
fc723250 3843
7a4f0761
HS
3844err_genl:
3845 nf_unregister_sockopt(&ip_vs_sockopts);
3846err_sock:
fc723250 3847 return ret;
1da177e4
LT
3848}
3849
8537de8a
HS
3850void ip_vs_unregister_nl_ioctl(void)
3851{
3852 ip_vs_genl_unregister();
3853 nf_unregister_sockopt(&ip_vs_sockopts);
3854}
3855
3856int __init ip_vs_control_init(void)
3857{
3858 int idx;
3859 int ret;
3860
3861 EnterFunction(2);
3862
3863 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3864 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3865 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3866 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3867 }
3868
3869 smp_wmb(); /* Do we really need it now ? */
3870
3871 ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3872 if (ret < 0)
3873 return ret;
3874
3875 LeaveFunction(2);
3876 return 0;
3877}
3878
1da177e4
LT
3879
3880void ip_vs_control_cleanup(void)
3881{
3882 EnterFunction(2);
7676e345 3883 unregister_netdevice_notifier(&ip_vs_dst_notifier);
1da177e4
LT
3884 LeaveFunction(2);
3885}