netlink: Rename pid to portid to avoid confusion
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/* Serializes IPVS control operations; a mutex (not a spinlock)
   because the [gs]etsockopt handlers may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for the service hash tables (svc_table / svc_fwm_table) */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
/* sysctl variables */

#ifdef CONFIG_IP_VS_DEBUG
/* current debug verbosity (default 0) */
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug verbosity level */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
71
72
73 /* Protos */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
#ifdef CONFIG_IP_VS_IPV6
/*
 * Check whether an IPv6 address is configured on the local host:
 * a route lookup for it must resolve to a loopback device.
 * (Adapted from rt6_fill_node() in net/ipv6/route.c — is there a
 * better way?)
 */
static bool __ip_vs_addr_is_local_v6(struct net *net,
				     const struct in6_addr *addr)
{
	struct flowi6 fl6 = { .daddr = *addr };
	struct dst_entry *dst;
	bool local = false;

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
		local = true;
	dst_release(dst);

	return local;
}
#endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97 * update_defense_level is called from keventd and from sysctl,
98 * so it needs to protect itself from softirqs
99 */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102 struct sysinfo i;
103 static int old_secure_tcp = 0;
104 int availmem;
105 int nomem;
106 int to_change = -1;
107
108 /* we only count free and buffered memory (in pages) */
109 si_meminfo(&i);
110 availmem = i.freeram + i.bufferram;
111 /* however in linux 2.5 the i.bufferram is total page cache size,
112 we need adjust it */
113 /* si_swapinfo(&i); */
114 /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116 nomem = (availmem < ipvs->sysctl_amemthresh);
117
118 local_bh_disable();
119
120 /* drop_entry */
121 spin_lock(&ipvs->dropentry_lock);
122 switch (ipvs->sysctl_drop_entry) {
123 case 0:
124 atomic_set(&ipvs->dropentry, 0);
125 break;
126 case 1:
127 if (nomem) {
128 atomic_set(&ipvs->dropentry, 1);
129 ipvs->sysctl_drop_entry = 2;
130 } else {
131 atomic_set(&ipvs->dropentry, 0);
132 }
133 break;
134 case 2:
135 if (nomem) {
136 atomic_set(&ipvs->dropentry, 1);
137 } else {
138 atomic_set(&ipvs->dropentry, 0);
139 ipvs->sysctl_drop_entry = 1;
140 };
141 break;
142 case 3:
143 atomic_set(&ipvs->dropentry, 1);
144 break;
145 }
146 spin_unlock(&ipvs->dropentry_lock);
147
148 /* drop_packet */
149 spin_lock(&ipvs->droppacket_lock);
150 switch (ipvs->sysctl_drop_packet) {
151 case 0:
152 ipvs->drop_rate = 0;
153 break;
154 case 1:
155 if (nomem) {
156 ipvs->drop_rate = ipvs->drop_counter
157 = ipvs->sysctl_amemthresh /
158 (ipvs->sysctl_amemthresh-availmem);
159 ipvs->sysctl_drop_packet = 2;
160 } else {
161 ipvs->drop_rate = 0;
162 }
163 break;
164 case 2:
165 if (nomem) {
166 ipvs->drop_rate = ipvs->drop_counter
167 = ipvs->sysctl_amemthresh /
168 (ipvs->sysctl_amemthresh-availmem);
169 } else {
170 ipvs->drop_rate = 0;
171 ipvs->sysctl_drop_packet = 1;
172 }
173 break;
174 case 3:
175 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176 break;
177 }
178 spin_unlock(&ipvs->droppacket_lock);
179
180 /* secure_tcp */
181 spin_lock(&ipvs->securetcp_lock);
182 switch (ipvs->sysctl_secure_tcp) {
183 case 0:
184 if (old_secure_tcp >= 2)
185 to_change = 0;
186 break;
187 case 1:
188 if (nomem) {
189 if (old_secure_tcp < 2)
190 to_change = 1;
191 ipvs->sysctl_secure_tcp = 2;
192 } else {
193 if (old_secure_tcp >= 2)
194 to_change = 0;
195 }
196 break;
197 case 2:
198 if (nomem) {
199 if (old_secure_tcp < 2)
200 to_change = 1;
201 } else {
202 if (old_secure_tcp >= 2)
203 to_change = 0;
204 ipvs->sysctl_secure_tcp = 1;
205 }
206 break;
207 case 3:
208 if (old_secure_tcp < 2)
209 to_change = 1;
210 break;
211 }
212 old_secure_tcp = ipvs->sysctl_secure_tcp;
213 if (to_change >= 0)
214 ip_vs_protocol_timeout_change(ipvs,
215 ipvs->sysctl_secure_tcp > 1);
216 spin_unlock(&ipvs->securetcp_lock);
217
218 local_bh_enable();
219 }
220
221
222 /*
223 * Timer for checking the defense
224 */
225 #define DEFENSE_TIMER_PERIOD 1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229 struct netns_ipvs *ipvs =
230 container_of(work, struct netns_ipvs, defense_work.work);
231
232 update_defense_level(ipvs);
233 if (atomic_read(&ipvs->dropentry))
234 ip_vs_random_dropentry(ipvs->net);
235 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
/* Grab a reference on the ip_vs module; returns zero if it is unloading */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}
244
/* Drop a module reference taken with ip_vs_use_count_inc() */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
250
251
/*
 * Hash table: for virtual service lookups
 * Both tables below are guarded by __ip_vs_svc_lock.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266 * Returns hash value for virtual service
267 */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270 const union nf_inet_addr *addr, __be16 port)
271 {
272 register unsigned int porth = ntohs(port);
273 __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276 if (af == AF_INET6)
277 addr_fold = addr->ip6[0]^addr->ip6[1]^
278 addr->ip6[2]^addr->ip6[3];
279 #endif
280 addr_fold ^= ((size_t)net>>8);
281
282 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283 & IP_VS_SVC_TAB_MASK;
284 }
285
/*
 * Returns hash value of fwmark for virtual service lookup.
 * The netns pointer is folded in so equal fwmarks in different
 * namespaces land in different buckets.
 */
static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
{
	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
293
294 /*
295 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296 * or in the ip_vs_svc_fwm_table by fwmark.
297 * Should be called with locked tables.
298 */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301 unsigned int hash;
302
303 if (svc->flags & IP_VS_SVC_F_HASHED) {
304 pr_err("%s(): request for already hashed, called from %pF\n",
305 __func__, __builtin_return_address(0));
306 return 0;
307 }
308
309 if (svc->fwmark == 0) {
310 /*
311 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312 */
313 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314 &svc->addr, svc->port);
315 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316 } else {
317 /*
318 * Hash it by fwmark in svc_fwm_table
319 */
320 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322 }
323
324 svc->flags |= IP_VS_SVC_F_HASHED;
325 /* increase its refcnt because it is referenced by the svc table */
326 atomic_inc(&svc->refcnt);
327 return 1;
328 }
329
330
331 /*
332 * Unhashes a service from svc_table / svc_fwm_table.
333 * Should be called with locked tables.
334 */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338 pr_err("%s(): request for unhash flagged, called from %pF\n",
339 __func__, __builtin_return_address(0));
340 return 0;
341 }
342
343 if (svc->fwmark == 0) {
344 /* Remove it from the svc_table table */
345 list_del(&svc->s_list);
346 } else {
347 /* Remove it from the svc_fwm_table table */
348 list_del(&svc->f_list);
349 }
350
351 svc->flags &= ~IP_VS_SVC_F_HASHED;
352 atomic_dec(&svc->refcnt);
353 return 1;
354 }
355
356
357 /*
358 * Get service by {netns, proto,addr,port} in the service table.
359 */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362 const union nf_inet_addr *vaddr, __be16 vport)
363 {
364 unsigned int hash;
365 struct ip_vs_service *svc;
366
367 /* Check for "full" addressed entries */
368 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371 if ((svc->af == af)
372 && ip_vs_addr_equal(af, &svc->addr, vaddr)
373 && (svc->port == vport)
374 && (svc->protocol == protocol)
375 && net_eq(svc->net, net)) {
376 /* HIT */
377 return svc;
378 }
379 }
380
381 return NULL;
382 }
383
384
385 /*
386 * Get service by {fwmark} in the service table.
387 */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391 unsigned int hash;
392 struct ip_vs_service *svc;
393
394 /* Check for fwmark addressed entries */
395 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398 if (svc->fwmark == fwmark && svc->af == af
399 && net_eq(svc->net, net)) {
400 /* HIT */
401 return svc;
402 }
403 }
404
405 return NULL;
406 }
407
/*
 * Look up a service and take a usecnt reference on it.
 * Lookup order: fwmark table first, then <proto,addr,port>, then an
 * FTP control-port heuristic, then the catch-all (port 0) service.
 * The caller must release the result with ip_vs_service_put().
 */
struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;
	struct netns_ipvs *ipvs = net_ipvs(net);

	read_lock(&__ip_vs_svc_lock);

	/*
	 * Check the table hashed by fwmark first
	 */
	if (fwmark) {
		svc = __ip_vs_svc_fwm_find(net, af, fwmark);
		if (svc)
			goto out;
	}

	/*
	 * Check the table hashed by <protocol,addr,port>
	 * for "full" addressed entries
	 */
	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ipvs->ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ipvs->nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
	}

out:
	if (svc)
		atomic_inc(&svc->usecnt);	/* reference for the caller */
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}
463
464
/* Bind a destination to its service, taking a service reference */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}
471
/*
 * Detach a destination from its service and drop the service
 * reference; frees the service when this was the last reference.
 */
static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
			      svc->fwmark,
			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
			      ntohs(svc->port), atomic_read(&svc->usecnt));
		free_percpu(svc->stats.cpustats);
		kfree(svc);
	}
}
487
488
489 /*
490 * Returns hash value for real service
491 */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493 const union nf_inet_addr *addr,
494 __be16 port)
495 {
496 register unsigned int porth = ntohs(port);
497 __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500 if (af == AF_INET6)
501 addr_fold = addr->ip6[0]^addr->ip6[1]^
502 addr->ip6[2]^addr->ip6[3];
503 #endif
504
505 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511 * should be called with locked tables.
512 */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515 unsigned int hash;
516
517 if (!list_empty(&dest->d_list)) {
518 return 0;
519 }
520
521 /*
522 * Hash by proto,addr,port,
523 * which are the parameters of the real service.
524 */
525 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527 list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529 return 1;
530 }
531
532 /*
533 * UNhashes ip_vs_dest from rs_table.
534 * should be called with locked tables.
535 */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538 /*
539 * Remove it from the rs_table table.
540 */
541 if (!list_empty(&dest->d_list)) {
542 list_del(&dest->d_list);
543 INIT_LIST_HEAD(&dest->d_list);
544 }
545
546 return 1;
547 }
548
/*
 * Lookup real service by <proto,addr,port> in the real service table.
 * A destination belonging to a fwmark service (dest->vfwmark != 0)
 * matches regardless of protocol.  No reference is taken on the
 * returned dest.
 */
struct ip_vs_dest *
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
			  const union nf_inet_addr *daddr,
			  __be16 dport)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	unsigned int hash;
	struct ip_vs_dest *dest;

	/*
	 * Check for "full" addressed entries
	 * Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(af, daddr, dport);

	read_lock(&ipvs->rs_lock);
	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
		if ((dest->af == af)
		    && ip_vs_addr_equal(af, &dest->addr, daddr)
		    && (dest->port == dport)
		    && ((dest->protocol == protocol) ||
			dest->vfwmark)) {
			/* HIT */
			read_unlock(&ipvs->rs_lock);
			return dest;
		}
	}
	read_unlock(&ipvs->rs_lock);

	return NULL;
}
583
584 /*
585 * Lookup destination by {addr,port} in the given service
586 */
587 static struct ip_vs_dest *
588 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589 __be16 dport)
590 {
591 struct ip_vs_dest *dest;
592
593 /*
594 * Find the destination for the given service
595 */
596 list_for_each_entry(dest, &svc->destinations, n_list) {
597 if ((dest->af == svc->af)
598 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599 && (dest->port == dport)) {
600 /* HIT */
601 return dest;
602 }
603 }
604
605 return NULL;
606 }
607
/*
 * Find destination by {daddr,dport,vaddr,protocol}
 * Created to be used in ip_vs_process_message() in
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection
 * on the backup.
 *
 * For non-NAT fwmark services the lookup is first tried with port 0
 * (fwmark dests may be registered portless); "port ^ dport" then
 * restores the original dport for a second attempt.
 *
 * ip_vs_lookup_real_service() looked promising, but
 * seems not working as expected.
 */
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
				   const union nf_inet_addr *daddr,
				   __be16 dport,
				   const union nf_inet_addr *vaddr,
				   __be16 vport, __u16 protocol, __u32 fwmark,
				   __u32 flags)
{
	struct ip_vs_dest *dest;
	struct ip_vs_service *svc;
	__be16 port = dport;

	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
	if (!svc)
		return NULL;
	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
		port = 0;
	dest = ip_vs_lookup_dest(svc, daddr, port);
	if (!dest)
		/* port^dport is 0 when port==dport, or dport when port==0,
		   i.e. retry with the other port choice */
		dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
	if (dest)
		atomic_inc(&dest->refcnt);	/* reference for the caller */
	ip_vs_service_put(svc);
	return dest;
}
642
/*
 * Lookup dest by {svc,addr,port} in the destination trash.
 * The destination trash is used to hold the destinations that are removed
 * from the service table but are still referenced by some conn entries.
 * The reason to add the destination trash is when the dest is temporary
 * down (either by administrator or by monitor program), the dest can be
 * picked back from the trash, the remaining connections to the dest can
 * continue, and the counting information of the dest is also useful for
 * scheduling.
 *
 * As a side effect, any unreferenced dest encountered during the walk
 * is freed, so the trash does not grow without bound.
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		     __be16 dport)
{
	struct ip_vs_dest *dest, *nxt;
	struct netns_ipvs *ipvs = net_ipvs(svc->net);

	/*
	 * Find the destination in trash
	 */
	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
			      "dest->refcnt=%d\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		/* a match must agree on the real-server tuple AND on the
		   virtual service the dest belonged to */
		if (dest->af == svc->af &&
		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 * (refcnt == 1 means only the trash list holds it)
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
				      "from trash\n",
				      dest->vfwmark,
				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      ntohs(dest->port));
			list_del(&dest->n_list);
			ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			free_percpu(dest->stats.cpustats);
			kfree(dest);
		}
	}

	return NULL;
}
701
702
/*
 * Clean up all the destinations in the trash
 * Called by the ip_vs_control_cleanup()
 *
 * When the ip_vs_control_cleanup is activated by ipvs module exit,
 * the service tables must have been flushed and all the connections
 * are expired, and the refcnt of each destination in the trash must
 * be 1, so we simply release them here.
 */
static void ip_vs_trash_cleanup(struct net *net)
{
	struct ip_vs_dest *dest, *nxt;
	struct netns_ipvs *ipvs = net_ipvs(net);

	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
		list_del(&dest->n_list);
		ip_vs_dst_reset(dest);
		__ip_vs_unbind_svc(dest);
		free_percpu(dest->stats.cpustats);
		kfree(dest);
	}
}
725
/*
 * Snapshot the counters of @src into @dst, reported relative to the
 * last zeroing point (ustats0), together with the estimator rates.
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
/* export one counter as "current minus zero point" */
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c

	spin_lock_bh(&src->lock);

	IP_VS_SHOW_STATS_COUNTER(conns);
	IP_VS_SHOW_STATS_COUNTER(inpkts);
	IP_VS_SHOW_STATS_COUNTER(outpkts);
	IP_VS_SHOW_STATS_COUNTER(inbytes);
	IP_VS_SHOW_STATS_COUNTER(outbytes);

	ip_vs_read_estimator(dst, src);

	spin_unlock_bh(&src->lock);
}
743
/*
 * Zero the exported view of @stats by recording the current counter
 * values as the new zero point; estimator rates are reset as well.
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	/* get current counters as zero point, rates are zeroed */

#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c

	IP_VS_ZERO_STATS_COUNTER(conns);
	IP_VS_ZERO_STATS_COUNTER(inpkts);
	IP_VS_ZERO_STATS_COUNTER(outpkts);
	IP_VS_ZERO_STATS_COUNTER(inbytes);
	IP_VS_ZERO_STATS_COUNTER(outbytes);

	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
763
/*
 * Update a destination in the given service
 *
 * Applies the user-supplied parameters in @udest to @dest (weight,
 * forwarding-method flags, thresholds), (re)binds it to @svc, and,
 * when @add is set, links it into the service's destination list
 * under the svc table write lock.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		    struct ip_vs_dest_user_kern *udest, int add)
{
	struct netns_ipvs *ipvs = net_ipvs(svc->net);
	int conn_flags;

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
	conn_flags |= IP_VS_CONN_F_INACTIVE;

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 * Put the real service in rs_table if not present.
		 * For now only for NAT!
		 */
		write_lock_bh(&ipvs->rs_lock);
		ip_vs_rs_hash(ipvs, dest);
		write_unlock_bh(&ipvs->rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			/* moved between services: restart its stats too */
			__ip_vs_unbind_svc(dest);
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	/* a zero or raised upper threshold clears the overload state */
	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;

	/* invalidate any cached route to the real server */
	spin_lock_bh(&dest->dst_lock);
	ip_vs_dst_reset(dest);
	spin_unlock_bh(&dest->dst_lock);

	if (add)
		ip_vs_start_estimator(svc->net, &dest->stats);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	if (add) {
		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;
	}

	/* call the update_service, because server weight may be changed */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
835
836
/*
 * Create a destination for the given service
 *
 * Validates the real-server address (unicast, or locally configured
 * for the IPv6 corner cases), allocates and initializes the
 * ip_vs_dest and links it into the service via __ip_vs_update_dest().
 * On success *dest_p is set.  Returns 0 or a negative errno.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned int atype;

	EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		atype = ipv6_addr_type(&udest->addr.in6);
		/* non-unicast or link-local addresses are accepted only
		   when configured locally on this host */
		if ((!(atype & IPV6_ADDR_UNICAST) ||
		     atype & IPV6_ADDR_LINKLOCAL) &&
		    !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
			return -EINVAL;
	} else
#endif
	{
		atype = inet_addr_type(svc->net, udest->addr.ip);
		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
			return -EINVAL;
	}

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
	if (dest == NULL)
		return -ENOMEM;

	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!dest->stats.cpustats)
		goto err_alloc;

	/* remember the virtual service this dest was created under */
	dest->af = svc->af;
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	atomic_set(&dest->refcnt, 1);	/* initial reference */

	INIT_LIST_HEAD(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest, 1);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;

err_alloc:
	kfree(dest);
	return -ENOMEM;
}
899
900
/*
 * Add a destination into an existing service
 *
 * Rejects invalid weight/thresholds, refuses duplicates, revives a
 * matching destination from the trash when possible, and otherwise
 * allocates a fresh one.  Returns 0 or a negative errno.
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
			      atomic_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		__ip_vs_update_dest(svc, dest, udest, 1);
		ret = 0;
	} else {
		/*
		 * Allocate and initialize the dest structure
		 */
		ret = ip_vs_new_dest(svc, udest, &dest);
	}
	LeaveFunction(2);

	return ret;
}
969
970
971 /*
972 * Edit a destination in the given service
973 */
974 static int
975 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
976 {
977 struct ip_vs_dest *dest;
978 union nf_inet_addr daddr;
979 __be16 dport = udest->port;
980
981 EnterFunction(2);
982
983 if (udest->weight < 0) {
984 pr_err("%s(): server weight less than zero\n", __func__);
985 return -ERANGE;
986 }
987
988 if (udest->l_threshold > udest->u_threshold) {
989 pr_err("%s(): lower threshold is higher than upper threshold\n",
990 __func__);
991 return -ERANGE;
992 }
993
994 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
995
996 /*
997 * Lookup the destination list
998 */
999 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1000
1001 if (dest == NULL) {
1002 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1003 return -ENOENT;
1004 }
1005
1006 __ip_vs_update_dest(svc, dest, udest, 0);
1007 LeaveFunction(2);
1008
1009 return 0;
1010 }
1011
1012
/*
 * Delete a destination (must be already unlinked from the service)
 *
 * Stops its estimator and removes it from the real-server table.
 * The dest is freed only when no connection still references it;
 * otherwise it is parked in the trash with an extra reference.
 */
static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_stop_estimator(net, &dest->stats);

	/*
	 * Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&ipvs->rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&ipvs->rs_lock);

	/*
	 * Decrease the refcnt of the dest, and free the dest
	 * if nobody refers to it (refcnt=0). Otherwise, throw
	 * the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		free_percpu(dest->stats.cpustats);
		kfree(dest);
	} else {
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
			      "dest->refcnt=%d\n",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		list_add(&dest->n_list, &ipvs->dest_trash);
		atomic_inc(&dest->refcnt);	/* trash holds a reference */
	}
}
1058
1059
1060 /*
1061 * Unlink a destination from the given service
1062 */
1063 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064 struct ip_vs_dest *dest,
1065 int svcupd)
1066 {
1067 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069 /*
1070 * Remove it from the d-linked destination list.
1071 */
1072 list_del(&dest->n_list);
1073 svc->num_dests--;
1074
1075 /*
1076 * Call the update_service function of its scheduler
1077 */
1078 if (svcupd && svc->scheduler->update_service)
1079 svc->scheduler->update_service(svc);
1080 }
1081
1082
/*
 * Delete a destination server in the given service
 *
 * Looks up the dest by {addr,port}, unlinks it from the service under
 * the svc table write lock (after waiting for readers to drain), then
 * releases it via __ip_vs_del_dest().
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 * Delete the destination
	 */
	__ip_vs_del_dest(svc->net, dest);

	LeaveFunction(2);

	return 0;
}
1124
1125
/*
 * Add a service into the service hash table
 *
 * Resolves the scheduler (and optional persistence engine) by name,
 * allocates and fills the ip_vs_service, binds scheduler/pe, starts
 * the stats estimator and hashes the service into the tables.
 * On success *svc_p is set and a module reference is kept; on error
 * everything acquired so far is rolled back.
 */
static int
ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_pe *pe = NULL;
	struct ip_vs_service *svc = NULL;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Lookup the scheduler by 'u->sched_name' */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		ret = -ENOENT;
		goto out_err;
	}

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

#ifdef CONFIG_IP_VS_IPV6
	/* for IPv6 the netmask is a prefix length, must be 1..128 */
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out_err;
	}
#endif

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}
	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!svc->stats.cpustats) {
		ret = -ENOMEM;
		goto out_err;
	}

	/* I'm the first user of the service */
	atomic_set(&svc->usecnt, 0);
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;
	svc->net = net;

	INIT_LIST_HEAD(&svc->destinations);
	rwlock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret)
		goto out_err;
	sched = NULL;	/* ownership moved to svc */

	/* Bind the ct retriever */
	ip_vs_bind_pe(svc, pe);
	pe = NULL;	/* ownership moved to svc */

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ipvs->nullsvc_counter);

	ip_vs_start_estimator(net, &svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ipvs->num_services++;

	/* Hash the service into the service table */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	/* Now there is a service - full throttle */
	ipvs->enable = 1;
	return 0;


 out_err:
	if (svc != NULL) {
		ip_vs_unbind_scheduler(svc);
		if (svc->inc) {
			local_bh_disable();
			ip_vs_app_inc_put(svc->inc);
			local_bh_enable();
		}
		if (svc->stats.cpustats)
			free_percpu(svc->stats.cpustats);
		kfree(svc);
	}
	ip_vs_scheduler_put(sched);
	ip_vs_pe_put(pe);

	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
1250
1251
/*
 *	Edit a service and bind it with a new scheduler
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/* Start with old_sched == sched so the reference taken above is
	 * dropped at 'out' even when we error out before the swap below. */
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		/* Same trick as with old_sched: released at 'out'. */
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	/* For IPv6 the netmask is a prefix length, valid range 1..128. */
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

	old_pe = svc->pe;
	if (pe != old_pe) {
		/* Swap the persistence engine under the service lock. */
		ip_vs_unbind_pe(svc);
		ip_vs_bind_pe(svc, pe);
	}

out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
out:
	/* Drop whichever scheduler/pe reference is no longer in use. */
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}
1347
1348
1349 /*
1350 * Delete a service from the service list
1351 * - The service must be unlinked, unlocked and not referenced!
1352 * - We are called under _bh lock
1353 */
1354 static void __ip_vs_del_service(struct ip_vs_service *svc)
1355 {
1356 struct ip_vs_dest *dest, *nxt;
1357 struct ip_vs_scheduler *old_sched;
1358 struct ip_vs_pe *old_pe;
1359 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1360
1361 pr_info("%s: enter\n", __func__);
1362
1363 /* Count only IPv4 services for old get/setsockopt interface */
1364 if (svc->af == AF_INET)
1365 ipvs->num_services--;
1366
1367 ip_vs_stop_estimator(svc->net, &svc->stats);
1368
1369 /* Unbind scheduler */
1370 old_sched = svc->scheduler;
1371 ip_vs_unbind_scheduler(svc);
1372 ip_vs_scheduler_put(old_sched);
1373
1374 /* Unbind persistence engine */
1375 old_pe = svc->pe;
1376 ip_vs_unbind_pe(svc);
1377 ip_vs_pe_put(old_pe);
1378
1379 /* Unbind app inc */
1380 if (svc->inc) {
1381 ip_vs_app_inc_put(svc->inc);
1382 svc->inc = NULL;
1383 }
1384
1385 /*
1386 * Unlink the whole destination list
1387 */
1388 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1389 __ip_vs_unlink_dest(svc, dest, 0);
1390 __ip_vs_del_dest(svc->net, dest);
1391 }
1392
1393 /*
1394 * Update the virtual service counters
1395 */
1396 if (svc->port == FTPPORT)
1397 atomic_dec(&ipvs->ftpsvc_counter);
1398 else if (svc->port == 0)
1399 atomic_dec(&ipvs->nullsvc_counter);
1400
1401 /*
1402 * Free the service if nobody refers to it
1403 */
1404 if (atomic_read(&svc->refcnt) == 0) {
1405 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1406 svc->fwmark,
1407 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1408 ntohs(svc->port), atomic_read(&svc->usecnt));
1409 free_percpu(svc->stats.cpustats);
1410 kfree(svc);
1411 }
1412
1413 /* decrease the module use count */
1414 ip_vs_use_count_dec();
1415 }
1416
/*
 *	Unlink a service from list and try to delete it if its refcnt reached 0
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc)
{
	/*
	 *    Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);

	/* Tear the service down while still holding the table lock,
	 * as __ip_vs_del_service() requires ("called under _bh lock"). */
	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);
}
1438
/*
 *	Delete a service from the service list
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	/* NOTE(review): -EEXIST for a missing service reads oddly
	 * (-ESRCH would be the natural code), but userspace may depend
	 * on it -- confirm before changing. */
	if (svc == NULL)
		return -EEXIST;
	ip_vs_unlink_service(svc);

	return 0;
}
1450
1451
1452 /*
1453 * Flush all the virtual services
1454 */
1455 static int ip_vs_flush(struct net *net)
1456 {
1457 int idx;
1458 struct ip_vs_service *svc, *nxt;
1459
1460 /*
1461 * Flush the service table hashed by <netns,protocol,addr,port>
1462 */
1463 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1464 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1465 s_list) {
1466 if (net_eq(svc->net, net))
1467 ip_vs_unlink_service(svc);
1468 }
1469 }
1470
1471 /*
1472 * Flush the service table hashed by fwmark
1473 */
1474 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1475 list_for_each_entry_safe(svc, nxt,
1476 &ip_vs_svc_fwm_table[idx], f_list) {
1477 if (net_eq(svc->net, net))
1478 ip_vs_unlink_service(svc);
1479 }
1480 }
1481
1482 return 0;
1483 }
1484
/*
 *	Delete service by {netns} in the service table.
 *	Called by __ip_vs_cleanup()
 */
void ip_vs_service_net_cleanup(struct net *net)
{
	EnterFunction(2);
	/* Check for "full" addressed entries */
	mutex_lock(&__ip_vs_mutex);
	/* Drop every service that belongs to this netns. */
	ip_vs_flush(net);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
}
/*
 * Release dst hold by dst_cache
 */
static inline void
__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
{
	spin_lock_bh(&dest->dst_lock);
	/* Only drop the cached route if it actually points at this device. */
	if (dest->dst_cache && dest->dst_cache->dev == dev) {
		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
			      dev->name,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		ip_vs_dst_reset(dest);
	}
	spin_unlock_bh(&dest->dst_lock);

}
/*
 * Netdev event receiver
 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
 * a device that is "unregister" it must be released.
 */
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			    void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	unsigned int idx;

	/* Nothing to do unless a device in an IPVS-enabled netns goes away. */
	if (event != NETDEV_UNREGISTER || !ipvs)
		return NOTIFY_DONE;
	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	/* Walk both service hash tables and drop any destination route
	 * cache entry that points at the disappearing device. */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (net_eq(svc->net, net)) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					__ip_vs_dev_reset(dest, dev);
				}
			}
		}

		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net)) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					__ip_vs_dev_reset(dest, dev);
				}
			}

		}
	}

	/* Destinations parked in the trash may also hold such a route. */
	list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
		__ip_vs_dev_reset(dest, dev);
	}
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
	return NOTIFY_DONE;
}
1564
1565 /*
1566 * Zero counters in a service or all services
1567 */
1568 static int ip_vs_zero_service(struct ip_vs_service *svc)
1569 {
1570 struct ip_vs_dest *dest;
1571
1572 write_lock_bh(&__ip_vs_svc_lock);
1573 list_for_each_entry(dest, &svc->destinations, n_list) {
1574 ip_vs_zero_stats(&dest->stats);
1575 }
1576 ip_vs_zero_stats(&svc->stats);
1577 write_unlock_bh(&__ip_vs_svc_lock);
1578 return 0;
1579 }
1580
1581 static int ip_vs_zero_all(struct net *net)
1582 {
1583 int idx;
1584 struct ip_vs_service *svc;
1585
1586 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1587 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1588 if (net_eq(svc->net, net))
1589 ip_vs_zero_service(svc);
1590 }
1591 }
1592
1593 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1594 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1595 if (net_eq(svc->net, net))
1596 ip_vs_zero_service(svc);
1597 }
1598 }
1599
1600 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1601 return 0;
1602 }
1603
1604 #ifdef CONFIG_SYSCTL
1605
static int zero;	/* lower bound (extra1) for proc_dointvec_minmax */
static int three = 3;	/* upper bound (extra2) for the sync_retries sysctl */
1608
1609 static int
1610 proc_do_defense_mode(ctl_table *table, int write,
1611 void __user *buffer, size_t *lenp, loff_t *ppos)
1612 {
1613 struct net *net = current->nsproxy->net_ns;
1614 int *valp = table->data;
1615 int val = *valp;
1616 int rc;
1617
1618 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1619 if (write && (*valp != val)) {
1620 if ((*valp < 0) || (*valp > 3)) {
1621 /* Restore the correct value */
1622 *valp = val;
1623 } else {
1624 update_defense_level(net_ipvs(net));
1625 }
1626 }
1627 return rc;
1628 }
1629
/* Sysctl handler for sync_threshold: table->data points at a two-int
 * array { sync_threshold, sync_period }. */
static int
proc_do_sync_threshold(ctl_table *table, int write,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;

	/* backup the value first */
	memcpy(val, valp, sizeof(val));

	rc = proc_dointvec(table, write, buffer, lenp, ppos);
	/* Reject negatives and threshold >= period (a period of 0 is
	 * allowed and disables periodic sync) by restoring the backup.
	 * NOTE(review): the write and the check-and-restore are not
	 * atomic; concurrent writers could race -- confirm the sysctl
	 * path serializes this. */
	if (write && (valp[0] < 0 || valp[1] < 0 ||
	    (valp[0] >= valp[1] && valp[1]))) {
		/* Restore the correct value */
		memcpy(valp, val, sizeof(val));
	}
	return rc;
}
1649
1650 static int
1651 proc_do_sync_mode(ctl_table *table, int write,
1652 void __user *buffer, size_t *lenp, loff_t *ppos)
1653 {
1654 int *valp = table->data;
1655 int val = *valp;
1656 int rc;
1657
1658 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1659 if (write && (*valp != val)) {
1660 if ((*valp < 0) || (*valp > 1)) {
1661 /* Restore the correct value */
1662 *valp = val;
1663 }
1664 }
1665 return rc;
1666 }
1667
1668 static int
1669 proc_do_sync_ports(ctl_table *table, int write,
1670 void __user *buffer, size_t *lenp, loff_t *ppos)
1671 {
1672 int *valp = table->data;
1673 int val = *valp;
1674 int rc;
1675
1676 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1677 if (write && (*valp != val)) {
1678 if (*valp < 1 || !is_power_of_2(*valp)) {
1679 /* Restore the correct value */
1680 *valp = val;
1681 }
1682 }
1683 return rc;
1684 }
1685
1686 /*
1687 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1688 * Do not change order or insert new entries without
1689 * align with netns init in ip_vs_control_net_init()
1690 */
1691
1692 static struct ctl_table vs_vars[] = {
1693 {
1694 .procname = "amemthresh",
1695 .maxlen = sizeof(int),
1696 .mode = 0644,
1697 .proc_handler = proc_dointvec,
1698 },
1699 {
1700 .procname = "am_droprate",
1701 .maxlen = sizeof(int),
1702 .mode = 0644,
1703 .proc_handler = proc_dointvec,
1704 },
1705 {
1706 .procname = "drop_entry",
1707 .maxlen = sizeof(int),
1708 .mode = 0644,
1709 .proc_handler = proc_do_defense_mode,
1710 },
1711 {
1712 .procname = "drop_packet",
1713 .maxlen = sizeof(int),
1714 .mode = 0644,
1715 .proc_handler = proc_do_defense_mode,
1716 },
1717 #ifdef CONFIG_IP_VS_NFCT
1718 {
1719 .procname = "conntrack",
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
1722 .proc_handler = &proc_dointvec,
1723 },
1724 #endif
1725 {
1726 .procname = "secure_tcp",
1727 .maxlen = sizeof(int),
1728 .mode = 0644,
1729 .proc_handler = proc_do_defense_mode,
1730 },
1731 {
1732 .procname = "snat_reroute",
1733 .maxlen = sizeof(int),
1734 .mode = 0644,
1735 .proc_handler = &proc_dointvec,
1736 },
1737 {
1738 .procname = "sync_version",
1739 .maxlen = sizeof(int),
1740 .mode = 0644,
1741 .proc_handler = &proc_do_sync_mode,
1742 },
1743 {
1744 .procname = "sync_ports",
1745 .maxlen = sizeof(int),
1746 .mode = 0644,
1747 .proc_handler = &proc_do_sync_ports,
1748 },
1749 {
1750 .procname = "sync_qlen_max",
1751 .maxlen = sizeof(int),
1752 .mode = 0644,
1753 .proc_handler = proc_dointvec,
1754 },
1755 {
1756 .procname = "sync_sock_size",
1757 .maxlen = sizeof(int),
1758 .mode = 0644,
1759 .proc_handler = proc_dointvec,
1760 },
1761 {
1762 .procname = "cache_bypass",
1763 .maxlen = sizeof(int),
1764 .mode = 0644,
1765 .proc_handler = proc_dointvec,
1766 },
1767 {
1768 .procname = "expire_nodest_conn",
1769 .maxlen = sizeof(int),
1770 .mode = 0644,
1771 .proc_handler = proc_dointvec,
1772 },
1773 {
1774 .procname = "expire_quiescent_template",
1775 .maxlen = sizeof(int),
1776 .mode = 0644,
1777 .proc_handler = proc_dointvec,
1778 },
1779 {
1780 .procname = "sync_threshold",
1781 .maxlen =
1782 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1783 .mode = 0644,
1784 .proc_handler = proc_do_sync_threshold,
1785 },
1786 {
1787 .procname = "sync_refresh_period",
1788 .maxlen = sizeof(int),
1789 .mode = 0644,
1790 .proc_handler = proc_dointvec_jiffies,
1791 },
1792 {
1793 .procname = "sync_retries",
1794 .maxlen = sizeof(int),
1795 .mode = 0644,
1796 .proc_handler = proc_dointvec_minmax,
1797 .extra1 = &zero,
1798 .extra2 = &three,
1799 },
1800 {
1801 .procname = "nat_icmp_send",
1802 .maxlen = sizeof(int),
1803 .mode = 0644,
1804 .proc_handler = proc_dointvec,
1805 },
1806 {
1807 .procname = "pmtu_disc",
1808 .maxlen = sizeof(int),
1809 .mode = 0644,
1810 .proc_handler = proc_dointvec,
1811 },
1812 #ifdef CONFIG_IP_VS_DEBUG
1813 {
1814 .procname = "debug_level",
1815 .data = &sysctl_ip_vs_debug_level,
1816 .maxlen = sizeof(int),
1817 .mode = 0644,
1818 .proc_handler = proc_dointvec,
1819 },
1820 #endif
1821 #if 0
1822 {
1823 .procname = "timeout_established",
1824 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1825 .maxlen = sizeof(int),
1826 .mode = 0644,
1827 .proc_handler = proc_dointvec_jiffies,
1828 },
1829 {
1830 .procname = "timeout_synsent",
1831 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1832 .maxlen = sizeof(int),
1833 .mode = 0644,
1834 .proc_handler = proc_dointvec_jiffies,
1835 },
1836 {
1837 .procname = "timeout_synrecv",
1838 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1839 .maxlen = sizeof(int),
1840 .mode = 0644,
1841 .proc_handler = proc_dointvec_jiffies,
1842 },
1843 {
1844 .procname = "timeout_finwait",
1845 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1846 .maxlen = sizeof(int),
1847 .mode = 0644,
1848 .proc_handler = proc_dointvec_jiffies,
1849 },
1850 {
1851 .procname = "timeout_timewait",
1852 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1853 .maxlen = sizeof(int),
1854 .mode = 0644,
1855 .proc_handler = proc_dointvec_jiffies,
1856 },
1857 {
1858 .procname = "timeout_close",
1859 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1860 .maxlen = sizeof(int),
1861 .mode = 0644,
1862 .proc_handler = proc_dointvec_jiffies,
1863 },
1864 {
1865 .procname = "timeout_closewait",
1866 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1867 .maxlen = sizeof(int),
1868 .mode = 0644,
1869 .proc_handler = proc_dointvec_jiffies,
1870 },
1871 {
1872 .procname = "timeout_lastack",
1873 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1874 .maxlen = sizeof(int),
1875 .mode = 0644,
1876 .proc_handler = proc_dointvec_jiffies,
1877 },
1878 {
1879 .procname = "timeout_listen",
1880 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1881 .maxlen = sizeof(int),
1882 .mode = 0644,
1883 .proc_handler = proc_dointvec_jiffies,
1884 },
1885 {
1886 .procname = "timeout_synack",
1887 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1888 .maxlen = sizeof(int),
1889 .mode = 0644,
1890 .proc_handler = proc_dointvec_jiffies,
1891 },
1892 {
1893 .procname = "timeout_udp",
1894 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1895 .maxlen = sizeof(int),
1896 .mode = 0644,
1897 .proc_handler = proc_dointvec_jiffies,
1898 },
1899 {
1900 .procname = "timeout_icmp",
1901 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1902 .maxlen = sizeof(int),
1903 .mode = 0644,
1904 .proc_handler = proc_dointvec_jiffies,
1905 },
1906 #endif
1907 { }
1908 };
1909
1910 #endif
1911
1912 #ifdef CONFIG_PROC_FS
1913
/* Cursor state for the /proc/net/ip_vs seq_file walk. */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;   /* hash table currently being walked */
	int bucket;                /* current bucket within that table */
};
1919
1920 /*
1921 * Write the contents of the VS rule table to a PROCfs file.
1922 * (It is kept just for backward compatibility)
1923 */
1924 static inline const char *ip_vs_fwd_name(unsigned int flags)
1925 {
1926 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1927 case IP_VS_CONN_F_LOCALNODE:
1928 return "Local";
1929 case IP_VS_CONN_F_TUNNEL:
1930 return "Tunnel";
1931 case IP_VS_CONN_F_DROUTE:
1932 return "Route";
1933 default:
1934 return "Masq";
1935 }
1936 }
1937
1938
/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct net *net = seq_file_net(seq);
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Count pos down over this netns' entries only. */
			if (net_eq(svc->net, net) && pos-- == 0) {
				/* Remember where we are for _seq_next(). */
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (net_eq(svc->net, net) && pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}
1971
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(__ip_vs_svc_lock)
{
	/* Hold the service table lock for the whole sequence;
	 * it is released in ip_vs_info_seq_stop(). */
	read_lock_bh(&__ip_vs_svc_lock);
	/* *pos == 0 yields the header token, otherwise the (pos-1)-th svc. */
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
1979
1980
/* Advance to the next service, crossing buckets and then crossing from
 * the protocol-hashed table into the fwmark-hashed table. */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	/* After the header token, start from the very first entry. */
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
			return list_entry(e, struct ip_vs_service, s_list);

		/* Bucket exhausted: scan forward; the loop body returns
		 * the first entry of the next non-empty bucket.
		 * NOTE(review): unlike ip_vs_info_array(), these scans do
		 * not filter on netns -- confirm whether entries from
		 * other netns can appear here. */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				return svc;
			}
		}

		/* Protocol table done: switch to the fwmark table. */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
		return list_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list)
			return svc;
	}

	/* Both tables exhausted. */
	return NULL;
}
2025
/* Drop the lock taken in ip_vs_info_seq_start(). */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
	__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
2031
2032
/* Emit one record of /proc/net/ip_vs: the header for the start token,
 * otherwise one service line followed by one line per destination. */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		/* Which table we came from decides the line format:
		 * protocol/addr/port vs. a bare firewall mark. */
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s  [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   svc->scheduler->name);
			else
#endif
				seq_printf(seq, "%s  %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   svc->scheduler->name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			seq_printf(seq, "FWM  %08X %s %s",
				   svc->fwmark, svc->scheduler->name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		/* One line per real server behind this virtual service. */
		list_for_each_entry(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   "  -> [%pI6]:%04X"
					   "      %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   "  -> %08X:%04X      "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}
2105
/* seq_file iterator callbacks for /proc/net/ip_vs. */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
2112
/* open() for /proc/net/ip_vs: netns-aware seq_file with ip_vs_iter state. */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
2118
/* File operations for /proc/net/ip_vs. */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2126
/* Emit /proc/net/ip_vs_stats: netns-wide totals followed by rates. */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_stats_user show;

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
	seq_printf(seq,
		   "   Conns  Packets  Packets            Bytes            Bytes\n");

	/* Snapshot the totals so the two printed sections are consistent. */
	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
		   show.inpkts, show.outpkts,
		   (unsigned long long) show.inbytes,
		   (unsigned long long) show.outbytes);

/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
	seq_printf(seq, "%8X %8X %8X %16X %16X\n",
			show.cps, show.inpps, show.outpps,
			show.inbps, show.outbps);

	return 0;
}
2153
/* open() for /proc/net/ip_vs_stats (single-record, netns-aware). */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_show);
}
2158
/* File operations for /proc/net/ip_vs_stats. */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2166
/* Emit /proc/net/ip_vs_stats_percpu: one line per possible CPU, then the
 * aggregated totals and estimated rates. */
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
	struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
	struct ip_vs_stats_user rates;
	int i;

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 "       Total Incoming Outgoing         Incoming         Outgoing\n");
	seq_printf(seq,
		   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
		unsigned int start;
		__u64 inbytes, outbytes;

		/* u64_stats retry loop: re-read until the writer did not
		 * touch the counters mid-read, so the 64-bit byte counts
		 * are consistent even on 32-bit SMP. */
		do {
			start = u64_stats_fetch_begin_bh(&u->syncp);
			inbytes = u->ustats.inbytes;
			outbytes = u->ustats.outbytes;
		} while (u64_stats_fetch_retry_bh(&u->syncp, start));

		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
			   i, u->ustats.conns, u->ustats.inpkts,
			   u->ustats.outpkts, (__u64)inbytes,
			   (__u64)outbytes);
	}

	/* The aggregated totals and estimator rates share tot_stats->lock. */
	spin_lock_bh(&tot_stats->lock);

	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
		   tot_stats->ustats.outpkts,
		   (unsigned long long) tot_stats->ustats.inbytes,
		   (unsigned long long) tot_stats->ustats.outbytes);

	ip_vs_read_estimator(&rates, tot_stats);

	spin_unlock_bh(&tot_stats->lock);

/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
			rates.cps,
			rates.inpps,
			rates.outpps,
			rates.inbps,
			rates.outbps);

	return 0;
}
2222
/* open() for /proc/net/ip_vs_stats_percpu (single-record, netns-aware). */
static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ip_vs_stats_percpu_show);
}
2227
/* File operations for /proc/net/ip_vs_stats_percpu. */
static const struct file_operations ip_vs_stats_percpu_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_percpu_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2235 #endif
2236
/*
 *	Set timeout values for tcp tcpfin udp in the timeout_table.
 *	A zero value in the request means "leave this timeout unchanged".
 */
static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		/* NOTE(review): pd is dereferenced without a NULL check;
		 * this assumes the protocol is always registered in the
		 * netns when its CONFIG is enabled -- confirm. */
		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
		pd->timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}
2274
2275
/* Translate a sockopt command into an index for the set_arglen[] table. */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
/* Argument sizes of the legacy (IPv4-only) setsockopt interface. */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

/* Exact argument length required for each IP_VS_SO_SET_* command;
 * do_ip_vs_set_ctl() rejects any length mismatch. */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
2297
2298 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2299 struct ip_vs_service_user *usvc_compat)
2300 {
2301 memset(usvc, 0, sizeof(*usvc));
2302
2303 usvc->af = AF_INET;
2304 usvc->protocol = usvc_compat->protocol;
2305 usvc->addr.ip = usvc_compat->addr;
2306 usvc->port = usvc_compat->port;
2307 usvc->fwmark = usvc_compat->fwmark;
2308
2309 /* Deep copy of sched_name is not needed here */
2310 usvc->sched_name = usvc_compat->sched_name;
2311
2312 usvc->flags = usvc_compat->flags;
2313 usvc->timeout = usvc_compat->timeout;
2314 usvc->netmask = usvc_compat->netmask;
2315 }
2316
2317 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2318 struct ip_vs_dest_user *udest_compat)
2319 {
2320 memset(udest, 0, sizeof(*udest));
2321
2322 udest->addr.ip = udest_compat->addr;
2323 udest->port = udest_compat->port;
2324 udest->conn_flags = udest_compat->conn_flags;
2325 udest->weight = udest_compat->weight;
2326 udest->u_threshold = udest_compat->u_threshold;
2327 udest->l_threshold = udest_compat->l_threshold;
2328 }
2329
2330 static int
2331 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2332 {
2333 struct net *net = sock_net(sk);
2334 int ret;
2335 unsigned char arg[MAX_ARG_LEN];
2336 struct ip_vs_service_user *usvc_compat;
2337 struct ip_vs_service_user_kern usvc;
2338 struct ip_vs_service *svc;
2339 struct ip_vs_dest_user *udest_compat;
2340 struct ip_vs_dest_user_kern udest;
2341 struct netns_ipvs *ipvs = net_ipvs(net);
2342
2343 if (!capable(CAP_NET_ADMIN))
2344 return -EPERM;
2345
2346 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2347 return -EINVAL;
2348 if (len < 0 || len > MAX_ARG_LEN)
2349 return -EINVAL;
2350 if (len != set_arglen[SET_CMDID(cmd)]) {
2351 pr_err("set_ctl: len %u != %u\n",
2352 len, set_arglen[SET_CMDID(cmd)]);
2353 return -EINVAL;
2354 }
2355
2356 if (copy_from_user(arg, user, len) != 0)
2357 return -EFAULT;
2358
2359 /* increase the module use count */
2360 ip_vs_use_count_inc();
2361
2362 /* Handle daemons since they have another lock */
2363 if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2364 cmd == IP_VS_SO_SET_STOPDAEMON) {
2365 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2366
2367 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2368 ret = -ERESTARTSYS;
2369 goto out_dec;
2370 }
2371 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2372 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2373 dm->syncid);
2374 else
2375 ret = stop_sync_thread(net, dm->state);
2376 mutex_unlock(&ipvs->sync_mutex);
2377 goto out_dec;
2378 }
2379
2380 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2381 ret = -ERESTARTSYS;
2382 goto out_dec;
2383 }
2384
2385 if (cmd == IP_VS_SO_SET_FLUSH) {
2386 /* Flush the virtual service */
2387 ret = ip_vs_flush(net);
2388 goto out_unlock;
2389 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2390 /* Set timeout values for (tcp tcpfin udp) */
2391 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2392 goto out_unlock;
2393 }
2394
2395 usvc_compat = (struct ip_vs_service_user *)arg;
2396 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2397
2398 /* We only use the new structs internally, so copy userspace compat
2399 * structs to extended internal versions */
2400 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2401 ip_vs_copy_udest_compat(&udest, udest_compat);
2402
2403 if (cmd == IP_VS_SO_SET_ZERO) {
2404 /* if no service address is set, zero counters in all */
2405 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2406 ret = ip_vs_zero_all(net);
2407 goto out_unlock;
2408 }
2409 }
2410
2411 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2412 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2413 usvc.protocol != IPPROTO_SCTP) {
2414 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2415 usvc.protocol, &usvc.addr.ip,
2416 ntohs(usvc.port), usvc.sched_name);
2417 ret = -EFAULT;
2418 goto out_unlock;
2419 }
2420
2421 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2422 if (usvc.fwmark == 0)
2423 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2424 &usvc.addr, usvc.port);
2425 else
2426 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2427
2428 if (cmd != IP_VS_SO_SET_ADD
2429 && (svc == NULL || svc->protocol != usvc.protocol)) {
2430 ret = -ESRCH;
2431 goto out_unlock;
2432 }
2433
2434 switch (cmd) {
2435 case IP_VS_SO_SET_ADD:
2436 if (svc != NULL)
2437 ret = -EEXIST;
2438 else
2439 ret = ip_vs_add_service(net, &usvc, &svc);
2440 break;
2441 case IP_VS_SO_SET_EDIT:
2442 ret = ip_vs_edit_service(svc, &usvc);
2443 break;
2444 case IP_VS_SO_SET_DEL:
2445 ret = ip_vs_del_service(svc);
2446 if (!ret)
2447 goto out_unlock;
2448 break;
2449 case IP_VS_SO_SET_ZERO:
2450 ret = ip_vs_zero_service(svc);
2451 break;
2452 case IP_VS_SO_SET_ADDDEST:
2453 ret = ip_vs_add_dest(svc, &udest);
2454 break;
2455 case IP_VS_SO_SET_EDITDEST:
2456 ret = ip_vs_edit_dest(svc, &udest);
2457 break;
2458 case IP_VS_SO_SET_DELDEST:
2459 ret = ip_vs_del_dest(svc, &udest);
2460 break;
2461 default:
2462 ret = -EINVAL;
2463 }
2464
2465 out_unlock:
2466 mutex_unlock(&__ip_vs_mutex);
2467 out_dec:
2468 /* decrease the module use count */
2469 ip_vs_use_count_dec();
2470
2471 return ret;
2472 }
2473
2474
2475 static void
2476 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2477 {
2478 dst->protocol = src->protocol;
2479 dst->addr = src->addr.ip;
2480 dst->port = src->port;
2481 dst->fwmark = src->fwmark;
2482 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2483 dst->flags = src->flags;
2484 dst->timeout = src->timeout / HZ;
2485 dst->netmask = src->netmask;
2486 dst->num_dests = src->num_dests;
2487 ip_vs_copy_stats(&dst->stats, &src->stats);
2488 }
2489
/*
 * Copy all IPv4 services of @net into the userspace buffer @uptr for
 * the legacy IP_VS_SO_GET_SERVICES sockopt.  Walks both hash tables
 * (address-based and fwmark-based), writing at most get->num_services
 * entries; IPv6 services are skipped because the old binary interface
 * cannot represent them.  Called under __ip_vs_mutex (see
 * do_ip_vs_get_ctl), which keeps the tables stable during the walk.
 * Returns 0 on success or -EFAULT if copy_to_user() fails.
 */
static inline int
__ip_vs_get_service_entries(struct net *net,
			    const struct ip_vs_get_services *get,
			    struct ip_vs_get_services __user *uptr)
{
	int idx, count=0;
	struct ip_vs_service *svc;
	struct ip_vs_service_entry entry;
	int ret = 0;

	/* First pass: services hashed by <protocol, addr, port> */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || !net_eq(svc->net, net))
				continue;

			if (count >= get->num_services)
				goto out;
			/* zero first so struct padding cannot leak */
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}

	/* Second pass: services hashed by firewall mark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || !net_eq(svc->net, net))
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}
out:
	return ret;
}
2540
2541 static inline int
2542 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2543 struct ip_vs_get_dests __user *uptr)
2544 {
2545 struct ip_vs_service *svc;
2546 union nf_inet_addr addr = { .ip = get->addr };
2547 int ret = 0;
2548
2549 if (get->fwmark)
2550 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2551 else
2552 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2553 get->port);
2554
2555 if (svc) {
2556 int count = 0;
2557 struct ip_vs_dest *dest;
2558 struct ip_vs_dest_entry entry;
2559
2560 list_for_each_entry(dest, &svc->destinations, n_list) {
2561 if (count >= get->num_dests)
2562 break;
2563
2564 entry.addr = dest->addr.ip;
2565 entry.port = dest->port;
2566 entry.conn_flags = atomic_read(&dest->conn_flags);
2567 entry.weight = atomic_read(&dest->weight);
2568 entry.u_threshold = dest->u_threshold;
2569 entry.l_threshold = dest->l_threshold;
2570 entry.activeconns = atomic_read(&dest->activeconns);
2571 entry.inactconns = atomic_read(&dest->inactconns);
2572 entry.persistconns = atomic_read(&dest->persistconns);
2573 ip_vs_copy_stats(&entry.stats, &dest->stats);
2574 if (copy_to_user(&uptr->entrytable[count],
2575 &entry, sizeof(entry))) {
2576 ret = -EFAULT;
2577 break;
2578 }
2579 count++;
2580 }
2581 } else
2582 ret = -ESRCH;
2583 return ret;
2584 }
2585
/*
 * Report the current protocol timeouts of @net, converted from jiffies
 * to seconds.  Only TCP established, TCP FIN-wait and UDP normal states
 * are exposed through struct ip_vs_timeout_user.  Fields for protocols
 * that are compiled out are left untouched, so callers should zero @u
 * beforehand (as do_ip_vs_get_ctl does for IP_VS_SO_GET_TIMEOUT).
 */
static inline void
__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

#ifdef CONFIG_IP_VS_PROTO_TCP
	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
	/* timeout_table holds jiffies; userspace expects seconds */
	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
	u->udp_timeout =
			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
2604
2605
/* Map a get-sockopt command number to an index into get_arglen[] */
#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)

/*
 * Minimum buffer length userspace must supply for each get-sockopt
 * command; do_ip_vs_get_ctl() rejects anything shorter with -EINVAL.
 * IP_VS_SO_GET_VERSION returns a plain version string of at most 64
 * bytes, hence the literal.
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
};
2623
/*
 * getsockopt() handler for the legacy IPVS interface.  Validates the
 * command and buffer length against get_arglen[], copies the request
 * header in, then dispatches.  IP_VS_SO_GET_DAEMON is handled first
 * under ipvs->sync_mutex; all other commands run under __ip_vs_mutex.
 * Requires CAP_NET_ADMIN.  Returns 0 or a negative errno.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[128];
	int ret = 0;
	unsigned int copylen;
	struct net *net = sock_net(sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	BUG_ON(!net);
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
		return -EINVAL;

	/* userspace must provide at least the per-command header */
	if (*len < get_arglen[GET_CMDID(cmd)]) {
		pr_err("get_ctl: len %u < %u\n",
		       *len, get_arglen[GET_CMDID(cmd)]);
		return -EINVAL;
	}

	copylen = get_arglen[GET_CMDID(cmd)];
	if (copylen > 128)
		return -EINVAL;

	if (copy_from_user(arg, user, copylen) != 0)
		return -EFAULT;
	/*
	 * Handle daemons first since it has its own locking
	 */
	if (cmd == IP_VS_SO_GET_DAEMON) {
		struct ip_vs_daemon_user d[2];

		/* d[] is copied to userspace wholesale: zero it so
		 * unset slots/padding don't leak stack data */
		memset(&d, 0, sizeof(d));
		if (mutex_lock_interruptible(&ipvs->sync_mutex))
			return -ERESTARTSYS;

		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
				sizeof(d[0].mcast_ifn));
			d[0].syncid = ipvs->master_syncid;
		}
		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
				sizeof(d[1].mcast_ifn));
			d[1].syncid = ipvs->backup_syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
		mutex_unlock(&ipvs->sync_mutex);
		return ret;
	}

	if (mutex_lock_interruptible(&__ip_vs_mutex))
		return -ERESTARTSYS;

	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = ip_vs_conn_tab_size;
		info.num_services = ipvs->num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		struct ip_vs_get_services *get;
		int size;

		get = (struct ip_vs_get_services *)arg;
		/* NOTE(review): num_services comes from userspace; a huge
		 * value could make this int multiplication overflow.  The
		 * *len != size check below rejects most bad inputs, but
		 * confirm against a malicious exact-wrap value. */
		size = sizeof(*get) +
			sizeof(struct ip_vs_service_entry) * get->num_services;
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(net, get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;
		union nf_inet_addr addr;

		/* legacy interface: IPv4 lookup only */
		entry = (struct ip_vs_service_entry *)arg;
		addr.ip = entry->addr;
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
		else
			svc = __ip_vs_service_find(net, AF_INET,
						   entry->protocol, &addr,
						   entry->port);
		if (svc) {
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		struct ip_vs_get_dests *get;
		int size;

		get = (struct ip_vs_get_dests *)arg;
		/* NOTE(review): same potential overflow as GET_SERVICES */
		size = sizeof(*get) +
			sizeof(struct ip_vs_dest_entry) * get->num_dests;
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(net, get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		struct ip_vs_timeout_user t;

		/* zero first: __ip_vs_get_timeouts() skips fields for
		 * protocols that are compiled out */
		memset(&t, 0, sizeof(t));
		__ip_vs_get_timeouts(net, &t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}
2785
2786
/*
 * Registration of the legacy [gs]etsockopt() interface with the
 * netfilter sockopt core.  The [optmin, optmax) ranges cover
 * IP_VS_BASE_CTL through IP_VS_SO_SET_MAX / IP_VS_SO_GET_MAX.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2797
2798 /*
2799 * Generic Netlink interface
2800 */
2801
/* IPVS genetlink family; the id is assigned dynamically at register
 * time (GENL_ID_GENERATE) and looked up by name from userspace. */
static struct genl_family ip_vs_genl_family = {
	.id		= GENL_ID_GENERATE,
	.hdrsize	= 0,
	.name		= IPVS_GENL_NAME,
	.version	= IPVS_GENL_VERSION,
	.maxattr	= IPVS_CMD_MAX,
	.netnsok	= true,		/* Make ipvsadm to work on netns */
};
2811
2812 /* Policy used for first-level command attributes */
/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * ADDR is NLA_BINARY sized for the larger IPv6 form; the AF attribute
 * determines how much of it is meaningful. */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2863
/*
 * Emit an ip_vs_stats snapshot as a nested netlink attribute of type
 * @container_type (IPVS_SVC_ATTR_STATS or IPVS_DEST_ATTR_STATS) into
 * @skb.  On any nla_put failure the whole nest is cancelled so the
 * message stays well-formed.  Returns 0 or -EMSGSIZE.
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_stats *stats)
{
	struct ip_vs_stats_user ustats;
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
	if (!nl_stats)
		return -EMSGSIZE;

	/* take a consistent copy before serializing */
	ip_vs_copy_stats(&ustats, stats);

	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
		goto nla_put_failure;
	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
2893
/*
 * Serialize one service into a nested IPVS_CMD_ATTR_SERVICE attribute.
 * Identity is either the firewall mark or the <protocol, addr, port>
 * triplet — never both.  Flags are reported with a full mask (~0), the
 * timeout is converted to seconds, and the PE name is emitted only when
 * a persistence engine is attached.  Returns 0 or -EMSGSIZE (nest
 * cancelled on failure).
 */
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
{
	struct nlattr *nl_service;
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };

	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
	if (!nl_service)
		return -EMSGSIZE;

	if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
		goto nla_put_failure;
	if (svc->fwmark) {
		if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
			goto nla_put_failure;
	} else {
		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
		    nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
			goto nla_put_failure;
	}

	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
	    (svc->pe &&
	     nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
	    nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}
2935
/*
 * Emit one IPVS_CMD_NEW_SERVICE multipart message for @svc as part of
 * a netlink dump.  On serialization failure the partially-built
 * message header is cancelled and -EMSGSIZE is returned so the caller
 * can stop and resume in the next dump round.
 */
static int ip_vs_genl_dump_service(struct sk_buff *skb,
				   struct ip_vs_service *svc,
				   struct netlink_callback *cb)
{
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_SERVICE);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_service(skb, svc) < 0)
		goto nla_put_failure;

	return genlmsg_end(skb, hdr);

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
2957
/*
 * Netlink dump callback for IPVS_CMD_GET_SERVICE: walk both service
 * hash tables and emit one message per service of this netns.
 * cb->args[0] holds the 1-based index of the last service already
 * dumped, so a refilled skb resumes exactly where the previous round
 * stopped; idx is decremented on failure so the failed entry is
 * retried.  Runs under __ip_vs_mutex.
 */
static int ip_vs_genl_dump_services(struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	int idx = 0, i;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct net *net = skb_sknet(skb);

	mutex_lock(&__ip_vs_mutex);
	/* address-hashed services first */
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
			if (++idx <= start || !net_eq(svc->net, net))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

	/* then fwmark-hashed services */
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
			if (++idx <= start || !net_eq(svc->net, net))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

nla_put_failure:
	mutex_unlock(&__ip_vs_mutex);
	cb->args[0] = idx;

	return skb->len;
}
2995
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into @usvc and look
 * up the matching service (stored in *ret_svc, may be NULL if not
 * found — that alone is not an error).  The identifying fields (AF
 * plus either fwmark or protocol/addr/port) are always required; when
 * @full_entry is set, scheduler name, flags, timeout and netmask must
 * be present as well.
 *
 * Note: usvc->sched_name and usvc->pe_name point into @nla, so they
 * are only valid while the netlink message is alive.
 *
 * Returns 0, -EINVAL on missing/bad attributes, or -EAFNOSUPPORT for
 * an unsupported address family.
 */
static int ip_vs_genl_parse_service(struct net *net,
				    struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, int full_entry,
				    struct ip_vs_service **ret_svc)
{
	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
	struct ip_vs_service *svc;

	/* Parse mandatory identifying service fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
		return -EINVAL;

	nla_af		= attrs[IPVS_SVC_ATTR_AF];
	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];

	/* need AF and either a fwmark or a complete triplet */
	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
		return -EINVAL;

	memset(usvc, 0, sizeof(*usvc));

	usvc->af = nla_get_u16(nla_af);
#ifdef CONFIG_IP_VS_IPV6
	if (usvc->af != AF_INET && usvc->af != AF_INET6)
#else
	if (usvc->af != AF_INET)
#endif
		return -EAFNOSUPPORT;

	if (nla_fwmark) {
		/* fwmark services carry no real protocol; TCP is a
		 * placeholder so later protocol checks pass */
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_u16(nla_port);
		usvc->fwmark = 0;
	}

	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
	else
		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
	*ret_svc = svc;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (svc)
			usvc->flags = svc->flags;

		/* set new flags from userland */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		usvc->sched_name = nla_data(nla_sched);
		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_u32(nla_netmask);
	}

	return 0;
}
3078
3079 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3080 struct nlattr *nla)
3081 {
3082 struct ip_vs_service_user_kern usvc;
3083 struct ip_vs_service *svc;
3084 int ret;
3085
3086 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3087 return ret ? ERR_PTR(ret) : svc;
3088 }
3089
/*
 * Serialize one real-server destination into a nested
 * IPVS_CMD_ATTR_DEST attribute: address/port identity, forwarding
 * method (masked out of conn_flags), weight, thresholds, connection
 * counters and stats.  Returns 0 or -EMSGSIZE (nest cancelled).
 */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
	struct nlattr *nl_dest;

	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
	if (!nl_dest)
		return -EMSGSIZE;

	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
	    nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
			(atomic_read(&dest->conn_flags) &
			 IP_VS_CONN_F_FWD_MASK)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
			atomic_read(&dest->weight)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
			atomic_read(&dest->activeconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
			atomic_read(&dest->inactconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
			atomic_read(&dest->persistconns)))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_dest);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_dest);
	return -EMSGSIZE;
}
3125
/*
 * Emit one IPVS_CMD_NEW_DEST multipart message for @dest during a
 * dump.  Mirrors ip_vs_genl_dump_service(): on failure the message is
 * cancelled and -EMSGSIZE returned so the dump can resume later.
 */
static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
				struct netlink_callback *cb)
{
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_DEST);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_dest(skb, dest) < 0)
		goto nla_put_failure;

	return genlmsg_end(skb, hdr);

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
3146
/*
 * Netlink dump callback for IPVS_CMD_GET_DEST: re-parse the service
 * attribute from the original request (cb->nlh) on every dump round,
 * then emit one message per destination.  cb->args[0] is the resume
 * index, handled like in ip_vs_genl_dump_services().  A missing or
 * unparseable service ends the dump silently (skb->len returned).
 * Runs under __ip_vs_mutex.
 */
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	int idx = 0;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
	struct net *net = skb_sknet(skb);

	mutex_lock(&__ip_vs_mutex);

	/* Try to find the service for which to dump destinations */
	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
		goto out_err;


	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
	if (IS_ERR(svc) || svc == NULL)
		goto out_err;

	/* Dump the destinations */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if (++idx <= start)
			continue;
		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
			/* retry this entry in the next round */
			idx--;
			goto nla_put_failure;
		}
	}

nla_put_failure:
	cb->args[0] = idx;

out_err:
	mutex_unlock(&__ip_vs_mutex);

	return skb->len;
}
3187
/*
 * Parse a nested IPVS_CMD_ATTR_DEST attribute into @udest.  Address
 * and port are always required; with @full_entry set, forwarding
 * method, weight and both thresholds must be present too (used when
 * adding/editing a destination, while deletion only needs identity).
 * Returns 0 or -EINVAL.
 */
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
				 struct nlattr *nla, int full_entry)
{
	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
	struct nlattr *nla_addr, *nla_port;

	/* Parse mandatory identifying destination fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
		return -EINVAL;

	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
	nla_port	= attrs[IPVS_DEST_ATTR_PORT];

	if (!(nla_addr && nla_port))
		return -EINVAL;

	memset(udest, 0, sizeof(*udest));

	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_u16(nla_port);

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
			      *nla_l_thresh;

		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];

		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
			return -EINVAL;

		/* only the forwarding-method bits are user-settable */
		udest->conn_flags = nla_get_u32(nla_fwd)
				    & IP_VS_CONN_F_FWD_MASK;
		udest->weight = nla_get_u32(nla_weight);
		udest->u_threshold = nla_get_u32(nla_u_thresh);
		udest->l_threshold = nla_get_u32(nla_l_thresh);
	}

	return 0;
}
3232
/*
 * Serialize one sync-daemon description (state, multicast interface,
 * sync id) as a nested IPVS_CMD_ATTR_DAEMON attribute.
 * NOTE(review): @state and @syncid are declared __be32 but are passed
 * straight to nla_put_u32() and filled from host-order ipvs fields by
 * the caller — the byte-order annotation looks wrong; confirm and
 * consider changing to __u32.
 * Returns 0 or -EMSGSIZE (nest cancelled).
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
				  const char *mcast_ifn, __be32 syncid)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
		goto nla_put_failure;
	nla_nest_end(skb, nl_daemon);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_daemon);
	return -EMSGSIZE;
}
3254
/*
 * Emit one IPVS_CMD_NEW_DAEMON multipart message for a running sync
 * daemon during a dump; message is cancelled on failure so the caller
 * can retry in the next dump round.  Returns message length or
 * -EMSGSIZE.
 */
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
				  const char *mcast_ifn, __be32 syncid,
				  struct netlink_callback *cb)
{
	void *hdr;
	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_DAEMON);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
		goto nla_put_failure;

	return genlmsg_end(skb, hdr);

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
3275
/*
 * Netlink dump callback for IPVS_CMD_GET_DAEMON: report the master
 * and/or backup sync daemon if running.  cb->args[0] and cb->args[1]
 * flag that the master resp. backup entry was already emitted, so a
 * resumed dump does not duplicate them.  Runs under ipvs->sync_mutex,
 * the lock guarding the sync-daemon state.
 */
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct net *net = skb_sknet(skb);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
					   ipvs->master_mcast_ifn,
					   ipvs->master_syncid, cb) < 0)
			goto nla_put_failure;

		cb->args[0] = 1;
	}

	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
					   ipvs->backup_mcast_ifn,
					   ipvs->backup_syncid, cb) < 0)
			goto nla_put_failure;

		cb->args[1] = 1;
	}

nla_put_failure:
	mutex_unlock(&ipvs->sync_mutex);

	return skb->len;
}
3306
3307 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3308 {
3309 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3310 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3311 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3312 return -EINVAL;
3313
3314 return start_sync_thread(net,
3315 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3316 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3317 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3318 }
3319
3320 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3321 {
3322 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3323 return -EINVAL;
3324
3325 return stop_sync_thread(net,
3326 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3327 }
3328
3329 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3330 {
3331 struct ip_vs_timeout_user t;
3332
3333 __ip_vs_get_timeouts(net, &t);
3334
3335 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3336 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3337
3338 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3339 t.tcp_fin_timeout =
3340 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3341
3342 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3343 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3344
3345 return ip_vs_set_timeout(net, &t);
3346 }
3347
/*
 * Genetlink handler for IPVS_CMD_NEW_DAEMON / IPVS_CMD_DEL_DAEMON.
 * Parses the nested daemon attribute and starts or stops the sync
 * thread under ipvs->sync_mutex — the daemon commands deliberately
 * use their own lock instead of __ip_vs_mutex.  Other commands fall
 * through and return 0.
 */
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
	int ret = 0, cmd;
	struct net *net;
	struct netns_ipvs *ipvs;

	net = skb_sknet(skb);
	ipvs = net_ipvs(net);
	cmd = info->genlhdr->cmd;

	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		mutex_lock(&ipvs->sync_mutex);
		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
				     info->attrs[IPVS_CMD_ATTR_DAEMON],
				     ip_vs_daemon_policy)) {
			ret = -EINVAL;
			goto out;
		}

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
out:
		mutex_unlock(&ipvs->sync_mutex);
	}
	return ret;
}
3379
/*
 * Genetlink handler for all modifying IPVS commands except the daemon
 * ones (see ip_vs_genl_set_daemon).  Flush, set-config and global zero
 * are dispatched early; everything else needs a service argument —
 * parsed in full for add/edit, identity-only otherwise — and possibly
 * a destination argument.  The whole operation runs under
 * __ip_vs_mutex.  Returns 0 or a negative errno.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	int need_full_svc = 0, need_full_dest = 0;
	struct net *net;

	net = skb_sknet(skb);
	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* commands that take no service argument */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush(net);
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(net, info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* ZERO without a service zeroes every counter */
		ret = ip_vs_zero_all(net);
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = 1;

	ret = ip_vs_genl_parse_service(net, &usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc, &svc);
	if (ret)
		goto out;

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = 1;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;
	}

	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(net, &usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		/* do not use svc, it can be freed */
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3474
/* Generic Netlink ->doit handler for the one-shot GET commands
 * (IPVS_CMD_GET_SERVICE, IPVS_CMD_GET_INFO, IPVS_CMD_GET_CONFIG).
 * Builds a single reply skb while holding __ip_vs_mutex and unicasts
 * it back to the requester via genlmsg_reply().
 * Returns 0 on success or a negative errno.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;
	struct net *net;

	net = skb_sknet(skb);
	cmd = info->genlhdr->cmd;

	/* Each GET command is answered with the corresponding NEW/SET
	 * command id, so the reply payload has the layout userspace
	 * already knows how to parse for that command. */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		pr_err("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(net,
					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			/* Malformed service attribute or lookup error */
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			if (ret)
				goto nla_put_failure;
		} else {
			/* No such service configured */
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		/* Report the per-protocol timeouts currently in effect;
		 * each attribute is only present when the corresponding
		 * protocol support is compiled in. */
		__ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
				t.tcp_timeout) ||
		    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
				t.tcp_fin_timeout))
			goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
			goto nla_put_failure;
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
				IP_VS_VERSION_CODE) ||
		    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
				ip_vs_conn_tab_size))
			goto nla_put_failure;
		break;
	}

	/* Success: genlmsg_reply() consumes msg, so skip the free below. */
	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	goto out;

nla_put_failure:
	pr_err("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	/* All error paths still own msg and must free it. */
	nlmsg_free(msg);
out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3572
3573
/* Generic Netlink operations table for the IPVS family.
 *
 * Every command requires CAP_NET_ADMIN (GENL_ADMIN_PERM).  All modifying
 * commands funnel through ip_vs_genl_set_cmd (daemons through
 * ip_vs_genl_set_daemon), one-shot GET commands through
 * ip_vs_genl_get_cmd, and GET_DEST / GET_DAEMON are dump-only
 * (->dumpit with no ->doit).
 */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Supports both a single-service ->doit reply and a
		 * full-table dump. */
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* Dump-only: destinations of one service. */
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_daemon,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_daemon,
	},
	{
		/* Dump-only.  NOTE(review): no .policy set here — dump
		 * handlers do not get pre-validated attrs; verify that
		 * ip_vs_genl_dump_daemons validates attributes itself. */
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3669
3670 static int __init ip_vs_genl_register(void)
3671 {
3672 return genl_register_family_with_ops(&ip_vs_genl_family,
3673 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3674 }
3675
/* Unregister the IPVS Generic Netlink family (ops go away with it). */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3680
3681 /* End of Generic Netlink interface definitions */
3682
3683 /*
3684 * per netns intit/exit func.
3685 */
3686 #ifdef CONFIG_SYSCTL
/* Per-netns sysctl setup: wire the net/ipv4/vs/ table entries to this
 * namespace's netns_ipvs fields, seed their defaults, start the stats
 * estimator and schedule the periodic defense work.
 * Returns 0 on success or -ENOMEM.
 *
 * NOTE: the idx sequence below must match the entry order of the
 * file-scope vs_vars[] template exactly — adding/removing/reordering
 * either side without the other silently wires a sysctl to the wrong
 * field.
 */
static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
{
	int idx;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	/* Non-init namespaces get their own copy of the table so each
	 * netns can tune values independently; init_net uses the
	 * template in place. */
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;
	} else
		tbl = vs_vars;
	/* Initialize sysctl defaults */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	/* Cap the sync backlog relative to available memory. */
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* Two-element entry: set .data and .maxlen, then advance idx. */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;


	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* Only free what we kmemdup()ed above. */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(net, &ipvs->tot_stats);
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);

	return 0;
}
3756
3757 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3758 {
3759 struct netns_ipvs *ipvs = net_ipvs(net);
3760
3761 cancel_delayed_work_sync(&ipvs->defense_work);
3762 cancel_work_sync(&ipvs->defense_work.work);
3763 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3764 }
3765
3766 #else
3767
/* CONFIG_SYSCTL=n stub: nothing to set up, always succeeds. */
static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
/* CONFIG_SYSCTL=n stub: nothing to tear down. */
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3770
3771 #endif
3772
/* Notifier block hooking ip_vs_dst_event into the netdevice notifier
 * chain; registered/unregistered in ip_vs_control_init()/cleanup(). */
static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
};
3776
3777 int __net_init ip_vs_control_net_init(struct net *net)
3778 {
3779 int idx;
3780 struct netns_ipvs *ipvs = net_ipvs(net);
3781
3782 rwlock_init(&ipvs->rs_lock);
3783
3784 /* Initialize rs_table */
3785 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3786 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3787
3788 INIT_LIST_HEAD(&ipvs->dest_trash);
3789 atomic_set(&ipvs->ftpsvc_counter, 0);
3790 atomic_set(&ipvs->nullsvc_counter, 0);
3791
3792 /* procfs stats */
3793 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3794 if (!ipvs->tot_stats.cpustats)
3795 return -ENOMEM;
3796
3797 spin_lock_init(&ipvs->tot_stats.lock);
3798
3799 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3800 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3801 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3802 &ip_vs_stats_percpu_fops);
3803
3804 if (ip_vs_control_net_init_sysctl(net))
3805 goto err;
3806
3807 return 0;
3808
3809 err:
3810 free_percpu(ipvs->tot_stats.cpustats);
3811 return -ENOMEM;
3812 }
3813
/* Per-netns teardown for the IPVS control code.  Order matters: drop
 * trashed destinations and stop the estimator before the sysctl
 * cleanup (which cancels the defense work), then remove the /proc
 * entries and finally free the per-cpu stats they reported. */
void __net_exit ip_vs_control_net_cleanup(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_trash_cleanup(net);
	ip_vs_stop_estimator(net, &ipvs->tot_stats);
	ip_vs_control_net_cleanup_sysctl(net);
	proc_net_remove(net, "ip_vs_stats_percpu");
	proc_net_remove(net, "ip_vs_stats");
	proc_net_remove(net, "ip_vs");
	free_percpu(ipvs->tot_stats.cpustats);
}
3826
3827 int __init ip_vs_register_nl_ioctl(void)
3828 {
3829 int ret;
3830
3831 ret = nf_register_sockopt(&ip_vs_sockopts);
3832 if (ret) {
3833 pr_err("cannot register sockopt.\n");
3834 goto err_sock;
3835 }
3836
3837 ret = ip_vs_genl_register();
3838 if (ret) {
3839 pr_err("cannot register Generic Netlink interface.\n");
3840 goto err_genl;
3841 }
3842 return 0;
3843
3844 err_genl:
3845 nf_unregister_sockopt(&ip_vs_sockopts);
3846 err_sock:
3847 return ret;
3848 }
3849
/* Unregister both control interfaces, in reverse registration order. */
void ip_vs_unregister_nl_ioctl(void)
{
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
}
3855
/* Module-wide initialization of the IPVS control code: set up the
 * global service hash tables and hook into the netdevice notifier
 * chain.  Returns 0 or the negative errno from notifier registration.
 */
int __init ip_vs_control_init(void)
{
	int idx;
	int ret;

	EnterFunction(2);

	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}

	smp_wmb();	/* Do we really need it now ? */

	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
	if (ret < 0)
		/* NOTE(review): the error return skips LeaveFunction(2),
		 * leaving the debug entry/exit trace unbalanced. */
		return ret;

	LeaveFunction(2);
	return 0;
}
3878
3879
/* Module-wide cleanup: detach from the netdevice notifier chain
 * (counterpart of ip_vs_control_init()). */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_netdevice_notifier(&ip_vs_dst_notifier);
	LeaveFunction(2);
}