/*
 * NETLINK	Kernel-user communication protocol.
 *
 * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 * 				 use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 * 				 - inc module use count of module that owns
 * 				   the kernel socket in case userspace opens
 * 				   socket of same protocol
 * 				 - remove all module support, since netlink is
 * 				   mandatory if CONFIG_NET=y these days
 */
#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>

#include <net/net_namespace.h>
#include <net/netlink.h>

#include "af_netlink.h"
struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

/* state bits */
#define NETLINK_CONGESTED       0x0

/* flags */
#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2
#define NETLINK_BROADCAST_SEND_ERROR    0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8
static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct netlink_table *nl_table;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);

DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static inline u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
{
        return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}
#ifdef CONFIG_NETLINK_MMAP
static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
{
        return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
}

static __pure struct page *pgvec_to_page(const void *addr)
{
        if (is_vmalloc_addr(addr))
                return vmalloc_to_page(addr);
        else
                return virt_to_page(addr);
}

static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
{
        unsigned int i;

        for (i = 0; i < len; i++) {
                if (pg_vec[i] != NULL) {
                        if (is_vmalloc_addr(pg_vec[i]))
                                vfree(pg_vec[i]);
                        else
                                free_pages((unsigned long)pg_vec[i], order);
                }
        }
        kfree(pg_vec);
}

static void *alloc_one_pg_vec_page(unsigned long order)
{
        void *buffer;
        gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
                          __GFP_NOWARN | __GFP_NORETRY;

        buffer = (void *)__get_free_pages(gfp_flags, order);
        if (buffer != NULL)
                return buffer;

        buffer = vzalloc((1 << order) * PAGE_SIZE);
        if (buffer != NULL)
                return buffer;

        gfp_flags &= ~__GFP_NORETRY;
        return (void *)__get_free_pages(gfp_flags, order);
}

static void **alloc_pg_vec(struct netlink_sock *nlk,
                           struct nl_mmap_req *req, unsigned int order)
{
        unsigned int block_nr = req->nm_block_nr;
        unsigned int i;
        void **pg_vec, *ptr;

        pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
        if (pg_vec == NULL)
                return NULL;

        for (i = 0; i < block_nr; i++) {
                pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
                if (pg_vec[i] == NULL)
                        goto err1;
        }

        return pg_vec;
err1:
        free_pg_vec(pg_vec, order, block_nr);
        return NULL;
}
static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
                            bool closing, bool tx_ring)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_ring *ring;
        struct sk_buff_head *queue;
        void **pg_vec = NULL;
        unsigned int order = 0;
        int err;

        ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
        queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

        if (!closing) {
                if (atomic_read(&nlk->mapped))
                        return -EBUSY;
                if (atomic_read(&ring->pending))
                        return -EBUSY;
        }

        if (req->nm_block_nr) {
                if (ring->pg_vec != NULL)
                        return -EBUSY;

                if ((int)req->nm_block_size <= 0)
                        return -EINVAL;
                if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
                        return -EINVAL;
                if (req->nm_frame_size < NL_MMAP_HDRLEN)
                        return -EINVAL;
                if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
                        return -EINVAL;

                ring->frames_per_block = req->nm_block_size /
                                         req->nm_frame_size;
                if (ring->frames_per_block == 0)
                        return -EINVAL;
                if (ring->frames_per_block * req->nm_block_nr !=
                    req->nm_frame_nr)
                        return -EINVAL;

                order = get_order(req->nm_block_size);
                pg_vec = alloc_pg_vec(nlk, req, order);
                if (pg_vec == NULL)
                        return -ENOMEM;
        } else {
                if (req->nm_frame_nr)
                        return -EINVAL;
        }

        err = -EBUSY;
        mutex_lock(&nlk->pg_vec_lock);
        if (closing || atomic_read(&nlk->mapped) == 0) {
                err = 0;
                spin_lock_bh(&queue->lock);

                ring->frame_max         = req->nm_frame_nr - 1;
                ring->head              = 0;
                ring->frame_size        = req->nm_frame_size;
                ring->pg_vec_pages      = req->nm_block_size / PAGE_SIZE;

                swap(ring->pg_vec_len, req->nm_block_nr);
                swap(ring->pg_vec_order, order);
                swap(ring->pg_vec, pg_vec);

                __skb_queue_purge(queue);
                spin_unlock_bh(&queue->lock);

                WARN_ON(atomic_read(&nlk->mapped));
        }
        mutex_unlock(&nlk->pg_vec_lock);

        if (pg_vec)
                free_pg_vec(pg_vec, order, req->nm_block_nr);
        return err;
}
static void netlink_mm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket *sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_inc(&nlk_sk(sk)->mapped);
}

static void netlink_mm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket *sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_dec(&nlk_sk(sk)->mapped);
}

static const struct vm_operations_struct netlink_mmap_ops = {
        .open   = netlink_mm_open,
        .close  = netlink_mm_close,
};

static int netlink_mmap(struct file *file, struct socket *sock,
                        struct vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_ring *ring;
        unsigned long start, size, expected;
        unsigned int i;
        int err = -EINVAL;

        if (vma->vm_pgoff)
                return -EINVAL;

        mutex_lock(&nlk->pg_vec_lock);

        expected = 0;
        for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
                if (ring->pg_vec == NULL)
                        continue;
                expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
        }

        if (expected == 0)
                goto out;

        size = vma->vm_end - vma->vm_start;
        if (size != expected)
                goto out;

        start = vma->vm_start;
        for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
                if (ring->pg_vec == NULL)
                        continue;

                for (i = 0; i < ring->pg_vec_len; i++) {
                        struct page *page;
                        void *kaddr = ring->pg_vec[i];
                        unsigned int pg_num;

                        for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
                                page = pgvec_to_page(kaddr);
                                err = vm_insert_page(vma, start, page);
                                if (err < 0)
                                        goto out;
                                start += PAGE_SIZE;
                                kaddr += PAGE_SIZE;
                        }
                }
        }

        atomic_inc(&nlk->mapped);
        vma->vm_ops = &netlink_mmap_ops;
        err = 0;
out:
        mutex_unlock(&nlk->pg_vec_lock);
        return err;
}
static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
{
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
        struct page *p_start, *p_end;

        /* First page is flushed through netlink_{get,set}_status */
        p_start = pgvec_to_page(hdr + PAGE_SIZE);
        p_end   = pgvec_to_page((void *)hdr + NL_MMAP_MSG_HDRLEN + hdr->nm_len - 1);
        while (p_start <= p_end) {
                flush_dcache_page(p_start);
                p_start++;
        }
#endif
}

static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
{
        smp_rmb();
        flush_dcache_page(pgvec_to_page(hdr));
        return hdr->nm_status;
}

static void netlink_set_status(struct nl_mmap_hdr *hdr,
                               enum nl_mmap_status status)
{
        smp_mb();
        hdr->nm_status = status;
        flush_dcache_page(pgvec_to_page(hdr));
}

static struct nl_mmap_hdr *
__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
{
        unsigned int pg_vec_pos, frame_off;

        pg_vec_pos = pos / ring->frames_per_block;
        frame_off  = pos % ring->frames_per_block;

        return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
}

static struct nl_mmap_hdr *
netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
                     enum nl_mmap_status status)
{
        struct nl_mmap_hdr *hdr;

        hdr = __netlink_lookup_frame(ring, pos);
        if (netlink_get_status(hdr) != status)
                return NULL;

        return hdr;
}

static struct nl_mmap_hdr *
netlink_current_frame(const struct netlink_ring *ring,
                      enum nl_mmap_status status)
{
        return netlink_lookup_frame(ring, ring->head, status);
}

static struct nl_mmap_hdr *
netlink_previous_frame(const struct netlink_ring *ring,
                       enum nl_mmap_status status)
{
        unsigned int prev;

        prev = ring->head ? ring->head - 1 : ring->frame_max;
        return netlink_lookup_frame(ring, prev, status);
}

static void netlink_increment_head(struct netlink_ring *ring)
{
        ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
}

static void netlink_forward_ring(struct netlink_ring *ring)
{
        unsigned int head = ring->head, pos = head;
        const struct nl_mmap_hdr *hdr;

        do {
                hdr = __netlink_lookup_frame(ring, pos);
                if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
                        break;
                if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
                        break;
                netlink_increment_head(ring);
        } while (ring->head != head);
}
static unsigned int netlink_poll(struct file *file, struct socket *sock,
                                 poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int mask;

        mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (nlk->rx_ring.pg_vec) {
                netlink_forward_ring(&nlk->rx_ring);
                if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);

        spin_lock_bh(&sk->sk_write_queue.lock);
        if (nlk->tx_ring.pg_vec) {
                if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
                        mask |= POLLOUT | POLLWRNORM;
        }
        spin_unlock_bh(&sk->sk_write_queue.lock);

        return mask;
}
*netlink_mmap_hdr(struct sk_buff
*skb
)
463 return (struct nl_mmap_hdr
*)(skb
->head
- NL_MMAP_HDRLEN
);
466 static void netlink_ring_setup_skb(struct sk_buff
*skb
, struct sock
*sk
,
467 struct netlink_ring
*ring
,
468 struct nl_mmap_hdr
*hdr
)
473 size
= ring
->frame_size
- NL_MMAP_HDRLEN
;
474 data
= (void *)hdr
+ NL_MMAP_HDRLEN
;
478 skb_reset_tail_pointer(skb
);
479 skb
->end
= skb
->tail
+ size
;
482 skb
->destructor
= netlink_skb_destructor
;
483 NETLINK_CB(skb
).flags
|= NETLINK_SKB_MMAPED
;
484 NETLINK_CB(skb
).sk
= sk
;
486 #else /* CONFIG_NETLINK_MMAP */
487 #define netlink_skb_is_mmaped(skb) false
488 #define netlink_mmap sock_no_mmap
489 #define netlink_poll datagram_poll
490 #endif /* CONFIG_NETLINK_MMAP */
492 static void netlink_destroy_callback(struct netlink_callback
*cb
)
498 static void netlink_consume_callback(struct netlink_callback
*cb
)
500 consume_skb(cb
->skb
);
504 static void netlink_skb_destructor(struct sk_buff
*skb
)
506 #ifdef CONFIG_NETLINK_MMAP
507 struct nl_mmap_hdr
*hdr
;
508 struct netlink_ring
*ring
;
511 /* If a packet from the kernel to userspace was freed because of an
512 * error without being delivered to userspace, the kernel must reset
513 * the status. In the direction userspace to kernel, the status is
514 * always reset here after the packet was processed and freed.
516 if (netlink_skb_is_mmaped(skb
)) {
517 hdr
= netlink_mmap_hdr(skb
);
518 sk
= NETLINK_CB(skb
).sk
;
520 if (!(NETLINK_CB(skb
).flags
& NETLINK_SKB_DELIVERED
)) {
522 netlink_set_status(hdr
, NL_MMAP_STATUS_VALID
);
524 ring
= &nlk_sk(sk
)->rx_ring
;
526 WARN_ON(atomic_read(&ring
->pending
) == 0);
527 atomic_dec(&ring
->pending
);
537 static void netlink_skb_set_owner_r(struct sk_buff
*skb
, struct sock
*sk
)
539 WARN_ON(skb
->sk
!= NULL
);
541 skb
->destructor
= netlink_skb_destructor
;
542 atomic_add(skb
->truesize
, &sk
->sk_rmem_alloc
);
543 sk_mem_charge(sk
, skb
->truesize
);
546 static void netlink_sock_destruct(struct sock
*sk
)
548 struct netlink_sock
*nlk
= nlk_sk(sk
);
552 nlk
->cb
->done(nlk
->cb
);
554 module_put(nlk
->cb
->module
);
555 netlink_destroy_callback(nlk
->cb
);
558 skb_queue_purge(&sk
->sk_receive_queue
);
559 #ifdef CONFIG_NETLINK_MMAP
561 struct nl_mmap_req req
;
563 memset(&req
, 0, sizeof(req
));
564 if (nlk
->rx_ring
.pg_vec
)
565 netlink_set_ring(sk
, &req
, true, false);
566 memset(&req
, 0, sizeof(req
));
567 if (nlk
->tx_ring
.pg_vec
)
568 netlink_set_ring(sk
, &req
, true, true);
570 #endif /* CONFIG_NETLINK_MMAP */
572 if (!sock_flag(sk
, SOCK_DEAD
)) {
573 printk(KERN_ERR
"Freeing alive netlink socket %p\n", sk
);
577 WARN_ON(atomic_read(&sk
->sk_rmem_alloc
));
578 WARN_ON(atomic_read(&sk
->sk_wmem_alloc
));
579 WARN_ON(nlk_sk(sk
)->groups
);
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us to netlink_table_grab */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}

static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
        struct nl_portid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;

        read_lock(&nl_table_lock);
        head = nl_portid_hashfn(hash, portid);
        sk_for_each(sk, head) {
                if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static struct hlist_head *nl_portid_hash_zalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                         get_order(size));
}

static void nl_portid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_portid_hash_zalloc(size);
        if (!table)
                return 0;

        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *tmp;

                sk_for_each_safe(sk, tmp, &otable[i])
                        __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
        }

        nl_portid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}

static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_portid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}
static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        unsigned long mask;
        unsigned int i;
        struct listeners *listeners;

        listeners = nl_deref_protected(tbl->listeners);
        if (!listeners)
                return;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
        struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        int len;

        netlink_table_grab();
        head = nl_portid_hashfn(hash, portid);
        len = 0;
        sk_for_each(osk, head) {
                if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
                        break;
                len++;
        }
        if (osk)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->portid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_portid_hash_dilute(hash, len))
                head = nl_portid_hashfn(hash, portid);
        hash->entries++;
        nlk_sk(sk)->portid = portid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex) {
                nlk->cb_mutex = cb_mutex;
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);
#ifdef CONFIG_NETLINK_MMAP
        mutex_init(&nlk->pg_vec_lock);
#endif

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        void (*bind)(int group);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        bind = nl_table[protocol].bind;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}
*sock
)
901 struct sock
*sk
= sock
->sk
;
902 struct netlink_sock
*nlk
;
912 * OK. Socket is unlinked, any packets that arrive now
917 wake_up_interruptible_all(&nlk
->wait
);
919 skb_queue_purge(&sk
->sk_write_queue
);
922 struct netlink_notify n
= {
924 .protocol
= sk
->sk_protocol
,
925 .portid
= nlk
->portid
,
927 atomic_notifier_call_chain(&netlink_chain
,
928 NETLINK_URELEASE
, &n
);
931 module_put(nlk
->module
);
933 netlink_table_grab();
934 if (netlink_is_kernel(sk
)) {
935 BUG_ON(nl_table
[sk
->sk_protocol
].registered
== 0);
936 if (--nl_table
[sk
->sk_protocol
].registered
== 0) {
937 struct listeners
*old
;
939 old
= nl_deref_protected(nl_table
[sk
->sk_protocol
].listeners
);
940 RCU_INIT_POINTER(nl_table
[sk
->sk_protocol
].listeners
, NULL
);
942 nl_table
[sk
->sk_protocol
].module
= NULL
;
943 nl_table
[sk
->sk_protocol
].bind
= NULL
;
944 nl_table
[sk
->sk_protocol
].flags
= 0;
945 nl_table
[sk
->sk_protocol
].registered
= 0;
947 } else if (nlk
->subscriptions
) {
948 netlink_update_listeners(sk
);
950 netlink_table_ungrab();
956 sock_prot_inuse_add(sock_net(sk
), &netlink_proto
, -1);
962 static int netlink_autobind(struct socket
*sock
)
964 struct sock
*sk
= sock
->sk
;
965 struct net
*net
= sock_net(sk
);
966 struct nl_portid_hash
*hash
= &nl_table
[sk
->sk_protocol
].hash
;
967 struct hlist_head
*head
;
969 s32 portid
= task_tgid_vnr(current
);
971 static s32 rover
= -4097;
975 netlink_table_grab();
976 head
= nl_portid_hashfn(hash
, portid
);
977 sk_for_each(osk
, head
) {
978 if (!net_eq(sock_net(osk
), net
))
980 if (nlk_sk(osk
)->portid
== portid
) {
981 /* Bind collision, search negative portid values. */
985 netlink_table_ungrab();
989 netlink_table_ungrab();
991 err
= netlink_insert(sk
, net
, portid
);
992 if (err
== -EADDRINUSE
)
995 /* If 2 threads race to autobind, that is fine. */
1002 static inline int netlink_capable(const struct socket
*sock
, unsigned int flag
)
1004 return (nl_table
[sock
->sk
->sk_protocol
].flags
& flag
) ||
1005 ns_capable(sock_net(sock
->sk
)->user_ns
, CAP_NET_ADMIN
);
1009 netlink_update_subscriptions(struct sock
*sk
, unsigned int subscriptions
)
1011 struct netlink_sock
*nlk
= nlk_sk(sk
);
1013 if (nlk
->subscriptions
&& !subscriptions
)
1014 __sk_del_bind_node(sk
);
1015 else if (!nlk
->subscriptions
&& subscriptions
)
1016 sk_add_bind_node(sk
, &nl_table
[sk
->sk_protocol
].mc_list
);
1017 nlk
->subscriptions
= subscriptions
;
1020 static int netlink_realloc_groups(struct sock
*sk
)
1022 struct netlink_sock
*nlk
= nlk_sk(sk
);
1023 unsigned int groups
;
1024 unsigned long *new_groups
;
1027 netlink_table_grab();
1029 groups
= nl_table
[sk
->sk_protocol
].groups
;
1030 if (!nl_table
[sk
->sk_protocol
].registered
) {
1035 if (nlk
->ngroups
>= groups
)
1038 new_groups
= krealloc(nlk
->groups
, NLGRPSZ(groups
), GFP_ATOMIC
);
1039 if (new_groups
== NULL
) {
1043 memset((char *)new_groups
+ NLGRPSZ(nlk
->ngroups
), 0,
1044 NLGRPSZ(groups
) - NLGRPSZ(nlk
->ngroups
));
1046 nlk
->groups
= new_groups
;
1047 nlk
->ngroups
= groups
;
1049 netlink_table_ungrab();
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (addr_len < sizeof(struct sockaddr_nl))
                return -EINVAL;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only superuser is allowed to listen multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->portid) {
                if (nladdr->nl_pid != nlk->portid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        if (nlk->netlink_bind && nlk->groups[0]) {
                int i;

                for (i = 0; i < nlk->ngroups; i++) {
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_bind(i);
                }
        }

        return 0;
}
*sock
, struct sockaddr
*addr
,
1112 int alen
, int flags
)
1115 struct sock
*sk
= sock
->sk
;
1116 struct netlink_sock
*nlk
= nlk_sk(sk
);
1117 struct sockaddr_nl
*nladdr
= (struct sockaddr_nl
*)addr
;
1119 if (alen
< sizeof(addr
->sa_family
))
1122 if (addr
->sa_family
== AF_UNSPEC
) {
1123 sk
->sk_state
= NETLINK_UNCONNECTED
;
1124 nlk
->dst_portid
= 0;
1128 if (addr
->sa_family
!= AF_NETLINK
)
1131 /* Only superuser is allowed to send multicasts */
1132 if (nladdr
->nl_groups
&& !netlink_capable(sock
, NL_CFG_F_NONROOT_SEND
))
1136 err
= netlink_autobind(sock
);
1139 sk
->sk_state
= NETLINK_CONNECTED
;
1140 nlk
->dst_portid
= nladdr
->nl_pid
;
1141 nlk
->dst_group
= ffs(nladdr
->nl_groups
);
1147 static int netlink_getname(struct socket
*sock
, struct sockaddr
*addr
,
1148 int *addr_len
, int peer
)
1150 struct sock
*sk
= sock
->sk
;
1151 struct netlink_sock
*nlk
= nlk_sk(sk
);
1152 DECLARE_SOCKADDR(struct sockaddr_nl
*, nladdr
, addr
);
1154 nladdr
->nl_family
= AF_NETLINK
;
1156 *addr_len
= sizeof(*nladdr
);
1159 nladdr
->nl_pid
= nlk
->dst_portid
;
1160 nladdr
->nl_groups
= netlink_group_mask(nlk
->dst_group
);
1162 nladdr
->nl_pid
= nlk
->portid
;
1163 nladdr
->nl_groups
= nlk
->groups
? nlk
->groups
[0] : 0;
1168 static void netlink_overrun(struct sock
*sk
)
1170 struct netlink_sock
*nlk
= nlk_sk(sk
);
1172 if (!(nlk
->flags
& NETLINK_RECV_NO_ENOBUFS
)) {
1173 if (!test_and_set_bit(NETLINK_CONGESTED
, &nlk_sk(sk
)->state
)) {
1174 sk
->sk_err
= ENOBUFS
;
1175 sk
->sk_error_report(sk
);
1178 atomic_inc(&sk
->sk_drops
);
1181 static struct sock
*netlink_getsockbyportid(struct sock
*ssk
, u32 portid
)
1184 struct netlink_sock
*nlk
;
1186 sock
= netlink_lookup(sock_net(ssk
), ssk
->sk_protocol
, portid
);
1188 return ERR_PTR(-ECONNREFUSED
);
1190 /* Don't bother queuing skb if kernel socket has no input function */
1192 if (sock
->sk_state
== NETLINK_CONNECTED
&&
1193 nlk
->dst_portid
!= nlk_sk(ssk
)->portid
) {
1195 return ERR_PTR(-ECONNREFUSED
);
1200 struct sock
*netlink_getsockbyfilp(struct file
*filp
)
1202 struct inode
*inode
= file_inode(filp
);
1205 if (!S_ISSOCK(inode
->i_mode
))
1206 return ERR_PTR(-ENOTSOCK
);
1208 sock
= SOCKET_I(inode
)->sk
;
1209 if (sock
->sk_family
!= AF_NETLINK
)
1210 return ERR_PTR(-EINVAL
);
/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination, just all
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
            test_bit(NETLINK_CONGESTED, &nlk->state)) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        netlink_skb_set_owner_r(skb, sk);
        return 0;
}
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        WARN_ON(skb->sk != NULL);

        delta = skb->end - skb->tail;
        if (delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        if (!pskb_expand_head(skb, 0, -delta, allocation))
                skb->truesize -= delta;

        return skb;
}

static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(NETLINK_CONGESTED, &nlk->state);
        if (!test_bit(NETLINK_CONGESTED, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
                                  struct sock *ssk)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                netlink_skb_set_owner_r(skb, sk);
                NETLINK_CB(skb).sk = ssk;
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 portid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbyportid(ssk, portid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb, ssk);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(NETLINK_CONGESTED, &nlk->state)) {
                netlink_skb_set_owner_r(skb, sk);
                __netlink_sendskb(sk, skb);
                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
        }
        return -1;
}
{
1412 struct sock
*exclude_sk
;
1417 int delivery_failure
;
1421 struct sk_buff
*skb
, *skb2
;
1422 int (*tx_filter
)(struct sock
*dsk
, struct sk_buff
*skb
, void *data
);
static int do_one_broadcast(struct sock *sk,
                            struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                goto out;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (!net_eq(sock_net(sk), p->net))
                goto out;

        if (p->failure) {
                netlink_overrun(sk);
                goto out;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
                netlink_overrun(sk);
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
        sock_put(sk);

out:
        return 0;
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
        u32 group, gfp_t allocation,
        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
        void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.portid = portid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow to change socket list */

        netlink_lock_table();

        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        }
        consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && (allocation & __GFP_WAIT))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
                NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
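/*
 * Illustrative sketch (editor's addition): a typical kernel-side event
 * notification allocates a message, fills it, and broadcasts it to a
 * multicast group; MY_GRP and fill_event() are hypothetical:
 *
 *	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 *	if (!skb)
 *		return -ENOMEM;
 *	fill_event(skb);			// hypothetical
 *	netlink_broadcast(my_sk, skb, 0, MY_GRP, GFP_KERNEL);
 */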
struct netlink_set_err_data {
        struct sock *exclude_sk;
        u32 portid;
        u32 group;
        int code;
};

static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int ret = 0;

        if (sk == p->exclude_sk)
                goto out;

        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                goto out;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
                ret = 1;
                goto out;
        }

        sk->sk_err = p->code;
        sk->sk_error_report(sk);
out:
        return ret;
}
/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
        struct netlink_set_err_data info;
        struct sock *sk;
        int ret = 0;

        info.exclude_sk = ssk;
        info.portid = portid;
        info.group = group;
        /* sk->sk_err wants a positive error value */
        info.code = -code;

        read_lock(&nl_table_lock);

        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
                ret += do_one_set_err(sk, &info);

        read_unlock(&nl_table_lock);
        return ret;
}
EXPORT_SYMBOL(netlink_set_err);
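/*
 * Illustrative sketch (editor's addition): per the kernel-doc above, the
 * caller passes a negative errno, which is flipped to the positive value
 * that sk->sk_err expects; MY_GRP is hypothetical:
 *
 *	netlink_set_err(my_sk, 0, MY_GRP, -ENOBUFS);
 */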
/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
                                     unsigned int group,
                                     int is_new)
{
        int old, new = !!is_new, subscriptions;

        old = test_bit(group - 1, nlk->groups);
        subscriptions = nlk->subscriptions - old + new;
        if (new)
                __set_bit(group - 1, nlk->groups);
        else
                __clear_bit(group - 1, nlk->groups);
        netlink_update_subscriptions(&nlk->sk, subscriptions);
        netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int val = 0;
        int err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
            optlen >= sizeof(int) &&
            get_user(val, (unsigned int __user *)optval))
                return -EFAULT;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (val)
                        nlk->flags |= NETLINK_RECV_PKTINFO;
                else
                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
                err = 0;
                break;
        case NETLINK_ADD_MEMBERSHIP:
        case NETLINK_DROP_MEMBERSHIP: {
                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
                if (!val || val - 1 >= nlk->ngroups)
                        return -EINVAL;
                netlink_table_grab();
                netlink_update_socket_mc(nlk, val,
                                         optname == NETLINK_ADD_MEMBERSHIP);
                netlink_table_ungrab();

                if (nlk->netlink_bind)
                        nlk->netlink_bind(val);

                err = 0;
                break;
        }
        case NETLINK_BROADCAST_ERROR:
                if (val)
                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
                else
                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (val) {
                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
                        clear_bit(NETLINK_CONGESTED, &nlk->state);
                        wake_up_interruptible(&nlk->wait);
                } else {
                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
                }
                err = 0;
                break;
#ifdef CONFIG_NETLINK_MMAP
        case NETLINK_RX_RING:
        case NETLINK_TX_RING: {
                struct nl_mmap_req req;

                /* Rings might consume more memory than queue limits, require
                 * CAP_NET_ADMIN.
                 */
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
                if (optlen < sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req, optval, sizeof(req)))
                        return -EFAULT;
                err = netlink_set_ring(sk, &req, false,
                                       optname == NETLINK_TX_RING);
                break;
        }
#endif /* CONFIG_NETLINK_MMAP */
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}
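/*
 * Illustrative sketch (editor's addition): user space configures the rings
 * handled by NETLINK_RX_RING/NETLINK_TX_RING above, then maps both with a
 * single mmap() call; the sizes must satisfy the alignment and frame-count
 * checks in netlink_set_ring():
 *
 *	struct nl_mmap_req req = {
 *		.nm_block_size	= 4096,
 *		.nm_block_nr	= 64,
 *		.nm_frame_size	= 2048,
 *		.nm_frame_nr	= 64 * 4096 / 2048,
 *	};
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req));
 *	setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req));
 *	ring = mmap(NULL, 2 * 64 * 4096, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, 0);
 */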
static int netlink_getsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int len, val, err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_BROADCAST_ERROR:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
        struct nl_pktinfo info;

        info.group = NETLINK_CB(skb).dst_group;
        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *addr = msg->msg_name;
        u32 dst_portid;
        u32 dst_group;
        struct sk_buff *skb;
        int err;
        struct scm_cookie scm;

        if (msg->msg_flags&MSG_OOB)
                return -EOPNOTSUPP;

        if (NULL == siocb->scm)
                siocb->scm = &scm;

        err = scm_send(sock, msg, siocb->scm, true);
        if (err < 0)
                return err;

        if (msg->msg_namelen) {
                err = -EINVAL;
                if (addr->nl_family != AF_NETLINK)
                        goto out;
                dst_portid = addr->nl_pid;
                dst_group = ffs(addr->nl_groups);
                err = -EPERM;
                if ((dst_group || dst_portid) &&
                    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
                        goto out;
        } else {
                dst_portid = nlk->dst_portid;
                dst_group = nlk->dst_group;
        }

        if (!nlk->portid) {
                err = netlink_autobind(sock);
                if (err)
                        goto out;
        }

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;
        err = -ENOBUFS;
        skb = alloc_skb(len, GFP_KERNEL);
        if (skb == NULL)
                goto out;

        NETLINK_CB(skb).portid  = nlk->portid;
        NETLINK_CB(skb).dst_group = dst_group;
        NETLINK_CB(skb).creds   = siocb->scm->creds;

        err = -EFAULT;
        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
                kfree_skb(skb);
                goto out;
        }

        err = security_netlink_send(sk, skb);
        if (err) {
                kfree_skb(skb);
                goto out;
        }

        if (dst_group) {
                atomic_inc(&skb->users);
                netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
        }
        err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);

out:
        scm_destroy(siocb->scm);
        return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len,
                           int flags)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int noblock = flags&MSG_DONTWAIT;
        size_t copied;
        struct sk_buff *skb, *data_skb;
        int err, ret;

        if (flags&MSG_OOB)
                return -EOPNOTSUPP;

        copied = 0;

        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (skb == NULL)
                goto out;

        data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
        if (unlikely(skb_shinfo(skb)->frag_list)) {
                /*
                 * If this skb has a frag_list, then here that means that we
                 * will have to use the frag_list skb's data for compat tasks
                 * and the regular skb's data for normal (non-compat) tasks.
                 *
                 * If we need to send the compat skb, assign it to the
                 * 'data_skb' variable so that it will be used below for data
                 * copying. We keep 'skb' for everything else, including
                 * freeing both later.
                 */
                if (flags & MSG_CMSG_COMPAT)
                        data_skb = skb_shinfo(skb)->frag_list;
        }
#endif

        msg->msg_namelen = 0;

        copied = data_skb->len;
        if (len < copied) {
                msg->msg_flags |= MSG_TRUNC;
                copied = len;
        }

        skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);

        if (msg->msg_name) {
                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
                addr->nl_family = AF_NETLINK;
                addr->nl_pad    = 0;
                addr->nl_pid    = NETLINK_CB(skb).portid;
                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
                msg->msg_namelen = sizeof(*addr);
        }

        if (nlk->flags & NETLINK_RECV_PKTINFO)
                netlink_cmsg_recv_pktinfo(msg, skb);

        if (NULL == siocb->scm) {
                memset(&scm, 0, sizeof(scm));
                siocb->scm = &scm;
        }
        siocb->scm->creds = *NETLINK_CREDS(skb);
        if (flags & MSG_TRUNC)
                copied = data_skb->len;

        skb_free_datagram(sk, skb);

        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
                ret = netlink_dump(sk);
                if (ret) {
                        sk->sk_err = ret;
                        sk->sk_error_report(sk);
                }
        }

        scm_recv(sock, msg, siocb->scm, flags);
out:
        netlink_rcv_wake(sk);
        return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
        BUG();
}

/*
 *	We export these functions to other modules. They provide a
 *	complete set of kernel non-blocking support for message
 *	queueing.
 */
struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
                        struct netlink_kernel_cfg *cfg)
{
        struct socket *sock;
        struct sock *sk;
        struct netlink_sock *nlk;
        struct listeners *listeners = NULL;
        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
        unsigned int groups;

        BUG_ON(!nl_table);

        if (unit < 0 || unit >= MAX_LINKS)
                return NULL;

        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                return NULL;

        /*
         * We have to just have a reference on the net from sk, but don't
         * get_net it. Besides, we cannot get and then put the net here.
         * So we create one inside init_net and the move it to net.
         */

        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
                goto out_sock_release_nosk;

        sk = sock->sk;
        sk_change_net(sk, net);

        if (!cfg || cfg->groups < 32)
                groups = 32;
        else
                groups = cfg->groups;

        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
        if (!listeners)
                goto out_sock_release;

        sk->sk_data_ready = netlink_data_ready;
        if (cfg && cfg->input)
                nlk_sk(sk)->netlink_rcv = cfg->input;

        if (netlink_insert(sk, net, 0))
                goto out_sock_release;

        nlk = nlk_sk(sk);
        nlk->flags |= NETLINK_KERNEL_SOCKET;

        netlink_table_grab();
        if (!nl_table[unit].registered) {
                nl_table[unit].groups = groups;
                rcu_assign_pointer(nl_table[unit].listeners, listeners);
                nl_table[unit].cb_mutex = cb_mutex;
                nl_table[unit].module = module;
                if (cfg) {
                        nl_table[unit].bind = cfg->bind;
                        nl_table[unit].flags = cfg->flags;
                }
                nl_table[unit].registered = 1;
        } else {
                kfree(listeners);
                nl_table[unit].registered++;
        }
        netlink_table_ungrab();
        return sk;

out_sock_release:
        kfree(listeners);
        netlink_kernel_release(sk);
        return NULL;

out_sock_release_nosk:
        sock_release(sock);
        return NULL;
}
EXPORT_SYMBOL(__netlink_kernel_create);

void
netlink_kernel_release(struct sock *sk)
{
        sk_release_kernel(sk);
}
EXPORT_SYMBOL(netlink_kernel_release);
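/*
 * Illustrative sketch (editor's addition): a protocol module pairs these
 * two calls, usually through the netlink_kernel_create() wrapper; MY_PROTO
 * and my_input() are hypothetical (see the sketch after netlink_rcv_skb()
 * for the input side):
 *
 *	struct netlink_kernel_cfg cfg = {
 *		.input	= my_input,
 *	};
 *
 *	nl_sk = netlink_kernel_create(&init_net, MY_PROTO, &cfg);
 *	...
 *	netlink_kernel_release(nl_sk);
 */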
int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
        struct listeners *new, *old;
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];

        if (groups < 32)
                groups = 32;

        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
                if (!new)
                        return -ENOMEM;
                old = nl_deref_protected(tbl->listeners);
                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
                rcu_assign_pointer(tbl->listeners, new);

                kfree_rcu(old, rcu);
        }
        tbl->groups = groups;

        return 0;
}

/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
        int err;

        netlink_table_grab();
        err = __netlink_change_ngroups(sk, groups);
        netlink_table_ungrab();

        return err;
}

void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
        struct sock *sk;
        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

        sk_for_each_bound(sk, &tbl->mc_list)
                netlink_update_socket_mc(nlk_sk(sk), group, 0);
}

/**
 * netlink_clear_multicast_users - kick off multicast listeners
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
        netlink_table_grab();
        __netlink_clear_multicast_users(ksk, group);
        netlink_table_ungrab();
}

struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
{
        struct nlmsghdr *nlh;
        int size = nlmsg_msg_size(len);

        nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
        nlh->nlmsg_type = type;
        nlh->nlmsg_len = size;
        nlh->nlmsg_flags = flags;
        nlh->nlmsg_pid = portid;
        nlh->nlmsg_seq = seq;
        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
                memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
        return nlh;
}
EXPORT_SYMBOL(__nlmsg_put);
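/*
 * Illustrative sketch (editor's addition): most callers go through the
 * nlmsg_put() wrapper rather than __nlmsg_put() directly; MY_TYPE and the
 * struct my_msg payload are hypothetical:
 *
 *	nlh = nlmsg_put(skb, portid, seq, MY_TYPE, sizeof(struct my_msg), 0);
 *	if (!nlh)
 *		goto nla_put_failure;
 *	memcpy(nlmsg_data(nlh), &msg, sizeof(msg));
 */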
/*
 * It looks a bit ugly.
 * It would be better to create kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_callback *cb;
        struct sk_buff *skb = NULL;
        struct nlmsghdr *nlh;
        int len, err = -ENOBUFS;
        int alloc_size;

        mutex_lock(nlk->cb_mutex);

        cb = nlk->cb;
        if (cb == NULL) {
                err = -EINVAL;
                goto errout_skb;
        }

        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

        skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
        if (!skb)
                goto errout_skb;

        len = cb->dump(skb, cb);

        if (len > 0) {
                mutex_unlock(nlk->cb_mutex);

                if (sk_filter(sk, skb))
                        kfree_skb(skb);
                else
                        __netlink_sendskb(sk, skb);
                return 0;
        }

        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
        if (!nlh)
                goto errout_skb;

        nl_dump_check_consistent(cb, nlh);

        memcpy(nlmsg_data(nlh), &len, sizeof(len));

        if (sk_filter(sk, skb))
                kfree_skb(skb);
        else
                __netlink_sendskb(sk, skb);

        if (cb->done)
                cb->done(cb);
        nlk->cb = NULL;
        mutex_unlock(nlk->cb_mutex);

        module_put(cb->module);
        netlink_consume_callback(cb);
        return 0;

errout_skb:
        mutex_unlock(nlk->cb_mutex);
        kfree_skb(skb);
        return err;
}

int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
                         const struct nlmsghdr *nlh,
                         struct netlink_dump_control *control)
{
        struct netlink_callback *cb;
        struct sock *sk;
        struct netlink_sock *nlk;
        int ret;

        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
        if (cb == NULL)
                return -ENOBUFS;

        cb->dump = control->dump;
        cb->done = control->done;
        cb->nlh = nlh;
        cb->data = control->data;
        cb->module = control->module;
        cb->min_dump_alloc = control->min_dump_alloc;
        atomic_inc(&skb->users);
        cb->skb = skb;

        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
        if (sk == NULL) {
                netlink_destroy_callback(cb);
                return -ECONNREFUSED;
        }
        nlk = nlk_sk(sk);

        mutex_lock(nlk->cb_mutex);
        /* A dump is in progress... */
        if (nlk->cb) {
                mutex_unlock(nlk->cb_mutex);
                netlink_destroy_callback(cb);
                ret = -EBUSY;
                goto out;
        }
        /* add reference of module which cb->dump belongs to */
        if (!try_module_get(cb->module)) {
                mutex_unlock(nlk->cb_mutex);
                netlink_destroy_callback(cb);
                ret = -EPROTONOSUPPORT;
                goto out;
        }

        nlk->cb = cb;
        mutex_unlock(nlk->cb_mutex);

        ret = netlink_dump(sk);
out:
        sock_put(sk);

        if (ret)
                return ret;

        /* We successfully started a dump, by returning -EINTR we
         * signal not to send ACK even if it was requested.
         */
        return -EINTR;
}
EXPORT_SYMBOL(__netlink_dump_start);
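/*
 * Illustrative sketch (editor's addition): a request handler starts a dump
 * through the netlink_dump_start() wrapper; my_dump() and my_done() are
 * hypothetical:
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 *		struct netlink_dump_control c = {
 *			.dump	= my_dump,
 *			.done	= my_done,
 *		};
 *		return netlink_dump_start(net->rtnl, skb, nlh, &c);
 *	}
 */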
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
        struct sk_buff *skb;
        struct nlmsghdr *rep;
        struct nlmsgerr *errmsg;
        size_t payload = sizeof(*errmsg);
        struct sock *sk;

        /* error messages get the original request appended */
        if (err)
                payload += nlmsg_len(nlh);

        skb = nlmsg_new(payload, GFP_KERNEL);
        if (!skb) {
                sk = netlink_lookup(sock_net(in_skb->sk),
                                    in_skb->sk->sk_protocol,
                                    NETLINK_CB(in_skb).portid);
                if (sk) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                        sock_put(sk);
                }
                return;
        }

        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
                          NLMSG_ERROR, payload, 0);
        errmsg = nlmsg_data(rep);
        errmsg->error = err;
        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);
int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
                                                   struct nlmsghdr *))
{
        struct nlmsghdr *nlh;
        int err;

        while (skb->len >= nlmsg_total_size(0)) {
                int msglen;

                nlh = nlmsg_hdr(skb);
                err = 0;

                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
                        return 0;

                /* Only requests are handled by the kernel */
                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
                        goto ack;

                /* Skip control messages */
                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
                        goto ack;

                err = cb(skb, nlh);
                if (err == -EINTR)
                        goto skip;

ack:
                if (nlh->nlmsg_flags & NLM_F_ACK || err)
                        netlink_ack(skb, nlh, err);

skip:
                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
                if (msglen > skb->len)
                        msglen = skb->len;
                skb_pull(skb, msglen);
        }

        return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);
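/*
 * Illustrative sketch (editor's addition): the input function registered
 * via netlink_kernel_cfg typically just feeds netlink_rcv_skb() with a
 * per-message dispatcher; my_rcv_msg() is hypothetical:
 *
 *	static int my_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		switch (nlh->nlmsg_type) {
 *		...
 *		}
 *		return 0;
 *	}
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_rcv_msg);
 *	}
 */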
/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @portid: destination netlink portid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to report back, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
                 unsigned int group, int report, gfp_t flags)
{
        int err = 0;

        if (group) {
                int exclude_portid = 0;

                if (report) {
                        atomic_inc(&skb->users);
                        exclude_portid = portid;
                }

                /* errors reported via destination sk->sk_err, but propagate
                 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
                err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
        }

        if (report) {
                int err2;

                err2 = nlmsg_unicast(sk, skb, portid);
                if (!err || err == -ESRCH)
                        err = err2;
        }

        return err;
}
EXPORT_SYMBOL(nlmsg_notify);
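/*
 * Illustrative sketch (editor's addition): callers usually derive @report
 * from NLM_F_ECHO on the triggering request, as rtnetlink does; MY_GRP is
 * hypothetical:
 *
 *	err = nlmsg_notify(sk, skb, portid, MY_GRP,
 *			   nlh->nlmsg_flags & NLM_F_ECHO, GFP_KERNEL);
 */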
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
        struct seq_net_private p;
        int link;
        int hash_idx;
};

static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
        struct nl_seq_iter *iter = seq->private;
        int i, j;
        struct sock *s;
        loff_t off = 0;

        for (i = 0; i < MAX_LINKS; i++) {
                struct nl_portid_hash *hash = &nl_table[i].hash;

                for (j = 0; j <= hash->mask; j++) {
                        sk_for_each(s, &hash->table[j]) {
                                if (sock_net(s) != seq_file_net(seq))
                                        continue;
                                if (off == pos) {
                                        iter->link = i;
                                        iter->hash_idx = j;
                                        return s;
                                }
                                ++off;
                        }
                }
        }
        return NULL;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(nl_table_lock)
{
        read_lock(&nl_table_lock);
        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct sock *s;
        struct nl_seq_iter *iter;
        int i, j;

        ++*pos;

        if (v == SEQ_START_TOKEN)
                return netlink_seq_socket_idx(seq, 0);

        iter = seq->private;
        s = v;
        do {
                s = sk_next(s);
        } while (s && sock_net(s) != seq_file_net(seq));
        if (s)
                return s;

        i = iter->link;
        j = iter->hash_idx + 1;

        do {
                struct nl_portid_hash *hash = &nl_table[i].hash;

                for (; j <= hash->mask; j++) {
                        s = sk_head(&hash->table[j]);
                        while (s && sock_net(s) != seq_file_net(seq))
                                s = sk_next(s);
                        if (s) {
                                iter->link = i;
                                iter->hash_idx = j;
                                return s;
                        }
                }

                j = 0;
        } while (++i < MAX_LINKS);

        return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
        __releases(nl_table_lock)
{
        read_unlock(&nl_table_lock);
}

static int netlink_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "sk       Eth Pid    Groups   "
                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
        } else {
                struct sock *s = v;
                struct netlink_sock *nlk = nlk_sk(s);

                seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
                           s,
                           s->sk_protocol,
                           nlk->portid,
                           nlk->groups ? (u32)nlk->groups[0] : 0,
                           sk_rmem_alloc_get(s),
                           sk_wmem_alloc_get(s),
                           nlk->cb,
                           atomic_read(&s->sk_refcnt),
                           atomic_read(&s->sk_drops),
                           sock_i_ino(s)
                        );

        }
        return 0;
}

static const struct seq_operations netlink_seq_ops = {
        .start  = netlink_seq_start,
        .next   = netlink_seq_next,
        .stop   = netlink_seq_stop,
        .show   = netlink_seq_show,
};

static int netlink_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &netlink_seq_ops,
                            sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = netlink_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
        return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
        return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
        .family =       PF_NETLINK,
        .owner =        THIS_MODULE,
        .release =      netlink_release,
        .bind =         netlink_bind,
        .connect =      netlink_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      netlink_getname,
        .poll =         netlink_poll,
        .ioctl =        sock_no_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   netlink_setsockopt,
        .getsockopt =   netlink_getsockopt,
        .sendmsg =      netlink_sendmsg,
        .recvmsg =      netlink_recvmsg,
        .mmap =         netlink_mmap,
        .sendpage =     sock_no_sendpage,
};

static const struct net_proto_family netlink_family_ops = {
        .family = PF_NETLINK,
        .create = netlink_create,
        .owner  = THIS_MODULE,  /* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
                return -ENOMEM;
#endif
        return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        remove_proc_entry("netlink", net->proc_net);
#endif
}

static void __init netlink_add_usersock_entry(void)
{
        struct listeners *listeners;
        int groups = 32;

        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
        if (!listeners)
                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");

        netlink_table_grab();

        nl_table[NETLINK_USERSOCK].groups = groups;
        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
        nl_table[NETLINK_USERSOCK].registered = 1;
        nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;

        netlink_table_ungrab();
}

static struct pernet_operations __net_initdata netlink_net_ops = {
        .init = netlink_net_init,
        .exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
        int i;
        unsigned long limit;
        unsigned int order;
        int err = proto_register(&netlink_proto, 0);

        if (err != 0)
                goto out;

        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
        if (!nl_table)
                goto panic;

        if (totalram_pages >= (128 * 1024))
                limit = totalram_pages >> (21 - PAGE_SHIFT);
        else
                limit = totalram_pages >> (23 - PAGE_SHIFT);

        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
        limit = (1UL << order) / sizeof(struct hlist_head);
        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

        for (i = 0; i < MAX_LINKS; i++) {
                struct nl_portid_hash *hash = &nl_table[i].hash;

                hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
                if (!hash->table) {
                        while (i-- > 0)
                                nl_portid_hash_free(nl_table[i].hash.table,
                                                 1 * sizeof(*hash->table));
                        kfree(nl_table);
                        goto panic;
                }
                hash->max_shift = order;
                hash->shift = 0;
                hash->mask = 0;
                hash->rehash_time = jiffies;
        }

        netlink_add_usersock_entry();

        sock_register(&netlink_family_ops);
        register_pernet_subsys(&netlink_net_ops);
        /* The netlink device handler may be needed early. */
        rtnetlink_init();
out:
        return err;
panic:
        panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);