net/packet/af_packet.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel), others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to ll
		 header. PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
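
/*
 * Illustrative sketch (not part of the original file): the rules above,
 * seen from userspace. This is a hedged example under the usual AF_PACKET
 * API assumptions -- a SOCK_RAW packet socket sends and receives complete
 * frames including the ll header, while SOCK_DGRAM has the ll header
 * pulled on receive and built by the device on transmit.
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <arpa/inet.h>
 *
 *	int raw = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	int dgr = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
 *	// reads on 'raw' start at the Ethernet header (mac_header);
 *	// reads on 'dgr' start at the network header (data).
 */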

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring);

#define PGV_FROM_VMALLOC 1
struct pgv {
	char *buffer;
};

struct packet_ring_buffer {
	struct pgv		*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
	struct packet_ring_buffer rx_ring;
	struct packet_ring_buffer tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}
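
/*
 * Illustrative sketch (not part of the original file): tp_status is the
 * kernel/user handshake word for the mmap()ed ring, and the smp_wmb()/
 * smp_rmb() pair above orders the frame contents against it. Assuming a
 * TPACKET_V1 RX ring already created and mapped, a minimal consumer is:
 *
 *	struct tpacket_hdr *hdr = ring + i * frame_size;
 *
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);			// wait for the kernel
 *	handle((char *)hdr + hdr->tp_mac, hdr->tp_snaplen);
 *	hdr->tp_status = TP_STATUS_KERNEL;		// hand the slot back
 *	i = (i + 1) % frame_nr;
 */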

static void *packet_lookup_frame(struct packet_sock *po,
				 struct packet_ring_buffer *rb,
				 unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
					  struct packet_ring_buffer *rb,
					  int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is a noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock_bh();

	return res;
}

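/*
 * Illustrative sketch (not part of the original file): the filter consulted
 * here is a classic BPF program that userspace attaches with
 * SO_ATTACH_FILTER. A hedged example that accepts only IPv4 frames (the
 * filter contents are an illustration, not taken from this file):
 *
 *	#include <linux/filter.h>
 *	#include <linux/if_ether.h>
 *	#include <sys/socket.h>
 *
 *	struct sock_filter code[] = {
 *		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),	// load ethertype
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),	// accept
 *		BPF_STMT(BPF_RET | BPF_K, 0),		// drop
 *	};
 *	struct sock_fprog prog = { .len = 4, .filter = code };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 *
 * run_filter() returns the filter's verdict, which the callers below use
 * as the snapshot length (0 means drop).
 */
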
/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by the current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return skb to the original state on exit,
 * we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
#endif

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

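/*
 * Illustrative sketch (not part of the original file): the loop above
 * consumes frames that userspace marked TP_STATUS_SEND_REQUEST in a
 * PACKET_TX_RING. A hedged producer, assuming a TPACKET_V1 TX ring
 * already configured and mapped (the frame-data offset shown assumes
 * the V1 header layout):
 *
 *	struct tpacket_hdr *hdr = ring + i * frame_size;
 *	void *frame = (char *)hdr + TPACKET_HDRLEN
 *				  - sizeof(struct sockaddr_ll);
 *
 *	memcpy(frame, pkt, pkt_len);		// complete l2 frame
 *	hdr->tp_len = pkt_len;
 *	hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *	send(fd, NULL, 0, 0);			// kick tpacket_snd()
 *
 * On completion tpacket_destruct_skb() flips the slot back to
 * TP_STATUS_AVAILABLE so it can be reused.
 */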

static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
		struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 *	Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

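/*
 * Illustrative sketch (not part of the original file): packet_do_bind()
 * is what services a userspace bind() on a packet socket. A hedged
 * example binding to "eth0" (the interface name is an assumption):
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <net/if.h>
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 *
 * A zero sll_protocol keeps the previously bound protocol and a zero
 * sll_ifindex binds to all devices, matching the logic above and in
 * packet_bind() below.
 */
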
/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't have to see and worry about
	 *	blocking retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb->csum_start -
					      skb_headroom(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries
	 *	a user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

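/*
 * Illustrative sketch (not part of the original file): reading the
 * auxdata control message that the branch above emits. Hedged userspace
 * example, assuming PACKET_AUXDATA was enabled with setsockopt():
 *
 *	char buf[2048];
 *	char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { buf, sizeof(buf) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
 *	};
 *	struct cmsghdr *cmsg;
 *
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level == SOL_PACKET &&
 *		    cmsg->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux =
 *				(struct tpacket_auxdata *)CMSG_DATA(cmsg);
 *			// aux->tp_len is the original, untruncated length
 *		}
 *	}
 */
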
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

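/*
 * Illustrative sketch (not part of the original file): this list is
 * populated via the PACKET_ADD_MEMBERSHIP socket option. A hedged example
 * that puts an interface into promiscuous mode (the ifindex value is an
 * assumption):
 *
 *	#include <linux/if_packet.h>
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = ifindex,
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 *
 * The reference count kept in packet_mclist means a matching
 * PACKET_DROP_MEMBERSHIP undoes exactly one such request.
 */
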
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}

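/*
 * getsockopt() mirror of the options above.  PACKET_STATISTICS is
 * read-and-reset: the counters are copied out and zeroed under the
 * receive queue lock.
 */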
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		data = &val;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_loss;
		data = &val;
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}

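/*
 * Netdevice notifier: walks every packet socket in the namespace under
 * RCU and detaches/reattaches the protocol hook as the bound device
 * goes down, unregisters, or comes back up.
 */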
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
					sock_hold(sk);
					po->running = 1;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}

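/*
 * SIOCOUTQ reports unsent bytes and SIOCINQ the size of the next
 * pending packet; the remaining INET ioctls are simply forwarded to
 * inet_dgram_ops where available.
 */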
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}

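/*
 * poll() support: on top of the usual datagram semantics, a mapped RX
 * ring signals readable when a filled frame is waiting for userspace,
 * and a TX ring signals writable while a free slot is available.
 */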
static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}

/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

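/*
 * Ring memory helpers.  Blocks may come from the page allocator or,
 * as a fallback, from vmalloc, so freeing checks the address type
 * before choosing vfree() or free_pages().
 */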
static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

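/*
 * Allocate one ring block: try the page allocator without retrying,
 * then vzalloc(), and finally the page allocator again with full
 * reclaim allowed.
 */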
static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, let's dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}

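/*
 * Allocate the whole block vector; any partial allocation is unwound
 * via free_pg_vec() on failure.
 */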
static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

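/*
 * Create or tear down an RX/TX ring (tp_block_nr == 0 destroys it).
 * The socket is unhooked from the device while the old and new page
 * vectors are swapped under pg_vec_lock, then rebound.
 */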
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
		spin_lock_bh(&rb_queue->lock);
		pg_vec = XC(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		order = XC(rb->pg_vec_order, order);
		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
#undef XC
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

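/*
 * Map both rings (RX first, then TX) into one contiguous VMA; the
 * requested length must match the combined ring size exactly and no
 * page offset is accepted.
 */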
static int packet_mmap(struct file *file, struct socket *sock,
		       struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

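/*
 * /proc/net/packet: one line per packet socket in the namespace,
 * traversed under RCU.
 */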
#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

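/*
 * Per-namespace state: the socket list and the /proc/net/packet entry.
 */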
static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};

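/*
 * Module init/exit: register the protocol, the PF_PACKET family, the
 * per-namespace ops and the netdevice notifier, and undo them in
 * reverse order on unload.
 */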
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);