Commit | Line | Data |
---|---|---|
fceaf24a | 1 | /* |
fceaf24a HJ |
2 | * Copyright (c) 2009, Microsoft Corporation. |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify it | |
5 | * under the terms and conditions of the GNU General Public License, | |
6 | * version 2, as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope it will be useful, but WITHOUT | |
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
11 | * more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License along with | |
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | |
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | |
16 | * | |
17 | * Authors: | |
d0e94d17 | 18 | * Haiyang Zhang <haiyangz@microsoft.com> |
fceaf24a | 19 | * Hank Janssen <hjanssen@microsoft.com> |
fceaf24a | 20 | */ |
eb335bc4 HJ |
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
22 | ||
fceaf24a | 23 | #include <linux/init.h> |
9079ce69 | 24 | #include <linux/atomic.h> |
fceaf24a HJ |
25 | #include <linux/module.h> |
26 | #include <linux/highmem.h> | |
27 | #include <linux/device.h> | |
fceaf24a | 28 | #include <linux/io.h> |
fceaf24a HJ |
29 | #include <linux/delay.h> |
30 | #include <linux/netdevice.h> | |
31 | #include <linux/inetdevice.h> | |
32 | #include <linux/etherdevice.h> | |
33 | #include <linux/skbuff.h> | |
34 | #include <linux/in.h> | |
5a0e3ad6 | 35 | #include <linux/slab.h> |
fceaf24a HJ |
36 | #include <net/arp.h> |
37 | #include <net/route.h> | |
38 | #include <net/sock.h> | |
39 | #include <net/pkt_sched.h> | |
3f335ea2 | 40 | |
5ca7252a | 41 | #include "hyperv_net.h" |
fceaf24a | 42 | |
fceaf24a | 43 | struct net_device_context { |
02fafbc6 | 44 | /* point back to our device context */ |
6bad88da | 45 | struct hv_device *device_ctx; |
9079ce69 | 46 | atomic_t avail; |
122a5f64 | 47 | struct delayed_work dwork; |
fceaf24a HJ |
48 | }; |
49 | ||
fceaf24a | 50 | |
b220f5f9 SH |
/* The transmit queue is stopped once fewer credits than this remain */
#define PACKET_PAGES_LOWATER	8
/* Need this many pages to handle worst case fragmented packet */
#define PACKET_PAGES_HIWATER	(MAX_SKB_FRAGS + 2)
54 | ||
99c8da0f | 55 | static int ring_size = 128; |
450d7a4b SH |
56 | module_param(ring_size, int, S_IRUGO); |
57 | MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); | |
fceaf24a | 58 | |
0ff36f69 BP |
59 | /* no-op so the netdev core doesn't return -EINVAL when modifying the
60 | * multicast address list in SIOCADDMULTI. hv is setup to get all multicast | |
61 | * when it calls RndisFilterOnOpen() */ | |
static void netvsc_set_multicast_list(struct net_device *net)
{
	/* Intentionally empty: there is nothing to program here. */
}
65 | ||
fceaf24a HJ |
66 | static int netvsc_open(struct net_device *net) |
67 | { | |
fceaf24a | 68 | struct net_device_context *net_device_ctx = netdev_priv(net); |
6bad88da | 69 | struct hv_device *device_obj = net_device_ctx->device_ctx; |
02fafbc6 | 70 | int ret = 0; |
fceaf24a | 71 | |
02fafbc6 | 72 | if (netif_carrier_ok(net)) { |
454f18a9 | 73 | /* Open up the device */ |
9c26aa0d | 74 | ret = rndis_filter_open(device_obj); |
02fafbc6 | 75 | if (ret != 0) { |
eb335bc4 HJ |
76 | netdev_err(net, "unable to open device (ret %d).\n", |
77 | ret); | |
fceaf24a HJ |
78 | return ret; |
79 | } | |
80 | ||
81 | netif_start_queue(net); | |
02fafbc6 | 82 | } else { |
eb335bc4 | 83 | netdev_err(net, "unable to open device...link is down.\n"); |
fceaf24a HJ |
84 | } |
85 | ||
fceaf24a HJ |
86 | return ret; |
87 | } | |
88 | ||
fceaf24a HJ |
89 | static int netvsc_close(struct net_device *net) |
90 | { | |
fceaf24a | 91 | struct net_device_context *net_device_ctx = netdev_priv(net); |
6bad88da | 92 | struct hv_device *device_obj = net_device_ctx->device_ctx; |
02fafbc6 | 93 | int ret; |
fceaf24a | 94 | |
fceaf24a HJ |
95 | netif_stop_queue(net); |
96 | ||
9c26aa0d | 97 | ret = rndis_filter_close(device_obj); |
fceaf24a | 98 | if (ret != 0) |
eb335bc4 | 99 | netdev_err(net, "unable to close device (ret %d).\n", ret); |
fceaf24a | 100 | |
fceaf24a HJ |
101 | return ret; |
102 | } | |
103 | ||
fceaf24a HJ |
104 | static void netvsc_xmit_completion(void *context) |
105 | { | |
4193d4f4 | 106 | struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; |
02fafbc6 | 107 | struct sk_buff *skb = (struct sk_buff *) |
72a2f5bd | 108 | (unsigned long)packet->completion.send.send_completion_tid; |
fceaf24a | 109 | |
fceaf24a HJ |
110 | kfree(packet); |
111 | ||
02fafbc6 | 112 | if (skb) { |
7880fc54 | 113 | struct net_device *net = skb->dev; |
b220f5f9 SH |
114 | struct net_device_context *net_device_ctx = netdev_priv(net); |
115 | unsigned int num_pages = skb_shinfo(skb)->nr_frags + 2; | |
fceaf24a | 116 | |
b220f5f9 | 117 | dev_kfree_skb_any(skb); |
fceaf24a | 118 | |
9079ce69 S |
119 | atomic_add(num_pages, &net_device_ctx->avail); |
120 | if (atomic_read(&net_device_ctx->avail) >= | |
121 | PACKET_PAGES_HIWATER) | |
e4d59ac5 | 122 | netif_wake_queue(net); |
fceaf24a | 123 | } |
fceaf24a HJ |
124 | } |
125 | ||
02fafbc6 | 126 | static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) |
fceaf24a | 127 | { |
fceaf24a | 128 | struct net_device_context *net_device_ctx = netdev_priv(net); |
4193d4f4 | 129 | struct hv_netvsc_packet *packet; |
02fafbc6 | 130 | int ret; |
6048718d | 131 | unsigned int i, num_pages; |
fceaf24a | 132 | |
6048718d SH |
133 | /* Add 1 for skb->data and additional one for RNDIS */ |
134 | num_pages = skb_shinfo(skb)->nr_frags + 1 + 1; | |
9079ce69 | 135 | if (num_pages > atomic_read(&net_device_ctx->avail)) |
b220f5f9 | 136 | return NETDEV_TX_BUSY; |
fceaf24a | 137 | |
454f18a9 | 138 | /* Allocate a netvsc packet based on # of frags. */ |
02fafbc6 | 139 | packet = kzalloc(sizeof(struct hv_netvsc_packet) + |
6048718d | 140 | (num_pages * sizeof(struct hv_page_buffer)) + |
f8ba8c70 | 141 | sizeof(struct rndis_filter_packet), GFP_ATOMIC); |
02fafbc6 | 142 | if (!packet) { |
bf769375 | 143 | /* out of memory, drop packet */ |
eb335bc4 | 144 | netdev_err(net, "unable to allocate hv_netvsc_packet\n"); |
b220f5f9 SH |
145 | |
146 | dev_kfree_skb(skb); | |
147 | net->stats.tx_dropped++; | |
bf769375 | 148 | return NETDEV_TX_BUSY; |
fceaf24a HJ |
149 | } |
150 | ||
72a2f5bd | 151 | packet->extension = (void *)(unsigned long)packet + |
02fafbc6 | 152 | sizeof(struct hv_netvsc_packet) + |
6048718d | 153 | (num_pages * sizeof(struct hv_page_buffer)); |
fceaf24a | 154 | |
454f18a9 | 155 | /* Setup the rndis header */ |
72a2f5bd | 156 | packet->page_buf_cnt = num_pages; |
fceaf24a | 157 | |
454f18a9 | 158 | /* Initialize it from the skb */ |
72a2f5bd | 159 | packet->total_data_buflen = skb->len; |
fceaf24a | 160 | |
6048718d | 161 | /* Start filling in the page buffers starting after RNDIS buffer. */ |
ca623ad3 HZ |
162 | packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT; |
163 | packet->page_buf[1].offset | |
6048718d | 164 | = (unsigned long)skb->data & (PAGE_SIZE - 1); |
ca623ad3 | 165 | packet->page_buf[1].len = skb_headlen(skb); |
6048718d SH |
166 | |
167 | /* Additional fragments are after SKB data */ | |
168 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | |
169 | skb_frag_t *f = &skb_shinfo(skb)->frags[i]; | |
170 | ||
ca623ad3 HZ |
171 | packet->page_buf[i+2].pfn = page_to_pfn(f->page); |
172 | packet->page_buf[i+2].offset = f->page_offset; | |
173 | packet->page_buf[i+2].len = f->size; | |
fceaf24a HJ |
174 | } |
175 | ||
454f18a9 | 176 | /* Set the completion routine */ |
72a2f5bd HZ |
177 | packet->completion.send.send_completion = netvsc_xmit_completion; |
178 | packet->completion.send.send_completion_ctx = packet; | |
179 | packet->completion.send.send_completion_tid = (unsigned long)skb; | |
fceaf24a | 180 | |
55acb696 | 181 | ret = rndis_filter_send(net_device_ctx->device_ctx, |
02fafbc6 | 182 | packet); |
02fafbc6 | 183 | if (ret == 0) { |
b852fdce SH |
184 | net->stats.tx_bytes += skb->len; |
185 | net->stats.tx_packets++; | |
fceaf24a | 186 | |
9079ce69 S |
187 | atomic_sub(num_pages, &net_device_ctx->avail); |
188 | if (atomic_read(&net_device_ctx->avail) < PACKET_PAGES_LOWATER) | |
b220f5f9 SH |
189 | netif_stop_queue(net); |
190 | } else { | |
191 | /* we are shutting down or bus overloaded, just drop packet */ | |
b852fdce | 192 | net->stats.tx_dropped++; |
8a5f9edc HZ |
193 | kfree(packet); |
194 | dev_kfree_skb_any(skb); | |
fceaf24a HJ |
195 | } |
196 | ||
bf769375 | 197 | return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK; |
fceaf24a HJ |
198 | } |
199 | ||
3e189519 | 200 | /* |
02fafbc6 GKH |
201 | * netvsc_linkstatus_callback - Link up/down notification |
202 | */ | |
90ef117a | 203 | void netvsc_linkstatus_callback(struct hv_device *device_obj, |
02fafbc6 | 204 | unsigned int status) |
fceaf24a | 205 | { |
6bad88da | 206 | struct net_device *net = dev_get_drvdata(&device_obj->device); |
c996edcf | 207 | struct net_device_context *ndev_ctx; |
fceaf24a | 208 | |
02fafbc6 | 209 | if (!net) { |
eb335bc4 HJ |
210 | netdev_err(net, "got link status but net device " |
211 | "not initialized yet\n"); | |
fceaf24a HJ |
212 | return; |
213 | } | |
214 | ||
02fafbc6 | 215 | if (status == 1) { |
fceaf24a HJ |
216 | netif_carrier_on(net); |
217 | netif_wake_queue(net); | |
c996edcf | 218 | ndev_ctx = netdev_priv(net); |
c4b6a2ea | 219 | schedule_delayed_work(&ndev_ctx->dwork, 0); |
122a5f64 | 220 | schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); |
02fafbc6 | 221 | } else { |
fceaf24a HJ |
222 | netif_carrier_off(net); |
223 | netif_stop_queue(net); | |
224 | } | |
fceaf24a HJ |
225 | } |
226 | ||
3e189519 HJ |
227 | /* |
228 | * netvsc_recv_callback - Callback when we receive a packet from the | |
229 | * "wire" on the specified device. | |
02fafbc6 | 230 | */ |
f79adf8f | 231 | int netvsc_recv_callback(struct hv_device *device_obj, |
02fafbc6 | 232 | struct hv_netvsc_packet *packet) |
fceaf24a | 233 | { |
6bad88da | 234 | struct net_device *net = dev_get_drvdata(&device_obj->device); |
fceaf24a HJ |
235 | struct sk_buff *skb; |
236 | void *data; | |
02fafbc6 | 237 | int i; |
fceaf24a HJ |
238 | unsigned long flags; |
239 | ||
02fafbc6 | 240 | if (!net) { |
eb335bc4 HJ |
241 | netdev_err(net, "got receive callback but net device" |
242 | " not initialized yet\n"); | |
fceaf24a HJ |
243 | return 0; |
244 | } | |
245 | ||
9495c282 | 246 | /* Allocate a skb - TODO direct I/O to pages? */ |
72a2f5bd | 247 | skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); |
9495c282 SH |
248 | if (unlikely(!skb)) { |
249 | ++net->stats.rx_dropped; | |
250 | return 0; | |
251 | } | |
fceaf24a | 252 | |
454f18a9 | 253 | /* for kmap_atomic */ |
fceaf24a HJ |
254 | local_irq_save(flags); |
255 | ||
02fafbc6 GKH |
256 | /* |
257 | * Copy to skb. This copy is needed here since the memory pointed by | |
258 | * hv_netvsc_packet cannot be deallocated | |
259 | */ | |
72a2f5bd | 260 | for (i = 0; i < packet->page_buf_cnt; i++) { |
ca623ad3 | 261 | data = kmap_atomic(pfn_to_page(packet->page_buf[i].pfn), |
02fafbc6 GKH |
262 | KM_IRQ1); |
263 | data = (void *)(unsigned long)data + | |
ca623ad3 | 264 | packet->page_buf[i].offset; |
02fafbc6 | 265 | |
ca623ad3 HZ |
266 | memcpy(skb_put(skb, packet->page_buf[i].len), data, |
267 | packet->page_buf[i].len); | |
02fafbc6 GKH |
268 | |
269 | kunmap_atomic((void *)((unsigned long)data - | |
ca623ad3 | 270 | packet->page_buf[i].offset), KM_IRQ1); |
fceaf24a HJ |
271 | } |
272 | ||
273 | local_irq_restore(flags); | |
274 | ||
275 | skb->protocol = eth_type_trans(skb, net); | |
fceaf24a HJ |
276 | skb->ip_summed = CHECKSUM_NONE; |
277 | ||
9495c282 SH |
278 | net->stats.rx_packets++; |
279 | net->stats.rx_bytes += skb->len; | |
280 | ||
02fafbc6 GKH |
281 | /* |
282 | * Pass the skb back up. Network stack will deallocate the skb when it | |
9495c282 SH |
283 | * is done. |
284 | * TODO - use NAPI? | |
02fafbc6 | 285 | */ |
9495c282 | 286 | netif_rx(skb); |
fceaf24a | 287 | |
fceaf24a HJ |
288 | return 0; |
289 | } | |
290 | ||
f82f4ad7 SH |
291 | static void netvsc_get_drvinfo(struct net_device *net, |
292 | struct ethtool_drvinfo *info) | |
293 | { | |
294 | strcpy(info->driver, "hv_netvsc"); | |
295 | strcpy(info->version, HV_DRV_VERSION); | |
296 | strcpy(info->fw_version, "N/A"); | |
297 | } | |
298 | ||
299 | static const struct ethtool_ops ethtool_ops = { | |
300 | .get_drvinfo = netvsc_get_drvinfo, | |
f82f4ad7 SH |
301 | .get_link = ethtool_op_get_link, |
302 | }; | |
303 | ||
df2fff28 GKH |
304 | static const struct net_device_ops device_ops = { |
305 | .ndo_open = netvsc_open, | |
306 | .ndo_stop = netvsc_close, | |
307 | .ndo_start_xmit = netvsc_start_xmit, | |
df2fff28 | 308 | .ndo_set_multicast_list = netvsc_set_multicast_list, |
b681b588 HZ |
309 | .ndo_change_mtu = eth_change_mtu, |
310 | .ndo_validate_addr = eth_validate_addr, | |
311 | .ndo_set_mac_address = eth_mac_addr, | |
df2fff28 GKH |
312 | }; |
313 | ||
c996edcf HZ |
314 | /* |
315 | * Send GARP packet to network peers after migrations. | |
316 | * After Quick Migration, the network is not immediately operational in the | |
317 | * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add | |
122a5f64 | 318 | * another netif_notify_peers() into a delayed work, otherwise GARP packet |
c996edcf HZ |
319 | * will not be sent after quick migration, and cause network disconnection. |
320 | */ | |
321 | static void netvsc_send_garp(struct work_struct *w) | |
322 | { | |
323 | struct net_device_context *ndev_ctx; | |
324 | struct net_device *net; | |
325 | ||
122a5f64 | 326 | ndev_ctx = container_of(w, struct net_device_context, dwork.work); |
c996edcf HZ |
327 | net = dev_get_drvdata(&ndev_ctx->device_ctx->device); |
328 | netif_notify_peers(net); | |
329 | } | |
330 | ||
331 | ||
9efd21e1 | 332 | static int netvsc_probe(struct hv_device *dev) |
df2fff28 | 333 | { |
df2fff28 GKH |
334 | struct net_device *net = NULL; |
335 | struct net_device_context *net_device_ctx; | |
336 | struct netvsc_device_info device_info; | |
337 | int ret; | |
338 | ||
546d9e10 | 339 | net = alloc_etherdev(sizeof(struct net_device_context)); |
df2fff28 | 340 | if (!net) |
51a805d0 | 341 | return -ENOMEM; |
df2fff28 GKH |
342 | |
343 | /* Set initial state */ | |
344 | netif_carrier_off(net); | |
df2fff28 GKH |
345 | |
346 | net_device_ctx = netdev_priv(net); | |
9efd21e1 | 347 | net_device_ctx->device_ctx = dev; |
9079ce69 | 348 | atomic_set(&net_device_ctx->avail, ring_size); |
9efd21e1 | 349 | dev_set_drvdata(&dev->device, net); |
122a5f64 | 350 | INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp); |
df2fff28 | 351 | |
df2fff28 GKH |
352 | net->netdev_ops = &device_ops; |
353 | ||
6048718d | 354 | /* TODO: Add GSO and Checksum offload */ |
877a344b | 355 | net->hw_features = NETIF_F_SG; |
6048718d SH |
356 | net->features = NETIF_F_SG; |
357 | ||
f82f4ad7 | 358 | SET_ETHTOOL_OPS(net, ðtool_ops); |
9efd21e1 | 359 | SET_NETDEV_DEV(net, &dev->device); |
df2fff28 GKH |
360 | |
361 | ret = register_netdev(net); | |
362 | if (ret != 0) { | |
692e084e | 363 | pr_err("Unable to register netdev.\n"); |
df2fff28 | 364 | free_netdev(net); |
692e084e | 365 | goto out; |
df2fff28 GKH |
366 | } |
367 | ||
692e084e HZ |
368 | /* Notify the netvsc driver of the new device */ |
369 | device_info.ring_size = ring_size; | |
370 | ret = rndis_filter_device_add(dev, &device_info); | |
371 | if (ret != 0) { | |
372 | netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); | |
373 | unregister_netdev(net); | |
374 | free_netdev(net); | |
375 | dev_set_drvdata(&dev->device, NULL); | |
376 | return ret; | |
377 | } | |
378 | memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); | |
379 | ||
380 | netif_carrier_on(net); | |
381 | ||
382 | out: | |
df2fff28 GKH |
383 | return ret; |
384 | } | |
385 | ||
415b023a | 386 | static int netvsc_remove(struct hv_device *dev) |
df2fff28 | 387 | { |
415b023a | 388 | struct net_device *net = dev_get_drvdata(&dev->device); |
122a5f64 | 389 | struct net_device_context *ndev_ctx; |
df2fff28 | 390 | |
df2fff28 | 391 | if (net == NULL) { |
415b023a | 392 | dev_err(&dev->device, "No net device to remove\n"); |
df2fff28 GKH |
393 | return 0; |
394 | } | |
395 | ||
122a5f64 HZ |
396 | ndev_ctx = netdev_priv(net); |
397 | cancel_delayed_work_sync(&ndev_ctx->dwork); | |
398 | ||
df2fff28 GKH |
399 | /* Stop outbound asap */ |
400 | netif_stop_queue(net); | |
df2fff28 GKH |
401 | |
402 | unregister_netdev(net); | |
403 | ||
404 | /* | |
405 | * Call to the vsc driver to let it know that the device is being | |
406 | * removed | |
407 | */ | |
df06bcff | 408 | rndis_filter_device_remove(dev); |
df2fff28 GKH |
409 | |
410 | free_netdev(net); | |
df06bcff | 411 | return 0; |
df2fff28 GKH |
412 | } |
413 | ||
345c4cc3 | 414 | static const struct hv_vmbus_device_id id_table[] = { |
c45cf2d4 GKH |
415 | /* Network guid */ |
416 | { VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, | |
417 | 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) }, | |
418 | { }, | |
345c4cc3 S |
419 | }; |
420 | ||
421 | MODULE_DEVICE_TABLE(vmbus, id_table); | |
422 | ||
f1542a66 | 423 | /* The one and only one */ |
fde0ef9b | 424 | static struct hv_driver netvsc_drv = { |
768fa219 | 425 | .name = "netvsc", |
345c4cc3 | 426 | .id_table = id_table, |
fde0ef9b S |
427 | .probe = netvsc_probe, |
428 | .remove = netvsc_remove, | |
d4890970 | 429 | }; |
f1542a66 | 430 | |
a9869c94 | 431 | static void __exit netvsc_drv_exit(void) |
fceaf24a | 432 | { |
768fa219 | 433 | vmbus_driver_unregister(&netvsc_drv); |
fceaf24a HJ |
434 | } |
435 | ||
1fde28cf | 436 | static int __init netvsc_drv_init(void) |
df2fff28 | 437 | { |
768fa219 | 438 | return vmbus_driver_register(&netvsc_drv); |
df2fff28 GKH |
439 | } |
440 | ||
26c14cc1 HJ |
441 | MODULE_LICENSE("GPL"); |
442 | MODULE_VERSION(HV_DRV_VERSION); | |
7880fc54 | 443 | MODULE_DESCRIPTION("Microsoft Hyper-V network driver"); |
fceaf24a | 444 | |
1fde28cf | 445 | module_init(netvsc_drv_init); |
a9869c94 | 446 | module_exit(netvsc_drv_exit); |