Commit | Line | Data |
---|---|---|
fceaf24a | 1 | /* |
fceaf24a HJ |
2 | * Copyright (c) 2009, Microsoft Corporation. |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify it | |
5 | * under the terms and conditions of the GNU General Public License, | |
6 | * version 2, as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope it will be useful, but WITHOUT | |
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
11 | * more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License along with | |
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | |
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | |
16 | * | |
17 | * Authors: | |
d0e94d17 | 18 | * Haiyang Zhang <haiyangz@microsoft.com> |
fceaf24a | 19 | * Hank Janssen <hjanssen@microsoft.com> |
fceaf24a | 20 | */ |
eb335bc4 HJ |
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
22 | ||
fceaf24a | 23 | #include <linux/init.h> |
9079ce69 | 24 | #include <linux/atomic.h> |
fceaf24a HJ |
25 | #include <linux/module.h> |
26 | #include <linux/highmem.h> | |
27 | #include <linux/device.h> | |
fceaf24a | 28 | #include <linux/io.h> |
fceaf24a HJ |
29 | #include <linux/delay.h> |
30 | #include <linux/netdevice.h> | |
31 | #include <linux/inetdevice.h> | |
32 | #include <linux/etherdevice.h> | |
33 | #include <linux/skbuff.h> | |
34 | #include <linux/in.h> | |
5a0e3ad6 | 35 | #include <linux/slab.h> |
fceaf24a HJ |
36 | #include <net/arp.h> |
37 | #include <net/route.h> | |
38 | #include <net/sock.h> | |
39 | #include <net/pkt_sched.h> | |
3f335ea2 S |
40 | |
41 | #include "hyperv.h" | |
5ca7252a | 42 | #include "hyperv_net.h" |
fceaf24a | 43 | |
fceaf24a | 44 | struct net_device_context { |
02fafbc6 | 45 | /* point back to our device context */ |
6bad88da | 46 | struct hv_device *device_ctx; |
9079ce69 | 47 | atomic_t avail; |
122a5f64 | 48 | struct delayed_work dwork; |
fceaf24a HJ |
49 | }; |
50 | ||
fceaf24a | 51 | |
/* The transmit queue is stopped once fewer pages than this remain */
#define PACKET_PAGES_LOWATER	8
/*
 * Need this many pages to handle worst case fragmented packet; the
 * transmit queue is woken again once at least this many are available.
 */
#define PACKET_PAGES_HIWATER	(MAX_SKB_FRAGS + 2)
55 | ||
99c8da0f | 56 | static int ring_size = 128; |
450d7a4b SH |
57 | module_param(ring_size, int, S_IRUGO); |
58 | MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); | |
fceaf24a | 59 | |
/*
 * Intentionally empty.  Providing this hook keeps the netdev core from
 * returning -EINVAL when the multicast address list is modified through
 * SIOCADDMULTI.  Nothing has to be programmed here: hv is set up to get
 * all multicast when it calls RndisFilterOnOpen() (presumably today's
 * rndis_filter_open() - TODO confirm).
 */
static void netvsc_set_multicast_list(struct net_device *net)
{
}
66 | ||
fceaf24a HJ |
67 | static int netvsc_open(struct net_device *net) |
68 | { | |
fceaf24a | 69 | struct net_device_context *net_device_ctx = netdev_priv(net); |
6bad88da | 70 | struct hv_device *device_obj = net_device_ctx->device_ctx; |
02fafbc6 | 71 | int ret = 0; |
fceaf24a | 72 | |
02fafbc6 | 73 | if (netif_carrier_ok(net)) { |
454f18a9 | 74 | /* Open up the device */ |
9c26aa0d | 75 | ret = rndis_filter_open(device_obj); |
02fafbc6 | 76 | if (ret != 0) { |
eb335bc4 HJ |
77 | netdev_err(net, "unable to open device (ret %d).\n", |
78 | ret); | |
fceaf24a HJ |
79 | return ret; |
80 | } | |
81 | ||
82 | netif_start_queue(net); | |
02fafbc6 | 83 | } else { |
eb335bc4 | 84 | netdev_err(net, "unable to open device...link is down.\n"); |
fceaf24a HJ |
85 | } |
86 | ||
fceaf24a HJ |
87 | return ret; |
88 | } | |
89 | ||
fceaf24a HJ |
90 | static int netvsc_close(struct net_device *net) |
91 | { | |
fceaf24a | 92 | struct net_device_context *net_device_ctx = netdev_priv(net); |
6bad88da | 93 | struct hv_device *device_obj = net_device_ctx->device_ctx; |
02fafbc6 | 94 | int ret; |
fceaf24a | 95 | |
fceaf24a HJ |
96 | netif_stop_queue(net); |
97 | ||
9c26aa0d | 98 | ret = rndis_filter_close(device_obj); |
fceaf24a | 99 | if (ret != 0) |
eb335bc4 | 100 | netdev_err(net, "unable to close device (ret %d).\n", ret); |
fceaf24a | 101 | |
fceaf24a HJ |
102 | return ret; |
103 | } | |
104 | ||
fceaf24a HJ |
105 | static void netvsc_xmit_completion(void *context) |
106 | { | |
4193d4f4 | 107 | struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; |
02fafbc6 | 108 | struct sk_buff *skb = (struct sk_buff *) |
72a2f5bd | 109 | (unsigned long)packet->completion.send.send_completion_tid; |
fceaf24a | 110 | |
fceaf24a HJ |
111 | kfree(packet); |
112 | ||
02fafbc6 | 113 | if (skb) { |
7880fc54 | 114 | struct net_device *net = skb->dev; |
b220f5f9 SH |
115 | struct net_device_context *net_device_ctx = netdev_priv(net); |
116 | unsigned int num_pages = skb_shinfo(skb)->nr_frags + 2; | |
fceaf24a | 117 | |
b220f5f9 | 118 | dev_kfree_skb_any(skb); |
fceaf24a | 119 | |
9079ce69 S |
120 | atomic_add(num_pages, &net_device_ctx->avail); |
121 | if (atomic_read(&net_device_ctx->avail) >= | |
122 | PACKET_PAGES_HIWATER) | |
e4d59ac5 | 123 | netif_wake_queue(net); |
fceaf24a | 124 | } |
fceaf24a HJ |
125 | } |
126 | ||
02fafbc6 | 127 | static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) |
fceaf24a | 128 | { |
fceaf24a | 129 | struct net_device_context *net_device_ctx = netdev_priv(net); |
4193d4f4 | 130 | struct hv_netvsc_packet *packet; |
02fafbc6 | 131 | int ret; |
6048718d | 132 | unsigned int i, num_pages; |
fceaf24a | 133 | |
6048718d SH |
134 | /* Add 1 for skb->data and additional one for RNDIS */ |
135 | num_pages = skb_shinfo(skb)->nr_frags + 1 + 1; | |
9079ce69 | 136 | if (num_pages > atomic_read(&net_device_ctx->avail)) |
b220f5f9 | 137 | return NETDEV_TX_BUSY; |
fceaf24a | 138 | |
454f18a9 | 139 | /* Allocate a netvsc packet based on # of frags. */ |
02fafbc6 | 140 | packet = kzalloc(sizeof(struct hv_netvsc_packet) + |
6048718d | 141 | (num_pages * sizeof(struct hv_page_buffer)) + |
f8ba8c70 | 142 | sizeof(struct rndis_filter_packet), GFP_ATOMIC); |
02fafbc6 | 143 | if (!packet) { |
b220f5f9 | 144 | /* out of memory, silently drop packet */ |
eb335bc4 | 145 | netdev_err(net, "unable to allocate hv_netvsc_packet\n"); |
b220f5f9 SH |
146 | |
147 | dev_kfree_skb(skb); | |
148 | net->stats.tx_dropped++; | |
149 | return NETDEV_TX_OK; | |
fceaf24a HJ |
150 | } |
151 | ||
72a2f5bd | 152 | packet->extension = (void *)(unsigned long)packet + |
02fafbc6 | 153 | sizeof(struct hv_netvsc_packet) + |
6048718d | 154 | (num_pages * sizeof(struct hv_page_buffer)); |
fceaf24a | 155 | |
454f18a9 | 156 | /* Setup the rndis header */ |
72a2f5bd | 157 | packet->page_buf_cnt = num_pages; |
fceaf24a | 158 | |
454f18a9 | 159 | /* Initialize it from the skb */ |
72a2f5bd | 160 | packet->total_data_buflen = skb->len; |
fceaf24a | 161 | |
6048718d | 162 | /* Start filling in the page buffers starting after RNDIS buffer. */ |
ca623ad3 HZ |
163 | packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT; |
164 | packet->page_buf[1].offset | |
6048718d | 165 | = (unsigned long)skb->data & (PAGE_SIZE - 1); |
ca623ad3 | 166 | packet->page_buf[1].len = skb_headlen(skb); |
6048718d SH |
167 | |
168 | /* Additional fragments are after SKB data */ | |
169 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | |
170 | skb_frag_t *f = &skb_shinfo(skb)->frags[i]; | |
171 | ||
ca623ad3 HZ |
172 | packet->page_buf[i+2].pfn = page_to_pfn(f->page); |
173 | packet->page_buf[i+2].offset = f->page_offset; | |
174 | packet->page_buf[i+2].len = f->size; | |
fceaf24a HJ |
175 | } |
176 | ||
454f18a9 | 177 | /* Set the completion routine */ |
72a2f5bd HZ |
178 | packet->completion.send.send_completion = netvsc_xmit_completion; |
179 | packet->completion.send.send_completion_ctx = packet; | |
180 | packet->completion.send.send_completion_tid = (unsigned long)skb; | |
fceaf24a | 181 | |
55acb696 | 182 | ret = rndis_filter_send(net_device_ctx->device_ctx, |
02fafbc6 | 183 | packet); |
02fafbc6 | 184 | if (ret == 0) { |
b852fdce SH |
185 | net->stats.tx_bytes += skb->len; |
186 | net->stats.tx_packets++; | |
fceaf24a | 187 | |
9079ce69 S |
188 | atomic_sub(num_pages, &net_device_ctx->avail); |
189 | if (atomic_read(&net_device_ctx->avail) < PACKET_PAGES_LOWATER) | |
b220f5f9 SH |
190 | netif_stop_queue(net); |
191 | } else { | |
192 | /* we are shutting down or bus overloaded, just drop packet */ | |
b852fdce | 193 | net->stats.tx_dropped++; |
b220f5f9 | 194 | netvsc_xmit_completion(packet); |
fceaf24a HJ |
195 | } |
196 | ||
b220f5f9 | 197 | return NETDEV_TX_OK; |
fceaf24a HJ |
198 | } |
199 | ||
3e189519 | 200 | /* |
02fafbc6 GKH |
201 | * netvsc_linkstatus_callback - Link up/down notification |
202 | */ | |
90ef117a | 203 | void netvsc_linkstatus_callback(struct hv_device *device_obj, |
02fafbc6 | 204 | unsigned int status) |
fceaf24a | 205 | { |
6bad88da | 206 | struct net_device *net = dev_get_drvdata(&device_obj->device); |
c996edcf | 207 | struct net_device_context *ndev_ctx; |
fceaf24a | 208 | |
02fafbc6 | 209 | if (!net) { |
eb335bc4 HJ |
210 | netdev_err(net, "got link status but net device " |
211 | "not initialized yet\n"); | |
fceaf24a HJ |
212 | return; |
213 | } | |
214 | ||
02fafbc6 | 215 | if (status == 1) { |
fceaf24a HJ |
216 | netif_carrier_on(net); |
217 | netif_wake_queue(net); | |
7c161d0b | 218 | netif_notify_peers(net); |
c996edcf | 219 | ndev_ctx = netdev_priv(net); |
122a5f64 | 220 | schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); |
02fafbc6 | 221 | } else { |
fceaf24a HJ |
222 | netif_carrier_off(net); |
223 | netif_stop_queue(net); | |
224 | } | |
fceaf24a HJ |
225 | } |
226 | ||
3e189519 HJ |
227 | /* |
228 | * netvsc_recv_callback - Callback when we receive a packet from the | |
229 | * "wire" on the specified device. | |
02fafbc6 | 230 | */ |
f79adf8f | 231 | int netvsc_recv_callback(struct hv_device *device_obj, |
02fafbc6 | 232 | struct hv_netvsc_packet *packet) |
fceaf24a | 233 | { |
6bad88da | 234 | struct net_device *net = dev_get_drvdata(&device_obj->device); |
fceaf24a HJ |
235 | struct sk_buff *skb; |
236 | void *data; | |
02fafbc6 | 237 | int i; |
fceaf24a HJ |
238 | unsigned long flags; |
239 | ||
02fafbc6 | 240 | if (!net) { |
eb335bc4 HJ |
241 | netdev_err(net, "got receive callback but net device" |
242 | " not initialized yet\n"); | |
fceaf24a HJ |
243 | return 0; |
244 | } | |
245 | ||
9495c282 | 246 | /* Allocate a skb - TODO direct I/O to pages? */ |
72a2f5bd | 247 | skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); |
9495c282 SH |
248 | if (unlikely(!skb)) { |
249 | ++net->stats.rx_dropped; | |
250 | return 0; | |
251 | } | |
fceaf24a | 252 | |
454f18a9 | 253 | /* for kmap_atomic */ |
fceaf24a HJ |
254 | local_irq_save(flags); |
255 | ||
02fafbc6 GKH |
256 | /* |
257 | * Copy to skb. This copy is needed here since the memory pointed by | |
258 | * hv_netvsc_packet cannot be deallocated | |
259 | */ | |
72a2f5bd | 260 | for (i = 0; i < packet->page_buf_cnt; i++) { |
ca623ad3 | 261 | data = kmap_atomic(pfn_to_page(packet->page_buf[i].pfn), |
02fafbc6 GKH |
262 | KM_IRQ1); |
263 | data = (void *)(unsigned long)data + | |
ca623ad3 | 264 | packet->page_buf[i].offset; |
02fafbc6 | 265 | |
ca623ad3 HZ |
266 | memcpy(skb_put(skb, packet->page_buf[i].len), data, |
267 | packet->page_buf[i].len); | |
02fafbc6 GKH |
268 | |
269 | kunmap_atomic((void *)((unsigned long)data - | |
ca623ad3 | 270 | packet->page_buf[i].offset), KM_IRQ1); |
fceaf24a HJ |
271 | } |
272 | ||
273 | local_irq_restore(flags); | |
274 | ||
275 | skb->protocol = eth_type_trans(skb, net); | |
fceaf24a HJ |
276 | skb->ip_summed = CHECKSUM_NONE; |
277 | ||
9495c282 SH |
278 | net->stats.rx_packets++; |
279 | net->stats.rx_bytes += skb->len; | |
280 | ||
02fafbc6 GKH |
281 | /* |
282 | * Pass the skb back up. Network stack will deallocate the skb when it | |
9495c282 SH |
283 | * is done. |
284 | * TODO - use NAPI? | |
02fafbc6 | 285 | */ |
9495c282 | 286 | netif_rx(skb); |
fceaf24a | 287 | |
fceaf24a HJ |
288 | return 0; |
289 | } | |
290 | ||
f82f4ad7 SH |
291 | static void netvsc_get_drvinfo(struct net_device *net, |
292 | struct ethtool_drvinfo *info) | |
293 | { | |
294 | strcpy(info->driver, "hv_netvsc"); | |
295 | strcpy(info->version, HV_DRV_VERSION); | |
296 | strcpy(info->fw_version, "N/A"); | |
297 | } | |
298 | ||
299 | static const struct ethtool_ops ethtool_ops = { | |
300 | .get_drvinfo = netvsc_get_drvinfo, | |
f82f4ad7 SH |
301 | .get_link = ethtool_op_get_link, |
302 | }; | |
303 | ||
df2fff28 GKH |
304 | static const struct net_device_ops device_ops = { |
305 | .ndo_open = netvsc_open, | |
306 | .ndo_stop = netvsc_close, | |
307 | .ndo_start_xmit = netvsc_start_xmit, | |
df2fff28 | 308 | .ndo_set_multicast_list = netvsc_set_multicast_list, |
b681b588 HZ |
309 | .ndo_change_mtu = eth_change_mtu, |
310 | .ndo_validate_addr = eth_validate_addr, | |
311 | .ndo_set_mac_address = eth_mac_addr, | |
df2fff28 GKH |
312 | }; |
313 | ||
c996edcf HZ |
314 | /* |
315 | * Send GARP packet to network peers after migrations. | |
316 | * After Quick Migration, the network is not immediately operational in the | |
317 | * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add | |
122a5f64 | 318 | * another netif_notify_peers() into a delayed work, otherwise GARP packet |
c996edcf HZ |
319 | * will not be sent after quick migration, and cause network disconnection. |
320 | */ | |
321 | static void netvsc_send_garp(struct work_struct *w) | |
322 | { | |
323 | struct net_device_context *ndev_ctx; | |
324 | struct net_device *net; | |
325 | ||
122a5f64 | 326 | ndev_ctx = container_of(w, struct net_device_context, dwork.work); |
c996edcf HZ |
327 | net = dev_get_drvdata(&ndev_ctx->device_ctx->device); |
328 | netif_notify_peers(net); | |
329 | } | |
330 | ||
331 | ||
9efd21e1 | 332 | static int netvsc_probe(struct hv_device *dev) |
df2fff28 | 333 | { |
df2fff28 GKH |
334 | struct net_device *net = NULL; |
335 | struct net_device_context *net_device_ctx; | |
336 | struct netvsc_device_info device_info; | |
337 | int ret; | |
338 | ||
546d9e10 | 339 | net = alloc_etherdev(sizeof(struct net_device_context)); |
df2fff28 GKH |
340 | if (!net) |
341 | return -1; | |
342 | ||
343 | /* Set initial state */ | |
344 | netif_carrier_off(net); | |
df2fff28 GKH |
345 | |
346 | net_device_ctx = netdev_priv(net); | |
9efd21e1 | 347 | net_device_ctx->device_ctx = dev; |
9079ce69 | 348 | atomic_set(&net_device_ctx->avail, ring_size); |
9efd21e1 | 349 | dev_set_drvdata(&dev->device, net); |
122a5f64 | 350 | INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp); |
df2fff28 GKH |
351 | |
352 | /* Notify the netvsc driver of the new device */ | |
aae23986 | 353 | device_info.ring_size = ring_size; |
bdbad576 | 354 | ret = rndis_filter_device_add(dev, &device_info); |
df2fff28 GKH |
355 | if (ret != 0) { |
356 | free_netdev(net); | |
9efd21e1 | 357 | dev_set_drvdata(&dev->device, NULL); |
df2fff28 | 358 | |
eb335bc4 | 359 | netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); |
df2fff28 GKH |
360 | return ret; |
361 | } | |
362 | ||
b1956a81 | 363 | netif_carrier_on(net); |
df2fff28 | 364 | |
72a2f5bd | 365 | memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); |
df2fff28 GKH |
366 | |
367 | net->netdev_ops = &device_ops; | |
368 | ||
6048718d | 369 | /* TODO: Add GSO and Checksum offload */ |
877a344b | 370 | net->hw_features = NETIF_F_SG; |
6048718d SH |
371 | net->features = NETIF_F_SG; |
372 | ||
f82f4ad7 | 373 | SET_ETHTOOL_OPS(net, ðtool_ops); |
9efd21e1 | 374 | SET_NETDEV_DEV(net, &dev->device); |
df2fff28 GKH |
375 | |
376 | ret = register_netdev(net); | |
377 | if (ret != 0) { | |
378 | /* Remove the device and release the resource */ | |
58de3fc6 | 379 | rndis_filter_device_remove(dev); |
df2fff28 GKH |
380 | free_netdev(net); |
381 | } | |
382 | ||
df2fff28 GKH |
383 | return ret; |
384 | } | |
385 | ||
415b023a | 386 | static int netvsc_remove(struct hv_device *dev) |
df2fff28 | 387 | { |
415b023a | 388 | struct net_device *net = dev_get_drvdata(&dev->device); |
122a5f64 | 389 | struct net_device_context *ndev_ctx; |
df2fff28 | 390 | |
df2fff28 | 391 | if (net == NULL) { |
415b023a | 392 | dev_err(&dev->device, "No net device to remove\n"); |
df2fff28 GKH |
393 | return 0; |
394 | } | |
395 | ||
122a5f64 HZ |
396 | ndev_ctx = netdev_priv(net); |
397 | cancel_delayed_work_sync(&ndev_ctx->dwork); | |
398 | ||
df2fff28 GKH |
399 | /* Stop outbound asap */ |
400 | netif_stop_queue(net); | |
df2fff28 GKH |
401 | |
402 | unregister_netdev(net); | |
403 | ||
404 | /* | |
405 | * Call to the vsc driver to let it know that the device is being | |
406 | * removed | |
407 | */ | |
df06bcff | 408 | rndis_filter_device_remove(dev); |
df2fff28 GKH |
409 | |
410 | free_netdev(net); | |
df06bcff | 411 | return 0; |
df2fff28 GKH |
412 | } |
413 | ||
345c4cc3 | 414 | static const struct hv_vmbus_device_id id_table[] = { |
c45cf2d4 GKH |
415 | /* Network guid */ |
416 | { VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, | |
417 | 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) }, | |
418 | { }, | |
345c4cc3 S |
419 | }; |
420 | ||
421 | MODULE_DEVICE_TABLE(vmbus, id_table); | |
422 | ||
f1542a66 | 423 | /* The one and only one */ |
fde0ef9b | 424 | static struct hv_driver netvsc_drv = { |
345c4cc3 | 425 | .id_table = id_table, |
fde0ef9b S |
426 | .probe = netvsc_probe, |
427 | .remove = netvsc_remove, | |
d4890970 | 428 | }; |
f1542a66 | 429 | |
a9869c94 | 430 | static void __exit netvsc_drv_exit(void) |
fceaf24a | 431 | { |
fde0ef9b | 432 | vmbus_child_driver_unregister(&netvsc_drv.driver); |
fceaf24a HJ |
433 | } |
434 | ||
4753ff6a | 435 | |
1fde28cf | 436 | static int __init netvsc_drv_init(void) |
df2fff28 | 437 | { |
fde0ef9b | 438 | struct hv_driver *drv = &netvsc_drv; |
df2fff28 GKH |
439 | int ret; |
440 | ||
bb546d0e S |
441 | pr_info("initializing...."); |
442 | ||
df2fff28 | 443 | /* Callback to client driver to complete the initialization */ |
073aad34 | 444 | netvsc_initialize(drv); |
df2fff28 | 445 | |
fde0ef9b | 446 | drv->driver.name = drv->name; |
df2fff28 | 447 | |
df2fff28 | 448 | /* The driver belongs to vmbus */ |
150f9398 | 449 | ret = vmbus_child_driver_register(&drv->driver); |
df2fff28 | 450 | |
df2fff28 GKH |
451 | return ret; |
452 | } | |
453 | ||
26c14cc1 HJ |
454 | MODULE_LICENSE("GPL"); |
455 | MODULE_VERSION(HV_DRV_VERSION); | |
7880fc54 | 456 | MODULE_DESCRIPTION("Microsoft Hyper-V network driver"); |
fceaf24a | 457 | |
1fde28cf | 458 | module_init(netvsc_drv_init); |
a9869c94 | 459 | module_exit(netvsc_drv_exit); |