drivers: power: report battery voltage in AOSP compatible format
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / kernel / power / tuxonice_cluster.c
1 /*
2 * kernel/power/tuxonice_cluster.c
3 *
4 * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
5 *
6 * This file is released under the GPLv2.
7 *
8 * This file contains routines for cluster hibernation support.
9 *
10 * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
11 *
12 * How does it work?
13 *
14 * There is no 'master' node that tells everyone else what to do. All nodes
15 * send messages to the broadcast address/port, maintain a list of peers
16 * and figure out when to progress to the next step in hibernating or resuming.
17 * This makes us more fault tolerant when it comes to nodes coming and going
18 * (which may be more of an issue if we're hibernating when power supplies
19 * are being unreliable).
20 *
21 * At boot time, we start a ktuxonice thread that handles communication with
22 * other nodes. This node maintains a state machine that controls our progress
23 * through hibernating and resuming, keeping us in step with other nodes. Nodes
24 * are identified by their hw address.
25 *
26 * On startup, the node sends CLUSTER_PING on the configured interface's
27 * broadcast address, port $toi_cluster_port (see below) and begins to listen
28 * for other broadcast messages. CLUSTER_PING messages are repeated at
29 * intervals of 5 minutes, with a random offset to spread traffic out.
30 *
31 * A hibernation cycle is initiated from any node via
32 *
33 * echo > /sys/power/tuxonice/do_hibernate
34 *
 * and (possibly) the hibernate script. At each step of the process, the node
36 * completes its work, and waits for all other nodes to signal completion of
37 * their work (or timeout) before progressing to the next step.
38 *
39 * Request/state Action before reply Possible reply Next state
40 * HIBERNATE capable, pre-script HIBERNATE|ACK NODE_PREP
41 * HIBERNATE|NACK INIT_0
42 *
43 * PREP prepare_image PREP|ACK IMAGE_WRITE
44 * PREP|NACK INIT_0
45 * ABORT RUNNING
46 *
47 * IO write image IO|ACK power off
48 * ABORT POST_RESUME
49 *
50 * (Boot time) check for image IMAGE|ACK RESUME_PREP
51 * (Note 1)
52 * IMAGE|NACK (Note 2)
53 *
54 * PREP prepare read image PREP|ACK IMAGE_READ
55 * PREP|NACK (As NACK_IMAGE)
56 *
57 * IO read image IO|ACK POST_RESUME
58 *
59 * POST_RESUME thaw, post-script RUNNING
60 *
61 * INIT_0 init 0
62 *
63 * Other messages:
64 *
65 * - PING: Request for all other live nodes to send a PONG. Used at startup to
66 * announce presence, when a node is suspected dead and periodically, in case
67 * segments of the network are [un]plugged.
68 *
69 * - PONG: Response to a PING.
70 *
71 * - ABORT: Request to cancel writing an image.
72 *
73 * - BYE: Notification that this node is shutting down.
74 *
75 * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
76 * nodes which are slower to start up can get state synchronised. If a node
77 * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
78 * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
79 * must invalidate its image (if any) and boot normally.
80 *
81 * Note 2: May occur when one node lost power or powered off while others
82 * hibernated. This node waits for others to complete resuming (ACK_READ)
 * before completing its boot, so that it appears as a failed node restarting.
84 *
85 * If any node has an image, then it also has a list of nodes that hibernated
86 * in synchronisation with it. The node will wait for other nodes to appear
87 * or timeout before beginning its restoration.
88 *
89 * If a node has no image, it needs to wait, in case other nodes which do have
90 * an image are going to resume, but are taking longer to announce their
91 * presence. For this reason, the user can specify a timeout value and a number
92 * of nodes detected before we just continue. (We might want to assume in a
93 * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
94 * the remaining nodes will too. This might help in situations where some nodes
95 * are much slower to boot, or more subject to hardware failures or such like).
96 */
97
98 #include <linux/suspend.h>
99 #include <linux/module.h>
100 #include <linux/moduleparam.h>
101 #include <linux/if.h>
102 #include <linux/rtnetlink.h>
103 #include <linux/ip.h>
104 #include <linux/udp.h>
105 #include <linux/in.h>
106 #include <linux/if_arp.h>
107 #include <linux/kthread.h>
108 #include <linux/wait.h>
109 #include <linux/netdevice.h>
110 #include <net/ip.h>
111
112 #include "tuxonice.h"
113 #include "tuxonice_modules.h"
114 #include "tuxonice_sysfs.h"
115 #include "tuxonice_alloc.h"
116 #include "tuxonice_io.h"
117
/*
 * Debug logging: compile-time switch. With '#if 1' active, PRINTK()
 * forwards straight to printk(); flip to 0 to compile the messages out.
 */
#if 1
#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
#else
#define PRINTK(a, b...) do { } while (0)
#endif

/* Set at interface-open time when the bound device is the loopback. */
static int loopback_mode;
/* Node instances on this host: 1 normally, MAX_LOCAL_NODES in loopback test mode. */
static int num_local_nodes = 1;
#define MAX_LOCAL_NODES 8
/*
 * Source identifier of a received packet: in loopback test mode the
 * sender's node id travels in the packet (b->sid); otherwise the IP
 * source address is used. NOTE(review): expands b and h from the
 * surrounding function -- only usable where both are in scope (toi_recv).
 */
#define SADDR (loopback_mode ? b->sid : h->saddr)

#define MYNAME "TuxOnIce Clustering"
130
/*
 * Wire message values. A message is one state bit below, optionally
 * OR'd with MSG_ACK or MSG_NACK (see MSG_ACK_MASK / MSG_STATE_MASK).
 */
enum cluster_message {
	MSG_ACK = 1,
	MSG_NACK = 2,
	MSG_PING = 4,
	MSG_ABORT = 8,
	MSG_BYE = 16,
	MSG_HIBERNATE = 32,
	MSG_IMAGE = 64,
	MSG_IO = 128,
	MSG_RUNNING = 256
};
142
143 static char *str_message(int message)
144 {
145 switch (message) {
146 case 4:
147 return "Ping";
148 case 8:
149 return "Abort";
150 case 9:
151 return "Abort acked";
152 case 10:
153 return "Abort nacked";
154 case 16:
155 return "Bye";
156 case 17:
157 return "Bye acked";
158 case 18:
159 return "Bye nacked";
160 case 32:
161 return "Hibernate request";
162 case 33:
163 return "Hibernate ack";
164 case 34:
165 return "Hibernate nack";
166 case 64:
167 return "Image exists?";
168 case 65:
169 return "Image does exist";
170 case 66:
171 return "No image here";
172 case 128:
173 return "I/O";
174 case 129:
175 return "I/O okay";
176 case 130:
177 return "I/O failed";
178 case 256:
179 return "Running";
180 default:
181 printk(KERN_ERR "Unrecognised message %d.\n", message);
182 return "Unrecognised message (see dmesg)";
183 }
184 }
185
/* Split a message into its acknowledgement bits and its state bits. */
#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
#define MSG_STATE_MASK (~MSG_ACK_MASK)

/* Per-local-node state; one entry per simulated node in loopback mode. */
struct node_info {
	struct list_head member_list;		/* known peers (struct cluster_member) */
	wait_queue_head_t member_events;	/* woken whenever the peer list changes */
	spinlock_t member_list_lock;		/* protects member_list and its counts */
	spinlock_t receive_lock;		/* serialises message processing in toi_recv */
	int peer_count, ignored_peer_count;
	struct toi_sysfs_data sysfs_data;	/* the node_N sysfs file */
	enum cluster_message current_message;	/* what this node broadcasts; 0 = offline */
};

struct node_info node_array[MAX_LOCAL_NODES];

/* One known peer of a local node. */
struct cluster_member {
	__be32 addr;			/* peer's IP address */
	enum cluster_message message;	/* last message seen from this peer */
	struct list_head list;
	int ignore;			/* excluded from consensus counting */
};

/* UDP ports used for sending and receiving cluster messages. */
#define toi_cluster_port_send 3501
#define toi_cluster_port_recv 3502

static struct net_device *net_dev;	/* interface we broadcast on; NULL until opened */
static struct toi_module_ops toi_cluster_ops;

static int toi_recv(struct sk_buff *skb, struct net_device *dev,
		    struct packet_type *pt, struct net_device *orig_dev);

/* Receive hook registered for all IPv4 traffic; toi_recv() filters for ours. */
static struct packet_type toi_cluster_packet_type = {
	.type = __constant_htons(ETH_P_IP),
	.func = toi_recv,
};

struct toi_pkt {		/* BOOTP packet format */
	struct iphdr iph;	/* IP header */
	struct udphdr udph;	/* UDP header */
	u8 htype;		/* HW address type */
	u8 hlen;		/* HW address length */
	__be32 xid;		/* Transaction ID */
	__be16 secs;		/* Seconds since we started */
	__be16 flags;		/* Just what it says */
	u8 hw_addr[16];		/* Sender's HW address */
	u16 message;		/* Message */
	unsigned long sid;	/* Source ID for loopback testing */
};

/* Name of the interface to cluster over; empty string disables clustering. */
static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;

/* Non-zero once toi_cluster_packet_type has been registered. */
static int added_pack;

/* Count of peers that reported having an image at startup. */
static int others_have_image;

/* Key used to allow multiple clusters on the same lan */
static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
/* Scripts run around a hibernation cycle (paths, NUL terminated). */
static char pre_hibernate_script[255] = CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
static char post_hibernate_script[255] = CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;

/* List of cluster members */
/* How long to wait for straggling peers before moving on (jiffies). */
static unsigned long continue_delay = 5 * HZ;
/* Interval between rebroadcasts of our current message (jiffies). */
static unsigned long cluster_message_timeout = 3 * HZ;
249
250 /* === Membership list === */
251
252 static void print_member_info(int index)
253 {
254 struct cluster_member *this;
255
256 printk(KERN_INFO "==> Dumping node %d.\n", index);
257
258 list_for_each_entry(this, &node_array[index].member_list, list)
259 printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
260 NIPQUAD(this->addr),
261 str_message(this->message), this->ignore ? "(Ignored)" : "");
262 printk(KERN_INFO "== Done ==\n");
263 }
264
265 static struct cluster_member *__find_member(int index, __be32 addr)
266 {
267 struct cluster_member *this;
268
269 list_for_each_entry(this, &node_array[index].member_list, list) {
270 if (this->addr != addr)
271 continue;
272
273 return this;
274 }
275
276 return NULL;
277 }
278
279 static void set_ignore(int index, __be32 addr, struct cluster_member *this)
280 {
281 if (this->ignore) {
282 PRINTK("Node %d already ignoring %d.%d.%d.%d.\n", index, NIPQUAD(addr));
283 return;
284 }
285
286 PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n", index, NIPQUAD(addr));
287 this->ignore = 1;
288 node_array[index].ignored_peer_count++;
289 }
290
291 static int __add_update_member(int index, __be32 addr, int message)
292 {
293 struct cluster_member *this;
294
295 this = __find_member(index, addr);
296 if (this) {
297 if (this->message != message) {
298 this->message = message;
299 if ((message & MSG_NACK) &&
300 (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
301 set_ignore(index, addr, this);
302 PRINTK("Node %d sees node %d.%d.%d.%d now sending "
303 "%s.\n", index, NIPQUAD(addr), str_message(message));
304 wake_up(&node_array[index].member_events);
305 }
306 return 0;
307 }
308
309 this = (struct cluster_member *)toi_kzalloc(36, sizeof(struct cluster_member), GFP_KERNEL);
310
311 if (!this)
312 return -1;
313
314 this->addr = addr;
315 this->message = message;
316 this->ignore = 0;
317 INIT_LIST_HEAD(&this->list);
318
319 node_array[index].peer_count++;
320
321 PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
322 NIPQUAD(addr), str_message(message));
323
324 if ((message & MSG_NACK) && (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
325 set_ignore(index, addr, this);
326 list_add_tail(&this->list, &node_array[index].member_list);
327 return 1;
328 }
329
330 static int add_update_member(int index, __be32 addr, int message)
331 {
332 int result;
333 unsigned long flags;
334 spin_lock_irqsave(&node_array[index].member_list_lock, flags);
335 result = __add_update_member(index, addr, message);
336 spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
337
338 print_member_info(index);
339
340 wake_up(&node_array[index].member_events);
341
342 return result;
343 }
344
345 static void del_member(int index, __be32 addr)
346 {
347 struct cluster_member *this;
348 unsigned long flags;
349
350 spin_lock_irqsave(&node_array[index].member_list_lock, flags);
351 this = __find_member(index, addr);
352
353 if (this) {
354 list_del_init(&this->list);
355 toi_kfree(36, this, sizeof(*this));
356 node_array[index].peer_count--;
357 }
358
359 spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
360 }
361
/* === Message transmission === */

static void toi_send_if(int message, unsigned long my_id);

/*
 * toi_recv - packet_type receive hook for cluster messages.
 *
 * Called for every IPv4 packet seen on the bound device. Validates that
 * the packet is a well-formed, unfragmented UDP datagram on our ports,
 * records the sender's message against each local node, and decides what
 * each node should broadcast next per the protocol table at the top of
 * this file. Always consumes the skb.
 */
static int toi_recv(struct sk_buff *skb, struct net_device *dev,
		    struct packet_type *pt, struct net_device *orig_dev)
{
	struct toi_pkt *b;
	struct iphdr *h;
	int len, result, index;
	unsigned long addr, message, ack;

	/* Perform verifications before taking the lock. */
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto drop;

	if (dev != net_dev)
		goto drop;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		return NET_RX_DROP;

	/* Make sure at least the IP and UDP headers are linear. */
	if (!pskb_may_pull(skb, sizeof(struct iphdr) + sizeof(struct udphdr)))
		goto drop;

	b = (struct toi_pkt *)skb_network_header(skb);
	h = &b->iph;

	if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
		goto drop;

	/* Fragments are not supported */
	if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
		if (net_ratelimit())
			printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
			       "cluster message.\n");
		goto drop;
	}

	if (skb->len < ntohs(h->tot_len))
		goto drop;

	if (ip_fast_csum((char *)h, h->ihl))
		goto drop;

	if (b->udph.source != htons(toi_cluster_port_send) ||
	    b->udph.dest != htons(toi_cluster_port_recv))
		goto drop;

	if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
		goto drop;

	/* NOTE(review): len is computed but never used below. */
	len = ntohs(b->udph.len) - sizeof(struct udphdr);

	/* Ok the front looks good, make sure we can get at the rest. */
	if (!pskb_may_pull(skb, skb->len))
		goto drop;

	/* Re-read the header pointers: pskb_may_pull may move skb data. */
	b = (struct toi_pkt *)skb_network_header(skb);
	h = &b->iph;

	/* SADDR: sender's node id in loopback mode, else its IP address. */
	addr = SADDR;
	PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
	       str_message(b->message), NIPQUAD(addr));

	message = b->message & MSG_STATE_MASK;
	ack = b->message & MSG_ACK_MASK;

	/* Deliver the message to every local node instance. */
	for (index = 0; index < num_local_nodes; index++) {
		int new_message = node_array[index].current_message,
		    old_message = new_message;

		/*
		 * NOTE(review): index == SADDR only identifies "self" in
		 * loopback mode, where SADDR is the sender's node id --
		 * confirm intended behaviour on real networks.
		 */
		if (index == SADDR || !old_message) {
			PRINTK("Ignoring node %d (offline or self).\n", index);
			continue;
		}

		/* One message at a time, please. */
		spin_lock(&node_array[index].receive_lock);

		result = add_update_member(index, SADDR, b->message);
		if (result == -1) {
			printk(KERN_INFO "Failed to add new cluster member "
			       NIPQUAD_FMT ".\n", NIPQUAD(addr));
			goto drop_unlock;
		}

		/* Decide this node's reply; see protocol table at file top. */
		switch (b->message & MSG_STATE_MASK) {
		case MSG_PING:
			break;
		case MSG_ABORT:
			break;
		case MSG_BYE:
			break;
		case MSG_HIBERNATE:
			/* Can I hibernate? */
			new_message = MSG_HIBERNATE |
			    ((index & 1) ? MSG_NACK : MSG_ACK);
			break;
		case MSG_IMAGE:
			/* Can I resume? */
			new_message = MSG_IMAGE |
			    ((index & 1) ? MSG_NACK : MSG_ACK);
			if (new_message != old_message)
				printk(KERN_ERR "Setting whether I can resume "
				       "to %d.\n", new_message);
			break;
		case MSG_IO:
			new_message = MSG_IO | MSG_ACK;
			break;
		case MSG_RUNNING:
			break;
		default:
			if (net_ratelimit())
				printk(KERN_ERR "Unrecognised TuxOnIce cluster"
				       " message %d from " NIPQUAD_FMT ".\n",
				       b->message, NIPQUAD(addr));
		};

		/* Broadcast a changed state; re-send unchanged state only
		 * when the incoming message was not an ack/nack. */
		if (old_message != new_message) {
			node_array[index].current_message = new_message;
			printk(KERN_INFO ">>> Sending new message for node "
			       "%d.\n", index);
			toi_send_if(new_message, index);
		} else if (!ack) {
			printk(KERN_INFO ">>> Resending message for node %d.\n",
			       index);
			toi_send_if(new_message, index);
		}
drop_unlock:
		spin_unlock(&node_array[index].receive_lock);
	};

drop:
	/* Throw the packet out. */
	kfree_skb(skb);

	return 0;
}
499
/*
 * Send cluster message to single interface.
 *
 * toi_send_if - broadcast one cluster message on the bound interface.
 * @message: MSG_* value (optionally with ack bits) to send.
 * @my_id: sending node's id, carried in the packet for loopback testing.
 *
 * Builds a BOOTP-like IP/UDP broadcast by hand and queues it directly on
 * net_dev. Failures are only logged (a bare "E").
 */
static void toi_send_if(int message, unsigned long my_id)
{
	struct sk_buff *skb;
	struct toi_pkt *b;
	int hh_len = LL_RESERVED_SPACE(net_dev);
	struct iphdr *h;

	/* Allocate packet */
	skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL);
	if (!skb)
		return;
	skb_reserve(skb, hh_len);
	b = (struct toi_pkt *)skb_put(skb, sizeof(struct toi_pkt));
	memset(b, 0, sizeof(struct toi_pkt));

	/* Construct IP header. Fields not set here (saddr, id, tos) stay
	 * zero from the memset above, BOOTP-client style. */
	skb_reset_network_header(skb);
	h = ip_hdr(skb);
	h->version = 4;
	h->ihl = 5;
	h->tot_len = htons(sizeof(struct toi_pkt));
	h->frag_off = htons(IP_DF);
	h->ttl = 64;
	h->protocol = IPPROTO_UDP;
	h->daddr = htonl(INADDR_BROADCAST);
	h->check = ip_fast_csum((unsigned char *)h, h->ihl);

	/* Construct UDP header */
	b->udph.source = htons(toi_cluster_port_send);
	b->udph.dest = htons(toi_cluster_port_recv);
	b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
	/* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */

	/* Construct message */
	b->message = message;
	b->sid = my_id;
	/* NOTE(review): dev->type is 16 bits but htype is u8 -- device
	 * types above 255 are silently truncated here. */
	b->htype = net_dev->type;
	b->hlen = net_dev->addr_len;
	memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
	b->secs = htons(3);	/* 3 seconds */

	/* Chain packet down the line... */
	skb->dev = net_dev;
	skb->protocol = htons(ETH_P_IP);
	if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
			     net_dev->broadcast, net_dev->dev_addr,
			     skb->len) < 0) ||
	    dev_queue_xmit(skb) < 0)
		printk(KERN_INFO "E");
}
552
/* ========================================= */

/* kTOICluster */

/* Number of kTOICluster threads currently running. */
static atomic_t num_cluster_threads;
/* Shared waitqueue: paces thread rebroadcasts and signals thread exit. */
static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);

/*
 * kTOICluster - per-node broadcast daemon.
 * @data: initial cluster_message for this node (cast to unsigned long).
 *
 * Claims the next node slot and rebroadcasts that node's current_message
 * at cluster_message_timeout intervals until kill_clusterd() zeroes it,
 * then announces MSG_BYE and exits.
 */
static int kTOICluster(void *data)
{
	unsigned long my_id;

	/* Claim the next free slot; doubles as our node_array index. */
	my_id = atomic_add_return(1, &num_cluster_threads) - 1;
	node_array[my_id].current_message = (unsigned long)data;

	PRINTK("kTOICluster daemon %lu starting.\n", my_id);

	/* Keep running while the rest of the system freezes for hibernation. */
	current->flags |= PF_NOFREEZE;

	while (node_array[my_id].current_message) {
		toi_send_if(node_array[my_id].current_message, my_id);
		/* NOTE(review): sleep_on_timeout is a deprecated, racy API;
		 * a wakeup between the send and the sleep is lost. Consider
		 * wait_event_timeout() instead. */
		sleep_on_timeout(&clusterd_events, cluster_message_timeout);
		PRINTK("Link state %lu is %d.\n", my_id,
		       node_array[my_id].current_message);
	}

	toi_send_if(MSG_BYE, my_id);
	atomic_dec(&num_cluster_threads);
	wake_up(&clusterd_events);	/* let kill_clusterd() see us exit */

	PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
	__set_current_state(TASK_RUNNING);
	return 0;
}
585
586 static void kill_clusterd(void)
587 {
588 int i;
589
590 for (i = 0; i < num_local_nodes; i++) {
591 if (node_array[i].current_message) {
592 PRINTK("Seeking to kill clusterd %d.\n", i);
593 node_array[i].current_message = 0;
594 }
595 }
596 wait_event(clusterd_events, !atomic_read(&num_cluster_threads));
597 PRINTK("All cluster daemons have exited.\n");
598 }
599
600 static int peers_not_in_message(int index, int message, int precise)
601 {
602 struct cluster_member *this;
603 unsigned long flags;
604 int result = 0;
605
606 spin_lock_irqsave(&node_array[index].member_list_lock, flags);
607 list_for_each_entry(this, &node_array[index].member_list, list) {
608 if (this->ignore)
609 continue;
610
611 PRINTK("Peer %d.%d.%d.%d sending %s. "
612 "Seeking %s.\n",
613 NIPQUAD(this->addr), str_message(this->message), str_message(message));
614 if ((precise ? this->message : this->message & MSG_STATE_MASK) != message)
615 result++;
616 }
617 spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
618 PRINTK("%d peers in sought message.\n", result);
619 return result;
620 }
621
622 static void reset_ignored(int index)
623 {
624 struct cluster_member *this;
625 unsigned long flags;
626
627 spin_lock_irqsave(&node_array[index].member_list_lock, flags);
628 list_for_each_entry(this, &node_array[index].member_list, list)
629 this->ignore = 0;
630 node_array[index].ignored_peer_count = 0;
631 spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
632 }
633
634 static int peers_in_message(int index, int message, int precise)
635 {
636 return node_array[index].peer_count -
637 node_array[index].ignored_peer_count - peers_not_in_message(index, message, precise);
638 }
639
640 static int time_to_continue(int index, unsigned long start, int message)
641 {
642 int first = peers_not_in_message(index, message, 0);
643 int second = peers_in_message(index, message, 1);
644
645 PRINTK("First part returns %d, second returns %d.\n", first, second);
646
647 if (!first && !second) {
648 PRINTK("All peers answered message %d.\n", message);
649 return 1;
650 }
651
652 if (time_after(jiffies, start + continue_delay)) {
653 PRINTK("Timeout reached.\n");
654 return 1;
655 }
656
657 PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies, start + continue_delay);
658 return 0;
659 }
660
/*
 * toi_initiate_cluster_hibernate - drive a cluster-wide hibernation cycle
 * from this node.
 *
 * Prepares the local image, then walks the protocol: announce HIBERNATE
 * and wait for peers (or timeout), optionally abort for a freezer test,
 * announce IO, save the image and finally power down. Note that after an
 * atomic restore this same function continues executing on the resume
 * path (see the comment near the bottom).
 */
void toi_initiate_cluster_hibernate(void)
{
	int result;
	unsigned long start;

	result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
	if (result)
		return;

	toi_send_if(MSG_HIBERNATE, 0);

	/* Wait for peers to answer the hibernate request (or time out). */
	start = jiffies;
	wait_event(node_array[0].member_events,
		   time_to_continue(0, start, MSG_HIBERNATE));

	if (test_action_state(TOI_FREEZER_TEST)) {
		/* Freezer test run: tell everyone to abort, then clean up. */
		toi_send_if(MSG_ABORT, 0);

		start = jiffies;
		wait_event(node_array[0].member_events,
			   time_to_continue(0, start, MSG_RUNNING));

		do_toi_step(STEP_QUIET_CLEANUP);
		return;
	}

	toi_send_if(MSG_IO, 0);

	result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
	if (result)
		return;

	/* This code runs at resume time too! */
	if (toi_in_hibernate)
		result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
}
EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate);
696
697 /* toi_cluster_print_debug_stats
698 *
699 * Description: Print information to be recorded for debugging purposes into a
700 * buffer.
701 * Arguments: buffer: Pointer to a buffer into which the debug info will be
702 * printed.
703 * size: Size of the buffer.
704 * Returns: Number of characters written to the buffer.
705 */
706 static int toi_cluster_print_debug_stats(char *buffer, int size)
707 {
708 int len;
709
710 if (strlen(toi_cluster_iface))
711 len = scnprintf(buffer, size, "- Cluster interface is '%s'.\n", toi_cluster_iface);
712 else
713 len = scnprintf(buffer, size, "- Cluster support is disabled.\n");
714 return len;
715 }
716
/* cluster_memory_needed
 *
 * Description: Tell the caller how much memory we need to operate during
 *              hibernate/resume.
 * Returns:     Unsigned long. Maximum number of bytes of memory required for
 *              operation.
 */
static int toi_cluster_memory_needed(void)
{
	/* Cluster support reserves no extra memory. */
	return 0;
}
728
729 static int toi_cluster_storage_needed(void)
730 {
731 return 1 + strlen(toi_cluster_iface);
732 }
733
734 /* toi_cluster_save_config_info
735 *
736 * Description: Save informaton needed when reloading the image at resume time.
737 * Arguments: Buffer: Pointer to a buffer of size PAGE_SIZE.
738 * Returns: Number of bytes used for saving our data.
739 */
740 static int toi_cluster_save_config_info(char *buffer)
741 {
742 strcpy(buffer, toi_cluster_iface);
743 return strlen(toi_cluster_iface + 1);
744 }
745
746 /* toi_cluster_load_config_info
747 *
748 * Description: Reload information needed for declustering the image at
749 * resume time.
750 * Arguments: Buffer: Pointer to the start of the data.
751 * Size: Number of bytes that were saved.
752 */
753 static void toi_cluster_load_config_info(char *buffer, int size)
754 {
755 strncpy(toi_cluster_iface, buffer, size);
756 return;
757 }
758
/*
 * cluster_startup - bring the cluster protocol up at interface-open time.
 *
 * Starts one kTOICluster broadcast thread per local node, waits to learn
 * whether we and/or our peers hold a hibernation image, performs/awaits
 * resume accordingly, and finishes with node 0 broadcasting MSG_RUNNING.
 */
static void cluster_startup(void)
{
	int have_image = do_check_can_resume(), i;
	unsigned long start = jiffies, initial_message;
	struct task_struct *p;

	initial_message = MSG_IMAGE;

	/* NOTE(review): debug override -- forces the resume path regardless
	 * of the do_check_can_resume() result above. Presumably to be
	 * removed for production; confirm. */
	have_image = 1;

	for (i = 0; i < num_local_nodes; i++) {
		PRINTK("Starting ktoiclusterd %d.\n", i);
		p = kthread_create(kTOICluster, (void *)initial_message,
				   "ktoiclusterd/%d", i);
		if (IS_ERR(p)) {
			printk(KERN_ERR "Failed to start ktoiclusterd.\n");
			return;
		}

		wake_up_process(p);
	}

	/* Wait for delay or someone else sending first message */
	wait_event(node_array[0].member_events,
		   time_to_continue(0, start, MSG_IMAGE));

	/* How many peers precisely acked having an image? */
	others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);

	printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
	       " %d.\n", have_image ? "" : "don't ", others_have_image);

	if (have_image) {
		int result;

		/* Start to resume */
		printk(KERN_INFO " === Starting to resume === \n");
		node_array[0].current_message = MSG_IO;
		toi_send_if(MSG_IO, 0);

		/* NOTE(review): the actual resume steps are commented out
		 * below -- this path currently only walks the protocol. */
		/* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
		result = 0;

		if (!result) {
			/*
			 * Atomic restore - we'll come back in the hibernation
			 * path.
			 */

			/* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
			result = 0;

			/* do_toi_step(STEP_QUIET_CLEANUP); */
		}

		node_array[0].current_message |= MSG_NACK;

		/* For debugging - disable for real life? */
		wait_event(node_array[0].member_events,
			   time_to_continue(0, start, MSG_IO));
	}

	if (others_have_image) {
		/* Wait for them to resume */
		printk(KERN_INFO "Waiting for other nodes to resume.\n");
		start = jiffies;
		wait_event(node_array[0].member_events,
			   time_to_continue(0, start, MSG_RUNNING));
		if (peers_not_in_message(0, MSG_RUNNING, 0))
			printk(KERN_INFO "Timed out while waiting for other "
			       "nodes to resume.\n");
	}

	/* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
	 * as appropriate.
	 *
	 * If we don't have an image:
	 * - Wait until someone else says they have one, or conditions are met
	 *   for continuing to boot (n machines or t seconds).
	 * - If anyone has an image, wait for them to resume before continuing
	 *   to boot.
	 *
	 * If we have an image:
	 * - Wait until conditions are met before continuing to resume (n
	 *   machines or t seconds). Send RESUME_PREP and freeze processes.
	 *   NACK_PREP if freezing fails (shouldn't) and follow logic for
	 *   us having no image above. On success, wait for [N]ACK_PREP from
	 *   other machines. Read image (including atomic restore) until done.
	 *   Wait for ACK_READ from others (should never fail). Thaw processes
	 *   and do post-resume. (The section after the atomic restore is done
	 *   via the code for hibernating).
	 */

	node_array[0].current_message = MSG_RUNNING;
}
848
849 /* toi_cluster_open_iface
850 *
851 * Description: Prepare to use an interface.
852 */
853
854 static int toi_cluster_open_iface(void)
855 {
856 struct net_device *dev;
857
858 rtnl_lock();
859
860 for_each_netdev(&init_net, dev) {
861 if (/* dev == &init_net.loopback_dev || */
862 strcmp(dev->name, toi_cluster_iface))
863 continue;
864
865 net_dev = dev;
866 break;
867 }
868
869 rtnl_unlock();
870
871 if (!net_dev) {
872 printk(KERN_ERR MYNAME ": Device %s not found.\n", toi_cluster_iface);
873 return -ENODEV;
874 }
875
876 dev_add_pack(&toi_cluster_packet_type);
877 added_pack = 1;
878
879 loopback_mode = (net_dev == init_net.loopback_dev);
880 num_local_nodes = loopback_mode ? 8 : 1;
881
882 PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
883 loopback_mode ? "on" : "off", num_local_nodes);
884
885 cluster_startup();
886 return 0;
887 }
888
889 /* toi_cluster_close_iface
890 *
891 * Description: Stop using an interface.
892 */
893
894 static int toi_cluster_close_iface(void)
895 {
896 kill_clusterd();
897 if (added_pack) {
898 dev_remove_pack(&toi_cluster_packet_type);
899 added_pack = 0;
900 }
901 return 0;
902 }
903
904 static void write_side_effect(void)
905 {
906 if (toi_cluster_ops.enabled) {
907 toi_cluster_open_iface();
908 set_toi_state(TOI_CLUSTER_MODE);
909 } else {
910 toi_cluster_close_iface();
911 clear_toi_state(TOI_CLUSTER_MODE);
912 }
913 }
914
/* Sysfs write hook for the per-node message files: nothing to do. */
static void node_write_side_effect(void)
{
}
918
919 /*
920 * data for our sysfs entries.
921 */
922 static struct toi_sysfs_data sysfs_params[] = {
923 SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
924 NULL),
925 SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
926 write_side_effect),
927 SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
928 SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
929 256, 0, NULL),
930 SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
931 256, 0, STRING),
932 SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
933 0)
934 };
935
/*
 * Ops structure.
 *
 * Registration with the TuxOnIce core: config save/load, debug output
 * and storage accounting hooks, plus the sysfs entries declared above.
 */

static struct toi_module_ops toi_cluster_ops = {
	.type = FILTER_MODULE,
	.name = "Cluster",
	.directory = "cluster",
	.module = THIS_MODULE,
	.memory_needed = toi_cluster_memory_needed,
	.print_debug_info = toi_cluster_print_debug_stats,
	.save_config_info = toi_cluster_save_config_info,
	.load_config_info = toi_cluster_load_config_info,
	.storage_needed = toi_cluster_storage_needed,

	.sysfs_data = sysfs_params,
	.num_sysfs_entries = sizeof(sysfs_params) /
	    sizeof(struct toi_sysfs_data),
};
954
/* ---- Registration ---- */

/* Built as a module, init/exit may live in the discardable sections;
 * built in, they are left as plain definitions. */
#ifdef MODULE
#define INIT static __init
#define EXIT static __exit
#else
#define INIT
#define EXIT
#endif
964
965 INIT int toi_cluster_init(void)
966 {
967 int temp = toi_register_module(&toi_cluster_ops), i;
968 struct kobject *kobj = toi_cluster_ops.dir_kobj;
969
970 for (i = 0; i < MAX_LOCAL_NODES; i++) {
971 node_array[i].current_message = 0;
972 INIT_LIST_HEAD(&node_array[i].member_list);
973 init_waitqueue_head(&node_array[i].member_events);
974 spin_lock_init(&node_array[i].member_list_lock);
975 spin_lock_init(&node_array[i].receive_lock);
976
977 /* Set up sysfs entry */
978 node_array[i].sysfs_data.attr.name = toi_kzalloc(8,
979 sizeof(node_array[i].sysfs_data.
980 attr.name), GFP_KERNEL);
981 sprintf((char *)node_array[i].sysfs_data.attr.name, "node_%d", i);
982 node_array[i].sysfs_data.attr.mode = SYSFS_RW;
983 node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
984 node_array[i].sysfs_data.flags = 0;
985 node_array[i].sysfs_data.data.integer.variable =
986 (int *)&node_array[i].current_message;
987 node_array[i].sysfs_data.data.integer.minimum = 0;
988 node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
989 node_array[i].sysfs_data.write_side_effect = node_write_side_effect;
990 toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
991 }
992
993 toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
994
995 if (toi_cluster_ops.enabled)
996 toi_cluster_open_iface();
997
998 return temp;
999 }
1000
1001 EXIT void toi_cluster_exit(void)
1002 {
1003 int i;
1004 toi_cluster_close_iface();
1005
1006 for (i = 0; i < MAX_LOCAL_NODES; i++)
1007 toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj, &node_array[i].sysfs_data);
1008 toi_unregister_module(&toi_cluster_ops);
1009 }
1010
1011 static int __init toi_cluster_iface_setup(char *iface)
1012 {
1013 toi_cluster_ops.enabled = (*iface && strcmp(iface, "off"));
1014
1015 if (toi_cluster_ops.enabled)
1016 strncpy(toi_cluster_iface, iface, strlen(iface));
1017 }
1018
1019 __setup("toi_cluster=", toi_cluster_iface_setup);
1020
#ifdef MODULE
/* Modular build: standard metadata plus init/exit wiring. */
MODULE_LICENSE("GPL");
module_init(toi_cluster_init);
module_exit(toi_cluster_exit);
MODULE_AUTHOR("Nigel Cunningham");
MODULE_DESCRIPTION("Cluster Support for TuxOnIce");
#endif