1 /* Simple program to layout "physical" memory for new lguest guest.
2 * Linked high to avoid likely physical memory. */
3 #define _LARGEFILE64_SOURCE
13 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <sys/ioctl.h>
24 #include <netinet/in.h>
26 #include <linux/sockios.h>
27 #include <linux/if_tun.h>
32 typedef unsigned long long u64
;
36 #include "../../include/linux/lguest_launcher.h"
37 #include "../../include/asm-i386/e820.h"
39 #define PAGE_PRESENT 0x7 /* Present, RW, Execute */
41 #define BRIDGE_PFX "bridge:"
43 #define SIOCBRADDIF 0x89a2 /* add interface to bridge */
47 #define verbose(args...) \
48 do { if (verbose) printf(args); } while(0)
57 struct device
**lastdev
;
63 struct lguest_device_desc
*desc
;
66 /* Watch this fd if handle_input non-NULL. */
68 bool (*handle_input
)(int fd
, struct device
*me
);
70 /* Watch DMA to this key if handle_input non-NULL. */
71 unsigned long watch_key
;
72 u32 (*handle_output
)(int fd
, const struct iovec
*iov
,
73 unsigned int num
, struct device
*me
);
75 /* Device-specific data. */
/* Open a file, or die with a useful diagnostic.  Returns the open fd. */
static int open_or_die(const char *name, int flags)
{
	int fd = open(name, flags);

	if (fd < 0)
		err(1, "Failed to open %s", name);
	return fd;
}
87 static void *map_zeroed_pages(unsigned long addr
, unsigned int num
)
92 fd
= open_or_die("/dev/zero", O_RDONLY
);
94 if (mmap((void *)addr
, getpagesize() * num
,
95 PROT_READ
|PROT_WRITE
|PROT_EXEC
, MAP_FIXED
|MAP_PRIVATE
, fd
, 0)
97 err(1, "Mmaping %u pages of /dev/zero @%p", num
, (void *)addr
);
/* Find magic string marking entry point, return entry point.
 * Scans [start, end) for "GenuineLguest"; the entry address is the byte
 * just past the magic, adjusted by page_offset.  Dies if not found. */
static unsigned long entry_point(void *start, void *end,
				 unsigned long page_offset)
{
	void *p;

	for (p = start; p < end; p++)
		if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0)
			return (long)p + strlen("GenuineLguest") + page_offset;

	err(1, "Is this image a genuine lguest?");
}
/* Returns the entry point.
 * Maps each PT_LOAD segment of a 32-bit i386 ELF executable at its physical
 * address (our virtual == guest physical) and records the uniform
 * virtual-to-physical page_offset.
 * NOTE(review): reconstructed from a garbled extraction — verify the loop
 * continue/else structure against the original launcher source. */
static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
			     unsigned long *page_offset)
{
	void *addr;
	Elf32_Phdr phdr[ehdr->e_phnum];
	unsigned int i;
	unsigned long start = -1UL, end = 0;

	/* Sanity checks: type, arch, and a plausible program-header table. */
	if (ehdr->e_type != ET_EXEC
	    || ehdr->e_machine != EM_386
	    || ehdr->e_phentsize != sizeof(Elf32_Phdr)
	    || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
		errx(1, "Malformed elf header");

	if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
		err(1, "Seeking to program headers");
	if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
		err(1, "Reading program headers");

	*page_offset = 0;
	/* We map the loadable segments at virtual addresses corresponding
	 * to their physical addresses (our virtual == guest physical). */
	for (i = 0; i < ehdr->e_phnum; i++) {
		if (phdr[i].p_type != PT_LOAD)
			continue;

		verbose("Section %i: size %i addr %p\n",
			i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);

		/* We expect linear address space. */
		if (!*page_offset)
			*page_offset = phdr[i].p_vaddr - phdr[i].p_paddr;
		else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr)
			errx(1, "Page offset of section %i different", i);

		/* Track overall [start, end) of the loaded image. */
		if (phdr[i].p_paddr < start)
			start = phdr[i].p_paddr;
		if (phdr[i].p_paddr + phdr[i].p_filesz > end)
			end = phdr[i].p_paddr + phdr[i].p_filesz;

		/* We map everything private, writable. */
		addr = mmap((void *)phdr[i].p_paddr,
			    phdr[i].p_filesz,
			    PROT_READ|PROT_WRITE|PROT_EXEC,
			    MAP_FIXED|MAP_PRIVATE,
			    elf_fd, phdr[i].p_offset);
		if (addr != (void *)phdr[i].p_paddr)
			err(1, "Mmaping vmlinux seg %i gave %p not %p",
			    i, addr, (void *)phdr[i].p_paddr);
	}

	return entry_point((void *)start, (void *)end, *page_offset);
}
/* This is amazingly reliable.
 * Guess the kernel's page offset by scanning for "mov 0xXXXXXXXX,%eax"
 * (opcode 0xA1): once four absolute addresses share the same top byte,
 * that byte << 24 is taken as the page offset.  Dies if undecidable. */
static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
{
	unsigned int i, possibilities[256] = { 0 };

	for (i = 0; i + 4 < len; i++) {
		/* mov 0xXXXXXXXX,%eax */
		if (img[i] == 0xA1 && ++possibilities[img[i+4]] > 3)
			return (unsigned long)img[i+4] << 24;
	}
	errx(1, "could not determine page offset");
}
183 static unsigned long unpack_bzimage(int fd
, unsigned long *page_offset
)
187 void *img
= (void *)0x100000;
189 f
= gzdopen(fd
, "rb");
190 while ((ret
= gzread(f
, img
+ len
, 65536)) > 0)
193 err(1, "reading image from bzImage");
195 verbose("Unpacked size %i addr %p\n", len
, img
);
196 *page_offset
= intuit_page_offset(img
, len
);
198 return entry_point(img
, img
+ len
, *page_offset
);
/* Scan a bzImage for the embedded gzip stream and unpack it.
 * NOTE(review): the header-matching state machine was largely missing from
 * this extraction and has been reconstructed — verify the byte checks
 * (0x1F 0x8B magic, OS byte 0x03) against the original launcher source. */
static unsigned long load_bzimage(int fd, unsigned long *page_offset)
{
	unsigned char c;
	int state = 0;

	/* Ugly brute force search for gzip header. */
	while (read(fd, &c, 1) == 1) {
		switch (state) {
		case 0:
			if (c == 0x1F)	/* gzip magic byte 1 */
				state++;
			break;
		case 1:
			if (c == 0x8B)	/* gzip magic byte 2 */
				state++;
			else
				state = 0;
			break;
		case 2 ... 8:
			state++;
			break;
		case 9:
			/* Rewind to the start of the candidate header. */
			lseek(fd, -10, SEEK_CUR);
			if (c != 0x03) /* Compressed under UNIX. */
				state = -1;
			else
				return unpack_bzimage(fd, page_offset);
		}
	}
	errx(1, "Could not find kernel in bzImage");
}
/* Load an ELF vmlinux or a bzImage from fd; fills *page_offset and returns
 * the guest entry point. */
static unsigned long load_kernel(int fd, unsigned long *page_offset)
{
	Elf32_Ehdr hdr;

	if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
		err(1, "Reading kernel");

	/* ELF magic?  Treat as vmlinux; otherwise assume bzImage. */
	if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
		return map_elf(fd, &hdr, page_offset);

	return load_bzimage(fd, page_offset);
}
/* Round addr up to the next page boundary. */
static inline unsigned long page_align(unsigned long addr)
{
	return ((addr + getpagesize()-1) & ~(getpagesize()-1));
}
/* initrd gets loaded at top of memory: return length.
 * Maps the initrd file so it ends exactly at guest-physical address mem;
 * the returned length is page-aligned. */
static unsigned long load_initrd(const char *name, unsigned long mem)
{
	int ifd;
	struct stat st;
	unsigned long len;
	void *iaddr;

	ifd = open_or_die(name, O_RDONLY);
	if (fstat(ifd, &st) < 0)
		err(1, "fstat() on initrd '%s'", name);

	len = page_align(st.st_size);
	iaddr = mmap((void *)mem - len, st.st_size,
		     PROT_READ|PROT_EXEC|PROT_WRITE,
		     MAP_FIXED|MAP_PRIVATE, ifd, 0);
	if (iaddr != (void *)mem - len)
		err(1, "Mmaping initrd '%s' returned %p not %p",
		    name, iaddr, (void *)mem - len);
	close(ifd);

	verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr);
	return len;
}
275 static unsigned long setup_pagetables(unsigned long mem
,
276 unsigned long initrd_size
,
277 unsigned long page_offset
)
280 unsigned int mapped_pages
, i
, linear_pages
;
281 unsigned int ptes_per_page
= getpagesize()/sizeof(u32
);
283 /* If we can map all of memory above page_offset, we do so. */
284 if (mem
<= -page_offset
)
285 mapped_pages
= mem
/getpagesize();
287 mapped_pages
= -page_offset
/getpagesize();
289 /* Each linear PTE page can map ptes_per_page pages. */
290 linear_pages
= (mapped_pages
+ ptes_per_page
-1)/ptes_per_page
;
292 /* We lay out top-level then linear mapping immediately below initrd */
293 pgdir
= (void *)mem
- initrd_size
- getpagesize();
294 linear
= (void *)pgdir
- linear_pages
*getpagesize();
296 for (i
= 0; i
< mapped_pages
; i
++)
297 linear
[i
] = ((i
* getpagesize()) | PAGE_PRESENT
);
299 /* Now set up pgd so that this memory is at page_offset */
300 for (i
= 0; i
< mapped_pages
; i
+= ptes_per_page
) {
301 pgdir
[(i
+ page_offset
/getpagesize())/ptes_per_page
]
302 = (((u32
)linear
+ i
*sizeof(u32
)) | PAGE_PRESENT
);
305 verbose("Linear mapping of %u pages in %u pte pages at %p\n",
306 mapped_pages
, linear_pages
, linear
);
308 return (unsigned long)pgdir
;
/* Concatenate the NULL-terminated argument vector into dst, one space after
 * each argument.  dst must be large enough. */
static void concat(char *dst, char *args[])
{
	unsigned int i, len = 0;

	for (i = 0; args[i]; i++) {
		strcpy(dst+len, args[i]);
		strcat(dst+len, " ");
		len += strlen(args[i]) + 1;
	}
	/* In case it's empty. */
	dst[len] = '\0';
}
324 static int tell_kernel(u32 pgdir
, u32 start
, u32 page_offset
)
326 u32 args
[] = { LHREQ_INITIALIZE
,
327 LGUEST_GUEST_TOP
/getpagesize(), /* Just below us */
328 pgdir
, start
, page_offset
};
331 fd
= open_or_die("/dev/lguest", O_RDWR
);
332 if (write(fd
, args
, sizeof(args
)) < 0)
333 err(1, "Writing to /dev/lguest");
337 static void set_fd(int fd
, struct device_list
*devices
)
339 FD_SET(fd
, &devices
->infds
);
340 if (fd
> devices
->max_infd
)
341 devices
->max_infd
= fd
;
344 /* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */
345 static void wake_parent(int pipefd
, int lguest_fd
, struct device_list
*devices
)
347 set_fd(pipefd
, devices
);
350 fd_set rfds
= devices
->infds
;
351 u32 args
[] = { LHREQ_BREAK
, 1 };
353 select(devices
->max_infd
+1, &rfds
, NULL
, NULL
, NULL
);
354 if (FD_ISSET(pipefd
, &rfds
)) {
356 if (read(pipefd
, &ignorefd
, sizeof(ignorefd
)) == 0)
358 FD_CLR(ignorefd
, &devices
->infds
);
360 write(lguest_fd
, args
, sizeof(args
));
/* Fork the waker child that watches input fds on our behalf.  Returns the
 * write end of the control pipe used to talk to it.
 * NOTE(review): reconstructed from a garbled extraction — the pipe/fork
 * error handling was missing; verify against the original source. */
static int setup_waker(int lguest_fd, struct device_list *device_list)
{
	int pipefd[2], child;

	pipe(pipefd);
	child = fork();
	if (child == -1)
		err(1, "forking");

	if (child == 0) {
		/* Child: close the write end and loop waking the parent. */
		close(pipefd[1]);
		wake_parent(pipefd[0], lguest_fd, device_list);
	}
	/* Parent: keep the write end to send fd-ignore messages. */
	close(pipefd[0]);

	return pipefd[1];
}
382 static void *_check_pointer(unsigned long addr
, unsigned int size
,
385 if (addr
>= LGUEST_GUEST_TOP
|| addr
+ size
>= LGUEST_GUEST_TOP
)
386 errx(1, "%s:%i: Invalid address %li", __FILE__
, line
, addr
);
389 #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
391 /* Returns pointer to dma->used_len */
392 static u32
*dma2iov(unsigned long dma
, struct iovec iov
[], unsigned *num
)
395 struct lguest_dma
*udma
;
397 udma
= check_pointer(dma
, sizeof(*udma
));
398 for (i
= 0; i
< LGUEST_MAX_DMA_SECTIONS
; i
++) {
402 iov
[i
].iov_base
= check_pointer(udma
->addr
[i
], udma
->len
[i
]);
403 iov
[i
].iov_len
= udma
->len
[i
];
406 return &udma
->used_len
;
409 static u32
*get_dma_buffer(int fd
, void *key
,
410 struct iovec iov
[], unsigned int *num
, u32
*irq
)
412 u32 buf
[] = { LHREQ_GETDMA
, (u32
)key
};
416 udma
= write(fd
, buf
, sizeof(buf
));
417 if (udma
== (unsigned long)-1)
420 /* Kernel stashes irq in ->used_len. */
421 res
= dma2iov(udma
, iov
, num
);
426 static void trigger_irq(int fd
, u32 irq
)
428 u32 buf
[] = { LHREQ_IRQ
, irq
};
429 if (write(fd
, buf
, sizeof(buf
)) != 0)
430 err(1, "Triggering irq %i", irq
);
/* Point the iovec at a static scratch buffer so pending input can be read
 * and thrown away when the Guest supplied no DMA buffer. */
static void discard_iovec(struct iovec *iov, unsigned int *num)
{
	static char discard_buf[1024];

	*num = 1;
	iov->iov_base = discard_buf;
	iov->iov_len = sizeof(discard_buf);
}
/* Terminal settings saved at startup, restored at exit. */
static struct termios orig_term;

/* atexit() handler: put the terminal back the way we found it. */
static void restore_term(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
}
/* Tracks rapid ^C presses on the console so three within a second aborts.
 * NOTE(review): struct header reconstructed — opening lines were missing
 * from this extraction. */
struct console_abort
{
	/* How many ^C's seen in the current burst. */
	int count;
	/* When the burst started. */
	struct timeval start;
};
453 /* We DMA input to buffer bound at start of console page. */
454 static bool handle_console_input(int fd
, struct device
*dev
)
459 struct iovec iov
[LGUEST_MAX_DMA_SECTIONS
];
460 struct console_abort
*abort
= dev
->priv
;
462 lenp
= get_dma_buffer(fd
, dev
->mem
, iov
, &num
, &irq
);
464 warn("console: no dma buffer!");
465 discard_iovec(iov
, &num
);
468 len
= readv(dev
->fd
, iov
, num
);
470 warnx("Failed to get console input, ignoring console.");
476 trigger_irq(fd
, irq
);
479 /* Three ^C within one second? Exit. */
480 if (len
== 1 && ((char *)iov
[0].iov_base
)[0] == 3) {
482 gettimeofday(&abort
->start
, NULL
);
483 else if (abort
->count
== 3) {
485 gettimeofday(&now
, NULL
);
486 if (now
.tv_sec
<= abort
->start
.tv_sec
+1) {
487 /* Make sure waker is not blocked in BREAK */
488 u32 args
[] = { LHREQ_BREAK
, 0 };
490 write(fd
, args
, sizeof(args
));
505 static u32
handle_console_output(int fd
, const struct iovec
*iov
,
506 unsigned num
, struct device
*dev
)
508 return writev(STDOUT_FILENO
, iov
, num
);
511 static u32
handle_tun_output(int fd
, const struct iovec
*iov
,
512 unsigned num
, struct device
*dev
)
514 /* Now we've seen output, we should warn if we can't get buffers. */
515 *(bool *)dev
->priv
= true;
516 return writev(dev
->fd
, iov
, num
);
/* Byte offset of a peer's slot within the shared network page.
 * NOTE(review): body reconstructed — the original's per-peer stride was
 * missing from this extraction; verify the constant against the original. */
static unsigned long peer_offset(unsigned int peernum)
{
	return 4 * peernum;
}
524 static bool handle_tun_input(int fd
, struct device
*dev
)
529 struct iovec iov
[LGUEST_MAX_DMA_SECTIONS
];
531 lenp
= get_dma_buffer(fd
, dev
->mem
+peer_offset(NET_PEERNUM
), iov
, &num
,
534 if (*(bool *)dev
->priv
)
535 warn("network: no dma buffer!");
536 discard_iovec(iov
, &num
);
539 len
= readv(dev
->fd
, iov
, num
);
541 err(1, "reading network");
544 trigger_irq(fd
, irq
);
546 verbose("tun input packet len %i [%02x %02x] (%s)\n", len
,
547 ((u8
*)iov
[0].iov_base
)[0], ((u8
*)iov
[0].iov_base
)[1],
548 lenp
? "sent" : "discarded");
552 static u32
handle_block_output(int fd
, const struct iovec
*iov
,
553 unsigned num
, struct device
*dev
)
555 struct lguest_block_page
*p
= dev
->mem
;
557 unsigned int len
, reply_num
;
558 struct iovec reply
[LGUEST_MAX_DMA_SECTIONS
];
559 off64_t device_len
, off
= (off64_t
)p
->sector
* 512;
561 device_len
= *(off64_t
*)dev
->priv
;
563 if (off
>= device_len
)
564 err(1, "Bad offset %llu vs %llu", off
, device_len
);
565 if (lseek64(dev
->fd
, off
, SEEK_SET
) != off
)
566 err(1, "Bad seek to sector %i", p
->sector
);
568 verbose("Block: %s at offset %llu\n", p
->type
? "WRITE" : "READ", off
);
570 lenp
= get_dma_buffer(fd
, dev
->mem
, reply
, &reply_num
, &irq
);
572 err(1, "Block request didn't give us a dma buffer");
575 len
= writev(dev
->fd
, iov
, num
);
576 if (off
+ len
> device_len
) {
577 ftruncate(dev
->fd
, device_len
);
578 errx(1, "Write past end %llu+%u", off
, len
);
582 len
= readv(dev
->fd
, reply
, reply_num
);
586 p
->result
= 1 + (p
->bytes
!= len
);
587 trigger_irq(fd
, irq
);
591 static void handle_output(int fd
, unsigned long dma
, unsigned long key
,
592 struct device_list
*devices
)
596 struct iovec iov
[LGUEST_MAX_DMA_SECTIONS
];
599 lenp
= dma2iov(dma
, iov
, &num
);
600 for (i
= devices
->dev
; i
; i
= i
->next
) {
601 if (i
->handle_output
&& key
== i
->watch_key
) {
602 *lenp
= i
->handle_output(fd
, iov
, num
, i
);
606 warnx("Pending dma %p, key %p", (void *)dma
, (void *)key
);
609 static void handle_input(int fd
, struct device_list
*devices
)
611 struct timeval poll
= { .tv_sec
= 0, .tv_usec
= 0 };
615 fd_set fds
= devices
->infds
;
617 if (select(devices
->max_infd
+1, &fds
, NULL
, NULL
, &poll
) == 0)
620 for (i
= devices
->dev
; i
; i
= i
->next
) {
621 if (i
->handle_input
&& FD_ISSET(i
->fd
, &fds
)) {
622 if (!i
->handle_input(fd
, i
)) {
623 FD_CLR(i
->fd
, &devices
->infds
);
624 /* Tell waker to ignore it too... */
625 write(waker_fd
, &i
->fd
, sizeof(i
->fd
));
632 static struct lguest_device_desc
*new_dev_desc(u16 type
, u16 features
,
635 static unsigned long top
= LGUEST_GUEST_TOP
;
636 struct lguest_device_desc
*desc
;
638 desc
= malloc(sizeof(*desc
));
640 desc
->num_pages
= num_pages
;
641 desc
->features
= features
;
644 top
-= num_pages
*getpagesize();
645 map_zeroed_pages(top
, num_pages
);
646 desc
->pfn
= top
/ getpagesize();
652 static struct device
*new_device(struct device_list
*devices
,
653 u16 type
, u16 num_pages
, u16 features
,
655 bool (*handle_input
)(int, struct device
*),
656 unsigned long watch_off
,
657 u32 (*handle_output
)(int,
658 const struct iovec
*,
662 struct device
*dev
= malloc(sizeof(*dev
));
664 /* Append to device list. */
665 *devices
->lastdev
= dev
;
667 devices
->lastdev
= &dev
->next
;
671 set_fd(dev
->fd
, devices
);
672 dev
->desc
= new_dev_desc(type
, features
, num_pages
);
673 dev
->mem
= (void *)(dev
->desc
->pfn
* getpagesize());
674 dev
->handle_input
= handle_input
;
675 dev
->watch_key
= (unsigned long)dev
->mem
+ watch_off
;
676 dev
->handle_output
= handle_output
;
680 static void setup_console(struct device_list
*devices
)
684 if (tcgetattr(STDIN_FILENO
, &orig_term
) == 0) {
685 struct termios term
= orig_term
;
686 term
.c_lflag
&= ~(ISIG
|ICANON
|ECHO
);
687 tcsetattr(STDIN_FILENO
, TCSANOW
, &term
);
688 atexit(restore_term
);
691 /* We don't currently require a page for the console. */
692 dev
= new_device(devices
, LGUEST_DEVICE_T_CONSOLE
, 0, 0,
693 STDIN_FILENO
, handle_console_input
,
694 LGUEST_CONSOLE_DMA_KEY
, handle_console_output
);
695 dev
->priv
= malloc(sizeof(struct console_abort
));
696 ((struct console_abort
*)dev
->priv
)->count
= 0;
697 verbose("device %p: console\n",
698 (void *)(dev
->desc
->pfn
* getpagesize()));
701 static void setup_block_file(const char *filename
, struct device_list
*devices
)
706 struct lguest_block_page
*p
;
708 fd
= open_or_die(filename
, O_RDWR
|O_LARGEFILE
|O_DIRECT
);
709 dev
= new_device(devices
, LGUEST_DEVICE_T_BLOCK
, 1,
710 LGUEST_DEVICE_F_RANDOMNESS
,
711 fd
, NULL
, 0, handle_block_output
);
712 device_len
= dev
->priv
= malloc(sizeof(*device_len
));
713 *device_len
= lseek64(fd
, 0, SEEK_END
);
716 p
->num_sectors
= *device_len
/512;
717 verbose("device %p: block %i sectors\n",
718 (void *)(dev
->desc
->pfn
* getpagesize()), p
->num_sectors
);
721 /* We use fnctl locks to reserve network slots (autocleanup!) */
722 static unsigned int find_slot(int netfd
, const char *filename
)
727 fl
.l_whence
= SEEK_SET
;
730 fl
.l_start
< getpagesize()/sizeof(struct lguest_net
);
732 if (fcntl(netfd
, F_SETLK
, &fl
) == 0)
735 errx(1, "No free slots in network file %s", filename
);
738 static void setup_net_file(const char *filename
,
739 struct device_list
*devices
)
744 netfd
= open(filename
, O_RDWR
, 0);
746 if (errno
== ENOENT
) {
747 netfd
= open(filename
, O_RDWR
|O_CREAT
, 0600);
749 char page
[getpagesize()];
750 memset(page
, 0, sizeof(page
));
751 write(netfd
, page
, sizeof(page
));
755 err(1, "cannot open net file '%s'", filename
);
758 dev
= new_device(devices
, LGUEST_DEVICE_T_NET
, 1,
759 find_slot(netfd
, filename
)|LGUEST_NET_F_NOCSUM
,
762 /* We overwrite the /dev/zero mapping with the actual file. */
763 if (mmap(dev
->mem
, getpagesize(), PROT_READ
|PROT_WRITE
,
764 MAP_FIXED
|MAP_SHARED
, netfd
, 0) != dev
->mem
)
765 err(1, "could not mmap '%s'", filename
);
766 verbose("device %p: shared net %s, peer %i\n",
767 (void *)(dev
->desc
->pfn
* getpagesize()), filename
,
768 dev
->desc
->features
& ~LGUEST_NET_F_NOCSUM
);
771 static u32
str2ip(const char *ipaddr
)
773 unsigned int byte
[4];
775 sscanf(ipaddr
, "%u.%u.%u.%u", &byte
[0], &byte
[1], &byte
[2], &byte
[3]);
776 return (byte
[0] << 24) | (byte
[1] << 16) | (byte
[2] << 8) | byte
[3];
779 /* adapted from libbridge */
780 static void add_to_bridge(int fd
, const char *if_name
, const char *br_name
)
786 errx(1, "must specify bridge name");
788 ifidx
= if_nametoindex(if_name
);
790 errx(1, "interface %s does not exist!", if_name
);
792 strncpy(ifr
.ifr_name
, br_name
, IFNAMSIZ
);
793 ifr
.ifr_ifindex
= ifidx
;
794 if (ioctl(fd
, SIOCBRADDIF
, &ifr
) < 0)
795 err(1, "can't add %s to bridge %s", if_name
, br_name
);
798 static void configure_device(int fd
, const char *devname
, u32 ipaddr
,
799 unsigned char hwaddr
[6])
802 struct sockaddr_in
*sin
= (struct sockaddr_in
*)&ifr
.ifr_addr
;
804 memset(&ifr
, 0, sizeof(ifr
));
805 strcpy(ifr
.ifr_name
, devname
);
806 sin
->sin_family
= AF_INET
;
807 sin
->sin_addr
.s_addr
= htonl(ipaddr
);
808 if (ioctl(fd
, SIOCSIFADDR
, &ifr
) != 0)
809 err(1, "Setting %s interface address", devname
);
810 ifr
.ifr_flags
= IFF_UP
;
811 if (ioctl(fd
, SIOCSIFFLAGS
, &ifr
) != 0)
812 err(1, "Bringing interface %s up", devname
);
814 if (ioctl(fd
, SIOCGIFHWADDR
, &ifr
) != 0)
815 err(1, "getting hw address for %s", devname
);
817 memcpy(hwaddr
, ifr
.ifr_hwaddr
.sa_data
, 6);
820 static void setup_tun_net(const char *arg
, struct device_list
*devices
)
826 const char *br_name
= NULL
;
828 netfd
= open_or_die("/dev/net/tun", O_RDWR
);
829 memset(&ifr
, 0, sizeof(ifr
));
830 ifr
.ifr_flags
= IFF_TAP
| IFF_NO_PI
;
831 strcpy(ifr
.ifr_name
, "tap%d");
832 if (ioctl(netfd
, TUNSETIFF
, &ifr
) != 0)
833 err(1, "configuring /dev/net/tun");
834 ioctl(netfd
, TUNSETNOCSUM
, 1);
836 /* You will be peer 1: we should create enough jitter to randomize */
837 dev
= new_device(devices
, LGUEST_DEVICE_T_NET
, 1,
838 NET_PEERNUM
|LGUEST_DEVICE_F_RANDOMNESS
, netfd
,
839 handle_tun_input
, peer_offset(0), handle_tun_output
);
840 dev
->priv
= malloc(sizeof(bool));
841 *(bool *)dev
->priv
= false;
843 ipfd
= socket(PF_INET
, SOCK_DGRAM
, IPPROTO_IP
);
845 err(1, "opening IP socket");
847 if (!strncmp(BRIDGE_PFX
, arg
, strlen(BRIDGE_PFX
))) {
849 br_name
= arg
+ strlen(BRIDGE_PFX
);
850 add_to_bridge(ipfd
, ifr
.ifr_name
, br_name
);
854 /* We are peer 0, ie. first slot. */
855 configure_device(ipfd
, ifr
.ifr_name
, ip
, dev
->mem
);
857 /* Set "promisc" bit: we want every single packet. */
858 *((u8
*)dev
->mem
) |= 0x1;
862 verbose("device %p: tun net %u.%u.%u.%u\n",
863 (void *)(dev
->desc
->pfn
* getpagesize()),
864 (u8
)(ip
>>24), (u8
)(ip
>>16), (u8
)(ip
>>8), (u8
)ip
);
866 verbose("attached to bridge: %s\n", br_name
);
869 /* Now we know how much memory we have, we copy in device descriptors */
870 static void map_device_descriptors(struct device_list
*devs
, unsigned long mem
)
874 struct lguest_device_desc
*descs
;
876 /* Device descriptor array sits just above top of normal memory */
877 descs
= map_zeroed_pages(mem
, 1);
879 for (i
= devs
->dev
, num
= 0; i
; i
= i
->next
, num
++) {
880 if (num
== LGUEST_MAX_DEVICES
)
881 errx(1, "too many devices");
882 verbose("Device %i: %s\n", num
,
883 i
->desc
->type
== LGUEST_DEVICE_T_NET
? "net"
884 : i
->desc
->type
== LGUEST_DEVICE_T_CONSOLE
? "console"
885 : i
->desc
->type
== LGUEST_DEVICE_T_BLOCK
? "block"
887 descs
[num
] = *i
->desc
;
889 i
->desc
= &descs
[num
];
893 static void __attribute__((noreturn
))
894 run_guest(int lguest_fd
, struct device_list
*device_list
)
897 u32 args
[] = { LHREQ_BREAK
, 0 };
898 unsigned long arr
[2];
901 /* We read from the /dev/lguest device to run the Guest. */
902 readval
= read(lguest_fd
, arr
, sizeof(arr
));
904 if (readval
== sizeof(arr
)) {
905 handle_output(lguest_fd
, arr
[0], arr
[1], device_list
);
907 } else if (errno
== ENOENT
) {
908 char reason
[1024] = { 0 };
909 read(lguest_fd
, reason
, sizeof(reason
)-1);
910 errx(1, "%s", reason
);
911 } else if (errno
!= EAGAIN
)
912 err(1, "Running guest failed");
913 handle_input(lguest_fd
, device_list
);
914 if (write(lguest_fd
, args
, sizeof(args
)) < 0)
915 err(1, "Resetting break");
919 static struct option opts
[] = {
920 { "verbose", 0, NULL
, 'v' },
921 { "sharenet", 1, NULL
, 's' },
922 { "tunnet", 1, NULL
, 't' },
923 { "block", 1, NULL
, 'b' },
924 { "initrd", 1, NULL
, 'i' },
/* Print usage and exit with status 1. */
static void usage(void)
{
	errx(1, "Usage: lguest [--verbose] "
	     "[--sharenet=<filename>|--tunnet=(<ipaddr>|bridge:<bridgename>)\n"
	     "|--block=<filename>|--initrd=<filename>]...\n"
	     "<mem-in-mb> vmlinux [args...]");
}
935 int main(int argc
, char *argv
[])
937 unsigned long mem
, pgdir
, start
, page_offset
, initrd_size
= 0;
939 struct device_list device_list
;
940 void *boot
= (void *)0;
941 const char *initrd_name
= NULL
;
943 device_list
.max_infd
= -1;
944 device_list
.dev
= NULL
;
945 device_list
.lastdev
= &device_list
.dev
;
946 FD_ZERO(&device_list
.infds
);
948 while ((c
= getopt_long(argc
, argv
, "v", opts
, NULL
)) != EOF
) {
954 setup_net_file(optarg
, &device_list
);
957 setup_tun_net(optarg
, &device_list
);
960 setup_block_file(optarg
, &device_list
);
963 initrd_name
= optarg
;
966 warnx("Unknown argument %s", argv
[optind
]);
970 if (optind
+ 2 > argc
)
973 /* We need a console device */
974 setup_console(&device_list
);
976 /* First we map /dev/zero over all of guest-physical memory. */
977 mem
= atoi(argv
[optind
]) * 1024 * 1024;
978 map_zeroed_pages(0, mem
/ getpagesize());
980 /* Now we load the kernel */
981 start
= load_kernel(open_or_die(argv
[optind
+1], O_RDONLY
),
984 /* Write the device descriptors into memory. */
985 map_device_descriptors(&device_list
, mem
);
987 /* Map the initrd image if requested */
989 initrd_size
= load_initrd(initrd_name
, mem
);
990 *(unsigned long *)(boot
+0x218) = mem
- initrd_size
;
991 *(unsigned long *)(boot
+0x21c) = initrd_size
;
992 *(unsigned char *)(boot
+0x210) = 0xFF;
995 /* Set up the initial linar pagetables. */
996 pgdir
= setup_pagetables(mem
, initrd_size
, page_offset
);
998 /* E820 memory map: ours is a simple, single region. */
999 *(char*)(boot
+E820NR
) = 1;
1000 *((struct e820entry
*)(boot
+E820MAP
))
1001 = ((struct e820entry
) { 0, mem
, E820_RAM
});
1002 /* Command line pointer and command line (at 4096) */
1003 *(void **)(boot
+ 0x228) = boot
+ 4096;
1004 concat(boot
+ 4096, argv
+optind
+2);
1005 /* Paravirt type: 1 == lguest */
1006 *(int *)(boot
+ 0x23c) = 1;
1008 lguest_fd
= tell_kernel(pgdir
, start
, page_offset
);
1009 waker_fd
= setup_waker(lguest_fd
, &device_list
);
1011 run_guest(lguest_fd
, &device_list
);