int main(int argc, char** argv) { struct timeval start_tv = {0}; struct timeval end_tv = {0}; long usec_diff; long nr_ctx_switches; void *join_ret; if (argc > 1) nr_switch_loops = strtol(argv[1], 0, 10); printf("Making 2 threads of %d switches each\n", nr_switch_loops); pthread_can_vcore_request(FALSE); /* 2LS won't manage vcores */ pthread_need_tls(FALSE); pthread_lib_init(); /* gives us one vcore */ /* each is passed the other's pthread_t. th1 starts the switching. */ if (pthread_create(&th1, NULL, &switch_thread, &th2)) perror("pth_create 1 failed"); /* thread 2 is created, but not put on the runnable list */ if (__pthread_create(&th2, NULL, &switch_thread, &th1)) perror("pth_create 2 failed"); if (gettimeofday(&start_tv, 0)) perror("Start time error..."); ready = TRUE; /* signal to any spinning uthreads to start */ pthread_join(th1, &join_ret); pthread_join(th2, &join_ret); if (gettimeofday(&end_tv, 0)) perror("End time error..."); nr_ctx_switches = 2 * nr_switch_loops; usec_diff = (end_tv.tv_sec - start_tv.tv_sec) * 1000000 + (end_tv.tv_usec - start_tv.tv_usec); printf("Done: %d loops\n", nr_switch_loops); printf("Nr context switches: %ld\n", nr_ctx_switches); printf("Time to run: %ld usec\n", usec_diff); printf("Context switch latency: %d nsec\n", (int)(1000LL*usec_diff / nr_ctx_switches)); printf("Context switches / sec: %d\n\n", (int)(1000000LL*nr_ctx_switches / usec_diff)); }
int main(int argc, char** argv) { struct timeval start_tv = {0}; struct timeval end_tv = {0}; long usec_diff; long nr_ctx_switches; if (argc > 1) nr_yield_threads = strtol(argv[1], 0, 10); if (argc > 2) nr_yield_loops = strtol(argv[2], 0, 10); if (argc > 3) nr_vcores = strtol(argv[3], 0, 10); if (argc > 4) amt_fake_work = strtol(argv[4], 0, 10); nr_yield_threads = MIN(nr_yield_threads, MAX_NR_TEST_THREADS); printf("Making %d threads of %d loops each, on %d vcore(s), %d work\n", nr_yield_threads, nr_yield_loops, nr_vcores, amt_fake_work); /* OS dependent prep work */ if (nr_vcores) { /* Only do the vcore trickery if requested */ pthread_can_vcore_request(FALSE); /* 2LS won't manage vcores */ pthread_need_tls(FALSE); pthread_lib_init(); /* gives us one vcore */ vcore_request(nr_vcores - 1); /* ghetto incremental interface */ for (int i = 0; i < nr_vcores; i++) { printd("Vcore %d mapped to pcore %d\n", i, __procinfo.vcoremap[i].pcoreid); } } /* create and join on yield */ for (int i = 0; i < nr_yield_threads; i++) { printf_safe("[A] About to create thread %d\n", i); if (pthread_create(&my_threads[i], NULL, &yield_thread, NULL)) perror("pth_create failed"); } if (gettimeofday(&start_tv, 0)) perror("Start time error..."); ready = TRUE; /* signal to any spinning uthreads to start */ for (int i = 0; i < nr_yield_threads; i++) { printf_safe("[A] About to join on thread %d(%p)\n", i, my_threads[i]); pthread_join(my_threads[i], &my_retvals[i]); printf_safe("[A] Successfully joined on thread %d (retval: %p)\n", i, my_retvals[i]); } if (gettimeofday(&end_tv, 0)) perror("End time error..."); nr_ctx_switches = nr_yield_threads * nr_yield_loops; usec_diff = (end_tv.tv_sec - start_tv.tv_sec) * 1000000 + (end_tv.tv_usec - start_tv.tv_usec); printf("Done: %d uthreads, %d loops, %d vcores, %d work\n", nr_yield_threads, nr_yield_loops, nr_vcores, amt_fake_work); printf("Nr context switches: %d\n", nr_ctx_switches); printf("Time to run: %d usec\n", usec_diff); if (nr_vcores == 1) printf("Context switch latency: %d nsec\n", (int)(1000LL*usec_diff / nr_ctx_switches)); printf("Context switches / sec: %d\n\n", (int)(1000000LL*nr_ctx_switches / usec_diff)); }
int main(int argc, char** argv) { struct timeval start_tv = {0}; struct timeval end_tv = {0}; long usec_diff; long nr_ctx_switches; if (argc > 1) nr_yield_threads = strtol(argv[1], 0, 10); if (argc > 2) nr_yield_loops = strtol(argv[2], 0, 10); if (argc > 3) nr_vcores = strtol(argv[3], 0, 10); if (argc > 4) amt_fake_work = strtol(argv[4], 0, 10); nr_yield_threads = MIN(nr_yield_threads, MAX_NR_TEST_THREADS); printf("Making %d threads of %d loops each, on %d vcore(s), %d work\n", nr_yield_threads, nr_yield_loops, nr_vcores, amt_fake_work); /* OS dependent prep work */ #ifdef __ros__ if (nr_vcores) { /* Only do the vcore trickery if requested */ parlib_never_yield = TRUE; pthread_need_tls(FALSE); pthread_mcp_init(); /* gives us one vcore */ vcore_request_total(nr_vcores); parlib_never_vc_request = TRUE; for (int i = 0; i < nr_vcores; i++) { printf_safe("Vcore %d mapped to pcore %d\n", i, __procinfo.vcoremap[i].pcoreid); } } struct uth_join_request *join_reqs; join_reqs = malloc(nr_yield_threads * sizeof(struct uth_join_request)); for (int i = 0; i < nr_yield_threads; i++) join_reqs[i].retval_loc = &my_retvals[i]; assert(join_reqs); #endif /* __ros__ */ pthread_barrier_init(&barrier, NULL, nr_yield_threads); /* create and join on yield */ for (int i = 0; i < nr_yield_threads; i++) { printf_safe("[A] About to create thread %d\n", i); if (pthread_create(&my_threads[i], NULL, &yield_thread, NULL)) perror("pth_create failed"); } if (gettimeofday(&start_tv, 0)) perror("Start time error..."); /* Akaros supports parallel join */ #ifdef __ros__ for (int i = 0; i < nr_yield_threads; i++) join_reqs[i].uth = (struct uthread*)my_threads[i]; uthread_join_arr(join_reqs, nr_yield_threads); #else for (int i = 0; i < nr_yield_threads; i++) { printf_safe("[A] About to join on thread %d(%p)\n", i, my_threads[i]); pthread_join(my_threads[i], &my_retvals[i]); printf_safe("[A] Successful join on thread %d (retval: %p)\n", i, my_retvals[i]); } #endif if (gettimeofday(&end_tv, 0)) perror("End time error..."); nr_ctx_switches = nr_yield_threads * nr_yield_loops; usec_diff = (end_tv.tv_sec - start_tv.tv_sec) * 1000000 + (end_tv.tv_usec - start_tv.tv_usec); printf("Done: %d uthreads, %d loops, %d vcores, %d work\n", nr_yield_threads, nr_yield_loops, nr_vcores, amt_fake_work); printf("Nr context switches: %ld\n", nr_ctx_switches); printf("Time to run: %ld usec\n", usec_diff); if (nr_vcores == 1) printf("Context switch latency: %d nsec\n", (int)(1000LL*usec_diff / nr_ctx_switches)); printf("Context switches / sec: %d\n\n", (int)(1000000LL*nr_ctx_switches / usec_diff)); }
int main(int argc, char **argv) { int i, amt; int nr_gpcs = 1; uint64_t entry; int fd = open("#c/sysctl", O_RDWR), ret; int kfd = -1; bool smallkernel = false; void * x; static char cmd[512]; if (fd < 0) { perror("#c/sysctl"); exit(1); } argc--,argv++; if (argc != 2) { fprintf(stderr, "Usage: %s vmimage entrypoint\n", argv[0]); exit(1); } entry = strtoull(argv[1], 0, 0); kfd = open(argv[0], O_RDONLY); if (kfd < 0) { perror(argv[0]); exit(1); } if (ros_syscall(SYS_setup_vmm, nr_gpcs, 0, 0, 0, 0, 0) != nr_gpcs) { perror("Guest pcore setup failed"); exit(1); } my_threads = malloc(sizeof(pthread_t) * nr_threads); my_retvals = malloc(sizeof(void*) * nr_threads); if (!(my_retvals && my_threads)) perror("Init threads/malloc"); pthread_can_vcore_request(FALSE); /* 2LS won't manage vcores */ pthread_need_tls(FALSE); pthread_mcp_init(); /* gives us one vcore */ vcore_request(nr_threads - 1); /* ghetto incremental interface */ for (int i = 0; i < nr_threads; i++) { x = __procinfo.vcoremap; printf("%p\n", __procinfo.vcoremap); printf("Vcore %d mapped to pcore %d\n", i, __procinfo.vcoremap[i].pcoreid); } if (pthread_create(&my_threads[0], NULL, &talk_thread, NULL)) perror("pth_create failed"); // if (pthread_create(&my_threads[1], NULL, &fail, NULL)) // perror("pth_create failed"); printf("threads started\n"); if (0) for (int i = 0; i < nr_threads-1; i++) { int ret; if (pthread_join(my_threads[i], &my_retvals[i])) perror("pth_join failed"); printf("%d %d\n", i, ret); } ret = syscall(33, 1); if (ret < 0) { perror("vm setup"); exit(1); } /* blob that is faulted in from the EPT first. we need this to be in low * memory (not above the normal mmap_break), so the EPT can look it up. * Note that we won't get 4096. The min is 1MB now, and ld is there. */ mmap_blob = mmap((int*)(15*1048576), 16 * 1048576, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_ANONYMOUS, -1, 0); if (mmap_blob == MAP_FAILED) { perror("Unable to mmap"); exit(1); } memset(mmap_blob, 0, 16*1048576); // read in the kernel. x = mmap_blob + 0x1000000; for(;;) { amt = read(kfd, x, 1048576); if (amt < 0) { perror("read"); exit(1); } if (amt == 0) { break; } x += amt; } fprintf(stderr, "Read in %d bytes\n", x-mmap_blob); p512 = mmap_blob; p1 = &p512[512]; p2m = &p512[1024]; // We had thought to enter the kernel at the high address. But // there's just too much state the kernel has to clean up to // make this really work -- consider all the segment // descriptors that have to move, etc. So we will enter the // kernel in the low part of the address space, and let it // work up its page tables and the other good fun. Map the // kernel address space at low virtual, for 1G. It's ok to // map memory we have no access to. #define _2MiB 0x200000 p512[0] = (unsigned long long)p1 | 7; // if only we could guarantee 1G pages everywhere! p1[0] = /*0x87; */(unsigned long long)p2m | 7; for(i = 0; i < 16; i++) { p2m[i] = 0x87 | i * _2MiB; printf("pwm[%d] = 0x%llx\n", i, p2m[i]); } printf("p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%x\n", p512, p512[0], p1, p1[0]); sprintf(cmd, "V 0x%llx 0x%llx 0x%llx", entry, (unsigned long long) &stack[1024], (unsigned long long) p512); printf("Writing command :%s:\n", cmd); ret = write(fd, cmd, strlen(cmd)); if (ret != strlen(cmd)) { perror(cmd); } sprintf(cmd, "V 0 0 0"); while (! done) { char c[1]; printf("hit return\n"); read(0, c, 1); if (debug) fprintf(stderr, "RESUME\n"); ret = write(fd, cmd, strlen(cmd)); if (ret != strlen(cmd)) { perror(cmd); } } return 0; }