int main(int argc, char **argv)
{
    int c;
    int fd;
    int rc = 0;
    int flags = 0;
    int iovcnt = 0;
    int act = ACT_NONE;
    char pad = 0xba;
    char *end;
    char *fname = "FILE";
    unsigned long len = 0;
    struct iovec *iov;
    off64_t offset = 0;

    while ((c = getopt(argc, argv, "f:n:s:rwahvdz")) != -1) {
        switch (c) {
        case 'f':
            fname = optarg;
            break;
        case 'n':
            iovcnt = strtoul(optarg, &end, 0);
            if (*end) {
                printf("Bad iov count: %s\n", optarg);
                return 1;
            }
            if (iovcnt > UIO_MAXIOV || iovcnt <= 0) {
                printf("Wrong iov count\n");
                return 1;
            }
            break;
        case 's':
            act |= ACT_SEEK;
            offset = strtoull(optarg, &end, 0);
            if (*end) {
                printf("Bad seek offset: %s\n", optarg);
                return 1;
            }
            break;
        case 'w':
            act |= ACT_WRITE;
            break;
        case 'r':
            act |= ACT_READ;
            break;
        case 'a':
            flags |= O_APPEND;
            break;
        case 'd':
            flags |= O_LOV_DELAY_CREATE;
            break;
        case 'z':
            pad = 0;
            act |= ACT_READHOLE;
            break;
        case 'v':
            act |= ACT_VERIFY;
            break;
        case 'h':
            usage();
            break;
        }
    }

    if (act == ACT_NONE) {
        usage();
        return 1;
    }

    if ((act & ACT_READ) && (act & ACT_WRITE)) {
        printf("Read and write test should be exclusive\n");
        return 1;
    }

    if (argc - optind < iovcnt) {
        printf("Not enough parameters for iov size\n");
        return 1;
    }

    iov = (struct iovec *)malloc(iovcnt * sizeof(struct iovec));
    if (iov == NULL) {
        printf("No memory %s\n", strerror(errno));
        return 1;
    }

    for (c = 0; c < iovcnt; c++) {
        struct iovec *iv = &iov[c];

        iv->iov_len = strtoul(argv[optind++], &end, 0);
        if (*end) {
            printf("Error iov size\n");
            rc = 1;
            goto out;
        }
        iv->iov_base = mmap(NULL, iv->iov_len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANON, 0, 0);
        if (iv->iov_base == MAP_FAILED) {
            printf("No memory %s\n", strerror(errno));
            rc = 1;
            goto out;
        }
        if (act & ACT_WRITE)
            memset(iv->iov_base, pad, iv->iov_len);
        len += iv->iov_len;
    }

    fd = open(fname, O_LARGEFILE | O_RDWR | O_CREAT | flags, 0644);
    if (fd == -1) {
        printf("Cannot open %s:%s\n", fname, strerror(errno));
        return 1;
    }

    if ((act & ACT_SEEK) && (lseek64(fd, offset, SEEK_SET) < 0)) {
        printf("Cannot seek %s\n", strerror(errno));
        rc = 1;
        goto out;
    }

    if (act & ACT_WRITE) {
        rc = writev(fd, iov, iovcnt);
        if (rc != len) {
            printf("Write error: %s (rc = %d, len = %ld)\n",
                   strerror(errno), rc, len);
            rc = 1;
            goto out;
        }
    } else if (act & ACT_READ) {
        rc = readv(fd, iov, iovcnt);
        if (rc != len) {
            printf("Read error: %s rc = %d\n", strerror(errno), rc);
            rc = 1;
            goto out;
        }

        /* It should return zeroed buf if the read hits hole. */
        if (((act & ACT_READHOLE) || (act & ACT_VERIFY)) &&
            data_verify(iov, iovcnt, pad)) {
            rc = 1;
            goto out;
        }
    }

    rc = 0;
out:
    if (iov)
        free(iov);
    return rc;
}
int main (int argc, char *argv[])
{
  MPI_Init (&argc, &argv);

  int nProc, iProc;
  MPI_Comm_rank (MPI_COMM_WORLD, &iProc);
  MPI_Comm_size (MPI_COMM_WORLD, &nProc);

  // number of threads
  const int NTHREADS = 6;

  // number of buffers
  const int NWAY = 2;

  // left neighbour
  const int left = LEFT(iProc, nProc);

  // right neighbour
  const int right = RIGHT(iProc, nProc);

  // allocate array for local vector, left halo and right halo
  double* array = malloc (NWAY * (NTHREADS+2) * 2 * VLEN * sizeof (double));
  ASSERT (array != 0);

  // initial buffer id
  int buffer_id = 0;

  // initialize data
  data_init (NTHREADS, iProc, buffer_id, array);

  omp_set_num_threads (NTHREADS);

  MPI_Barrier (MPI_COMM_WORLD);

  double time = -now();

#pragma omp parallel default (shared) firstprivate (buffer_id)
  {
    const int tid = omp_get_thread_num();

    for (int k = 0; k < NITER; ++k)
    {
      for ( int i = 0; i < nProc * NTHREADS; ++i )
      {
        const int slice_id   = tid + 1;
        const int left_halo  = 0;
        const int right_halo = NTHREADS+1;

        if (tid == 0)
        {
          MPI_Request send_req[2];
          MPI_Request recv_req[2];

          // post recv for left halo
          MPI_Irecv ( &array_ELEM_right (buffer_id, left_halo, 0), VLEN, MPI_DOUBLE
                    , left, i, MPI_COMM_WORLD, &recv_req[0]);

          // post recv for right halo
          MPI_Irecv ( &array_ELEM_left (buffer_id, right_halo, 0), VLEN, MPI_DOUBLE
                    , right, i, MPI_COMM_WORLD, &recv_req[1]);

          // issue send to right neighbour
          MPI_Isend ( &array_ELEM_right (buffer_id, right_halo - 1, 0), VLEN, MPI_DOUBLE
                    , right, i, MPI_COMM_WORLD, &send_req[0]);

          // issue send to left neighbour
          MPI_Isend ( &array_ELEM_left (buffer_id, left_halo + 1, 0), VLEN, MPI_DOUBLE
                    , left, i, MPI_COMM_WORLD, &send_req[1]);

          // free send requests
          MPI_Request_free(&send_req[0]);
          MPI_Request_free(&send_req[1]);

          // wait for Irecv
          MPI_Waitall (2, recv_req, MPI_STATUSES_IGNORE);
        }

#pragma omp barrier

        // compute data, read from id "buffer_id", write to id "1 - buffer_id"
        data_compute (NTHREADS, array, 1 - buffer_id, buffer_id, slice_id);

#pragma omp barrier

        // alternate the buffer
        buffer_id = 1 - buffer_id;
      }
    }
  }

  time += now();

  data_verify (NTHREADS, iProc, ( NITER * nProc * NTHREADS ) % NWAY, array);

  printf ("# mpi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n"
         , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time );

  MPI_Finalize();

  free (array);

  return EXIT_SUCCESS;
}
int main (int argc, char *argv[])
{
  SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK));

  gaspi_rank_t iProc, nProc;
  SUCCESS_OR_DIE (gaspi_proc_rank (&iProc));
  SUCCESS_OR_DIE (gaspi_proc_num (&nProc));

  // number of threads
  const int NTHREADS = 2;

  // number of buffers
  const int NWAY = 2;

  // allocate segment for the local vector, left halo and right halo
  gaspi_segment_id_t const segment_id = 0;
  SUCCESS_OR_DIE ( gaspi_segment_create
    ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double)
    , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED));

  gaspi_pointer_t array;
  SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) );

  // initial buffer id
  int buffer_id = 0;

  // set notification values
  gaspi_notification_id_t left_data_available[NWAY];
  gaspi_notification_id_t right_data_available[NWAY];
  for (gaspi_notification_id_t id = 0; id < NWAY; ++id)
  {
    left_data_available[id] = id;
    right_data_available[id] = NWAY + id;
  }

  // set queue id
  gaspi_queue_id_t queue_id = 0;

  // initialize slice data structures
  slice *ssl = (slice *) malloc (NTHREADS * sizeof (slice));
  ASSERT (ssl);
  init_slices (ssl, NTHREADS);

  // initialize data
  data_init (NTHREADS, iProc, buffer_id, array);

  const int right_halo = NTHREADS+1;
  const int left_halo  = 0;

  // issue initial write to left ngb
  wait_for_queue_max_half (&queue_id);
  SUCCESS_OR_DIE ( gaspi_write_notify
    ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc)
    , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double)
    , right_data_available[buffer_id], 1, queue_id, GASPI_BLOCK));

  // issue initial write to right ngb
  wait_for_queue_max_half (&queue_id);
  SUCCESS_OR_DIE ( gaspi_write_notify
    ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc)
    , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double)
    , left_data_available[buffer_id], 1, queue_id, GASPI_BLOCK));

  // set total number of iterations per slice
  const int num = nProc * NTHREADS * NITER;

  omp_set_num_threads (NTHREADS);

  double time = -now();

#pragma omp parallel default (none) firstprivate (buffer_id, queue_id) \
  shared (array, left_data_available, right_data_available, ssl, stderr)
  {
    slice* sl;

    while ((sl = get_slice_and_lock (ssl, NTHREADS, num)))
    {
      handle_slice ( sl, array, left_data_available, right_data_available
                   , segment_id, queue_id, NWAY, NTHREADS, num);

      /* TODO
         ====
         - Which functionality do we need in 'handle_slice' ?
           (asynchronous dataflow for 1-D halo-exchange)
         - Discuss.
         - Bonus question: Can we be at different iteration stages
           for left and right halo ? If yes: Why ?
      */

      omp_unset_lock (&sl->lock);
    }

#pragma omp barrier
  }

  time += now();

  data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array);

  printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n"
         , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time );

  gaspi_proc_term (GASPI_BLOCK);

  return EXIT_SUCCESS;
}
int main (int argc, char *argv[])
{
  SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK));

  gaspi_rank_t iProc, nProc;
  SUCCESS_OR_DIE (gaspi_proc_rank (&iProc));
  SUCCESS_OR_DIE (gaspi_proc_num (&nProc));

  // number of threads
  const int NTHREADS = 2;

  // number of buffers
  const int NWAY = 2;

  // allocate segment for the local vector, left halo and right halo
  gaspi_segment_id_t const segment_id = 0;
  SUCCESS_OR_DIE ( gaspi_segment_create
    ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double)
    , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED));

  gaspi_pointer_t array;
  SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) );

  // initial buffer id
  int buffer_id = 0;

  // set notification values
  gaspi_notification_id_t left_data_available[NWAY];
  gaspi_notification_id_t right_data_available[NWAY];
  for (gaspi_notification_id_t id = 0; id < NWAY; ++id)
  {
    left_data_available[id] = id;
    right_data_available[id] = NWAY + id;
  }

  // set queue id
  gaspi_queue_id_t queue_id = 0;

  // initialize data
  data_init (NTHREADS, iProc, buffer_id, array);

  omp_set_num_threads (NTHREADS);

  double time = -now();

#pragma omp parallel default (shared) firstprivate (buffer_id)
  {
    const int tid = omp_get_thread_num();

    for (int k = 0; k < NITER; ++k)
    {
      for ( int i = 0; i < nProc * NTHREADS; ++i )
      {
        const int left_halo  = 0;
        const int slice_id   = tid + 1;
        const int right_halo = NTHREADS+1;

        if (tid == 0)
        {
          // issue write to left neighbour
          wait_for_queue_max_half (&queue_id);
          SUCCESS_OR_DIE ( gaspi_write_notify
            ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc)
            , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double)
            , right_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK));

          // issue write to right neighbour
          wait_for_queue_max_half (&queue_id);
          SUCCESS_OR_DIE ( gaspi_write_notify
            ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc)
            , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double)
            , left_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK));

          // wait for data notification from the right neighbour
          wait_or_die (segment_id, right_data_available[buffer_id], 1 + i);

          // wait for data notification from the left neighbour
          wait_or_die (segment_id, left_data_available[buffer_id], 1 + i);
        }

#pragma omp barrier

        // compute data, read from id "buffer_id", write to id "1 - buffer_id"
        data_compute ( NTHREADS, array, 1 - buffer_id, buffer_id, slice_id);

#pragma omp barrier

        // alternate the buffer
        buffer_id = 1 - buffer_id;
      }
    }
  }

  time += now();

  data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array);

  printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n"
         , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time );

  gaspi_proc_term (GASPI_BLOCK);

  return EXIT_SUCCESS;
}
void nand_boot(int nand_boot_select)
{
    unsigned int offset, size;
    void (*kernel)(int, char **, char *);
    int i;
    static u32 *param_addr = 0;
    static u8 *tmpbuf = 0;
    static u8 cmdline[256] = CFG_CMDLINE;

    serial_puts_info("Enter nand_boot routine ...\n");

    switch (nand_boot_select) {
    case NORMAL_BOOT:
        offset = CFG_BOOT_OFFS;
        size = CFG_BOOT_SIZE;
#ifdef BOOTARGS_NORMAL
        strcpy((char *)cmdline, BOOTARGS_NORMAL);
#endif
        serial_puts_info("Normal boot ...\n");
        break;
    case RECOVERY_BOOT:
        offset = CFG_RECOVERY_OFFS;
        size = CFG_RECOVERY_SIZE;
#ifdef BOOTARGS_RECOVERY
        strcpy((char *)cmdline, BOOTARGS_RECOVERY);
#endif
        serial_puts_info("Recovery boot ...\n");
        break;
#if defined(CONFIG_JZ4760_PT701_8)
    case PRETEST_BOOT:
        offset = CFG_PRETEST_OFFS;
        size = CFG_PRETEST_SIZE;
        serial_puts_info("Pretest boot ...\n");
        break;
#endif
    default:
        serial_puts_info("Get nand boot select failed, default to normal boot ...\n");
        offset = CFG_BOOT_OFFS;
        size = CFG_BOOT_SIZE;
        break;
    }

    serial_puts_info("Load kernel from NAND ...\n");

    /* Load the first page (boot image header) */
    do_nand(offset, CFG_NAND_PAGE_SIZE, (u8 *)CFG_KERNEL_DST);

    struct boot_img_hdr *bootimginfo;
    int kernel_actual;
    int ramdisk_actual;
    unsigned int page_mask;

    if (2048 < sizeof(struct boot_img_hdr)) {
        serial_puts_info("size too small");
    }

    bootimginfo = (struct boot_img_hdr *)CFG_KERNEL_DST;
    page_mask = CFG_NAND_PAGE_SIZE - 1;

    /* Round kernel and ramdisk sizes up to whole NAND pages */
    kernel_actual = (bootimginfo->kernel_size + page_mask) & (~page_mask);
    ramdisk_actual = (bootimginfo->ramdisk_size + page_mask) & (~page_mask);

    /* Add M to make sure the special data is included */
    size = kernel_actual + ramdisk_actual + M;

    /* Load kernel and ramdisk */
    do_nand(offset + CFG_NAND_PAGE_SIZE, size,
            (u8 *)(CFG_KERNEL_DST + CFG_NAND_PAGE_SIZE));

#ifdef CONFIG_SECURITY_ENABLE
    /* Special data is 4M from head. */
    if (data_verify((unsigned char *)CFG_KERNEL_DST, 4 * M, ENV_BOOTLOADER) < 0) {
        serial_puts_spl("kernel verify failed, power off\n");
        //powerdown();
        while (1);
    }
#endif

#if 0
    serial_puts_info("CRC32 = 0x");
    serial_put_hex(CRC_32(CFG_KERNEL_DST, 2973696));
    serial_put_hex(*((unsigned int *)(CFG_KERNEL_DST + 0)));
    serial_put_hex(*((unsigned int *)(CFG_KERNEL_DST + 4)));
    serial_put_hex(*((unsigned int *)(CFG_KERNEL_DST + 8)));
    serial_put_hex(*((unsigned int *)(CFG_KERNEL_DST + 12)));
#endif

    serial_puts_info("Prepare kernel parameters ...\n");

    /* init kernel, ramdisk and prepare parameters */
    if (init_boot_linux((unsigned char *)CFG_KERNEL_DST, size) == 0) {
        serial_puts_info("Jump to kernel start Addr 0x");
        dump_uint(CFG_KERNEL_DST);
        serial_puts("\n\n");

        kernel = (void (*)(int, char **, char *))CFG_KERNEL_DST;
        flush_cache_all();

#if CONFIG_XBOOT_LOGO_FILE
        //__lcd_display_off();
#endif

        /* Jump to kernel image */
        (*kernel)(2, (char **)(PARAM_BASE + 16), (char *)PARAM_BASE);

        serial_puts_info("We should not come here ...\n");
    } else {
        serial_puts_info("Magic number error, boot error ...\n");
    }
}