// =========================================================================== // =========================================================================== void fft_myrmics_cfft2_phase(rid_t *buf_regions, float ***buf, float **w, int n, int tile_size, int blk_size) { rid_t buf_single_region; float **buf_row; float *w_row; int i; void *args[6]; unsigned int deps[6]; // For each row of tiles, run cfft2 for (i = 0; i < tile_size; i++) { buf_single_region = buf_regions[i]; buf_row = buf[i]; w_row = w[i]; //printf("cfft2_row rid %d ptrs 0x%X 0x%X\r\n", buf_single_region, buf_row, w_row); args[0] = (void *) buf_single_region; deps[0] = SYS_TYPE_REGION_ARG | SYS_TYPE_INOUT_ARG; args[1] = (void *) buf_row; deps[1] = SYS_TYPE_BYVALUE_ARG; args[2] = (void *) w_row; deps[2] = SYS_TYPE_IN_ARG; args[3] = (void *) n; deps[3] = SYS_TYPE_BYVALUE_ARG; args[4] = (void *) tile_size; deps[4] = SYS_TYPE_BYVALUE_ARG; args[5] = (void *) blk_size; deps[5] = SYS_TYPE_BYVALUE_ARG; sys_spawn(1, args, deps, 6); // fft_myrmics_cfft2_row() } }
void syscall_dispatch(void) { unsigned int nr; nr = syscall_get_arg1(); switch (nr) { case SYS_puts: /* * Output a string to the screen. * * Parameters: * a[0]: the linear address where the string is * a[1]: the length of the string * * Return: * None. * * Error: * E_MEM */ sys_puts(); break; case SYS_spawn: /* * Create a new process. * * Parameters: * a[0]: the identifier of the ELF image * * Return: * the process ID of the process * * Error: * E_INVAL_ADDR, E_INVAL_PID */ sys_spawn(); break; case SYS_yield: /* * Called by a process to abandon its CPU slice. * * Parameters: * None. * * Return: * None. * * Error: * None. */ sys_yield(); break; default: syscall_set_errno(E_INVAL_CALLNR); } }
/*
 * Program entry point: sets the system clock, prints a greeting on the debug
 * serial module, initializes the kernel, spawns the two workers and then
 * yields forever.
 */
int main(void)
{
    SysCtlClockSet(SYSCTL_SYSDIV_2_5 | SYSCTL_USE_PLL | SYSCTL_XTAL_16MHZ |
                   SYSCTL_OSC_MAIN);

    /* Debug serial output */
    Serial_init(Serial_module_debug, 115200);
    Serial_puts(Serial_module_debug, "Hello, world!\r\n");

    /* The kernel is given the address one past the end of kernel_stack. */
    kernel_init(kernel_stack + sizeof(kernel_stack));

    sys_spawn(worker1_main, NULL);
    sys_spawn(worker2_main, NULL);

    /* Nothing else to do here: keep handing the CPU away. */
    for (;;) {
        sys_yield();
    }

    return 0;
}
// =========================================================================== // =========================================================================== void fft_myrmics_Xpose(float ***src_buf_copy, rid_t *dst_buf_row, float ***dst_buf, int tile_size, int blk_size) { void *args[MAX_TILES + 4]; unsigned int deps[MAX_TILES + 4]; int i; int j; sys_assert(tile_size <= MAX_TILES); for (i = 0; i < tile_size; i++) { args[0] = (void *) dst_buf_row[i]; deps[0] = SYS_TYPE_REGION_ARG | SYS_TYPE_INOUT_ARG; args[1] = (void *) dst_buf[i]; deps[1] = SYS_TYPE_BYVALUE_ARG; args[2] = (void *) tile_size; deps[2] = SYS_TYPE_BYVALUE_ARG; args[3] = (void *) blk_size; deps[3] = SYS_TYPE_BYVALUE_ARG; for (j = 0; j < tile_size; j++) { args[j + 4] = (void *) src_buf_copy[j][i]; deps[j + 4] = SYS_TYPE_IN_ARG; } sys_spawn(2, args, deps, tile_size + 4); // fft_myrmics_Xpose_row } }
// =========================================================================== // fft_myrmics() Myrmics version of FFT // A n x n input table is partitioned into // tile_size x tile_size sub-blocks where each // block has size (n/tile_size) x (n/tile_size) // =========================================================================== // * INPUTS // int n Number of rows and columns of the input table // int tile_size Number of tile rows and columns in the 2D layout // =========================================================================== void fft_myrmics(int tile_size, int n) { rid_t r; rid_t *ra_row; rid_t *rb_row; int blk_size; int blk_size_sq; int i, j, k, l; float ***a; float ***a_copy; float ***b; float ***b_copy; float **w; unsigned int time_start; void *args[4]; unsigned int deps[4]; // Sanity checks if (n & (n - 1)) { printf("N must be a power of 2\r\n"); return; } if (tile_size & (tile_size - 1)) { printf("Tile size must be a power of 2\r\n"); return; } blk_size = n/tile_size; blk_size_sq = blk_size * blk_size; // Create holding region r = sys_ralloc(0, 99); // highest level a = sys_alloc(tile_size * sizeof(float **), r); b = sys_alloc(tile_size * sizeof(float **), r); w = sys_alloc(tile_size * sizeof(float *), r); // Initialize tiles and regions ra_row = sys_alloc(tile_size * sizeof(rid_t), r); rb_row = sys_alloc(tile_size * sizeof(rid_t), r); for (i = 0; i < tile_size; i++) { // Create a region for each row of tiles, for buffers a and b ra_row[i] = sys_ralloc(r, 0); rb_row[i] = sys_ralloc(r, 0); // Allocate pointers for each tile in the row a[i] = sys_alloc(tile_size * sizeof(float *), ra_row[i]); b[i] = sys_alloc(tile_size * sizeof(float *), rb_row[i]); // Allocate tiles in the row sys_balloc(2* blk_size_sq * sizeof(float), ra_row[i], tile_size, (void *) a[i]); sys_balloc(2* blk_size_sq * sizeof(float), rb_row[i], tile_size, (void *) b[i]); // Allocate w array w[i] = sys_alloc(2 * n * sizeof(float), r); // Initialize tables with some values for (j = 
0; j < tile_size; j++) { for (k = 0; k < blk_size; k++) { for (l = 0; l < 2 * blk_size; l++) { a[i][j][k * 2 * blk_size + l] = 0.01F; } } } for (j = 0; j < 2 * n; j++) { w[i][j] = 0.3F; } } // Copy the pointers of the a and b tables, because we'll need them in // fft_myrmics_Xpose() to spawn the tasks. We need to do this, because in // fft_myrmics_cfft2_phase() we delegate all r_row[*] to children tasks: // a[*][*] and b[*][*] are allocated in these regions, so we don't have // access there anymore from the master task. a_copy = sys_alloc(tile_size * sizeof(float **), 0); b_copy = sys_alloc(tile_size * sizeof(float **), 0); sys_balloc(tile_size * sizeof(float *), 0, tile_size, (void *) a_copy); sys_balloc(tile_size * sizeof(float *), 0, tile_size, (void *) b_copy); for (i = 0; i < tile_size; i++) { for (j = 0; j < tile_size; j++) { a_copy[i][j] = a[i][j]; b_copy[i][j] = b[i][j]; } } // Starting FFT printf("FFT 2D-block of %d x %d starting split into %d x %d tiles \r\n", n, n, tile_size, tile_size); // Start time time_start = ar_free_timer_get_ticks(); // Run first phase on buffer a fft_myrmics_cfft2_phase(ra_row, a, w, n, tile_size, blk_size); // Transpose a->b fft_myrmics_Xpose(a_copy, rb_row, b, tile_size, blk_size); // Run second phase on buffer b fft_myrmics_cfft2_phase(rb_row, b, w, n, tile_size, blk_size); // Transpose b->a fft_myrmics_Xpose(b_copy, ra_row, a, tile_size, blk_size); // Stop time args[0] = (void *) r; deps[0] = SYS_TYPE_REGION_ARG | SYS_TYPE_INOUT_ARG; args[1] = (void *) time_start; deps[1] = SYS_TYPE_BYVALUE_ARG; sys_spawn(3, args, deps, 2); // fft_myrmics_time() // Checksum args[0] = (void *) r; deps[0] = SYS_TYPE_REGION_ARG | SYS_TYPE_INOUT_ARG; args[1] = (void *) a; deps[1] = SYS_TYPE_BYVALUE_ARG; args[2] = (void *) tile_size; deps[2] = SYS_TYPE_BYVALUE_ARG; args[3] = (void *) blk_size; deps[3] = SYS_TYPE_BYVALUE_ARG; sys_spawn(4, args, deps, 4); // fft_myrmics_checksum() printf("%d: spawns done\r\n", sys_get_worker_id()); }
/*
 * Call sys_spawn() with `path`, supplying an argument vector and an
 * environment vector that each contain only a NULL terminator.
 *
 * Returns the value returned by sys_spawn().
 */
int spawn(char* path)
{
    /* Placeholder vectors (a hack): sys_spawn() is given empty lists. */
    char *no_args[1] = { NULL };
    char *no_env[1] = { NULL };
    int rc;

    rc = sys_spawn(path, no_args, no_env);
    return rc;
}
/*
 * Thin wrapper around sys_spawn(): forwards `text`, `argc` and `argv`
 * unchanged and returns the value sys_spawn() returns.
 */
int spawn(void *text, int argc, char **argv)
{
    int rc;

    rc = sys_spawn(text, argc, argv);
    return rc;
}