void fdst_gpu(float *data, float *data2, float *data3, int Nx, int Ny, int Lx) { float s; s = sqrt(2.0/(Nx+1)); #pragma acc data copy(data[0:Nx*Ny]), create(data2[0:Lx*Ny], data3[0:2*Lx*Ny]) { expand_data(data, data2, Nx, Ny, Lx); expand_idata(data2, data3, Nx, Ny, Lx); // Copy data to device at start of region and back to host and end of region // Inside this region the device data pointer will be used #pragma acc host_data use_device(data3) { void *stream = acc_get_cuda_stream(acc_async_sync); cuda_fft(data3, Lx, Ny, stream); } #pragma acc parallel loop independent for (int i=0;i<Ny;i++) { #pragma acc loop independent for (int j=0;j<Nx;j++) data[Nx*i+j] = -1.0*s*data3[2*Lx*i+2*j+3]/2; } } }
// Like sprintf, except size of the buffer is passed in so that it won't get overrun. int16_t ssprintf(char *print_buffer, int16_t buffer_size, const char *format, va_list ap) { PARSEOPTION_TYPE options; char format_char; uint16_t out_size = 0; do { format_char = pgm_read_byte(format++); if (format_char == '%') { uint16_t size; init_option_data(&options); parse_format(&format, &options); size = expand_data(&ap, &options, print_buffer, buffer_size - out_size); print_buffer += size; out_size += size; } else { // just a normal character, move it in the buffer. *print_buffer++ = format_char; out_size++; } } while ((format_char != '\0') && (out_size < buffer_size)); // return how many characters we put into the buffer. return out_size; }
int do_syscall () { /* Syscalls for the source-language version of SPIM. These are easier to use than the real syscall and are portable to non-MIPS operating systems. */ switch (R[REG_V0]) { case PRINT_INT_SYSCALL: write_output (console_out, "%d", R[REG_A0]); break; case PRINT_FLOAT_SYSCALL: { float val = FPR_S (REG_FA0); write_output (console_out, "%.8f", val); break; } case PRINT_DOUBLE_SYSCALL: write_output (console_out, "%.18g", FPR[REG_FA0 / 2]); break; case PRINT_STRING_SYSCALL: write_output (console_out, "%s", mem_reference (R[REG_A0])); break; case READ_INT_SYSCALL: { static char str [256]; read_input (str, 256); R[REG_RES] = atol (str); break; } case READ_FLOAT_SYSCALL: { static char str [256]; read_input (str, 256); FPR_S (REG_FRES) = (float) atof (str); break; } case READ_DOUBLE_SYSCALL: { static char str [256]; read_input (str, 256); FPR [REG_FRES] = atof (str); break; } case READ_STRING_SYSCALL: { read_input ( (char *) mem_reference (R[REG_A0]), R[REG_A1]); data_modified = 1; break; } case SBRK_SYSCALL: { mem_addr x = data_top; expand_data (R[REG_A0]); R[REG_RES] = x; data_modified = 1; break; } case PRINT_CHARACTER_SYSCALL: write_output (console_out, "%c", R[REG_A0]); break; case READ_CHARACTER_SYSCALL: { static char str [2]; read_input (str, 2); if (*str == '\0') *str = '\n'; /* makes xspim = spim */ R[REG_RES] = (long) str[0]; break; } case EXIT_SYSCALL: spim_return_value = 0; return (0); case EXIT2_SYSCALL: spim_return_value = R[REG_A0]; /* value passed to spim's exit() call */ return (0); case OPEN_SYSCALL: { #ifdef WIN32 R[REG_RES] = _open(mem_reference (R[REG_A0]), R[REG_A1], R[REG_A2]); #else R[REG_RES] = open(mem_reference (R[REG_A0]), R[REG_A1], R[REG_A2]); #endif break; } case READ_SYSCALL: { /* Test if address is valid */ (void)mem_reference (R[REG_A1] + R[REG_A2] - 1); #ifdef WIN32 R[REG_RES] = _read(R[REG_A0], mem_reference (R[REG_A1]), R[REG_A2]); #else R[REG_RES] = read(R[REG_A0], mem_reference (R[REG_A1]), R[REG_A2]); #endif data_modified = 1; break; } case WRITE_SYSCALL: { /* Test if address is valid */ (void)mem_reference (R[REG_A1] + R[REG_A2] - 1); #ifdef WIN32 R[REG_RES] = _write(R[REG_A0], mem_reference (R[REG_A1]), R[REG_A2]); #else R[REG_RES] = write(R[REG_A0], mem_reference (R[REG_A1]), R[REG_A2]); #endif break; } case CLOSE_SYSCALL: { #ifdef WIN32 R[REG_RES] = _close(R[REG_A0]); #else R[REG_RES] = close(R[REG_A0]); #endif break; } default: run_error ("Unknown system call: %d\n", R[REG_V0]); break; } return (1); }