TEST(Mem, set_value_real_double_complex_matrix)
{
    // Double precision complex matrix: fill a GPU array from a real scalar,
    // then copy it to the host and inspect every element.
    const int num_elements = 100;
    int status = 0;

    oskar_Mem* device_mem = oskar_mem_create(OSKAR_DOUBLE_COMPLEX_MATRIX,
            OSKAR_GPU, num_elements, &status);
    oskar_mem_set_value_real(device_mem, 6.5, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    oskar_Mem* host_mem = oskar_mem_create_copy(device_mem, OSKAR_CPU,
            &status);
    double4c* values = oskar_mem_double4c(host_mem, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Only the real parts of the 'a' and 'd' components should hold the
    // value; every other component must be zero.
    for (int i = 0; i < num_elements; ++i)
    {
        const double4c* e = values + i;
        EXPECT_DOUBLE_EQ(e->a.x, 6.5);
        EXPECT_DOUBLE_EQ(e->a.y, 0.0);
        EXPECT_DOUBLE_EQ(e->b.x, 0.0);
        EXPECT_DOUBLE_EQ(e->b.y, 0.0);
        EXPECT_DOUBLE_EQ(e->c.x, 0.0);
        EXPECT_DOUBLE_EQ(e->c.y, 0.0);
        EXPECT_DOUBLE_EQ(e->d.x, 6.5);
        EXPECT_DOUBLE_EQ(e->d.y, 0.0);
    }

    oskar_mem_free(device_mem, &status);
    oskar_mem_free(host_mem, &status);
}
oskar_Mem* oskar_mem_read_binary_raw(const char* filename, int type, int location, int* status) { size_t num_elements, element_size, size_bytes; oskar_Mem *mem = 0; FILE* stream; /* Check if safe to proceed. */ if (*status) return 0; /* Open the input file. */ stream = fopen(filename, "rb"); if (!stream) { *status = OSKAR_ERR_FILE_IO; return 0; } /* Get the file size. */ fseek(stream, 0, SEEK_END); size_bytes = ftell(stream); /* Create memory block of the right size. */ element_size = oskar_mem_element_size(type); num_elements = (size_t)ceil(size_bytes / element_size); mem = oskar_mem_create(type, OSKAR_CPU, num_elements, status); if (*status) { oskar_mem_free(mem, status); fclose(stream); return 0; } /* Read the data. */ fseek(stream, 0, SEEK_SET); if (fread(oskar_mem_void(mem), 1, size_bytes, stream) != size_bytes) { oskar_mem_free(mem, status); fclose(stream); *status = OSKAR_ERR_FILE_IO; return 0; } /* Close the input file. */ fclose(stream); /* Copy to GPU memory if required. */ if (location != OSKAR_CPU) { oskar_Mem* gpu; gpu = oskar_mem_create_copy(mem, location, status); oskar_mem_free(mem, status); return gpu; } return mem; }
/*
 * Runs the FFT described by the plan 'h' on 'data', leaving the result in
 * 'data'.
 *
 * h       FFT plan; its location, precision, dimension and work arrays are
 *         read here.
 * data    Array to transform. If it is not in the plan's memory location it
 *         is copied there, transformed, and the result copied back.
 * status  Status code; nothing is done if it is non-zero on entry.
 *         Set to OSKAR_ERR_FUNCTION_NOT_AVAILABLE for a 1D CPU plan and to
 *         OSKAR_ERR_BAD_LOCATION for a plan location that is neither CPU
 *         nor GPU.
 */
void oskar_fft_exec(oskar_FFT* h, oskar_Mem* data, int* status)
{
    oskar_Mem *data_copy = 0, *data_ptr = data;

    /* Check if safe to proceed: never touch the data on a prior error. */
    if (*status) return;

    /* Work on a copy if the data are not where the plan expects them. */
    if (oskar_mem_location(data) != h->location)
    {
        data_copy = oskar_mem_create_copy(data, h->location, status);
        data_ptr = data_copy;

        /* Do not run the transform on a copy that could not be made. */
        if (*status)
        {
            oskar_mem_free(data_copy, status);
            return;
        }
    }

    if (h->location == OSKAR_CPU)
    {
        if (h->num_dim == 1)
        {
            *status = OSKAR_ERR_FUNCTION_NOT_AVAILABLE;
        }
        else if (h->num_dim == 2)
        {
            if (h->precision == OSKAR_DOUBLE)
                oskar_fftpack_cfft2f(h->dim_size, h->dim_size, h->dim_size,
                        oskar_mem_double(data_ptr, status),
                        oskar_mem_double(h->fftpack_wsave, status),
                        oskar_mem_double(h->fftpack_work, status));
            else
                oskar_fftpack_cfft2f_f(h->dim_size, h->dim_size, h->dim_size,
                        oskar_mem_float(data_ptr, status),
                        oskar_mem_float(h->fftpack_wsave, status),
                        oskar_mem_float(h->fftpack_work, status));

            /* This step not needed for W-kernel generation, so turn it off. */
            if (h->ensure_consistent_norm)
                oskar_mem_scale_real(data_ptr, (double)h->num_cells_total,
                        0, h->num_cells_total, status);
        }
    }
    else if (h->location == OSKAR_GPU)
    {
#ifdef OSKAR_HAVE_CUDA
        /* In-place forward transform using the cuFFT plan. */
        if (h->precision == OSKAR_DOUBLE)
            cufftExecZ2Z(h->cufft_plan,
                    (cufftDoubleComplex*) oskar_mem_void(data_ptr),
                    (cufftDoubleComplex*) oskar_mem_void(data_ptr),
                    CUFFT_FORWARD);
        else
            cufftExecC2C(h->cufft_plan,
                    (cufftComplex*) oskar_mem_void(data_ptr),
                    (cufftComplex*) oskar_mem_void(data_ptr),
                    CUFFT_FORWARD);
#endif
    }
    else
        *status = OSKAR_ERR_BAD_LOCATION;

    /* Copy the result back if a temporary was used, then release it. */
    if (oskar_mem_location(data) != h->location)
        oskar_mem_copy(data, data_ptr, status);
    oskar_mem_free(data_copy, status);
}
TEST(Mem, set_value_real_single_complex)
{
    // Single precision complex: fill a GPU array from a real scalar, then
    // copy it to the host and inspect every element.
    const int num_elements = 100;
    int status = 0;

    oskar_Mem* device_mem = oskar_mem_create(OSKAR_SINGLE_COMPLEX,
            OSKAR_GPU, num_elements, &status);
    oskar_mem_set_value_real(device_mem, 6.5, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    oskar_Mem* host_mem = oskar_mem_create_copy(device_mem, OSKAR_CPU,
            &status);
    float2* values = oskar_mem_float2(host_mem, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // The real part must hold the value and the imaginary part must be zero.
    for (int i = 0; i < num_elements; ++i)
    {
        const float2* e = values + i;
        EXPECT_FLOAT_EQ(e->x, 6.5);
        EXPECT_FLOAT_EQ(e->y, 0.0);
    }

    oskar_mem_free(device_mem, &status);
    oskar_mem_free(host_mem, &status);
}
void oskar_mem_evaluate_relative_error(const oskar_Mem* val_approx, const oskar_Mem* val_accurate, double* min_rel_error, double* max_rel_error, double* avg_rel_error, double* std_rel_error, int* status) { int prec_approx, prec_accurate; size_t i, n; const oskar_Mem *app_ptr, *acc_ptr; oskar_Mem *approx_temp = 0, *accurate_temp = 0; double old_m = 0.0, new_m = 0.0, old_s = 0.0, new_s = 0.0; /* Check if safe to proceed. */ if (*status) return; /* Initialise outputs. */ if (max_rel_error) *max_rel_error = -DBL_MAX; if (min_rel_error) *min_rel_error = DBL_MAX; if (avg_rel_error) *avg_rel_error = DBL_MAX; if (std_rel_error) *std_rel_error = DBL_MAX; /* Type and dimension check. */ if (oskar_mem_is_matrix(val_approx) && !oskar_mem_is_matrix(val_accurate)) { *status = OSKAR_ERR_TYPE_MISMATCH; return; } if (oskar_mem_is_complex(val_approx) && !oskar_mem_is_complex(val_accurate)) { *status = OSKAR_ERR_TYPE_MISMATCH; return; } /* Get and check base types. */ prec_approx = oskar_mem_precision(val_approx); prec_accurate = oskar_mem_precision(val_accurate); if (prec_approx != OSKAR_SINGLE && prec_approx != OSKAR_DOUBLE) { *status = OSKAR_ERR_BAD_DATA_TYPE; return; } if (prec_accurate != OSKAR_SINGLE && prec_accurate != OSKAR_DOUBLE) { *status = OSKAR_ERR_BAD_DATA_TYPE; return; } /* Get number of elements to check. */ n = oskar_mem_length(val_approx) < oskar_mem_length(val_accurate) ? oskar_mem_length(val_approx) : oskar_mem_length(val_accurate); if (oskar_mem_is_matrix(val_approx)) n *= 4; /* Copy input data to temporary CPU arrays if required. 
*/ app_ptr = val_approx; acc_ptr = val_accurate; if (oskar_mem_location(val_approx) != OSKAR_CPU) { approx_temp = oskar_mem_create_copy(val_approx, OSKAR_CPU, status); if (*status) { oskar_mem_free(approx_temp, status); return; } app_ptr = approx_temp; } if (oskar_mem_location(val_accurate) != OSKAR_CPU) { accurate_temp = oskar_mem_create_copy(val_accurate, OSKAR_CPU, status); if (*status) { oskar_mem_free(accurate_temp, status); return; } acc_ptr = accurate_temp; } /* Check numbers are the same, to appropriate precision. */ if (prec_approx == OSKAR_SINGLE && prec_accurate == OSKAR_SINGLE) { const float *approx, *accurate; approx = oskar_mem_float_const(app_ptr, status); accurate = oskar_mem_float_const(acc_ptr, status); CHECK_ELEMENTS(1e-5) } else if (prec_approx == OSKAR_DOUBLE && prec_accurate == OSKAR_SINGLE)
void oskar_mem_save_ascii(FILE* file, size_t num_mem, size_t offset, size_t num_elements, int* status, ...) { int type; size_t i, j; va_list args; oskar_Mem** handles; /* Array of oskar_Mem pointers in CPU memory. */ /* Check if safe to proceed. */ if (*status) return; /* Check there are at least the number of specified elements in * each array. */ va_start(args, status); for (i = 0; i < num_mem; ++i) { const oskar_Mem* mem; mem = va_arg(args, const oskar_Mem*); if (oskar_mem_length(mem) < num_elements) *status = OSKAR_ERR_DIMENSION_MISMATCH; } va_end(args); /* Check if safe to proceed. */ if (*status) return; /* Allocate and set up the handle array. */ handles = (oskar_Mem**) malloc(num_mem * sizeof(oskar_Mem*)); va_start(args, status); for (i = 0; i < num_mem; ++i) { oskar_Mem* mem; mem = va_arg(args, oskar_Mem*); if (oskar_mem_location(mem) != OSKAR_CPU) { handles[i] = oskar_mem_create_copy(mem, OSKAR_CPU, status); } else { handles[i] = mem; } } va_end(args); for (j = 0; j < num_elements; ++j) { /* Break if error. 
*/ if (*status) break; for (i = 0; i < num_mem; ++i) { const void* data; data = oskar_mem_void_const(handles[i]); type = oskar_mem_type(handles[i]); switch (type) { case OSKAR_SINGLE: { fprintf(file, SDF, ((const float*)data)[j + offset]); continue; } case OSKAR_DOUBLE: { fprintf(file, SDD, ((const double*)data)[j + offset]); continue; } case OSKAR_SINGLE_COMPLEX: { float2 d; d = ((const float2*)data)[j + offset]; fprintf(file, SDF SDF, d.x, d.y); continue; } case OSKAR_DOUBLE_COMPLEX: { double2 d; d = ((const double2*)data)[j + offset]; fprintf(file, SDD SDD, d.x, d.y); continue; } case OSKAR_SINGLE_COMPLEX_MATRIX: { float4c d; d = ((const float4c*)data)[j + offset]; fprintf(file, SDF SDF SDF SDF SDF SDF SDF SDF, d.a.x, d.a.y, d.b.x, d.b.y, d.c.x, d.c.y, d.d.x, d.d.y); continue; } case OSKAR_DOUBLE_COMPLEX_MATRIX: { double4c d; d = ((const double4c*)data)[j + offset]; fprintf(file, SDD SDD SDD SDD SDD SDD SDD SDD, d.a.x, d.a.y, d.b.x, d.b.y, d.c.x, d.c.y, d.d.x, d.d.y); continue; } case OSKAR_CHAR: { putc(((const char*)data)[j + offset], file); continue; } case OSKAR_INT: { fprintf(file, "%5d ", ((const int*)data)[j + offset]); continue; } default: { *status = OSKAR_ERR_BAD_DATA_TYPE; continue; } } } putc('\n', file); } /* Free any temporary memory used by this function. */ va_start(args, status); for (i = 0; i < num_mem; ++i) { const oskar_Mem* mem; mem = va_arg(args, const oskar_Mem*); if (oskar_mem_location(mem) != OSKAR_CPU) { oskar_mem_free(handles[i], status); } } va_end(args); /* Free the handle array. */ free(handles); }
TEST(binary_file, binary_read_write_mem)
{
    // Round-trip test: write several tagged arrays (and one integer) to a
    // binary file, then re-open the file and read everything back.
    const char filename[] = "temp_test_mem_binary.dat";
    int num_cpu = 1000;
    int num_gpu = 2048;
    int status = 0;

    // Open the file for writing.
    oskar_Binary* h = oskar_binary_create(filename, 'w', &status);

    // Write single-precision data held in CPU memory.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_SINGLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        float* values = oskar_mem_float(host, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            values[i] = i * 1024.0;
        }
        oskar_binary_write_mem_ext(h, host, "USER", "TEST", 987654, 0,
                &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(host, &status);
    }

    // Write double-precision complex data held in GPU memory.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_CPU,
                num_gpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double2* values = oskar_mem_double2(host, &status);
        for (int i = 0; i < num_gpu; ++i)
        {
            values[i].x = i * 10.0;
            values[i].y = i * 20.0 + 1.0;
        }

        // Move the data to the GPU before saving it.
        oskar_Mem* device = oskar_mem_create_copy(host, OSKAR_GPU, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_write_mem_ext(h, device, "AA", "BB", 2, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(host, &status);
        oskar_mem_free(device, &status);
    }

    // Write a single integer with a large index.
    int val = 0xFFFFFF;
    oskar_binary_write_int(h, 50, 9, 800000, val, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Write two CPU arrays using blank tags, distinguished by index only.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double* values = oskar_mem_double(host, &status);

        for (int i = 0; i < num_cpu; ++i)
        {
            values[i] = i * 500.0;
        }
        oskar_binary_write_mem_ext(h, host, "", "", 10, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        for (int i = 0; i < num_cpu; ++i)
        {
            values[i] = i * 501.0;
        }
        oskar_binary_write_mem_ext(h, host, "", "", 11, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(host, &status);
    }

    // Write two CPU arrays using tags that are equal lengths.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double* values = oskar_mem_double(host, &status);

        for (int i = 0; i < num_cpu; ++i)
        {
            values[i] = i * 1001.0;
        }
        oskar_binary_write_mem_ext(h, host, "DOG", "CAT", 0, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        for (int i = 0; i < num_cpu; ++i)
        {
            values[i] = i * 127.0;
        }
        oskar_binary_write_mem_ext(h, host, "ONE", "TWO", 0, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(host, &status);
    }

    // Close the file and re-open it for reading.
    oskar_binary_free(h);
    h = oskar_binary_create(filename, 'r', &status);

    // Read the complex data directly into GPU memory.
    {
        oskar_Mem* device = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_GPU,
                0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, device, "AA", "BB", 2, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        EXPECT_EQ(num_gpu, (int)oskar_mem_length(device));

        // Copy back to CPU and examine contents.
        oskar_Mem* host = oskar_mem_create_copy(device, OSKAR_CPU, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double2* values = oskar_mem_double2(host, &status);
        for (int i = 0; i < num_gpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 10.0, values[i].x);
            EXPECT_DOUBLE_EQ(i * 20.0 + 1.0, values[i].y);
        }
        oskar_mem_free(host, &status);
        oskar_mem_free(device, &status);
    }

    // Read the integer with a large index.
    int new_val = 0;
    oskar_binary_read_int(h, 50, 9, 800000, &new_val, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    EXPECT_EQ(val, new_val);

    // Read the single-precision CPU data.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_SINGLE, OSKAR_CPU,
                num_cpu, &status);
        oskar_binary_read_mem_ext(h, host, "USER", "TEST", 987654, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(host));
        float* values = oskar_mem_float(host, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 1024.0, values[i]);
        }
        oskar_mem_free(host, &status);
    }

    // Read the CPU data that were written with blank tags.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU,
                num_cpu, &status);
        double* values;
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, host, "", "", 10, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // A failed lookup must report the error; the array is then checked
        // to still hold the data read just before.
        oskar_binary_read_mem_ext(h, host, "DOESN'T", "EXIST", 10, &status);
        EXPECT_EQ((int)OSKAR_ERR_BINARY_TAG_NOT_FOUND, status);
        status = 0;
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(host));
        values = oskar_mem_double(host, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 500.0, values[i]);
        }

        oskar_binary_read_mem_ext(h, host, "", "", 11, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(host));
        values = oskar_mem_double(host, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 501.0, values[i]);
        }
        oskar_mem_free(host, &status);
    }

    // Read the CPU data written with tags that are equal lengths, in the
    // opposite order to that in which they were written.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, 0,
                &status);
        double* values;
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        oskar_binary_read_mem_ext(h, host, "ONE", "TWO", 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(host));
        values = oskar_mem_double(host, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 127.0, values[i]);
        }

        oskar_binary_read_mem_ext(h, host, "DOG", "CAT", 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(host));
        values = oskar_mem_double(host, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 1001.0, values[i]);
        }
        oskar_mem_free(host, &status);
    }

    // Try to load data that isn't present: the empty array must stay empty.
    {
        oskar_Mem* host = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, 0,
                &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, host, "DOESN'T", "EXIST", 10, &status);
        EXPECT_EQ((int)OSKAR_ERR_BINARY_TAG_NOT_FOUND, status);
        status = 0;
        EXPECT_EQ(0, (int)oskar_mem_length(host));
        oskar_mem_free(host, &status);
    }

    // Release the handle.
    oskar_binary_free(h);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
}
/*
 * Multiplies two arrays element by element, writing the result to 'out'.
 *
 * The work is done in the memory location of 'out'; any input that lives
 * elsewhere is copied there temporarily. The kernel is selected from the
 * element types of the three arrays:
 *   - all three types equal: real, complex or complex-matrix, in single or
 *     double precision (mem_mul_rr_r, mem_mul_cc_c, mem_mul_mm_m);
 *   - complex-matrix output with complex*complex, complex*matrix or
 *     matrix*complex inputs of the same precision (mem_mul_cc_m,
 *     mem_mul_cm_m, mem_mul_mc_m).
 *
 * out           Output array.
 * in1, in2      Input arrays.
 * offset_out    Start element offset into 'out'.
 * offset_in1    Start element offset into 'in1'.
 * offset_in2    Start element offset into 'in2'.
 * num_elements  Number of elements to process; 0 is a no-op.
 * status        Status code; nothing is done if it is non-zero on entry.
 *               Set to OSKAR_ERR_BAD_DATA_TYPE or OSKAR_ERR_TYPE_MISMATCH
 *               for unsupported type combinations.
 *
 * NOTE: offsets and the element count are narrowed to unsigned int before
 * being handed to the kernels.
 */
void oskar_mem_multiply(
        oskar_Mem* out,
        const oskar_Mem* in1,
        const oskar_Mem* in2,
        size_t offset_out,
        size_t offset_in1,
        size_t offset_in2,
        size_t num_elements,
        int* status)
{
    oskar_Mem *a_temp = 0, *b_temp = 0;
    const oskar_Mem *a_, *b_; /* Pointers. */
    if (num_elements == 0) return;
    const int location = oskar_mem_location(out);
    const unsigned int off_a = (unsigned int) offset_in1;
    const unsigned int off_b = (unsigned int) offset_in2;
    const unsigned int off_c = (unsigned int) offset_out;
    const unsigned int n = (unsigned int) num_elements;
    if (*status) return;

    /* Bring both inputs into the same memory location as the output. */
    a_ = in1;
    b_ = in2;
    if (oskar_mem_location(in1) != location)
    {
        a_temp = oskar_mem_create_copy(in1, location, status);
        a_ = a_temp;
    }
    if (oskar_mem_location(in2) != location)
    {
        b_temp = oskar_mem_create_copy(in2, location, status);
        b_ = b_temp;
    }

    /* Do not touch the inputs if either copy failed: the temporaries
     * cannot be relied upon to hold valid data in that case. */
    if (*status)
    {
        oskar_mem_free(a_temp, status);
        oskar_mem_free(b_temp, status);
        return;
    }

    if (location == OSKAR_CPU)
    {
        void *c = out->data;
        const void *a = a_->data, *b = b_->data;

        /* Check if types are all the same. */
        if (out->type == in1->type && out->type == in2->type)
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE:
                mem_mul_rr_r_double(off_a, off_b, off_c, n,
                        (const double*)a, (const double*)b, (double*)c);
                break;
            case OSKAR_DOUBLE_COMPLEX:
                mem_mul_cc_c_double(off_a, off_b, off_c, n,
                        (const double2*)a, (const double2*)b, (double2*)c);
                break;
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
                mem_mul_mm_m_double(off_a, off_b, off_c, n,
                        (const double4c*)a, (const double4c*)b, (double4c*)c);
                break;
            case OSKAR_SINGLE:
                mem_mul_rr_r_float(off_a, off_b, off_c, n,
                        (const float*)a, (const float*)b, (float*)c);
                break;
            case OSKAR_SINGLE_COMPLEX:
                mem_mul_cc_c_float(off_a, off_b, off_c, n,
                        (const float2*)a, (const float2*)b, (float2*)c);
                break;
            case OSKAR_SINGLE_COMPLEX_MATRIX:
                mem_mul_mm_m_float(off_a, off_b, off_c, n,
                        (const float4c*)a, (const float4c*)b, (float4c*)c);
                break;
            default:
                *status = OSKAR_ERR_BAD_DATA_TYPE;
                break;
            }
        }
        else
        {
            /* Mixed types: only a complex-matrix output is supported. */
            switch (out->type)
            {
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_DOUBLE_COMPLEX:
                    if (in2->type == in1->type)
                        mem_mul_cc_m_double(off_a, off_b, off_c, n,
                                (const double2*)a, (const double2*)b,
                                (double4c*)c);
                    else if (in2->type == out->type)
                        mem_mul_cm_m_double(off_a, off_b, off_c, n,
                                (const double2*)a, (const double4c*)b,
                                (double4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_DOUBLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_DOUBLE_COMPLEX)
                        mem_mul_mc_m_double(off_a, off_b, off_c, n,
                                (const double4c*)a, (const double2*)b,
                                (double4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            case OSKAR_SINGLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_SINGLE_COMPLEX:
                    if (in2->type == in1->type)
                        mem_mul_cc_m_float(off_a, off_b, off_c, n,
                                (const float2*)a, (const float2*)b,
                                (float4c*)c);
                    else if (in2->type == out->type)
                        mem_mul_cm_m_float(off_a, off_b, off_c, n,
                                (const float2*)a, (const float4c*)b,
                                (float4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_SINGLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_SINGLE_COMPLEX)
                        mem_mul_mc_m_float(off_a, off_b, off_c, n,
                                (const float4c*)a, (const float2*)b,
                                (float4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            default:
                *status = OSKAR_ERR_TYPE_MISMATCH;
                break;
            }
        }
    }
    else
    {
        /* Device path: select the kernel by name, then launch it. */
        const char* k = 0;

        /* Check if types are all the same. */
        if (out->type == in1->type && out->type == in2->type)
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE:
                k = "mem_mul_rr_r_double"; break;
            case OSKAR_DOUBLE_COMPLEX:
                k = "mem_mul_cc_c_double"; break;
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
                k = "mem_mul_mm_m_double"; break;
            case OSKAR_SINGLE:
                k = "mem_mul_rr_r_float"; break;
            case OSKAR_SINGLE_COMPLEX:
                k = "mem_mul_cc_c_float"; break;
            case OSKAR_SINGLE_COMPLEX_MATRIX:
                k = "mem_mul_mm_m_float"; break;
            default:
                *status = OSKAR_ERR_BAD_DATA_TYPE;
                break;
            }
        }
        else
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_DOUBLE_COMPLEX:
                    if (in2->type == in1->type)
                        k = "mem_mul_cc_m_double";
                    else if (in2->type == out->type)
                        k = "mem_mul_cm_m_double";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_DOUBLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_DOUBLE_COMPLEX)
                        k = "mem_mul_mc_m_double";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            case OSKAR_SINGLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_SINGLE_COMPLEX:
                    if (in2->type == in1->type)
                        k = "mem_mul_cc_m_float";
                    else if (in2->type == out->type)
                        k = "mem_mul_cm_m_float";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_SINGLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_SINGLE_COMPLEX)
                        k = "mem_mul_mc_m_float";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            default:
                *status = OSKAR_ERR_TYPE_MISMATCH;
                break;
            }
        }
        if (!*status)
        {
            size_t local_size[] = {256, 1, 1}, global_size[] = {1, 1, 1};
            oskar_device_check_local_size(location, 0, local_size);
            global_size[0] = oskar_device_global_size(
                    num_elements, local_size[0]);
            const oskar_Arg args[] = {
                    {INT_SZ, &off_a},
                    {INT_SZ, &off_b},
                    {INT_SZ, &off_c},
                    {INT_SZ, &n},
                    {PTR_SZ, oskar_mem_buffer_const(a_)},
                    {PTR_SZ, oskar_mem_buffer_const(b_)},
                    {PTR_SZ, oskar_mem_buffer(out)}
            };
            oskar_device_launch_kernel(k, location, 1, local_size, global_size,
                    sizeof(args) / sizeof(oskar_Arg), args, 0, 0, status);
        }
    }

    /* Free temporary arrays. */
    oskar_mem_free(a_temp, status);
    oskar_mem_free(b_temp, status);
}
TEST(prefix_sum, test) { int n = 100000, status = 0, exclusive = 1; oskar_Mem* in_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status); oskar_Mem* out_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status); oskar_Timer* tmr = oskar_timer_create(OSKAR_TIMER_NATIVE); // Fill input with random integers from 0 to 9. int* t = oskar_mem_int(in_cpu, &status); srand(1556); for (int i = 0; i < n; ++i) t[i] = (int) (10.0 * rand() / ((double) RAND_MAX)); t[0] = 3; // Run on CPU. oskar_timer_start(tmr); oskar_prefix_sum(n, in_cpu, out_cpu, 0, exclusive, &status); EXPECT_EQ(0, status); printf("Prefix sum on CPU took %.3f sec\n", oskar_timer_elapsed(tmr)); #ifdef OSKAR_HAVE_CUDA // Run on GPU with CUDA. oskar_Mem* in_gpu = oskar_mem_create_copy(in_cpu, OSKAR_GPU, &status); oskar_Mem* out_gpu = oskar_mem_create(OSKAR_INT, OSKAR_GPU, n, &status); oskar_timer_start(tmr); oskar_prefix_sum(n, in_gpu, out_gpu, 0, exclusive, &status); EXPECT_EQ(0, status); printf("Prefix sum on GPU took %.3f sec\n", oskar_timer_elapsed(tmr)); // Check consistency between CPU and GPU results. oskar_Mem* out_cmp_gpu = oskar_mem_create_copy(out_gpu, OSKAR_CPU, &status); EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_gpu, n, &status)); #endif #ifdef OSKAR_HAVE_OPENCL // Run on OpenCL. oskar_Mem* in_cl = oskar_mem_create_copy(in_cpu, OSKAR_CL, &status); oskar_Mem* out_cl = oskar_mem_create(OSKAR_INT, OSKAR_CL, n, &status); oskar_timer_start(tmr); printf("Using %s\n", oskar_cl_device_name()); oskar_prefix_sum(n, in_cl, out_cl, 0, exclusive, &status); EXPECT_EQ(0, status); printf("Prefix sum on OpenCL took %.3f sec\n", oskar_timer_elapsed(tmr)); // Check consistency between CPU and OpenCL results. 
oskar_Mem* out_cmp_cl = oskar_mem_create_copy(out_cl, OSKAR_CPU, &status); EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_cl, n, &status)); #endif if (save) { size_t num_mem = 1; FILE* fhan = fopen("prefix_sum_test.txt", "w"); #ifdef OSKAR_HAVE_CUDA num_mem += 1; #endif #ifdef OSKAR_HAVE_OPENCL num_mem += 1; #endif oskar_mem_save_ascii(fhan, num_mem, n, &status, out_cpu #ifdef OSKAR_HAVE_CUDA , out_cmp_gpu #endif #ifdef OSKAR_HAVE_OPENCL , out_cmp_cl #endif ); fclose(fhan); } // Clean up. oskar_timer_free(tmr); oskar_mem_free(in_cpu, &status); oskar_mem_free(out_cpu, &status); #ifdef OSKAR_HAVE_CUDA oskar_mem_free(in_gpu, &status); oskar_mem_free(out_gpu, &status); oskar_mem_free(out_cmp_gpu, &status); #endif #ifdef OSKAR_HAVE_OPENCL oskar_mem_free(in_cl, &status); oskar_mem_free(out_cl, &status); oskar_mem_free(out_cmp_cl, &status); #endif }
/*
 * Writes one plane of an array to a FITS cube.
 *
 * Complex data are split into two real arrays, written to
 * "<root_name>_REAL.fits" and "<root_name>_IMAG.fits". Other data are written
 * to "<root_name>.fits", or to "<root_name>" itself if it already ends in
 * ".fits" or ".FITS". Data not in CPU memory are copied to the CPU first.
 *
 * data        Array to write; matrix types are rejected with
 *             OSKAR_ERR_BAD_DATA_TYPE.
 * root_name   Root of the output file name.
 * width, height, num_planes, i_plane
 *             Passed unchanged to write_pixels().
 * status      Status code; nothing is done if it is non-zero on entry.
 */
void oskar_mem_write_fits_cube(oskar_Mem* data, const char* root_name,
        int width, int height, int num_planes, int i_plane, int* status)
{
    oskar_Mem *host_copy = 0, *src = 0;
    size_t root_len, name_len;
    char* file_name;

    /* Checks. */
    if (*status) return;
    if (oskar_mem_is_matrix(data))
    {
        *status = OSKAR_ERR_BAD_DATA_TYPE;
        return;
    }

    /* Allocate room for the root name plus the longest suffix
     * ("_REAL.fits" / "_IMAG.fits") and the terminator. */
    root_len = strlen(root_name);
    name_len = 11 + root_len;
    file_name = (char*) calloc(name_len, sizeof(char));

    /* Use the data in place if on the host; otherwise take a host copy. */
    src = data;
    if (oskar_mem_location(data) != OSKAR_CPU)
    {
        host_copy = oskar_mem_create_copy(src, OSKAR_CPU, status);
        src = host_copy;
    }

    if (!oskar_mem_is_complex(src))
    {
        /* No conversion needed: only append the extension if missing. */
        int has_extension = 0;
        if (root_len >= 5)
        {
            const char* tail = &(root_name[root_len - 5]);
            has_extension = !strcmp(tail, ".fits") || !strcmp(tail, ".FITS");
        }
        if (has_extension)
        {
            SNPRINTF(file_name, name_len, "%s", root_name);
        }
        else
        {
            SNPRINTF(file_name, name_len, "%s.fits", root_name);
        }
        write_pixels(src, file_name, width, height, num_planes, i_plane,
                status);
    }
    else
    {
        /* Complex data: write the two parts to separate files. */
        oskar_Mem *part = oskar_mem_create(oskar_mem_precision(src),
                OSKAR_CPU, oskar_mem_length(src), status);

        /* Extract the real part and write it. */
        SNPRINTF(file_name, name_len, "%s_REAL.fits", root_name);
        convert_complex(src, part, 0, status);
        write_pixels(part, file_name, width, height, num_planes, i_plane,
                status);

        /* Extract the imaginary part and write it. */
        SNPRINTF(file_name, name_len, "%s_IMAG.fits", root_name);
        convert_complex(src, part, 1, status);
        write_pixels(part, file_name, width, height, num_planes, i_plane,
                status);

        oskar_mem_free(part, status);
    }

    free(file_name);
    oskar_mem_free(host_copy, status);
}
TEST(element_weights_errors, test_apply)
{
    // Evaluates element weight errors on the GPU, multiplies them into a set
    // of constant weights, and writes all arrays to a temporary text file.
    int num_elements = 10000;
    int status = 0;
    double gain = 1.5;
    double gain_error = 0.2;
    double phase = 0.1 * M_PI;
    double phase_error = (5 / 180.0) * M_PI;
    double weight_gain = 1.0;
    double weight_phase = 0.5 * M_PI;
    double2 weight;
    weight.x = weight_gain * cos(weight_phase);
    weight.y = weight_gain * sin(weight_phase);

    oskar_Mem *d_gain, *d_gain_error, *d_phase, *d_phase_error, *d_errors;
    oskar_Mem *h_weights, *d_weights;
    d_errors = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_GPU,
            num_elements, &status);
    d_gain = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_gain_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    h_weights = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_CPU,
            num_elements, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Fill the input arrays with constant values.
    oskar_mem_set_value_real(d_gain, gain, 0, 0, &status);
    oskar_mem_set_value_real(d_gain_error, gain_error, 0, 0, &status);
    oskar_mem_set_value_real(d_phase, phase, 0, 0, &status);
    oskar_mem_set_value_real(d_phase_error, phase_error, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Set every host weight to the same complex value, then copy to the GPU.
    double2* h_weights_ = oskar_mem_double2(h_weights, &status);
    for (int i = 0; i < num_elements; ++i)
    {
        h_weights_[i].x = weight.x;
        h_weights_[i].y = weight.y;
    }
    d_weights = oskar_mem_create_copy(h_weights, OSKAR_GPU, &status);

    oskar_evaluate_element_weights_errors(num_elements,
            d_gain, d_gain_error, d_phase, d_phase_error,
            0, 0, 0, d_errors, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    oskar_mem_element_multiply(NULL, d_weights, d_errors, num_elements,
            &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Write memory to file for inspection. The stream is checked first so
    // that a null pointer is never handed to the writer.
    const char* fname = "temp_test_weights.dat";
    FILE* file = fopen(fname, "w");
    EXPECT_TRUE(file != NULL);
    if (file)
    {
        // Arguments are (file, num_mem, offset, num_elements, status,
        // arrays...): write every element starting from offset 0.
        oskar_mem_save_ascii(file, 7, 0, num_elements, &status,
                d_gain, d_gain_error, d_phase, d_phase_error, d_errors,
                h_weights, d_weights);
        fclose(file);
        remove(fname);
    }

    // Free memory.
    oskar_mem_free(d_gain, &status);
    oskar_mem_free(d_gain_error, &status);
    oskar_mem_free(d_phase, &status);
    oskar_mem_free(d_phase_error, &status);
    oskar_mem_free(d_errors, &status);
    oskar_mem_free(h_weights, &status);
    oskar_mem_free(d_weights, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
}
TEST(element_weights_errors, test_reinit)
{
    // Repeatedly evaluates element weight errors for every combination of
    // channel, chunk, time and station, and logs the values to a text file.
    int num_elements = 5;
    int status = 0;
    double gain = 1.5;
    double gain_error = 0.2;
    double phase = 0.1 * M_PI;
    double phase_error = (5 / 180.0) * M_PI;

    oskar_Mem *d_errors, *d_gain, *d_gain_error, *d_phase, *d_phase_error;
    d_errors = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_GPU,
            num_elements, &status);
    d_gain = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_gain_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Fill the input arrays with constant values.
    oskar_mem_set_value_real(d_gain, gain, 0, 0, &status);
    oskar_mem_set_value_real(d_gain_error, gain_error, 0, 0, &status);
    oskar_mem_set_value_real(d_phase, phase, 0, 0, &status);
    oskar_mem_set_value_real(d_phase_error, phase_error, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    int num_channels = 2;
    int num_chunks = 3;
    int num_stations = 5;
    int num_times = 3;
    unsigned int seed = 1;

    // Every fprintf below needs a valid stream, so stop here if the log
    // file could not be opened.
    const char* fname = "temp_test_weights_error_reinit.dat";
    FILE* file = fopen(fname, "w");
    ASSERT_TRUE(file != NULL);

    for (int chan = 0; chan < num_channels; ++chan)
    {
        fprintf(file, "channel: %i\n", chan);
        for (int chunk = 0; chunk < num_chunks; ++chunk)
        {
            fprintf(file, " chunk: %i\n", chunk);
            ASSERT_EQ(0, status) << oskar_get_error_string(status);
            for (int t = 0; t < num_times; ++t)
            {
                fprintf(file, " time: %i\n", t);
                for (int s = 0; s < num_stations; ++s)
                {
                    fprintf(file, " station: %i ==> ", s);
                    oskar_evaluate_element_weights_errors(num_elements,
                            d_gain, d_gain_error, d_phase, d_phase_error,
                            seed, t, s, d_errors, &status);
                    ASSERT_EQ(0, status) << oskar_get_error_string(status);

                    // Copy the errors to the host and log each one.
                    oskar_Mem *h_errors = oskar_mem_create_copy(d_errors,
                            OSKAR_CPU, &status);
                    double2* errors = oskar_mem_double2(h_errors, &status);
                    for (int i = 0; i < num_elements; ++i)
                    {
                        fprintf(file, "(% -6.4f, % -6.4f), ",
                                errors[i].x, errors[i].y);
                    }
                    fprintf(file, "\n");
                    oskar_mem_free(h_errors, &status);
                }
            }
            ASSERT_EQ(0, status) << oskar_get_error_string(status);
        }
    }

    // Note: the log file is closed but not removed.
    fclose(file);

    oskar_mem_free(d_gain, &status);
    oskar_mem_free(d_gain_error, &status);
    oskar_mem_free(d_phase, &status);
    oskar_mem_free(d_phase_error, &status);
    oskar_mem_free(d_errors, &status);
}
TEST(evaluate_baselines, cpu_gpu)
{
    // Converts station (u,v,w) coordinates to baseline coordinates on the
    // CPU, checks them against a direct calculation, then repeats the
    // conversion on the device (if CUDA is available) and compares results.
    const int num_stations = 50;
    const int num_baselines = num_stations * (num_stations - 1) / 2;
    const int type = OSKAR_DOUBLE;
    int status = 0;

    // Allocate host memory.
    int location = OSKAR_CPU;
    oskar_Mem* u = oskar_mem_create(type, location, num_stations, &status);
    oskar_Mem* v = oskar_mem_create(type, location, num_stations, &status);
    oskar_Mem* w = oskar_mem_create(type, location, num_stations, &status);
    oskar_Mem* uu = oskar_mem_create(type, location, num_baselines, &status);
    oskar_Mem* vv = oskar_mem_create(type, location, num_baselines, &status);
    oskar_Mem* ww = oskar_mem_create(type, location, num_baselines, &status);
    double* u_ = oskar_mem_double(u, &status);
    double* v_ = oskar_mem_double(v, &status);
    double* w_ = oskar_mem_double(w, &status);
    double* uu_ = oskar_mem_double(uu, &status);
    double* vv_ = oskar_mem_double(vv, &status);
    double* ww_ = oskar_mem_double(ww, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Fill station coordinates with test data.
    for (int i = 0; i < num_stations; ++i)
    {
        u_[i] = (double)(i + 1);
        v_[i] = (double)(i + 2);
        w_[i] = (double)(i + 3);
    }

    // Evaluate baseline coordinates on CPU.
    oskar_convert_station_uvw_to_baseline_uvw(num_stations,
            0, u, v, w, 0, uu, vv, ww, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check results are correct: baseline b pairs stations (s1, s2) with
    // s2 > s1, and holds the coordinate difference between them.
    int b = 0;
    for (int s1 = 0; s1 < num_stations; ++s1)
    {
        for (int s2 = s1 + 1; s2 < num_stations; ++s2)
        {
            EXPECT_DOUBLE_EQ(u_[s2] - u_[s1], uu_[b]);
            EXPECT_DOUBLE_EQ(v_[s2] - v_[s1], vv_[b]);
            EXPECT_DOUBLE_EQ(w_[s2] - w_[s1], ww_[b]);
            ++b;
        }
    }

    // Allocate device memory and copy input data.
#ifdef OSKAR_HAVE_CUDA
    location = OSKAR_GPU;
#endif
    oskar_Mem* u_gpu = oskar_mem_create_copy(u, location, &status);
    oskar_Mem* v_gpu = oskar_mem_create_copy(v, location, &status);
    oskar_Mem* w_gpu = oskar_mem_create_copy(w, location, &status);
    oskar_Mem* uu_gpu = oskar_mem_create(type, location, num_baselines,
            &status);
    oskar_Mem* vv_gpu = oskar_mem_create(type, location, num_baselines,
            &status);
    oskar_Mem* ww_gpu = oskar_mem_create(type, location, num_baselines,
            &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Evaluate baseline coordinates on device.
    oskar_convert_station_uvw_to_baseline_uvw(num_stations,
            0, u_gpu, v_gpu, w_gpu, 0, uu_gpu, vv_gpu, ww_gpu, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check results are consistent, one coordinate at a time.
    oskar_Mem* const device_results[] = { uu_gpu, vv_gpu, ww_gpu };
    oskar_Mem* const host_results[] = { uu, vv, ww };
    double max_, avg;
    for (int k = 0; k < 3; ++k)
    {
        oskar_mem_evaluate_relative_error(device_results[k], host_results[k],
                0, &max_, &avg, 0, &status);
        ASSERT_LT(max_, 1e-12);
        ASSERT_LT(avg, 1e-12);
    }

    // Free memory.
    oskar_mem_free(u, &status);
    oskar_mem_free(v, &status);
    oskar_mem_free(w, &status);
    oskar_mem_free(uu, &status);
    oskar_mem_free(vv, &status);
    oskar_mem_free(ww, &status);
    oskar_mem_free(u_gpu, &status);
    oskar_mem_free(v_gpu, &status);
    oskar_mem_free(w_gpu, &status);
    oskar_mem_free(uu_gpu, &status);
    oskar_mem_free(vv_gpu, &status);
    oskar_mem_free(ww_gpu, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
}
static void* run_blocks(void* arg) { oskar_Imager* h; oskar_Mem *plane, *uu, *vv, *ww = 0, *amp, *weight, *block, *l, *m, *n; size_t max_size; const size_t smallest = 1024, largest = 65536; int dev_loc = OSKAR_CPU, *status; /* Get thread function arguments. */ h = ((ThreadArgs*)arg)->h; const int thread_id = ((ThreadArgs*)arg)->thread_id; const int num_vis = ((ThreadArgs*)arg)->num_vis; plane = ((ThreadArgs*)arg)->plane; status = &(h->status); /* Set the device used by the thread. */ if (thread_id < h->num_gpus) { dev_loc = h->dev_loc; oskar_device_set(h->dev_loc, h->gpu_ids[thread_id], status); } /* Copy visibility data to device. */ uu = oskar_mem_create_copy(((ThreadArgs*)arg)->uu, dev_loc, status); vv = oskar_mem_create_copy(((ThreadArgs*)arg)->vv, dev_loc, status); amp = oskar_mem_create_copy(((ThreadArgs*)arg)->amp, dev_loc, status); weight = oskar_mem_create_copy(((ThreadArgs*)arg)->weight, dev_loc, status); if (h->algorithm == OSKAR_ALGORITHM_DFT_3D) ww = oskar_mem_create_copy(((ThreadArgs*)arg)->ww, dev_loc, status); #ifdef _OPENMP /* Disable nested parallelism. */ omp_set_nested(0); omp_set_num_threads(1); #endif /* Calculate the maximum pixel block size, and number of blocks. */ const size_t num_pixels = (size_t)h->image_size * (size_t)h->image_size; max_size = num_pixels / h->num_devices; max_size = ((max_size + smallest - 1) / smallest) * smallest; if (max_size > largest) max_size = largest; if (max_size < smallest) max_size = smallest; const int num_blocks = (int) ((num_pixels + max_size - 1) / max_size); /* Allocate device memory for pixel block data. */ block = oskar_mem_create(h->imager_prec, dev_loc, 0, status); l = oskar_mem_create(h->imager_prec, dev_loc, max_size, status); m = oskar_mem_create(h->imager_prec, dev_loc, max_size, status); n = oskar_mem_create(h->imager_prec, dev_loc, max_size, status); /* Loop until all blocks are done. */ for (;;) { size_t block_size; /* Get a unique block index. 
*/ oskar_mutex_lock(h->mutex); const int i_block = (h->i_block)++; oskar_mutex_unlock(h->mutex); if ((i_block >= num_blocks) || *status) break; /* Calculate the block size. */ const size_t block_start = i_block * max_size; block_size = num_pixels - block_start; if (block_size > max_size) block_size = max_size; /* Copy the (l,m,n) positions for the block. */ oskar_mem_copy_contents(l, h->l, 0, block_start, block_size, status); oskar_mem_copy_contents(m, h->m, 0, block_start, block_size, status); if (h->algorithm == OSKAR_ALGORITHM_DFT_3D) oskar_mem_copy_contents(n, h->n, 0, block_start, block_size, status); /* Run DFT for the block. */ oskar_dft_c2r(num_vis, 2.0 * M_PI, uu, vv, ww, amp, weight, (int) block_size, l, m, n, block, status); /* Add data to existing pixels. */ oskar_mem_add(plane, plane, block, block_start, block_start, 0, block_size, status); } /* Free memory. */ oskar_mem_free(uu, status); oskar_mem_free(vv, status); oskar_mem_free(ww, status); oskar_mem_free(amp, status); oskar_mem_free(weight, status); oskar_mem_free(block, status); oskar_mem_free(l, status); oskar_mem_free(m, status); oskar_mem_free(n, status); return 0; }