Пример #1
0
TEST(Mem, set_value_real_double_complex_matrix)
{
    // Double precision complex matrix.
    int n = 100, status = 0;
    oskar_Mem *mem, *mem2;
    mem = oskar_mem_create(OSKAR_DOUBLE_COMPLEX_MATRIX, OSKAR_GPU, n,
            &status);
    oskar_mem_set_value_real(mem, 6.5, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    mem2 = oskar_mem_create_copy(mem, OSKAR_CPU, &status);
    double4c* v = oskar_mem_double4c(mem2, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    for (int i = 0; i < n; ++i)
    {
        EXPECT_DOUBLE_EQ(v[i].a.x, 6.5);
        EXPECT_DOUBLE_EQ(v[i].a.y, 0.0);
        EXPECT_DOUBLE_EQ(v[i].b.x, 0.0);
        EXPECT_DOUBLE_EQ(v[i].b.y, 0.0);
        EXPECT_DOUBLE_EQ(v[i].c.x, 0.0);
        EXPECT_DOUBLE_EQ(v[i].c.y, 0.0);
        EXPECT_DOUBLE_EQ(v[i].d.x, 6.5);
        EXPECT_DOUBLE_EQ(v[i].d.y, 0.0);
    }
    oskar_mem_free(mem, &status);
    oskar_mem_free(mem2, &status);
}
Пример #2
0
oskar_Mem* oskar_mem_read_binary_raw(const char* filename, int type,
        int location, int* status)
{
    size_t num_elements, element_size, size_bytes;
    oskar_Mem *mem = 0;
    FILE* stream;

    /* Check if safe to proceed. */
    if (*status) return 0;

    /* Open the input file. */
    stream = fopen(filename, "rb");
    if (!stream)
    {
        *status = OSKAR_ERR_FILE_IO;
        return 0;
    }

    /* Get the file size. */
    fseek(stream, 0, SEEK_END);
    size_bytes = ftell(stream);

    /* Create memory block of the right size. */
    element_size = oskar_mem_element_size(type);
    num_elements = (size_t)ceil(size_bytes / element_size);
    mem = oskar_mem_create(type, OSKAR_CPU, num_elements, status);
    if (*status)
    {
        oskar_mem_free(mem, status);
        fclose(stream);
        return 0;
    }

    /* Read the data. */
    fseek(stream, 0, SEEK_SET);
    if (fread(oskar_mem_void(mem), 1, size_bytes, stream) != size_bytes)
    {
        oskar_mem_free(mem, status);
        fclose(stream);
        *status = OSKAR_ERR_FILE_IO;
        return 0;
    }

    /* Close the input file. */
    fclose(stream);

    /* Copy to GPU memory if required. */
    if (location != OSKAR_CPU)
    {
        oskar_Mem* gpu;
        gpu = oskar_mem_create_copy(mem, location, status);
        oskar_mem_free(mem, status);
        return gpu;
    }

    return mem;
}
Пример #3
0
void oskar_fft_exec(oskar_FFT* h, oskar_Mem* data, int* status)
{
    oskar_Mem *data_copy = 0, *data_ptr = data;
    if (oskar_mem_location(data) != h->location)
    {
        data_copy = oskar_mem_create_copy(data, h->location, status);
        data_ptr = data_copy;
    }
    if (h->location == OSKAR_CPU)
    {
        if (h->num_dim == 1)
        {
            *status = OSKAR_ERR_FUNCTION_NOT_AVAILABLE;
        }
        else if (h->num_dim == 2)
        {
            if (h->precision == OSKAR_DOUBLE)
                oskar_fftpack_cfft2f(h->dim_size, h->dim_size, h->dim_size,
                        oskar_mem_double(data_ptr, status),
                        oskar_mem_double(h->fftpack_wsave, status),
                        oskar_mem_double(h->fftpack_work, status));
            else
                oskar_fftpack_cfft2f_f(h->dim_size, h->dim_size, h->dim_size,
                        oskar_mem_float(data_ptr, status),
                        oskar_mem_float(h->fftpack_wsave, status),
                        oskar_mem_float(h->fftpack_work, status));
            /* This step not needed for W-kernel generation, so turn it off. */
            if (h->ensure_consistent_norm)
                oskar_mem_scale_real(data_ptr, (double)h->num_cells_total,
                        0, h->num_cells_total, status);
        }
    }
    else if (h->location == OSKAR_GPU)
    {
#ifdef OSKAR_HAVE_CUDA
        if (h->precision == OSKAR_DOUBLE)
            cufftExecZ2Z(h->cufft_plan,
                    (cufftDoubleComplex*) oskar_mem_void(data_ptr),
                    (cufftDoubleComplex*) oskar_mem_void(data_ptr),
                    CUFFT_FORWARD);
        else
            cufftExecC2C(h->cufft_plan,
                    (cufftComplex*) oskar_mem_void(data_ptr),
                    (cufftComplex*) oskar_mem_void(data_ptr),
                    CUFFT_FORWARD);
#endif
    }
    else
        *status = OSKAR_ERR_BAD_LOCATION;
    if (oskar_mem_location(data) != h->location)
        oskar_mem_copy(data, data_ptr, status);
    oskar_mem_free(data_copy, status);
}
Пример #4
0
TEST(Mem, set_value_real_single_complex)
{
    // Single precision complex.
    int n = 100, status = 0;
    oskar_Mem *mem, *mem2;
    mem = oskar_mem_create(OSKAR_SINGLE_COMPLEX, OSKAR_GPU, n,
            &status);
    oskar_mem_set_value_real(mem, 6.5, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    mem2 = oskar_mem_create_copy(mem, OSKAR_CPU, &status);
    float2* v = oskar_mem_float2(mem2, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    for (int i = 0; i < n; ++i)
    {
        EXPECT_FLOAT_EQ(v[i].x, 6.5);
        EXPECT_FLOAT_EQ(v[i].y, 0.0);
    }
    oskar_mem_free(mem, &status);
    oskar_mem_free(mem2, &status);
}
void oskar_mem_evaluate_relative_error(const oskar_Mem* val_approx,
        const oskar_Mem* val_accurate, double* min_rel_error,
        double* max_rel_error, double* avg_rel_error, double* std_rel_error,
        int* status)
{
    int prec_approx, prec_accurate;
    size_t i, n;
    const oskar_Mem *app_ptr, *acc_ptr;
    oskar_Mem *approx_temp = 0, *accurate_temp = 0;
    double old_m = 0.0, new_m = 0.0, old_s = 0.0, new_s = 0.0;

    /* Check if safe to proceed. */
    if (*status) return;

    /* Initialise outputs. */
    if (max_rel_error) *max_rel_error = -DBL_MAX;
    if (min_rel_error) *min_rel_error = DBL_MAX;
    if (avg_rel_error) *avg_rel_error = DBL_MAX;
    if (std_rel_error) *std_rel_error = DBL_MAX;

    /* Type and dimension check. */
    if (oskar_mem_is_matrix(val_approx) && !oskar_mem_is_matrix(val_accurate))
    {
        *status = OSKAR_ERR_TYPE_MISMATCH;
        return;
    }
    if (oskar_mem_is_complex(val_approx) && !oskar_mem_is_complex(val_accurate))
    {
        *status = OSKAR_ERR_TYPE_MISMATCH;
        return;
    }

    /* Get and check base types. */
    prec_approx = oskar_mem_precision(val_approx);
    prec_accurate = oskar_mem_precision(val_accurate);
    if (prec_approx != OSKAR_SINGLE && prec_approx != OSKAR_DOUBLE)
    {
        *status = OSKAR_ERR_BAD_DATA_TYPE;
        return;
    }
    if (prec_accurate != OSKAR_SINGLE && prec_accurate != OSKAR_DOUBLE)
    {
        *status = OSKAR_ERR_BAD_DATA_TYPE;
        return;
    }

    /* Get number of elements to check. */
    n = oskar_mem_length(val_approx) < oskar_mem_length(val_accurate) ?
            oskar_mem_length(val_approx) : oskar_mem_length(val_accurate);
    if (oskar_mem_is_matrix(val_approx)) n *= 4;

    /* Copy input data to temporary CPU arrays if required. */
    app_ptr = val_approx;
    acc_ptr = val_accurate;
    if (oskar_mem_location(val_approx) != OSKAR_CPU)
    {
        approx_temp = oskar_mem_create_copy(val_approx, OSKAR_CPU,
                status);
        if (*status)
        {
            oskar_mem_free(approx_temp, status);
            return;
        }
        app_ptr = approx_temp;
    }
    if (oskar_mem_location(val_accurate) != OSKAR_CPU)
    {
        accurate_temp = oskar_mem_create_copy(val_accurate, OSKAR_CPU,
                status);
        if (*status)
        {
            oskar_mem_free(accurate_temp, status);
            return;
        }
        acc_ptr = accurate_temp;
    }

    /* Check numbers are the same, to appropriate precision. */
    if (prec_approx == OSKAR_SINGLE && prec_accurate == OSKAR_SINGLE)
    {
        const float *approx, *accurate;
        approx = oskar_mem_float_const(app_ptr, status);
        accurate = oskar_mem_float_const(acc_ptr, status);
        CHECK_ELEMENTS(1e-5)
    }
    else if (prec_approx == OSKAR_DOUBLE && prec_accurate == OSKAR_SINGLE)
Пример #6
0
void oskar_mem_save_ascii(FILE* file, size_t num_mem,
        size_t offset, size_t num_elements, int* status, ...)
{
    int type;
    size_t i, j;
    va_list args;
    oskar_Mem** handles; /* Array of oskar_Mem pointers in CPU memory. */

    /* Check if safe to proceed. */
    if (*status) return;

    /* Check there are at least the number of specified elements in
     * each array. */
    va_start(args, status);
    for (i = 0; i < num_mem; ++i)
    {
        const oskar_Mem* mem;
        mem = va_arg(args, const oskar_Mem*);
        if (oskar_mem_length(mem) < num_elements)
            *status = OSKAR_ERR_DIMENSION_MISMATCH;
    }
    va_end(args);

    /* Check if safe to proceed. */
    if (*status) return;

    /* Allocate and set up the handle array. */
    handles = (oskar_Mem**) malloc(num_mem * sizeof(oskar_Mem*));
    va_start(args, status);
    for (i = 0; i < num_mem; ++i)
    {
        oskar_Mem* mem;
        mem = va_arg(args, oskar_Mem*);
        if (oskar_mem_location(mem) != OSKAR_CPU)
        {
            handles[i] = oskar_mem_create_copy(mem, OSKAR_CPU, status);
        }
        else
        {
            handles[i] = mem;
        }
    }
    va_end(args);

    for (j = 0; j < num_elements; ++j)
    {
        /* Break if error. */
        if (*status) break;

        for (i = 0; i < num_mem; ++i)
        {
            const void* data;
            data = oskar_mem_void_const(handles[i]);
            type = oskar_mem_type(handles[i]);
            switch (type)
            {
            case OSKAR_SINGLE:
            {
                fprintf(file, SDF, ((const float*)data)[j + offset]);
                continue;
            }
            case OSKAR_DOUBLE:
            {
                fprintf(file, SDD, ((const double*)data)[j + offset]);
                continue;
            }
            case OSKAR_SINGLE_COMPLEX:
            {
                float2 d;
                d = ((const float2*)data)[j + offset];
                fprintf(file, SDF SDF, d.x, d.y);
                continue;
            }
            case OSKAR_DOUBLE_COMPLEX:
            {
                double2 d;
                d = ((const double2*)data)[j + offset];
                fprintf(file, SDD SDD, d.x, d.y);
                continue;
            }
            case OSKAR_SINGLE_COMPLEX_MATRIX:
            {
                float4c d;
                d = ((const float4c*)data)[j + offset];
                fprintf(file, SDF SDF SDF SDF SDF SDF SDF SDF,
                        d.a.x, d.a.y, d.b.x, d.b.y, d.c.x, d.c.y, d.d.x, d.d.y);
                continue;
            }
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
            {
                double4c d;
                d = ((const double4c*)data)[j + offset];
                fprintf(file, SDD SDD SDD SDD SDD SDD SDD SDD,
                        d.a.x, d.a.y, d.b.x, d.b.y, d.c.x, d.c.y, d.d.x, d.d.y);
                continue;
            }
            case OSKAR_CHAR:
            {
                putc(((const char*)data)[j + offset], file);
                continue;
            }
            case OSKAR_INT:
            {
                fprintf(file, "%5d ", ((const int*)data)[j + offset]);
                continue;
            }
            default:
            {
                *status = OSKAR_ERR_BAD_DATA_TYPE;
                continue;
            }
            }
        }
        putc('\n', file);
    }

    /* Free any temporary memory used by this function. */
    va_start(args, status);
    for (i = 0; i < num_mem; ++i)
    {
        const oskar_Mem* mem;
        mem = va_arg(args, const oskar_Mem*);
        if (oskar_mem_location(mem) != OSKAR_CPU)
        {
            oskar_mem_free(handles[i], status);
        }
    }
    va_end(args);

    /* Free the handle array. */
    free(handles);
}
Пример #7
0
TEST(binary_file, binary_read_write_mem)
{
    const char filename[] = "temp_test_mem_binary.dat";
    int num_cpu = 1000;
    int num_gpu = 2048;
    int status = 0;

    // Create the handle.
    oskar_Binary* h = oskar_binary_create(filename, 'w', &status);

    // Save data from CPU.
    {
        oskar_Mem* mem = oskar_mem_create(OSKAR_SINGLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        float* data = oskar_mem_float(mem, &status);

        // Fill array with data.
        for (int i = 0; i < num_cpu; ++i)
        {
            data[i] = i * 1024.0;
        }

        // Save CPU data.
        oskar_binary_write_mem_ext(h, mem, "USER", "TEST", 987654, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(mem, &status);
    }

    // Save data from GPU.
    {
        oskar_Mem *mem_cpu, *mem_gpu;
        mem_cpu = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_CPU,
                num_gpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double2* data = oskar_mem_double2(mem_cpu, &status);

        // Fill array with data.
        for (int i = 0; i < num_gpu; ++i)
        {
            data[i].x = i * 10.0;
            data[i].y = i * 20.0 + 1.0;
        }

        // Copy data to GPU.
        mem_gpu = oskar_mem_create_copy(mem_cpu, OSKAR_GPU, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Save GPU data.
        oskar_binary_write_mem_ext(h, mem_gpu, "AA", "BB", 2, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(mem_cpu, &status);
        oskar_mem_free(mem_gpu, &status);
    }

    // Save a single integer with a large index.
    int val = 0xFFFFFF;
    oskar_binary_write_int(h, 50, 9, 800000, val, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Save data from CPU with blank tags.
    {
        oskar_Mem* mem = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double* data = oskar_mem_double(mem, &status);

        // Fill array with data.
        for (int i = 0; i < num_cpu; ++i)
        {
            data[i] = i * 500.0;
        }

        // Save CPU data.
        oskar_binary_write_mem_ext(h, mem, "", "", 10, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Fill array with data.
        for (int i = 0; i < num_cpu; ++i)
        {
            data[i] = i * 501.0;
        }

        // Save CPU data.
        oskar_binary_write_mem_ext(h, mem, "", "", 11, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(mem, &status);
    }

    // Save CPU data with tags that are equal lengths.
    {
        oskar_Mem* mem = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double* data = oskar_mem_double(mem, &status);

        // Fill array with data.
        for (int i = 0; i < num_cpu; ++i)
        {
            data[i] = i * 1001.0;
        }

        // Save CPU data.
        oskar_binary_write_mem_ext(h, mem, "DOG", "CAT", 0, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Fill array with data.
        for (int i = 0; i < num_cpu; ++i)
        {
            data[i] = i * 127.0;
        }

        // Save CPU data.
        oskar_binary_write_mem_ext(h, mem, "ONE", "TWO", 0, 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_mem_free(mem, &status);
    }

    // Create the handle for reading.
    oskar_binary_free(h);
    h = oskar_binary_create(filename, 'r', &status);

    // Load data directly to GPU.
    {
        oskar_Mem *mem_gpu, *mem_cpu;
        mem_gpu = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_GPU,
                0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, mem_gpu, "AA", "BB", 2, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        EXPECT_EQ(num_gpu, (int)oskar_mem_length(mem_gpu));

        // Copy back to CPU and examine contents.
        mem_cpu = oskar_mem_create_copy(mem_gpu, OSKAR_CPU, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        double2* data = oskar_mem_double2(mem_cpu, &status);
        for (int i = 0; i < num_gpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 10.0,       data[i].x);
            EXPECT_DOUBLE_EQ(i * 20.0 + 1.0, data[i].y);
        }
        oskar_mem_free(mem_cpu, &status);
        oskar_mem_free(mem_gpu, &status);
    }

    // Load integer with a large index.
    int new_val = 0;
    oskar_binary_read_int(h, 50, 9, 800000, &new_val, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    EXPECT_EQ(val, new_val);

    // Load CPU data.
    {
        oskar_Mem* mem = oskar_mem_create(OSKAR_SINGLE, OSKAR_CPU,
                num_cpu, &status);
        oskar_binary_read_mem_ext(h, mem, "USER", "TEST", 987654, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(mem));
        float* data = oskar_mem_float(mem, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 1024.0, data[i]);
        }
        oskar_mem_free(mem, &status);
    }

    // Load CPU data with blank tags.
    {
        double* data;
        oskar_Mem* mem = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU,
                num_cpu, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, mem, "", "", 10, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, mem, "DOESN'T", "EXIST", 10, &status);
        EXPECT_EQ((int)OSKAR_ERR_BINARY_TAG_NOT_FOUND, status);
        status = 0;
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(mem));
        data = oskar_mem_double(mem, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 500.0, data[i]);
        }
        oskar_binary_read_mem_ext(h, mem, "", "", 11, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(mem));
        data = oskar_mem_double(mem, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 501.0, data[i]);
        }
        oskar_mem_free(mem, &status);
    }

    // Load CPU data with tags that are equal lengths.
    {
        double* data;
        oskar_Mem* mem = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, 0,
                &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, mem, "ONE", "TWO", 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(mem));
        data = oskar_mem_double(mem, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 127.0, data[i]);
        }
        oskar_binary_read_mem_ext(h, mem, "DOG", "CAT", 0, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        ASSERT_EQ(num_cpu, (int)oskar_mem_length(mem));
        data = oskar_mem_double(mem, &status);
        for (int i = 0; i < num_cpu; ++i)
        {
            EXPECT_DOUBLE_EQ(i * 1001.0, data[i]);
        }
        oskar_mem_free(mem, &status);
    }

    // Try to load data that isn't present.
    {
        oskar_Mem* mem = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, 0,
                &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_binary_read_mem_ext(h, mem, "DOESN'T", "EXIST", 10, &status);
        EXPECT_EQ((int)OSKAR_ERR_BINARY_TAG_NOT_FOUND, status);
        status = 0;
        EXPECT_EQ(0, (int)oskar_mem_length(mem));
        oskar_mem_free(mem, &status);
    }

    // Release the handle.
    oskar_binary_free(h);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
}
Пример #8
0
void oskar_mem_multiply(
        oskar_Mem* out,
        const oskar_Mem* in1,
        const oskar_Mem* in2,
        size_t offset_out,
        size_t offset_in1,
        size_t offset_in2,
        size_t num_elements,
        int* status)
{
    oskar_Mem *a_temp = 0, *b_temp = 0;
    const oskar_Mem *a_, *b_; /* Pointers. */
    if (num_elements == 0) return;
    const int location = oskar_mem_location(out);
    const unsigned int off_a = (unsigned int) offset_in1;
    const unsigned int off_b = (unsigned int) offset_in2;
    const unsigned int off_c = (unsigned int) offset_out;
    const unsigned int n = (unsigned int) num_elements;
    if (*status) return;
    a_ = in1;
    b_ = in2;
    if (oskar_mem_location(in1) != location)
    {
        a_temp = oskar_mem_create_copy(in1, location, status);
        a_ = a_temp;
    }
    if (oskar_mem_location(in2) != location)
    {
        b_temp = oskar_mem_create_copy(in2, location, status);
        b_ = b_temp;
    }
    if (location == OSKAR_CPU)
    {
        void *c = out->data;
        const void *a = a_->data, *b = b_->data;
        /* Check if types are all the same. */
        if (out->type == in1->type && out->type == in2->type)
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE:
                mem_mul_rr_r_double(off_a, off_b, off_c, n,
                        (const double*)a, (const double*)b, (double*)c);
                break;
            case OSKAR_DOUBLE_COMPLEX:
                mem_mul_cc_c_double(off_a, off_b, off_c, n,
                        (const double2*)a, (const double2*)b, (double2*)c);
                break;
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
                mem_mul_mm_m_double(off_a, off_b, off_c, n,
                        (const double4c*)a, (const double4c*)b, (double4c*)c);
                break;
            case OSKAR_SINGLE:
                mem_mul_rr_r_float(off_a, off_b, off_c, n,
                        (const float*)a, (const float*)b, (float*)c);
                break;
            case OSKAR_SINGLE_COMPLEX:
                mem_mul_cc_c_float(off_a, off_b, off_c, n,
                        (const float2*)a, (const float2*)b, (float2*)c);
                break;
            case OSKAR_SINGLE_COMPLEX_MATRIX:
                mem_mul_mm_m_float(off_a, off_b, off_c, n,
                        (const float4c*)a, (const float4c*)b, (float4c*)c);
                break;
            default:
                *status = OSKAR_ERR_BAD_DATA_TYPE;
                break;
            }
        }
        else
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_DOUBLE_COMPLEX:
                    if (in2->type == in1->type)
                        mem_mul_cc_m_double(off_a, off_b, off_c, n,
                                (const double2*)a, (const double2*)b,
                                (double4c*)c);
                    else if (in2->type == out->type)
                        mem_mul_cm_m_double(off_a, off_b, off_c, n,
                                (const double2*)a, (const double4c*)b,
                                (double4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_DOUBLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_DOUBLE_COMPLEX)
                        mem_mul_mc_m_double(off_a, off_b, off_c, n,
                                (const double4c*)a, (const double2*)b,
                                (double4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            case OSKAR_SINGLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_SINGLE_COMPLEX:
                    if (in2->type == in1->type)
                        mem_mul_cc_m_float(off_a, off_b, off_c, n,
                                (const float2*)a, (const float2*)b,
                                (float4c*)c);
                    else if (in2->type == out->type)
                        mem_mul_cm_m_float(off_a, off_b, off_c, n,
                                (const float2*)a, (const float4c*)b,
                                (float4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_SINGLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_SINGLE_COMPLEX)
                        mem_mul_mc_m_float(off_a, off_b, off_c, n,
                                (const float4c*)a, (const float2*)b,
                                (float4c*)c);
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            default:
                *status = OSKAR_ERR_TYPE_MISMATCH;
                break;
            }
        }
    }
    else
    {
        const char* k = 0;
        /* Check if types are all the same. */
        if (out->type == in1->type && out->type == in2->type)
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE:                k = "mem_mul_rr_r_double"; break;
            case OSKAR_DOUBLE_COMPLEX:        k = "mem_mul_cc_c_double"; break;
            case OSKAR_DOUBLE_COMPLEX_MATRIX: k = "mem_mul_mm_m_double"; break;
            case OSKAR_SINGLE:                k = "mem_mul_rr_r_float"; break;
            case OSKAR_SINGLE_COMPLEX:        k = "mem_mul_cc_c_float"; break;
            case OSKAR_SINGLE_COMPLEX_MATRIX: k = "mem_mul_mm_m_float"; break;
            default:
                *status = OSKAR_ERR_BAD_DATA_TYPE;
                break;
            }
        }
        else
        {
            switch (out->type)
            {
            case OSKAR_DOUBLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_DOUBLE_COMPLEX:
                    if (in2->type == in1->type)
                        k = "mem_mul_cc_m_double";
                    else if (in2->type == out->type)
                        k = "mem_mul_cm_m_double";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_DOUBLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_DOUBLE_COMPLEX)
                        k = "mem_mul_mc_m_double";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            case OSKAR_SINGLE_COMPLEX_MATRIX:
            {
                switch (in1->type)
                {
                case OSKAR_SINGLE_COMPLEX:
                    if (in2->type == in1->type)
                        k = "mem_mul_cc_m_float";
                    else if (in2->type == out->type)
                        k = "mem_mul_cm_m_float";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                case OSKAR_SINGLE_COMPLEX_MATRIX:
                    if (in2->type == OSKAR_SINGLE_COMPLEX)
                        k = "mem_mul_mc_m_float";
                    else
                        *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                default:
                    *status = OSKAR_ERR_TYPE_MISMATCH;
                    break;
                }
                break;
            }
            default:
                *status = OSKAR_ERR_TYPE_MISMATCH;
                break;
            }
        }
        if (!*status)
        {
            size_t local_size[] = {256, 1, 1}, global_size[] = {1, 1, 1};
            oskar_device_check_local_size(location, 0, local_size);
            global_size[0] = oskar_device_global_size(
                    num_elements, local_size[0]);
            const oskar_Arg args[] = {
                    {INT_SZ, &off_a},
                    {INT_SZ, &off_b},
                    {INT_SZ, &off_c},
                    {INT_SZ, &n},
                    {PTR_SZ, oskar_mem_buffer_const(a_)},
                    {PTR_SZ, oskar_mem_buffer_const(b_)},
                    {PTR_SZ, oskar_mem_buffer(out)}
            };
            oskar_device_launch_kernel(k, location, 1, local_size, global_size,
                    sizeof(args) / sizeof(oskar_Arg), args, 0, 0, status);
        }
    }

    /* Free temporary arrays. */
    oskar_mem_free(a_temp, status);
    oskar_mem_free(b_temp, status);
}
Пример #9
0
TEST(prefix_sum, test)
{
    int n = 100000, status = 0, exclusive = 1;
    oskar_Mem* in_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status);
    oskar_Mem* out_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status);
    oskar_Timer* tmr = oskar_timer_create(OSKAR_TIMER_NATIVE);

    // Fill input with random integers from 0 to 9.
    int* t = oskar_mem_int(in_cpu, &status);
    srand(1556);
    for (int i = 0; i < n; ++i)
        t[i] = (int) (10.0 * rand() / ((double) RAND_MAX));
    t[0] = 3;

    // Run on CPU.
    oskar_timer_start(tmr);
    oskar_prefix_sum(n, in_cpu, out_cpu, 0, exclusive, &status);
    EXPECT_EQ(0, status);
    printf("Prefix sum on CPU took %.3f sec\n", oskar_timer_elapsed(tmr));

#ifdef OSKAR_HAVE_CUDA
    // Run on GPU with CUDA.
    oskar_Mem* in_gpu = oskar_mem_create_copy(in_cpu, OSKAR_GPU, &status);
    oskar_Mem* out_gpu = oskar_mem_create(OSKAR_INT, OSKAR_GPU, n, &status);
    oskar_timer_start(tmr);
    oskar_prefix_sum(n, in_gpu, out_gpu, 0, exclusive, &status);
    EXPECT_EQ(0, status);
    printf("Prefix sum on GPU took %.3f sec\n", oskar_timer_elapsed(tmr));

    // Check consistency between CPU and GPU results.
    oskar_Mem* out_cmp_gpu = oskar_mem_create_copy(out_gpu, OSKAR_CPU, &status);
    EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_gpu, n, &status));
#endif

#ifdef OSKAR_HAVE_OPENCL
    // Run on OpenCL.
    oskar_Mem* in_cl = oskar_mem_create_copy(in_cpu, OSKAR_CL, &status);
    oskar_Mem* out_cl = oskar_mem_create(OSKAR_INT, OSKAR_CL, n, &status);
    oskar_timer_start(tmr);
    printf("Using %s\n", oskar_cl_device_name());
    oskar_prefix_sum(n, in_cl, out_cl, 0, exclusive, &status);
    EXPECT_EQ(0, status);
    printf("Prefix sum on OpenCL took %.3f sec\n", oskar_timer_elapsed(tmr));

    // Check consistency between CPU and OpenCL results.
    oskar_Mem* out_cmp_cl = oskar_mem_create_copy(out_cl, OSKAR_CPU, &status);
    EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_cl, n, &status));
#endif

    if (save)
    {
        size_t num_mem = 1;
        FILE* fhan = fopen("prefix_sum_test.txt", "w");
#ifdef OSKAR_HAVE_CUDA
        num_mem += 1;
#endif
#ifdef OSKAR_HAVE_OPENCL
        num_mem += 1;
#endif
        oskar_mem_save_ascii(fhan, num_mem, n, &status, out_cpu
#ifdef OSKAR_HAVE_CUDA
                , out_cmp_gpu
#endif
#ifdef OSKAR_HAVE_OPENCL
                , out_cmp_cl
#endif
                );
        fclose(fhan);
    }

    // Clean up.
    oskar_timer_free(tmr);
    oskar_mem_free(in_cpu, &status);
    oskar_mem_free(out_cpu, &status);
#ifdef OSKAR_HAVE_CUDA
    oskar_mem_free(in_gpu, &status);
    oskar_mem_free(out_gpu, &status);
    oskar_mem_free(out_cmp_gpu, &status);
#endif
#ifdef OSKAR_HAVE_OPENCL
    oskar_mem_free(in_cl, &status);
    oskar_mem_free(out_cl, &status);
    oskar_mem_free(out_cmp_cl, &status);
#endif
}
Пример #10
0
void oskar_mem_write_fits_cube(oskar_Mem* data, const char* root_name,
        int width, int height, int num_planes, int i_plane, int* status)
{
    oskar_Mem *copy = 0, *ptr = 0;
    size_t len, buf_len;
    char* fname;

    /* Checks. */
    if (*status) return;
    if (oskar_mem_is_matrix(data))
    {
        *status = OSKAR_ERR_BAD_DATA_TYPE;
        return;
    }

    /* Construct the filename. */
    len = strlen(root_name);
    buf_len = 11 + len;
    fname = (char*) calloc(buf_len, sizeof(char));

    /* Copy to host memory if necessary. */
    ptr = data;
    if (oskar_mem_location(data) != OSKAR_CPU)
    {
        copy = oskar_mem_create_copy(ptr, OSKAR_CPU, status);
        ptr = copy;
    }

    /* Deal with complex data. */
    if (oskar_mem_is_complex(ptr))
    {
        oskar_Mem *temp;
        temp = oskar_mem_create(oskar_mem_precision(ptr), OSKAR_CPU,
                oskar_mem_length(ptr), status);

        /* Extract the real part and write it. */
        SNPRINTF(fname, buf_len, "%s_REAL.fits", root_name);
        convert_complex(ptr, temp, 0, status);
        write_pixels(temp, fname, width, height, num_planes, i_plane, status);

        /* Extract the imaginary part and write it. */
        SNPRINTF(fname, buf_len, "%s_IMAG.fits", root_name);
        convert_complex(ptr, temp, 1, status);
        write_pixels(temp, fname, width, height, num_planes, i_plane, status);
        oskar_mem_free(temp, status);
    }
    else
    {
        /* No conversion needed. */
        if ((len >= 5) && (
                !strcmp(&(root_name[len-5]), ".fits") ||
                !strcmp(&(root_name[len-5]), ".FITS") ))
        {
            SNPRINTF(fname, buf_len, "%s", root_name);
        }
        else
        {
            SNPRINTF(fname, buf_len, "%s.fits", root_name);
        }
        write_pixels(ptr, fname, width, height, num_planes, i_plane, status);
    }
    free(fname);
    oskar_mem_free(copy, status);
}
Пример #11
0
TEST(element_weights_errors, test_apply)
{
    int num_elements   = 10000;
    int status = 0;

    double gain        = 1.5;
    double gain_error  = 0.2;
    double phase       = 0.1 * M_PI;
    double phase_error = (5 / 180.0) * M_PI;

    double weight_gain  = 1.0;
    double weight_phase = 0.5 * M_PI;

    double2 weight;
    weight.x = weight_gain * cos(weight_phase);
    weight.y = weight_gain * sin(weight_phase);

    oskar_Mem *d_gain, *d_gain_error, *d_phase, *d_phase_error, *d_errors;
    oskar_Mem *h_weights, *d_weights;
    d_errors = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_GPU,
            num_elements, &status);
    d_gain = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_gain_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    h_weights = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_CPU,
            num_elements, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    oskar_mem_set_value_real(d_gain, gain, 0, 0, &status);
    oskar_mem_set_value_real(d_gain_error, gain_error, 0, 0, &status);
    oskar_mem_set_value_real(d_phase, phase, 0, 0, &status);
    oskar_mem_set_value_real(d_phase_error, phase_error, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    double2* h_weights_ = oskar_mem_double2(h_weights, &status);
    for (int i = 0; i < num_elements; ++i)
    {
        h_weights_[i].x = weight.x;
        h_weights_[i].y = weight.y;
    }
    d_weights = oskar_mem_create_copy(h_weights, OSKAR_GPU, &status);

    oskar_evaluate_element_weights_errors(num_elements,
            d_gain, d_gain_error, d_phase, d_phase_error,
            0, 0, 0, d_errors, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    oskar_mem_element_multiply(NULL, d_weights, d_errors, num_elements, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Write memory to file for inspection.
    const char* fname = "temp_test_weights.dat";
    FILE* file = fopen(fname, "w");
    oskar_mem_save_ascii(file, 7, num_elements, &status,
            d_gain, d_gain_error, d_phase, d_phase_error, d_errors,
            h_weights, d_weights);
    fclose(file);
    remove(fname);

    // Free memory.
    oskar_mem_free(d_gain, &status);
    oskar_mem_free(d_gain_error, &status);
    oskar_mem_free(d_phase, &status);
    oskar_mem_free(d_phase_error, &status);
    oskar_mem_free(d_errors, &status);
    oskar_mem_free(h_weights, &status);
    oskar_mem_free(d_weights, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
}
Пример #12
0
TEST(element_weights_errors, test_reinit)
{
    int num_elements   = 5;
    int status = 0;

    double gain        = 1.5;
    double gain_error  = 0.2;
    double phase       = 0.1 * M_PI;
    double phase_error = (5 / 180.0) * M_PI;

    oskar_Mem *d_errors, *d_gain, *d_gain_error, *d_phase, *d_phase_error;
    d_errors = oskar_mem_create(OSKAR_DOUBLE_COMPLEX, OSKAR_GPU,
            num_elements, &status);
    d_gain = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_gain_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    d_phase_error = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU,
            num_elements, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    oskar_mem_set_value_real(d_gain, gain, 0, 0, &status);
    oskar_mem_set_value_real(d_gain_error, gain_error, 0, 0, &status);
    oskar_mem_set_value_real(d_phase, phase, 0, 0, &status);
    oskar_mem_set_value_real(d_phase_error, phase_error, 0, 0, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    int num_channels = 2;
    int num_chunks = 3;
    int num_stations = 5;
    int num_times = 3;
    unsigned int seed = 1;

    const char* fname = "temp_test_weights_error_reinit.dat";
    FILE* file = fopen(fname, "w");
    for (int chan = 0; chan < num_channels; ++chan)
    {
        fprintf(file, "channel: %i\n", chan);
        for (int chunk = 0; chunk < num_chunks; ++chunk)
        {
            fprintf(file, "  chunk: %i\n", chunk);
            ASSERT_EQ(0, status) << oskar_get_error_string(status);

            for (int t = 0; t < num_times; ++t)
            {
                fprintf(file, "    time: %i\n", t);
                for (int s = 0; s < num_stations; ++s)
                {
                    fprintf(file, "      station: %i  ==> ", s);
                    oskar_evaluate_element_weights_errors(num_elements,
                            d_gain, d_gain_error, d_phase, d_phase_error,
                            seed, t, s, d_errors, &status);
                    ASSERT_EQ(0, status) << oskar_get_error_string(status);
                    oskar_Mem *h_errors = oskar_mem_create_copy(d_errors,
                            OSKAR_CPU, &status);
                    double2* errors = oskar_mem_double2(h_errors, &status);
                    for (int i = 0; i < num_elements; ++i)
                    {
                        fprintf(file, "(% -6.4f, % -6.4f), ",
                                errors[i].x, errors[i].y);
                    }
                    fprintf(file, "\n");
                    oskar_mem_free(h_errors, &status);
                }
            }
            ASSERT_EQ(0, status) << oskar_get_error_string(status);
        }
    }
    fclose(file);
//    remove(fname);

    oskar_mem_free(d_gain, &status);
    oskar_mem_free(d_gain_error, &status);
    oskar_mem_free(d_phase, &status);
    oskar_mem_free(d_phase_error, &status);
    oskar_mem_free(d_errors, &status);
}
Пример #13
0
TEST(evaluate_baselines, cpu_gpu)
{
    oskar_Mem *u, *v, *w, *uu, *vv, *ww;
    oskar_Mem *u_gpu, *v_gpu, *w_gpu, *uu_gpu, *vv_gpu, *ww_gpu;
    int num_baselines, num_stations = 50, status = 0, type, location;
    double *u_, *v_, *w_, *uu_, *vv_, *ww_;

    num_baselines = num_stations * (num_stations - 1) / 2;

    type = OSKAR_DOUBLE;

    // Allocate host memory.
    location = OSKAR_CPU;
    u = oskar_mem_create(type, location, num_stations, &status);
    v = oskar_mem_create(type, location, num_stations, &status);
    w = oskar_mem_create(type, location, num_stations, &status);
    uu = oskar_mem_create(type, location, num_baselines, &status);
    vv = oskar_mem_create(type, location, num_baselines, &status);
    ww = oskar_mem_create(type, location, num_baselines, &status);
    u_ = oskar_mem_double(u, &status);
    v_ = oskar_mem_double(v, &status);
    w_ = oskar_mem_double(w, &status);
    uu_ = oskar_mem_double(uu, &status);
    vv_ = oskar_mem_double(vv, &status);
    ww_ = oskar_mem_double(ww, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Fill station coordinates with test data.
    for (int i = 0; i < num_stations; ++i)
    {
        u_[i] = (double)(i + 1);
        v_[i] = (double)(i + 2);
        w_[i] = (double)(i + 3);
    }

    // Evaluate baseline coordinates on CPU.
    oskar_convert_station_uvw_to_baseline_uvw(num_stations,
            0, u, v, w, 0, uu, vv, ww, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check results are correct.
    for (int s1 = 0, b = 0; s1 < num_stations; ++s1)
    {
        for (int s2 = s1 + 1; s2 < num_stations; ++s2, ++b)
        {
            EXPECT_DOUBLE_EQ(u_[s2] - u_[s1], uu_[b]);
            EXPECT_DOUBLE_EQ(v_[s2] - v_[s1], vv_[b]);
            EXPECT_DOUBLE_EQ(w_[s2] - w_[s1], ww_[b]);
        }
    }

    // Allocate device memory and copy input data.
#ifdef OSKAR_HAVE_CUDA
    location = OSKAR_GPU;
#endif
    u_gpu = oskar_mem_create_copy(u, location, &status);
    v_gpu = oskar_mem_create_copy(v, location, &status);
    w_gpu = oskar_mem_create_copy(w, location, &status);
    uu_gpu = oskar_mem_create(type, location, num_baselines, &status);
    vv_gpu = oskar_mem_create(type, location, num_baselines, &status);
    ww_gpu = oskar_mem_create(type, location, num_baselines, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Evaluate baseline coordinates on device.
    oskar_convert_station_uvw_to_baseline_uvw(num_stations,
            0, u_gpu, v_gpu, w_gpu, 0, uu_gpu, vv_gpu, ww_gpu, &status);
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check results are consistent.
    double max_, avg;
    oskar_mem_evaluate_relative_error(uu_gpu, uu, 0, &max_, &avg, 0, &status);
    ASSERT_LT(max_, 1e-12);
    ASSERT_LT(avg, 1e-12);
    oskar_mem_evaluate_relative_error(vv_gpu, vv, 0, &max_, &avg, 0, &status);
    ASSERT_LT(max_, 1e-12);
    ASSERT_LT(avg, 1e-12);
    oskar_mem_evaluate_relative_error(ww_gpu, ww, 0, &max_, &avg, 0, &status);
    ASSERT_LT(max_, 1e-12);
    ASSERT_LT(avg, 1e-12);

    // Free memory.
    oskar_mem_free(u, &status);
    oskar_mem_free(v, &status);
    oskar_mem_free(w, &status);
    oskar_mem_free(uu, &status);
    oskar_mem_free(vv, &status);
    oskar_mem_free(ww, &status);
    oskar_mem_free(u_gpu, &status);
    oskar_mem_free(v_gpu, &status);
    oskar_mem_free(w_gpu, &status);
    oskar_mem_free(uu_gpu, &status);
    oskar_mem_free(vv_gpu, &status);
    oskar_mem_free(ww_gpu, &status);

    ASSERT_EQ(0, status) << oskar_get_error_string(status);
}
static void* run_blocks(void* arg)
{
    oskar_Imager* h;
    oskar_Mem *plane, *uu, *vv, *ww = 0, *amp, *weight, *block, *l, *m, *n;
    size_t max_size;
    const size_t smallest = 1024, largest = 65536;
    int dev_loc = OSKAR_CPU, *status;

    /* Get thread function arguments. */
    h = ((ThreadArgs*)arg)->h;
    const int thread_id = ((ThreadArgs*)arg)->thread_id;
    const int num_vis = ((ThreadArgs*)arg)->num_vis;
    plane = ((ThreadArgs*)arg)->plane;
    status = &(h->status);

    /* Set the device used by the thread. */
    if (thread_id < h->num_gpus)
    {
        dev_loc = h->dev_loc;
        oskar_device_set(h->dev_loc, h->gpu_ids[thread_id], status);
    }

    /* Copy visibility data to device. */
    uu = oskar_mem_create_copy(((ThreadArgs*)arg)->uu, dev_loc, status);
    vv = oskar_mem_create_copy(((ThreadArgs*)arg)->vv, dev_loc, status);
    amp = oskar_mem_create_copy(((ThreadArgs*)arg)->amp, dev_loc, status);
    weight = oskar_mem_create_copy(((ThreadArgs*)arg)->weight, dev_loc, status);
    if (h->algorithm == OSKAR_ALGORITHM_DFT_3D)
        ww = oskar_mem_create_copy(((ThreadArgs*)arg)->ww, dev_loc, status);

#ifdef _OPENMP
    /* Disable nested parallelism. */
    omp_set_nested(0);
    omp_set_num_threads(1);
#endif

    /* Calculate the maximum pixel block size, and number of blocks. */
    const size_t num_pixels = (size_t)h->image_size * (size_t)h->image_size;
    max_size = num_pixels / h->num_devices;
    max_size = ((max_size + smallest - 1) / smallest) * smallest;
    if (max_size > largest) max_size = largest;
    if (max_size < smallest) max_size = smallest;
    const int num_blocks = (int) ((num_pixels + max_size - 1) / max_size);

    /* Allocate device memory for pixel block data. */
    block = oskar_mem_create(h->imager_prec, dev_loc, 0, status);
    l = oskar_mem_create(h->imager_prec, dev_loc, max_size, status);
    m = oskar_mem_create(h->imager_prec, dev_loc, max_size, status);
    n = oskar_mem_create(h->imager_prec, dev_loc, max_size, status);

    /* Loop until all blocks are done. */
    for (;;)
    {
        size_t block_size;

        /* Get a unique block index. */
        oskar_mutex_lock(h->mutex);
        const int i_block = (h->i_block)++;
        oskar_mutex_unlock(h->mutex);
        if ((i_block >= num_blocks) || *status) break;

        /* Calculate the block size. */
        const size_t block_start = i_block * max_size;
        block_size = num_pixels - block_start;
        if (block_size > max_size) block_size = max_size;

        /* Copy the (l,m,n) positions for the block. */
        oskar_mem_copy_contents(l, h->l, 0, block_start, block_size, status);
        oskar_mem_copy_contents(m, h->m, 0, block_start, block_size, status);
        if (h->algorithm == OSKAR_ALGORITHM_DFT_3D)
            oskar_mem_copy_contents(n, h->n, 0, block_start,
                    block_size, status);

        /* Run DFT for the block. */
        oskar_dft_c2r(num_vis, 2.0 * M_PI, uu, vv, ww, amp, weight,
                (int) block_size, l, m, n, block, status);

        /* Add data to existing pixels. */
        oskar_mem_add(plane, plane, block,
                block_start, block_start, 0, block_size, status);
    }

    /* Free memory. */
    oskar_mem_free(uu, status);
    oskar_mem_free(vv, status);
    oskar_mem_free(ww, status);
    oskar_mem_free(amp, status);
    oskar_mem_free(weight, status);
    oskar_mem_free(block, status);
    oskar_mem_free(l, status);
    oskar_mem_free(m, status);
    oskar_mem_free(n, status);
    return 0;
}