Exemplo n.º 1
0
/* if transpose != 0: compute tA %*% A, else A %*% tA */
void covariance(double *A, double *Sigma, int nrow, int ncol, int transpose){

        int i;

        if (transpose){
                tAA(A, Sigma, nrow, ncol);
        //        for (i=0; i<ncol*ncol; i++) Sigma[i] /= nrow - 1;
                /* tA %*% A */
        } else {
                double *tA = calloc(nrow*ncol, sizeof(double));
	//      for (i=0; i<nrow*ncol; i++) tA[i] = A[i]/(nrow - 1);
                tr(tA, nrow, ncol);
                printf("tA done\n");
                /* A %*% tA */
                prodMatrix(A, tA, Sigma, nrow, ncol, ncol, nrow);
                free(tA);
        }
}
Exemplo n.º 2
0
int main(int, char**)
{
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_1;
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_2;

#if SYNTHETIC_INPUT
    Halide::Buffer<uint8_t> im1(10, 10);
    Halide::Buffer<uint8_t> im2(10, 10);

    for (int i = 0; i < 10; i++)
	    for (int j = 0; j < 10; j++)
	    {
		    im1(i, j) = (uint8_t) i*i+j*j;
		    im2(i, j) = (uint8_t) i*i+j*j;
	    }
#else
    Halide::Buffer<uint8_t> im1 = Halide::Tools::load_image("./utils/images/rgb.png");
    Halide::Buffer<uint8_t> im2 = Halide::Tools::load_image("./utils/images/rgb.png");
#endif

    Halide::Buffer<float> Ix_m(im1.width(), im1.height());
    Halide::Buffer<float> Iy_m(im1.width(), im1.height());
    Halide::Buffer<float> It_m(im1.width(), im1.height());
    Halide::Buffer<int> C1(_NC);
    Halide::Buffer<int> C2(_NC);
    Halide::Buffer<int> SIZES(2);
    Halide::Buffer<int> u(_NC);
    Halide::Buffer<int> v(_NC);
    Halide::Buffer<float> A(2, 4*w*w);
    Halide::Buffer<float> tA(4*w*w, 2);
    Halide::Buffer<double> pinvA(4*w*w, 2);
    Halide::Buffer<double> det(1);
    Halide::Buffer<float> tAA(2, 2);
    Halide::Buffer<double> X(2, 2);

    SIZES(0) = im1.height();
    SIZES(1) = im1.width();
    C1(0) = 500; C2(0) = 400;
    C1(1) = 800; C2(1) = 900;
    C1(2) = 200; C2(2) = 400;
    C1(3) = 400; C2(3) = 200;
    C1(4) = 400; C2(4) = 500;
    C1(5) = 800; C2(5) = 200;
    C1(6) = 200; C2(6) = 900;
    C1(7) = 900; C2(7) = 200;

    det(0) = 0;
    init_buffer(Ix_m, (float) 0);
    init_buffer(Iy_m, (float) 0);
    init_buffer(It_m, (float) 0);
    init_buffer(A, (float) 0);
    init_buffer(tA, (float) 0);
    init_buffer(pinvA, (double) 0);
    init_buffer(tAA, (float) 0);
    init_buffer(X, (double) 0);

    // Warm up
    optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(),
			  Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(),
			  C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer());

    // Tiramisu
    for (int i=0; i<NB_TESTS; i++)
    {
        auto start1 = std::chrono::high_resolution_clock::now();
        optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(),
			  Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(),
			  C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer());
        auto end1 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double,std::milli> duration1 = end1 - start1;
        duration_vector_1.push_back(duration1);
    }

    std::cout << "Time: " << median(duration_vector_1) << std::endl;

#if SYNTHETIC_INPUT
    print_buffer(im1);
    print_buffer(im2);

    print_buffer(Ix_m);
    print_buffer(Iy_m);
    print_buffer(It_m);

    print_buffer(A);
    print_buffer(tA);
    print_buffer(tAA);
    print_buffer(det);
    print_buffer(X);
    print_buffer(pinvA);
#endif

    std::cout << "Output" << std::endl;

    print_buffer(u);
    print_buffer(v);

    return 0;
}