Ejemplo n.º 1
0
void test_svd(const std::string & fn, ScalarType EPS)
{
  std::size_t sz1, sz2;

  //read matrix

  // sz1 = 2048, sz2 = 2048;
  // std::vector<ScalarType> in(sz1 * sz2);
  // random_fill(in);

  // read file
  std::fstream f(fn.c_str(), std::fstream::in);
  //read size of input matrix
  read_matrix_size(f, sz1, sz2);

  std::size_t to = std::min(sz1, sz2);

  viennacl::matrix<ScalarType> Ai(sz1, sz2), Aref(sz1, sz2), QL(sz1, sz1), QR(sz2, sz2);
  read_matrix_body(f, Ai);

  std::vector<ScalarType> sigma_ref(to);
  read_vector_body(f, sigma_ref);

  f.close();

  // viennacl::fast_copy(&in[0], &in[0] + in.size(), Ai);

  Aref = Ai;

  viennacl::tools::timer timer;
  timer.start();

  viennacl::linalg::svd(Ai, QL, QR);

  viennacl::backend::finish();

  double time_spend = timer.get();

  viennacl::matrix<ScalarType> result1(sz1, sz2), result2(sz1, sz2);
  result1 = viennacl::linalg::prod(QL, Ai);
  result2 = viennacl::linalg::prod(result1, trans(QR));

  ScalarType sigma_diff = sigmas_compare(Ai, sigma_ref);
  ScalarType prods_diff  = matrix_compare(result2, Aref);

  bool sigma_ok = (fabs(sigma_diff) < EPS)
                   && (fabs(prods_diff) < std::sqrt(EPS));  //note: computing the product is not accurate down to 10^{-16}, so we allow for a higher tolerance here

  printf("%6s [%dx%d] %40s sigma_diff = %.6f; prod_diff = %.6f; time = %.6f\n", sigma_ok?"[[OK]]":"[FAIL]", (int)Aref.size1(), (int)Aref.size2(), fn.c_str(), sigma_diff, prods_diff, time_spend);
  if (!sigma_ok)
    exit(EXIT_FAILURE);
}
Ejemplo n.º 2
0
void test_nmf(std::size_t m, std::size_t k, std::size_t n)
{
    std::vector<ScalarType> stl_w(m * k);
    std::vector<ScalarType> stl_h(k * n);

    viennacl::matrix<ScalarType> v_ref(m, n);
    viennacl::matrix<ScalarType> w_ref(m, k);
    viennacl::matrix<ScalarType> h_ref(k, n);

    fill_random(stl_w);
    fill_random(stl_h);

    viennacl::fast_copy(&stl_w[0], &stl_w[0] + stl_w.size(), w_ref);
    viennacl::fast_copy(&stl_h[0], &stl_h[0] + stl_h.size(), h_ref);

    v_ref = viennacl::linalg::prod(w_ref, h_ref);  //reference

    // Fill again with random numbers:
    fill_random(stl_w);
    fill_random(stl_h);

    viennacl::matrix<ScalarType> w_nmf(m, k);
    viennacl::matrix<ScalarType> h_nmf(k, n);

    viennacl::fast_copy(&stl_w[0], &stl_w[0] + stl_w.size(), w_nmf);
    viennacl::fast_copy(&stl_h[0], &stl_h[0] + stl_h.size(), h_nmf);



    viennacl::linalg::nmf_config conf;
    viennacl::linalg::nmf(v_ref, w_nmf, h_nmf, conf);

    viennacl::matrix<ScalarType> v_nmf = viennacl::linalg::prod(w_nmf, h_nmf);

    float diff  = matrix_compare(v_ref, v_nmf);
    bool diff_ok = fabs(diff) < EPS;

    long iterations = static_cast<long>(conf.iters());
    printf("%6s [%lux%lux%lu] diff = %.6f (%ld iterations)\n", diff_ok ? "[[OK]]":"[FAIL]", m, k, n, diff, iterations);

    if (!diff_ok)
      exit(EXIT_FAILURE);
}
Ejemplo n.º 3
0
void record_demo_frame(void)
{
	vms_angvec	pbh;

//mprintf(0, "Record start...");

mprintf(0, "Curtime = %6i, Last time = %6i\n", Player_stats.time_total, Demo_last_time);

  if (GameTime - Demo_last_time >= 65536) {
	Demo_last_time = GameTime;
	if (Demo_record_index < MAX_DEMO_RECS) {
		demorec *demo_ptr = &Demo_records[Demo_record_index];
		vms_matrix tempmat;

		demo_ptr->time = GameTime - Demo_start_time;

		demo_ptr->x = Player->pos.x;
		demo_ptr->y = Player->pos.y;
		demo_ptr->z = Player->pos.z;

		vm_extract_angles_matrix(&pbh, &Player->orient);
		vm_angles_2_matrix(&tempmat, &pbh);
		matrix_compare(&tempmat, &Player->orient);
		demo_ptr->p = pbh.p;
		demo_ptr->b = pbh.b;
		demo_ptr->h = pbh.h;

		demo_ptr->segnum = Player->segnum;

		Demo_record_index++;
		Num_demo_recs = Demo_record_index;

//		if (firing)
//			demo_ptr->specials = 1;
//		else
//			demo_ptr->specials = 0;
	}
  }
//mprintf(0, "Record end\n");
}
Ejemplo n.º 4
0
void test_eigen(const std::string& fn, bool is_symm)
{
    std::cout << "Reading..." << "\n";
    std::size_t sz;
    // read file
    std::fstream f(fn.c_str(), std::fstream::in);
    //read size of input matrix
    read_matrix_size(f, sz);

    bool is_row = viennacl::is_row_major<MatrixLayout>::value;
    if (is_row)
      std::cout << "Testing row-major matrix of size " << sz << "-by-" << sz << std::endl;
    else
      std::cout << "Testing column-major matrix of size " << sz << "-by-" << sz << std::endl;

    viennacl::matrix<ScalarType> A_input(sz, sz), A_ref(sz, sz), Q(sz, sz);
    // reference vector with reference values from file
    std::vector<ScalarType> eigen_ref_re(sz);
    // calculated real eigenvalues
    std::vector<ScalarType> eigen_re(sz);
    // calculated im. eigenvalues
    std::vector<ScalarType> eigen_im(sz);

    // read input matrix from file
    read_matrix_body(f, A_input);
    // read reference eigenvalues from file
    read_vector_body(f, eigen_ref_re);


    f.close();

    A_ref = A_input;

    std::cout << "Calculation..." << "\n";

    Timer timer;
    timer.start();
    // Start the calculation
    if(is_symm)
        viennacl::linalg::qr_method_sym(A_input, Q, eigen_re);
    else
        viennacl::linalg::qr_method_nsm(A_input, Q, eigen_re, eigen_im);
/*

    std::cout << "\n\n Matrix A: \n\n";
    matrix_print(A_input);
    std::cout << "\n\n";

    std::cout << "\n\n Matrix Q: \n\n";
    matrix_print(Q);
    std::cout << "\n\n";
*/

    double time_spend = timer.get();

    std::cout << "Verification..." << "\n";

    bool is_hessenberg = check_hessenberg(A_input);
    bool is_tridiag = check_tridiag(A_input);

    ublas::matrix<ScalarType> A_ref_ublas(sz, sz), A_input_ublas(sz, sz), Q_ublas(sz, sz), result1(sz, sz), result2(sz, sz);
    viennacl::copy(A_ref, A_ref_ublas);
    viennacl::copy(A_input, A_input_ublas);
    viennacl::copy(Q, Q_ublas);

    // compute result1 = ublas::prod(Q_ublas, A_input_ublas);   (terribly slow when using ublas directly)
    for (std::size_t i=0; i<result1.size1(); ++i)
      for (std::size_t j=0; j<result1.size2(); ++j)
      {
        ScalarType value = 0;
        for (std::size_t k=0; k<Q_ublas.size2(); ++k)
          value += Q_ublas(i, k) * A_input_ublas(k, j);
        result1(i,j) = value;
      }
    // compute result2 = ublas::prod(A_ref_ublas, Q_ublas);   (terribly slow when using ublas directly)
    for (std::size_t i=0; i<result2.size1(); ++i)
      for (std::size_t j=0; j<result2.size2(); ++j)
      {
        ScalarType value = 0;
        for (std::size_t k=0; k<A_ref_ublas.size2(); ++k)
          value += A_ref_ublas(i, k) * Q_ublas(k, j);
        result2(i,j) = value;
      }


    ScalarType prods_diff = matrix_compare(result1, result2);
    ScalarType eigen_diff = vector_compare(eigen_re, eigen_ref_re);


    bool is_ok = is_hessenberg;

    if(is_symm)
        is_ok = is_ok && is_tridiag;

    is_ok = is_ok && (eigen_diff < EPS);
    is_ok = is_ok && (prods_diff < EPS);

    // std::cout << A_ref << "\n";
    // std::cout << A_input << "\n";
    // std::cout << Q << "\n";
    // std::cout << eigen_re << "\n";
    // std::cout << eigen_im << "\n";
    // std::cout << eigen_ref_re << "\n";
    // std::cout << eigen_ref_im << "\n";

    // std::cout << result1 << "\n";
    // std::cout << result2 << "\n";
    // std::cout << eigen_ref << "\n";
    // std::cout << eigen << "\n";

    printf("%6s [%dx%d] %40s time = %.4f\n", is_ok?"[[OK]]":"[FAIL]", (int)A_ref.size1(), (int)A_ref.size2(), fn.c_str(), time_spend);
    printf("tridiagonal = %d, hessenberg = %d prod-diff = %f eigen-diff = %f\n", is_tridiag, is_hessenberg, prods_diff, eigen_diff);
    std::cout << std::endl << std::endl;

    if (!is_ok)
      exit(EXIT_FAILURE);

}
Ejemplo n.º 5
0
Archivo: main.c Proyecto: shaze/wcdest
int main(int argc, char *argv[]) {
  struct tms usage;
  FILE *finp;
  int i,j, ticks;
  int numinfirst;
  char chkfile[255];

  i=0;
  dump_file=NULL;

  do_cluster=do_pairwise_cluster;
  srandom(563573);
  bzero(&prog_opts,sizeof(ProgOptionsType));
  outf=stdout;
  // set default distance function
  dist = d2;
  distpair= d2pair;
#ifdef MPI
  MPI_Init(&argc, &argv);
  MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
#endif


  if(myid==0) { // Master
    process_options(argc, argv);
  } else {
    process_slave_options(argc, argv);
  }

  if (prog_opts.show_version || (argc==1)) {
      if (myid==0) printf("Version \n%s\n",version);
#ifdef MPI      
      MPI_Finalize();
#endif
      exit(0);
    }


  // Allocate space for the RC table for big words
  rc_big = calloc(BIG_WORD_TSIZE, sizeof(SeqElt));

  // work is an array of work blocks. If non-parallel, there'll only
  // be one. work[0] acts a template

  work = (WorkPtr) calloc(num_threads,sizeof(WorkBlock));
  work->filename = argv[optind];
  work->index    = NULL;


  if(prog_opts.do_dump) dump_file = fopen(prog_opts.dname,"w");

#ifdef MPI
  if (numprocs > 1)  
    if (myid>0) {  // slaves
      if (prog_opts.split) {
	MPI_Finalize();
	return 0;
      }
      handleMPISlaveSetup(&num_seqs);
      initialise(work, prog_opts.edfile);
      internalTest();

      perform_clustering(work);
      transmitMPISlaveResponse(work);
      if (prog_opts.show_perf)     show_performance(outf);
      MPI_Finalize();
      exit(0);
    }   
#else
  if (numprocs > 1) {
    printf("This version of wcd is not compiled with MPI\n");
    printf("You cannot run it with a multiple processes\n");
    printf("Either only run it with one process or do a \n");
    printf("  ./configure --enable-mpi\n");
    printf("  make clean\n");
    printf("  make \n");
    exit(5);
  }
#endif

  // work out number of sequences
  // if the user has specified a value for num_seqs then
  // use that, else use the number of sequences in the file
  num_seqs = count_seqs(argv[optind], &data_size)+reindex_value;

  seq = (SeqPtr *) calloc(num_seqs,sizeof(SeqPtr));
  seqInfo     = (SeqInfoPtr) calloc(num_seqs,sizeof(SeqInfoStruct));
  tree= (UnionFindPtr) calloc(num_seqs,sizeof(UnionFindStruct));
  data= (SeqPtr)  calloc(data_size,sizeof(SeqElt));
  init_dummy_sequences();
#ifndef AUXINFO
  seqID = (SeqIDPtr) calloc(num_seqs,sizeof(SeqIDStruct));
#endif
  if (seq == NULL) {
    perror("SeqStruct allocation");
    exit(50);
  }
  numinfirst = global_i_end = num_seqs;
  global_j_beg = 0;
  // if merging, need to check the other file too
  if (prog_opts.domerge || prog_opts.doadd ) {
    global_j_beg = global_i_end;
    num_seqs = handleMerge(argv[optind+2], num_seqs);
    if (prog_opts.doadd) global_i_end = num_seqs; 
  }

  initialise(work, prog_opts.edfile);
  if (data == NULL) {
    sprintf(chkfile,"Main data store (%d bytes)",data_size);
    perror(chkfile);
    exit(51);
  }
  for(i=0; i<num_seqs; i++) seqInfo[i].flag=0;
  // reopen sequence file for reading
  finp = fopen(argv[optind],"r");
  if (finp == NULL)  {
    perror(argv[optind]);
    exit(51);
  }
  // Some messy stuff to hande auxiliary options
  // Skip to next comment on first reading
  if (prog_opts.pairwise==1) {
    sscanf(argv[optind+1], "%d", &i);
    sscanf(argv[optind+2], "%d", &j);
    show_pairwise(finp,i,j);
    return 0;
  }
  if (prog_opts.statgen) {
    compared2nummatches(finp,prog_opts.statgen);
    return 0;
  }
  if (prog_opts.range) {
    sscanf(argv[optind+1], "%d", &global_i_beg);
    sscanf(argv[optind+2], "%d", &global_i_end);
  }     
  if (prog_opts.show_comp==41) {
    char * fname; fname = malloc(255);
    sscanf(argv[optind+1], "%s", fname);
    read_sequences(finp,reindex_value,num_seqs); 
    checkfile = fopen(fname,"r");
    sscanf(argv[optind+2], "%d", &j);
    while (fscanf(checkfile,"%d", &i) != -1) {
    	  do_compare(finp,i,j,1);
}
    return 0;
  }

  if (prog_opts.show_comp) {
    sscanf(argv[optind+1], "%d", &i);
    sscanf(argv[optind+2], "%d", &j);
    //printf("Comparing %d and %d of %d flag %d\n",i,j,num_seqs,prog_opts.flag);
    read_sequences(finp,reindex_value,num_seqs); 
    do_compare(finp,i,j,prog_opts.flag);
    return 0;
  }
  if (prog_opts.show_index) {
    show_sequence(finp, prog_opts.index,prog_opts.flag);
    return 0;
  }
  // Now read in the sequences
  if (do_cluster == do_pairwise_cluster||do_cluster==do_MPImaster_cluster||do_cluster == do_suffix_cluster) 
    read_sequences(finp,reindex_value,numinfirst);
  else
    init_sequences(finp,reindex_value,numinfirst);
  fclose(finp);
  //printf("%d Allocated %d, start=%d, last=%d\n",num_seqs,data_size,data,seq[num_seqs-1].seq);

  if (prog_opts.split) {
    process_split(prog_opts.clfname1, prog_opts.split);
#ifdef MPI
    MPI_Finalize();
#endif
    return 0;
  }
  if (prog_opts.consfname1) process_constraints(prog_opts.consfname1,0);
  if (prog_opts.clustercomp) {
    cluster_compare(argv[optind+1]);
    return 0;
  }
  // If merging or adding need to open the second sequence file
  if (prog_opts.domerge || prog_opts.doadd) {
    finp = fopen(argv[optind+2], "r");
    if (finp == NULL)  {
      perror(argv[optind]);
      exit(1);
    }
    if (do_cluster == do_pairwise_cluster)
      read_sequences(finp,numinfirst+reindex_value,num_seqs);
    else
      init_sequences(finp,numinfirst+reindex_value,num_seqs);
    get_clustering(argv[optind+1],0);
    if (prog_opts.domerge) get_clustering(argv[optind+3],numinfirst);
  }
  if (prog_opts.init_cluster) get_clustering(prog_opts.clfname1, 0);
  if (prog_opts.recluster)
    reclustering(work,prog_opts.clfname2);
  else {
    // This really assumes there is only one thread for suffix
    if (prog_opts.pairwise==2) {
      matrix_compare(finp);
      return 0;
    }
    work->workflag = prog_opts.noninterleavednlc;//kludge for suffixarray
    global_j_end = num_seqs;
    perform_clustering(work);
#ifdef MPI
    if (myid>0) transmitMPISlaveResponse(work);
#endif
  }
  if (prog_opts.show_ext)      show_EXT(outf);
  if (prog_opts.show_histo)    show_histogram(work);
  if (prog_opts.show_clust&1) show_clusters(outf); 
  if (prog_opts.show_clust&8) 
    produce_clusters(prog_opts.clthresh,prog_opts.dirname);
  if (prog_opts.show_perf)     show_performance(outf);
  if (prog_opts.do_dump) {
    strcpy(chkfile,prog_opts.dname);
    strcat(chkfile,"-FIN");
    fclose(dump_file);
    dump_file = fopen(chkfile,"w");
    times(&usage);
    ticks = sysconf(_SC_CLK_TCK);
    fprintf(dump_file,"Completed %ld %ld", usage.tms_utime/ticks, usage.tms_stime*1000/ticks);
    fclose(dump_file);
  }
  if (prog_opts.show_version) fprintf(outf,"\n%s\n",version);
  fclose(outf);
#ifdef MPI
  MPI_Finalize();
#endif
  exit(0);
}
Ejemplo n.º 6
0
/* Try various ways to do matmul and time them.  Tiled algorithms
 * running serially; multi-threaded QUARK runtime with tiled
 * algorithms; and direct serial computation over standard layout. */
int main_algorithm(int NB, int N, int THREADS)
{
    int i, j, k, nerr=0;
    int BB = N/NB;
    double *A = (double*)malloc(N*N*sizeof(double));
    double *Ablk = (double*)malloc(N*N*sizeof(double));
    double *B = (double*)malloc(N*N*sizeof(double));
    double *Bblk = (double*)malloc(N*N*sizeof(double));
    double *C_direct = (double*)malloc(N*N*sizeof(double));
    double *C = (double*)malloc(N*N*sizeof(double));
    double *Cblk = (double*)malloc(N*N*sizeof(double));
    double *C_quark = (double*)malloc(N*N*sizeof(double));
    double *C_quark_blk = (double*)malloc(N*N*sizeof(double));
    struct timeval tstart, tend, tdiff;
    double t_blk=0, t_quark=0, t_direct=0;

    // Initialize
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            A[i+j*N] = (double)1.0+i;
            B[i+j*N] = (double)2.0+i+j;
            C_quark[i+j*N] = C_direct[i+j*N] = C[i+j*N] = 3.0;
        }
    }

    matrix_print("Printing A", A, N);
    matrix_print("Printing B", B, N);
    matrix_print("Printing C before computation", C, N);

    // Move from F77 to BDL
    std_to_bdl( A, Ablk, N, NB );
    std_to_bdl( B, Bblk, N, NB );
    std_to_bdl( C, Cblk, N, NB );
    std_to_bdl( C_quark, C_quark_blk, N, NB );

    /* ORIGINAL TILED ROUTINE */
    /* This is the code for the serial tile-by-tile multiplication */
    printf("Doing matrix multiplication using serial tile-by-tile algorithm\n");
    gettimeofday( &tstart, NULL );
    for (i = 0; i < BB; i++)
        for (j = 0; j < BB; j++)
            for (k = 0; k < BB; k++)
                matmul ( &Ablk[NB*NB*i + NB*NB*BB*k], &Bblk[NB*NB*k + NB*NB*BB*j], &Cblk[NB*NB*i + NB*NB*BB*j], NB);
    gettimeofday( &tend, NULL );
    t_blk = timeval_subtract( &tdiff, &tend, &tstart );
    printf("Time taken: %f\n", tdiff.tv_sec + (double)tdiff.tv_usec/1000000 );
    bdl_to_std( C, Cblk, N, NB );
    matrix_print("Printing C produced by serial tile-algorithm after computation", C, N);
    printf("\n");

    /* QUARK PARALLEL TILED ROUTINE */
    /* This is the code for the QUARK runtime do do the parallel multi-threaded tile-by-tile algorithm */
    printf("Doing matrix multiplication using the multi-threaded QUARK runtime for a tile based algorithm\n");
    Quark *quark = QUARK_New(THREADS);
    gettimeofday( &tstart, NULL );
    for (i = 0; i < BB; i++)
        for (j = 0; j < BB; j++)
            for (k = 0; k < BB; k++) 
                matmul_quark_call ( quark, &Ablk[NB*NB*i + NB*NB*BB*k], &Bblk[NB*NB*k + NB*NB*BB*j], &C_quark_blk[NB*NB*i + NB*NB*BB*j], NB);
    QUARK_Barrier( quark );
    gettimeofday( &tend, NULL );
    t_quark = timeval_subtract( &tdiff, &tend, &tstart );
    printf("Time taken: %f\n", tdiff.tv_sec + (double)tdiff.tv_usec/1000000 );
    QUARK_Delete(quark);
    bdl_to_std( C_quark, C_quark_blk, N, NB );    
    matrix_print("Printing C produced by QUARK runtime after computation", C_quark, N);
    printf("\n");

    /* DIRECT COMPUTATION OVER STANDARD LAYOUT */
    /* Compute direct C if desired */
    printf("Doing matrix multiplication using direct loops (ie, view matrix as one big tile)\n");
    gettimeofday( &tstart, NULL );
    matmul ( A, B, C_direct, N );
    gettimeofday( &tend, NULL );
    t_direct = timeval_subtract( &tdiff, &tend, &tstart );
    printf("Time taken: %f\n", (double)(tdiff.tv_sec + (double)tdiff.tv_usec/1000000) );
    matrix_print("Printing C produced by direct matmul after computation", C_direct, N);
    printf("\n");

    /* Check for errors */
    printf("Comparing result matrices (direct versus QUARK)\n");
    nerr = matrix_compare( C_direct, C_quark, N );
    printf("Number of differences: %d\n", nerr);    
    printf("\n");    

    printf("Summary of time taken\n");
    printf("Direct       SerialBlock  QUARK(%d threads)\n", THREADS);
    printf("%-12.5f %-12.5f %-12.5f\n", t_direct, t_blk, t_quark);
    
    free(A); free(Ablk); free(B); free(Bblk); free(C); free(Cblk); free(C_direct); free(C_quark); free(C_quark_blk);
    return 0;
}