bool SystemSolver_FASTCG::solve(const SparseMatrix& A_in, Vector& x_in, const Vector& b_in) {

        typedef double CoeffType ;
        typedef Array1d<CoeffType> VectorType ;
        typedef SparseMatrixBCRS<CoeffType, 2, 2> MatrixType ;

        unsigned int N0 = A_in.n() ;
        std::cerr << "N0 = " << N0 << std::endl ;
        Permutation permutation; 
        compute_permutation(A_in, permutation) ;

        MatrixType A ;
        ::OGF::convert_matrix(A_in, A, permutation) ;

//      ::OGF::compress_indices(A) ;

        std::cerr << "filling ratio:" << (A.filling_ratio() * 100.0) << "%" << std::endl ;
        if(false) {
            std::cerr << "Saving matrix to disk (matrix.dat)" << std::endl ;
            std::ofstream out("matrix.dat") ;
//            A.print(out) ;
            ::OGF::output_matrix(A, out) ;
        }

        unsigned int N = A.n() ; // Can be greater than N0 due to blocking
        N = QuickBLAS::aligned_size(N, sizeof(CoeffType)) ;

        std::cerr <<"N = " << N << std::endl ;

        int max_iter = (nb_iters_ == 0) ? 5 * N : nb_iters_ ;
        double eps = threshold_ ;

        std::cerr << "nb iters = " << max_iter << "  threshold = " << eps << std::endl ;

        VectorType b(N, alignment_for_SSE2) ;
        VectorType x(N, alignment_for_SSE2) ;

        permutation.invert_permute_vector(b_in, b) ;
        permutation.invert_permute_vector(x_in, x) ;

        solve_cg(A, x, b, eps, max_iter) ;

        permutation.permute_vector(x, x_in) ;

        return true ;
    }
Example #2
0
/*
  RFtrain: train every tree of a random forest.

  rf                - forest to train. Its trees[], feature_min[], feature_max[]
                      and class_names[] members are used as already allocated.
  feature_fraction  - fraction of rf->nfeatures handed to each tree.
  training_fraction - fraction of the (possibly rebalanced) samples handed to
                      each tree.
  training_classes  - class label of every sample (ntraining entries). A label
                      outside [0, rf->nclasses) that a tree selects is reported
                      and overwritten with 0 in place.
  training_data     - training_data[sample][feature].
  ntraining         - number of samples.

  If rf->max_class_ratio > 0 and the most frequent class has more than
  max_class_ratio times as many samples as the least frequent one, a randomly
  chosen subset of the most frequent class is kept and training runs on a
  private copy of the reduced data set.

  If the environment variable RF_WRITE_TRAINING is set, the samples actually
  used are written as text to the file it names.

  Returns NO_ERROR. Allocation failures are reported through ErrorExit().
*/
int
RFtrain(RANDOM_FOREST *rf, double feature_fraction, double training_fraction, int *training_classes, double **training_data, int ntraining)
{
  int    n, ii, nfeatures_per_tree = 0, *feature_permutation, *training_permutation, f,
    index, start_no, end_no, ntraining_per_tree = 0, total_to_remove = 0 ;
  TREE   *tree = NULL ;
  char   *fname ;

  if (rf->max_class_ratio > 0)
  {
    int class_counts[MAX_CLASSES], max_class, max_class_count, min_class, min_class_count ;
    double **new_training_data ;
    int    *new_training_classes ;

    memset(class_counts, 0, sizeof(class_counts)) ;
    for (n = 0 ; n < ntraining ; n++)
    {
      int label = training_classes[n] ;

      // a label outside the table must not be used as an index into it; such
      // samples are not counted here and are still validated per tree below
      if (label < 0 || label >= MAX_CLASSES)
        continue ;
      class_counts[label]++ ;
    }

    // find the most and least populated classes
    for (min_class_count = ntraining+1, min_class = max_class = max_class_count = n = 0 ;
         n < rf->nclasses ; n++)
    {
      if (class_counts[n] > max_class_count)
      {
        max_class_count = class_counts[n] ;
        max_class = n ;
      }
      if (class_counts[n] < min_class_count)
      {
        min_class_count = class_counts[n] ;
        min_class = n ;
      }
    }

    total_to_remove = (max_class_count-nint(min_class_count*rf->max_class_ratio)) ;
    if (total_to_remove > 0)
    {
      int *class_indices, *keep_order, class_index, new_index, src,
        new_ntraining = ntraining-total_to_remove ;

      printf("class %s (%d) has too many examples (%d) relative to class %s (%d) with %d\n",
             rf->class_names[max_class], max_class, max_class_count,
             rf->class_names[min_class], min_class, min_class_count) ;
      new_training_classes = (int *)calloc(new_ntraining, sizeof(int)) ;
      new_training_data = (double **)calloc(new_ntraining, sizeof(double *)) ;
      class_indices = (int *)calloc(max_class_count, sizeof(int)) ;
      // calloc(0, ...) may legitimately return NULL, so only treat NULL as a
      // failure when something was actually requested
      if (class_indices == NULL ||
          (new_ntraining > 0 && (new_training_classes == NULL || new_training_data == NULL)))
        ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate rebalanced training set (%d)",
                  new_ntraining) ;

      // first copy over everything that isn't in class max_class, and remember
      // where the max_class samples live in the original arrays
      for (class_index = new_index = n = 0 ; n < ntraining ; n++)
      {
        if (training_classes[n] == max_class)
          class_indices[class_index++] = n ;
        else  // copy over other class features and class
        {
          new_training_classes[new_index] = training_classes[n] ;
          new_training_data[new_index] = (double *)calloc(rf->nfeatures,sizeof(double));
          if (new_training_data[new_index] == NULL)
            ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate rebalanced training set (%d)",
                      new_ntraining) ;
          for (ii = 0 ; ii < rf->nfeatures ; ii++)
            new_training_data[new_index][ii] = training_data[n][ii] ;
          new_index++ ;
        }
      }

      // Pick the max_class samples to keep in random order. The permutation is
      // drawn into its own buffer (same usage as the NULL-buffer calls further
      // down) so that the sample indices gathered in class_indices survive, and
      // each kept sample is looked up through it by the loop counter n rather
      // than by the output position new_index.
      keep_order = compute_permutation(max_class_count, NULL) ;
      for (n = 0 ; n < max_class_count - total_to_remove ; n++)
      {
        src = class_indices[keep_order[n]] ;
        new_training_classes[new_index] = max_class ;
        new_training_data[new_index] = (double *)calloc(rf->nfeatures,sizeof(double));
        if (new_training_data[new_index] == NULL)
          ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate rebalanced training set (%d)",
                    new_ntraining) ;
        for (ii = 0 ; ii < rf->nfeatures ; ii++)
          new_training_data[new_index][ii] = training_data[src][ii] ;
        new_index++ ;
      }
      free(keep_order) ;
      free(class_indices) ;

      // from here on train on the private, rebalanced copy
      training_data = new_training_data ;
      training_classes = new_training_classes ;
      ntraining = new_ntraining ;
    }
  }

  fname = getenv("RF_WRITE_TRAINING") ;
  if (fname != NULL)
  {
    FILE *fp ;

    printf("writing RF training to %s\n", fname) ;
    fp = fopen(fname, "w") ;
    if (fp == NULL)  // the dump is optional: report the problem and keep training
      ErrorPrintf(ERROR_BADPARM, "RFtrain: could not open %s for writing", fname) ;
    else
    {
      for (n = 0 ; n < ntraining ; n++)
      {
        fprintf(fp, "%d ", training_classes[n]) ;
        for (ii = 0 ; ii < rf->nfeatures ; ii++)
          fprintf(fp, "%f ", training_data[n][ii]) ;
        fprintf(fp, "\n") ;
      }

      fclose(fp) ;
    }
  }
  rf->ntraining = ntraining ;
  rf->training_data = training_data ;
  rf->training_classes = training_classes ;
  rf->feature_fraction = feature_fraction ;
  rf->training_fraction = training_fraction ;

  // record the observed range of every feature
  for (f = 0 ; f < rf->nfeatures ; f++)
  {
    rf->feature_min[f] = 1e20 ;
    rf->feature_max[f] = -1e20 ;
    for (ii = 0 ; ii < ntraining ; ii++)
    {
      if (training_data[ii][f] < rf->feature_min[f])
        rf->feature_min[f] = training_data[ii][f] ;
      if (training_data[ii][f] > rf->feature_max[f])
        rf->feature_max[f] = training_data[ii][f] ;
    }
  }

  nfeatures_per_tree = nint((double)rf->nfeatures * feature_fraction) ;
  ntraining_per_tree = nint((double)rf->ntraining * training_fraction) ;
  feature_permutation = compute_permutation(rf->nfeatures, NULL) ;
  training_permutation = compute_permutation(ntraining, NULL) ;

#ifdef HAVE_OPENMP
  tree = NULL;
  start_no = 0 ; // only 1 tree
  end_no = 0 ; // only 1 tree
  index = 0 ;
  n = 0 ;
  ii = 0 ;
#pragma omp parallel for firstprivate(tree,start_no,end_no,ii,index) shared(rf, nfeatures_per_tree, Gdiag,training_classes,training_data) schedule(static,1)
#endif
  for (n = 0 ; n < rf->ntrees ; n++)  // train each tree
  {
#ifdef HAVE_OPENMP
  #pragma omp critical 
#endif  
    printf("training tree %d of %d....\n", n, rf->ntrees) ;
    tree = &rf->trees[n] ;

    // randomize what features this tree will use: each tree takes a window of
    // the shared feature permutation, the window start sliding from 0 (first
    // tree) to nfeatures-nfeatures_per_tree (last tree)
    tree->feature_list = (int *)calloc(nfeatures_per_tree, sizeof(tree->feature_list[0]));
    if (tree->feature_list == NULL)
      ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate feature list %d (%d)",
                n,nfeatures_per_tree) ;
    tree->nfeatures = nfeatures_per_tree ;
    if (rf->ntrees > 1)
      start_no = nint(n*((double)(rf->nfeatures-nfeatures_per_tree))/(rf->ntrees-1.0)) ;
    else
      start_no = 0 ; // only 1 tree
    end_no = MIN(rf->nfeatures-1, start_no+nfeatures_per_tree-1) ;
    for (ii = start_no  ; ii <= end_no  ; ii++)
      tree->feature_list[ii-start_no] = feature_permutation[ii] ;
    

    // randomize what training data this tree will use (same sliding-window
    // scheme, applied to the shared sample permutation)
    tree->root.training_set = (int *)calloc(ntraining, sizeof(tree->root.training_set[0])) ;
    if (tree->root.training_set == NULL)
      ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate root training set") ;
    tree->root.total_counts = 0 ;
    if (rf->ntrees > 1)
      start_no = nint(n*((double)(rf->ntraining-ntraining_per_tree))/(rf->ntrees-1.0)) ;
    else
      start_no = 0 ; // only 1 tree
    end_no = MIN(rf->ntraining-1, start_no+ntraining_per_tree-1) ;
    for (ii = start_no  ; ii <= end_no  ; ii++)
    {
      index = training_permutation[ii] ;
      if (training_classes[index] < 0 || training_classes[index] >= rf->nclasses)
      {
        ErrorPrintf(ERROR_BADPARM, "RFtrain: class at index %d = %d: out of bounds (%d)",
                    index, training_classes[index], rf->nclasses) ;
        training_classes[index] = 0 ;  // fall back to class 0 so training can continue
      }
      tree->root.class_counts[training_classes[index]]++ ;
      tree->root.training_set[tree->root.total_counts] = index ;
      tree->root.total_counts++ ;
    }

    if (Gdiag & DIAG_SHOW && DIAG_VERBOSE_ON)
      printf("tree %d: initial entropy = %f\n", n, 
             entropy(tree->root.class_counts, rf->nclasses, tree->root.class_counts)) ;
    rfTrainTree(rf, tree, training_classes, training_data, rf->ntraining) ;
#ifdef HAVE_OPENMP
  #pragma omp critical 
#endif  
    printf("\ttraining complete, depth %d, nleaves %d.\n", tree->depth, tree->nleaves) ;
  }

  // total_to_remove > 0 means training ran on the private rebalanced copy
  // allocated above, which is released here.
  // NOTE(review): rf->training_data and rf->training_classes still point at
  // these buffers after they are freed - confirm nothing reads them once
  // RFtrain has returned.
  if (total_to_remove > 0)
  {
    for (n = 0 ; n < ntraining ; n++)
      free(training_data[n]) ;
    free(training_data) ; free(training_classes) ;
  }
  free(feature_permutation) ;
  free(training_permutation) ;
  return(NO_ERROR) ;
}
Example #3
0
/**
 * Rewrites a set of dimensions and their per-operand strides in place and
 * returns how many dimensions are in use afterwards.
 *
 * Steps, in order: merge_dims(), remove_empty_dims(), repeated split_dims()
 * while it reports a split and fewer than N dimensions are in use, and
 * finally a reordering of all dimensions according to the permutation that
 * compute_permutation() derives from the per-dimension maximum stride.
 *
 * @param D    number of stride arrays in strs
 * @param N    capacity of dims and of each stride array
 * @param dims dimensions, modified in place
 * @param strs D pointers to stride arrays of length N, modified in place
 * @return number of dimensions in use (at least 1)
 */
unsigned int optimize_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N])
{
	merge_dims(D, N, dims, strs);

	unsigned int used = remove_empty_dims(D, N, dims, strs);

	// Nothing left: hand back one dimension of size 1 with zero strides.
	if (0 == used) {

		dims[0] = 1;

		for (unsigned int op = 0; op < D; op++)
			(*strs[op])[0] = 0;

		used = 1;
	}

	debug_print_dims(DP_DEBUG4, used, dims);

	float blocking[N];
#ifdef BERKELEY_SVN
	// actually those are not the blocking factors
	// as used below but relative to fast memory
	//demmel_factors(D, used, blocking, strs);
	UNUSED(demmel_factors);
#endif
#if 0
	debug_printf(DP_DEBUG4, "DB: ");
	for (unsigned int d = 0; d < used; d++)
		debug_printf(DP_DEBUG4, "%f\t", blocking[d]);
	debug_printf(DP_DEBUG4, "\n");
#endif
#if 1
	// Same blocking factor for every dimension currently in use.
	for (unsigned int d = 0; d < used; d++)
		blocking[d] = 0.5;
	//	blocking[d] = 1.;
#endif

	// Split dimensions according to the blocking factors for as long as
	// split_dims() succeeds and there is still room (up to N dimensions).

	bool split = false;

	while (N != used) {

		split = split_dims(D, used, dims, strs, blocking);

		if (!split)
			break;

		used++;
	}

//	printf("Split %c :", split ? 'y' : 'n');
//	print_dims(used, dims);

	// Largest stride per dimension over all operands (never below 0).
	long max_strides[used];

	for (unsigned int d = 0; d < used; d++) {

		long widest = 0;

		for (unsigned int op = 0; op < D; op++)
			widest = MAX(widest, (*strs[op])[d]);

		max_strides[d] = widest;
	}

	unsigned int ord[used];
	compute_permutation(used, ord, max_strides);

//	for (unsigned int d = 0; d < used; d++)
//		printf("%d: %ld %d\n", d, max_strides[d], ord[d]);
#if 1
	// Apply the permutation to every stride array, then to the dimensions.
	for (unsigned int op = 0; op < D; op++)
		reorder_long(used, ord, *strs[op]);

	reorder_long(used, ord, dims);
#endif

#if 0
	printf("opt dims\n");
	print_dims(used, dims);
	if (D > 0)
		print_dims(used, *strs[0]);
	if (D > 1)
		print_dims(used, *strs[1]);
	if (D > 2)
		print_dims(used, *strs[2]);
#endif

	return used;
}