bool SystemSolver_FASTCG::solve(const SparseMatrix& A_in, Vector& x_in, const Vector& b_in) { typedef double CoeffType ; typedef Array1d<CoeffType> VectorType ; typedef SparseMatrixBCRS<CoeffType, 2, 2> MatrixType ; unsigned int N0 = A_in.n() ; std::cerr << "N0 = " << N0 << std::endl ; Permutation permutation; compute_permutation(A_in, permutation) ; MatrixType A ; ::OGF::convert_matrix(A_in, A, permutation) ; // ::OGF::compress_indices(A) ; std::cerr << "filling ratio:" << (A.filling_ratio() * 100.0) << "%" << std::endl ; if(false) { std::cerr << "Saving matrix to disk (matrix.dat)" << std::endl ; std::ofstream out("matrix.dat") ; // A.print(out) ; ::OGF::output_matrix(A, out) ; } unsigned int N = A.n() ; // Can be greater than N0 due to blocking N = QuickBLAS::aligned_size(N, sizeof(CoeffType)) ; std::cerr <<"N = " << N << std::endl ; int max_iter = (nb_iters_ == 0) ? 5 * N : nb_iters_ ; double eps = threshold_ ; std::cerr << "nb iters = " << max_iter << " threshold = " << eps << std::endl ; VectorType b(N, alignment_for_SSE2) ; VectorType x(N, alignment_for_SSE2) ; permutation.invert_permute_vector(b_in, b) ; permutation.invert_permute_vector(x_in, x) ; solve_cg(A, x, b, eps, max_iter) ; permutation.permute_vector(x, x_in) ; return true ; }
int RFtrain(RANDOM_FOREST *rf, double feature_fraction, double training_fraction, int *training_classes, double **training_data, int ntraining) { int n, ii, nfeatures_per_tree = 0, *feature_permutation, *training_permutation, f, index, start_no, end_no, ntraining_per_tree = 0, total_to_remove = 0 ; TREE *tree = NULL ; if (rf->max_class_ratio > 0) { int class_counts[MAX_CLASSES], max_class, max_class_count, min_class, min_class_count ; double **new_training_data ; int *new_training_classes ; memset(class_counts, 0, sizeof(class_counts)) ; for (n = 0 ; n < ntraining ; n++) class_counts[training_classes[n]]++ ; for (min_class_count = ntraining+1, min_class = max_class = max_class_count = n = 0 ; n < rf->nclasses ; n++) { if (class_counts[n] > max_class_count) { max_class_count = class_counts[n] ; max_class = n ; } if (class_counts[n] < min_class_count) { min_class_count = class_counts[n] ; min_class = n ; } } total_to_remove = (max_class_count-nint(min_class_count*rf->max_class_ratio)) ; if (total_to_remove > 0) { int *class_indices, class_index,new_index, new_ntraining = ntraining-total_to_remove ; printf("class %s (%d) has too many examples (%d) relative to class %s (%d) with %d\n", rf->class_names[max_class], max_class, max_class_count, rf->class_names[min_class], min_class, min_class_count) ; new_training_classes = (int *)calloc(new_ntraining, sizeof(int)) ; new_training_data = (double **)calloc(new_ntraining, sizeof(double *)) ; class_indices = (int *)calloc(max_class_count, sizeof(int)) ; // first copy over everything that isn't in class max_class for (class_index = new_index = n = 0 ; n < ntraining ; n++) { if (training_classes[n] == max_class) class_indices[class_index++] = n ; else // copy over other class features and class { new_training_classes[new_index] = training_classes[n] ; new_training_data[new_index] = (double *)calloc(rf->nfeatures,sizeof(double)); for (ii = 0 ; ii < rf->nfeatures ; ii++) new_training_data[new_index][ii] = training_data[n][ii] ; new_index++ ; } } compute_permutation(max_class_count, class_indices) ; for (n = 0 ; n < max_class_count - total_to_remove ; n++) { new_training_classes[new_index] = max_class ; new_training_data[new_index] = (double *)calloc(rf->nfeatures,sizeof(double)); for (ii = 0 ; ii < rf->nfeatures ; ii++) new_training_data[new_index][ii] = training_data[class_indices[new_index]][ii] ; new_index++ ; } training_data = new_training_data ; training_classes = new_training_classes ; ntraining -= total_to_remove ; } } if (getenv("RF_WRITE_TRAINING")) { char *fname = getenv("RF_WRITE_TRAINING") ; FILE *fp ; printf("writing RF training to %s\n", fname) ; fp = fopen(fname, "w") ; for (n = 0 ; n < ntraining ; n++) { fprintf(fp, "%d ", training_classes[n]) ; for (ii = 0 ; ii < rf->nfeatures ; ii++) fprintf(fp, "%f ", training_data[n][ii]) ; fprintf(fp, "\n") ; } fclose(fp) ; } rf->ntraining = ntraining ; rf->training_data = training_data ; rf->training_classes = training_classes ; rf->feature_fraction = feature_fraction ; rf->training_fraction = training_fraction ; for (f = 0 ; f < rf->nfeatures ; f++) { rf->feature_min[f] = 1e20 ; rf->feature_max[f] = -1e20 ; for (ii = 0 ; ii < ntraining ; ii++) { if (training_data[ii][f] < rf->feature_min[f]) rf->feature_min[f] = training_data[ii][f] ; if (training_data[ii][f] > rf->feature_max[f]) rf->feature_max[f] = training_data[ii][f] ; } } nfeatures_per_tree = nint((double)rf->nfeatures * feature_fraction) ; ntraining_per_tree = nint((double)rf->ntraining * training_fraction) ; feature_permutation = compute_permutation(rf->nfeatures, NULL) ; training_permutation = compute_permutation(ntraining, NULL) ; #ifdef HAVE_OPENMP tree = NULL; start_no = 0 ; // only 1 tree end_no = 0 ; // only 1 tree index = 0 ; n = 0 ; ii = 0 ; #pragma omp parallel for firstprivate(tree,start_no,end_no,ii,index) shared(rf, nfeatures_per_tree, Gdiag,training_classes,training_data) schedule(static,1) #endif for (n = 0 ; n < rf->ntrees ; n++) // train each tree { #ifdef HAVE_OPENMP #pragma omp critical #endif printf("training tree %d of %d....\n", n, rf->ntrees) ; tree = &rf->trees[n] ; // randomize what features this tree will use tree->feature_list = (int *)calloc(nfeatures_per_tree, sizeof(tree->feature_list[0])); if (tree->feature_list == NULL) ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate feature list %d (%d)", n,nfeatures_per_tree) ; tree->nfeatures = nfeatures_per_tree ; if (rf->ntrees > 1) start_no = nint(n*((double)(rf->nfeatures-nfeatures_per_tree))/(rf->ntrees-1.0)) ; else start_no = 0 ; // only 1 tree end_no = MIN(rf->nfeatures-1, start_no+nfeatures_per_tree-1) ; for (ii = start_no ; ii <= end_no ; ii++) tree->feature_list[ii-start_no] = feature_permutation[ii] ; // randomize what training data this tree will use tree->root.training_set = (int *)calloc(ntraining, sizeof(tree->root.training_set[0])) ; if (tree->root.training_set == NULL) ErrorExit(ERROR_NOMEMORY, "RFtrain: could not allocate root training set") ; tree->root.total_counts = 0 ; if (rf->ntrees > 1) start_no = nint(n*((double)(rf->ntraining-ntraining_per_tree))/(rf->ntrees-1.0)) ; else start_no = 0 ; // only 1 tree end_no = MIN(rf->ntraining-1, start_no+ntraining_per_tree-1) ; for (ii = start_no ; ii <= end_no ; ii++) { index = training_permutation[ii] ; if (training_classes[index] < 0 || training_classes[index] >= rf->nclasses) { ErrorPrintf(ERROR_BADPARM, "RFtrain: class at index %d = %d: out of bounds (%d)", index, training_classes[index], rf->nclasses) ; training_classes[index] = 0 ; } tree->root.class_counts[training_classes[index]]++ ; tree->root.training_set[tree->root.total_counts] = index ; tree->root.total_counts++ ; } if (Gdiag & DIAG_SHOW && DIAG_VERBOSE_ON) printf("tree %d: initial entropy = %f\n", n, entropy(tree->root.class_counts, rf->nclasses, tree->root.class_counts)) ; rfTrainTree(rf, tree, training_classes, training_data, rf->ntraining) ; #ifdef HAVE_OPENMP #pragma omp critical #endif printf("\ttraining complete, depth %d, nleaves %d.\n", tree->depth, tree->nleaves) ; } if (total_to_remove > 0) { for (n = 0 ; n < ntraining ; n++) free(training_data[n]) ; free(training_data) ; free(training_classes) ; } free(feature_permutation) ; free(training_permutation) ; return(NO_ERROR) ; }
unsigned int optimize_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N]) { merge_dims(D, N, dims, strs); unsigned int ND = remove_empty_dims(D, N, dims, strs); if (0 == ND) { // atleast return a single dimension dims[0] = 1; for (unsigned int j = 0; j < D; j++) (*strs[j])[0] = 0; ND = 1; } debug_print_dims(DP_DEBUG4, ND, dims); float blocking[N]; #ifdef BERKELEY_SVN // actually those are not the blocking factors // as used below but relative to fast memory //demmel_factors(D, ND, blocking, strs); UNUSED(demmel_factors); #endif #if 0 debug_printf(DP_DEBUG4, "DB: "); for (unsigned int i = 0; i < ND; i++) debug_printf(DP_DEBUG4, "%f\t", blocking[i]); debug_printf(DP_DEBUG4, "\n"); #endif #if 1 for (unsigned int i = 0; i < ND; i++) blocking[i] = 0.5; // blocking[i] = 1.; #endif // try to split dimensions according to blocking factors // use space up to N bool split = false; do { if (N == ND) break; split = split_dims(D, ND, dims, strs, blocking); if (split) ND++; } while(split); // printf("Split %c :", split ? 'y' : 'n'); // print_dims(ND, dims); long max_strides[ND]; for (unsigned int i = 0; i < ND; i++) { max_strides[i] = 0; for (unsigned int j = 0; j < D; j++) max_strides[i] = MAX(max_strides[i], (*strs[j])[i]); } unsigned int ord[ND]; compute_permutation(ND, ord, max_strides); // for (unsigned int i = 0; i < ND; i++) // printf("%d: %ld %d\n", i, max_strides[i], ord[i]); #if 1 for (unsigned int j = 0; j < D; j++) reorder_long(ND, ord, *strs[j]); reorder_long(ND, ord, dims); #endif #if 0 printf("opt dims\n"); print_dims(ND, dims); if (D > 0) print_dims(ND, *strs[0]); if (D > 1) print_dims(ND, *strs[1]); if (D > 2) print_dims(ND, *strs[2]); #endif return ND; }