/*
 * Print the univariate statistics as one delimited table on stdout.
 *
 * A header line with the column names is written first, followed by one
 * line for every zone whose record holds at least one non-null cell
 * (stats[z].n != 0).  Fields are separated by zone_info.sep.  When
 * zone_info.n_zones is non-zero the first two columns are the zone number
 * and its category label; otherwise only stats[0] is reported.  When
 * param.extended is set, the quartiles, the median and the requested
 * percentiles are appended to every line.
 *
 * stats    array of per-zone statistics records
 * returns  always 1
 *
 * Note: for the extended statistics the zone's cell array is handed to
 * heapsort_*() and the quantiles are then read from fixed positions in it.
 */
int print_stats_table(univar_stat * stats)
{
    unsigned int i;
    int z, n_zones = zone_info.n_zones;

    /* without zones there is still exactly one record to report */
    if (n_zones == 0)
        n_zones = 1;

    /* print column headers */
    if (zone_info.n_zones) {
        fprintf(stdout, "zone%s", zone_info.sep);
        fprintf(stdout, "label%s", zone_info.sep);
    }
    fprintf(stdout, "non_null_cells%s", zone_info.sep);
    fprintf(stdout, "null_cells%s", zone_info.sep);
    fprintf(stdout, "min%s", zone_info.sep);
    fprintf(stdout, "max%s", zone_info.sep);
    fprintf(stdout, "range%s", zone_info.sep);
    fprintf(stdout, "mean%s", zone_info.sep);
    fprintf(stdout, "mean_of_abs%s", zone_info.sep);
    fprintf(stdout, "stddev%s", zone_info.sep);
    fprintf(stdout, "variance%s", zone_info.sep);
    fprintf(stdout, "coeff_var%s", zone_info.sep);
    fprintf(stdout, "sum%s", zone_info.sep);
    fprintf(stdout, "sum_abs");

    if (param.extended->answer) {
        fprintf(stdout, "%sfirst_quart", zone_info.sep);
        fprintf(stdout, "%smedian", zone_info.sep);
        fprintf(stdout, "%sthird_quart", zone_info.sep);

        /* the percentile columns are named after the values in stats[0] */
        for (i = 0; i < stats[0].n_perc; i++) {
            if (stats[0].perc[i] == (int)stats[0].perc[i]) {
                /* percentile is an exact integer */
                fprintf(stdout, "%sperc_%d", zone_info.sep,
                        (int)stats[0].perc[i]);
            }
            else {
                /* percentile is not an exact integer:
                 * use '_' instead of '.' in the column name */
                char buf[24];

                snprintf(buf, sizeof(buf), "%.15g", stats[0].perc[i]);
                G_strchg(buf, '.', '_');
                fprintf(stdout, "%sperc_%s", zone_info.sep, buf);
            }
        }
    }
    fprintf(stdout, "\n");

    /* print stats */
    for (z = 0; z < n_zones; z++) {
        char sum_str[100];
        double mean, variance, stdev, var_coef;

        /* for extended stats */
        double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
        double median = 0.0;
        int qpos_25, qpos_75, *qpos_perc;

        /* stats collected for this zone? */
        if (stats[z].n == 0)
            continue;

        /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
        mean = stats[z].sum / stats[z].n;
        variance =
            (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) /
            stats[z].n;
        /* clamp tiny (possibly negative) round-off results to zero */
        if (variance < GRASS_EPSILON)
            variance = 0.0;
        stdev = sqrt(variance);
        var_coef = (stdev / mean) * 100.;       /* perhaps stdev/fabs(mean) ? */

        if (zone_info.n_zones) {
            /* zone number */
            fprintf(stdout, "%d%s", z + zone_info.min, zone_info.sep);
            /* zone label */
            fprintf(stdout, "%s%s",
                    G_get_cat(z + zone_info.min, &(zone_info.cats)),
                    zone_info.sep);
        }

        /* non-null cells */
        fprintf(stdout, "%d%s", stats[z].n, zone_info.sep);
        /* null cells */
        fprintf(stdout, "%d%s", stats[z].size - stats[z].n, zone_info.sep);
        /* min */
        fprintf(stdout, "%.15g%s", stats[z].min, zone_info.sep);
        /* max */
        fprintf(stdout, "%.15g%s", stats[z].max, zone_info.sep);
        /* range */
        fprintf(stdout, "%.15g%s", stats[z].max - stats[z].min,
                zone_info.sep);
        /* mean */
        fprintf(stdout, "%.15g%s", mean, zone_info.sep);
        /* mean of abs */
        fprintf(stdout, "%.15g%s", stats[z].sum_abs / stats[z].n,
                zone_info.sep);
        /* stddev */
        fprintf(stdout, "%.15g%s", stdev, zone_info.sep);
        /* variance */
        fprintf(stdout, "%.15g%s", variance, zone_info.sep);
        /* coefficient of variance */
        fprintf(stdout, "%.15g%s", var_coef, zone_info.sep);

        /* sum */
        snprintf(sum_str, sizeof(sum_str), "%.15g", stats[z].sum);
        G_trim_decimal(sum_str);
        fprintf(stdout, "%s%s", sum_str, zone_info.sep);

        /* absolute sum (last basic column: no trailing separator) */
        snprintf(sum_str, sizeof(sum_str), "%.15g", stats[z].sum_abs);
        G_trim_decimal(sum_str);
        fprintf(stdout, "%s", sum_str);

        /* TODO: mode, skewness, kurtosis */
        if (param.extended->answer) {
            qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
            quartile_perc =
                (double *)G_calloc(stats[z].n_perc, sizeof(double));

            /* array positions of the requested quantiles */
            for (i = 0; i < stats[z].n_perc; i++) {
                qpos_perc[i] =
                    (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
            }
            qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
            qpos_75 = (int)(stats[z].n * 0.75 - 0.5);

            switch (stats[z].map_type) {
            case CELL_TYPE:
                heapsort_int(stats[z].cell_array, stats[z].n);

                quartile_25 = (double)stats[z].cell_array[qpos_25];
                if (stats[z].n % 2)     /* odd */
                    median =
                        (double)stats[z].cell_array[(int)(stats[z].n / 2)];
                else            /* even */
                    /* convert before adding: the sum of two large
                     * integer cells would overflow int */
                    median =
                        ((double)stats[z].cell_array[stats[z].n / 2 - 1] +
                         (double)stats[z].cell_array[stats[z].n / 2]) / 2.0;
                quartile_75 = (double)stats[z].cell_array[qpos_75];
                for (i = 0; i < stats[z].n_perc; i++) {
                    quartile_perc[i] =
                        (double)stats[z].cell_array[qpos_perc[i]];
                }
                break;

            case FCELL_TYPE:
                heapsort_float(stats[z].fcell_array, stats[z].n);

                quartile_25 = (double)stats[z].fcell_array[qpos_25];
                if (stats[z].n % 2)     /* odd */
                    median =
                        (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
                else            /* even */
                    /* add in double so the sum is neither rounded to
                     * float nor able to overflow float */
                    median =
                        ((double)stats[z].fcell_array[stats[z].n / 2 - 1] +
                         (double)stats[z].fcell_array[stats[z].n / 2]) / 2.0;
                quartile_75 = (double)stats[z].fcell_array[qpos_75];
                for (i = 0; i < stats[z].n_perc; i++) {
                    quartile_perc[i] =
                        (double)stats[z].fcell_array[qpos_perc[i]];
                }
                break;

            case DCELL_TYPE:
                heapsort_double(stats[z].dcell_array, stats[z].n);

                quartile_25 = stats[z].dcell_array[qpos_25];
                if (stats[z].n % 2)     /* odd */
                    median = stats[z].dcell_array[(int)(stats[z].n / 2)];
                else            /* even */
                    median =
                        (stats[z].dcell_array[stats[z].n / 2 - 1] +
                         stats[z].dcell_array[stats[z].n / 2]) / 2.0;
                quartile_75 = stats[z].dcell_array[qpos_75];
                for (i = 0; i < stats[z].n_perc; i++) {
                    quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
                }
                break;

            default:
                /* unknown map type: quantiles stay at their zero defaults */
                break;
            }

            /* first quartile */
            fprintf(stdout, "%s%g", zone_info.sep, quartile_25);
            /* median */
            fprintf(stdout, "%s%g", zone_info.sep, median);
            /* third quartile */
            fprintf(stdout, "%s%g", zone_info.sep, quartile_75);
            /* percentiles */
            for (i = 0; i < stats[z].n_perc; i++) {
                fprintf(stdout, "%s%g", zone_info.sep, quartile_perc[i]);
            }

            G_free((void *)quartile_perc);
            G_free((void *)qpos_perc);
        }

        fprintf(stdout, "\n");

        /* zone z finished */
    }

    return 1;
}
/* *************************************************************** */
/*
 * Print the univariate statistics to stdout, one block per zone.
 *
 * Zones without any non-null cell (stats[z].n == 0) are skipped.  When
 * zone_info.n_zones is non-zero each block starts with a
 * "zone <number> <label>" heading; otherwise only stats[0] is reported.
 * param.shell_style selects key=value output instead of the descriptive
 * text, and param.extended adds the quartiles, the median and the
 * requested percentiles.
 *
 * stats    array of per-zone statistics records
 * returns  always 1
 *
 * Note: for the extended statistics the zone's cell array is handed to
 * heapsort_*() and the quantiles are then read from fixed positions in it.
 */
int print_stats(univar_stat * stats)
{
    int z, n_zones = zone_info.n_zones;

    /* without zones there is still exactly one record to report */
    if (n_zones == 0)
        n_zones = 1;

    for (z = 0; z < n_zones; z++) {
        char sum_str[100];
        double mean, variance, stdev, var_coef;

        /* for extended stats */
        double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
        double median = 0.0;
        unsigned int i;
        int qpos_25, qpos_75, *qpos_perc;

        /* stats collected for this zone? */
        if (stats[z].n == 0)
            continue;

        /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
        mean = stats[z].sum / stats[z].n;
        variance =
            (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) /
            stats[z].n;
        /* clamp tiny (possibly negative) round-off results to zero */
        if (variance < GRASS_EPSILON)
            variance = 0.0;
        stdev = sqrt(variance);
        var_coef = (stdev / mean) * 100.;       /* perhaps stdev/fabs(mean) ? */

        snprintf(sum_str, sizeof(sum_str), "%.15g", stats[z].sum);
        G_trim_decimal(sum_str);

        if (zone_info.n_zones)
            fprintf(stdout, "\nzone %d %s\n\n", z + zone_info.min,
                    G_get_cat(z + zone_info.min, &(zone_info.cats)));

        if (!param.shell_style->answer) {
            fprintf(stdout, "total null and non-null cells: %d\n",
                    stats[z].size);
            fprintf(stdout, "total null cells: %d\n\n",
                    stats[z].size - stats[z].n);
            fprintf(stdout, "Of the non-null cells:\n----------------------\n");
        }

        if (param.shell_style->answer) {
            fprintf(stdout, "n=%d\n", stats[z].n);
            fprintf(stdout, "null_cells=%d\n", stats[z].size - stats[z].n);
            /* report the size of this zone, not always that of zone 0 */
            fprintf(stdout, "cells=%d\n", stats[z].size);
            fprintf(stdout, "min=%.15g\n", stats[z].min);
            fprintf(stdout, "max=%.15g\n", stats[z].max);
            fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min);
            fprintf(stdout, "mean=%.15g\n", mean);
            fprintf(stdout, "mean_of_abs=%.15g\n",
                    stats[z].sum_abs / stats[z].n);
            fprintf(stdout, "stddev=%.15g\n", stdev);
            fprintf(stdout, "variance=%.15g\n", variance);
            fprintf(stdout, "coeff_var=%.15g\n", var_coef);
            fprintf(stdout, "sum=%s\n", sum_str);
        }
        else {
            fprintf(stdout, "n: %d\n", stats[z].n);
            fprintf(stdout, "minimum: %g\n", stats[z].min);
            fprintf(stdout, "maximum: %g\n", stats[z].max);
            fprintf(stdout, "range: %g\n", stats[z].max - stats[z].min);
            fprintf(stdout, "mean: %g\n", mean);
            fprintf(stdout, "mean of absolute values: %g\n",
                    stats[z].sum_abs / stats[z].n);
            fprintf(stdout, "standard deviation: %g\n", stdev);
            fprintf(stdout, "variance: %g\n", variance);
            fprintf(stdout, "variation coefficient: %g %%\n", var_coef);
            fprintf(stdout, "sum: %s\n", sum_str);
        }

        /* TODO: mode, skewness, kurtosis */
        if (param.extended->answer) {
            qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
            quartile_perc =
                (double *)G_calloc(stats[z].n_perc, sizeof(double));

            /* array positions of the requested quantiles */
            for (i = 0; i < stats[z].n_perc; i++) {
                qpos_perc[i] =
                    (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
            }
            qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
            qpos_75 = (int)(stats[z].n * 0.75 - 0.5);

            switch (stats[z].map_type) {
            case CELL_TYPE:
                heapsort_int(stats[z].cell_array, stats[z].n);

                quartile_25 = (double)stats[z].cell_array[qpos_25];
                if (stats[z].n % 2)     /* odd */
                    median =
                        (double)stats[z].cell_array[(int)(stats[z].n / 2)];
                else            /* even */
                    /* convert before adding: the sum of two large
                     * integer cells would overflow int */
                    median =
                        ((double)stats[z].cell_array[stats[z].n / 2 - 1] +
                         (double)stats[z].cell_array[stats[z].n / 2]) / 2.0;
                quartile_75 = (double)stats[z].cell_array[qpos_75];
                for (i = 0; i < stats[z].n_perc; i++) {
                    quartile_perc[i] =
                        (double)stats[z].cell_array[qpos_perc[i]];
                }
                break;

            case FCELL_TYPE:
                heapsort_float(stats[z].fcell_array, stats[z].n);

                quartile_25 = (double)stats[z].fcell_array[qpos_25];
                if (stats[z].n % 2)     /* odd */
                    median =
                        (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
                else            /* even */
                    /* add in double so the sum is neither rounded to
                     * float nor able to overflow float */
                    median =
                        ((double)stats[z].fcell_array[stats[z].n / 2 - 1] +
                         (double)stats[z].fcell_array[stats[z].n / 2]) / 2.0;
                quartile_75 = (double)stats[z].fcell_array[qpos_75];
                for (i = 0; i < stats[z].n_perc; i++) {
                    quartile_perc[i] =
                        (double)stats[z].fcell_array[qpos_perc[i]];
                }
                break;

            case DCELL_TYPE:
                heapsort_double(stats[z].dcell_array, stats[z].n);

                quartile_25 = stats[z].dcell_array[qpos_25];
                if (stats[z].n % 2)     /* odd */
                    median = stats[z].dcell_array[(int)(stats[z].n / 2)];
                else            /* even */
                    median =
                        (stats[z].dcell_array[stats[z].n / 2 - 1] +
                         stats[z].dcell_array[stats[z].n / 2]) / 2.0;
                quartile_75 = stats[z].dcell_array[qpos_75];
                for (i = 0; i < stats[z].n_perc; i++) {
                    quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
                }
                break;

            default:
                /* unknown map type: quantiles stay at their zero defaults */
                break;
            }

            if (param.shell_style->answer) {
                fprintf(stdout, "first_quartile=%g\n", quartile_25);
                fprintf(stdout, "median=%g\n", median);
                fprintf(stdout, "third_quartile=%g\n", quartile_75);
                for (i = 0; i < stats[z].n_perc; i++) {
                    /* key name uses '_' instead of the decimal point */
                    char buf[24];

                    snprintf(buf, sizeof(buf), "%.15g", stats[z].perc[i]);
                    G_strchg(buf, '.', '_');
                    fprintf(stdout, "percentile_%s=%g\n", buf,
                            quartile_perc[i]);
                }
            }
            else {
                fprintf(stdout, "1st quartile: %g\n", quartile_25);
                if (stats[z].n % 2)
                    fprintf(stdout, "median (odd number of cells): %g\n",
                            median);
                else
                    fprintf(stdout, "median (even number of cells): %g\n",
                            median);
                fprintf(stdout, "3rd quartile: %g\n", quartile_75);

                for (i = 0; i < stats[z].n_perc; i++) {
                    if (stats[z].perc[i] == (int)stats[z].perc[i]) {
                        /* percentile is an exact integer: pick the
                         * English ordinal suffix (11th, 12th, 13th are
                         * the exceptions to st/nd/rd) */
                        int whole = (int)stats[z].perc[i];
                        const char *suffix = "th";

                        if (whole % 10 == 1 && whole != 11)
                            suffix = "st";
                        else if (whole % 10 == 2 && whole != 12)
                            suffix = "nd";
                        else if (whole % 10 == 3 && whole != 13)
                            suffix = "rd";

                        fprintf(stdout, "%d%s percentile: %g\n", whole,
                                suffix, quartile_perc[i]);
                    }
                    else {
                        /* percentile is not an exact integer */
                        fprintf(stdout, "%.15g percentile: %g\n",
                                stats[z].perc[i], quartile_perc[i]);
                    }
                }
            }

            G_free((void *)quartile_perc);
            G_free((void *)qpos_perc);
        }

        /* no trailing blank line here: G_message() prints to stderr, not
         * stdout, so the separating "\n" is printed with the zone heading */
    }

    return 1;
}
/******************************************************************************************
*   index_map_find()
*
*   calculates the symmetric CRS sparsity pattern needed for ParMETIS.
*
*   The column indices of A are split into a left, a centre and a right section
*   according to A->vtxdist (the first/last process only has two sections).  The
*   centre section is made symmetric by merging it with its own transpose; the
*   other sections are passed through unchanged.  The sections are then glued
*   back together row by row.
*
*   A       the distributed CRS matrix
*   cindx   out: column indices of the new pattern (malloc'ed here, not freed here)
*   rindx   out: row pointers of the new pattern, A->mtx.nrows+1 entries
*           (malloc'ed here, not freed here)
*   length  out: number of entries in *cindx
*   insert  if 0, only the new pattern is created and returned and A is unchanged.
*           if non-zero, A is also updated to the new pattern, with zeros stored
*           at the newly created positions.
*
*   The insert step assumes that every row of the old pattern is contained in the
*   corresponding row of the new pattern, in the same column order.
******************************************************************************************/
void index_map_find( Tmtx_CRS_dist_ptr A, int **cindx, int **rindx, int *length, int insert )
{
    int i, j, k, spot, this_dom, nnz, nrows, ncols, nc, nr, n, pos, nd, td;
    Tindex split[3];
    int vtxdist[4];
    int *cindxCRS, *rindxCRS, *cindxCCS, *rindxCCS, *cp, *rp, *head, *index_minor, *index_major;
    int *shrunk;

    // setup constants
    //   nd = number of sections the columns are split into
    //   td = which of those sections is the centre one
    this_dom = A->This.this_proc;
    if( this_dom==0 )
    {
        nd = 2;
        td = 0;
    }
    else if( this_dom==(A->This.n_proc-1) )
    {
        nd = 2;
        td = 1;
    }
    else
    {
        nd = 3;
        td = 1;
    }

    /*
     *  split the index into 3 sections, left, centre (the S block), and right
     */
    // the order of these assignments matters: later ones may overwrite earlier
    // ones when nd==2
    vtxdist[0]    = 0;
    vtxdist[td]   = A->vtxdist[this_dom];
    vtxdist[td+1] = A->vtxdist[this_dom+1];
    vtxdist[nd]   = A->ncols;
    index_split( A->mtx.cindx, A->mtx.rindx, vtxdist, A->mtx.nnz, A->mtx.nrows, nd, split, 1 );

    /*
     *  now make the centre index symmetric
     */
    cindxCRS = split[td].index_major;
    rindxCRS = split[td].index_minor;
    nnz      = split[td].dim_major;
    nrows    = A->mtx.nrows;
    ncols    = nrows;
    cindxCCS = (int *)malloc( sizeof(int)*(ncols+1) );
    rindxCCS = (int *)malloc( sizeof(int)*(nnz) );

    // convert the CRS index to CCS
    index_CRS_to_CCS( cindxCRS, rindxCRS, cindxCCS, rindxCCS, NULL, nnz, nrows, ncols );

    // swap the column and row indices over for the CCS to make them "CRS"
    cp = cindxCCS;
    cindxCCS = rindxCCS;
    rindxCCS = cp;

    // combine the CRS and CCS indices to form the symmetric pattern.
    // cp is big enough for the worst case in which no index is shared.
    cp = (int *)malloc( sizeof(int)*nnz*2 );
    rp = (int *)malloc( sizeof(int)*(nrows+1) );
    head = cp;
    rp[0] = 0;
    pos = 0;
    for( i=0; i<nrows; i++ )
    {
        // determine how many indices are in This row
        nr = rindxCRS[i+1] - rindxCRS[i];
        nc = rindxCCS[i+1] - rindxCCS[i];
        n = nr + nc;

        // augment the indices for This row if there are elements to augment
        if( n )
        {
            // augment the two sets of indices
            if( nr )
                memcpy( head, cindxCRS + rindxCRS[i], nr*sizeof(int) );
            if( nc )
                memcpy( head + nr, cindxCCS + rindxCCS[i], nc*sizeof(int) );

            // sort the indices
            heapsort_int( n, head );

            // sort out the unique elements. head == cp+pos, so the row is
            // compacted in place; an index appears at most twice (once from
            // each of the two sets), so skipping one duplicate is enough
            j = 0;
            while( j < n-1 )
            {
                cp[pos++] = head[j++];
                if( head[j]==head[j-1] )
                    j++;
            }
            if( j == (n-1) )
            {
                cp[pos++] = head[j];
            }
        }
        rp[i+1] = pos;
        head = cp + pos;
    }

    // remove extra storage from the end of index_major. keep the original
    // buffer if the shrink fails, and do not realloc to size zero.
    if( pos>0 )
    {
        shrunk = (int *)realloc( cp, sizeof(int)*pos );
        if( shrunk )
            cp = shrunk;
    }
    free( split[td].index_major );
    free( split[td].index_minor );
    split[td].index_major = cp;
    split[td].index_minor = rp;
    split[td].dim_major   = pos;

    /*
     *  recombine the left right and centre
     */

    // find out how many nonzero are in the symmetric pattern
    n = 0;
    for( i=0; i<nd; i++ )
        n += split[i].dim_major;

    // allocate memory for the indices
    index_major = (int *)malloc( sizeof(int)*n );
    index_minor = (int *)malloc( sizeof(int)*(A->mtx.nrows+1) );

    // stick 'em together
    pos = 0;
    index_minor[0] = 0;
    for( k=0; k<A->mtx.nrows; k++ )
    {
        for( i=0; i<nd; i++ )
        {
            if( split[i].index_major )
            {
                // section-local column indices are shifted back by the
                // section's starting column
                spot = vtxdist[i];
                for( j=split[i].index_minor[k]; j<split[i].index_minor[k+1]; j++ )
                {
                    index_major[pos++] = split[i].index_major[j] + spot;
                }
            }
        }
        index_minor[k+1] = pos;
    }

    // now save the information
    *cindx  = index_major;
    *rindx  = index_minor;
    *length = n;

    // free memory
    for( i=0; i<nd; i++ )
        index_free( split + i );
    free( cindxCCS );
    free( rindxCCS );

    // if needed, update A by inserting zeros at the new positions
    if( insert )
    {
        int *cpo, *rpo, *cpn, *rpn;
        double *nzp;
        int poso, posn, no, nn, old_end, new_end;

        cpn = index_major;      // new pattern
        rpn = index_minor;
        cpo = A->mtx.cindx;     // old pattern
        rpo = A->mtx.rindx;

        if( !A->mtx.block )
        {
            // calloc, so every position that is not copied into is a zero
            nzp = (double*)calloc( n, sizeof(double) );

            // loop over the rows
            poso = posn = 0;
            for( i=0; i<nrows; i++ )
            {
                no = rpo[i+1]-rpo[i];
                nn = rpn[i+1]-rpn[i];

                // are the new and old rows different?
                if( no != nn )
                {
                    // walk the new row; an old value is copied only while
                    // the old row still has entries and its next column
                    // matches. this never looks past the end of the old row
                    // and also handles new entries at the start of the row.
                    old_end = poso + no;
                    new_end = posn + nn;
                    for( ; posn<new_end; posn++ )
                    {
                        if( poso<old_end && cpo[poso]==cpn[posn] )
                            nzp[posn] = A->mtx.nz[poso++];
                    }
                    // stay aligned with the old row pointers
                    poso = old_end;
                }
                else
                {
                    // just copy the old row into the new one
                    if( no )
                        memcpy( nzp + posn, A->mtx.nz + poso, sizeof(double)*no );
                    poso += no;
                    posn += nn;
                }
            }
        }
        else
        {
            // same as above, but every entry is a BLOCK_SIZE x BLOCK_SIZE block
            nzp = (double*)calloc( (n BLOCK_M_SHIFT), sizeof(double) );

            posn = poso = 0;
            for( i=0; i<nrows; i++ )
            {
                no = rpo[i+1]-rpo[i];
                nn = rpn[i+1]-rpn[i];

                // are the new and old rows different?
                if( no != nn )
                {
                    old_end = poso + no;
                    new_end = posn + nn;
                    for( ; posn<new_end; posn++ )
                    {
                        if( poso<old_end && cpo[poso]==cpn[posn] )
                        {
                            // copy the old entry over
                            memcpy( nzp + (posn BLOCK_M_SHIFT), A->mtx.nz + (poso BLOCK_M_SHIFT), BLOCK_SIZE*BLOCK_SIZE*sizeof(double) );
                            poso++;
                        }
                    }
                    // stay aligned with the old row pointers
                    poso = old_end;
                }
                else
                {
                    // just copy the old row into the new one
                    if( no )
                        memcpy( nzp + (posn BLOCK_M_SHIFT), A->mtx.nz + (poso BLOCK_M_SHIFT), sizeof(double)*(no BLOCK_M_SHIFT) );
                    poso += no;
                    posn += nn;
                }
            }
        }

        // attach and copy the new data to A. the caller keeps ownership of
        // *cindx and *rindx; A receives its own copies.
        A->mtx.nnz = n;
        free( A->mtx.nz );
        A->mtx.nz = nzp;
        memcpy( rpo, rpn, sizeof(int)*(A->mtx.nrows+1) );
        A->mtx.cindx = (int*)realloc( A->mtx.cindx, sizeof(int)*n );
        memcpy( A->mtx.cindx, cpn, sizeof(int)*(n) );
    }
}
// same as above, but assumes that the matrix A has symmetric sparsity pattern. Only recommended for use // on a matrix that has been formed directly from a finite volume mesh. void mtx_CRS_dist_domdec_sym( Tmtx_CRS_dist_ptr A, Tdistribution_ptr dist, int *p ) { int i, n_nodes, dom, count, n_dom, pos, edgecut, numflag=0, wgtflag=0, this_dom, n_neigh=0; int *vstarts=NULL, *indx=NULL, *starts=NULL, *counts=NULL, *part=NULL, *ppart=NULL, *_cindx=NULL, *_rindx=NULL; int ParMETIS_options[4] = {0, 0, 0, 0}; TMPI_dat_ptr This=NULL; This = &A->This; // initialise variables n_nodes = A->mtx.nrows; n_dom = This->n_proc; this_dom = This->this_proc; // initialise the distribution distribution_init( dist, n_dom, n_nodes, 1 ); // setup pointers indx = dist->indx; starts = dist->starts; counts = dist->counts; part = dist->part; ppart = dist->ppart; // make the CRS profile of the matrix into an adjacency graph _cindx = malloc( (A->mtx.nnz)*sizeof(int) ); _rindx = malloc( (n_nodes+1)*sizeof(int) ); index_make_adjacency( n_nodes, A->vtxdist[this_dom], A->mtx.cindx, A->mtx.rindx, _cindx, _rindx ); // use ParMETIS to perform domain decomp ParMETIS_PartKway( A->vtxdist, _rindx, _cindx, NULL, NULL, &wgtflag, &numflag, &n_dom, ParMETIS_options, &edgecut, part, &This->comm); // keep copy of original part in ppart memcpy( ppart, part, sizeof(int)*n_nodes ); // sort part, indx holds the permutation required for This for( i=0; i<n_nodes; i++ ) indx[i]=i; heapsort_int_index( n_nodes, indx, part); // determine the number of nodes that we have for each processor pos = 0; for( dom=0; dom<n_dom; dom++ ) { starts[dom] = pos; count=0; while( part[pos]==dom && pos<A->mtx.nrows ) { pos++; count++; } counts[dom] = count; if( count ) n_neigh++; } starts[dom] = pos; // find and store the neighbour information to dist free( dist->neighbours ); dist->neighbours = (int *)malloc( sizeof(int)*n_neigh ); dist->n_neigh = n_neigh; for( pos=0, dom=0; dom<n_dom; dom++ ) if( counts[dom] ) dist->neighbours[pos++] = 
dom; // sort the indices of each target domain's nodes for( dom=0; dom<n_dom; dom++ ) if( counts[dom]>1 ) heapsort_int( counts[dom], indx + starts[dom] ); // all processes update their copy of the global partition vector in p vstarts = (int *)malloc( sizeof(int)*n_dom ); for( i=0; i<n_dom; i++ ) vstarts[i] = A->vtxdist[i+1] - A->vtxdist[i]; MPI_Allgatherv( ppart, vstarts[this_dom], MPI_INT, p, vstarts, A->vtxdist, MPI_INT, This->comm ); free( vstarts ); free( _rindx ); free( _cindx ); }
// length(u) and length(indx) >= lfill+1 int vec_drop_block_( double* v, int *indx, int n, int lfill, int diag, double tol, double *u ) { int pos, ipos, i, j, k, m, p, shift, fill, from, to; double val, drop_min; shift = (n BLOCK_V_SHIFT)-BLOCK_SIZE; // search the vector v and look find the indices of the lfill entries larger than tol drop_min = 0.; //printf( "starting sort\n" ); for( i=0, pos=0, m=0; i<n; i++, pos+=BLOCK_SIZE ) { // find the size of element i ipos = pos; val = 0.; for( j=0; j<BLOCK_SIZE*BLOCK_SIZE; ipos++ ) { val += fabs(v[ipos]); if( !((++j)%BLOCK_SIZE) ) ipos+=shift; } // is the entry larger than the minimum in the dropped list so far? if( val>tol && (val>drop_min || m<lfill) ) { p = binary_search_double_bracket(m, u, val); //printf( "\t\tfound %g at %d and put in place %d with searchlen %d and drop_min=%g and lfill-1=%d\n", val, i, p, m, drop_min, lfill-1 ); for( k=m; k>p; k-- ) { indx[k+1]=indx[k]; u[k+1]=u[k]; } u[p+1] = val; indx[p+1] = i; m = (m<lfill) ? m+1 : lfill; drop_min = (m<lfill) ? 0 : u[lfill-1]; /*for( j=0; j<m; j++ ) printf( "\t(%d %g) ", indx[j], u[j] ); printf( "\n" ); */ } } //printf( "done sort\n" ); // look after the diag element, this is ridiculously complex if( diag>=0 && diag<n ) { ipos = diag BLOCK_V_SHIFT; val = 0.; for( j=0; j<BLOCK_SIZE*BLOCK_SIZE; ipos++ ) { val += fabs(v[ipos]); if( !((++j)%BLOCK_SIZE) ) ipos+=shift; } // if the diag was too small enough to be dropped then add it to the end of the list // this is a wee bit fiddly as we have to deal with some special cases/ // case 1 : it has certainly been included if( val>drop_min ) p = -1; // case 2 : it would have been dropped else if( val<=tol ) { if( m<lfill ) { p = m; m++; } else p = lfill-1; } // case 2 : we cannot be sure that it hasn't been dropped // this arrises when the value of the diag element is equal to that of // the smallest value in the list on nz entries to keep. 
There may // be more than one entry with this value in the array, so we must check // in case it was dropped else { p = m-1; k = 0; // search the end of while( !k && u[p]==u[m-1] ) { if( indx[p] == diag ) k=1; else p--; } // it has been included if( k ) p = -1; // it has been dropped else { p = m; if( p==lfill ) p--; else m++; } } if( p>=0 ) indx[p] = diag; } // sort the indices heapsort_int( m, indx ); // pack the entries into the start of v fill = (m<lfill) ? m : lfill; from = 0; to = 0; for( p=0; p<BLOCK_SIZE; p++ ) { to = p*(fill BLOCK_V_SHIFT); j = (m<lfill) ? 0 : m - lfill; for( i=0; i<fill; i++, j++) { pos = (indx[j] BLOCK_V_SHIFT) + from; for( k=0; k<BLOCK_SIZE; k++, to++, pos++ ) v[to] = v[pos]; } from += (n BLOCK_V_SHIFT); } j = (m<lfill) ? 0 : m - lfill; for( i=0; i<fill; i++, j++ ) indx[i] = indx[j]; // return return fill; }
/******************************************************************************************
*   int vec_drop_block( double* v, int *indx, int n, int lfill, int diag, double tol, double *u )
*
*   block version of vec_drop(), for entries that are blocks of dimension
*   BLOCK_SIZExBLOCK_SIZE. see documentation of vec_drop() for more information.
*
*   the weight of a block is the sum of the absolute values of its components.
*   blocks with a weight larger than tol, and the block with index diag, are
*   candidates; at most the lfill heaviest candidates are kept. the kept blocks are
*   packed to the start of v and their indices to the start of indx.
*
*   u is workspace of length n that receives the weights of the candidates.
*
*   returns the number of blocks kept
******************************************************************************************/
int vec_drop_block( double* v, int *indx, int n, int lfill, int diag, double tol, double *u )
{
    // length of one row of v, i.e. the stride between two rows of a block
    const int row_len = (n BLOCK_V_SHIFT);
    int col, r, c, at, src, dst, row_base, first, fill;
    int kept = 0;           // number of candidates collected so far
    int diag_slot = -1;     // position of the diag block among the candidates
    double weight;
    double largest = 0;     // largest weight seen over all blocks

    // collect every block that is heavier than tol (and the diag block) in u/indx
    for( col=0; col<n; col++ )
    {
        // weight of block col, summed row by row
        at = col*BLOCK_SIZE;
        weight = 0.;
        for( r=0; r<BLOCK_SIZE; r++ )
        {
            for( c=0; c<BLOCK_SIZE; c++ )
                weight += fabs( v[at + c] );
            at += row_len;
        }

        if( weight>tol || diag==col )
        {
            indx[kept] = col;
            u[kept] = weight;
            kept++;
        }
        if( weight>largest )
            largest = weight;
        if( diag==col )
            diag_slot = kept-1;
    }

    // make the diag block the heaviest of all so that it cannot be dropped
    if( diag_slot>=0 )
        u[diag_slot] = largest+1.;

    // only when there are too many candidates does anything have to be dropped
    if( kept>lfill )
    {
        // sort the weights in u and permute indx at the same time
        heapsort_double_index( kept, indx, u );

        // put the indices of the lfill last (kept) candidates back in order
        heapsort_int( lfill, indx + kept - lfill );
    }

    // the kept candidates are the last `fill` ones, starting at position `first`
    if( kept<lfill )
    {
        fill = kept;
        first = 0;
    }
    else
    {
        fill = lfill;
        first = kept - lfill;
    }

    // pack the kept blocks into the start of v, one block row at a time
    row_base = 0;
    for( r=0; r<BLOCK_SIZE; r++ )
    {
        dst = r*(fill BLOCK_V_SHIFT);
        for( col=0; col<fill; col++ )
        {
            src = (indx[first + col] BLOCK_V_SHIFT) + row_base;
            for( c=0; c<BLOCK_SIZE; c++ )
                v[dst++] = v[src++];
        }
        row_base += row_len;
    }

    // move the kept indices to the start of indx
    for( col=0; col<fill; col++ )
        indx[col] = indx[first + col];

    return fill;
}