// Return the partner of this TabVector if the vector qualifies as // being a vertical text line, otherwise NULL. TabVector* TabVector::VerticalTextlinePartner() { if (!partners_.singleton()) return NULL; TabVector_C_IT partner_it(&partners_); TabVector* partner = partner_it.data(); BLOBNBOX_C_IT box_it1(&boxes_); BLOBNBOX_C_IT box_it2(&partner->boxes_); // Count how many boxes are also in the other list. // At the same time, gather the mean width and median vertical gap. if (textord_debug_tabfind > 1) { Print("Testing for vertical text"); partner->Print(" partner"); } int num_matched = 0; int num_unmatched = 0; int total_widths = 0; int width = startpt().x() - partner->startpt().x(); if (width < 0) width = -width; STATS gaps(0, width * 2); BLOBNBOX* prev_bbox = NULL; box_it2.mark_cycle_pt(); for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { BLOBNBOX* bbox = box_it1.data(); TBOX box = bbox->bounding_box(); if (prev_bbox != NULL) { gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); } while (!box_it2.cycled_list() && box_it2.data() != bbox && box_it2.data()->bounding_box().bottom() < box.bottom()) { box_it2.forward(); } if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN)) ++num_matched; else ++num_unmatched; total_widths += box.width(); prev_bbox = bbox; } if (num_unmatched + num_matched == 0) return NULL; double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; int min_box_match = static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); bool is_vertical = (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); if (textord_debug_tabfind > 1) { tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", gaps.get_total(), num_matched, 
num_unmatched, min_box_match, gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); } return (is_vertical) ? partner : NULL; }
static int read_data() { int i; int j; int prind; int runlen; unsigned int u; double a; n=0L; run_start=0L; up=-1; sum1=sum2=0.0; if (n_sub) { i_sub=0; sub_sum1=0.0; sub_min=1e30; sub_max=-1e30; for (i=0; i<n_subclass; ++i) { p_mean[i]=p_min[i]=p_max[i]=0; } } if (fr_n) for (i=0; i<fr_n; ++i) fr_f[i]=0L; lagpos=0; for (i=0; i<maxlag; ++i) lagv[i]=lagvv[i]=0.0; for (i=0; i<maxgap; ++i) gap[i]=0L; gaplen=gapmax=0L; if (permlen) { i_perm=0; for (u=0; u<n_perm; ++u) f_perm[u]=0L; } if (poklen) { i_pok=0; for (u=0; u<n_pok; ++u) f_pok[u]=0L; for (u=0; u<poklen; ++u) f1_pok[u]=0; } if (couplen) { init_coup(); for (u=0; u<coup_max; ++u) f_coup[u]=0L; } i=spfind("PRIND"); if (i>=0) prind=atoi(spb[i]); else prind=0; sur_print("\n"); for (j=d.l1; j<=d.l2; ++j) { if (unsuitable(&d,j)) continue; /*********************** if (sur_kbhit()) { sur_getch(); prind=1-prind; } ************************/ if (prind) { sprintf(sbuf,"%d ",j); sur_print(sbuf); } data_load(&d,j,var,&x); if (x==MISSING8) continue; ++n; sum1+=x; sum2+=x*x; if (n_sub) { sub_sum1+=x; if (x<sub_min) sub_min=x; if (x>sub_max) sub_max=x; ++i_sub; if (i_sub==n_sub) { sub_sum1/=(double)n_sub; a=muste_cdf_std(sqrt(12.0*(double)n_sub)*(sub_sum1-0.5)); i=a*(double)n_subclass; ++p_mean[i]; i=pow(1.0-sub_min,(double)n_sub)*(double)n_subclass; ++p_min[i]; i=pow(sub_max,(double)n_sub)*(double)n_subclass; ++p_max[i]; i_sub=0; sub_sum1=0.0; sub_min=1e30; sub_max=-1e30; } } if (fr_n) freq(); runs_updown(); lags(); if (maxgap) gaps(); if (permlen) permtest(); if (poklen) poker(); if (couplen) coupon(); xlag=x; } runlen=n-run_start; if (runlen>MAXRUN) runlen=MAXRUN-1; if (up) ++runs_up[(int)(runlen-1)]; else ++runs_down[(int)(runlen-1)]; if (maxgap) { if (gaplen>gapmax) gapmax=gaplen; if (gaplen>(unsigned int)(maxgap-1)) gaplen=maxgap-1; ++gap[(int)gaplen]; } return(1); }
/**
 * Tries to cover a sorted list of values with a small number of
 * sufficiently dense [start, end] ranges.
 *
 * A single range over all values is used when it is dense enough.
 * Otherwise the overall span is split at its largest gaps, using as few
 * ranges as possible, until the remaining covered length meets the
 * density requirement.
 *
 * @param values   Sorted array of signed-integer values.
 * @param length   Number of values.
 * @param density  Minimum average range density, in 256ths
 *                 (0x100 = 100% = perfectly dense).
 *                 Should be 0x80..0x100, must be 1..0x100.
 * @param ranges   Output array of [start, end] pairs.
 * @param capacity Maximum number of ranges.
 * @return The minimum number of ranges (at most capacity) that reach the
 *         requested density, or 0 if that density cannot be achieved.
 */
U_CAPI int32_t U_EXPORT2
uprv_makeDenseRanges(const int32_t values[], int32_t length,
                     int32_t density,
                     int32_t ranges[][2], int32_t capacity) {
    // Too few values to be worth expressing as ranges.
    if(length<3) {
        return 0;
    }
    // The array is sorted, so first<=last is assumed.
    const int32_t first=values[0];
    const int32_t last=values[length-1];
    // 64-bit arithmetic keeps last-first from overflowing int32_t and keeps
    // the density product exact.
    int64_t coveredLength=(int64_t)last-(int64_t)first+1;
    if((density*coveredLength)/0x100<=length) {
        // A single range is already dense enough.
        ranges[0][0]=first;
        ranges[0][1]=last;
        return 1;
    }
    if(length<5) {
        return 0;
    }

    // Collect the (capacity-1) largest gaps between consecutive values;
    // these are the candidate split points for 2..capacity ranges.
    LargestGaps gaps(capacity-1);
    int32_t expected=first;
    for(int32_t k=1; k<length; ++k) {
        const int32_t actual=values[k];
        ++expected;
        if(actual!=expected) {
            gaps.add(expected, (int64_t)actual-(int64_t)expected);
            expected=actual;
        }
    }

    // At least one gap exists here: otherwise the values would have been
    // contiguous and the single-range case above would have returned.
    // Remove the largest gaps one at a time until the density is met.
    int32_t numRanges=2;
    for(int32_t gapsUsed=0;; ++gapsUsed, ++numRanges) {
        if(gapsUsed>=gaps.count()) {
            // Too sparse for capacity or fewer ranges at this density.
            return 0;
        }
        coveredLength-=gaps.gapLength(gapsUsed);
        if(length>numRanges*2 && length>=(density*coveredLength)/0x100) {
            break;
        }
    }

    // Emit numRanges ranges separated by the numRanges-1 largest gaps.
    gaps.truncate(numRanges-1);
    int32_t rangeStart=first;
    ranges[0][0]=rangeStart;
    for(int32_t r=0; r<numRanges-1; ++r) {
        const int32_t gapIndex=gaps.firstAfter(rangeStart);
        const int32_t gapStart=gaps.gapStart(gapIndex);
        ranges[r][1]=gapStart-1;
        rangeStart=(int32_t)(gapStart+gaps.gapLength(gapIndex));
        ranges[r+1][0]=rangeStart;
    }
    ranges[numRanges-1][1]=last;
    return numRanges;
}