Пример #1
0
// Decides whether this TabVector and its single partner look like the two
// sides of one vertical text line.
// Walks this vector's boxes in list order, counting the boxes that are also
// found in the partner's box list and recording the vertical gap between each
// box and the one before it.
// Returns the partner when enough boxes are shared and the median gap is no
// larger than a configured fraction of the average box width. Returns NULL
// otherwise, including when there is not exactly one partner or there are no
// boxes at all.
TabVector* TabVector::VerticalTextlinePartner() {
  // Only a vector with exactly one partner can qualify.
  if (!partners_.singleton()) {
    return NULL;
  }
  TabVector_C_IT partners_it(&partners_);
  TabVector* other = partners_it.data();
  BLOBNBOX_C_IT own_it(&boxes_);
  BLOBNBOX_C_IT other_it(&other->boxes_);
  if (textord_debug_tabfind > 1) {
    Print("Testing for vertical text");
    other->Print("           partner");
  }
  // The absolute x-distance between the two start points sizes the gap
  // histogram (twice that distance is its upper limit).
  int separation = startpt().x() - other->startpt().x();
  separation = (separation < 0) ? -separation : separation;
  STATS gaps(0, separation * 2);

  int box_count = 0;     // Every box visited in this vector.
  int shared_count = 0;  // Boxes that also count as present in the partner.
  int width_sum = 0;     // Sum of box widths, for the average width.
  BLOBNBOX* previous = NULL;
  other_it.mark_cycle_pt();
  own_it.mark_cycle_pt();
  while (!own_it.cycled_list()) {
    BLOBNBOX* current = own_it.data();
    TBOX current_box = current->bounding_box();
    if (previous != NULL) {
      // Gap from the top of the previous box to the bottom of this one.
      gaps.add(current_box.bottom() - previous->bounding_box().top(), 1);
    }
    // Advance the partner's iterator until it reaches this very box, a box
    // whose bottom is not below this one's, or the end of its list.
    for (;;) {
      if (other_it.cycled_list())
        break;
      BLOBNBOX* candidate = other_it.data();
      if (candidate == current ||
          candidate->bounding_box().bottom() >= current_box.bottom())
        break;
      other_it.forward();
    }
    // A box is a match only if the partner holds the same box and both it
    // and its predecessor (if any) have a region type of at least
    // BRT_UNKNOWN.
    bool is_match = false;
    if (!other_it.cycled_list() && other_it.data() == current) {
      is_match = current->region_type() >= BRT_UNKNOWN &&
                 (previous == NULL ||
                  previous->region_type() >= BRT_UNKNOWN);
    }
    if (is_match) {
      ++shared_count;
    }
    ++box_count;
    width_sum += current_box.width();
    previous = current;
    own_it.forward();
  }
  if (box_count == 0) {
    return NULL;
  }

  int unshared_count = box_count - shared_count;
  double avg_width = width_sum * 1.0 / box_count;
  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
  int min_box_match = static_cast<int>(box_count *
                                       textord_tabvector_vertical_box_ratio);
  // Vertical text needs at least one measured gap, enough shared boxes, and
  // a small median gap.
  bool is_vertical = false;
  if (gaps.get_total() > 0 && shared_count >= min_box_match) {
    is_vertical = gaps.median() <= max_gap;
  }
  if (textord_debug_tabfind > 1) {
    tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d "
            "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
            gaps.get_total(), shared_count, unshared_count, min_box_match,
            gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No");
  }
  if (is_vertical) {
    return partner_or_null(other);
  }
  return NULL;
}
Пример #2
0
/* Converts a fraction into a histogram class index in 0..n_classes-1.
   A fraction of exactly 1.0 (or more) goes to the last class and a negative
   or NaN fraction to the first, so the caller never indexes outside the
   classes it has initialised. */
static int subclass_index(double fraction,int n_classes)
        {
        double pos;

        pos=fraction*(double)n_classes;
        if (pos>=(double)n_classes) return(n_classes>0 ? n_classes-1 : 0);
        if (!(pos>=0.0)) return(0);
        return((int)pos);
        }

/* Makes one pass over observations d.l1..d.l2 of variable `var`, resetting
   and then accumulating the file-level statistics: count and sums, sub-sample
   class counts, frequencies, runs, lags, gaps, permutation, poker and coupon
   counters (each only when its controlling setting is non-zero).
   Unsuitable observations and missing values are skipped.
   Always returns 1. */
static int read_data()
        {
        int i;
        int j;
        int prind;
        int runlen;
        unsigned int u;
        double a;

        /* Reset the basic accumulators. */
        n=0L; run_start=0L; up=-1;
        sum1=sum2=0.0;

        if (n_sub)
            {
            i_sub=0; sub_sum1=0.0; sub_min=1e30; sub_max=-1e30;
            for (i=0; i<n_subclass; ++i)
                {
                p_mean[i]=p_min[i]=p_max[i]=0;
                }
            }

        if (fr_n)
            for (i=0; i<fr_n; ++i) fr_f[i]=0L;

        lagpos=0;
        for (i=0; i<maxlag; ++i) lagv[i]=lagvv[i]=0.0;
        for (i=0; i<maxgap; ++i) gap[i]=0L;
        gaplen=gapmax=0L;
        if (permlen)
            {
            i_perm=0;
            for (u=0; u<n_perm; ++u) f_perm[u]=0L;
            }
        if (poklen)
            {
            i_pok=0;
            for (u=0; u<n_pok; ++u) f_pok[u]=0L;
            for (u=0; u<poklen; ++u) f1_pok[u]=0;
            }
        if (couplen)
            {
            init_coup();
            for (u=0; u<coup_max; ++u) f_coup[u]=0L;
            }
        /* PRIND setting: non-zero prints each observation number as it is
           processed. */
        i=spfind("PRIND"); if (i>=0) prind=atoi(spb[i]); else prind=0;
        sur_print("\n");
        for (j=d.l1; j<=d.l2; ++j)
            {
            if (unsuitable(&d,j)) continue;
/***********************
            if (sur_kbhit()) { sur_getch(); prind=1-prind; }
************************/
            if (prind)
                {
                sprintf(sbuf,"%d ",j); sur_print(sbuf);
                }

            data_load(&d,j,var,&x);
            if (x==MISSING8) continue;
            ++n;
            sum1+=x; sum2+=x*x;
            if (n_sub)
                {
                sub_sum1+=x;
                if (x<sub_min) sub_min=x;
                if (x>sub_max) sub_max=x;
                ++i_sub;
                if (i_sub==n_sub)
                    {
                    /* A full sub-sample of n_sub values: classify its mean,
                       minimum and maximum into n_subclass classes.
                       NOTE(review): the transforms look like probability
                       transforms for uniform(0,1) data (muste_cdf_std is
                       presumably the standard normal cdf) - confirm. */
                    sub_sum1/=(double)n_sub;
                    a=muste_cdf_std(sqrt(12.0*(double)n_sub)*(sub_sum1-0.5));
                    ++p_mean[subclass_index(a,n_subclass)];

                    /* The transformed values reach 1.0 when sub_min==0 or
                       sub_max==1, so the index is clamped to the last class
                       instead of writing past it. */
                    a=pow(1.0-sub_min,(double)n_sub);
                    ++p_min[subclass_index(a,n_subclass)];
                    a=pow(sub_max,(double)n_sub);
                    ++p_max[subclass_index(a,n_subclass)];

                    i_sub=0; sub_sum1=0.0; sub_min=1e30; sub_max=-1e30;
                    }
                }
            if (fr_n) freq();
            runs_updown();
            lags();
            if (maxgap) gaps();
            if (permlen) permtest();
            if (poklen) poker();
            if (couplen) coupon();
            xlag=x;
            }
        /* Close the run that was still open when the data ended.
           NOTE(review): a run longer than MAXRUN is stored at index MAXRUN-2
           while one of exactly MAXRUN goes to MAXRUN-1; kept as is - confirm
           against runs_updown(). */
        runlen=n-run_start;
        if (runlen>MAXRUN) runlen=MAXRUN-1;
        /* With no valid observations runlen is 0; skip the update rather
           than write to index -1. */
        if (runlen>0)
            {
            if (up) ++runs_up[(int)(runlen-1)];
            else    ++runs_down[(int)(runlen-1)];
            }
        if (maxgap)
            {
            /* Close the last gap the same way, capped at the last gap class. */
            if (gaplen>gapmax) gapmax=gaplen;
            if (gaplen>(unsigned int)(maxgap-1)) gaplen=maxgap-1;
            ++gap[(int)gaplen];
            }
        return(1);
        }
Пример #3
0
/**
 * Tries to cover a sorted list of values with 1..capacity ranges that are,
 * on average, at least as dense as requested.
 * Returns 0 if that is not worthwhile or not possible, otherwise the number
 * of ranges written.
 * @param values Sorted array of signed-integer values.
 * @param length Number of values.
 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
 *                Should be 0x80..0x100, must be 1..0x100.
 * @param ranges Output ranges array; each entry is a {start, end} pair.
 * @param capacity Maximum number of ranges.
 * @return Minimum number of ranges (at most capacity) that have the desired density,
 *         or 0 if that density cannot be achieved.
 */
U_CAPI int32_t U_EXPORT2
uprv_makeDenseRanges(const int32_t values[], int32_t length,
                     int32_t density,
                     int32_t ranges[][2], int32_t capacity) {
    // Too few values to bother with ranges at all.
    if(length<=2) {
        return 0;
    }
    const int32_t firstValue=values[0];
    const int32_t lastValue=values[length-1];  // Assume firstValue<=lastValue.
    // Number of integers covered by the candidate ranges. Kept in 64 bits so
    // that lastValue-firstValue cannot overflow and the density product
    // below keeps its precision.
    int64_t coveredLength=static_cast<int64_t>(lastValue)-static_cast<int64_t>(firstValue)+1;
    if(length>=(density*coveredLength)/0x100) {
        // A single range is already dense enough.
        ranges[0][0]=firstValue;
        ranges[0][1]=lastValue;
        return 1;
    }
    // Splitting into several ranges is not considered for so few values.
    if(length<=4) {
        return 0;
    }
    // Try to split [firstValue, lastValue] into 2..capacity ranges by cutting
    // at the 1..(capacity-1) largest gaps between consecutive values.
    LargestGaps gaps(capacity-1);
    int32_t expected=firstValue;
    for(int32_t index=1; index<length; ++index) {
        ++expected;
        const int32_t actual=values[index];
        if(actual!=expected) {
            gaps.add(expected, static_cast<int64_t>(actual)-static_cast<int64_t>(expected));
            expected=actual;
        }
    }
    // At least one gap was seen: the single-range test above failed, so
    // there are fewer values than integers in [firstValue..lastValue].
    // Remove gaps from the covered length, largest first, until the
    // remaining ranges are dense enough and there are more than two values
    // per range on average.
    int32_t numRanges=2;
    for(;;) {
        const int32_t gapsUsed=numRanges-2;
        if(gapsUsed>=gaps.count()) {
            // The values are too sparse for capacity or fewer ranges
            // of the requested density.
            return 0;
        }
        coveredLength-=gaps.gapLength(gapsUsed);
        if(length>numRanges*2 && length>=(density*coveredLength)/0x100) {
            break;
        }
        ++numRanges;
    }
    // Keep only the numRanges-1 largest gaps and emit the ranges between them.
    gaps.truncate(numRanges-1);
    int32_t rangeStart=firstValue;
    ranges[0][0]=rangeStart;
    for(int32_t r=0; r<numRanges-1; ++r) {
        const int32_t gapIndex=gaps.firstAfter(rangeStart);
        const int32_t gapStart=gaps.gapStart(gapIndex);
        // This range ends just before the gap; the next starts just after it.
        ranges[r][1]=gapStart-1;
        rangeStart=static_cast<int32_t>(gapStart+gaps.gapLength(gapIndex));
        ranges[r+1][0]=rangeStart;
    }
    ranges[numRanges-1][1]=lastValue;
    return numRanges;
}