bool Clusterizer::clusterize() { if(Basis::debugSet()){ std::cout<<"Clusterizer::clusterize(): Status:\n"; std::cout<<" _nHits "<<_nHits<<std::endl; std::cout<<" _framefirstHit "<<_framefirstHit<<"\n"; std::cout<<" _framelastHit "<<_framelastHit<<"\n"; std::cout<<" _minColHitPos "<<_minColHitPos<<"\n"; std::cout<<" _maxColHitPos "<<_maxColHitPos<<"\n"; std::cout<<" _minRowHitPos "<<_minRowHitPos<<"\n"; std::cout<<" _maxRowHitPos "<<_maxRowHitPos<<"\n"; } _runTime = 0; for(int iFrame = _framefirstHit; iFrame <= _framelastHit; ++iFrame){ //loop over the hit array starting from the first hit Frame to the last hit Frame for(int iCol = _minColHitPos; iCol <= _maxColHitPos; ++iCol){ //loop over the hit array from the minimum to the maximum column with a hit for(int iRow = _minRowHitPos; iRow <= _maxRowHitPos; ++iRow){ //loop over the hit array from the minimum to the maximum row with a hit if(hitExists(iCol,iRow,iFrame)){ //if a hit in iCol,iRow,iFrame exists take this as a first hit of a cluster and do: clearActualClusterData(); // clear the last cluster data _actualRelativeClusterFrame = iFrame; // set the minimum relative Frame [0:15] for the new cluster searchNextHits(iCol, iRow, iFrame); // find hits next to the actual one and update the actual cluster values, here the clustering takes place if (_actualClusterSize >= (int) _minClusterHits){ // only add cluster if it has at least _minClusterHits hits addCluster(); // add cluster to output cluster array addClusterToResults(); // add the actual cluster values to the histograms _actualClusterID++; // increase the cluster id for this event } else warning("clusterize: cluster size too small"); } if (_nHits == 0) //saves a lot of average run time, the loop is aborted if every hit is in a cluster (_nHits == 0) return true; } } } if (_nHits == 0) return true; warning("Clusterizer::clusterize: NOT ALL HITS CLUSTERED!"); showHits(); return false; }
/*
 * Builds a LineLayout for the text runs in `range`, which belong to `line`.
 *
 * For every run, the fonts returned by getFontSet(run.language) are tried in
 * order. Each font that reload()s successfully contributes to the layout's
 * max* metrics and to the strikethrough-offset average, and is used to shape
 * the run with HarfBuzz. Glyphs are grouped by HarfBuzz cluster index in
 * `clusterMap`; a cluster is owned by the first font that produced a glyph for
 * it. The font loop stops at the first font that leaves no cluster without a
 * glyph. The clusters of a run are then appended to the layout in map order,
 * or in reverse map order for HB_DIRECTION_RTL runs.
 *
 * The returned layout is allocated with `new` and is not deleted here.
 *
 * line  - supplies text, langHint and overallDirection.
 * range - the runs to lay out.
 */
LineLayout* VirtualFont::createLineLayout(const TextLine &line, boost::iterator_range<vector<TextRun>::const_iterator> range)
{
    auto layout = new LineLayout(this, line.langHint, line.overallDirection);

    int averageCount = 0;
    map<hb_codepoint_t, Cluster> clusterMap;
    auto buffer = hb_buffer_create();

    for (auto &run : range)
    {
        clusterMap.clear();

        for (auto &font : getFontSet(run.language))
        {
            if (font->reload())
            {
                layout->maxHeight = std::max(layout->maxHeight, font->metrics.height);
                layout->maxAscent = std::max(layout->maxAscent, font->metrics.ascent);
                layout->maxDescent = std::max(layout->maxDescent, font->metrics.descent);
                layout->maxLineThickness = std::max(layout->maxLineThickness, font->metrics.lineThickness);
                layout->maxUnderlineOffset = std::max(layout->maxUnderlineOffset, font->metrics.underlineOffset);

                layout->averageStrikethroughOffset += font->metrics.strikethroughOffset;
                averageCount++;

                run.apply(line.text, buffer);
                hb_shape(font->hbFont, buffer, nullptr, 0);

                auto glyphCount = hb_buffer_get_length(buffer);
                auto glyphInfos = hb_buffer_get_glyph_infos(buffer, nullptr);
                auto glyphPositions = hb_buffer_get_glyph_positions(buffer, nullptr);

                bool hasMissingGlyphs = false;

                // Index type follows glyphCount (unsigned) to avoid a signed/unsigned comparison.
                for (decltype(glyphCount) i = 0; i < glyphCount; i++)
                {
                    auto codepoint = glyphInfos[i].codepoint;
                    auto cluster = glyphInfos[i].cluster;

                    auto it = clusterMap.find(cluster);
                    bool clusterFound = (it != clusterMap.end());

                    if (codepoint)
                    {
                        if (clusterFound && (it->second.font != font))
                        {
                            continue; // CLUSTER FOUND, WITH ANOTHER FONT (E.G. SPACE)
                        }
                        else
                        {
                            auto offset = Vec2f(glyphPositions[i].x_offset, -glyphPositions[i].y_offset) * font->scale;
                            float advance = glyphPositions[i].x_advance * font->scale.x;

                            if (!properties.useMipmap)
                            {
                                offset.x = snap(offset.x);
                                offset.y = snap(offset.y);
                                advance = snap(advance);
                            }

                            if (clusterFound)
                            {
                                it->second.addShape(codepoint, offset, advance);
                            }
                            else
                            {
                                clusterMap.insert(make_pair(cluster, Cluster(font, run.tag, codepoint, offset, advance)));
                            }
                        }
                    }
                    else if (!clusterFound)
                    {
                        // Glyph id 0 and no earlier font covered this cluster either.
                        hasMissingGlyphs = true;
                    }
                }

                if (!hasMissingGlyphs)
                {
                    break; // NO NEED TO PROCEED TO THE NEXT FONT IN THE LIST
                }
            }
        }

        if (run.direction == HB_DIRECTION_RTL)
        {
            for (auto it = clusterMap.rbegin(); it != clusterMap.rend(); ++it)
            {
                layout->addCluster(it->second);
            }
        }
        else
        {
            for (auto it = clusterMap.begin(); it != clusterMap.end(); ++it)
            {
                layout->addCluster(it->second);
            }
        }
    }

    // With an empty range, or when no font could be reloaded, nothing was
    // accumulated: skip the division so the offset is not turned into NaN
    // (or an integer division by zero) and keeps its initial value.
    if (averageCount > 0)
    {
        layout->averageStrikethroughOffset /= averageCount;
    }

    hb_buffer_destroy(buffer);
    return layout;
}
struct peakCluster *peakClusterItems(struct lm *lm, struct peakItem *itemList, double forceJoinScore, double weakLevel) /* Convert a list of items to a list of clusters of items. This may break up clusters that * have weakly linked parts. [ ] AAAAAAAAAAAAAAAAAA BBBBBB DDDDDD CCCC EEEE gets tranformed into [ ] [ ] AAAAAAAAAAAAAAAAAA BBBBBB DDDDDD CCCC EEEE The strategy is to build a rangeTree of coverage, which might look something like so: 123333211123333211 then define cluster ends that exceed the minimum limit, which is either weakLevel (usually 10%) of the highest or forceJoinScore if weakLevel times the highest is more than forceJoinScore. This will go to something like so: [---] [----] Finally the items that are overlapping a cluster are assigned to it. Note that this may mean that an item may be in multiple clusters. [ABC] [ ADE] */ { int easyMax = round(1.0/weakLevel); int itemCount = slCount(itemList); struct peakCluster *clusterList = NULL; if (itemCount < easyMax) { struct peakItem *item = itemList; int chromStart = item->chromStart; int chromEnd = item->chromEnd; for (item = item->next; item != NULL; item = item->next) { if (item->chromStart < chromStart) chromStart = item->chromStart; if (item->chromEnd > chromEnd) chromEnd = item->chromEnd; } addCluster(lm, itemList, chromStart, chromEnd, &clusterList); } else { /* Make up coverage tree. */ struct rbTree *covTree = rangeTreeNew(); struct peakItem *item; for (item = itemList; item != NULL; item = item->next) rangeTreeAddToCoverageDepth(covTree, item->chromStart, item->chromEnd); struct range *range, *rangeList = rangeTreeList(covTree); /* Figure out maximum coverage. */ int maxCov = 0; for (range = rangeList; range != NULL; range = range->next) { int cov = ptToInt(range->val); if (cov > maxCov) maxCov = cov; } /* Figure coverage threshold. 
*/ int threshold = round(maxCov * weakLevel); if (threshold > forceJoinScore-1) threshold = forceJoinScore-1; /* Loop through emitting sections over threshold as clusters */ boolean inRange = FALSE; boolean start = 0, end = 0; for (range = rangeList; range != NULL; range = range->next) { int cov = ptToInt(range->val); if (cov > threshold) { if (inRange) end = range->end; else { inRange = TRUE; start = range->start; end = range->end; } } else { if (inRange) { addCluster(lm, itemList, start, end, &clusterList); inRange = FALSE; } } } if (inRange) addCluster(lm, itemList, start, end, &clusterList); } slReverse(&clusterList); return clusterList; }