void Preprocessor::buildPatterns () { boost::timer timer; timer.restart(); patterns_.clear(); patterns_.reserve(regions_.size()); Pattern pattern; // Traverse the list of iterators to regions creating a Pattern object for each region for( RegionLines::iterator k = inlineRegions_.begin(); k != inlineRegions_.end(); ++k ) { std::list<RegionIterator> line(k->second); for( std::list<RegionIterator>::iterator i = line.begin(); i != line.end(); ++i ) { (*i)->normalizeCoordinates(); // Normalize region using Magick++ facilities Magick::Image image( Magick::Geometry((*i)->width(), (*i)->height()), Magick::ColorGray(1.0) ); image.type( Magick::BilevelType ); Magick::Pixels view(image); Magick::PixelPacket *originPixel = view.get(0, 0, (*i)->width(), (*i)->height()); Magick::PixelPacket *pixel; for ( unsigned int j = 0; j < (*i)->size(); ++j ) { pixel = originPixel + ((*i)->at(j).first * view.columns() + (*i)->at(j).second); *pixel = Magick::ColorGray (0.0); } view.sync(); image.syncPixels(); image.scale( Magick::Geometry(Pattern::planeSize(), Pattern::planeSize()) ); // Preprocess the normalized region Preprocessor temporalPreprocessor (image, 0, 0, image.rows(), image.columns()); temporalPreprocessor.applyGlobalThresholding(); temporalPreprocessor.isolateRegions(); Region normalizedRegion; if ( ! temporalPreprocessor.regions_.empty() ) { // Merge subregions if preprocessing split the original region if ( temporalPreprocessor.regions_.size() > 1 ) { for ( RegionIterator j = temporalPreprocessor.regions_.begin(); j != temporalPreprocessor.regions_.end(); ++j ) normalizedRegion = normalizedRegion + *j; temporalPreprocessor.regions_.clear(); temporalPreprocessor.regions_.push_back(normalizedRegion); } else normalizedRegion = temporalPreprocessor.regions_.front(); } // Build the pattern pattern.clean(); for ( unsigned int i = 0; i < normalizedRegion.size(); ++i ) pattern.at(normalizedRegion.at(i).first, normalizedRegion.at(i).second) = 1; // Correct shifting if ( image.rows() < Pattern::planeSize() ) // Shift rows from top to the center { unsigned int offset = (Pattern::planeSize() - image.rows()) / 2; while ( offset != 0 ) { for ( int i = Pattern::planeSize()-2; i >= 0; --i ) { for ( unsigned int j = 0; j < Pattern::planeSize(); ++j ) pattern.at(i+1, j) = pattern.at(i, j); } for ( unsigned int j = 0; j < Pattern::planeSize(); ++j ) pattern.at(0, j) = 0; --offset; } } if ( image.columns() < Pattern::planeSize() ) // Shift columns from left to center { unsigned int offset = (Pattern::planeSize() - image.columns()) / 2; while ( offset != 0 ) { for ( unsigned int i = 0; i < Pattern::planeSize(); ++i ) { for ( int j = Pattern::planeSize()-2; j >= 0; --j ) pattern.at(i, j+1) = pattern.at(i, j); } for ( unsigned int i = 0; i < Pattern::planeSize(); ++i ) pattern.at(i, 0) = 0; --offset; } } patterns_.push_back( pattern ); } } statistics_.patternsBuildingTime(timer.elapsed()); }
std::vector<unsigned int> Preprocessor::isolateRegions () { boost::timer timer; timer.restart(); // Traverse the press clip searching the ink pixels where the flooding process will start from std::vector<PixelCoordinates> seeds(0); seeds.reserve(clip_.size()); for ( unsigned int i = 0; i < clipHeight_; ++i ) { for ( unsigned int j = 0; j < clipWidth_; ++j ) { if ( clip_.at(i * clipWidth_ + j) == 1 ) seeds.push_back( PixelCoordinates(i,j) ); } } // Build the initial list of regions by applying the flooding algorithm regions_.clear(); std::deque<bool> visited(clip_.size(), false); for ( std::vector<PixelCoordinates>::iterator s = seeds.begin(); s != seeds.end(); ++s ) { int row = s->first; int column = s->second; if ( not visited.at(row * clipWidth_ + column) ) { visited.at(row * clipWidth_ + column) = true; // This seed begins a new region Region region; region.addCoordinates( PixelCoordinates(row, column) ); // Explore the immediate neighbourhood for ( int i = row-1; (i <= row+1) && (i < static_cast<int>(clipHeight_)); ++i ) { for ( int j = column-1; (j <= column+1) && (j < static_cast<int>(clipWidth_)); ++j ) { if ( i >= 0 && j >= 0 ) { if ( clip_.at(i * clipWidth_ + j) == 1 && not visited.at(i * clipWidth_ + j) ) { visited.at(i * clipWidth_ + j) = true; region.addCoordinates( PixelCoordinates(i,j) ); } } } } // Explore the neighbours of the neighbours unsigned int k = 1; while ( region.size() > k ) { PixelCoordinates coordinates( region.at(k) ); for ( int i = coordinates.first-1; (i <= static_cast<int>(coordinates.first+1)) && (i < static_cast<int>(clipHeight_)); ++i ) { for ( int j = coordinates.second-1; (j <= static_cast<int>(coordinates.second+1)) && (j < static_cast<int>(clipWidth_)); ++j ) { if ( i >= 0 && j >= 0 ) { if ( clip_.at(i * clipWidth_ + j) == 1 && not visited.at(i * clipWidth_ + j) ) { visited.at(i * clipWidth_ + j) = true; region.addCoordinates( PixelCoordinates(i, j) ); } } } } ++k; } regions_.push_back(region); } } findLineDelimiters(visited); organizeRegionsIntoLines(); mergeVerticallyOverlappedRegions(); averageCharacterHeight_ = std::accumulate (regions_.begin(), regions_.end(), 0.0, accumulateHeightIncrement()) / regions_.size(); averageCharacterWidth_ = std::accumulate (regions_.begin(), regions_.end(), 0.0, accumulateWidthIncrement()) / regions_.size(); for( RegionLines::iterator i = inlineRegions_.begin(); i != inlineRegions_.end(); ++i ) sortRegions(i->second); std::vector<unsigned int> spaceLocations = findSpacesBetweenWords(); statistics_.nRegions(regions_.size()); statistics_.nLines(delimiters_.size()); statistics_.averageCharacterHeight(averageCharacterHeight_); statistics_.averageCharacterWidth(averageCharacterWidth_); statistics_.segmentationTime(timer.elapsed()); return spaceLocations; }