void CalculateLMarg (const CGraphmatFile& G, vector<WORD>& gLeftMargins) { WORD lm = 0; gLeftMargins.resize(G.GetUnits().size()); size_t HB = G.GetUnits().size(); for (size_t i=1; i<HB; i++) { gLeftMargins[i] = lm; lm += G.GetUnits()[i].GetScreenLength(); if (G.GetUnits()[i].IsEOLN()) lm = 0; } }
/**
 * Tells whether the unit at index LB may be a component of a file name.
 *
 * A unit qualifies when it carries one of the descriptors OLLE, ORLE, ODgCh
 * or ODg, or when its token is exactly "*" or exactly "..".
 *
 * @param F   graphematical file holding the units
 * @param LB  index of the unit to test
 * @return    true if the unit matches one of the cases above
 */
bool CanBeFileName(const CGraphmatFile& F, size_t LB)
{
    // Descriptor-based cases, tested in the same order as before.
    if (F.HasDescr(LB, OLLE))
        return true;
    if (F.HasDescr(LB, ORLE))
        return true;
    if (F.HasDescr(LB, ODgCh))
        return true;
    if (F.HasDescr(LB, ODg))
        return true;

    // Wildcard: a single '*'.
    if (F.GetUnits()[LB].GetTokenLength() == 1)
        return static_cast<unsigned char>(F.GetUnits()[LB].GetToken()[0]) == '*';

    // Parent directory: exactly "..".
    if (F.GetUnits()[LB].GetTokenLength() == 2)
        return static_cast<unsigned char>(F.GetUnits()[LB].GetToken()[0]) == '.'
            && static_cast<unsigned char>(F.GetUnits()[LB].GetToken()[1]) == '.';

    return false;
}
void AotGraphan::analyzeString( const std::string & str, boost::ptr_vector<Unit> & units ) { setupRml(); try { CGraphmatFile file; if( !file.LoadDicts() ) { // Загружаем словари throw std::logic_error( file.GetLastError() ); } if( !file.LoadStringToGraphan( str ) ) { // Загружаем файл throw std::logic_error( file.GetLastError() ); } for( const CGraLine & line : file.GetUnits() ) { units.push_back( new Unit( line.GetToken(), line.GetTokenLength(), line.GetInputOffset(), line.IsWordOrNumberOrAbbr() ? Unit::WORD : line.IsPunct() ? Unit::PUNCT : Unit::UNKNOWN ) ); } } catch ( const std::exception & e ) { throw e; } catch ( const CExpc & e ) { throw std::logic_error( "Couldn't init morphology: " + e.m_strCause ); } catch (...) { throw std::logic_error( "Couldn't init morphology due to unknown error" ); } }
/**
 * Marks every unit whose uppercase token is found in the English-name
 * dictionary.
 *
 * Units are scanned from index 1. Soft units and units carrying OPun or OLw
 * are skipped. For the others the uppercase token is looked up in
 * C.m_pDicts->m_EnglishNames with lower_bound and EnglishNameLess; on an
 * exact strcmp match SetEnglishName() is called on the unit.
 *
 * NOTE(review): the lookup presumes m_EnglishNames is sorted consistently
 * with EnglishNameLess - confirm where the dictionary is loaded.
 */
static void InitEnglishNameSlot (CGraphmatFile& C)
{
    for (size_t unitNo = 1; unitNo < C.GetUnits().size(); ++unitNo)
    {
        if (C.GetUnits()[unitNo].IsSoft())
            continue;
        if (C.HasDescr(unitNo, OPun))
            continue;
        if (C.HasDescr(unitNo, OLw))
            continue;

        const char* upperToken = C.GetUppercaseToken(unitNo);

        const vector<CEnglishName>& knownNames = C.m_pDicts->m_EnglishNames;
        const vector<CEnglishName>::const_iterator candidate =
            lower_bound(knownNames.begin(), knownNames.end(), upperToken, EnglishNameLess);

        // lower_bound yields the first entry not less than the token;
        // it is a hit only if that entry is exactly the token.
        if (candidate == knownNames.end())
            continue;
        if (strcmp(candidate->name, upperToken) == 0)
            C.GetUnit(unitNo).SetEnglishName();
    }
}
/*
   Building units like "Bill Bush".

   The unit at StartPos must be flagged as an English name. The soft units
   that follow it are skipped; the attempt is abandoned if one of them is a
   paragraph tag or carries OPar. The first non-soft unit found before EndPos
   must carry both OLLE and OUpLw, and nothing in [StartPos, that unit] may
   already be grouped. On success StartPos receives OFAM1, the second unit
   receives OFAM2 and the whole span is put into state stGrouped.

   Returns true if the pair was grouped, false otherwise.
*/
static bool DealSimpleEnglishNames (CGraphmatFile& C, size_t StartPos, size_t EndPos)
{
    if (!C.GetUnits()[StartPos].IsEnglishName())
        return false;

    // Walk over the soft units separating the two parts of the name.
    size_t second = StartPos + 1;
    while ((second < EndPos) && C.GetUnits()[second].IsSoft())
    {
        if (C.GetUnits()[second].IsParagraphTag())
            return false;
        if (C.HasDescr(second, OPar))
            return false;
        ++second;
    }

    if (second == EndPos)
        return false;
    if (!C.HasDescr(second, OLLE))
        return false;
    if (!C.HasDescr(second, OUpLw))
        return false;

    const size_t spanEnd = second + 1;
    if (C.HasGrouped(StartPos, spanEnd))
        return false;

    C.SetDes(StartPos, OFAM1);
    C.SetDes(second, OFAM2);
    C.SetState(StartPos, spanEnd, stGrouped);
    return true;
}
void MapCorrectMinSpace (const CGraphmatFile& G, size_t LB, size_t HB, WORD& FuzzyMinSpace, WORD& MinSpace, int& NumOfFilledLines, const vector<WORD>& gLeftMargins ) { size_t LeftMargins [MaxLeftMargin]; MinSpace = 100; //инициализция частотоного массива левых отступов size_t k; for ( k=0; k<MaxLeftMargin; k++) LeftMargins[k] = 0; //вычисление частотного массива левых отступов и минимального левого отступа for (size_t i=LB; i<HB; i++) if ((i==1) || G.GetUnits()[i].IsEOLN()) { i++; if (i == HB) break; i = G.PSpace (i,HB); if (i == HB) break; if (!G.GetUnits()[i].IsGrouped()) { if (MinSpace < gLeftMargins[i]) MinSpace = gLeftMargins[i]; NumOfFilledLines ++; if (gLeftMargins[i] < MaxLeftMargin) LeftMargins[gLeftMargins[i]]++; } }; FuzzyMinSpace = MinSpace; for (k=0; k<MaxLeftMargin; k++) if (LeftMargins[k] > (NumOfFilledLines/100)) { FuzzyMinSpace = k; break; }; }
/**
 * Marks unit i with OPar when it opens an indented line.
 *
 * Nothing is done for unit 0 or for a soft unit. Otherwise the unit returned
 * by G.BSpace(i-1) must be a non-grouped end-of-line unit, and either
 *   - the left margin of unit i lies within
 *     [Offset + G.m_MinParOfs, Offset + G.m_MaxParOfs], or
 *   - the token of unit i-1 is non-empty and begins with a tab character.
 *
 * @param G            graphematical file; unit i may receive OPar
 * @param i            index of the unit to examine
 * @param Offset       base offset added to the paragraph indent bounds
 * @param LeftMargins  left margin of every unit, indexed by unit number
 * @return             always true
 */
bool DealIndention (CGraphmatFile& G, size_t i, size_t Offset, const vector<WORD>& LeftMargins)
{
    if (i == 0)
        return true;
    if (G.GetUnits()[i].IsSoft())
        return true;

    const size_t lineBreak = G.BSpace(i-1);
    if (G.GetUnits()[lineBreak].IsGrouped())
        return true;
    if (!G.GetUnits()[lineBreak].IsEOLN())
        return true;

    bool opensParagraph =
           LeftMargins[i] >= (Offset + G.m_MinParOfs)
        && LeftMargins[i] <= (Offset + G.m_MaxParOfs);

    // Only look at the preceding token when the margin test did not succeed.
    if (!opensParagraph)
    {
        opensParagraph =
               ( G.GetUnits()[i-1].GetTokenLength() >= 1 )
            && ( G.GetUnits()[i-1].GetToken()[0] == '\t' );
    }

    if (opensParagraph)
        G.SetDes(i, OPar);

    return true;
}
/**
 * Tells whether the unit at LineNo may be a Russian initial: it must carry
 * the OUp descriptor, consist of exactly one character, and carry the ORLE
 * descriptor.
 *
 * @param C       graphematical file holding the units
 * @param LineNo  index of the unit to test
 * @return        true if all three conditions hold
 */
bool CanBeRussianInitial(const CGraphmatFile& C, size_t LineNo)
{
    if (!C.HasDescr(LineNo, OUp))
        return false;

    if (C.GetUnits()[LineNo].GetTokenLength() != 1)
        return false;

    return C.HasDescr(LineNo, ORLE);
}