/////////////////////////////// // Tokenizer-Utils void TTokenizerUtil::Sentencize(const PSIn& SIn, TStrV& Sentences, const bool& SplitNewLineP) { TChA SentenceBuf; int c; while (!SIn->Eof()) { c = SIn->GetCh(); switch (c) { case '\r': case '\n': { if (!SplitNewLineP) { SentenceBuf += ' '; break; } } case '"' : case '.' : case '!' : case ':' : case ';' : case '?' : case '\t': { if (SentenceBuf.Len() > 2) { Sentences.Add(SentenceBuf); SentenceBuf.Clr(); } break; } default: SentenceBuf += c; break; } } if (SentenceBuf.Len() > 0) { Sentences.Add(SentenceBuf); } }
// Fetches one character from SIn into Ch and returns it.
// When SIn is already at Eof: the first such call sets AtEof and returns 0
// (Ch is left as it was); a call made while AtEof is already set throws
// THttpEx(heUnexpectedEof).
char THttpLx::GetFirstCh(){
  if (!SIn->Eof()){
    Ch=SIn->GetCh();
    return Ch;
  }
  // input exhausted
  if (AtEof){throw THttpEx(heUnexpectedEof);}
  AtEof=true;
  return 0;
}
// Reads a variable-length unsigned integer from SIn.
// Each byte read contributes its bits 0..6 as the next (less significant)
// group of the result; bit 7 set means another byte follows.
// No Eof check is performed here.
uint TWbmp::GetMultiByteInt(const PSIn& SIn){
  uint Result=0;
  for (;;){
    TB8Set Octet=uchar(SIn->GetCh());
    Result=Result*128+Octet.GetInt(0, 6);
    // continuation flag cleared -> this was the last byte
    if (!Octet.In(7)){break;}
  }
  return Result;
}
// Loads a WBMP image from SIn and returns it as a new TWbmp.
// The header is read as: multi-byte TypeField (must be 0, otherwise
// TExcept::Throw is called), one fixed-header byte (if its bit 7 is set a
// further multi-byte value is read and discarded), then multi-byte width
// and height.
// Pixel data is read row by row; a fresh byte is fetched every 8 pixels and
// at the start of every row, and pixels are taken from bit 7 down to bit 0.
PWbmp TWbmp::LoadWbmp(const PSIn& SIn){
  // header: type
  const uint TypeField=GetMultiByteInt(SIn);
  if (TypeField!=0){TExcept::Throw("Invalid WBMP TypeField.");}
  // header: fixed-header byte, optionally followed by extension fields
  TB8Set FixHeaderField=uchar(SIn->GetCh());
  if (FixHeaderField.In(7)){
    GetMultiByteInt(SIn); // ExtFields
  }
  // header: dimensions
  int Width=GetMultiByteInt(SIn);
  int Height=GetMultiByteInt(SIn);
  // allocate the target bitmap
  PWbmp Wbmp=TWbmp::New(Width, Height);
  // decode pixel rows
  for (int Y=0; Y<Height; Y++){
    TB8Set PackedPx;
    for (int X=0; X<Width; X++){
      const int BitOffset=X%8;
      if (BitOffset==0){PackedPx=uchar(SIn->GetCh());}
      Wbmp->PutPxVal(X, Y, PackedPx.In(7-BitOffset));
    }
  }
  return Wbmp;
}
// Advances the lexer by one character and returns the new current character.
// The previous value of Ch is appended to SfMem before Ch is replaced.
// Sources, in order of priority:
//  1. the top entry of EofChPrS, which restores both AtEof and Ch;
//  2. the next character of SIn;
//  3. at Eof of SIn: the first call sets AtEof and yields TCh::NullCh,
//     a call with AtEof already set throws THttpEx(heUnexpectedEof)
//     (nothing is appended to SfMem in that case).
char THttpLx::GetCh(){
  // 1. stacked (AtEof, Ch) pair available
  if (!EofChPrS.Empty()){
    SfMem+=Ch;
    AtEof=EofChPrS.Top().Val1;
    Ch=EofChPrS.Top().Val2;
    EofChPrS.Pop();
    return Ch;
  }
  // 2. regular read from the stream
  if (!SIn->Eof()){
    SfMem+=Ch;
    Ch=SIn->GetCh();
    return Ch;
  }
  // 3. stream exhausted
  if (AtEof){throw THttpEx(heUnexpectedEof);}
  AtEof=true;
  SfMem+=Ch;
  Ch=TCh::NullCh;
  return Ch;
}
void TTokenizerUtil::Paragraphize(const PSIn& SIn, TStrV& Paragraphs) { TChA ParagraphBuf; int c; bool wasSpace = false; while (!SIn->Eof()) { c = SIn->GetCh(); // two consecutive spaces signal a new paragraph if (c == ' ' || c == '\t' || c == '\n') { if (wasSpace) { Paragraphs.Add(ParagraphBuf); ParagraphBuf.Clr(); continue; } wasSpace = true; } else { wasSpace = false; } ParagraphBuf += c; } if (ParagraphBuf.Len() > 0) { Paragraphs.Add(ParagraphBuf); } }