Пример #1
0
///////////////////////////////
// Tokenizer-Utils
// Reads characters from SIn until end of input and appends the collected
// sentences to Sentences.
//
// A sentence boundary is any of: '"', '.', '!', ':', ';', '?', '\t', and also
// '\r' / '\n' when SplitNewLineP is true. When SplitNewLineP is false, '\r'
// and '\n' are replaced by a single space inside the current sentence.
// Boundary characters themselves are never stored.
//
// At a boundary the buffer is emitted only if it holds more than two
// characters; a shorter buffer is kept and continues to grow with the text
// that follows. Whatever is left in the buffer at end of input is emitted if
// it is non-empty.
void TTokenizerUtil::Sentencize(const PSIn& SIn, TStrV& Sentences, const bool& SplitNewLineP) {
	TChA SentenceBuf;
	while (!SIn->Eof()) {
		int Ch = SIn->GetCh();
		const bool IsLineBreak = (Ch == '\r') || (Ch == '\n');

		// line breaks act as plain spaces unless the caller asked to split on them
		if (IsLineBreak && !SplitNewLineP) {
			SentenceBuf += ' ';
			continue;
		}

		const bool IsBoundary = IsLineBreak ||
			(Ch == '"') || (Ch == '.') || (Ch == '!') || (Ch == ':') ||
			(Ch == ';') || (Ch == '?') || (Ch == '\t');

		// ordinary character: just collect it
		if (!IsBoundary) {
			SentenceBuf += Ch;
			continue;
		}

		// boundary: emit the buffer only when it is longer than two characters,
		// otherwise leave it in place so the following text is appended to it
		if (SentenceBuf.Len() > 2) {
			Sentences.Add(SentenceBuf);
			SentenceBuf.Clr();
		}
	}
	// flush the trailing text that was not terminated by a boundary
	if (SentenceBuf.Len() > 0) {
		Sentences.Add(SentenceBuf);
	}
}
Пример #2
0
// Fetches the first character from the input stream SIn.
// - Input available: the character is stored in Ch and returned.
// - Input exhausted for the first time: AtEof is set and 0 is returned
//   (Ch is left unchanged).
// - Input exhausted and AtEof already set: throws THttpEx(heUnexpectedEof).
char THttpLx::GetFirstCh(){
  if (!SIn->Eof()){
    Ch=SIn->GetCh();
    return Ch;
  }
  // end of input: tolerated once, an error on any further attempt
  if (AtEof){throw THttpEx(heUnexpectedEof);}
  AtEof=true;
  return 0;
}
Пример #3
0
// Reads a variable-length unsigned integer from SIn.
// Each byte read contributes GetInt(0, 6) (presumably its low seven bits) to
// the result, with earlier bytes being more significant (the running value
// is multiplied by 128 before each addition). Reading stops after the first
// byte for which In(7) is false. At least one byte is always consumed.
uint TWbmp::GetMultiByteInt(const PSIn& SIn){
  uint Result=0;
  TB8Set Octet;
  for (;;){
    Octet=uchar(SIn->GetCh());
    Result=Result*128+Octet.GetInt(0, 6);
    // bit 7 marks "more bytes follow"; stop once it is not set
    if (!Octet.In(7)){break;}
  }
  return Result;
}
Пример #4
0
// Loads a WBMP image from SIn and returns it as a new TWbmp object.
// Header layout as read here:
//   1. TypeField (multi-byte int) - must be 0, otherwise
//      TExcept::Throw("Invalid WBMP TypeField.") is called;
//   2. FixHeaderField (one byte) - if its bit 7 is set, one further
//      multi-byte int (extension fields) is read and discarded;
//   3. Width and Height (multi-byte ints, in that order).
// Pixel data follows row by row; every row starts on a fresh byte and the
// pixels of a byte are taken from bit 7 down to bit 0.
PWbmp TWbmp::LoadWbmp(const PSIn& SIn){
  // header: type
  const uint TypeField=GetMultiByteInt(SIn);
  if (TypeField!=0){TExcept::Throw("Invalid WBMP TypeField.");}
  // header: fixed header byte, optionally followed by extension fields
  TB8Set FixHeaderField=uchar(SIn->GetCh());
  if (FixHeaderField.In(7)){
    GetMultiByteInt(SIn); // ExtFields - value is ignored
  }
  // header: dimensions
  int Width=GetMultiByteInt(SIn);
  int Height=GetMultiByteInt(SIn);
  // allocate the target bitmap
  PWbmp Wbmp=TWbmp::New(Width, Height);
  // decode pixel rows
  for (int Row=0; Row<Height; Row++){
    TB8Set Octet;
    for (int Col=0; Col<Width; Col++){
      const int BitOffset=Col%8;
      // fetch the next byte at the start of every group of eight pixels
      if (BitOffset==0){Octet=uchar(SIn->GetCh());}
      Wbmp->PutPxVal(Col, Row, Octet.In(7-BitOffset));
    }
  }
  return Wbmp;
}
Пример #5
0
// Advances the lexer by one character and returns the new current char Ch.
// The previous value of Ch is appended to SfMem on every successful call.
// Source of the next character:
// - if EofChPrS is non-empty, its top (AtEof, Ch) pair is restored and
//   popped (presumably characters that were put back earlier);
// - otherwise the character comes from SIn; on the first call at end of
//   input AtEof is set and Ch becomes TCh::NullCh.
// Throws THttpEx(heUnexpectedEof) when SIn is exhausted and AtEof is already
// set; in that case SfMem, Ch and AtEof are left untouched.
char THttpLx::GetCh(){
  // saved characters take priority over the input stream
  if (!EofChPrS.Empty()){
    SfMem+=Ch;
    AtEof=EofChPrS.Top().Val1;
    Ch=EofChPrS.Top().Val2;
    EofChPrS.Pop();
    return Ch;
  }
  const bool InputExhausted=SIn->Eof();
  // reading past an already reported end of input is an error
  if (InputExhausted&&AtEof){throw THttpEx(heUnexpectedEof);}
  SfMem+=Ch;
  if (InputExhausted){
    AtEof=true;
    Ch=TCh::NullCh;
  } else {
    Ch=SIn->GetCh();
  }
  return Ch;
}
// Reads characters from SIn until end of input and appends the collected
// paragraphs to Paragraphs.
//
// Two consecutive whitespace characters (' ', '\t' or '\n') mark a paragraph
// break. The first whitespace character of such a run stays at the end of
// the paragraph; the remaining ones are dropped. Text left in the buffer at
// end of input is emitted if it is non-empty.
//
// Empty paragraphs are never emitted: a run of three or more whitespace
// characters produces a single break instead of one empty entry per extra
// whitespace character.
//
// NOTE(review): '\r' is not treated as whitespace here, so "\r\n\r\n" does
// not produce a paragraph break - confirm whether CRLF input is expected.
void TTokenizerUtil::Paragraphize(const PSIn& SIn, TStrV& Paragraphs) {
	TChA ParagraphBuf;
	int c;
	bool wasSpace = false;
	while (!SIn->Eof()) {
		c = SIn->GetCh();
		// two consecutive spaces signal a new paragraph
		if (c == ' ' || c == '\t' || c == '\n') {
			if (wasSpace) {
				// wasSpace stays true for the rest of the whitespace run, so
				// only flush when there is something to emit; otherwise every
				// further whitespace character would add an empty paragraph
				if (ParagraphBuf.Len() > 0) {
					Paragraphs.Add(ParagraphBuf);
					ParagraphBuf.Clr();
				}
				continue;
			}
			wasSpace = true;
		} else {
			wasSpace = false;
		}
		ParagraphBuf += c;
	}
	// flush the trailing paragraph
	if (ParagraphBuf.Len() > 0) {
		Paragraphs.Add(ParagraphBuf);
	}
}