Beispiel #1
0
/// Returns the month name for this time; Loc is forwarded to TTmInfo::GetMonthNm.
/// The month number passed on is tm_mon+1, i.e. 1-based.
TStr TSecTm::GetMonthNm(const TLoc& Loc) const {
  struct tm Tm;
  // The conversion is done outside of IAssert on purpose: a call placed inside
  // the macro would disappear in any build where the assertion expands to
  // nothing, and Tm would then be read uninitialized.
  const bool TmOk = IsDef() && GetTmStruct(AbsSecs(), Tm);
  IAssert(TmOk);
  (void)TmOk; // keeps the variable referenced if IAssert is a no-op
  return TTmInfo::GetMonthNm(Tm.tm_mon+1, Loc);
}
Beispiel #2
0
/// Benchmark driver: loads a binary PNEANet from the file given with -i: and,
/// for random source/sink pairs, compares the CPU time of
/// TSnap::GetMaxFlowIntPR against TSnap::GetMaxFlowIntEK.
/// -n: sets the number of runs per thread, -t: the number of outer-loop
/// iterations distributed by OpenMP. Always returns 0.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Flow. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  // totals; each is combined across threads by the reduction clause below
  double PRTimeTotal = 0;
  double EKTimeTotal = 0;
  int PRWinCnt = 0;
  Try
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "Input file");
  const int Iters = Env.GetIfArgPrefixInt("-n:", 10, "Number of runs per thread");
  const int Threads = Env.GetIfArgPrefixInt("-t:", 4, "Number of threads");
  printf("Integer Flow Test\n");
  printf("Filename: %s\n", InFNm.CStr());
  printf("Building Network...\n");
  TFIn InFile(InFNm);
  // If the input file is a binary, use the following line to load the network
  PNEANet Net = TNEANet::Load(InFile);
  // If the input file is a text file, use the following to load the network and save as binary
  // PNEANet Net;
  // int MaxEdgeCap = BuildCapacityNetwork(InFNm, Net);
  // const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "Output file");
  // TFOut OutFile(OutFNm);
  // Net->Save(OutFile);
  printf("PNEANet Nodes: %d, Edges: %d\n\n", Net->GetNodes(), Net->GetEdges());
  #pragma omp parallel for reduction(+:EKTimeTotal,PRTimeTotal,PRWinCnt) schedule(static, 1)
  for (int ThreadN = 0; ThreadN < Threads; ThreadN++) {
    // one generator per outer iteration, seeded with the iteration index
    TRnd Random(ThreadN);
    for (int IterN = 0; IterN < Iters; IterN++) {
      int SrcNId = Net->GetRndNId(Random);
      int SnkNId = Net->GetRndNId(Random);

      // time the PR variant
      const double PRStart = getcputime();
      int NetMaxFlowPR = TSnap::GetMaxFlowIntPR(Net, SrcNId, SnkNId);
      const double PRStop = getcputime();
      const double PRElapsed = PRStop - PRStart;

      // time the EK variant on the same pair
      const double EKStart = getcputime();
      int NetMaxFlowEK = TSnap::GetMaxFlowIntEK(Net, SrcNId, SnkNId);
      const double EKStop = getcputime();
      const double EKElapsed = EKStop - EKStart;

      // both algorithms must agree on the flow value
      IAssert(NetMaxFlowPR == NetMaxFlowEK);

      PRWinCnt += (PRElapsed < EKElapsed) ? 1 : 0;
      PRTimeTotal += PRElapsed;
      EKTimeTotal += EKElapsed;

      // serialize the report so lines of different threads do not interleave
      #pragma omp critical
      {
#ifndef NOMP
        printf("Thread: %d\n", omp_get_thread_num());
#endif
        printf("Source: %d, Sink %d\n", SrcNId, SnkNId);
        printf("Max Flow: %d\n", NetMaxFlowEK);
        printf("PR CPU Time: %f\n", PRElapsed);
        printf("EK CPU Time: %f\n", EKElapsed);
        printf("\n");
      }
    }
  }
  const int TotalRuns = Iters*Threads;
  printf("Avg PR PNEANet Time: %f\n", PRTimeTotal/TotalRuns);
  printf("Avg EK PNEANet Time: %f\n", EKTimeTotal/TotalRuns);
  printf("%d out of %d PR was faster\n", PRWinCnt, TotalRuns);
  Catch
  return 0;
}
Beispiel #3
0
/// Runs the 7-zip command line tool in list mode on ZipFNm, captures up to
/// 32KB-1 of its standard output and parses a length out of the listing.
/// The listing is split on whitespace; the value returned is the token seven
/// positions before the last token that starts with "-----" (presumably the
/// total size column of the summary line -- confirm against the 7z output
/// format). Returns 0, after logging, when no such token can be located.
/// NOTE(review): on non-Windows builds ZipFNm is placed into the shell command
/// without quoting, so names with spaces or shell metacharacters will not work.
uint64 TZipIn::GetFLen(const TStr& ZipFNm) {
  #ifdef GLib_WIN
  HANDLE ZipStdoutRd, ZipStdoutWr;
  // create pipes
  SECURITY_ATTRIBUTES saAttr;
  saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
  saAttr.bInheritHandle = TRUE;
  saAttr.lpSecurityDescriptor = NULL;
  // Create a pipe for the child process's STDOUT.
  const int PipeBufferSz = 32*1024;
  EAssertR(CreatePipe(&ZipStdoutRd, &ZipStdoutWr, &saAttr, PipeBufferSz), "Stdout pipe creation failed");
  // Ensure the read handle to the pipe for STDOUT is not inherited.
  SetHandleInformation(ZipStdoutRd, HANDLE_FLAG_INHERIT, 0);
  { const TStr CmdLine = TStr::Fmt("7z.exe l \"%s\"", ZipFNm.CStr());
  PROCESS_INFORMATION piProcInfo;
  STARTUPINFO siStartInfo;
  ZeroMemory( &piProcInfo, sizeof(PROCESS_INFORMATION));
  ZeroMemory( &siStartInfo, sizeof(STARTUPINFO));
  siStartInfo.cb = sizeof(STARTUPINFO);
  siStartInfo.hStdOutput = ZipStdoutWr;
  siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
  // Create the child process.
  const BOOL FuncRetn = CreateProcess(NULL, (LPSTR) CmdLine.CStr(),
    NULL, NULL, TRUE, 0, NULL, NULL, &siStartInfo, &piProcInfo);
  // The child holds its own inherited copy of the write end. Release ours so
  // the handle is not leaked and the read below sees end-of-pipe once the
  // child exits.
  CloseHandle(ZipStdoutWr);
  if (FuncRetn == 0) { CloseHandle(ZipStdoutRd); }
  EAssertR(FuncRetn!=0, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  CloseHandle(piProcInfo.hProcess);
  CloseHandle(piProcInfo.hThread); }
  #else
  const TStr CmdLine = TStr::Fmt("7za l %s", ZipFNm.CStr());
  FILE* ZipStdoutRd = popen(CmdLine.CStr(), "r");
  if (ZipStdoutRd == NULL) { // try using SevenZipPath
    ZipStdoutRd = popen((TZipIn::SevenZipPath+"/"+CmdLine).CStr(), "r");
  }
  EAssertR(ZipStdoutRd != NULL, TStr::Fmt("Can not execute '%s'", CmdLine.CStr()).CStr());
  #endif
  // Read output from the child process
  const int BfSz = 32*1024;
  char* Bf = new char [BfSz];
  memset(Bf, 0, BfSz);
  // At most BfSz-1 bytes are requested so that the terminating zero written
  // below always stays inside the buffer.
  #ifdef GLib_WIN
  DWORD BytesRead = 0;
  const BOOL ReadOk = ReadFile(ZipStdoutRd, Bf, BfSz-1, &BytesRead, NULL);
  CloseHandle(ZipStdoutRd);
  #else
  const size_t BytesRead = fread(Bf, 1, BfSz-1, ZipStdoutRd);
  const int CloseRes = pclose(ZipStdoutRd);
  #endif
  const int BfL = (int) BytesRead;
  Bf[BfL] = 0;
  // Copy the text out and release the buffer and the pipe first; the checks
  // come afterwards so that a failing check does not leak either of them.
  TStr Str(Bf);  delete [] Bf;
  #ifdef GLib_WIN
  EAssert(ReadOk != 0);
  #else
  EAssert(BytesRead != 0);
  EAssert(CloseRes != -1);
  #endif
  IAssert(BfL != 0);
  // find file length
  TStrV StrV; Str.SplitOnWs(StrV);
  int n = StrV.Len()-1;
  // walk backwards to the last token that starts a dashed separator
  while (n > 0 && ! StrV[n].StartsWith("-----")) { n--; }
  if (n-7 <= 0) {
    WrNotify(TStr::Fmt("Corrupt file %s: MESSAGE:\n", ZipFNm.CStr()).CStr(), Str.CStr());
    SaveToErrLog(TStr::Fmt("Corrupt file %s. Message:\n:%s\n", ZipFNm.CStr(), Str.CStr()).CStr());
    return 0;
  }
  return StrV[n-7].GetInt64();
}
Beispiel #4
0
// Registers LoadF in TypeToLoadFH under the key TypeNm.
// TypeNm must not be registered yet (asserted). Always returns true.
bool TFtrGen::Reg(const TStr& TypeNm, const TFtrGenLoadF& LoadF){
	// a type name may be bound only once
	IAssert(!TypeToLoadFH.IsKey(TypeNm));
	TypeToLoadFH.AddDat(TypeNm, LoadF);
	return true;
}
Beispiel #5
0
// Returns the index of the record the file position currently points at.
// The position is measured from the end of the header (HdLen) and must be an
// exact multiple of RecLen; otherwise EAssertR reports an invalid position.
int TFRnd::GetRecN(){
  IAssert(RecAct);
  // file position relative to the first byte after the header
  const int FPos=GetFPos()-HdLen;
  // the position has to rest exactly on a record boundary
  EAssertR(FPos%RecLen==0, "Invalid position in file'"+FNm+"'.");
  const int RecN=FPos/RecLen;
  return RecN;
}
Beispiel #6
0
/// Returns an edge iterator built from the node iterator of SrcNId and the
/// position of DstNId within that node's NIdV vector.
/// The position is found with SearchBin; the assertion fires when DstNId is
/// not present in the vector (SearchBin returned -1).
TUNGraph::TEdgeI TUNGraph::GetEI(const int& SrcNId, const int& DstNId) const {
  const TNodeI SrcNI = GetNI(SrcNId);
  // index of DstNId among the ids stored for SrcNId
  const int NodeN = SrcNI.NodeHI.GetDat().NIdV.SearchBin(DstNId);
  IAssert(NodeN != -1);
  return TEdgeI(SrcNI, EndNI(), NodeN);
}
Beispiel #7
0
/////////////////////////////////////////////////
// Roget-Base
// Walks every page of the web-base opened from WebBaseFPath, keeps only pages
// whose URL contains "RG.sh", and tokenizes each page body as HTML.
// For each kept page the heading inside the first <h2>...</h2> is assembled
// into CtgNm and reported through TNotify::OnNotify; the tokens that follow
// are then scanned until the "PHR" marker or the end of the token stream.
// NOTE(review): CtgIdNm and the collected WordStr values are built but never
// stored or returned in this block; the notify call is the only visible output.
void TRBase::LoadArtfl(const TStr& WebBaseFPath){
  PWebBase WebBase=PWebBase(new TWebMemBase(WebBaseFPath));
  int WebPgP=WebBase->FFirstWebPg(); int WebPgId;
  while (WebBase->FNextWebPg(WebPgP, WebPgId)){
    TStr UrlStr=WebBase->GetUrlStr(WebPgId);
    // skip pages whose URL does not contain "RG.sh"
    static TStr RgShStr="RG.sh"; if (!UrlStr.IsStrIn(RgShStr)){continue;}
//    if (!UrlStr.IsStrIn("RG.sh?^544\\")){continue;}

    // tokenize the page body
    PWebPg WebPg=WebBase->GetWebPg(WebPgId);
    PSIn SIn=TStrIn::New(WebPg->GetBodyStr());
    PHtmlDoc HtmlDoc=THtmlDoc::New(SIn, hdtAll);
    int TokN=0; PHtmlTok Tok; THtmlLxSym Sym; TStr Str;

    // move to <h2>
    // NOTE(review): no check against GetToks() here; assumes the page has an <h2> tag
    do {HtmlDoc->GetTok(TokN++, Sym, Str);
    } while (!((Sym==hlsyBTag)&&(Str==THtmlTok::H2TagNm)));

    // parse "ddd[A|B]." : a number, an optional 'A' or 'B', then a period
    TChA CtgNm; TChA CtgIdNm;
    HtmlDoc->GetTok(TokN++, Sym, Str);
    IAssert(Sym==hlsyNum); CtgNm+=Str; CtgIdNm+=Str;
    HtmlDoc->GetTok(TokN++, Sym, Str);
    if (Sym==hlsyStr){
      IAssert((Str=='A')||(Str=='B')); CtgNm+=Str; CtgIdNm+=Str;
      HtmlDoc->GetTok(TokN++, Sym, Str);
    }
    IAssert((Sym==hlsySSym)&&(Str=='.')); CtgNm+=Str;

    // parse up to </h2>; text inside [...] is collected separately in
    // BracketStr and appended after the rest of the heading
    TChA BracketStr;
    HtmlDoc->GetTok(TokN++, Sym, Str);
    while (!((Sym==hlsyETag)&&(Str==THtmlTok::H2TagNm))){
      if ((Sym==hlsySSym)&&(Str=='[')){
        HtmlDoc->GetTok(TokN++, Sym, Str);
        while (!((Sym==hlsySSym)&&(Str==']'))){
          // separate consecutive string tokens with a blank
          if ((!BracketStr.Empty())&&(Sym==hlsyStr)){BracketStr+=' ';}
          BracketStr+=Str; HtmlDoc->GetTok(TokN++, Sym, Str);
        }
        BracketStr.Ins(0, " ["); BracketStr+=']';
      } else {
        if (Sym==hlsyStr){CtgNm+=' ';}
        CtgNm+=Str;
      }
      HtmlDoc->GetTok(TokN++, Sym, Str);
    }
    CtgNm+=BracketStr;
    TNotify::OnNotify(Notify, ntInfo, CtgNm);

    // parse words
    // marker strings that are not treated as words (PgStr is declared but unused here)
    static TStr AdjStr="ADJ"; static TStr AdvStr="ADV";
    static TStr IntStr="INT"; static TStr PgStr="PAGE";
    static TStr PhrStr="PHR"; static TStr PrefStr="PREF";
    static TStr PronStr="PRON";
    HtmlDoc->GetTok(TokN++, Sym, Str);
    // the first token after the heading must be the marker "N" or "ADV"
    IAssert((Sym==hlsyStr)&&((Str=='N')||(Str==AdvStr)));
    while (TokN<HtmlDoc->GetToks()){
      if (Sym==hlsyStr){
        // "PHR" ends the scan of this page
        if (Str==PhrStr){break;}
        if ((Str!='N')&&(Str!='V')&&(Str!=AdjStr)&&(Str!=AdvStr)&&
         (Str!=IntStr)&&(Str!=PrefStr)&&(Str!=PronStr)){
          // a run of consecutive string tokens forms one blank-separated word
          TChA WordStr;
          do {
            if (!WordStr.Empty()){WordStr+=' ';} WordStr+=Str;
            HtmlDoc->GetTok(TokN++, Sym, Str);
          } while (Sym==hlsyStr);
//          TNotify::OnNotify(Notify, ntInfo, WordStr);
        } else {
          // marker token: skip it
          HtmlDoc->GetTok(TokN++, Sym, Str);
        }
      } else
      if (Sym==hlsySSym){
        // an opening bracket or quote: skip everything up to its closing partner
        TStr ExpectStr;
        if (Str=='('){ExpectStr=')';}
        else if (Str=='['){ExpectStr=']';}
        else if (Str=='{'){ExpectStr='}';}
        else if (Str=='"'){ExpectStr='"';}
        if (!ExpectStr.Empty()){
          // NOTE(review): no check against GetToks(); assumes the closing symbol exists
          do {HtmlDoc->GetTok(TokN++, Sym, Str);
          } while (!((Sym==hlsySSym)&&(Str==ExpectStr)));
        }
        HtmlDoc->GetTok(TokN++, Sym, Str);
      } else {
        // any other symbol: skip
        HtmlDoc->GetTok(TokN++, Sym, Str);
      }
    }
  }
}
Beispiel #8
0
// Builds a bag-of-words document set from the text file FNm.
// The file is read as a sequence of records, each made of two unsigned
// decimal numbers (transaction number, item number) separated by blanks or
// tabs; non-digit characters between records are skipped.
// The transaction number (as a string) becomes the document name and the item
// number (as a string) the word; consecutive records that map to the same
// document id are gathered into one sparse vector of word frequencies.
// MxDocs: if not -1, reading stops at a document boundary once
// GetDocs()>=MxDocs-1.
// Returns the new document set after calling AssertOk() on it.
PBowDocBs TBowFl::LoadTsactTxt(const TStr& FNm, const int& MxDocs){
  // prepare document set
  PBowDocBs BowDocBs=TBowDocBs::New();
  // open file
  PSIn SIn=TFIn::New(FNm);
  printf("Loading '%s' ...\n", FNm.CStr());
  if (!SIn->Eof()){
    // current document
    int CurDId=-1;
    TIntH CurDocWIdToFqH(100);
    // read first character
    char Ch=SIn->GetCh();
    // skip to the first digit or eof
    while ((!SIn->Eof())&&(!(('0'<=Ch)&&(Ch<='9')))){
      Ch=SIn->GetCh();}
    while (!SIn->Eof()){
      // notify
      if (BowDocBs->DocSpVV.Len()%1000==0){
        printf("%d transactions read\r", BowDocBs->DocSpVV.Len());}
      // read transaction number
      IAssert(('0'<=Ch)&&(Ch<='9'));
      int TsactN=Ch-'0'; Ch=SIn->GetCh();
      while (('0'<=Ch)&&(Ch<='9')){
        TsactN=TsactN*10+Ch-'0'; Ch=SIn->GetCh();}
      // skip space (only blanks and tabs are accepted between the two numbers)
      while (!(('0'<=Ch)&&(Ch<='9'))){
        IAssert((Ch==' ')||(Ch=='\t')); Ch=SIn->GetCh();}
      // read item number
      IAssert(('0'<=Ch)&&(Ch<='9'));
      int ItemN=Ch-'0'; Ch=SIn->GetCh();
      while (('0'<=Ch)&&(Ch<='9')){
        ItemN=ItemN*10+Ch-'0'; Ch=SIn->GetCh();}
      // skip to the next digit or eof
      while ((!SIn->Eof())&&(!(('0'<=Ch)&&(Ch<='9')))){
        Ch=SIn->GetCh();}
      // get document-id from transaction-id
      TStr DocNm=TInt::GetStr(TsactN);
      int DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      // get word-id from item-id
      TStr WordStr=TInt::GetStr(ItemN);
      int WId=BowDocBs->WordStrToDescH.AddKey(WordStr);
      BowDocBs->WordStrToDescH[WId].Fq++;
      // add word to document
      if (CurDId!=DId){
        // document changed: flush the words gathered for the previous one
        if (CurDId!=-1){
          if ((MxDocs!=-1)&&(BowDocBs->GetDocs()>=MxDocs-1)){break;}
          PBowSpV NewSpV=TBowSpV::New(CurDId, CurDocWIdToFqH.Len());
          int NewDId=BowDocBs->DocSpVV.Add(NewSpV);
          IAssert(NewDId==CurDId);
          for (int DocWIdN=0; DocWIdN<CurDocWIdToFqH.Len(); DocWIdN++){
            // this WId is local to the loop and shadows the outer WId
            int WId=CurDocWIdToFqH.GetKey(DocWIdN);
            int WordFq=CurDocWIdToFqH[DocWIdN];
            NewSpV->AddWIdWgt(WId, WordFq);
          }
          NewSpV->Sort();
        }
        // start collecting for the new document
        CurDId=DId; CurDocWIdToFqH.Clr(false);
      }
      // count the word in the current document
      CurDocWIdToFqH.AddDat(WId)++;
    }
    // save last document (also reached after the MxDocs break above)
    if (CurDId!=-1){
      PBowSpV NewSpV=TBowSpV::New(CurDId, CurDocWIdToFqH.Len());
      BowDocBs->DocSpVV.Add(NewSpV);
      for (int DocWIdN=0; DocWIdN<CurDocWIdToFqH.Len(); DocWIdN++){
        int WId=CurDocWIdToFqH.GetKey(DocWIdN);
        int WordFq=CurDocWIdToFqH[DocWIdN];
        NewSpV->AddWIdWgt(WId, WordFq);
      }
      NewSpV->Sort();
    }
    printf("%d transactions read\n", BowDocBs->DocSpVV.Len());
  }
  printf("... Done.\n");
  // return results
  BowDocBs->AssertOk();
  return BowDocBs;
}
Beispiel #9
0
// Builds a bag-of-words document set from up to four text files.
//  DocDefFNm    - optional document definitions, read only if the file exists
//  WordDefFNm   - optional word definitions, read only if the file exists
//  TrainDataFNm - optional data lines; their documents are added to TrainDIdV
//  TestDataFNm  - optional data lines; their documents are added to TestDIdV
//  MxDocs       - currently not used (see the TODO on the parameter)
// Each data line starts with an integer class value followed by
// "WId:Weight" pairs. A class value of -1 maps to category "-1", any other
// value to category "+1".
// Word ids up to the largest one seen that are still unknown to the document
// set are added under the name "W<id>". Returns the document set after
// calling AssertOk() on it.
PBowDocBs TBowFl::LoadSvmLightTxt(
 const TStr& DocDefFNm, const TStr& WordDefFNm,
 const TStr& TrainDataFNm, const TStr& TestDataFNm,
 const int& MxDocs){ //TODO: use MxDocs
  // prepare document set
  PBowDocBs BowDocBs=TBowDocBs::New();
  // the two category ids used for the class values
  int MOneCId=BowDocBs->CatNmToFqH.AddKey("-1");
  int POneCId=BowDocBs->CatNmToFqH.AddKey("+1");

  // document definition
  bool DocDefP=false;
  if (!DocDefFNm.Empty()&&(TFile::Exists(DocDefFNm))){
    // (DId ':' "DocNm" <eoln>)*
    PSIn SIn=TFIn::New(DocDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syInt, syEof);
    while (Lx.Sym==syInt){
      int DId=Lx.Int;
      Lx.GetSym(syColon);
      Lx.GetSym(syQStr); TStr DocNm=Lx.Str;
      Lx.GetSym(syEoln);
      Lx.GetSym(syInt, syEof);
      // ids in the file must equal the ids assigned on insertion
      int NewDId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      EAssertR(DId==NewDId, "Document-Ids don't match.");
    }
    DocDefP=true;
  }
  // word definition
  if (!WordDefFNm.Empty()&&(TFile::Exists(WordDefFNm))){
    // ("WordStr" WId WordFq <eoln>)*
    BowDocBs->WordStrToDescH.AddDat("Undef").Fq=0; // ... to have WId==0
    PSIn SIn=TFIn::New(WordDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syQStr, syEof);
    while (Lx.Sym==syQStr){
      TStr WordStr=Lx.Str;
      Lx.GetSym(syInt); int WId=Lx.Int;
      Lx.GetSym(syInt); int WordFq=Lx.Int;
      Lx.GetSym(syEoln);
      Lx.GetSym(syQStr, syEof);
      // ids in the file must equal the ids assigned on insertion
      int NewWId=BowDocBs->WordStrToDescH.AddKey(WordStr);
      EAssertR(WId==NewWId, "Word-Ids don't match.");
      BowDocBs->WordStrToDescH[WId].Fq=WordFq;
    }
  }
  // train & test data
  // MxWId: largest word id seen in the data; WIdToFqH: number of occurrences per word id
  int MxWId=-1; TIntIntH WIdToFqH;
  // train data
  if (!TrainDataFNm.Empty()){
    PSIn SIn=TFIn::New(TrainDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    // skip comment lines
    while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    // parse data lines
    while (Lx.Sym==syInt){
      // document: named after the number of documents present so far
      TStr DocNm=TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP){
        // documents were predefined: look the name up
        DId=BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TrainDIdV.Add(DId);
      // category (class value)
      int CId=(Lx.Int==-1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add(); IAssert(DId==BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV=TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV); IAssert(DId==BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym==syInt){
        // one "WId:Weight" pair
        int WId=Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq=Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId==-1){MxWId=WId;} else {MxWId=TInt::GetMx(MxWId, WId);}
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()){
        // change document name to 'DN' if the comment is 'docDesc=N'
        TStr CmtStr=Lx.CmtStr;
        static TStr DocNmPrefixStr="docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)){
          TStr NewDocNm=
           TStr("D")+CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          // the renamed document is expected to receive the same id again
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId=BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId==NewDId);
        }
      }
      SpV->Trunc();
      // skip empty/comment lines up to the next data line or eof
      while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    }
  }
  // test data (same parsing as the train data, documents go to TestDIdV)
  if (!TestDataFNm.Empty()){
    PSIn SIn=TFIn::New(TestDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    // skip comment lines
    while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    while (Lx.Sym==syInt){
      // document: named after the number of documents present so far
      TStr DocNm=TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP){
        DId=BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TestDIdV.Add(DId);
      // category (class value)
      int CId=(Lx.Int==-1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add(); IAssert(DId==BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV=TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV); IAssert(DId==BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym==syInt){
        // one "WId:Weight" pair
        int WId=Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq=Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId==-1){MxWId=WId;} else {MxWId=TInt::GetMx(MxWId, WId);}
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()){
        // change document name to 'DN' if the comment is 'docDesc=N'
        TStr CmtStr=Lx.CmtStr;
        static TStr DocNmPrefixStr="docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)){
          TStr NewDocNm=
           TStr("D")+CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId=BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId==NewDId);
        }
      }
      SpV->Trunc();
      // skip empty/comment lines up to the next data line or eof
      while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    }
  }
  // add missing words: every id in 0..MxWId not yet known gets the name "W<id>"
  for (int WId=0; WId<=MxWId; WId++){
    if (!BowDocBs->IsWId(WId)){
      TStr WordStr=TInt::GetStr(WId, "W%d");
      int _WId=BowDocBs->AddWordStr(WordStr);
      IAssert(WId==_WId);
      // carry over the occurrence count gathered while parsing, if any
      TInt Fq;
      if (WIdToFqH.IsKeyGetDat(WId, Fq)){
        BowDocBs->PutWordFq(WId, Fq);
      }
    }
  }

  BowDocBs->AssertOk();
  return BowDocBs;
}
Beispiel #10
0
// Returns difftime(SecTm2, SecTm1), i.e. the seconds from SecTm1 to SecTm2,
// converted to uint. Both times must be defined (asserted).
// NOTE(review): the result of difftime is converted straight to uint, so the
// caller presumably has to pass SecTm2 >= SecTm1 -- confirm.
uint TSecTm::GetDSecs(const TSecTm& SecTm1, const TSecTm& SecTm2){
  IAssert(SecTm1.IsDef()&&SecTm2.IsDef());
  const time_t FromTime=time_t(SecTm1.AbsSecs());
  const time_t ToTime=time_t(SecTm2.AbsSecs());
  const double DSecs=difftime(ToTime, FromTime);
  return uint(DSecs);
}
Beispiel #11
0
// Writes AbsSecs, converted to int, to the text output Lx.
// NOTE(review): the assertion compares the already converted int against
// TInt::Mx, so it can only fire when the converted value equals TInt::Mx --
// confirm whether a stricter range check is intended.
void TSecTm::SaveTxt(TOLx& Lx) const {
  const int SecsAsInt = int(AbsSecs);
  IAssert(SecsAsInt < TInt::Mx);
  Lx.PutInt(SecsAsInt);
}
Beispiel #12
0
/// Returns the tm_sec field of this time after conversion with GetTmStruct.
int TSecTm::GetSecN() const {
  struct tm Tm;
  // The conversion is done outside of IAssert on purpose: a call placed inside
  // the macro would disappear in any build where the assertion expands to
  // nothing, and Tm would then be read uninitialized.
  const bool TmOk = IsDef() && GetTmStruct(AbsSecs(), Tm);
  IAssert(TmOk);
  (void)TmOk; // keeps the variable referenced if IAssert is a no-op
  return Tm.tm_sec;
}
Beispiel #13
0
/// Returns the day-of-week name for this time; Loc is forwarded to
/// TTmInfo::GetDayOfWeekNm. The day number passed on is tm_wday+1, i.e. 1-based.
TStr TSecTm::GetDayOfWeekNm(const TLoc& Loc) const {
  struct tm Tm;
  // The conversion is done outside of IAssert on purpose: a call placed inside
  // the macro would disappear in any build where the assertion expands to
  // nothing, and Tm would then be read uninitialized.
  const bool TmOk = IsDef() && GetTmStruct(AbsSecs(), Tm);
  IAssert(TmOk);
  (void)TmOk; // keeps the variable referenced if IAssert is a no-op
  return TTmInfo::GetDayOfWeekNm(Tm.tm_wday+1, Loc);
}
Beispiel #14
0
/// Returns the 1-based day-of-week number of this time (tm_wday+1).
int TSecTm::GetDayOfWeekN() const {
  struct tm Tm;
  // The conversion is done outside of IAssert on purpose: a call placed inside
  // the macro would disappear in any build where the assertion expands to
  // nothing, and Tm would then be read uninitialized.
  const bool TmOk = IsDef() && GetTmStruct(AbsSecs(), Tm);
  IAssert(TmOk);
  (void)TmOk; // keeps the variable referenced if IAssert is a no-op
  return Tm.tm_wday + 1;
}
Beispiel #15
0
/// R-MAT Generator. The model is based on the recursive descent into a 2x2
/// matrix [A,B; C, 1-(A+B+C)].
/// See: R-MAT Generator: A Recursive Model for Graph Mining.
/// D. Chakrabarti, Y. Zhan and C. Faloutsos, in SIAM Data Mining 2004.
/// URL: http://www.cs.cmu.edu/~deepay/mywww/papers/siam04.pdf
///
/// Creates a directed graph with Nodes nodes and keeps drawing node pairs
/// until Edges distinct edges without self-loops have been added.
/// A, B, C are the quadrant probabilities and must satisfy A+B+C < 1 (asserted).
/// At every recursion depth the four probabilities are perturbed with a factor
/// drawn from Rnd and renormalized. Progress and a summary are printed to stdout.
/// NOTE(review): the edge loop only advances on a new non-loop edge, so it does
/// not terminate if Edges exceeds the number of such pairs available.
PNGraph GenRMat(const int& Nodes, const int& Edges, const double& A, const double& B, const double& C, TRnd& Rnd) {
  PNGraph GraphPt = TNGraph::New();
  TNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, Edges);
  IAssert(A+B+C < 1.0);
  int rngX, rngY, offX, offY;
  int Depth=0, Collisions=0, Cnt=0, PctDone=0;
  const int EdgeGap = Edges / 100 + 1;
  // sum of parameters (probabilities)
  TVec<double> sumA(128, 0), sumAB(128, 0), sumAC(128, 0), sumABC(128, 0);  // up to 2^128 vertices ~ 3.4e38
  for (int i = 0; i < 128; i++) {
    // perturb each probability by a factor (Rnd.GetUniDev() + 0.5), then renormalize
    const double a = A * (Rnd.GetUniDev() + 0.5);
    const double b = B * (Rnd.GetUniDev() + 0.5);
    const double c = C * (Rnd.GetUniDev() + 0.5);
    const double d = (1.0 - (A+B+C)) * (Rnd.GetUniDev() + 0.5);
    const double abcd = a+b+c+d;
    sumA.Add(a / abcd);
    sumAB.Add((a+b) / abcd);
    sumAC.Add((a+c) / abcd);
    sumABC.Add((a+b+c) / abcd);
  }
  // nodes
  for (int node = 0; node < Nodes; node++) {
    // AddNode is called outside of IAssert on purpose: a call placed inside the
    // macro would disappear in any build where the assertion expands to
    // nothing, and the graph would end up without nodes.
    const int NewNId = Graph.AddNode(-1);
    IAssert(NewNId == node);
    (void)NewNId; // keeps the variable referenced if IAssert is a no-op
  }
  // edges
  for (int edge = 0; edge < Edges; ) {
    rngX = Nodes;  rngY = Nodes;  offX = 0;  offY = 0;
    Depth = 0;
    // recurse the matrix: halve the remaining index ranges until one cell is left
    while (rngX > 1 || rngY > 1) {
      const double RndProb = Rnd.GetUniDev();
      if (rngX>1 && rngY>1) {
        if (RndProb < sumA[Depth]) { rngX/=2; rngY/=2; }
        else if (RndProb < sumAB[Depth]) { offX+=rngX/2;  rngX-=rngX/2;  rngY/=2; }
        else if (RndProb < sumABC[Depth]) { offY+=rngY/2;  rngX/=2;  rngY-=rngY/2; }
        else { offX+=rngX/2;  offY+=rngY/2;  rngX-=rngX/2;  rngY-=rngY/2; }
      } else
      if (rngX>1) { // row vector
        if (RndProb < sumAC[Depth]) { rngX/=2; rngY/=2; }
        else { offX+=rngX/2;  rngX-=rngX/2;  rngY/=2; }
      } else
      if (rngY>1) { // column vector
        if (RndProb < sumAB[Depth]) { rngX/=2; rngY/=2; }
        else { offY+=rngY/2;  rngX/=2;  rngY-=rngY/2; }
      } else { Fail; }
      Depth++;
    }
    // add edge
    const int NId1 = offX;
    const int NId2 = offY;
    if (NId1 != NId2 && ! Graph.IsEdge(NId1, NId2)) {
      Graph.AddEdge(NId1, NId2);
      if (++Cnt > EdgeGap) {
        Cnt=0;  printf("\r  %d%% edges", ++PctDone); }
      edge++;
    } else {
      // self-loop or duplicate edge: draw again
      Collisions++; }
  }
  // 100.0 keeps the percentage in floating point, so a large collision count
  // cannot overflow an int multiplication
  printf("\r  RMat: nodes:%d, edges:%d, Iterations:%d, Collisions:%d (%.1f%%).\n", Nodes, Edges,
    Edges+Collisions, Collisions, 100.0*Collisions/double(Edges+Collisions));
  Graph.Defrag();
  return GraphPt;
}
//////////////////////////////////////////////////////////////////////////
// Partial-Gram-Schmidt
// Builds a partial factorization of the vectors in BigSet with pivoting on
// the largest remaining residual norm.
//  BigSet - training set; GetNorm2(i) and DotProduct(i,j) are the only
//           operations used on it here
//  Dim    - maximal number of rows of R to compute (Dim <= BigSet->Len())
//  Eps    - the construction stops early once the largest residual norm
//           drops below Eps (0 <= Eps < 1)
// On return R holds the computed rows (row j has Len-j entries), IdV the
// column permutation, NormV the residual norms in permuted order and
// VecNormV the values of GetKernel(i,i).
TPartialGS::TPartialGS(PSVMTrainSet BigSet, const int& Dim, const double& Eps) {
    IAssert(Dim <= BigSet->Len() && 0.0 <= Eps && Eps < 1.0);
    int Len = BigSet->Len();

    // NiV[i].Key: current residual norm of vector i; NiV[i].Dat: already chosen as pivot
    TVec<TKeyDat<TFlt, TBool> > NiV(Len);
    for (int i = 0; i < Len; i++) {
        //NiV[i].Key = BigSet->DotProduct(i, i);
        NiV[i].Key = BigSet->GetNorm2(i);
        NiV[i].Dat = false;
        IAssertR(NiV[i].Key.Val > 0.0 && _isnan(NiV[i].Key.Val) == 0, 
                 TInt::GetStr(i) + TStr(":") + TFlt::GetStr(NiV[i].Key));
    }
    R.Gen(Dim, 0);
    //for (i = 0; i < Dim; i++) R[i].Gen(Len-i);
    // start from the identity permutation
    IdV.Gen(Len);
    for (int i = 0; i < Len; i++) IdV[i] = i;

    // BlufV is only appended to in this block; it is not read afterwards
    TFltV BlufV(Dim, 0); int max = -1;
    for (int j = 0; j < Dim; j++) {
        // find the not yet chosen element with the biggest residual norm
        max = -1;
        for (int t = 0, l = Len; t < l; t++)
            if (!NiV[t].Dat && (max == -1 || NiV[t].Key > NiV[max].Key)) max = t;

        // stop if even the largest residual norm is below Eps
        if (NiV[max].Key.Val < Eps) break;
        //printf("(%.2f)", NiV[max].Key.Val);

        // permute j-th and max-th column of R
        NiV[max].Dat = true;
        int mid = IdV.SearchForw(max, j);
        { int tmp = IdV[j]; IdV[j] = max; IdV[mid] = tmp; }
        // row t stores columns t..Len-1, hence the -t offsets
        for (int t = 0; t < j; t++) {
            double tmp = R[t][j-t];
            R[t][j-t] = R[t][mid-t];
            R[t][mid-t] = tmp;
        }

        // calculate j-th row of R and update NiV (residual norms)
        // clamp values in (-0.001, 0) to zero before taking the square root
        if (-0.001 < NiV[max].Key.Val && NiV[max].Key.Val < 0) NiV[max].Key.Val = 0.0;
        IAssertR(NiV[max].Key.Val >= 0.0, TInt::GetStr(j) + TStr(":") + TFlt::GetStr(NiV[max].Key.Val));
        IAssert(R.Len() == j);
        R.Add(TFltV()); R[j].Gen(Len-j); // NEW
        R[j][0] = sqrt(NiV[max].Key.Val);
        BlufV.Add(NiV[IdV[j]].Key.Val);
        for (int i = j+1; i < Len; i++) {
            // dot product minus the contributions of the rows computed so far
            double RR = BigSet->DotProduct(IdV[i], IdV[j]);
            for (int t = 0; t < j; t++)
                RR -= R[t][j-t] * R[t][i-t];
            IAssertR(NiV[IdV[j]].Key.Val>0, TInt::GetStr(i));
            RR /= sqrt(NiV[IdV[j]].Key.Val);
            IAssertR(_isnan(RR) == 0, TInt::GetStr(IdV[j]) + TStr(":") + TFlt::GetStr(NiV[IdV[j]].Key.Val));
            R[j][i-j] = RR;
            // reduce the residual norm of column i
            NiV[IdV[i]].Key -= RR*RR;
        }
    }

    // max is still -1 only if the loop above never ran (Dim == 0)
    if (max == -1) max = 0;
    printf("stoped at %d/%d with residual norm %.3f\n", R.Len(), BigSet->Len(), NiV[max].Key.Val);

    // residual norms in permuted order and kernel values of the diagonal
    NormV.Gen(Len);
    VecNormV.Gen(Len);
    for (int i = 0; i < Len; i++) {
        NormV[i] = NiV[IdV[i]].Key;
        VecNormV[i] = GetKernel(i,i);
    }
}
void THttpChDef::SetChTy(const THttpChTy& ChTy, const char& Ch){
  IAssert(ChTyV[Ch-TCh::Mn]==int(hpctUndef)); ChTyV[Ch-TCh::Mn]=TInt(ChTy);}
Beispiel #18
0
// Advances the input with GetCh() until the current character Ch is either
// SepCh or TCh::EofCh, then returns the underlying input stream SIn.
// Must not be called while PrevSymStStack holds entries (asserted).
PSIn TILx::GetSIn(const char& SepCh){
  IAssert(PrevSymStStack.Empty());
  for (;;){
    // stop on end of input or on the separator itself
    if ((Ch==TCh::EofCh)||(Ch==SepCh)){break;}
    GetCh();
  }
  return SIn;
}
Beispiel #19
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);
  // get command line parameters
  Env.PrepArgs("Crawl-Base to Text", 0);
  TStr InCrawlBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Crawl-Base-FileName");
  TStr OutTxtFNm=Env.GetIfArgPrefixStr("-ot:", "Crawl.Txt", "Output-Text-Filename");
  TStr OutStatFNm=Env.GetIfArgPrefixStr("-os:", "Crawl.Stat.Txt", "Output-Statistics-Text-Filename");
  bool SaveContP=Env.GetIfArgPrefixBool("-sc:", false, "Save-Content");
  bool SaveContOutUrlP=Env.GetIfArgPrefixBool("-scou:", true, "Save-Content-Outgoing-Urls");
  bool SaveContTagP=Env.GetIfArgPrefixBool("-sct:", true, "Save-Content-Tags");
  bool SaveOutUrlP=Env.GetIfArgPrefixBool("-sou:", false, "Save-Outgoing-Urls");
  bool SaveCTxtP=Env.GetIfArgPrefixBool("-sctc:", false, "Save-Continuos-Text-Content");
  int MnCTxtToks=Env.GetIfArgPrefixInt("-mctt:", 100, "Minimal-Continuos-Text-Tokens");
  TStrV BlockedDmNmV=Env.GetIfArgPrefixStrV("-bd:", "Blocked-Domain-Names (multiple)");
  if (Env.IsEndOfRun()){return 0;}
  // -i:si -sc:t -scou:n -sct:n -sctc:t -bd:.delo.si -bd:.dnevnik.si -bd:.vecer.si

  TStr BlobBsFMid=InCrawlBsFNm.GetFMid();
  // output file
  TFOut TxtFOut(OutTxtFNm); FILE* fTxt=TxtFOut.GetFileId();
  fprintf(fTxt, "Comment:input=%s\n", InCrawlBsFNm.CStr());
  fprintf(fTxt, "Comment:output=%s\n", OutTxtFNm.CStr());
  fprintf(fTxt, "BlobBaseName:%s\n", BlobBsFMid.CStr());
  // statistics
  TStrIntH HostNmToFqH;
  TStrIntH StatusCdToFqH;
  TStrIntH ContTypeToFqH;
  PMom HttpContLenMom=TMom::New();

  PBlobBs CrawlBBs=TMBlobBs::New(InCrawlBsFNm);
  TBlobPt TrvCrawlBPt=CrawlBBs->FFirstBlobPt();
  TBlobPt CrawlBPt; PSIn CrawlBlobSIn; int CrawlBlobN=0;
  while (CrawlBBs->FNextBlobPt(TrvCrawlBPt, CrawlBPt, CrawlBlobSIn)){
    CrawlBlobN++; printf("%d\r", CrawlBlobN);
    TStr DateTimeStr(*CrawlBlobSIn); //TStr DateTimeStr;
    TStr UrlStr(*CrawlBlobSIn);
    PUrl Url=TUrl::New(UrlStr); IAssert(Url->IsOk(usHttp));
    TMem HttpRespMem(*CrawlBlobSIn);
    PSIn HttpRespSIn=HttpRespMem.GetSIn();
    PHttpResp HttpResp=THttpResp::New(HttpRespSIn);
    // statistics
    HostNmToFqH.AddDat(Url->GetHostNm())++;
    StatusCdToFqH.AddDat(TInt::GetStr(HttpResp->GetStatusCd()))++;
    ContTypeToFqH.AddDat(HttpResp->GetFldVal(THttp::ContTypeFldNm))++;
    int ContLen=HttpResp->GetFldVal(THttp::ContLenFldNm).GetInt(-1);
    if (ContLen!=-1){
      HttpContLenMom->Add(ContLen);}
    // check blocked domain-names
    if (!BlockedDmNmV.Empty()){
      TStr DmNm=Url->GetDmNm(); int BlockedDmP=false;
      for (int BDmNmN=0; BDmNmN<BlockedDmNmV.Len(); BDmNmN++){
        if (DmNm.IsSuffix(BlockedDmNmV[BDmNmN])){
          BlockedDmP=true; break;
        }
      }
      if (BlockedDmP){
        continue;
      }
    }
    // check continuos-text
    if (SaveCTxtP&&IsCTxtHttpResp(Url, HttpResp, MnCTxtToks)){continue;}
    if (HttpResp->IsStatusCd_Ok()){
      PWebPg WebPg=TWebPg::New(UrlStr, HttpResp);
      fprintf(fTxt, "Start:HttpOk\n");
      fprintf(fTxt, "BlobBaseAddress:bb://%s/%d/%d\n",
       BlobBsFMid.CStr(), CrawlBPt.GetSeg(), CrawlBPt.GetAddr());
      fprintf(fTxt, "DateTime:%s\n", DateTimeStr.CStr());
      fprintf(fTxt, "Url:%s\n", UrlStr.CStr());
      fprintf(fTxt, "UrlMd5:%s\n", TMd5Sig(UrlStr).GetStr().CStr());
      fprintf(fTxt, "AtomName:%s\n", TUrl::GetTopDownDocNm(UrlStr).CStr());
      for (int FldN=0; FldN<HttpResp->GetFlds(); FldN++){
        TStr FldNm; TStr FldVal; HttpResp->GetFldNmVal(FldN, FldNm, FldVal);
        fprintf(fTxt, "HttpField:%s=%s\n", FldNm.CStr(), FldVal.CStr());
      }
      TMem BodyMem=HttpResp->GetBodyAsMem();
      fprintf(fTxt, "BodyMd5:%s\n", TMd5Sig(BodyMem).GetStr().CStr());
      // text
      if (SaveContP){
        if (HttpResp->IsContType(THttp::TextHtmlFldVal)){
          TStr HtmlStr=BodyMem.GetAsStr();
          TStr TxtStr=THtmlDoc::GetTxtLnDoc(HtmlStr, UrlStr, SaveContOutUrlP, SaveContTagP);
          fprintf(fTxt, "Content:%s\n", TxtStr.CStr());
        }
      }
      // outgoing-urls
      if (SaveOutUrlP){
        TUrlV OutUrlV; WebPg->GetOutUrlV(OutUrlV);
        for (int OutUrlN=0; OutUrlN<OutUrlV.Len(); OutUrlN++){
          TStr OutUrlStr=OutUrlV[OutUrlN]->GetUrlStr();
          fprintf(fTxt, "OutUrl:%s\n", OutUrlStr.CStr());
        }
      }
      fprintf(fTxt, "End:HttpOk\n");
    } else
    if (HttpResp->IsStatusCd_Redir()){
      TStr RedirUrlStr=HttpResp->GetFldVal(THttp::LocFldNm);
      PUrl RedirUrl=TUrl::New(RedirUrlStr, UrlStr);
      if (RedirUrl->IsOk(usHttp)){
        TStr RedirUrlStr=RedirUrl->GetUrlStr();
        fprintf(fTxt, "Start:HttpRedirection\n");
        fprintf(fTxt, "BlobBaseAddress:bb://%s/%d/%d\n",
         BlobBsFMid.CStr(), CrawlBPt.GetSeg(), CrawlBPt.GetAddr());
        fprintf(fTxt, "DateTime:%s\n", DateTimeStr.CStr());
        fprintf(fTxt, "Url:%s\n", UrlStr.CStr());
        fprintf(fTxt, "UrlMd5:%s\n", TMd5Sig(UrlStr).GetStr().CStr());
        fprintf(fTxt, "AtomName:%s\n", TUrl::GetTopDownDocNm(UrlStr).CStr());
        fprintf(fTxt, "RedirectionUrl:%s\n", RedirUrlStr.CStr());
        for (int FldN=0; FldN<HttpResp->GetFlds(); FldN++){
          TStr FldNm; TStr FldVal; HttpResp->GetFldNmVal(FldN, FldNm, FldVal);
          fprintf(fTxt, "HttpField:%s=%s\n", FldNm.CStr(), FldVal.CStr());
        }
        fprintf(fTxt, "End:HttpRedirection\n");
      }
    }
  }

  // statistics
  HttpContLenMom->Def();
  if (!OutStatFNm.Empty()){
    TFOut StatFOut(OutStatFNm); FILE* fStat=StatFOut.GetFileId();
    TIntStrPrV FqStatusCdPrV; StatusCdToFqH.GetDatKeyPrV(FqStatusCdPrV);
    TIntStrPrV FqContTypePrV; ContTypeToFqH.GetDatKeyPrV(FqContTypePrV);
    // hosts
    {fprintf(fStat, "================================================\n");
    TIntStrPrV FqHostNmPrV; HostNmToFqH.GetDatKeyPrV(FqHostNmPrV);
    FqHostNmPrV.Sort(false); int HostNmsSum=0;
    fprintf(fStat, "Hosts (%d):\n", FqHostNmPrV.Len());
    for (int HostNmN=0; HostNmN<FqHostNmPrV.Len(); HostNmN++){
      fprintf(fStat, "%7d   '%s'\n",
       FqHostNmPrV[HostNmN].Val1, FqHostNmPrV[HostNmN].Val2.CStr());
      HostNmsSum+=FqHostNmPrV[HostNmN].Val1;
    }
    fprintf(fStat, "----------\n");
    fprintf(fStat, "%7d   %s\n", HostNmsSum, "Sum");
    fprintf(fStat, "================================================\n");}
    // status-code
    {fprintf(fStat, "================================================\n");
    TIntStrPrV FqStatusCdPrV; StatusCdToFqH.GetDatKeyPrV(FqStatusCdPrV);
    FqStatusCdPrV.Sort(false); int StatusCdsSum=0;
    fprintf(fStat, "Status-Codes (%d):\n", FqStatusCdPrV.Len());
    for (int StatusCdN=0; StatusCdN<FqStatusCdPrV.Len(); StatusCdN++){
      fprintf(fStat, "%7d   '%s'\n",
       FqStatusCdPrV[StatusCdN].Val1, FqStatusCdPrV[StatusCdN].Val2.CStr());
      StatusCdsSum+=FqStatusCdPrV[StatusCdN].Val1;
    }
    fprintf(fStat, "----------\n");
    fprintf(fStat, "%7d   %s\n", StatusCdsSum, "Sum");
    fprintf(fStat, "================================================\n");}
    // content-type
    {fprintf(fStat, "================================================\n");
    TIntStrPrV FqContTypePrV; ContTypeToFqH.GetDatKeyPrV(FqContTypePrV);
    FqContTypePrV.Sort(false); int ContTypesSum=0;
    fprintf(fStat, "Content-Types (%d):\n", FqContTypePrV.Len());
    for (int ContTypeN=0; ContTypeN<FqContTypePrV.Len(); ContTypeN++){
      fprintf(fStat, "%7d   '%s'\n",
       FqContTypePrV[ContTypeN].Val1, FqContTypePrV[ContTypeN].Val2.CStr());
      ContTypesSum+=FqContTypePrV[ContTypeN].Val1;
    }
    fprintf(fStat, "----------\n");
    fprintf(fStat, "%7d   %s\n", ContTypesSum, "Sum");
    fprintf(fStat, "================================================\n");}
    // content-length
    {fprintf(fStat, "================================================\n");
    fprintf(fStat, "Content-length:\n");
    if (HttpContLenMom->IsUsable()){
      TStr MomStr=HttpContLenMom->GetStr('\n', ':', true, false, "%g");
      fprintf(fStat, "%s\n", MomStr.CStr());
    } else {
      fprintf(fStat, "Statistics not usable.\n");
    }
    fprintf(fStat, "================================================\n");}
  }

  return 0;
  Catch;
  return 1;
}
Beispiel #20
0
// Reads the next lexical symbol from the input and stores its kind in Sym,
// with the symbol text/value in Str, UcStr, Bool, Int or Flt as applicable.
// Expect is the set of symbols the caller is prepared to accept; it also
// selects the scanning mode:
//  - pending states on PrevSymStStack are restored first, without reading input;
//  - if Expect contains syLn, the rest of the current line is returned as syLn;
//  - if IsTabSep is set, the input is split into tab-separated strings;
//  - otherwise a regular tokenizer handles identifiers, quoted strings,
//    numbers, end-of-line, comments, end-of-file and punctuation.
// The comment text of a skipped comment is left in CmtStr.
// NOTE(review): in this excerpt the definition stops after the final else
// branch; the closing part of the function (including the return statement)
// is not visible here.
TLxSym TILx::GetSym(const TFSet& Expect){
  CmtStr.Clr();
  if (!PrevSymStStack.Empty()){
    // symbols already on the stack
    PrevSymStStack.Top().Restore(*this); PrevSymStStack.Pop();
  } else
  if (Expect.In(syLn)){
    // symbol is the whole line string
    if (Ch==TCh::EofCh){
      Sym=syEof;
    } else {
      Str.Clr();
      if (IsBof()){GetCh();}
      while (!ChDef->IsTerm(Ch)){Str.AddCh(Ch); GetCh();}
      // consume the line terminator by a recursive call that is forced to
      // report end-of-line, then restore the caller's IsRetEoln setting
      bool _IsRetEoln=IsRetEoln; IsRetEoln=true;
      GetSym(TFSet()|syEoln|syEof); Sym=syLn;
      IsRetEoln=_IsRetEoln;
    }
  } else
  if (IsTabSep){
    // symbol is between tab characters
    if (IsBof()){GetCh();}
    if (Ch==TCh::TabCh){ // tab character
      Sym=syTab; GetCh();
    } else
    if (ChDef->IsTerm(Ch)){ // eoln & eof characters
      // temporarily leave tab-separated mode so that the recursive call
      // goes through the regular tokenizer branch
      bool _IsRetEoln=IsRetEoln; IsRetEoln=true; IsTabSep=false;
      GetSym(TFSet()|syEoln|syEof);
      IsRetEoln=_IsRetEoln; IsTabSep=true;
    } else {
      // NOTE(review): only Str is cleared here; UcStr is appended to without
      // being cleared in this branch - confirm this is intended
      Str.Clr();
      while ((!ChDef->IsTerm(Ch))&&(Ch!=TCh::TabCh)){
        Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
      Sym=syStr; QuoteP=false;
    }
  } else {
    // usual symbol
    while (ChDef->IsSpace(Ch)){GetCh();}
    // remember where the symbol starts (line, position in line, absolute position)
    SymLnN=LnN; SymLnChN=LnChN; SymChN=ChN;

    if (ChDef->IsAlpha(Ch)){
      // identifier: a letter followed by alphanumeric characters
      if (IsUniStr){Sym=syStr;} else {Sym=syIdStr;}
      Str.Clr(); UcStr.Clr(); QuoteP=false;
      do {Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch));}
      while (ChDef->IsAlNum(GetCh()));
      // reserved words registered in RwStrH override the symbol kind;
      // the lookup uses UcStr when the lexer is not case sensitive
      if (!RwStrH.Empty()){
        TStr RwStr=Str; if (!IsCsSens){RwStr=UcStr;}
        int SymKeyId=RwStrH.GetKeyId(RwStr);
        if (SymKeyId!=-1){Sym=TLxSym(int(RwStrH[SymKeyId]));}
      }
      // when a boolean is expected, the identifier must be a valid boolean string
      if (Expect.In(syBool)){
        Sym=syBool; IAssert(TBool::IsValStr(Str));
        Bool=TBool::GetValFromStr(Str);
      }
    } else
    if ((Ch=='"')||(Ch=='\'')){
      // quoted string; the closing quote must match the opening quote character
      if (IsUniStr){Sym=syStr;} else {Sym=syQStr;}
      Str.Clr(); UcStr.Clr(); QuoteP=true; QuoteCh=Ch;
      GetCh();
      forever{
        // copy plain characters up to the next quote, backslash or end-of-file
        while ((Ch!=QuoteCh)&&(Ch!='\\')&&(Ch!=TCh::EofCh)){
          Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh();}
        if (Ch==TCh::EofCh){
          // unterminated string
          Sym=syUndef; break;
        } else if (Ch==QuoteCh){
          GetCh(); break;
        } else {
          // backslash escape sequence
          GetCh();
          // NOTE(review): for the b/f/n/r/t escapes the decoded control
          // character is added to Str, while UcStr receives ChDef->GetUc of
          // the escape letter itself - confirm this is intended
          switch (Ch){
            case '"': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case '\\': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case '\'': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case '/': Str.AddCh(Ch); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case 'b': Str.AddCh('\b'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case 'f': Str.AddCh('\f'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case 'n': Str.AddCh('\n'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case 'r': Str.AddCh('\r'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case 't': Str.AddCh('\t'); UcStr.AddCh(ChDef->GetUc(Ch)); GetCh(); break;
            case 'u': {
              // unicode character, represented using 4 hexadecimal digits
              GetCh(); EAssertR(TCh::IsHex(Ch), "Invalid hexadecimal digit in unicode escape");
              int UChCd = TCh::GetHex(Ch);
              GetCh(); EAssertR(TCh::IsHex(Ch), "Invalid hexadecimal digit in unicode escape");
              UChCd = 16 * UChCd + TCh::GetHex(Ch);
              GetCh(); EAssertR(TCh::IsHex(Ch), "Invalid hexadecimal digit in unicode escape");
              UChCd = 16 * UChCd + TCh::GetHex(Ch);
              GetCh(); EAssertR(TCh::IsHex(Ch), "Invalid hexadecimal digit in unicode escape");
              UChCd = 16 * UChCd + TCh::GetHex(Ch);
              // get as UTF8 encoded characters
              TUnicode::EncodeUtf8(UChCd, Str);
			  TUnicode::EncodeUtf8(UChCd, UcStr); }
              GetCh(); break; 
            default: Sym=syUndef; break;
          }
          // an unknown escape letter is reported as an exception
          if (Sym==syUndef){
            throw PExcept(new TExcept("Invalid Escape Sequence in Quoted String"));}
        }
      }
    } else
    if ((ChDef->IsNum(Ch))||(IsSigNum&&((Ch=='+')||(Ch=='-')))){
      // number; a leading sign is part of the number only when IsSigNum is set
      Str.Clr(); bool IntP=true;
      do {Str.AddCh(Ch);} while (ChDef->IsNum(GetCh()));
      // fraction and exponent are consumed only when a float is expected
      if (Expect.In(syFlt)){
        if (Ch=='.'){
          Str.AddCh(Ch); IntP=false;
          while (ChDef->IsNum(GetCh())){Str.AddCh(Ch);}
        }
        if ((Ch=='e')||(Ch=='E')){
          Str.AddCh(Ch); GetCh(); IntP=false;
          if ((Ch=='+')||(Ch=='-')){Str.AddCh(Ch); GetCh();}
          while (ChDef->IsNum(Ch)){Str.AddCh(Ch); GetCh();}
        }
      }
      UcStr=Str;
      // integer only if no fraction/exponent was read and an integer is expected;
      // otherwise the text is converted as a float
      if (IntP&&(Expect.In(syInt))){
        Sym=syInt; Int=atoi(Str.CStr());
      } else {
        Sym=syFlt; Flt=atof(Str.CStr());
      }
    } else
    if ((Ch==TCh::CrCh)||(Ch==TCh::LfCh)){
      // end of line: accepts CR, LF, CR-LF and LF-CR
      Sym=syEoln;
      if (Ch==TCh::CrCh){if (GetCh()==TCh::LfCh){GetCh();}} else
      if (Ch==TCh::LfCh){if (GetCh()==TCh::CrCh){GetCh();}}
      // unless end-of-line is to be returned, continue with the next symbol
      LnN++; LnChN=0; if (!IsRetEoln){GetSym(Expect);}
    } else
    if (Ch=='/'){
      GetCh();
      if ((IsCmtAlw)&&(Ch=='/')){
        // line comment: collect the text up to the line terminator
        TChA _CmtStr;
        do {_CmtStr+=GetCh();} while (!ChDef->IsTerm(Ch));
        // drop the terminator character that was appended last
        _CmtStr.Pop(); _CmtStr.Trunc();
        if (Ch==TCh::CrCh){
          if (GetCh()==TCh::LfCh){GetCh();}
        } else
        if (Ch==TCh::LfCh){
          if (GetCh()==TCh::CrCh){GetCh();}
        }
        if (IsRetEoln){Sym=syEoln;} else {GetSym(Expect);}
        // assigned after the recursive call, which clears CmtStr on entry
        CmtStr=_CmtStr;
      } else
      if (Ch=='*'){
        // block comment: collect the text up to the closing "*/"
        // NOTE(review): the loops below do not test for end-of-file, so the
        // behavior on an unterminated comment depends on what GetCh returns
        // at the end of input - verify
        TChA _CmtStr;
        do {
          while (GetCh()!='*'){_CmtStr+=Ch;}
          _CmtStr+=GetCh();
        } while (Ch!='/');
        _CmtStr.Pop(); _CmtStr.Pop(); _CmtStr.Trunc();
        GetCh(); GetSym(Expect);
        CmtStr=_CmtStr;
      } else {
        Sym=sySlash;
      }
    } else
    if (Ch==TCh::EofCh){
      Sym=syEof;
    } else {
      // punctuation and operators; one- and two-character symbols
      switch (Ch){
        case '.':
          if (GetCh()=='.'){Sym=syDPeriod; GetCh();}
          else {Sym=syPeriod;} break;
        case ',': Sym=syComma; GetCh(); break;
        case ':':
          if (GetCh()==':'){Sym=syDColon; GetCh();}
          else {Sym=syColon;} break;
        case ';': Sym=sySemicolon; GetCh(); break;
        case '+': Sym=syPlus; GetCh(); break;
        case '-': Sym=syMinus; GetCh(); break;
        case '*': Sym=syAsterisk; GetCh(); break;
        case '/': Sym=sySlash; GetCh(); break;
        case '%': Sym=syPercent; GetCh(); break;
        case '!': Sym=syExclamation; GetCh(); break;
        case '|': Sym=syVBar; GetCh(); break;
        case '&': Sym=syAmpersand; GetCh(); break;
        case '=': Sym=syEq; GetCh(); break;
        case '<':
          GetCh();
          if (Ch=='='){Sym=syLEq; GetCh();}
          else if (Ch=='>'){Sym=syNEq; GetCh();}
          else {Sym=syLss;} break;
        case '>':
          if (GetCh()=='='){Sym=syGEq; GetCh();}
          else {Sym=syGtr;} break;
        case '?': Sym=syQuestion; GetCh(); break;
        case '#':
          // '#' starts a line comment when comments are allowed, otherwise
          // it is returned as the syHash symbol
          if (IsCmtAlw){
            TChA _CmtStr;
            do {_CmtStr+=GetCh();} while (!ChDef->IsTerm(Ch));
            _CmtStr.Pop(); _CmtStr.Trunc();
            if (Ch==TCh::CrCh){
              if (GetCh()==TCh::LfCh){GetCh();}
            } else
            if (Ch==TCh::LfCh){
              if (GetCh()==TCh::CrCh){GetCh();}
            }
            if (IsRetEoln){Sym=syEoln;} else {GetSym(Expect);}
            CmtStr=_CmtStr;
          } else {
            Sym=syHash; GetCh();
          }
          break;
        case '(': Sym=syLParen; GetCh(); break;
        case ')': Sym=syRParen; GetCh(); break;
        case '[': Sym=syLBracket; GetCh(); break;
        case ']': Sym=syRBracket; GetCh(); break;
        case '{': Sym=syLBrace; GetCh(); break;
        case '}': Sym=syRBrace; GetCh(); break;
        default: Sym=syUndef; GetCh(); break;
      }
    }
  }
Beispiel #21
0
// Loads a comma-separated file into a bag-of-words document base.
// The first line of the file holds the feature type of each column.
//   FNm       - name of the CSV file; it is read twice
//   ClassId   - index of the column holding the class label
//   IgnoreIdV - indices of columns that are skipped
//   TrainLen  - number of leading records used to update the feature
//               generators; the same number of leading document ids is
//               registered as the training set, the rest as the test set
// Throws when a column type is unknown, when a line does not have the same
// number of fields as the header, or when a feature generator rejects a value.
PBowDocBs TFtrGenBs::LoadCsv(TStr& FNm, const int& ClassId, 
        const TIntV& IgnoreIdV, const int& TrainLen) {

    // feature generators
    PFtrGenBs FtrGenBs = TFtrGenBs::New();
    // CSV parsing stuff
    PSIn SIn = TFIn::New(FNm); 
    char SsCh = ' '; TStrV FldValV;
    // read the headers and initialise the feature generators
    TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);  
    for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
        const TStr& FldVal = FldValV[FldValN];
        if (FldValN == ClassId) { 
            // the class column accepts only the two nominal types
            if (FldVal == "NOM") {
                FtrGenBs->PutClsFtrGen(TFtrGenNominal::New());
            } else if (FldVal == "MULTI-NOM") {
                FtrGenBs->PutClsFtrGen(TFtrGenMultiNom::New());
            } else {
                TExcept::Throw("Wrong class type '" + FldVal + "', should be NOM or MULTI-NOM!");
            }
        } else if (!IgnoreIdV.IsIn(FldValN)) {
            if (FldVal == TFtrGenNumeric::GetType()) {
                FtrGenBs->AddFtrGen(TFtrGenNumeric::New());
            } else if (FldVal == TFtrGenNominal::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenNominal::New());
            } else if (FldVal == TFtrGenToken::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenToken::New(
                    TSwSet::New(swstNone), TStemmer::New(stmtNone)));
            } else if (FldVal == TFtrGenSparseNumeric::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenSparseNumeric::New());
            } else if (FldVal == TFtrGenMultiNom::GetType()) { 
                FtrGenBs->AddFtrGen(TFtrGenMultiNom::New());
            } else {
                TExcept::Throw("Wrong type '" + FldVal + "'!");
            }
        }
    }
    const int Flds = FldValV.Len();
    // read the lines and feed them to the feature generators
    int Recs = 0;
    while (!SIn->Eof()) {
        // only the first TrainLen records update the feature generators
        if (Recs == TrainLen) { break; }
        Recs++; printf("%7d\r", Recs);
        TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
        // make sure line still has the same number of fields as the header
        EAssertR(FldValV.Len() == Flds, 
            TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
            Recs + 1, FldValV.Len(), Flds));
        // go over lines
        try {
            TStrV FtrValV;
            for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
                const TStr& FldVal = FldValV[FldValN];
                if (FldValN == ClassId) { 
                    FtrGenBs->UpdateCls(FldVal);
                } else if (!IgnoreIdV.IsIn(FldValN)) {
                    FtrValV.Add(FldVal);
                }
            }
            FtrGenBs->Update(FtrValV);
        } catch (PExcept Ex) {
            // re-throw with the line number added to the message
            TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!", 
                Recs+1, Ex->GetMsgStr().CStr()));
        }
    }
    // read the file again and feed it to the training set
    PBowDocBs BowDocBs = FtrGenBs->MakeBowDocBs();
    // we read and ignore the headers since we parsed them already 
    SIn = TFIn::New(FNm); SsCh = ' ';
    TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);  
    // read the lines and feed them to the training set
    Recs = 0;
    while (!SIn->Eof()){
        Recs++; printf("%7d\r", Recs);
        TSs::LoadTxtFldV(ssfCommaSep, SIn, SsCh, FldValV, false);
        // make sure line still has the same number of fields as the header
        // (the line number is an int, so it must be formatted with %d)
        EAssertR(FldValV.Len() == Flds, 
            TStr::Fmt("Wrong number of fields in line %d! Found %d and expected %d!",
            Recs + 1, FldValV.Len(), Flds));
        // go over lines and construct the sparse vector
        TStrV FtrValV; TStr ClsFtrVal;
        try {
            for (int FldValN = 0; FldValN < FldValV.Len(); FldValN++) {
                const TStr& FldVal = FldValV[FldValN];
                if (FldValN == ClassId) { 
                    ClsFtrVal = FldVal;
                } else if (!IgnoreIdV.IsIn(FldValN)) {
                    FtrValV.Add(FldVal);
                }
            }
        } catch (PExcept Ex) {
            TExcept::Throw(TStr::Fmt("Error in line %d: '%s'!", 
                Recs+1, Ex->GetMsgStr().CStr()));
        }
        // add the feature vector to trainsets
        FtrGenBs->AddBowDoc(BowDocBs, TStr::Fmt("Line-%d", Recs), FtrValV, ClsFtrVal);
    }
    // prepare training and testing doc ids
    TIntV AllDIdV; BowDocBs->GetAllDIdV(AllDIdV); IAssert(AllDIdV.IsSorted());
    TIntV TrainDIdV = AllDIdV; TrainDIdV.Trunc(TrainLen);
    BowDocBs->PutTrainDIdV(TrainDIdV);
    TIntV TestDIdV = AllDIdV; TestDIdV.Minus(TrainDIdV);
    BowDocBs->PutTestDIdV(TestDIdV);

    return BowDocBs;
}
Beispiel #22
0
// Creates a string pool with an initial buffer capacity of MxBfLen bytes and
// the growth step _GrowBy. No buffer is allocated when the capacity is zero.
// The empty string is always added as the first entry of the pool.
TBigStrPool::TBigStrPool(TSize MxBfLen, uint _GrowBy):
  MxBfL(MxBfLen), BfL(0), GrowBy(_GrowBy), Bf(0) {
  if (MxBfL > 0) {
    // allocation failure is treated as a fatal error
    Bf = (char *) malloc(MxBfL);
    IAssert(Bf);
  }
  AddStr("");
}
Beispiel #23
0
void TFRnd::SetRecN(const int& RecN){
  IAssert(RecAct);
  SetFPos(HdLen+RecN*RecLen);
}
Beispiel #24
0
/////////////////////////////////////////////////
// System-Console
// Allocates a console for the process and fetches its standard-output handle.
// Both steps are asserted to succeed; Ok records whether the console was
// allocated.
TSysConsole::TSysConsole(){
  const BOOL AllocRes=AllocConsole();
  Ok=(AllocRes!=0);
  IAssert(Ok);
  hStdOut=GetStdHandle(STD_OUTPUT_HANDLE);
  IAssert(hStdOut!=INVALID_HANDLE_VALUE);
}
Beispiel #25
0
// Returns the number of records in the file: the file length without the
// HdLen-byte header, divided by the record length RecLen. Throws when the
// data part is not a whole multiple of RecLen. Requires record access to be
// active (RecAct).
int TFRnd::GetRecs(){
  IAssert(RecAct);
  int FLen=GetFLen()-HdLen;
  EAssertR(FLen%RecLen==0, "Invalid length of file'"+FNm+"'.");
  const int Recs=FLen/RecLen;
  return Recs;
}
Beispiel #26
0
// Releases the console, but only if the constructor managed to allocate one;
// the release itself is asserted to succeed.
TSysConsole::~TSysConsole(){
  if (!Ok){return;}
  IAssert(FreeConsole());
}
Beispiel #27
0
void TCpDoc::SaveAsfaToCpd(const TStr& InFPath, const TStr& OutCpdFNm){
  // create output file
  PSOut SOut=TFOut::New(OutCpdFNm);
  // traverse files
  TStrH AccessionIdH;
  TFFile FFile(TStr::GetNrFPath(InFPath)+"*.Asfa"); TStr AsfaFNm;
  while (FFile.Next(AsfaFNm)){
    printf("Processing file '%s'\n", AsfaFNm.CStr());
    PSIn SIn=TFIn::New(AsfaFNm);
    TILx Lx(SIn, TFSet(iloRetEoln, iloExcept));
    Lx.GetSym(syLn, syEof);
    while (Lx.Sym!=syEof){
      // Query Line
      TStr QueryLnStr=Lx.Str;
      TStrV QueryStrV; QueryLnStr.SplitOnAllCh('\t', QueryStrV, false);
      IAssert(QueryStrV[0]=="Query");
      // RecordNo Line
      Lx.GetSym(syLn); TStr RecNoLnStr=Lx.Str;
      TStrV RecNoStrV; RecNoLnStr.SplitOnAllCh('\t', RecNoStrV, false);
      IAssert(RecNoStrV[0]=="RecordNo");
      //int RecN=RecNoStrV[1].GetInt();
      // fields (format: Short-Name Tab Long-Name Tab Value-String)
      TStr TitleStr, AbstractStr, PublicationYearStr, AccessionId;
      TStrV AuthorNmV; TStrV TermNmV1, TermNmV2;
      while (true){
        Lx.GetSym(syLn); TStr FldLnStr=Lx.Str;
        TStrV FldStrV; FldLnStr.SplitOnAllCh('\t', FldStrV, false);
        if (FldStrV[0]=="----"){
          if (!AccessionIdH.IsKey(AccessionId)){
            AccessionIdH.AddKey(AccessionId);
            // create & save cpd document
            PCpDoc CpDoc=TCpDoc::New();
            CpDoc->DocNm=AccessionId;
            CpDoc->DateStr=PublicationYearStr;
            CpDoc->TitleStr=TitleStr;
            CpDoc->ParStrV.Add(AbstractStr);
            CpDoc->TopCdNmV=TermNmV1;
            CpDoc->GeoCdNmV=TermNmV2;
            CpDoc->IndCdNmV=AuthorNmV;
            CpDoc->Save(*SOut);
          } else {/*printf("[%s]", AccessionId.CStr());*/}
          break;
        } else
        if (FldStrV[0]=="TI"){
          TitleStr=FldStrV[2];
        } else if (FldStrV[0]=="TI"){
          TitleStr=FldStrV[2];
        } else if (FldStrV[0]=="AU"){
          FldStrV[2].SplitOnAllCh(';', AuthorNmV);
          for (int StrN=0; StrN<AuthorNmV.Len(); StrN++){AuthorNmV[StrN].ToTrunc();}
        } else if (FldStrV[0]=="AB"){
          AbstractStr=FldStrV[2];
        } else if (FldStrV[0]=="PY"){
          PublicationYearStr=FldStrV[2];
        } else if (FldStrV[0]=="DE"){
          FldStrV[2].SplitOnAllCh(';', TermNmV1);
          for (int StrN=0; StrN<TermNmV1.Len(); StrN++){TermNmV1[StrN].ToTrunc();}
        } else if (FldStrV[0]=="CL"){
          FldStrV[2].SplitOnAllCh(';', TermNmV2);
          for (int StrN=0; StrN<TermNmV2.Len(); StrN++){TermNmV2[StrN].ToTrunc();}
        } else if (FldStrV[0]=="AN"){
          AccessionId=FldStrV[2];
        }
      }
      printf("%d\r", AccessionIdH.Len());
      Lx.GetSym(syLn, syEof);
    }
  }
}
Beispiel #28
0
// Writes Str to the console and asserts that every character was written.
void TSysConsole::Put(const TStr& Str){
  // initialised so that the check below never reads an indeterminate value
  // when WriteConsole fails without storing a count
  DWORD ChsWritten=0;
  WriteConsole(hStdOut, Str.CStr(), Str.Len(), &ChsWritten, NULL);
  IAssert(ChsWritten==DWORD(Str.Len()));
}
Beispiel #29
0
// Removes the first CutBfL bytes from the buffer: the remaining bytes are
// moved to the front of Bf and the length BfL is reduced accordingly.
// CutBfL must lie in the range [0, BfL].
void TMOut::CutBf(const int& CutBfL){
  IAssert((0<=CutBfL)&&(CutBfL<=BfL));
  const int RestBfL=BfL-CutBfL;
  // nothing to move when the whole buffer is cut
  if (RestBfL!=0){memmove(Bf, Bf+CutBfL, RestBfL);}
  BfL=RestBfL;
}
Beispiel #30
0
// Returns the month number of this time, starting at 1.
// The time must be defined and convertible to a tm structure.
int TSecTm::GetMonthN() const {
  struct tm Tm;
  IAssert(IsDef() && GetTmStruct(AbsSecs(), Tm));
  // tm_mon counts months from zero
  const int MonthN=Tm.tm_mon+1;
  return MonthN;
}