Esempio n. 1
0
v8::Local<v8::Value> TNodeJsUtil::GetStrArr(const TStrV& StrV) {
    v8::Isolate* Isolate = v8::Isolate::GetCurrent();
    v8::EscapableHandleScope EscapableHandleScope(Isolate);
    v8::Local<v8::Array> JsStrV = v8::Array::New(Isolate, StrV.Len());
    for (int StrN = 0; StrN < StrV.Len(); StrN++) {
        JsStrV->Set(StrN, v8::String::NewFromUtf8(Isolate, StrV[StrN].CStr()));
    }
    return EscapableHandleScope.Escape(JsStrV);
}
int TTransCorpus::CountWords(const TIntStrH& StrH) {
    int Words = 0, KeyId = StrH.FFirstKeyId();
    while (StrH.FNextKeyId(KeyId)) {
        const TStr& Str = StrH[KeyId];
        TStrV WordV; Str.SplitOnWs(WordV);
        Words += WordV.Len();
    }
    return Words;
}
Esempio n. 3
0
void TSwSet::LoadFromFile(const TStr& FNm) {
    TStr FileStr = TStr::LoadTxt(FNm);
    FileStr.DelChAll('\r');
    TStrV WordV; FileStr.SplitOnAllCh('\n', WordV);
    for (int WordN = 0; WordN < WordV.Len(); WordN++) {
        const TStr& WordStr = WordV[WordN];
        if (!IsIn(WordStr)) { AddWord(WordStr); }
    }
}
Esempio n. 4
0
File: flx.cpp Progetto: Accio/snap
void TFile::DelWc(const TStr& WcStr, const bool& RecurseDirP){
  // collect file-names
  TStrV FNmV;
  TFFile FFile(WcStr, RecurseDirP); TStr FNm;
  while (FFile.Next(FNm)){
    FNmV.Add(FNm);}
  // delete files
  for (int FNmN=0; FNmN<FNmV.Len(); FNmN++){
    Del(FNmV[FNmN], false);}
}
Esempio n. 5
0
void TAmazonItemBs::GetVoidItemIdV(TStrV& VoidItemIdV) const {
  VoidItemIdV.Clr();
  for (int ItemIdN=0; ItemIdN<GetItems(); ItemIdN++){
    PAmazonItem Item=GetItem(ItemIdN);
    for (int NextItemIdN=0; NextItemIdN<Item->GetNextItemIds(); NextItemIdN++){
      TStr NextItemId=Item->GetNextItemId(NextItemIdN);
      if (!IsItem(NextItemId)){VoidItemIdV.AddUnique(NextItemId);}
    }
  }
}
Esempio n. 6
0
TStrV TTable::GetEdgeStrAttrV() const {
  TStrV StrEA = TStrV(StrColMaps.Len(),0);
  for (int i = 0; i < EdgeAttrV.Len(); i++) {
    TStr Attr = EdgeAttrV[i];
    if (GetColType(Attr) == STR) {
      StrEA.Add(Attr);
    }
  }
  return StrEA;
}
Esempio n. 7
0
TStrV TTable::GetDstNodeStrAttrV() const {
  TStrV StrNA = TStrV(StrColMaps.Len(),0);
  for (int i = 0; i < DstNodeAttrV.Len(); i++) {
    TStr Attr = DstNodeAttrV[i];
    if (GetColType(Attr) == STR) {
      StrNA.Add(Attr);
    }
  }
  return StrNA;
}
Esempio n. 8
0
TStrV TTable::GetEdgeFltAttrV() const {
  TStrV FltEA = TStrV(FltCols.Len(),0);;
  for (int i = 0; i < EdgeAttrV.Len(); i++) {
    TStr Attr = EdgeAttrV[i];
    if (GetColType(Attr) == FLT) {
      FltEA.Add(Attr);
    }
  }
  return FltEA;
}
Esempio n. 9
0
TStrV TTable::GetDstNodeFltAttrV() const {
  TStrV FltNA = TStrV(FltCols.Len(),0);
  for (int i = 0; i < DstNodeAttrV.Len(); i++) {
    TStr Attr = DstNodeAttrV[i];
    if (GetColType(Attr) == FLT) {
      FltNA.Add(Attr);
    }
  }
  return FltNA;
}
Esempio n. 10
0
TStrV TTable::GetEdgeIntAttrV() const {
  TStrV IntEA = TStrV(IntCols.Len(),0);
  for (int i = 0; i < EdgeAttrV.Len(); i++) {
    TStr Attr = EdgeAttrV[i];
    if (GetColType(Attr) == INT) {
      IntEA.Add(Attr);
    }
  }
  return IntEA;
}
Esempio n. 11
0
TStrV TTable::GetDstNodeIntAttrV() const {
  TStrV IntNA = TStrV(IntCols.Len(),0);
  for (int i = 0; i < DstNodeAttrV.Len(); i++) {
    TStr Attr = DstNodeAttrV[i];
    if (GetColType(Attr) == INT) {
      IntNA.Add(Attr);
    }
  }
  return IntNA;
}
Esempio n. 12
0
// return the list of folders containing backups for a given profile name
// folders are sorted from the oldest to the newest
void TFolderBackup::GetBackupFolders(const TStr& ProfileName, TStrV& FolderNmV) const
{
    FolderNmV.Clr();
    if (ProfileH.IsKey(ProfileName)) {
        TBackupProfile Profile = ProfileH.GetDat(ProfileName);
        const TVec<TBackupLogInfo>& LogV = Profile.GetLogs();
        for (int N = 0; N < LogV.Len(); N++)
            FolderNmV.Add(LogV[N].GetFolderName());
    }
}
Esempio n. 13
0
void TNGramBs::GetNGramStrV(
 const TStr& HtmlStr, TStrV& NGramStrV, TIntPrV& NGramBEChXPrV) const {
  TIntV NGramIdV; NGramStrV.Clr(); NGramBEChXPrV.Clr();
  TNGramBs::GetNGramIdV(HtmlStr, NGramIdV, NGramBEChXPrV);
  NGramStrV.Gen(NGramIdV.Len(), 0);
  for (int NGramIdN=0; NGramIdN<NGramIdV.Len(); NGramIdN++){
    TStr NGramStr=GetNGramStr(NGramIdV[NGramIdN]);
    NGramStrV.Add(NGramStr);
  }
}
Esempio n. 14
0
File: exp.cpp Progetto: Accio/snap
void TExpHelp::GetObjHdNmV(const TStr& CatNm, TStrV& ObjHdNmV) const {
  ObjHdNmV.Clr();
  for (int ObjN=0; ObjN<ObjV.Len(); ObjN++){
    TStr ObjCatNm=ObjV[ObjN]->GetCatNm();
    TStr ObjHdNm=ObjV[ObjN]->GetHdItem()->GetNm();
    if ((CatNm.Empty())||(CatNm=="All")||(CatNm==ObjCatNm)){
      ObjHdNmV.AddUnique(ObjHdNm);}
  }
  ObjHdNmV.Sort();
}
Esempio n. 15
0
PBowDocBs TBowFl::LoadReuters21578Txt(
 const TStr& FPath, const int& MxDocs,
 const TStr& SwSetTypeNm, const TStr& StemmerTypeNm,
 const int& MxNGramLen, const int& MnNGramFq, const bool& SaveDocP,
 const PNotify& Notify){
  // prepare stop-words
  PSwSet SwSet=TSwSet::GetSwSet(SwSetTypeNm);
  // prepare stemmer
  PStemmer Stemmer=TStemmer::GetStemmer(StemmerTypeNm);
  // create ngrams
  PNGramBs NGramBs;
  if (!((MxNGramLen==1)&&(MnNGramFq==1))){
    NGramBs=TNGramBs::GetNGramBsFromReuters21578(
     FPath, MxDocs,
     MxNGramLen, MnNGramFq, SwSet, Stemmer);
  }
  // create document-base
  PBowDocBs BowDocBs=TBowDocBs::New(SwSet, Stemmer, NGramBs);
  // traverse directory
  TFFile FFile(FPath, ".SGM", false); TStr FNm; int Docs=0;
  while (FFile.Next(FNm)){
    printf("Processing file '%s'\n", FNm.CStr());
    TIntH DocWIdToFqH(100);
    TXmlDocV LDocV; TXmlDoc::LoadTxt(FNm, LDocV);
    for (int LDocN=0; LDocN<LDocV.Len(); LDocN++){
      Docs++; if (Docs%100==0){printf("%d\r", Docs);}
      if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
      // get document-name
      PXmlDoc Doc=LDocV[LDocN];
      PXmlTok DocTok=Doc->GetTok();
      TStr DocNm=DocTok->GetArgVal("NEWID");
      // get document-categories
      TStrV CatNmV;
      TXmlTokV TopicsTokV; Doc->GetTagTokV("REUTERS|TOPICS|D", TopicsTokV);
      for (int TopicsTokN=0; TopicsTokN<TopicsTokV.Len(); TopicsTokN++){
        TStr CatNm=TopicsTokV[TopicsTokN]->GetTokStr(false);
        CatNmV.Add(CatNm);
      }
      // get document-contents
      PXmlTok DocStrTok=Doc->GetTagTok("REUTERS|TEXT");
      TStr DocStr=DocStrTok->GetTokStr(false);
      // add document to bow
      int DId=BowDocBs->AddHtmlDoc(DocNm, CatNmV, DocStr, SaveDocP);
      // train & test data
      if ((DocTok->GetArgVal("LEWISSPLIT")=="TRAIN")&&(DocTok->GetArgVal("TOPICS")=="YES")){
        BowDocBs->AddTrainDId(DId);}
      if ((DocTok->GetArgVal("LEWISSPLIT")=="TEST")&&(DocTok->GetArgVal("TOPICS")=="YES")){
        BowDocBs->AddTestDId(DId);}
    }
    if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
  }
  // return results
  BowDocBs->AssertOk();
  return BowDocBs;
}
Esempio n. 16
0
void TFFile::GetFNmV(
 const TStr& FPath, const TStrV& FExtV, const bool& RecurseP, TStrV& FNmV){
  // prepare file-directory traversal
  TStrV FPathV; FPathV.Add(FPath);
  TFFile FFile(FPathV, FExtV, "", RecurseP); TStr FNm;
  // traverse directory
  FNmV.Clr();
  while (FFile.Next(FNm)){
    FNmV.Add(FNm);
  }
}
Esempio n. 17
0
bool TFolderBackup::RestoreLatest(const TStr& ProfileName, const TBackupProfile::ERestoringMode& RestoringMode) const
{
    if (ProfileH.IsKey(ProfileName)) {
        TStrV FolderV; GetBackupFolders(ProfileName, FolderV);
        if (FolderV.Len() > 0) {
            ProfileH.GetDat(ProfileName).Restore(FolderV[FolderV.Len()-1], RestoringMode, ReportP);
            return true;
        }
    }
    return false;
}
Esempio n. 18
0
void TEnv::GetVarNmV(TStrV& VarNmV) {
    VarNmV.Clr();
    int VarN=0;
    while (_environ[VarN]!=NULL) {
        TStr VarNmVal=_environ[VarN++];
        TStr VarNm;
        TStr VarVal;
        VarNmVal.SplitOnCh(VarNm, '=', VarVal);
        VarNmV.Add(VarNm);
    }
}
Esempio n. 19
0
void TFtrGenBs::Update(const TStrV& FtrValV) {
	EAssert(State == fgsUpdate);
	EAssert(FtrValV.Len() == FtrGenV.Len());
	try {
		for (int FtrValN = 0; FtrValN < FtrValV.Len(); FtrValN++) {
			FtrGenV[FtrValN]->Update(FtrValV[FtrValN]);
		}
	} catch (PExcept Ex) {
		TExcept::Throw(TStr::Fmt("Error feature generation: '%s'!", Ex->GetMsgStr().CStr()));
	}
}
Esempio n. 20
0
void TFtrGenSparseNumeric::Add(const TStr& Str, TIntFltKdV& SpV, int& Offset) const {
    TStrV EltV; Str.SplitOnAllCh(';', EltV); TIntH UsedIdH;
    for (int EltN = 0; EltN < EltV.Len(); EltN++) {
        int Id; TStr Val; Split(EltV[EltN], Id, Val);
        EAssertR(!UsedIdH.IsKey(Id), "Field ID repeated in '" + Str + "'!");
        int TmpOffset = Offset + Id;
        FtrGen->Add(Val, SpV, TmpOffset);
        UsedIdH.AddKey(Id);
    }
    Offset += GetVals();
}
Esempio n. 21
0
void TWebPgFetchEvent::ChangeLastUrlToLc(const PHttpResp& HttpResp){
  static TStr MsNm="Microsoft";
  static TStr HttpsNm="HTTPS";
  TStr SrvNm=HttpResp->GetSrvNm();
  if ((SrvNm.StartsWith(MsNm))||(SrvNm.StartsWith(HttpsNm))){
    if (!UrlStrV.Last().IsLc()){
      PUrl Url=TUrl::New(UrlStrV.Last());
      Url->ToLcPath();
      UrlStrV.Last()=Url->GetUrlStr();
    }
  }
}
Esempio n. 22
0
// copy files for a particular folder info
void TBackupProfile::CopyFolder(const TStr& BaseTargetFolder, const TStr& SourceFolder, const TStrV& Extensions, const TStrV& SkipIfContainingV, const bool& IncludeSubfolders, const bool& ReportP, TStr& ErrMsg)
{
    try {
        // get the name of the source folder
        TStrV PathV; TDir::SplitPath(SourceFolder, PathV);
        EAssert(PathV.Len() > 0);

        // create the folder in the base target folder
        TStr TargetFolder = BaseTargetFolder + PathV[PathV.Len() - 1] + "/";
        if (!TDir::Exists(TargetFolder))
            TDir::GenDir(TargetFolder);

        // find files to be copied
        TStrV FileV;
        TFFile::GetFNmV(SourceFolder, Extensions, false, FileV);

        TStrV FolderV;
        // copy them
        for (int N = 0; N < FileV.Len(); N++) {
            // we found a file
            if (TFile::Exists(FileV[N])) {
                const TStr FileName = TDir::GetFileName(FileV[N]);
                // is this a file that we wish to ignore?
                bool ShouldCopy = true;
                for (int S = 0; S < SkipIfContainingV.Len(); S++) {
                    if (FileName.SearchStr(SkipIfContainingV[S]) >= 0)
                        ShouldCopy = false;
                }
                if (!ShouldCopy)
                    continue;
                const TStr TargetFNm = TargetFolder + FileName;
                if (ReportP)
                    TNotify::StdNotify->OnStatusFmt("Copying file: %s\r", FileName.CStr());
                TFile::Copy(FileV[N], TargetFNm);
            }
            // we found a folder
            else {
                FolderV.Add(FileV[N]);
            }
        }

        if (IncludeSubfolders) {
            for (int N = 0; N < FolderV.Len(); N++)
                CopyFolder(TargetFolder, FolderV[N], Extensions, SkipIfContainingV, IncludeSubfolders, ReportP, ErrMsg);
        }
    }
    catch (PExcept E) {
        if (ErrMsg != "")
            ErrMsg += "\n";
        ErrMsg += "Exception while copying from " + SourceFolder + ": " + E->GetMsgStr();
    }
    catch (...) {
        if (ErrMsg != "")
            ErrMsg += "\n";
        ErrMsg += "Exception while copying from " + SourceFolder + ": " + "Unrecognized exception occured.";
    }
}
Esempio n. 23
0
void TFtrGenToken::Update(const TStr& Val) {
    TStrV TokenStrV; GetTokenV(Val, TokenStrV); TStrH TokenStrH;
    for (int TokenStrN = 0; TokenStrN < TokenStrV.Len(); TokenStrN++) {
        const TStr& TokenStr = TokenStrV[TokenStrN];
        TokenStrH.AddKey(TokenStr);
    }
    int KeyId = TokenStrH.FFirstKeyId();
    while (TokenStrH.FNextKeyId(KeyId)) {
        const TStr& TokenStr = TokenStrH.GetKey(KeyId);
        TokenH.AddDat(TokenStr)++;
    }
    Docs++;
}
void TGreedyAlg::addCascade(const TStr& cascadeStr) {
    TStrV NIdV;
    cascadeStr.SplitOnAllCh(';', NIdV);
    TCascade C;
    for (int i = 0; i < NIdV.Len(); i++) {
    	TStr NId, Tm; NIdV[i].SplitOnCh(NId, ',', Tm);
        IAssert( IsNodeNm(NId.GetInt()) );
        GetNodeInfo(NId.GetInt()).Vol = GetNodeInfo(NId.GetInt()).Vol + 1;
        C.Add(NId.GetInt(), Tm.GetFlt());
    }
    C.Sort();
    cascadeV.Add(C);
}
void TEvalScore::Parse(const TStr& Str, TIntV& WIdV) {
    TStrV TokenV; Tokenize(Str, TokenV); WIdV.Clr();
    for (int WdN = 0; WdN < TokenV.Len(); WdN++) {
        // get the word string
        TStr WdStr = TokenV[WdN];
        // get id of the word
        int WId = WordH.GetKeyId(WdStr);
        // word does not exist yet, add it to the hash table
        if (WId == -1) { WId = WordH.AddKey(WdStr); }
        // add word to the parsed sentence
        WIdV.Add(WId);
    }
}
Esempio n. 26
0
PNGramBs TNGramBs::GetNGramBsFromLnDoc(
 const TStr& LnDocFNm, const bool& NamedP, const int& MxDocs,
 const int& MxNGramLen, const int& MnNGramFq,
 const PSwSet& SwSet, const PStemmer& Stemmer){
  // create n-gram-base
  /* printf("Generating frequent n-grams (MaxLen:%d MinFq:%d) ...\n", MxNGramLen, MnNGramFq); */
  PNGramBs NGramBs=TNGramBs::New(MxNGramLen, MnNGramFq, SwSet, Stemmer);
  // interations over document-set
  while (!NGramBs->IsFinished()){
    // open line-doc file
    TFIn FIn(LnDocFNm); char Ch=' '; int Docs=0;
    while (!FIn.Eof()){
	  if(Ch == '\r' || Ch == '\n'){Ch = FIn.GetCh(); continue;}
      Docs++; if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
      //printf("%d\r", Docs);
      // document name
      TChA DocNm;
      if (NamedP){
        Ch=FIn.GetCh();
        while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){
          DocNm+=Ch; Ch=FIn.GetCh();}
        DocNm.Trunc();
        if (DocNm.Empty()){Docs--; continue;}
      }
      // categories
      TStrV CatNmV;
      forever {
        while ((!FIn.Eof())&&(Ch==' ')){Ch=FIn.GetCh();}
        if (Ch=='!'){
          TChA CatNm;
          while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){
            CatNm+=Ch; Ch=FIn.GetCh();}
          if (!CatNm.Empty()){CatNmV.Add(CatNm);}
        } else {
          break;
        }
      }
      // document text
      TChA DocChA;
      while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')){
        DocChA+=Ch; Ch=FIn.GetCh();}
      // extract words & update ngram-base
      /* printf("  Pass %2d: %6d Docs\r", NGramBs->GetPassN(), Docs); */
      _UpdateNGramBsFromHtmlStr(NGramBs, DocChA, SwSet, Stemmer);
    }
    NGramBs->ConcPass();
  }
  /* printf("\nDone.\n"); */
  // return
  return NGramBs;
}
Esempio n. 27
0
void TCpDoc::SaveLnDocToCpd(
 const TStr& LnDocFNm, const TStr& OutCpdFNm, const bool& NamedP, const int& MxDocs){
  printf("Saving Line-Document '%s' to '%s' ...\n", LnDocFNm.CStr(), OutCpdFNm.CStr());
  // create output file
  PSOut SOut=TFOut::New(OutCpdFNm);
  // open line-doc file
  TFIn FIn(LnDocFNm); char Ch=' '; int Docs=0;
  while (!FIn.Eof()){
    Docs++; if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
    printf("%d\r", Docs);
    // document name
    TChA DocNm;
    if (NamedP){
      Ch=FIn.GetCh();
      while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){
        DocNm+=Ch; Ch=FIn.GetCh();}
      DocNm.Trunc();
      if (DocNm.Empty()){Docs--; continue;}
    }
    // categories
    TStrV CatNmV;
    forever {
      while ((!FIn.Eof())&&(Ch==' ')){Ch=FIn.GetCh();}
      if (Ch=='!'){
        if (!FIn.Eof()){Ch=FIn.GetCh();}
        TChA CatNm;
        while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')&&(Ch!=' ')){
          CatNm+=Ch; Ch=FIn.GetCh();}
        if (!CatNm.Empty()){CatNmV.Add(CatNm);}
      } else {
        break;
      }
    }
    // document text
    TChA DocChA;
    while ((!FIn.Eof())&&(Ch!='\r')&&(Ch!='\n')){
      DocChA+=Ch; Ch=FIn.GetCh();}
    // skip empty documents (empty lines)
    if (DocNm.Empty()&&DocChA.Empty()){
      continue;}
    // create & save cpd document
    PCpDoc CpDoc=TCpDoc::New();
    CpDoc->DocNm=DocNm;
    CpDoc->ParStrV.Add(DocChA, 1);
    for (int CatNmN=0; CatNmN<CatNmV.Len(); CatNmN++){
      CpDoc->TopCdNmV.Add(CatNmV[CatNmN]);}
    CpDoc->Save(*SOut);
  }
  printf("\nDone.\n");
}
Esempio n. 28
0
void TNetInfBs::AddCasc(const TStr& CascStr, const int& Model, const double& alpha) {
    TStrV NIdV; CascStr.SplitOnAllCh(',', NIdV);
    TCascade C(alpha, Model);
    for (int i = 0; i < NIdV.Len(); i+=2) {
      int NId;
      double Tm; 
      NId = NIdV[i].GetInt();
      Tm = NIdV[i+1].GetFlt();
      GetNodeInfo(NId).Vol = GetNodeInfo(NId).Vol + 1;
      C.Add(NId, Tm);
    }
    C.Sort();
    CascV.Add(C);
}
/////////////////////////////////////////////////
// EuPartner
TStr TCordisEuPart::ExtrCountry(const TStr& AddrStr){
  TStr CountryStr;
  TStrV LnV; AddrStr.SplitOnStr("<br>", LnV);
  if (LnV.Len()>0){
    CountryStr=LnV.Last();
    if (CountryStr.Empty()&&(LnV.Len()>1)){
      CountryStr=LnV[LnV.Len()-2];}
    CountryStr.DelChAll('\r');
    CountryStr.DelChAll('\n');
  }
  if (CountryStr.Empty()){
    printf("Country Field Not Found!\n");}
  return CountryStr;
}
Esempio n. 30
0
void LogOutput::WriteClusteringStatisticsToFile(TSecTm& Date) {
  if (!ShouldLog) return;
  TStr FileName = Directory + "/text/statistics/statistics_" + Date.GetDtYmdStr() + ".txt";
  FILE *F = fopen(FileName.CStr(), "w");

  TStrV Keys;
  OutputValues.GetKeyV(Keys);

  for (int i = 0; i < Keys.Len(); ++i) {
    fprintf(F, "%s\t%s\n", Keys[i].CStr(), OutputValues.GetDat(Keys[i]).CStr());
  }

  fclose(F);
}