Ejemplo n.º 1
0
// Collects a (named-object string, document count) pair for every named
// object whose document count is at least MnFq.
//   NmObjStrFqPrV - output vector; re-generated here before being filled
//   MnFq          - minimal document count required for an object to be listed
void TNmObjBs::GetNmObjStrFqPrV(TStrIntPrV& NmObjStrFqPrV, const int& MnFq) const {
  const int TotalNmObjs=GetNmObjs();
  // NOTE(review): Gen(TotalNmObjs, 0) presumably reserves room for all objects
  // with an initial length of zero - confirm against the vector's Gen()
  NmObjStrFqPrV.Gen(TotalNmObjs, 0);
  for (int Id=0; Id<TotalNmObjs; Id++){
    // build the candidate pair: string first, then document count
    TStrIntPr CandidatePr;
    CandidatePr.Val1=GetNmObjStr(Id);
    CandidatePr.Val2=GetNmObjDocs(Id);
    // keep the pair only when it reaches the frequency threshold
    const bool FrequentP=(CandidatePr.Val2>=MnFq);
    if (FrequentP){
      NmObjStrFqPrV.Add(CandidatePr);
    }
  }
}
Ejemplo n.º 2
0
// Serializes a vector of (string, integer) pairs into one string.
// Every pair is written as <Val1><FieldDelimiterStr><Val2>; DelimiterStr is
// put in front of a pair whenever the text built so far is non-empty.
//   StrIntPrV         - pairs to serialize, in vector order
//   FieldDelimiterStr - separator between the two fields of a pair
//   DelimiterStr      - separator between consecutive pairs
// Returns the concatenated text (empty for an empty vector).
TStr TStrUtil::GetStr(const TStrIntPrV& StrIntPrV,
 const TStr& FieldDelimiterStr, const TStr& DelimiterStr) {
  TChA OutChA;
  for (int PairN = 0; PairN < StrIntPrV.Len(); PairN++) {
    const TStrIntPr& Pr = StrIntPrV[PairN];
    // separate from the previously written pair
    if (!OutChA.Empty()) {
      OutChA += DelimiterStr;
    }
    // string field, field separator, integer field
    OutChA += Pr.Val1;
    OutChA += FieldDelimiterStr;
    OutChA += Pr.Val2.GetStr();
  }
  return OutChA;
}
Ejemplo n.º 3
0
void TSkyGridEnt::GetDocsPerDateV(
 const TSkyGridBs* SkyGridBs, TStrIntPrV& DateStrDocsPrV, int& Docs) const {
  TStrIntH DateStrToDocsH; Docs=0;
  for (int DocN=0; DocN<GetDocIds(); DocN++){
    int DocId=GetDocId(DocN);
    PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId);
    uint64 DocTm=Doc->GetTm();
    TStr DocDateStr=TTm::GetTmFromMSecs(DocTm).GetWebLogDateStr();
    DateStrToDocsH.AddDat(DocDateStr)++; Docs++;
  }
  DateStrToDocsH.GetKeyDatPrV(DateStrDocsPrV);
  DateStrDocsPrV.Sort();
}
Ejemplo n.º 4
0
// Click handler shared by the context, source and destination sort
// radio-groups. Refills the list-box that belongs to the clicked radio-group
// with named objects (minimal frequency 3), each shown as "<name> (<freq>)".
// Item 0 selects ordering by name; every other item selects ordering by
// frequency, and an item index other than 0 or 1 is reset to 1.
void __fastcall TContexterF::NmObjSortRgClick(TObject *Sender){
  // pair the clicked radio-group with its list-box; ignore unknown senders
  TRadioGroup* SortRg=NULL; TListBox* TargetLb=NULL;
  if (Sender==CtxNmObjSortRg){
    SortRg=CtxNmObjSortRg; TargetLb=CtxNmObjLb;
  } else if (Sender==SrcNmObjSortRg){
    SortRg=SrcNmObjSortRg; TargetLb=SrcNmObjLb;
  } else if (Sender==DstNmObjSortRg){
    SortRg=DstNmObjSortRg; TargetLb=DstNmObjLb;
  } else {
    return;
  }

  // determine sort-order; anything but items 0 and 1 falls back to item 1
  const int SortItemN=SortRg->ItemIndex;
  if ((SortItemN!=0)&&(SortItemN!=1)){SortRg->ItemIndex=1;}
  const bool SortByNameP=(SortItemN==0);

  // reset the target list-box and the two dependent list-boxes
  TargetLb->Clear();
  EnConceptWordLb->Clear();
  EnCoNmObjLb->Clear();

  if (SortByNameP){
    // (name, frequency) pairs, default sort
    TStrIntPrV StrFqPrV;
    State->NmObjBs->GetNmObjStrFqPrV(StrFqPrV, 3);
    StrFqPrV.Sort();
    for (int PrN=0; PrN<StrFqPrV.Len(); PrN++){
      TStrIntPr& StrFqPr=StrFqPrV[PrN];
      TStr ItemStr=StrFqPr.Val1+TInt::GetStr(StrFqPr.Val2, " (%d)");
      TargetLb->Items->Add(ItemStr.CStr());
    }
  } else {
    // (frequency, name) pairs
    // NOTE(review): Sort(false) presumably means descending order - confirm
    TIntStrPrV FqStrPrV;
    State->NmObjBs->GetNmObjFqStrPrV(FqStrPrV, 3);
    FqStrPrV.Sort(false);
    for (int PrN=0; PrN<FqStrPrV.Len(); PrN++){
      TIntStrPr& FqStrPr=FqStrPrV[PrN];
      TStr ItemStr=FqStrPr.Val2+TInt::GetStr(FqStrPr.Val1, " (%d)");
      TargetLb->Items->Add(ItemStr.CStr());
    }
  }
}
Ejemplo n.º 5
0
void TSkyGridBs::SaveTxt(const TStr& FNm, const uint64& CurTm){
  // time-limit
  TStr CurTmStr=TTm::GetTmFromMSecs(CurTm).GetWebLogDateTimeStr();
  uint64 CurDateTm=TTm::GetMSecsFromTm(TTm::GetTmFromWebLogDateTimeStr(TTm::GetTmFromMSecs(CurTm).GetWebLogDateStr()));
  TStr CurDateTmStr=TTm::GetTmFromMSecs(CurDateTm).GetWebLogDateTimeStr();
  TUInt64V MnTmV;
  MnTmV.Add(CurDateTm-0*TTmInfo::GetDayMSecs());
  MnTmV.Add(CurDateTm-1*TTmInfo::GetDayMSecs());
  MnTmV.Add(CurDateTm-2*TTmInfo::GetDayMSecs());
  MnTmV.Add(CurDateTm-4*TTmInfo::GetDayMSecs());
  MnTmV.Add(CurDateTm-8*TTmInfo::GetDayMSecs());
  MnTmV.Add(CurDateTm-16*TTmInfo::GetDayMSecs());
  MnTmV.Add(CurDateTm-32*TTmInfo::GetDayMSecs());

  // get bow
  //PBowDocBs BowDocBs=GetBowDocBs(3, 5);
  PBowDocBs BowDocBs=GetBowDocBs();
  PBowDocWgtBs BowDocWgtBs=GetBowDocWgtBs(BowDocBs);

  // open file
  TFOut FOut(FNm); FILE* fOut=FOut.GetFileId();
  // get docs-entities sorted vector
  TIntPrV DocsEntIdPrV; GetSorted_DocsEntIdPrV(DocsEntIdPrV);
  // traverse entities
  for (int EntN=0; EntN<DocsEntIdPrV.Len(); EntN++){
    int EntId=DocsEntIdPrV[EntN].Val2;
    TStr EntNm=GetEntNm(EntId);
    int EntDocs=DocsEntIdPrV[EntN].Val1;
    TSkyGridEnt& Ent=GetEnt(EntId);
    int LinkEnts=Ent.GetLinkEnts();
    fprintf(fOut, "'%s' [%d docs] [%d ents]\n", EntNm.CStr(), EntDocs, LinkEnts);

    // output docs over dates
    {TStrIntPrV DateStrDocsPrV; int _EntDocs;
    Ent.GetDocsPerDateV(this, DateStrDocsPrV, _EntDocs);
    fprintf(fOut, "   Docs per Date (%d docs):", _EntDocs);
    for (int DateN=0; DateN<DateStrDocsPrV.Len(); DateN++){
      TStr DateStr=DateStrDocsPrV[DateN].Val1;
      int Docs=DateStrDocsPrV[DateN].Val2;
      fprintf(fOut, " [%s:%d]", DateStr.CStr(), Docs);
    }
    fprintf(fOut, "\n");}

    fprintf(fOut, "   [Now: %s]\n", CurTmStr.CStr());
    TIntPrV PrevLinkWgtDstEntIdPrV;
    TStrFltPrV PrevWordStrWgtPrV;
    for (int MnTmN=0; MnTmN<MnTmV.Len(); MnTmN++){
      uint64 MnTm=MnTmV[MnTmN];
      double PastDays=(CurDateTm-MnTm)/double(TTmInfo::GetDayMSecs());
      TStr MnTmStr=TTm::GetTmFromMSecs(MnTm).GetWebLogDateTimeStr();
      // get linked entities
      TIntPrV LinkWgtDstEntIdPrV;
      Ent.GetSorted_LinkWgtDstEntIdPrV(MnTm, 0.9, LinkWgtDstEntIdPrV);
      // output difference between previous and current centroid
      if (MnTmN>0){
        TIntPrV NegDiffLinkWgtDstEntIdPrV; TIntPrV PosDiffLinkWgtDstEntIdPrV;
        GetLinkWgtDstEntIdPrVDiff(LinkWgtDstEntIdPrV, PrevLinkWgtDstEntIdPrV,
         NegDiffLinkWgtDstEntIdPrV, PosDiffLinkWgtDstEntIdPrV);
        // output positive change
        TChA PosDiffLinkWgtDstEntIdPrVChA;
        GetLinkWgtDstEntIdPrVChA(PosDiffLinkWgtDstEntIdPrV, PosDiffLinkWgtDstEntIdPrVChA);
        fprintf(fOut, "         Pos-Diff: %s\n", PosDiffLinkWgtDstEntIdPrVChA.CStr());
        // output negative change
        TChA NegDiffLinkWgtDstEntIdPrVChA;
        GetLinkWgtDstEntIdPrVChA(NegDiffLinkWgtDstEntIdPrV, NegDiffLinkWgtDstEntIdPrVChA);
        fprintf(fOut, "         Neg-Diff: %s\n", NegDiffLinkWgtDstEntIdPrVChA.CStr());
      }
      PrevLinkWgtDstEntIdPrV=LinkWgtDstEntIdPrV;
      // output linked entities
      int TopLinkEnts=LinkWgtDstEntIdPrV.Len();
      TChA LinkWgtDstEntIdPrVChA;
      GetLinkWgtDstEntIdPrVChA(LinkWgtDstEntIdPrV, LinkWgtDstEntIdPrVChA);
      fprintf(fOut, "      Entities (%d ents): %s\n",
       TopLinkEnts, LinkWgtDstEntIdPrVChA.CStr());
      // get text centroid
      int CtrDocs; TStrFltPrV WordStrWgtPrV;
      Ent.GetDocCentroid(this, BowDocBs, BowDocWgtBs, MnTm, 150, 0.9, CtrDocs, WordStrWgtPrV);
      // output difference between previous and current centroid
      if (MnTmN>0){
        TStrFltPrV NegDiffWordStrWgtPrV; TStrFltPrV PosDiffWordStrWgtPrV;
        GetWordStrWgtPrVDiff(WordStrWgtPrV, PrevWordStrWgtPrV,
         NegDiffWordStrWgtPrV, PosDiffWordStrWgtPrV);
        // output positive change
        TChA PosDiffWordStrWgtPrVChA; GetWordStrWgtPrVChA(PosDiffWordStrWgtPrV, PosDiffWordStrWgtPrVChA);
        fprintf(fOut, "         Pos-Diff: %s\n", PosDiffWordStrWgtPrVChA.CStr());
        // output negative change
        TChA NegDiffWordStrWgtPrVChA; GetWordStrWgtPrVChA(NegDiffWordStrWgtPrV, NegDiffWordStrWgtPrVChA);
        fprintf(fOut, "         Neg-Diff: %s\n", NegDiffWordStrWgtPrVChA.CStr());
      }
      PrevWordStrWgtPrV=WordStrWgtPrV;
      // output centroid
      TChA WordStrWgtPrVChA; GetWordStrWgtPrVChA(WordStrWgtPrV, WordStrWgtPrVChA);
      fprintf(fOut, "      Centroid (%d docs, %d words): %s\n",
       CtrDocs, WordStrWgtPrV.Len(), WordStrWgtPrVChA.CStr());
      // output time
      fprintf(fOut, "   [-%.1f days: %s]\n", PastDays, MnTmStr.CStr());
    }
    // entity clustering
    /*TVec<TStrFltPrV> EntNmWgtPrVV;
    Ent.GetEntClustV(this, MnTmV.Last(), 100, 1000, 10, EntNmWgtPrVV);
    for (int ClustN=0; ClustN<EntNmWgtPrVV.Len(); ClustN++){
      TStrFltPrV& EntNmWgtPrV=EntNmWgtPrVV[ClustN];
      fprintf(fOut, "   Clust-%d:", ClustN);
      for (int EntN=0; EntN<EntNmWgtPrV.Len(); EntN++){
        TStr EntNm=EntNmWgtPrV[EntN].Val1;
        double Wgt=EntNmWgtPrV[EntN].Val2;
        fprintf(fOut, " ['%s':%.3f]", EntNm.CStr(), Wgt);
      }
      fprintf(fOut, "\n");
    }*/
    fprintf(fOut, "\n");
  }
}