// Writes into OldRankStr an HTML fragment describing how the rank of
// ClusterId changed relative to the rank stored in OldRankings.
//   OldRankings - lookup of cluster id -> previously recorded rank
//   ClusterId   - cluster whose previous rank is looked up
//   CurRank     - the cluster's current rank
//   OldRankStr  - output; overwritten on every call
// The difference is (previous rank - current rank): a positive value is
// rendered in green with a leading '+', a negative value in red, zero as a
// plain bold "0". A cluster that has no entry in OldRankings yields "new!".
void LogOutput::ComputeOldRankString(THash<TInt, TInt>& OldRankings, TInt& ClusterId, TInt CurRank, TStr& OldRankStr) {
  TInt PrevRank;
  if (!OldRankings.IsKeyGetDat(ClusterId, PrevRank)) {
    // No previous ranking recorded for this cluster.
    OldRankStr = "<center>new!</center>";
    return;
  }

  TInt Delta = PrevRank - CurRank;
  if (Delta > 0) {
    OldRankStr = "<b><center><font color=\\\"green\\\">+" + Delta.GetStr() + "</font></center></b>";
  } else if (Delta < 0) {
    // The minus sign comes from the number's own string form.
    OldRankStr = "<b><center><font color=\\\"red\\\">" + Delta.GetStr() + "</font></center></b>";
  } else {
    OldRankStr = "<b><center>0</center></b>";
  }
}
// Counts how many ids among Node's neighbours, plus Node's own id, have a
// decimal string form that SearchStr finds inside NNodes.
//   Node   - node iterator whose neighbourhood is examined
//   NNodes - string that is searched for each id
// Returns the number of ids for which NNodes.SearchStr(...) != -1.
// NOTE(review): SearchStr looks like a plain substring search, in which case
// id 1 would also be counted when NNodes only contains "11" - confirm that
// callers format NNodes so this cannot happen.
int Intersect1(TUNGraph::TNodeI Node, TStr NNodes){
  int Hits = 0;
  const int Degree = Node.GetDeg();
  for (int k = 0; k < Degree; ++k) {
    const TInt NbrId = Node.GetNbrNId(k);
    if (NNodes.SearchStr(NbrId.GetStr().CStr()) != -1) {
      ++Hits;
    }
  }

  // The node itself is checked as well.
  const TInt SelfId = Node.GetId();
  if (NNodes.SearchStr(SelfId.GetStr().CStr()) != -1) {
    ++Hits;
  }
  return Hits;
}
// Counts how many ids among Node's neighbours, plus Node's own id, have a
// decimal string form for which NNodes.IsStrIn(...) is true.
//   Node   - node iterator whose neighbourhood is examined
//   NNodes - string that is tested against each id
// Returns the number of matching ids.
// NOTE(review): IsStrIn presumably tests substring containment, so id 1 may
// match inside "11" - confirm against how callers build NNodes.
int Intersect(TUNGraph::TNodeI Node, TStr NNodes){
  int Matches = 0;
  const int Degree = Node.GetDeg();
  for (int k = 0; k < Degree; ++k) {
    const TInt NbrId = Node.GetNbrNId(k);
    const TStr NbrStr = NbrId.GetStr();
    Matches += NNodes.IsStrIn(NbrStr.CStr()) ? 1 : 0;
  }

  // The node itself is checked as well.
  const TInt SelfId = Node.GetId();
  const TStr SelfStr = SelfId.GetStr();
  Matches += NNodes.IsStrIn(SelfStr.CStr()) ? 1 : 0;
  return Matches;
}
//Function to read a table of nodes PTable AddNodeTable(TTableContext& Context) { Schema NodeScm; NodeScm.Add(TPair<TStr, TAttrType>("NodeID", atStr)); char FileName[50]; int ColCnt = 0; printf("Adding Node Table\n"); printf("Enter filename and number of columns (>= 1) \n"); scanf("%s %d", FileName, &ColCnt); for (TInt i = 1; i < ColCnt; i++) { TStr ColName = "Attribute" + i.GetStr(); NodeScm.Add(TPair<TStr, TAttrType>(ColName, atStr)); } TStr FName(FileName); PTable T = TTable::LoadSS(NodeScm, FName, Context); return T; }
int main(){ TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); PTable P = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols); P->SaveSS("tests/p1.txt"); TStrV cols; cols.Add("Size"); cols.Add("Number"); TVec<PTable> R = P->SpliceByGroup(cols); for (TInt i = 0; i < R.Len(); i++) { TStr fn = i.GetStr(); R[i]->SaveSS("tests/sznumber" + fn + ".txt"); } P->Unique(cols, true); P->SaveSS("tests/p2.txt"); TStrV group1; group1.Add("Location"); P->Group(group1, "LocationGroup"); P->SaveSS("tests/p3.txt"); return 0; }
// Builds a single tab-separated record describing cluster C.
//   QB, DB   - quote base and doc base passed to C's representative-quote
//              accessors
//   C        - the cluster being described
//   FreqV    - frequency data passed to DCluster::GetFMP
//   NumPeaks - value written to the "number of peaks" field
//   End      - string written verbatim as the third field
// Fields, in order: cluster id, birth date, End, number of unique quotes,
// number of quotes, NumPeaks, number of space-separated pieces of the
// representative quote, representative quote, representative URL, the three
// times filled in by GetFMP (first, last, peak), archived flag, discard state.
// There is no trailing tab.
TStr DCluster::GetClusterString(TQuoteBase *QB, TDocBase *DB, TCluster& C, TFreqTripleV &FreqV, TInt NumPeaks, TStr &End) {
  TSecTm BirthDate;
  TStr QuoteStr, QuoteURL;
  C.GetBirthDate(BirthDate);
  C.GetRepresentativeQuoteString(QuoteStr, QB);
  C.GetRepresentativeQuoteURL(QB, DB, QuoteURL);

  // Split on single spaces; only the number of pieces is reported.
  TStrV Words;
  QuoteStr.SplitOnStr(" ", Words);

  TSecTm FirstTm, LastTm, PeakTm;
  DCluster::GetFMP(FreqV, FirstTm, LastTm, PeakTm, QuoteStr);

  TStr Line = C.GetId().GetStr();
  Line += "\t" + BirthDate.GetDtYmdStr();
  Line += "\t" + End;
  Line += "\t" + C.GetNumUniqueQuotes().GetStr();
  Line += "\t" + C.GetNumQuotes().GetStr();
  Line += "\t" + NumPeaks.GetStr();
  Line += "\t" + TInt(Words.Len()).GetStr();
  Line += "\t" + QuoteStr;
  Line += "\t" + QuoteURL;
  Line += "\t" + FirstTm.GetYmdTmStr();
  Line += "\t" + LastTm.GetYmdTmStr();
  Line += "\t" + PeakTm.GetYmdTmStr();
  Line += "\t" + TBool::GetStr(TBool(C.IsArchived()));
  Line += "\t" + C.GetDiscardState().GetStr();
  return Line;
}
// Function to read in a table of edges PTable AddEdgeTable(TTableContext& Context) { char FileName[200]; int ColCnt; int Reverse; printf("Adding Edge Table\n"); printf("Enter filename, number of columns (>= 2), and whether reverse? (reverse = 1, not reverse = 0\n"); scanf("%s %d %d", FileName, &ColCnt, &Reverse); Schema EdgeScm; if (Reverse == 1) { EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr)); EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr)); } else { EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr)); EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr)); } for (TInt i = 1; i < ColCnt-1; i++) { TStr ColName = "Attribute" + i.GetStr(); EdgeScm.Add(TPair<TStr, TAttrType>(ColName, atStr)); } TStr FName(FileName); PTable T = TTable::LoadSS(EdgeScm, FName, Context); return T; }
void LogOutput::LogValue(const TStr Key, TInt Value) { OutputValues.AddDat(Key, Value.GetStr()); }
// Returns "<id>, <previous id>, <depth>" for this object. The middle field is
// the literal text "NULL" when the `previous` pointer is NULL.
TStr to_string () {
  const TStr PrevStr = (previous == NULL) ? TStr("NULL") : previous->id.GetStr();
  return id.GetStr() + ", " + PrevStr + ", " + depth.GetStr ();
}
void LSH::MinHash(TQuoteBase *QB, THashSet<TMd5Sig>& Shingles, TVec<THash<TMd5Sig, TIntSet> >& SignatureBandBuckets) { Err("Creating buckets...\n"); THash < TMd5Sig, TIntV > Signatures; ComputeSignatures(Shingles, Signatures, NumBands * BandSize); // bucket creation for (int i = 0; i < NumBands; ++i) { SignatureBandBuckets.Add(THash<TMd5Sig, TIntSet>()); } // bucket filling int NumShingles = Shingles.Len(); THash<TInt, TQuote> Quotes; QB->GetIdToTQuotes(Quotes); THash<TInt, TQuote>::TIter CurI = Quotes.BegI(); THash<TInt, TQuote>::TIter EndI = Quotes.EndI(); TQuote Q; // SKYFALL for (; CurI < EndI; CurI++) { Q = CurI.GetDat(); TStrV Content; Q.GetParsedContent(Content); TInt Id = Q.GetId(); // signature for quote int ContentLen = Content.Len(); TVec < TIntV > Signature; for (int i = 0; i < ContentLen; i++) { const TMd5Sig ShingleMd5(Content[i]); Signature.Add(Signatures.GetDat(ShingleMd5)); } // place in bucket if (ContentLen < WordWindow) { for (int i = 0; i < NumBands; ++i) { TStr Sig; for (int j = 0; j < BandSize; ++j) { int CurSig = i * BandSize + j; TInt min = NumShingles; for (int k = 0; k < ContentLen; k++) { if (Signature[k][CurSig] < min) { min = Signature[k][CurSig]; } } Sig += min.GetStr() + "-"; } //Err(Sig.CStr()); const TMd5Sig SigMd5(Sig); TIntSet Bucket; SignatureBandBuckets[i].IsKeyGetDat(SigMd5, Bucket); Bucket.AddKey(Id); SignatureBandBuckets[i].AddDat(SigMd5, Bucket); } } else { } } Err("Minhash step complete!\n"); }