// Hashes the contents of the file at Path using the hash named by Type.
// The file is read in 4096-byte chunks until STREAMReadBytes reports EOF;
// every chunk is fed to the hash's Update callback.
// Returns FALSE if the file cannot be opened or HashInit fails for Type,
// otherwise whatever the hash's Finish callback returns after writing the
// result (in the requested Encoding) through Return.
int HashFile(char **Return, char *Type, char *Path, int Encoding)
{
    STREAM *Input;
    THash *Hasher;
    char *Buffer = NULL;
    int BytesRead;
    int RetVal;

    Input = STREAMOpenFile(Path, O_RDONLY);
    if (Input == NULL) return(FALSE);

    Hasher = HashInit(Type);
    if (Hasher == NULL)
    {
        // Do not leave the stream open when the hash type is not available.
        STREAMClose(Input);
        return(FALSE);
    }

    Buffer = SetStrLen(Buffer, 4096);
    for (BytesRead = STREAMReadBytes(Input, Buffer, 4096);
         BytesRead != EOF;
         BytesRead = STREAMReadBytes(Input, Buffer, 4096))
    {
        Hasher->Update(Hasher, Buffer, BytesRead);
    }

    DestroyString(Buffer);
    STREAMClose(Input);

    RetVal = Hasher->Finish(Hasher, Encoding, Return);
    HashDestroy(Hasher);

    return(RetVal);
}
/// Vector overload: the position of each community in CmtyVV is used as its
/// community ID, and the work is delegated to the hash-table overload.
void TAGM::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) {
  THash<TInt,TIntV> IndexedCmtyH;
  const int NCmty = CmtyVV.Len();
  for (int CId = 0; CId < NCmty; ++CId) {
    IndexedCmtyH.AddDat(CId, CmtyVV[CId]);
  }
  GetNodeMembership(NIDComVH, IndexedCmtyH);
}
/// Shingles by words void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase, TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen, THash<TMd5Sig, TIntV>& ShingleToClusterIds) { Err("Hashing shingles of clusters...\n"); for (int i = 0; i < ClusterIds.Len(); i++) { if (i % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len()); } TCluster C; ClusterBase->GetCluster(ClusterIds[i], C); //fprintf(stderr, "%d vs. %d\n", ClusterIds[i].Val, C.GetId().Val); // Put x-word shingles into hash table; x is specified by ShingleLen parameter THashSet < TMd5Sig > CHashedShingles; GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles); for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI(); Hash < CHashedShingles.EndI(); Hash++) { TIntV ShingleClusterIds; if (ShingleToClusterIds.IsKey(*Hash)) { ShingleClusterIds = ShingleToClusterIds.GetDat(*Hash); } ShingleClusterIds.Add(ClusterIds[i]); ShingleToClusterIds.AddDat(*Hash, ShingleClusterIds); } } Err("Done hashing!\n"); }
// IN-OUT edges are swapped (so that the prog runs faster) // Send message via IN edge proportional to the OUT edge weight void TWgtNet::ReinforceEdges(const int& NIters) { THash<TInt, TFlt> OutWgtSumH; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { double wgt = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { wgt += NI.GetOutEDat(e); } OutWgtSumH.AddDat(NI.GetId(), wgt); } printf("Reinforcing edges for %d iterations\n", NIters); // iterate TExeTm ExeTm; for (int iter = 0; iter < NIters; iter++) { for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { const double X = TInt::Rnd.GetUniDev() * OutWgtSumH.GetDat(NI.GetId()); double x = 0; int e = 0; for ( ; x + NI.GetOutEDat(e) < X; e++) { x += NI.GetOutEDat(e); } IAssert(IsEdge(NI.GetOutNId(e), NI.GetId())); GetEDat(NI.GetOutNId(e), NI.GetId()) += 1; // reinforce the edge OutWgtSumH.GetDat(NI.GetOutNId(e)) += 1; } if (iter % (NIters/100) == 0) { printf("\r%d [%s]", iter, ExeTm.GetStr()); } } printf(" done.\n"); }
// Picks up to `num` distinct seed nodes among the nodes of g whose entry in
// infection_state differs from `infect`, drawing with the help of cumulative
// node degrees (see `find`, defined elsewhere).
//
// g               graph to draw from
// num             number of seeds requested
// infection_state per-node state array, indexed by (node id - 1)
// infect          state value that excludes a node from being chosen
//
// Returns a heap-allocated hash (node id -> 1); the caller owns it.
THash<TInt, TInt> * choose_seeds (const PUNGraph g, const int num, const int * infection_state, const int infect) {
  THash<TInt, TInt> choices;             // candidate index -> cumulative degree
  THash<TInt, TUNGraph::TNode> nodes;    // candidate index -> node
  THash<TInt, TInt> * output = new THash<TInt, TInt> ();
  TInt weight = 0;
  TInt num_total = 0;
  for (TUNGraph::TNodeI n = g->BegNI(); n != g->EndNI(); n++) {
    //cout << "nodeID: " << n.GetId() << ",\tStatus: " << infection_state[n.GetId () - 1] << endl;
    if (infection_state[n.GetId () - 1] != infect) {
      weight += n.GetDeg ();
      choices.AddDat (num_total, weight);
      nodes.AddDat (num_total, n.GetId());
      num_total++;
    }
  }

  // Only distinct nodes are accepted below, so asking for more seeds than
  // there are candidates could never be satisfied and the loop would spin
  // forever. Cap the target at the number of candidates.
  // NOTE(review): candidates with degree 0 add no weight and may still be
  // unreachable by the draw -- confirm against `find` if such graphs occur.
  const int num_wanted = (num < num_total.Val) ? num : num_total.Val;

  // TRnd random ((int) time(NULL));
  // TRnd random (0);
  TInt num_chosen = 0;
  while (num_chosen < num_wanted) {
    TInt choice = my_random.GetUniDevInt (weight);
    TUNGraph::TNode node_choice = nodes[find (choice, choices, 0, num_total-1)];
    if (!output->IsKey(node_choice.GetId())) {
      num_chosen++;
      // cout << node_choice.GetId () << "\n";
      output->AddDat(node_choice.GetId (), 1);
    }
  }
  return output;
}
// Prepares the mailbox list view (IDC_LIST_MAILBOX) of the dialog hWnd:
// attaches a one-icon image list, enables full-row selection and check boxes,
// inserts two columns, then adds one row per mailbox returned by EnumMailBox
// and ticks the rows whose key is present in g_temporary_mailbox.
void InitListViewContorol(HWND hWnd)
{
    HWND hList = GetDlgItem(hWnd, IDC_LIST_MAILBOX);
    LV_COLUMN col;
    LV_ITEM item;
    HIMAGELIST hIcons;
    RECT rcClient;

    GetClientRect(hList, &rcClient);

    // Create the image list
    hIcons = ImageList_Create(16, 16, ILC_COLOR | ILC_MASK, 1 , 1);
    ImageList_AddIcon(hIcons, LoadIcon(g_hInstance, "IDI_MAILBOX"));
    ListView_SetImageList(hList, hIcons, LVSIL_SMALL);
    ListView_SetExtendedListViewStyle(hList, LVS_EX_FULLROWSELECT | LVS_EX_CHECKBOXES);

    // Add the headers (not displayed)
    col.mask = LVCF_FMT | LVCF_SUBITEM | LVCF_TEXT | LVCF_WIDTH;
    col.fmt = LVCFMT_LEFT;

    // Column 0 takes the full client width
    col.pszText = "メールボックス";
    col.cx = rcClient.right;
    col.iSubItem = 0;
    col.cchTextMax = strlen(col.pszText);
    ListView_InsertColumn(hList, 0, &col);

    // Column 1 has zero width
    col.pszText = "フォルダ名";
    col.cx = 0;
    col.iSubItem = 1;
    col.cchTextMax = strlen(col.pszText);
    ListView_InsertColumn(hList, 1, &col);

    // Enumerate the mailboxes
    THash MailBoxes;
    EnumMailBox(MailBoxes);

    // Add the mailboxes, one row each
    int row = 0;
    for (THash::iterator box = MailBoxes.begin(); box != MailBoxes.end(); ++box, ++row)
    {
        item.mask = LVIF_TEXT | LVIF_IMAGE;
        item.iItem = row;
        item.iImage = 0;

        // Display name
        item.pszText = (char *)box->second.c_str();
        item.iSubItem = 0;
        ListView_InsertItem(hList, &item);

        // Mailbox directory name
        item.pszText = (char *)box->first.c_str();
        item.iSubItem = 1;
        ListView_SetItem(hList, &item);

        // Tick the mailboxes that are currently configured
        if (g_temporary_mailbox.count(box->first) == 1)
        {
            ListView_SetCheckState(hList, row, 1);
        }
    }
}
int ComputeKCore(const PUNGraph& G) { int cnt = 0; for(TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) cnt = max(cnt, NI.GetOutDeg()); THashSet <TInt> D[cnt+1]; THash <TInt, TInt> deg; for(TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { TInt tmp = NI.GetOutDeg() - G->IsEdge(NI.GetId(), NI.GetId() ); D[tmp.Val].AddKey(NI.GetId()); deg.AddDat(NI.GetId()) = tmp; } int max_k = 0; for(int num_iters = 0;num_iters < G->GetNodes(); num_iters++) for(int i = 0; i < cnt; i++) if(D[i].Empty() == 0) { max_k = max(max_k, i); TInt a = *(D[i].BegI()); D[i].DelKey(a); deg.AddDat(a.Val) = -1; // Hope overwriting works TUNGraph::TNodeI NI = G->GetNI(a.Val); for(int e = 0; e < NI.GetOutDeg(); e++) { TInt b = NI.GetOutNId(e); if(deg.GetDat(b) >= 0) { int Id = deg.GetKeyId(b); D[deg[Id].Val].DelKey(b); deg[Id] = deg[Id] - 1; //Hope the overwriting works D[deg[Id]].AddKey(b); } } break; } return max_k; }
void LSH::ElCheapoHashing(TQuoteBase *QuoteBase, TInt ShingleLen, THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) { fprintf(stderr, "Hashing shingles the el cheapo way...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter TStr QContentStr; Q.GetParsedContentString(QContentStr); TChA QContentChA = TChA(QContentStr); for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) { TChA ShingleChA = TChA(); for (int j = 0; j < ShingleLen; j++) { ShingleChA.AddCh(QContentChA.GetCh(i + j)); } TStr Shingle = TStr(ShingleChA); const TMd5Sig ShingleMd5(Shingle); TIntSet ShingleQuoteIds; if (ShingleToQuoteIds.IsKey(ShingleMd5)) { ShingleQuoteIds = ShingleToQuoteIds.GetDat(ShingleMd5); } ShingleQuoteIds.AddKey(QuoteIds[qt]); ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); } } Err("Done with el cheapo hashing!\n"); }
// Feeds Len bytes of Data to the hash object stored in HMAC->Ctx by calling
// that object's own Update callback.
void HMACUpdate(THash *HMAC, char *Data, int Len)
{
    THash *Inner = (THash *) HMAC->Ctx;

    Inner->Update(Inner, Data, Len);
}
/// Walks every shingle bucket and calls AddEdgeIfSimilar once for each
/// unordered pair of quotes that share at least one bucket. Pairs already
/// handled (in either order) are remembered and skipped.
/// Prints to stderr the number of pairs summed over all buckets and the number
/// of pairs actually compared.
void QuoteGraph::CompareUsingShingles(THash<TMd5Sig, TIntSet>& Shingles) {
  TVec<TMd5Sig> Keys;
  Shingles.GetKeyV(Keys);

  THashSet<TIntPr> SeenPairs;   // both (a,b) and (b,a) are stored
  int PossibleCompares = 0;     // sum over buckets of n*(n-1)/2
  int ActualCompares = 0;       // pairs handed to AddEdgeIfSimilar

  const int NKeys = Keys.Len();
  for (int k = 0; k < NKeys; k++) {
    if (k % 100 == 0) {
      Err("Processed %d out of %d shingles, count = %d\n", k, NKeys, PossibleCompares);
    }

    TIntSet Bucket;
    Shingles.IsKeyGetDat(Keys[k], Bucket);

    for (TIntSet::TIter First = Bucket.BegI(); First < Bucket.EndI(); First++) {
      // Second starts one past First so each unordered pair is seen once.
      TIntSet::TIter Second = First;
      Second++;
      for ( ; Second < Bucket.EndI(); Second++) {
        const TIntPr Forward(First.GetKey(), Second.GetKey());
        const TIntPr Backward(Second.GetKey(), First.GetKey());
        if (SeenPairs.IsKey(Forward) || SeenPairs.IsKey(Backward)) {
          continue;
        }
        SeenPairs.AddKey(Forward);
        SeenPairs.AddKey(Backward);
        ActualCompares++;
        AddEdgeIfSimilar(First.GetKey(), Second.GetKey());
      }
    }

    PossibleCompares += Bucket.Len() * (Bucket.Len() - 1) / 2;
  }

  fprintf(stderr, "NUMBER OF COMPARES: %d\n", PossibleCompares);
  fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", ActualCompares);
}
void LogOutput::PrintClusterInformation(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, PNGraph& QGraph, TIntV& ClusterIds, TSecTm PresentTime, TIntV &OldTopClusters) { if (!ShouldLog) return; TStr CurDateString = PresentTime.GetDtYmdStr(); Err("Writing cluster information...\n"); // PREVIOUS RANKING SETUP THash<TInt, TInt> OldRankings; if (OldTopClusters.Len() > 0) { for (int i = 0; i < OldTopClusters.Len(); i++) { OldRankings.AddDat(OldTopClusters[i], i + 1); } } TStrV RankStr; TStr ClusterJSONDirectory = Directory + "/web/json/clusters/"; for (int i = 0; i < ClusterIds.Len(); i++) { TStr OldRankStr; ComputeOldRankString(OldRankings, ClusterIds[i], i+1, OldRankStr); RankStr.Add(OldRankStr); // JSON file for each cluster! TPrintJson::PrintClusterJSON(QB, DB, CB, QGraph, ClusterJSONDirectory, ClusterIds[i], PresentTime); } Err("JSON Files for individual written!\n"); TStr JSONTableFileName = Directory + "/web/json/daily/" + CurDateString + ".json"; TPrintJson::PrintClusterTableJSON(QB, DB, CB, JSONTableFileName, ClusterIds, RankStr); Err("JSON Files for the cluster table written!\n"); }
/// rewire bipartite community affiliation graphs void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd) { THash<TInt,TIntV> CmtyVH; for (int i = 0; i < CmtyVVIn.Len(); i++) { CmtyVH.AddDat(i, CmtyVVIn[i]); } TAGMUtil::RewireCmtyNID(CmtyVH, Rnd); CmtyVH.GetDatV(CmtyVVOut); }
/// Builds a four-line, CRLF-terminated summary with the sizes of the socket,
/// host-resolution, socket-event and timer tables.
TStr TSockSys::GetStatusStr(){
  TChA StatusChA;
  StatusChA += TStr("Sockets: ") + TInt::GetStr(SockIdToHndH.Len()) + "\r\n";
  StatusChA += TStr("Host-Resolutions: ") + TInt::GetStr(HndToSockHostH.Len()) + "\r\n";
  StatusChA += TStr("Socket-Events: ") + TInt::GetStr(IdToSockEventH.Len()) + "\r\n";
  StatusChA += TStr("Timers: ") + TInt::GetStr(SockIdToTimerHndH.Len()) + "\r\n";
  return StatusChA;
}
// Hashes len bytes of text with the hash named by Type and writes the result,
// in the requested Encoding, through Return.
// Returns 0 if HashInit cannot provide a hash for Type, otherwise whatever the
// hash's Finish callback returns.
int HashBytes(char **Return, char *Type, char *text, int len, int Encoding)
{
    THash *Hash;

    Hash=HashInit(Type);
    // HashInit can fail (the other callers in this code check for it);
    // calling through a NULL hash would crash.
    if (! Hash) return(0);

    Hash->Update(Hash, text, len);
    return(Hash->Finish(Hash, Encoding, Return));
}
void TTop2FriendNet::GetAvgSDevV(const THash<TFlt, TMom>& MomH, TFltTrV& ValAvgSDevV) { ValAvgSDevV.Clr(false); for (int i = 0; i < MomH.Len(); i++) { TMom Mom=MomH[i]; Mom.Def(); ValAvgSDevV.Add(TFltTr(MomH.GetKey(i), Mom.GetMean(), Mom.GetSDev())); } ValAvgSDevV.Sort(); }
void SaveAll() { printf("\n<<< SAVING STARTS (PLEASE BE PATIENT!!!) >>> .......\n"); TZipOut resOut("CascadesFullUrlsOnTwitterData.rar"); cascadesInUrlsOnTwitter.Save(resOut); printf("The size of CascadesFullUrlsOnTwitterData was: %d\n",cascadesInUrlsOnTwitter.Len()); printf("\n<<<<<<<< SAVING DONE >>>>>>>>\n\n"); }
/// Inverts a <community ID, member node IDs> table: for every member of every
/// community, adds the community ID to that node's set in NIDComVH.
/// NIDComVH is not cleared first; existing entries are extended.
void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
  for (THash<TInt,TIntV>::TIter CmtyI = CmtyVH.BegI(); CmtyI < CmtyVH.EndI(); CmtyI++) {
    const int CmtyId = CmtyI.GetKey();
    const TIntV& Members = CmtyI.GetDat();
    for (int m = 0; m < Members.Len(); m++) {
      NIDComVH.AddDat(Members[m]).AddKey(CmtyId);
    }
  }
}
/// Inverts a <community ID, member node IDs> table: for every member of every
/// community, appends the community ID to that node's vector in NIDComVH.
/// NIDComVH is not cleared first; existing entries are extended.
void TAGM::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
  for (int KeyId = 0; KeyId < CmtyVH.Len(); ++KeyId) {
    const int CmtyId = CmtyVH.GetKey(KeyId);
    for (int m = 0; m < CmtyVH[KeyId].Len(); ++m) {
      const int NodeId = CmtyVH[KeyId][m];
      NIDComVH.AddDat(NodeId).Add(CmtyId);
    }
  }
}
/// Counts, for every node ID appearing in CmtyVV, how many community member
/// lists contain it (one count per occurrence). NIDComVH is cleared first.
void TAGMUtil::GetNodeMembership(THash<TInt,TInt >& NIDComVH, const TVec<TIntV>& CmtyVV) {
  NIDComVH.Clr();
  for (int c = 0; c < CmtyVV.Len(); c++) {
    const TIntV& Members = CmtyVV[c];
    for (int m = 0; m < Members.Len(); m++) {
      NIDComVH.AddDat(Members[m])++;
    }
  }
}
/// Pops entries from MxQHeap until one is found that is still valid, and
/// returns it. An entry is discarded when either of its two community IDs
/// (Val2, Val3) is no longer a key of CmtyQH, or when its value (Val1) matches
/// the current GetMxQ() of neither community.
/// Returns (-1, -1, -1) when the heap runs empty.
TFltIntIntTr FindMxQEdge() {
  while (! MxQHeap.Empty()) {
    const TFltIntIntTr Cand = MxQHeap.PopHeap();

    const bool BothPresent = CmtyQH.IsKey(Cand.Val2) && CmtyQH.IsKey(Cand.Val3);
    if (! BothPresent) { continue; }

    const bool Outdated = Cand.Val1 != CmtyQH.GetDat(Cand.Val2).GetMxQ()
                       && Cand.Val1 != CmtyQH.GetDat(Cand.Val3).GetMxQ();
    if (Outdated) { continue; }

    return Cand;
  }
  return TFltIntIntTr(-1, -1, -1);
}
/// Returns the stop-word set for SwSetType. Sets are created on first request
/// with TSwSet::New and kept in a function-local static table, so later calls
/// with the same type return the stored set.
PSwSet TSwSet::GetSwSet(const TSwSetType& SwSetType){
  static THash<TInt, PSwSet> SwSetCacheH;
  const int TypeId = int(SwSetType);
  PSwSet SwSet;
  if (!SwSetCacheH.IsKeyGetDat(TInt(TypeId), SwSet)){
    // Not created yet: build it and remember it.
    SwSet=TSwSet::New(SwSetType);
    SwSetCacheH.AddDat(TInt(TypeId), SwSet);
  }
  return SwSet;
}
int main(int argc, char* argv[]) { // TFltPrV v; // v.Add(TFltPr(1,4)); // v.Add(TFltPr(5,5)); // v.Add(TFltPr(9,11)); // v.Add(TFltPr(20,8)); // v.Add(TFltPr(21,30)); // cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl; // return 0; TExeTm ExeTm; try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nPlotting Individually Memes-Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // URLS THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesFullUrlsOnTwitterData_FINALFILTERED // CONTENTS //THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesOnTwitterData_FINALFILTERED // Plotting THash< TUInt , TSecTmV > twitterTotal; for(int i=0;i<twitterContents.Len();i++) { TSecTmV tmp; tmp.AddV(twitterContents[i]); tmp.AddV(twitterUrls[i]); twitterTotal.AddDat(i,tmp); } plotScatterLengthOfEachCascade(quotes,twitterUrls,"Urls"); plotScatterLengthOfEachCascade(quotes,twitterContents,"Contents"); plotScatterLengthOfEachCascade(quotes,twitterTotal,"Full"); printf("\nPlots had been drawn successfully."); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int HashBytes(char **Return, char *Type, char *text, int len, int Encoding) { THash *Hash; int result; Hash=HashInit(Type); if (! Hash) return(0); Hash->Update(Hash, text, len); result=HashFinish(Hash, Encoding, Return); return(result); }
// Returns a reference to the data stored under oKey. If no node exists for
// oKey yet, a node constructed from the key alone is inserted first and the
// data of that new node is returned.
inline TData& operator[](const TKey& oKey)
{
    CHashIterator oPos = Find(oKey);
    CMapNode<TKey, TData>* pEntry = m_oHash.At(oPos);
    if(pEntry != NULL)
        return pEntry->oData;

    // Key not present: insert a fresh node and hand out its data.
    CMapNode<TKey, TData> oFresh(oKey);
    oPos = m_oHash.Insert(oFresh);
    pEntry = m_oHash.At(oPos);
    return pEntry->oData;
}
void TSockSys::DelIfSockTimer(const uint64& SockId) { if (SockIdToTimerHndH.IsKey(SockId)) { // get timer handle uv_timer_t* TimerHnd = SockIdToTimerHndH.GetDat(SockId); // stop the timer uv_timer_stop(TimerHnd); // remove shortcuts SockIdToTimerHndH.DelKey(SockId); TimerHndToSockIdH.DelKey((uint64)TimerHnd); // remove shortcuts } }
/// get hash table of <Node ID, community IDs which node belongs to>. Some nodes in NIDV might belong to no community void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) { NIDComVH.Clr(); for (int u = 0; u < NIDV.Len(); u++) { NIDComVH.AddDat(NIDV[u]); } for (int i = 0; i < CmtyVV.Len(); i++) { int CID = i; for (int j = 0; j < CmtyVV[i].Len(); j++) { int NID = CmtyVV[i][j]; NIDComVH.AddDat(NID).AddKey(CID); } } }
/// Registers Grouping under GroupColName and appends a new integer column of
/// NumRows entries in which every row listed in a group holds that group's key.
void TTable::StoreGroupCol(TStr GroupColName, const THash<TInt,TIntV>& Grouping){
  GroupMapping.AddDat(GroupColName, Grouping);

  // add a column where the value of the i'th row is the group id of row i
  IntCols.Add(TIntV(NumRows));
  const TInt ColIdx = IntCols.Len() - 1;
  ColTypeMap.AddDat(GroupColName, TPair<TYPE,TInt>(INT, ColIdx));

  for(THash<TInt,TIntV>::TIter GroupI = Grouping.BegI(); GroupI < Grouping.EndI(); GroupI++){
    const TIntV& Rows = GroupI->Dat;
    for(TInt r = 0; r < Rows.Len(); r++){
      IntCols[ColIdx][Rows[r]] = GroupI->Key;
    }
  }
}
void TSockSys::NewSock(const uint64& SockId, const uint64& SockEventId) { uv_tcp_t* SockHnd = (uv_tcp_t*)malloc(sizeof(uv_tcp_t)); int ResCd = uv_tcp_init(Loop, SockHnd); // check all went fine if (ResCd != 0) { // cleanup first free(SockHnd); // and throw exception throw TExcept::New("SockSys.NewSock: Error initializing TCP socket"); } // remember handles SockIdToHndH.AddDat(SockId, SockHnd); SockHndToIdH.AddDat((uint64)SockHnd, SockId); SockHndToEventIdH.AddDat((uint64)SockHnd, SockEventId); }
static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) { TCNMQMatrix QMatrix(Graph); // maximize modularity while (QMatrix.MergeBestQ()) { } // reconstruct communities THash<TInt, TIntV> IdCmtyH; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); } CmtyV.Gen(IdCmtyH.Len()); for (int j = 0; j < IdCmtyH.Len(); j++) { CmtyV[j].NIdV.Swap(IdCmtyH[j]); } return QMatrix.Q; }
/// For every quote, add it to corresponding bucket for each hashed x-character shingle of the quote // (Shingles by characters) void LSH::HashShingles(TQuoteBase *QuoteBase, TClusterBase *CB, TInt ShingleLen, THash<TMd5Sig, TShingleIdSet>& ShingleToQuoteIds) { Err("Hashing shingles...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } if (CB->IsQuoteInArchivedCluster(QuoteIds[qt])) continue; TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter TStr QContentStr; Q.GetParsedContentString(QContentStr); TChA QContentChA = TChA(QContentStr); int CurWord = 0; for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) { TChA ShingleChA = TChA(); for (int j = 0; j < ShingleLen; j++) { ShingleChA.AddCh(QContentChA.GetCh(i + j)); } TStr Shingle = TStr(ShingleChA); const TMd5Sig ShingleMd5(Shingle); TShingleIdSet ShingleQuoteIds; if (ShingleToQuoteIds.IsKey(ShingleMd5)) { ShingleQuoteIds = ShingleToQuoteIds.GetDat(ShingleMd5); } for (int j = CurWord; j > CurWord - WordWindow && j >= 0; j--) { ShingleQuoteIds.AddKey(TShingleId(QuoteIds[qt], j)); } ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); // up the current word index if we see a space if (QContentChA.GetCh(i + ShingleLen - 1) == ' ') { CurWord++; } } } Err("Done hashing!\n"); }