// remove ending /, /index.html, etc. and strip starting www. bool TStrUtil::GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut) { UrlOut = UrlIn; if (StripEnd(UrlIn, "/", UrlOut)) {} else if (StripEnd(UrlIn, "/index.html", UrlOut)) {} else if (StripEnd(UrlIn, "/index.htm", UrlOut)) {} else if (StripEnd(UrlIn, "/index.php", UrlOut)) {} if (! (UrlOut.IsPrefix("http://") || UrlOut.IsPrefix("ftp://"))) { // if UrlIn is relative url, try combine it with BaseUrl if (UrlIn.Empty() || ! (BaseUrl.IsPrefix("http://") || BaseUrl.IsPrefix("ftp://"))) { //printf("** Bad URL: base:'%s' url:'%s'\n", BaseUrl.CStr(), UrlIn.CStr()); return false; } TChA Out; if (! GetNormalizedUrl(BaseUrl, TChA(), Out)) { return false; } if (UrlIn[0] != '/') { Out.AddCh('/'); } Out += UrlOut; UrlOut = Out; } // http://www. --> http:// if (UrlOut.IsPrefix("http://www.")) { TStr prefix("http://"); UrlOut = prefix + UrlOut.GetSubStr(11, TInt::Mx); } UrlOut.ToLc(); return true; }
void MakeSlashdotSignNet(const TStr InFNm, TStr OutFNm, TStr Desc, THashSet<TChA> NIdSet) { //THashSet<TChA> NIdSet; TChA LnStr; TVec<char *> WrdV; int Sign; //PSignNet Net = TSignNet::New(); TPt<TNodeEDatNet<TInt, TInt> > Net = TNodeEDatNet<TInt, TInt>::New(); int i = 0; for (TFIn FIn(InFNm); FIn.GetNextLn(LnStr); ) { if (LnStr.Empty() || LnStr[0]=='#') { continue; } LnStr.ToLc(); TStrUtil::SplitOnCh(LnStr, WrdV, '\t', false); //NIdSet.AddKey(WrdV[0]); if (strcmp(WrdV[1], "friends")==0) { Sign = 1; } else if (strcmp(WrdV[1], "fans")==0) { continue; } // skip (fans are in-friends) else if (strcmp(WrdV[1], "foes")==0) { Sign = -1; } else { Fail; } const int SrcNId = NIdSet.AddKey(WrdV[0]); if (! Net->IsNode(SrcNId)) { Net->AddNode(SrcNId); } for (int e = 2; e < WrdV.Len(); e++) { const int DstNId = NIdSet.AddKey(WrdV[e]); i ++ ; if ((SrcNId != DstNId) && ! Net->IsEdge(SrcNId, DstNId)) { if (! Net->IsNode(DstNId)) Net->AddNode(DstNId); Net->AddEdge(SrcNId, DstNId, Sign); } } } TSnap::PrintInfo(Net, "Slashdot (" + TInt::GetStr(i) + ")"); // copied from gio.h - line 111 FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "# Directed graph: %s\n", OutFNm.CStr()); if (! Desc.Empty()) fprintf(F, "# %s\n", (Desc).CStr()); fprintf(F, "# Nodes: %d Edges: %d\n", Net->GetNodes(), Net->GetEdges()); fprintf(F, "# UserId\tGroupId\tSign\n"); for (TNodeEDatNet<TInt,TInt>::TEdgeI ei = Net->BegEI(); ei < Net->EndEI(); ei++) { fprintf(F, "%d\t%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId(), ei()()); } fclose(F); PrintGraphStatTable(Net, OutFNm, Desc); }