Exemplo n.º 1
0
Arquivo: util.cpp Projeto: pikma/Snap
// remove ending /, /index.html, etc. and strip starting www.
bool TStrUtil::GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut) {
    UrlOut = UrlIn;
    if (StripEnd(UrlIn, "/", UrlOut)) {}
    else if (StripEnd(UrlIn, "/index.html", UrlOut)) {}
    else if (StripEnd(UrlIn, "/index.htm", UrlOut)) {}
    else if (StripEnd(UrlIn, "/index.php", UrlOut)) {}
    if (! (UrlOut.IsPrefix("http://") || UrlOut.IsPrefix("ftp://"))) {
        // if UrlIn is relative url, try combine it with BaseUrl
        if (UrlIn.Empty() || ! (BaseUrl.IsPrefix("http://") || BaseUrl.IsPrefix("ftp://"))) {
            //printf("** Bad URL: base:'%s' url:'%s'\n", BaseUrl.CStr(), UrlIn.CStr());
            return false;
        }
        TChA Out;
        if (! GetNormalizedUrl(BaseUrl, TChA(), Out)) {
            return false;
        }
        if (UrlIn[0] != '/') {
            Out.AddCh('/');
        }
        Out += UrlOut;
        UrlOut = Out;
    }
    // http://www. --> http://
    if (UrlOut.IsPrefix("http://www.")) {
        TStr prefix("http://");
        UrlOut = prefix + UrlOut.GetSubStr(11, TInt::Mx);
    }
    UrlOut.ToLc();
    return true;
}
Exemplo n.º 2
0
void MakeSlashdotSignNet(const TStr InFNm, TStr OutFNm, TStr Desc, THashSet<TChA> NIdSet) {
  //THashSet<TChA> NIdSet;
  TChA LnStr;
  TVec<char *> WrdV;
  int Sign;
  //PSignNet Net = TSignNet::New();
  TPt<TNodeEDatNet<TInt, TInt> >  Net = TNodeEDatNet<TInt, TInt>::New();
  int i = 0;
  for (TFIn FIn(InFNm); FIn.GetNextLn(LnStr); ) {
    if (LnStr.Empty() || LnStr[0]=='#') { continue; }
    LnStr.ToLc();
    TStrUtil::SplitOnCh(LnStr, WrdV, '\t', false);
    //NIdSet.AddKey(WrdV[0]);
    if (strcmp(WrdV[1], "friends")==0) { Sign = 1; }
    else if (strcmp(WrdV[1], "fans")==0) { continue; } // skip (fans are in-friends)
    else if (strcmp(WrdV[1], "foes")==0) { Sign = -1; } else { Fail; }
    const int SrcNId = NIdSet.AddKey(WrdV[0]);
    if (! Net->IsNode(SrcNId)) {
      Net->AddNode(SrcNId); }   
    for (int e = 2; e < WrdV.Len(); e++) {
      const int DstNId = NIdSet.AddKey(WrdV[e]);
      i ++ ;
      if ((SrcNId != DstNId) && ! Net->IsEdge(SrcNId, DstNId)) {
        if (! Net->IsNode(DstNId))
          Net->AddNode(DstNId);
        Net->AddEdge(SrcNId, DstNId, Sign);
      }
    }  
  }  
  TSnap::PrintInfo(Net, "Slashdot (" + TInt::GetStr(i) + ")");  

  // copied from gio.h - line 111
  FILE *F = fopen(OutFNm.CStr(), "wt");
  fprintf(F, "# Directed graph: %s\n", OutFNm.CStr());
  if (! Desc.Empty()) 
    fprintf(F, "# %s\n", (Desc).CStr());
    fprintf(F, "# Nodes: %d Edges: %d\n", Net->GetNodes(), Net->GetEdges());
    fprintf(F, "# UserId\tGroupId\tSign\n"); 
  for (TNodeEDatNet<TInt,TInt>::TEdgeI ei = Net->BegEI(); ei < Net->EndEI(); ei++) {
      fprintf(F, "%d\t%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId(), ei()());
  }
  fclose(F);
  
  PrintGraphStatTable(Net, OutFNm, Desc);
}