K kclone(K a)//Deep copy -- eliminate where possible
{
  if(!a) R 0;
  I t=a->t,n=a->n;
  K z= 7==t?Kv():newK(t,n);
  if     (4==ABS(t)) DO(n, kS(z)[i]=kS(a)[i]) //memcpy everywhere is better
  else if(3==ABS(t)) DO(n, kC(z)[i]=kC(a)[i])
  else if(2==ABS(t)) DO(n, kF(z)[i]=kF(a)[i])
  else if(1==ABS(t)) DO(n, kI(z)[i]=kI(a)[i])
  else if(0==    t ) DO(n, kK(z)[i]=kclone(kK(a)[i]))
  else if(5==    t ) DO(n, kK(z)[i]=kclone(kK(a)[i]))
  else if(7==    t )
  {
    I k=0;
    z->t=a->t;
    I vt=z->n = a->n;
    K kv;
    V*v;
    SW(vt)
    {
      CS(1, k=((K)kV(a)[CODE])->n-1;
            M(z,kv=newK(-4,k+1))
            v=(V*)kK(kv);
            //v[k]=0;//superfluous reminder
            DO(k, V w=kW(a)[i];
                  if(VA(w))v[i]=w; //TODO: is this ok for NAMES? see similar code in capture()
                  else
                  {
                    K r=kclone(*(K*)w); //oom
                    V q=newE(LS,r); //oom
                    kap((K*) kV(z)+LOCALS,&q);//oom
                    cd(q);//kap does ci
                    q=EVP(q); //oom free z etc. kap needs checking
                    v[i]=q;
                  }
               )
         )
      CS(2, M(z,kv=newK(-4,3))
            v=(V*)kK(kv);
            memcpy(v,kW(a),3*sizeof(V));
         )
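/* For readability, a minimal sketch of the short-name helpers kclone leans on,
   as they are conventionally defined in kona-style K sources. These definitions
   are an assumption here; the project's own headers are authoritative. */
#include <stdio.h>

typedef long long I;                        /* interpreter-wide integer type (assumed) */
#define R  return                           /* terse return                            */
#define SW switch                           /* terse switch                            */
#define CS(n, x) case n: x; break;          /* one labeled switch case                 */
#define DO(n, x) { I i = 0, _i = (n); for (; i < _i; ++i) { x; } }

/* Tiny demo: DO(n, body) runs body with a loop variable named i, which is why
   expressions like kS(z)[i]=kS(a)[i] are valid inside kclone's copy loops. */
int main(void)
{
  I sum = 0;
  DO(5, sum += i)                           /* i runs 0..4 */
  printf("%lld\n", (long long)sum);         /* prints 10   */
  R 0;
}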
/*
 * Read the graph from a file and store it in a Graph named G
 */
Graph readGraph(char * filename){
  FILE *fp;
  Graph G;
  char linha[BUFFSIZE];
  int head, tail, i;

  // Opening the file with a 'read' flag
  if ((fp = fopen(filename, "r")) == NULL){
    printf("File not found\n");
    exit(1);
  }

  // Creating graph with no nodes
  G = (Graph) malloc(sizeof(struct graph));
  G->V = 0;
  G->E = 0;
  G->n_flow = 0;
  for(i = 0; i < NETSIZE; i++){
    G->flow[i] = 0;
    G->list[i].pred = -1;
    G->list[i].n = NULL;
    G->list[i + NETSIZE].pred = -1;
    G->list[i + NETSIZE].n = NULL;
  }

  // Adding edges from file input
  while(fgets(linha, BUFFSIZE, fp) != NULL){
    if(sscanf(linha, "%d %d", &tail, &head) == 2)
      graphInsertE(G, newE(tail, head));
    else
      printf("Invalid line format\n");
  }

  // Closing file
  fclose(fp);
  return G;
}
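/* readGraph depends on declarations defined elsewhere in the project (Graph,
   struct graph, Edge, newE, graphInsertE, NETSIZE, BUFFSIZE). The sketch below
   shows one plausible shape for them so the function reads on its own; the
   field layout, constants, and by-value Edge are guesses, not the project's
   actual graph.h. */
#include <stdio.h>
#include <stdlib.h>

#define NETSIZE  1000               /* assumed maximum number of nodes        */
#define BUFFSIZE 128                /* assumed input line buffer size         */

typedef struct edge { int tail, head; } Edge;

typedef struct node {               /* adjacency-list cell (assumed layout)   */
  int id;
  struct node *next;
} *link;

struct adj {                        /* per-vertex record touched by readGraph */
  int pred;                         /* predecessor, -1 while unset            */
  link n;                           /* head of the adjacency list             */
};

struct graph {
  int V, E;                         /* vertex and edge counts                 */
  int n_flow;
  int flow[NETSIZE];
  struct adj list[2 * NETSIZE];     /* readGraph initializes both halves      */
};
typedef struct graph *Graph;

Edge newE(int tail, int head);            /* builds an edge value             */
void graphInsertE(Graph G, Edge e);       /* links e into G's adjacency lists */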
string WebCrawler::crawl(string & webpage, string & stopWordsFile) {
  URL startURL(webpage, "");
  string currentPageContents;
  try {
    currentPageContents = downloader->download(startURL);
  } catch (CS240Exception & e) {
    string newString = e.GetMessage();
    newString += " Invalid Start URL";
    CS240Exception newE(newString);
    throw newE;
  }
  cout << "Crawling ... " << endl;
  string currentPageDescription = parser->findDescription(currentPageContents);
  currentPage = new Page(currentPageDescription, startURL);
  queue->AddPage(currentPage);
  history->AddPage(currentPage);

  while (queue->HasNext()) {
    currentPage = queue->GetNext();
    URL currentURL(currentPage->GetURL());
    currentPageContents = downloader->download(currentURL);
    cout << " Crawling: " << currentURL.GetResolvedURL() << endl;

    //Note: findLinks adds all the links in currentPageContents to the queue, if they
    // are not already in the history. If a page is added to the queue, it is also
    // added to the history.
    URL * newURLstoCrawl = parser->findLinks(currentPageContents, currentPage);
    int numOfURLs = parser->countLinks(currentPageContents);
    processNewLinks(newURLstoCrawl, numOfURLs, startURL);
    delete [] newURLstoCrawl;

    //Note: findWords finds words and adds them to the index.
    parser->findWords(currentPageContents, index, currentPage);
  }

  //Note: the xmlPrinter will not print words that are in the stop word file.
  cout << "Generating Output File..." << endl;
  return xmlPrinter->print(index, stopWordsFile, history);
}
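// A possible call site for crawl(), shown only as a sketch: the header name,
// the WebCrawler default constructor, and the command-line interface are
// assumptions, since none of them appear in the excerpt above.
#include <fstream>
#include <iostream>
#include <string>
#include "WebCrawler.h"   // hypothetical header for the class shown above

int main(int argc, char * argv[]) {
  if (argc != 4) {
    std::cerr << "usage: crawler <start-url> <stopwords-file> <output-file>" << std::endl;
    return 1;
  }
  std::string startURL(argv[1]);
  std::string stopWordsFile(argv[2]);

  WebCrawler crawler;                                    // assumed default constructor
  std::string xml = crawler.crawl(startURL, stopWordsFile);

  std::ofstream out(argv[3]);                            // write the generated XML index
  out << xml;
  return 0;
}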