Ejemplo n.º 1
0
/**
 * Starts a load of aURI in the docshell this Location object refers to.
 *
 * @param aURI     URI to load; it is first passed through CheckURL(), which
 *                 also hands back the load-info object used for the load.
 * @param aReplace true selects the loadStopContentAndReplace load type,
 *                 false selects loadStopContent.
 * @return NS_OK when there is no docshell to navigate, NS_ERROR_FAILURE when
 *         CheckURL() fails, otherwise whatever nsIDocShell::LoadURI returns.
 */
nsresult
Location::SetURI(nsIURI* aURI, bool aReplace)
{
  nsCOMPtr<nsIDocShell> shell(do_QueryReferent(mDocShell));
  if (!shell) {
    // The weak reference no longer resolves; there is nothing to navigate.
    return NS_OK;
  }

  nsCOMPtr<nsIDocShellLoadInfo> info;
  nsresult checkResult = CheckURL(aURI, getter_AddRefs(info));
  if (NS_FAILED(checkResult)) {
    return NS_ERROR_FAILURE;
  }

  if (!aReplace) {
    info->SetLoadType(nsIDocShellLoadInfo::loadStopContent);
  } else {
    info->SetLoadType(nsIDocShellLoadInfo::loadStopContentAndReplace);
  }

  // Record the incumbent script's docshell as the source of the load,
  // when the incumbent global is an inner window.
  nsCOMPtr<nsPIDOMWindowInner> incumbentWindow =
    do_QueryInterface(mozilla::dom::GetIncumbentGlobal());
  if (incumbentWindow) {
    info->SetSourceDocShell(incumbentWindow->GetDocShell());
  }

  nsresult loadResult =
    shell->LoadURI(aURI, info, nsIWebNavigation::LOAD_FLAGS_NONE, true);
  return loadResult;
}
Ejemplo n.º 2
0
/**
 * Validates the "url" field of the proposal data.
 *
 * The field must exist, contain no whitespace, be at least 4 characters long
 * and be accepted by CheckURL(). On the first failed check a message is
 * appended to strErrorMessages and false is returned.
 *
 * @return true when every check passes, false otherwise.
 */
bool CProposalValidator::ValidateURL()
{
    std::string strURL;
    if(!GetDataValue("url", strURL)) {
        strErrorMessages += "url field not found;";
        return false;
    }

    // isspace() is only defined for values representable as unsigned char
    // (or EOF). Passing a plain char is undefined behaviour for bytes >= 0x80
    // on platforms where char is signed, so convert each byte first.
    for(std::string::const_iterator it = strURL.begin(); it != strURL.end(); ++it) {
        if(::isspace(static_cast<unsigned char>(*it))) {
            strErrorMessages += "url can't have whitespaces;";
            return false;
        }
    }

    if(strURL.size() < 4U) {
        strErrorMessages += "url too short;";
        return false;
    }

    if(!CheckURL(strURL)) {
        strErrorMessages += "url invalid;";
        return false;
    }

    return true;
}
Ejemplo n.º 3
0
/*
 * CrawlPage - walk every URL found in an already-fetched page and queue the
 * ones that are new.
 *
 * For each URL returned by GetNextURL() the URL is normalized and checked with
 * CheckURL() (0 means acceptable). An acceptable URL is fetched with
 * GetWebPage(); if the fetch succeeds and HashTableInsert() reports the URL as
 * new, the page is appended to WebPageList and written out with WriteFile().
 * Nothing is done when the links would lie deeper than maxWebPageDepth.
 *
 * webpage: page whose html is scanned; links get depth webpage.depth + 1.
 *
 * On allocation failure an error is printed to stderr and crawling of this
 * page stops.
 */
void CrawlPage(WebPage webpage){
	char* nexturl = NULL;
	int lastpos = 0;
	int depth = webpage.depth + 1;

	if(depth > maxWebPageDepth) return;

	printf("\n\n[crawler]: Crawling - %s\n\n",webpage.url);

	while((lastpos = GetNextURL(webpage.html, lastpos, webpage.url, &nexturl))>0){
		NormalizeURL(nexturl);
		if(!CheckURL(nexturl)){
			// setup new page
			struct WebPage* newwebpage = (WebPage*)calloc(1,sizeof(WebPage));
			if(newwebpage == NULL){
				fprintf(stderr,"[crawler]: Error: Out of memory\n");
				free(nexturl);
				return;
			}
			newwebpage->url = (char*)calloc(strlen(nexturl)+1, sizeof(char));
			if(newwebpage->url == NULL){
				fprintf(stderr,"[crawler]: Error: Out of memory\n");
				free(newwebpage);
				free(nexturl);
				return;
			}
			strcpy(newwebpage->url,nexturl);
			newwebpage->depth = depth;

			// get new webpage
			if(!GetWebPage(newwebpage)){
				// Fetch failed: the page is never queued, so release what was
				// allocated above instead of leaking it.
				// NOTE(review): only the url and the struct are freed here;
				// confirm GetWebPage leaves no html buffer behind on failure.
				free(newwebpage->url);
				free(newwebpage);
			} else if(HashTableInsert(nexturl)){								 // If not found in hash table, add to hash table
				printf("[crawler]: Parser found new link - %s\n",nexturl);
				struct ListNode* listentry = (ListNode*)calloc(1,sizeof(ListNode));
				if(listentry == NULL){
					fprintf(stderr,"[crawler]: Error: Out of memory\n");
					CleanUpPage(newwebpage);
					free(nexturl);
					return;
				}
				listentry->page = newwebpage;									 // then add to list
				WebPageList->tail = InsertNode(WebPageList->tail,listentry);

				WriteFile(*newwebpage, filenum++); 								 // then write file
			} else{
				// URL was already visited; discard the freshly fetched copy.
				CleanUpPage(newwebpage);
			}
		}
		free(nexturl);
		nexturl = NULL;
		// Pause between URLs
		sleep(INTERVAL_PER_FETCH);
	}
}
Ejemplo n.º 4
0
/**
 * Starts a load of aURI in the docshell this location object refers to.
 *
 * @param aURI     URI to load; it is first passed through CheckURL(), which
 *                 also hands back the load-info object used for the load.
 * @param aReplace true selects the loadStopContentAndReplace load type,
 *                 false selects loadStopContent.
 * @return NS_OK when there is no docshell to navigate, NS_ERROR_FAILURE when
 *         CheckURL() fails, otherwise whatever nsIDocShell::LoadURI returns.
 */
nsresult
nsLocation::SetURI(nsIURI* aURI, bool aReplace)
{
  nsCOMPtr<nsIDocShell> docShell(do_QueryReferent(mDocShell));
  if (docShell) {
    nsCOMPtr<nsIDocShellLoadInfo> loadInfo;

    if(NS_FAILED(CheckURL(aURI, getter_AddRefs(loadInfo))))
      return NS_ERROR_FAILURE;

    if (aReplace) {
      loadInfo->SetLoadType(nsIDocShellLoadInfo::loadStopContentAndReplace);
    } else {
      loadInfo->SetLoadType(nsIDocShellLoadInfo::loadStopContent);
    }

    // The load goes through the docshell directly, so no separate
    // nsIWebNavigation pointer is needed.
    return docShell->LoadURI(aURI, loadInfo,
                             nsIWebNavigation::LOAD_FLAGS_NONE, true);
  }

  return NS_OK;
}
Ejemplo n.º 5
0
/**
 * Test driver for the URL parser.
 *
 * Each CheckURL() call passes a URL followed by the expected value of every
 * component, in this order: scheme, host, domain, siteowner, tld, maintld,
 * tldregion, port, path, pathdepth, filename, extension, params, query,
 * fragment, address, and finally the expected token string.
 *
 * The results are chained with `success && ...`, so once one case fails the
 * remaining cases are not evaluated.
 *
 * @return 0 when every evaluated case passed, 1 otherwise.
 */
int main(int, char **)
{
    bool success = true;

    success = success &&
              CheckURL("", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "");// Tokenstring
    success = success &&
              CheckURL(".", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       ".", // path
                       1, // pathdepth
                       ".", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "");// Tokenstring
    success = success &&
              CheckURL("..", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "..", // path
                       1, // pathdepth
                       "..", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "");// Tokenstring
    success = success &&
              CheckURL("CHANGES_2.0a", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "CHANGES_2.0a", // path
                       1, // pathdepth
                       "CHANGES_2.0a", // filename
                       "0a", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "FILENAME:CHANGES_2,EXTENSION:0a");// Tokenstring
    success = success &&
              CheckURL("patches/patch-cvs-1.9.10", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "patches/patch-cvs-1.9.10", // path
                       2, // pathdepth
                       "patch-cvs-1.9.10", // filename
                       "10", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "PATH:patches,FILENAME:patch-cvs-1,FILENAME:9,EXTENSION:10");// Tokenstring
    success = success &&
              CheckURL("http:patches/patch-ssh-1.2.14", // URL
                       "http", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "patches/patch-ssh-1.2.14", // path
                       2, // pathdepth
                       "patch-ssh-1.2.14", // filename
                       "14", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,PATH:patches,FILENAME:patch-ssh-1,FILENAME:2,EXTENSION:14");// Tokenstring
    success = success &&
              CheckURL("http://180.uninett.no/servlet/online.Bransje", // URL
                       "http", // scheme
                       "180.uninett.no", // host
                       "uninett.no", // domain
                       "uninett", // siteowner
                       "no", // tld
                       "no", // maintld
                       "europe", // tldregion
                       "", // port
                       "/servlet/online.Bransje", // path
                       2, // pathdepth
                       "online.Bransje", // filename
                       "Bransje", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,HOST:180,DOMAIN:uninett,MAINTLD:no,PATH:servlet,FILENAME:online,EXTENSION:Bransje");// Tokenstring
    success = success &&
              CheckURL("Bilder.gif/rule11.GIF", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "Bilder.gif/rule11.GIF", // path
                       2, // pathdepth
                       "rule11.GIF", // filename
                       "GIF", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "PATH:Bilder,PATH:gif,FILENAME:rule11,EXTENSION:GIF");// Tokenstring
    success = success &&
              CheckURL("bilder/meny/Buer/bue_o.GIF", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "bilder/meny/Buer/bue_o.GIF", // path
                       4, // pathdepth
                       "bue_o.GIF", // filename
                       "GIF", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "PATH:bilder,PATH:meny,PATH:Buer,FILENAME:bue_o,EXTENSION:GIF");// Tokenstring
    success = success &&
              CheckURL("./fakadm/grafikk/indus_bilde.JPG", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "./fakadm/grafikk/indus_bilde.JPG", // path
                       4, // pathdepth
                       "indus_bilde.JPG", // filename
                       "JPG", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "PATH:fakadm,PATH:grafikk,FILENAME:indus_bilde,EXTENSION:JPG");// Tokenstring
    success = success &&
              CheckURL("linux-2.0.35.tar.bz2", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "linux-2.0.35.tar.bz2", // path
                       1, // pathdepth
                       "linux-2.0.35.tar.bz2", // filename
                       "bz2", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "FILENAME:linux-2,FILENAME:0,FILENAME:35,FILENAME:tar,EXTENSION:bz2");// Tokenstring
    success = success &&
              CheckURL("http://www.underdusken.no", // URL
                       "http", // scheme
                       "www.underdusken.no", // host
                       "underdusken.no", // domain
                       "underdusken", // siteowner
                       "no", // tld
                       "no", // maintld
                       "europe", // tldregion
                       "", // port
                       "", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring
    success = success &&
              CheckURL("http://www.underdusken.no/?page=dusker/html/0008/Uholdbar.html", // URL
                       "http", // scheme
                       "www.underdusken.no", // host
                       "underdusken.no", // domain
                       "underdusken", // siteowner
                       "no", // tld
                       "no", // maintld
                       "europe", // tldregion
                       "", // port
                       "/", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "page=dusker/html/0008/Uholdbar.html", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no,QUERY:page,QUERY:dusker,QUERY:html,QUERY:0008,QUERY:Uholdbar,QUERY:html");// Tokenstring
    success = success &&
              CheckURL("http://www.uni-karlsruhe.de/~ig25/ssh-faq/", // URL
                       "http", // scheme
                       "www.uni-karlsruhe.de", // host
                       "uni-karlsruhe.de", // domain
                       "uni-karlsruhe", // siteowner
                       "de", // tld
                       "de", // maintld
                       "", // tldregion
                       "", // port
                       "/~ig25/ssh-faq/", // path
                       2, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,HOST:www,DOMAIN:uni-karlsruhe,MAINTLD:de,PATH:ig25,PATH:ssh-faq");// Tokenstring
    success = success &&
              CheckURL("java/", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "java/", // path
                       1, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "PATH:java");// Tokenstring
    success = success &&
              CheckURL("javascript:OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // URL
                       "javascript", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // address
                       "SCHEME:javascript,ADDRESS:OpenWindow,ADDRESS:survey,ADDRESS:faq,ADDRESS:html,ADDRESS:Issues,ADDRESS:width,ADDRESS:635,ADDRESS:height,ADDRESS:400,ADDRESS:toolbars,ADDRESS:no,ADDRESS:location,ADDRESS:no,ADDRESS:menubar,ADDRESS:yes,ADDRESS:status,ADDRESS:no,ADDRESS:resizable,ADDRESS:yes,ADDRESS:scrollbars,ADDRESS:yes");// Tokenstring
    // The address literals in the two mailto cases were reconstructed so that
    // they agree with the expected token strings of the same calls.
    success = success &&
              CheckURL("mailto: dmf-post@medisin.ntnu.no", // URL
                       "mailto", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       " dmf-post@medisin.ntnu.no", // address
                       "SCHEME:mailto,ADDRESS:dmf-post,ADDRESS:medisin,ADDRESS:ntnu,ADDRESS:no");// Tokenstring
    success = success &&
              CheckURL("mailto:%20Harald%20Danielsen@energy.sintef.no", // URL
                       "mailto", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "%20Harald%20Danielsen@energy.sintef.no", // address
                       "SCHEME:mailto,ADDRESS:20Harald,ADDRESS:20Danielsen,ADDRESS:energy,ADDRESS:sintef,ADDRESS:no");// Tokenstring
    success = success &&
              CheckURL("www.underdusken.no", // URL
                       "", // scheme
                       "www.underdusken.no", // host
                       "underdusken.no", // domain
                       "underdusken", // siteowner
                       "no", // tld
                       "no", // maintld
                       "europe", // tldregion
                       "", // port
                       "", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring
    success = success &&
              CheckURL("~janie/", // URL
                       "", // scheme
                       "", // host
                       "", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "~janie/", // path
                       1, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "PATH:janie");// Tokenstring
    success = success &&
              CheckURL("https://dette.er.en:2020/~janie/index.htm?param1=q&param2=r", // URL
                       "https", // scheme
                       "dette.er.en", // host
                       "er.en", // domain
                       "er", // siteowner
                       "en", // tld
                       "en", // maintld
                       "", // tldregion
                       "2020", // port
                       "/~janie/index.htm", // path
                       2, // pathdepth
                       "index.htm", // filename
                       "htm", // extension
                       "", // params
                       "param1=q&param2=r", // query
                       "", // fragment
                       "", // address
                       "SCHEME:https,HOST:dette,DOMAIN:er,MAINTLD:en,PORT:2020,PATH:janie,FILENAME:index,EXTENSION:htm,QUERY:param1,QUERY:q,QUERY:param2,QUERY:r");// Tokenstring
#if 0
    success = success &&
              CheckURL("http://www.sony.co.uk/", // URL
                       "http", // scheme
                       "www.sony.co.uk", // host
                       "sony.co.uk", // domain
                       "sony", // siteowner
                       "co.uk", // tld
                       "uk", // maintld
                       "unitedkingdom", // tldregion
                       "", // port
                       "/", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,HOST:www,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring
    success = success &&
              CheckURL("http://sony.co.uk/", // URL
                       "http", // scheme
                       "sony.co.uk", // host
                       "sony.co.uk", // domain
                       "sony", // siteowner
                       "co.uk", // tld
                       "uk", // maintld
                       "unitedkingdom", // tldregion
                       "", // port
                       "/", // path
                       0, // pathdepth
                       "", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring
#endif
    // Test fixes for bugs reported in cvs commit:
    // toregge       2000/10/27 22:42:59 CEST
    success = success &&
              CheckURL("http://somehost.somedomain/this!is!it/boom", // URL
                       "http", // scheme
                       "somehost.somedomain", // host
                       "somehost.somedomain", // domain
                       "somehost", // siteowner
                       "somedomain", // tld
                       "somedomain", // maintld
                       "", // tldregion
                       "", // port
                       "/this!is!it/boom", // path
                       2, // pathdepth
                       "boom", // filename
                       "", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:somehost,MAINTLD:somedomain,PATH:this,PATH:is,PATH:it,FILENAME:boom");// Tokenstring
    success = success &&
              CheckURL("http://test.com/index.htm?p1=q%20test&p2=r%10d", // URL
                       "http", // scheme
                       "test.com", // host
                       "test.com", // domain
                       "test", // siteowner
                       "com", // tld
                       "com", // maintld
                       "northamerica", // tldregion
                       "", // port
                       "/index.htm", // path
                       1, // pathdepth
                       "index.htm", // filename
                       "htm", // extension
                       "", // params
                       "p1=q%20test&p2=r%10d", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:test,MAINTLD:com,FILENAME:index,EXTENSION:htm,QUERY:p1,QUERY:q,QUERY:20test,QUERY:p2,QUERY:r,QUERY:10d");// Tokenstring

    // Test bugs found 2001/06/25
    success = success &&
              CheckURL("http://arthur/qm/images/qm1.gif", // URL
                       "http", // scheme
                       "arthur", // host
                       "arthur", // domain
                       "", // siteowner
                       "", // tld
                       "", // maintld
                       "", // tldregion
                       "", // port
                       "/qm/images/qm1.gif", // path
                       3, // pathdepth
                       "qm1.gif", // filename
                       "gif", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,MAINTLD:arthur,PATH:qm,PATH:images,FILENAME:qm1,EXTENSION:gif");// Tokenstring

    // Test Orjan's hypothesis 2003/02/17
    success = success &&
              CheckURL("http://foo.com/ui;.gif", // URL
                       "http", // scheme
                       "foo.com", // host
                       "foo.com", // domain
                       "foo", // siteowner
                       "com", // tld
                       "com", // maintld
                       "northamerica", // tldregion
                       "", // port
                       "/ui;.gif", // path
                       1, // pathdepth
                       "ui", // filename
                       "", // extension
                       ".gif", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring

    // Test Orjan's hypothesis 2003/02/17
    // NOTE(review): this case is identical to the previous one.
    success = success &&
              CheckURL("http://foo.com/ui;.gif", // URL
                       "http", // scheme
                       "foo.com", // host
                       "foo.com", // domain
                       "foo", // siteowner
                       "com", // tld
                       "com", // maintld
                       "northamerica", // tldregion
                       "", // port
                       "/ui;.gif", // path
                       1, // pathdepth
                       "ui", // filename
                       "", // extension
                       ".gif", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring

    // Verify params handling
    success = success &&
              CheckURL("http://foo.com/ui;par1=1/par2=2", // URL
                       "http", // scheme
                       "foo.com", // host
                       "foo.com", // domain
                       "foo", // siteowner
                       "com", // tld
                       "com", // maintld
                       "northamerica", // tldregion
                       "", // port
                       "/ui;par1=1/par2=2", // path
                       1, // pathdepth
                       "ui", // filename
                       "", // extension
                       "par1=1/par2=2", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:par1,PARAMS:1,PARAMS:par2,PARAMS:2");// Tokenstring

    // Verify synthetic url
    success = success &&
              CheckURL("http://www.foo.no:8080/path/filename.ext;par1=hello/par2=world?query=test#fragment", // URL
                       "http", // scheme
                       "www.foo.no", // host
                       "foo.no", // domain
                       "foo", // siteowner
                       "no", // tld
                       "no", // maintld
                       "europe", // tldregion
                       "8080", // port
                       "/path/filename.ext;par1=hello/par2=world", // path
                       2, // pathdepth
                       "filename.ext", // filename
                       "ext", // extension
                       "par1=hello/par2=world", // params
                       "query=test", // query
                       "fragment", // fragment
                       "", // address
                       "SCHEME:http,HOST:www,DOMAIN:foo,MAINTLD:no,PORT:8080,PATH:path,FILENAME:filename,EXTENSION:ext,PARAMS:par1,PARAMS:hello,PARAMS:par2,PARAMS:world,QUERY:query,QUERY:test,FRAGMENT:fragment");// Tokenstring

    // '&' should be allowed in path according to RFC 1738, 2068 and 2396
    success = success &&
              CheckURL("http://canonsarang.com/zboard/data/gallery04/HU&BANG.jpg", // URL
                       "http", // scheme
                       "canonsarang.com", // host
                       "canonsarang.com", // domain
                       "canonsarang", // siteowner
                       "com", // tld
                       "com", // maintld
                       "northamerica", // tldregion
                       "", // port
                       "/zboard/data/gallery04/HU&BANG.jpg", // path
                       4, // pathdepth
                       "HU&BANG.jpg", // filename
                       "jpg", // extension
                       "", // params
                       "", // query
                       "", // fragment
                       "", // address
                       "SCHEME:http,DOMAIN:canonsarang,MAINTLD:com,PATH:zboard,PATH:data,PATH:gallery04,FILENAME:HU,FILENAME:BANG,EXTENSION:jpg");// Tokenstring

    // Process exit status: 0 on success, 1 on failure.
    return !success;
}
Ejemplo n.º 6
0
/*
 * Crawler entry point.
 *
 * Expects exactly three arguments; argv[1] is checked with CheckURL(),
 * argv[2] with CheckDirectory() and argv[3] with CheckDepth(). The seed page
 * (seedURL) is fetched and written first. Unless maxWebPageDepth is 0, the
 * seed's links are then crawled and the queue in WebPageList is processed
 * until it is empty or its head reaches maxWebPageDepth.
 *
 * Returns 0 on success and -1 on bad arguments, a failed seed fetch or an
 * allocation failure.
 */
int main(int argc, char* argv[])
{
	filenum = 1;

	// check command line arguments
	if(argc != 4){		// check number of arguments
		fprintf(stderr,"Error: Number of input argument needs to be exactly 3\n");
		return -1;
	}else if (CheckDirectory(argv[2])){		// check if directory exist
		return -1;
	}else if(CheckDepth(argv[3])){			// check depth
		return -1;
	}else if(CheckURL(argv[1])){			// check url
		fprintf(stderr,"Error: Invalid URL. Can only crawl URL with URL prefix %s\n",URL_PREFIX);
		return -1;
	}

	// init curl
	curl_global_init(CURL_GLOBAL_ALL);

	// setup seed page
	seedPage = (WebPage*)calloc(1,sizeof(WebPage));
	if(seedPage == NULL){
		fprintf(stderr,"Error: Out of memory\n");
		curl_global_cleanup();
		return -1;
	}
	NormalizeURL(seedURL);
	seedPage->url = (char*)calloc(strlen(seedURL)+1,sizeof(char));
	if(seedPage->url == NULL){
		fprintf(stderr,"Error: Out of memory\n");
		free(seedPage);
		curl_global_cleanup();
		return -1;
	}
	strcpy(seedPage->url,seedURL);
	seedPage->depth = 0;

	// get seed webpage
	if(!GetWebPage(seedPage)){				// clean up and exit if url is invalid
		fprintf(stderr,"Error: Invalid URL\n");
		// Release the seed page allocated above.
		// NOTE(review): only the url and the struct are freed here; confirm
		// GetWebPage leaves no html buffer behind on failure.
		free(seedPage->url);
		free(seedPage);
		curl_global_cleanup();
		return -1;
	}

	// write seed file
	WriteFile(*seedPage, filenum++);

	// Exit if maxWebPageDepth = 0
	if(maxWebPageDepth == 0){				// clean up and exit if max webpage depth is 0
		printf("\n[crawler]: Crawling - %s\n\n",seedPage->url);
		printf("1 page has been crawled \n\n");
		CleanUpPage(seedPage);
		// NOTE(review): the hash table is cleaned here before
		// InitialiseHashTable has run - confirm CleanUpHash tolerates that.
		CleanUpHash(URLsVisited);
		curl_global_cleanup();
		return 0;
	}

	// add seed page to hashtable
	InitialiseHashTable(URLsVisited);
	HashTableInsert(seedURL);

	// add seed node to list
	WebPageList = (List*)calloc(1,sizeof(List));
	if(WebPageList == NULL){
		fprintf(stderr,"Error: Out of memory\n");
		CleanUpPage(seedPage);
		CleanUpHash(URLsVisited);
		curl_global_cleanup();
		return -1;
	}
	struct ListNode* seednode = (ListNode*)calloc(1,sizeof(ListNode));
	if(seednode == NULL){
		fprintf(stderr,"Error: Out of memory\n");
		free(WebPageList);
		CleanUpPage(seedPage);
		CleanUpHash(URLsVisited);
		curl_global_cleanup();
		return -1;
	}
	seednode->page = seedPage;
	WebPageList->head = seednode;
	WebPageList->tail = seednode;

	// extract urls from seed page
	CrawlPage(*seedPage);
	WebPageList->head = RemoveNode(WebPageList->head);

	// while there are urls to crawl
	while(WebPageList->head != NULL && WebPageList->head->page->depth < maxWebPageDepth){
		// take the page at the head of the list, extract and queue its urls, then drop it from the list
		CrawlPage(*(WebPageList->head->page));
		WebPageList->head = RemoveNode(WebPageList->head);
	}

	// cleanup memory
	CleanUpList(WebPageList);
	CleanUpHash(URLsVisited);

	// cleanup curl
	curl_global_cleanup();

	// filenum was incremented once per page written
	printf("\n\n %d webpages have been crawled\n\n", filenum-1);
	return 0;
}