Пример #1
0
///========================simply reload the page===================================///
/// Re-open the page the browser is currently on.
///
/// The current URL is read once through geturl(). When it is 4 characters
/// or shorter, nothing is fetched and a notice is written to std::cerr.
/// Otherwise the URL is handed back to open().
void Browser::reload()
{
    // Read the URL a single time so the length checks and the reload
    // itself all work on the same value.
    const std::string current_page = geturl();

    if(current_page.length()<=4)
    {
        std::cerr<<"\n[!] No page has been open yet\n";
        return;
    }

    //don't reload a page that isn't a page
    assert(current_page.length()>7);
    open(current_page);
}
Пример #2
0
	/// Forward a warning to the registered error callback.
	///
	/// When no callback is installed the call is a no-op. Otherwise line and
	/// column are filled in by getpos(p, ...) and passed to the callback
	/// together with the id, the value of geturl() and the result of
	/// xva_start(id).
	void warning(bsparse_error_s id, ...) {
		if(error_callback) {
			int line, column;
			getpos(p, line, column);
			error_callback(id, geturl(), line, column, (const char**)xva_start(id));
		}
	}
Пример #3
0
///==================Get the first root of the current  url=========================///
/// Build the base URL that a relative form action is appended to.
///
/// The scheme is stripped from geturl(), then:
///  - if no '/' remains (e.g. www.blahblah.com) the result is
///    scheme + host + "/";
///  - otherwise everything after the last '/' is dropped. The last slash
///    itself is kept unless form.url_ starts with '/', in which case it is
///    dropped as well.
/// The scheme put back is "https://" when the current URL contained it,
/// "http://" otherwise.
std::string Browser::get_first_root()
{
    std::string stripped = geturl();

    //take the scheme away so its slashes are not mistaken for path separators
    replaceAll(stripped,"http://","");
    const bool secure = word_in(stripped,"https://");
    if(secure)
        replaceAll(stripped,"https://","");

    const std::string scheme = secure ? "https://" : "http://";

    //no slash left: we are at the top directory, e.g. www.blahblah.com
    if( !word_in(stripped,"/") )
        return scheme + stripped + "/";

    //we are inside a directory, e.g.
    //www.something.com/  or  www.something.com/else/something.php
    const std::string::size_type last_slash = stripped.rfind('/');
    const std::string::size_type keep =
        (form.url_[0]!='/') ? last_slash+1 : last_slash;

    return scheme + stripped.substr(0,keep);
}
Пример #4
0
/*
 * Serve one request on an accepted connection and close the socket.
 *
 * Up to 255 bytes of the request are read, the requested URL is extracted
 * with geturl() and mapped to "<basedir>/<url>". Depending on what stat()
 * reports for that path the request is answered with:
 *   - handle_notfound()     when the path cannot be stat'ed,
 *   - handle_redirection()  for a directory requested without a trailing '/',
 *   - handle_file()         for a directory that has a regular index.html,
 *   - handle_directory()    for any other directory,
 *   - handle_file()         for everything else.
 *
 * newsockfd: connected socket; always closed before returning.
 * socket:    peer address, only used for logging.
 * socklen:   unused.
 * Returns 0.
 *
 * NOTE(review): url comes straight from the client and is joined to basedir
 * without any visible check for ".." components -- confirm that geturl()
 * sanitises it, otherwise files outside basedir can be requested.
 */
int handle(int newsockfd, struct sockaddr_in socket, socklen_t socklen)
{
    char buffer[256], path[PATH_MAX], *url;
    struct stat path_stat;
    int n;

    (void) socklen;

    bzero(buffer, 256);
    n = read(newsockfd, buffer, 255);
    if (n < 0)
        error("error reading");

    url = geturl(buffer);
    if (url == NULL) {
        /* Nothing usable in the request: answer and bail out before url
         * is passed to the "%s" conversions below. */
        handle_notfound(newsockfd);
        close(newsockfd);
        return 0;
    }

    info("%s GET %s", inet_ntoa(socket.sin_addr), url);

    snprintf(path, PATH_MAX, "%s/%s", basedir, url);

    /* Drain the rest of the request; only the first chunk is inspected. */
    while (n == 255)
        n = read(newsockfd, buffer, 255);

    if (stat(path, &path_stat)) {
        handle_notfound(newsockfd);
    } else if (S_ISDIR(path_stat.st_mode)) {
        if (path[strlen(path)-1] != '/') {
            /* Directory requested without trailing slash: redirect to the
             * same URL with '/' appended. */
            size_t len = strlen(url);
            char *grown = realloc(url, len + 2);

            if (grown == NULL) {
                /* url is still owned by us and is freed below. */
                error("error allocating memory");
            } else {
                url = grown;
                url[len] = '/';
                url[len+1] = 0;
                handle_redirection(url, url, newsockfd);
            }
        } else {
            struct stat index_stat;
            char index_path[PATH_MAX];

            snprintf(index_path, PATH_MAX, "%s/index.html", path);
            /* Pre-clear st_mode so a failed stat() reads as "not regular". */
            index_stat.st_mode = 0;
            stat(index_path, &index_stat);
            if (S_ISREG(index_stat.st_mode))
                handle_file(url, index_path, newsockfd);
            else
                handle_directory(url, path, newsockfd);
        }
    } else {
        handle_file(url, path, newsockfd);
    }

    close(newsockfd);
    free(url);
    return 0;
}
/*
 * NSS entry point: look up a group by numeric id.
 *
 * The group list is fetched with geturl() from url_group, then each row of
 * the global `body` list is split on ':' and the third field is compared
 * with `gid`. The first matching row is converted with setgroupfromarray().
 *
 * gid:     group id to search for.
 * result:  group structure filled in by setgroupfromarray().
 * buffer:  caller-provided storage handed to setgroupfromarray().
 * buflen:  size of buffer.
 * errnop:  set to ERANGE when setgroupfromarray() returns 1.
 *
 * Returns NSS_STATUS_SUCCESS on a match, NSS_STATUS_TRYAGAIN when
 * setgroupfromarray() returns 1, NSS_STATUS_UNAVAIL otherwise.
 *
 * NOTE(review): "no matching row" also yields NSS_STATUS_UNAVAIL here,
 * whereas _nss_shib_getgrent_r reports NSS_STATUS_NOTFOUND/ENOENT --
 * confirm which one is intended.
 */
enum nss_status _nss_shib_getgrgid_r(gid_t gid, struct group *result, char *buffer, size_t buflen, int *errnop)
{
#ifdef DEBUG
	fprintf(stderr, "\nEntering _nss_shib_getgrgid_r with gid=%d.\n", gid);
#endif

	enum nss_status ret = NSS_STATUS_UNAVAIL;
	BODY *cursor = NULL;
	char newurl[1024];

	readconfig();
	/* Bounded copy: url_group comes from configuration. */
	snprintf(newurl, sizeof(newurl), "%s", url_group);
	if (!geturl(newurl, username, password, cafile, sslcheck) || body == NULL) {
		ret = NSS_STATUS_UNAVAIL;
		goto getgrgid_err;
	}

	for (cursor = body; cursor; cursor = cursor->next)
	{
		char *cur_row = cursor->row;
		int count_separator = count_char_in_str(cur_row, ':');
		char **array = split_str(cur_row, ':');

		/* Check the split result before touching array[0]. */
		if (array == NULL)
			continue;

		if (array[0] != NULL && count_separator >= 3 && (gid_t)atoi(array[2]) == gid)
		{
			int setting = setgroupfromarray(array, result, buffer, buflen);
			if (setting != 0) {
				if (setting == 1) {
					*errnop = ERANGE;
					ret = NSS_STATUS_TRYAGAIN;
				}
				else {
					ret = NSS_STATUS_UNAVAIL;
				}
				/* Release the row on every failure path. */
				free(array);
				goto getgrgid_err;
			}

#ifdef DEBUG
			fprintf(stderr, "Found item for gid=%d: [name=%s]\n", gid, array[0]);
#endif

			ret = NSS_STATUS_SUCCESS;
		}

		free(array);

		/* Stop at the first match so later rows cannot overwrite result. */
		if (ret == NSS_STATUS_SUCCESS)
			break;
	}

getgrgid_err:
	cleanbody();
	return ret;
}
Пример #6
0
///==============================check if in url====================================///
/// Tell whether @p str occurs somewhere in the current URL.
///
/// A notice is written to std::cerr when geturl() returns an empty string;
/// the search is still carried out in that case.
///
/// @param str  text to look for
/// @return true when the current URL contains @p str
bool Browser::inurl(std::string str)
{
    const std::string current_url = geturl();

    if(current_url.empty())
        std::cerr<<"\n[!] No page in history\n";

    return current_url.find(str) != std::string::npos;
}
Пример #7
0
/*
 * Response handler for the file access dialog.
 *
 * HELP opens the help page and leaves the dialog open. OK builds the URL
 * with geturl() and, depending on faw->type, opens it or saves to it; if no
 * URL could be built the dialog is left open. CANCEL does nothing. Any
 * other response is logged through PERR. Except where noted, the dialog is
 * destroyed before returning.
 *
 * dialog:   the dialog that emitted the response; must carry a
 *           "FileAccessWindow" data entry.
 * response: the GTK response id.
 * unused:   not used.
 */
void
gnc_ui_file_access_response_cb(GtkDialog *dialog, gint response, GtkDialog *unused)
{
    FileAccessWindow* faw;
    gchar* url;

    g_return_if_fail( dialog != NULL );

    faw = g_object_get_data( G_OBJECT(dialog), "FileAccessWindow" );
    g_return_if_fail( faw != NULL );

    /* Help keeps the dialog on screen, so handle it first and leave. */
    if ( response == GTK_RESPONSE_HELP )
    {
        gnc_gnome_help( HF_HELP, HL_GLOBPREFS );
        return;
    }

    if ( response == GTK_RESPONSE_OK )
    {
        url = geturl( faw );
        if ( url == NULL )
        {
            /* No usable URL: keep the dialog open. */
            return;
        }

        if ( faw->type == FILE_ACCESS_OPEN )
        {
            gnc_file_open_file( url );
        }
        else if ( faw->type == FILE_ACCESS_SAVE_AS )
        {
            gnc_file_do_save_as( url );
        }
    }
    else if ( response != GTK_RESPONSE_CANCEL )
    {
        PERR( "Invalid response" );
    }

    gtk_widget_destroy( GTK_WIDGET(dialog) );
}
Пример #8
0
/// Fetch @p url with libcurl and record the result.
///
/// After init(), the URL, the POST fields and the timeout are set on the
/// curl handle. When writing_bytes is false the response goes into
/// html_response (headers into header_) and, if enabled, forms and
/// links/emails are parsed from it. When writing_bytes is true the response
/// is written to filepipe, which is closed afterwards. Finally the value of
/// geturl() is appended to history_.
///
/// @param url          address to fetch
/// @param usertimeout  transfer timeout handed to CURLOPT_TIMEOUT; must be > 0
/// @param post_data    string handed to CURLOPT_POSTFIELDS
///
/// NOTE(review): CURLOPT_POSTFIELDS is set on every call, even for an empty
/// post_data. Per the libcurl documentation this option implies a POST --
/// confirm that init() or the callers compensate for plain GET requests.
void Browser::open(std::string url, int usertimeout,std::string post_data)
{
    init();
    timeout = usertimeout;
    assert(timeout>0);
    //set the url in the options
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str() );
    //and set it as the options for curl
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post_data.c_str());
    //Handle the response
    if(writing_bytes==false)
    {
        addheaders("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        addheaders("Connection" ,"keep-alive");
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_to_string );
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &html_response);
        curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &header_);
    }
    else
    {
        assert(filepipe!=NULL);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, filepipe);
    }
    // curl_easy_setopt is variadic and CURLOPT_TIMEOUT reads a long, so the
    // value must be passed as a long explicitly.
    curl_easy_setopt(curl, CURLOPT_TIMEOUT,   static_cast<long>(timeout) );
    res = curl_easy_perform(curl);
    if(error())
    {
        std::cerr<<"\n===============================================\n";
    }
    if(writing_bytes==true)
        fclose(filepipe);
    //because we don't want to parse bytes for forms
    else
    {
        if(fetching_forms == true)
            forms.initialize(html_response);
        if(fetching_links == true)
        {
            links.getlinks(html_response);
            emails.init(links);
        }
    }
    history_.push_back(geturl());
}
Пример #9
0
/// Download the file selected by @p targetfile into "<DATA>/upgrade".
///
/// The URL is geturl() + targetswitch(targetfile). Any existing "upgrade"
/// directory is removed first, then re-validated with verifyPath(). The
/// transfer runs on downloadThread while this function polls
/// curlhandle.downloading (and CANCEL_DOWNLOAD) twice a second or once a
/// second on WIN32.
///
/// @param targetfile  selector passed to targetswitch()
/// @return true when curlhandle.success is set after the transfer, false on
///         any failure or cancellation (the partial file is removed).
///
/// NOTE(review): when the loop ends because of CANCEL_DOWNLOAD the handle is
/// cleaned up and the file closed without joining downloadThread -- confirm
/// that download() cannot still be using them at that point.
bool Upgrader::downloader(int targetfile)
{
    curlhandle.downloading = true;
    curlhandle.handle = curl_easy_init();
    curlhandle.success = false;

    // Without a handle nothing below can work.
    if (curlhandle.handle == NULL)
    {
        printf("\ndownload failed\n");
        curlhandle.downloading = false;
        return false;
    }

    std::string urlstring = geturl() + targetswitch(targetfile);
    const char *url = urlstring.c_str();

    bfs::path target = path(DATA) / "upgrade";
    // if user switches between upgrading client and bootstrapping blockchain, we don't want to pass around garbage
    if (bfs::exists(target)) {bfs::remove_all(target);}

    if (!verifyPath(target, true))
    {
        // Do not leak the handle or leave the "downloading" flag raised.
        curl_easy_cleanup(curlhandle.handle);
        curlhandle.downloading = false;
        return false;
    }
    target /= targetswitch(targetfile);
    cancelDownload(false);
    if (bfs::exists(target))    {bfs::remove(target);}

    file = fopen(target.string().c_str(), "wb");
    if (file == NULL)
    {
        // fwrite()/fclose() on a null FILE* would crash further down.
        printf("\ndownload failed\n");
        curl_easy_cleanup(curlhandle.handle);
        curlhandle.downloading = false;
        return false;
    }
    fileInitialized=true;


    curl_easy_setopt(curlhandle.handle, CURLOPT_URL, url);
    curl_easy_setopt(curlhandle.handle, CURLOPT_WRITEFUNCTION, fwrite);
    curl_easy_setopt(curlhandle.handle, CURLOPT_WRITEDATA, file);
    //curl_easy_setopt(curlhandle.handle, CURLOPT_XFERINFOFUNCTION, cancelDownloader);
    curl_easy_setopt(curlhandle.handle, CURLOPT_NOPROGRESS, 0L);


    downloadThread = boost::thread(download, (void *)&curlhandle);

    printf("downloading file...\n");

    filesize = -1;
    filesizeRetrieved = false;

    while (curlhandle.downloading && !CANCEL_DOWNLOAD)
        {
            #ifdef WIN32
            Sleep(1000);
            #else
            usleep(1000*500);
            #endif
            #if defined(UPGRADERFLAG)
            int sz = getFileDone();
            printf("\r%i\tKB \t%i%%", sz/1024, getFilePerc(sz));
            fflush( stdout );
            #endif
        }

    curl_easy_cleanup(curlhandle.handle);
    fclose(file);
    fileInitialized=false;

    if(!curlhandle.success)
    {
        printf("%s", (CANCEL_DOWNLOAD)? "\ndownload interrupted\n" : "\ndownload failed\n");
        if (bfs::exists(target))    bfs::remove(target);
        cancelDownload(false);
        return false;
    }

    printf("\nfile downloaded successfully\n");
    return true;
}
Пример #10
0
///===================submit the form depending on GET or POST=======================///
void Browser::submit(int timeout=30)
{
    std::string temp_url="";
    int backward_it     = 1;
    //get out of the program if we don't have a post or a get in the form
    assert(word_in(form.method_,"get")|| word_in(form.method_,"post") );

    //if the url is already complete
    if( word_in(form.url_,"http://"))
    {
        temp_url = form.url();
    }
    //otherwise we add after the root of dir
    //meaning after the first /
    //or if there's not, we add a slash and append it
    else
    {
        temp_url = geturl();
        bool https = false;
        //remove the http:// to not confuse the slashes
        replaceAll(temp_url,"http://","");
        if( word_in(temp_url,"https://") )
            https = true;
        if(https)
            replaceAll(temp_url,"https://","");

        //now test if we are in a directory
        //meaning something like:
        //www.something.com/   or
        //www.somthing.com/blah.php or
        //www.something.com/else/somthing.php
        if( word_in(temp_url,"/"))
        {
            while(temp_url[temp_url.size()-backward_it]!='/')
            {
                backward_it++;
            }
            //here we are on the last slash
            if(form.url_[0]!='/')
            {
                if(!https)
                    temp_url = "http://" + temp_url.substr(0,temp_url.size()-backward_it+1)+form.url();
                else
                    temp_url = "https://" + temp_url.substr(0,temp_url.size()-backward_it+1)+form.url();
            }
            else
            {
                if(!https)
                    temp_url = "http://" + temp_url.substr(0,temp_url.size()-backward_it)+form.url();
                else
                    temp_url = "https://" + temp_url.substr(0,temp_url.size()-backward_it)+form.url();
            }
        }
        //meaning we don't have any slash, we are in the top
        //dir , so something like:
        //www.blahblah.com
        else
        {
            //here we concatenate all we need in this way:
            //http://www.blahblah.com/formurl.php
            if(!https)
                temp_url = "http://" + temp_url + "/" + form.url();
            else
                temp_url = "https://" + temp_url + "/" + form.url();
        }
    }

    //we have the url where we will post or get set correctly
    //now prepare the get or post to do and submit
    if( word_in(form.method_,"get"))
    {
        //if it's a get
        //append ? to action then add value1=avalue&value2=anothervalue
        //then open the link and decide if it writes to a file or not depending on the write_bytes
        curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
        temp_url += "?";

        //loop through all that:
        //select   :name(), options[selected(),value()]
        //input    :name(), type(), value()
        //textarea :name(), value()
        //bytes    :strings...

        //select
        for(unsigned int ii=0;ii<form.select.size();ii++)
        {
            for(unsigned int jj=0;jj<form.select[ii].options.size();jj++)
            {
                if(form.select[ii].options[jj].value()!="" &&form.select[ii].options[jj].selected()==true)
                {
                    temp_url+=escape(form.select[ii].name());
                    temp_url+="=";
                    temp_url+=escape(form.select[ii].options[jj].value());
                    temp_url+="&";
                }
            }
        }

        //input
        for(unsigned int ii=0;ii<form.input.size();ii++)
        {
            if(form.input[ii].value()!="")
            {
                temp_url+=escape(form.input[ii].name());
                temp_url+="=";
                temp_url+=escape(form.input[ii].value());
                temp_url+="&";
            }
        }

        //textarea
        for(unsigned int ii=0;ii<form.textarea.size();ii++)
        {
            if(form.textarea[ii].value()!="")
            {
                temp_url+=escape(form.textarea[ii].name());
                temp_url+="=";
                temp_url+=escape(form.textarea[ii].value());
                temp_url+="&";
            }
        }
        //stil use open because we can have write_bytes as a callback
        open(temp_url,timeout);

    }
    //FOR POSTS NOW!
    else if( word_in(form.method_,"post") )
    {
        //if it's a post
        //we check what part of the form is pure bin

        //input -- here we can have the type FILE
        //which always imply uploading a file
        for(unsigned int ii=0;ii<form.input.size();ii++)
        {
            if(form.input[ii].value()!="")
            {

                //check if bytes and content type were specified
                for( std::map<std::string,std::string>::iterator inside_bytes=form.bytes_.begin();
                     inside_bytes!=form.bytes_.end();
                     ++inside_bytes)
                {
                    //if names of the input is inside the bytes
                    if( (*inside_bytes).first ==  form.input[ii].name() )
                    {
                        //if we didn't specified a content type
                        if( (*inside_bytes).second == "" )
                        {
                            // Fill in the file upload field
                            curl_formadd(&formpost,
                                     &lastptr,
                                     CURLFORM_COPYNAME, form.input[ii].name().c_str(),
                                     CURLFORM_FILE, form.input[ii].value().c_str(),
                                     CURLFORM_END);
                        }
                        //specified a content type
                        else
                        {
                            // Fill in the file upload field with the content type
                            curl_formadd(&formpost,
                                     &lastptr,
                                     CURLFORM_COPYNAME, form.input[ii].name().c_str(),
                                     CURLFORM_FILE, form.input[ii].value().c_str(),
                                     CURLFORM_CONTENTTYPE, (*inside_bytes).second.c_str(),
                                     CURLFORM_END);
                        }
                    }
                }
                //

                //if we have the type file then it's a file:
                if( word_in(form.input[ii].type(),"file") )
                {
                    // Fill in the file upload field
                    curl_formadd(&formpost,
                                 &lastptr,
                                 CURLFORM_COPYNAME, form.input[ii].name().c_str(),
                                 CURLFORM_FILE, form.input[ii].value().c_str(),
                                 CURLFORM_END);
                }
                //else if it's pure text
                else
                {
                    // Fill in the submit field too, even if this is rarely needed
                    curl_formadd(&formpost,
                                 &lastptr,
                                 CURLFORM_COPYNAME, form.input[ii].name().c_str(),
                                 CURLFORM_COPYCONTENTS, form.input[ii].value().c_str(),
                                 CURLFORM_END);
                }
            }
        }
        //select
        for(unsigned int ii=0;ii<form.select.size();ii++)
        {
            for(unsigned int jj=0;jj<form.select[ii].options.size();jj++)
            {
                if(form.select[ii].options[jj].value()!="" && form.select[ii].options[jj].selected()==true)
                {
                    curl_formadd(&formpost,
                                 &lastptr,
                                 CURLFORM_COPYNAME, form.select[ii].name().c_str(),
                                 CURLFORM_COPYCONTENTS, form.select[ii].options[jj].value().c_str(),
                                 CURLFORM_END);
                }
            }
        }
        //textarea
        for(unsigned int ii=0;ii<form.textarea.size();ii++)
        {
            if(form.textarea[ii].value()!="")
            {
                curl_formadd(&formpost,
                             &lastptr,
                             CURLFORM_COPYNAME, form.textarea[ii].name().c_str(),
                             CURLFORM_COPYCONTENTS, form.textarea[ii].value().c_str(),
                             CURLFORM_END);
            }
        }

        //add our prepared formpost to the options
        curl_easy_setopt(curl, CURLOPT_HTTPPOST, formpost);

        //stil use open because we can have write_bytes as a callback
        open_form(temp_url,timeout);
    }

}
/*
 * NSS entry point: return the next group of the enumeration.
 *
 * The group list is fetched with geturl() from url_group on every call.
 * Rows of the global `body` list are walked with a running index; the first
 * usable row whose index is greater than last_rownum_grp is converted with
 * setgroupfromarray() and last_rownum_grp is advanced to that index.
 *
 * result:  group structure filled in by setgroupfromarray().
 * buffer:  caller-provided storage handed to setgroupfromarray().
 * buflen:  size of buffer.
 * errnop:  set to ERANGE when setgroupfromarray() returns 1, to ENOENT when
 *          no further row is available.
 *
 * Returns NSS_STATUS_SUCCESS when a row was converted, NSS_STATUS_TRYAGAIN
 * when setgroupfromarray() returns 1, NSS_STATUS_NOTFOUND at the end of the
 * list, NSS_STATUS_UNAVAIL otherwise.
 */
enum nss_status _nss_shib_getgrent_r(struct group *result, char *buffer, size_t buflen, int *errnop)
{
#ifdef DEBUG
	fprintf(stderr, "\nEntering _nss_shib_getgrent_r (iter=%d).\n", last_rownum_grp);
#endif

	enum nss_status ret = NSS_STATUS_UNAVAIL;
	BODY *cursor = NULL;
	int i = 0;
	char newurl[1024];

	readconfig();
	/* Bounded copy: url_group comes from configuration. */
	snprintf(newurl, sizeof(newurl), "%s", url_group);

	if (!geturl(newurl, username, password, cafile, sslcheck) || body == NULL) {
		ret = NSS_STATUS_UNAVAIL;
		goto getgrent_err;
	}

	for (cursor = body; cursor; cursor = cursor->next, i++)
	{
		char *cur_row;
		int count_separator;
		char **array;

		/* Skip the rows already handed out by previous calls. */
		if (i <= last_rownum_grp)
			continue;

		cur_row = cursor->row;
		count_separator = count_char_in_str(cur_row, ':');
		array = split_str(cur_row, ':');

		/* Check the split result before touching array[0]. */
		if (array == NULL)
			continue;

		if (array[0] != NULL && count_separator >= 3)
		{
			int setting = setgroupfromarray(array, result, buffer, buflen);
			if (setting != 0) {
				if (setting == 1) {
					*errnop = ERANGE;
					ret = NSS_STATUS_TRYAGAIN;
				}
				else {
					ret = NSS_STATUS_UNAVAIL;
				}
				/* Release the row on every failure path. */
				free(array);
				goto getgrent_err;
			}

#ifdef DEBUG
			fprintf(stderr, "Found item: [grname=%s, gid=%s]\n", array[0], array[2]);
#endif

			last_rownum_grp = i;
			ret = NSS_STATUS_SUCCESS;
		}

		free(array);
		if (ret == NSS_STATUS_SUCCESS) break;
	}

	if (ret != NSS_STATUS_SUCCESS) {
#ifdef DEBUG
				fprintf(stderr, "Item not found, end of file.\n");
#endif

		*errnop = ENOENT;
		ret = NSS_STATUS_NOTFOUND;
	}

getgrent_err:
	cleanbody();
	return ret;
}
Пример #12
0
int main(int argc, char *argv[]) {
  char logfile[BUFFSIZE], filesdir[BUFFSIZE], collectcmd[BUFFSIZE], lscmd[BUFFSIZE], datfile[BUFFSIZE], imgfile[BUFFSIZE];
  FILE *master, *page;
  FILE *sysout;
  char pageline[BUFFSIZE], buff[BUFFSIZE], scmd[BUFFSIZE], pagefile[BUFFSIZE];
  char pageurl[BUFFSIZE], fullurl[BUFFSIZE], *url;
  char urldone;
  pid_t wgetchild;
  time_t waitstart;
  char *data;
  unsigned masterlength, masterloc;
  unsigned pageadds;
  long tester;
  int p;
  int firstentry;

  if (argc != 3 && argc != 4) {
    printf("Usage: web-collage <file> <program>\n");
    exit(-2);
  }

  /* Fill out the commands with name */
  sprintf(logfile, LOG_FILE, argv[1]);
  sprintf(filesdir, FILES_DIR, argv[1]);
  sprintf(collectcmd, COLLECT_CMD, argv[1], argv[1]);
  sprintf(lscmd, LS_CMD, argv[1]);
  sprintf(datfile, DAT_FILE, argv[1]);
  sprintf(imgfile, IMG_FILE, argv[1]);
  /* does the temp directory already exist? */
  mkdir(filesdir, S_IRWXU);

  /* Open File */
  if (!(master = fopen(datfile, "r"))) {
    printf("Creating File...\n");
    
    master = fopen(datfile, "w+");
    if (!master) {
      perror("creating master file");
      exit(-1);
    }

    fprintf(master, "%s\n", argv[3]); /* First URL */
    fclose(master);
  } else
    fclose(master);

  printf("Reading File...\n");

  srand48(time(NULL));

  master = waitreadmaster(datfile, 0, 0, 0); /* READ Lock! */
  fseek(master, 0, SEEK_END);
  masterlength = ftell(master);
  freemaster(datfile, master, 0, 0);      /* Unlock! */

  int iter = 0;
  while (iter++ < 3) {
      /* Get random URL */
      masterloc = lrand48() % masterlength;
      firstentry = 0;
      /* READ Lock! */
      master = waitreadmaster(datfile, masterloc, masterloc, BUFFSIZE);
      fgets(pageurl, BUFFSIZE, master); /* skip to line after random char */
      do
	if (!fgets(pageurl, BUFFSIZE - strlen(collectcmd) - 1, master)) {
	  rewind(master);
	  firstentry = 1;
	  fgets(pageurl, BUFFSIZE - strlen(collectcmd) - 1, master);
	}
      while (pageurl[0] == '\t' || pageurl[0] == '\n');

      if (!firstentry) {
	/* Invalidate URL */
	/* READ->WRITE Lock! */
	master = waitreadtowrite(datfile, master, masterloc, BUFFSIZE);
	fseek(master, -(strlen(pageurl) + 0), SEEK_CUR);
	voidline(master);
      }
      freemaster(datfile, master, masterloc, BUFFSIZE);
      /* Unlock! */

      pageurl[strlen(pageurl) - 1] = '\0';

    printf("\nGetting %s\n", pageurl);

    /* Get webpage, checking for any redirection */
    sprintf(scmd, "%s \"%s\"", collectcmd, pageurl);
    waitstart = time(NULL);
    if (!(wgetchild = fork())) {
      system(scmd);
      exit(0);
    }
    while (validpid(wgetchild) && waitstart + 5 > time(NULL));
    if (validpid(wgetchild))
      kill(wgetchild, 9);
    wait(NULL);
    
    sysout = fopen(logfile, "r");
    if (!sysout)
      continue; /* File does not exist */
    while (fgets(buff, BUFFSIZE, sysout)) {
      printf("%s", buff);
      if (!strncmp(buff, "Location:", 9)) {
	if (strchr(buff + 10, ' '))
	  *strchr(buff + 10, ' ') = '\0';
	strcpy(pageurl, buff + 10);
      }
    }
    fclose(sysout);

    unlink(logfile);

    /* Collect new links */
    sysout = popen(lscmd, "r");
    if (!sysout) {
      perror("searching files directory");
      exit(-3);
    }

    strcpy(pagefile, filesdir);
    if (fgets(pagefile + strlen(pagefile), BUFFSIZE, sysout) != NULL) {
      pagefile[strlen(pagefile) - 1] = '\0';  /* remove newline */
      page = fopen(pagefile, "r");

      addfilename(pageurl, BUFFSIZE, pagefile);

      pageadds = 0;

      /* Write Additional URLs */
      rewind(master);
      if (findlikes(page) || (rand() % KEEP_PROB)) { /* possibly just throw away */
	while (nexttag(pageline, BUFFSIZE, page))
	  if (url = geturl(pageline)) {
	    if (strncasecmp(url, "http://", 7)) {
	      if (!getpath(fullurl, BUFFSIZE, url, pageurl))
		continue;
	    } else
	      strcpy(fullurl, url);
	    /* search for place in file to place */
	    urldone = 0;
	    if (strlen(fullurl) > BUFFSIZE / 2)
	      continue;  /* too big, don't add */
	    if (rand() % (pageadds + 1) > KEEP_PROB)
	      continue;  /* add fewer urls as more on page */
	    if (strstr(fullurl, ".com") && rand() % DROP_PROB)
	      continue;  /* only chance to add a .com link */
	    if ((strstr(fullurl, "yahoo.com") ||
		 strstr(fullurl, "www.google.com")) && rand() % DROP_PROB)
	      continue;  /* almost no yahoo.com's, www.google.com's */
	    for (p = 0; p < DISLIKE_CNT; p++)
	      if (strstr(fullurl, dislikes[p])) /* just skip */
		continue;
	    /* READ Lock! */
	    master = waitreadmaster(datfile, 0, 0, 0);
	    while (fgets(buff, BUFFSIZE, master)) {
	      if (!strncmp(buff, fullurl, strlen(fullurl)) ||
		  (!strncmp(buff, fullurl, strchr(buff + strlen("http://"), '/')
			    - buff) && !(rand() % HOST_CROWD))) {
		urldone = 1;
		freemaster(datfile, master, 0, 0);
		/* Unlock! */
		break;
	      }
	      if (buff[0] == '\t' || buff[0] == '\n')
		if (strlen(buff) >= strlen(fullurl) + 1) {
		  masterloc = ftell(master);
		  freemaster(datfile, master, 0, 0);
		  /* Unlock! */
		  printf("Adding %s\n", fullurl);
		  pageadds++;
		  /* WRITE Lock! */
		  waitwritemaster(datfile, masterloc - strlen(buff), 
				  masterloc - strlen(buff), BUFFSIZE);
		  fprintf(master, "%s\n", fullurl);
		  if (strlen(buff) >= strlen(fullurl) + 2)
		    fputc('\t', master);
		  fseek(master, 0, SEEK_END);
		  masterlength = ftell(master);
		  freemaster(datfile, master,
			     masterloc - strlen(buff), BUFFSIZE);
		  /* Unlock! */
		  urldone = 1;
		  break;
		}
	    }
	    if (!urldone) {
	      if (ftell(master) > MAX_SIZE) { /* too big, just remove entries */
		fseek(master, lrand48() % ftell(master), SEEK_SET);
		fgets(buff, BUFFSIZE, master);
		urldone = 1;  /* flag that was greater than MAX_SIZE */
	      }
	      masterloc = ftell(master);
	      freemaster(datfile, master, 0, 0);
	      /* Unlock! */
	      printf("Adding %s\n", fullurl);
	      pageadds++;
	      /* WRITE Lock! */
	      waitwritemaster(datfile, masterloc, masterloc - 2, BUFFSIZE);
	      fprintf(master, "%s\n", fullurl);
	      if (urldone)
		voidline(master);
	      fseek(master, 0, SEEK_END);
	      masterlength = ftell(master);
	      freemaster(datfile, master, masterloc - 2, BUFFSIZE);
	      /* Unlock! */
	    }
	  }
      }

      fclose(page);

      /* Move file to final destination */
      sprintf(scmd, "%s \"%s\" \"%s\"", argv[2], argv[1], pagefile);
      system(scmd);
      sleep(WAIT_TIME);
      sprintf(scmd, "touch %sREMOVE.tmp", filesdir);
      system(scmd);
      sprintf(scmd, "rm %s*", filesdir);
      system(scmd);
    }

    pclose(sysout);
  }
}