Beispiel #1
0
/*
 * Test driver for the HTML parser.
 *
 * Every command-line argument is treated as a file name. Each file is read
 * completely into memory and handed to html_parser_run(); the resulting title
 * and at most the first 512 bytes of the body are printed to stdout.
 *
 * Exits with -1 when no file name is given, returns -1 as soon as a file
 * cannot be opened, stat'ed, buffered or read, and exits with 0 otherwise.
 */
int main( int argc, char *argv[] )
{
    if (argc<2)
	{
	    printf( "Usage: %s <filename> {<filename>}\n\n", argv[0] );
	    exit(-1);
	}

    int		paramnr;

    html_parser_init();

    for (paramnr=1; paramnr<argc; paramnr++)
	{
	    FILE	*file = fopen( argv[paramnr], "r" );

	    if (!file)
		{
		    // Report the file that actually failed, not always the first argument.
		    fprintf( stderr, "Could not open %s.\n", argv[paramnr] );
		    html_parser_exit();
		    return -1;
		}

	    // Get filesize:
	    struct stat	fileinfo;
	    if (fstat( fileno( file ), &fileinfo ) != 0)
		{
		    fprintf( stderr, "Could not stat %s.\n", argv[paramnr] );
		    fclose(file);
		    html_parser_exit();
		    return -1;
		}

	    int		size = fileinfo.st_size;

	    // malloc(0) may legally return NULL, so always request at least one byte.
	    char	*buf = (char*)malloc(sizeof(char)*(size>0 ? size : 1));

	    if (!buf)
		{
		    fprintf( stderr, "Could not allocate %i bytes for %s.\n", size, argv[paramnr] );
		    fclose(file);
		    html_parser_exit();
		    return -1;
		}

	    int	i;
	    for (i=0; i<size;)
		{
		    size_t	got = fread( (void*)&(buf[i]), sizeof(char), size-i, file );

		    // A zero-length read means EOF or an error; without this check
		    // the loop would spin forever when fewer than 'size' bytes are available.
		    if (got==0)
			break;

		    i+= got;
		}

	    if (ferror(file))
		{
		    fprintf( stderr, "Could not read %s.\n", argv[paramnr] );
		    free(buf);
		    fclose(file);
		    html_parser_exit();
		    return -1;
		}

	    // Start from NULL so that free() below is well-defined even if the
	    // parser leaves one of the outputs untouched.
	    char	*title = NULL, *body = NULL;

	    // Pass the number of bytes actually read (i), which may be less than st_size.
	    html_parser_run( "http://YAHOOgroups.com/svada/index.html", buf, i, &title, &body, fn, NULL );

	    free(buf);
	    fclose(file);

	    printf("Title: %s\nBody:\n%.512s\n", title ? title : "", body ? body : "");

	    free(title);
	    free(body);
	}

    html_parser_exit();

    exit(0);
}
Beispiel #2
0
/*
 * Entry point of the searchd daemon.
 *
 * Parses the command-line options, loads configuration and (depending on the
 * build flags) spelling data, index caches, thesaurus, file-extension and
 * attribute-description tables, then binds a TCP listening socket and hands
 * every accepted connection to do_chld() -- in-process when -s is given or
 * DEBUG is defined, otherwise in a forked child.
 *
 * Options:
 *   -p <port>   search port (otherwise read from the main config, key "BSDPORT")
 *   -l          sets optLog; under BLACK_BOKS this adds the file log appender
 *               unless the syslog appender is enabled
 *   -o          sets optPreOpen
 *   -m <n>      leave the accept loop after n iterations (0 = unlimited)
 *   -b <file>   rank file (defaults to "Brank")
 *   -v          verbose output
 *   -s          do not fork for new connections
 *   -f          fast startup: skips spelling, index caching and thesaurus loading
 *   -c          disable index caching
 *   -A <n>      value passed to bblog_set_appenders()
 *   -L <n>      value passed to bblog_set_severity()
 *   -S <n>      spelling_min_freq
 *   -a <n>      sets optAlarm (default 60)
 *
 * The first non-option argument is required and is used as the server name.
 *
 * Returns 0 after the accept loop ends because the -m limit was reached;
 * start-up failures terminate the process with exit status 1.
 */
int main(int argc, char *argv[])
{
	bblog_init("searchd");
	bblog(CLEAN, "Initializing...");

	int 	sockfd;
	int runCount;
	//int newsockfd;
	socklen_t clilen;
	struct sockaddr_in cli_addr, serv_addr;
	FILE *LOGFILE;
	//FILE *LOCK;
	struct searchd_configFORMAT searchd_config;

	struct config_t maincfg;

	// Start from an all-zero configuration so that fields which are only
	// assigned under some build flags still have a defined value.
	memset(&searchd_config, 0, sizeof(searchd_config));

	searchd_config.searchport = 0;
	searchd_config.optLog = 0;
	searchd_config.optMax = 0;
	searchd_config.optSingle = 0;
	searchd_config.optrankfile = NULL;
	searchd_config.optPreOpen = 0;
	searchd_config.optFastStartup = 0;
	searchd_config.optCacheIndexes = 1;
	searchd_config.optAlarm = 60;

	// Needed for the speller to properly convert utf8 to wchar_t
	setlocale(LC_ALL, "en_US.UTF-8");

	/* Ignore collection updates for now */
	signal(SIGUSR2, SIG_IGN);
	/* And ignore spelling updates */
	signal(SIGUSR1, SIG_IGN);

	// getopt() returns an int. Storing it in a plain char breaks the -1
	// comparison on platforms where char is unsigned (the loop never ends).
	int c;
	while ((c=getopt(argc,argv,"clp:m:b:vsofA:L:S:a:"))!=-1) {
		switch (c) {
			case 'p':
				searchd_config.searchport = atoi(optarg);
				bblog(CLEAN, "searchd: Option -p: Using port %i.",searchd_config.searchport);
				break;
			case 'l':
				searchd_config.optLog = 1;
				break;
			case 'o':
				searchd_config.optPreOpen = 1;
				break;
			case 'm':
				searchd_config.optMax = atoi(optarg);
				break;
			case 'b':
				searchd_config.optrankfile = optarg;
				break;
			case 'v': /* XXX: Remove now that we have severity in the logger? */
				bblog(CLEAN, "searchd: Option -v: Verbose output.");
				globalOptVerbose = 1;
				break;
			case 's':
				bblog(INFO, "Option -s: Won't fork for new connections");
				searchd_config.optSingle = 1;
				break;
			case 'f':
				searchd_config.optFastStartup = 1;
				break;
			case 'c':
				searchd_config.optCacheIndexes = 0;
				break;
			case 'A':
				bblog_set_appenders(atoi(optarg));
				break;
			case 'L':
				bblog_set_severity(atoi(optarg));
				break;
			case 'S':
				spelling_min_freq = strtol(optarg, NULL, 10);
				break;
			case 'a':
				searchd_config.optAlarm = atoi(optarg);
				break;

			default:
				bblog(ERROR, "Unknown argument: %c", c);
				errx(1, "Unknown argument: %c", c);
		}

	}

	#ifdef BLACK_BOKS
	bblog(CLEAN, "Blackbox mode (searchdbb)");

	time_t starttime;
	time(&starttime);

	if (searchd_config.optLog) {
		/* Only add file logging if syslog is disabled */
		if ((bblog_get_appenders() & LOGGER_APPENDER_SYSLOG) == 0)
			bblog_set_appenders(LOGGER_APPENDER_FILE|bblog_get_appenders());
	}

	/* Write pidfile */
	FILE  *pidfile = fopen(bfile("var/searchd.pid"), "w");

	if (pidfile != NULL) {
		fprintf(pidfile, "%d", getpid());
		fclose(pidfile);
	} else {
		bblog(WARN, "Unable to write to pidfile");
	}

	bblog(CLEAN, "searchd: Starting. Time is %s",ctime(&starttime));
	#endif



	#ifdef DEBUG
	bblog(DEBUGINFO, "searchd: Debug: argc %i, optind %i",argc,optind);
	#endif

	if (searchd_config.optrankfile == NULL) {
		searchd_config.optrankfile = "Brank";
	}

	#ifdef WITH_SPELLING
	if (searchd_config.optFastStartup != 1) {
		if ((spelling = train(bfile("var/dictionarywords"))) == NULL) {
			bblog(ERROR, "Can't init spelling.");
		}

		cache_spelling_keepalive(&spelling);
		signal(SIGUSR1, cache_spelling_hup);
	}
	#endif

	if (argc > optind) {
		strncpy(servername,argv[optind], sizeof(servername) -1);
		// strncpy() does not terminate the destination when the source is
		// too long, so terminate it explicitly.
		servername[sizeof(servername) -1] = '\0';
	} else {
		bblog(ERROR, "No hostname supplied");
		errx(1, "You have to supply a hostname");
	}

	lotPreOpenStartl(&searchd_config.lotPreOpen.DocumentIndex,"DocumentIndex","www",searchd_config.optPreOpen);
	lotPreOpenStartl(&searchd_config.lotPreOpen.Summary,"summary","www",searchd_config.optPreOpen);

#ifdef BLACK_BOKS
	if (searchd_config.optCacheIndexes == 1) {
		if (searchd_config.optFastStartup != 1) {
			bblog(INFO, "Reading indexes");
			cache_indexes(0);
			bblog(INFO, "Cached indexes: %dMB, cached indexes: %d", indexcachescached[0]/(1024*1024), indexcachescached[1]);
			preopen();
			cache_fresh_lot_collection();

			cache_indexes_keepalive();
			signal(SIGUSR2, cache_indexes_hup);
		}
		else {
			signal(SIGUSR2, SIG_IGN);
		}
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

#endif


	maincfg = maincfgopen();

	// A port given with -p takes precedence over the configured one.
	if (searchd_config.searchport == 0) {
		searchd_config.searchport = maincfg_get_int(&maincfg,"BSDPORT");
	}

	searchd_config.cmc_port = maincfg_get_int(&maincfg,"CMDPORT");

	maincfgclose(&maincfg);


	/***********************************************************************************/
	// Try to get a file lock. Only one daemon may run at a time.

	/*
	#ifndef ALLOW_MULTIPLE_SEARCHD
	if ((LOCK = fopen("/tmp/searchd.loc","w")) == NULL) {
		perror("lock file");
		exit(1);
	}

	if (flock(fileno(LOCK),LOCK_EX | LOCK_NB) != 0) {
		if (errno == EWOULDBLOCK) {
			printf("En annen prosses kjører allerede. Steng denne først.\n");
		}
		else {
			perror("cant get lock file");
		}
		exit(1);
	}
	#endif
	*/
	/***********************************************************************************/

	//#ifndef BLACK_BOKS

	/* Initialize the configuration */
	config_init(&cfg);


	/* Load the file */
	#ifdef DEBUG
	bblog(DEBUGINFO, "searchd: Debug: Loading [%s] ...",bfile(cfg_searchd));
	#endif

	if (!config_read_file(&cfg, bfile(cfg_searchd))) {
		bblog(ERROR, "config read failed: [%s]: %s at line %i",bfile(cfg_searchd),config_error_text(&cfg),config_error_line(&cfg));
		exit(1);
	}
	//#endif

	html_parser_init();

	/*
	#ifdef WITH_THREAD
		pthread_t chld_thr;

		printf("starting whth thread\n");
	#else
		printf("starting single thread version\n");
	#endif
	*/

	bblog(CLEAN, "Servername: %s", servername);

	// ToDo: needs locking here
	if ((LOGFILE = bfopen("config/query.log","a")) == NULL) {
		bblog_errno(ERROR, "%s", bfile("config/query.log"));
	}
	else {
		fprintf(LOGFILE,"starting server %s\n",servername);
		fclose(LOGFILE);
	}


	#ifdef BLACK_BOKS
		// Initialize the thesaurus with the output files from 'build_thesaurus_*':
		searchd_config.thesaurus_all = NULL;
#ifndef WITHOUT_THESAURUS
		bblog(INFO, "init thesaurus");

		if (searchd_config.optFastStartup != 1) {
			searchd_config.thesaurus_all = load_all_thesauruses(bfile("data/thesaurus/"));

			if (searchd_config.thesaurus_all == NULL) {
				bblog(ERROR, "Unable to open thesaurus. Disabling stemming");
			} else {
				bblog(INFO, "init thesaurus done");
			}
		}

#endif
		bblog(INFO, "init file-extensions");
		searchd_config.getfiletypep = fte_init(bfile("config/file_extensions.conf"));
		if (searchd_config.getfiletypep == NULL) {
			bblog(ERROR, "Unable to open file-extensions configuration file. Disabling file-extensions.");
		}

		bblog(INFO, "init attribute descriptions");
		searchd_config.attrdescrp = adf_init(bfile("config/attribute_descriptions.conf"));
		if (searchd_config.attrdescrp == NULL) {
			bblog(ERROR, "Unable to open attribute descriptions configuration file. Disabling attribute descriptions.");
		}

		bblog(INFO, "init show-attributes");
		char	*warnings;
		/*searchd_config.showattrp = show_attributes_init(bfile("config/show_attributes.conf"), &warnings);
		if (searchd_config.showattrp == NULL)
		    {
			fprintf(stderr, "searchd: ERROR!! Unable to open show-attributes configuration file. Disabling attributes.\n");
		    }
		else if (warnings[0]!='\0')
		    {
			fprintf(stderr, "searchd: ******************* Warnings reading show-attributes config: ********************\n");
			fprintf(stderr, "%s", warnings);
			fprintf(stderr, "searchd: *********************************************************************************\n");
		    }*/

	#else

		// Starting up
		bblog(INFO, "Loading domain-ids...");
		iintegerLoadMemArray2(&global_DomainIDs,"domainid",sizeof(unsigned short), "www");

		// Load all the pop ranks
		bblog(INFO, "Loading pop MemArray...");
		popopenMemArray2("www",searchd_config.optrankfile); // ToDo: the subname is hard-coded here, since we have not been given one yet at this point
		bblog(INFO, "Loading adultWeight MemArray...");
		adultWeightopenMemArray2("www"); // ToDo: the subname is hard-coded here, since we have not been given one yet at this point
	#endif


	IIndexInaliser();

	#ifdef WITH_MEMINDEX
		IIndexLoad();
	#endif

	if((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		bblog(ERROR, "Server error! Can't open stream socket.");
		exit(1);
	}

	memset((char *) &serv_addr, 0, sizeof(serv_addr));
	serv_addr.sin_family = AF_INET;
	serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	serv_addr.sin_port = htons(searchd_config.searchport);
	bblog(INFO, "Will bind to port %i",searchd_config.searchport);
	// Allow the socket address to be reused
	int yes=1;
	if (setsockopt(sockfd,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) {
		bblog_errno(ERROR, "setsockopt()");
		exit(1);
	}

	if(bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
		bblog_errno(ERROR, "Can't bind local address. Port %i", searchd_config.searchport);
		exit(1);
	}

	/* set the level of thread concurrency we desire */
	//thr_setconcurrency(5);

	// Without a listening socket every accept() below would fail, so treat
	// a listen() failure as fatal like the other socket set-up errors.
	if (listen(sockfd, 5) < 0) {
		bblog_errno(ERROR, "listen()");
		exit(1);
	}

	runCount = 0;

#ifndef NEW_CHILD_CATCHER
	// SIGCHLD is the POSIX name; SIGCLD is a non-portable alias for it.
	signal(SIGCHLD, SIG_IGN);  /* now I don't have to wait() for forked children! */
#else
	{
		// runarb, 28 June 2009:
		// With verbose output we wait() for our children and pretty-print when they die.
		// Unfortunately there has been a lot of trouble where the main process gets stuck
		// in sigchild_handler() and cannot fork more children. To avoid this we normally
		// do not wait for our children.
		if (globalOptVerbose) {
			struct sigaction sa;
			int ret;

			sa.sa_sigaction = sigchild_handler;
			sigemptyset(&sa.sa_mask);
			sa.sa_flags = SA_SIGINFO;

			ret = sigaction(SIGCHLD, &sa, 0);
			if (ret) {
				bblog_errno(ERROR, "sigaction()");
				exit(1);
			}
		}
		else {
			signal(SIGCHLD, SIG_IGN);  /* now I don't have to wait() for forked children! */
		}
	}
#endif

	bblog(CLEAN, "|------------------------------------------------------------------------------------------------|");
	bblog(CLEAN, "|%-40s | %-11s | %-11s | %-11s | %-11s|","query", "TotaltTreff", "showabal", "filtered", "total_usecs");
	bblog(CLEAN, "|------------------------------------------------------------------------------------------------|");

	for (;;)
	{
		// accept() overwrites the length argument with the size of the
		// returned address, so it has to be reset before every call.
		clilen = sizeof(cli_addr);
		searchd_config.newsockfd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen);


		if(searchd_config.newsockfd < 0) {
			/* Just restart */
			if (errno == EINTR)
				continue;

			bblog(WARN, "searchd: Server warning! Accept error");
		}
		else {

			if (searchd_config.optSingle) {
				do_chld((void *) &searchd_config);
			}
			else {
			#ifdef DEBUG
				bblog(DEBUGINFO, "Debug mode; will not fork to new process.");
				do_chld((void *) &searchd_config);
			#else
				/*
				#ifdef WITH_THREAD
			 		//create a new thread to process the incomming request
					//thr_create(NULL, 0, do_chld, (void *) searchd_config, THR_DETACHED, &chld_thr);
					pthread_create(&chld_thr, NULL, do_chld, (void *) &searchd_config);
					//the server is now free to accept another socket request
				#else
					do_chld((void *) &searchd_config);	
				#endif
				*/
				bblog(DEBUGINFO, "Forking new prosess.");
				pid_t child = fork();
				if (child == 0) { // this is the child process

					close(sockfd); // child doesn't need the listener

					do_chld((void *) &searchd_config);

					close(searchd_config.newsockfd);
					bblog(DEBUGINFO, "Terminating child.");

					exit(0);
				}
				else {
					if (child < 0) {
						// The connection is dropped below; at least record why.
						bblog_errno(ERROR, "fork()");
					}
					close(searchd_config.newsockfd); // parent doesn't need the new socket
				}
			#endif
			}
		}

		// Note: failed accept() calls (other than EINTR) are counted as runs too.
		++runCount;

		if ((searchd_config.optMax != 0) && (runCount >= searchd_config.optMax)) {
			// Wait for the last thread. Not quite optimal, since several threads may be running in parallel
			/*
			#ifdef WITH_THREAD
				pthread_join(chld_thr, NULL);
			#endif
			*/
			bblog(WARN, "have reached Max runs. Exiting...");
			break;
		}

	}

	html_parser_exit();

	free(searchd_config.lotPreOpen.Summary);
	free(searchd_config.lotPreOpen.DocumentIndex);
	// NOTE(review): attrdescrp and getfiletypep are only assigned in BLACK_BOKS
	// builds above; confirm that the destroy functions accept a zero value.
	adf_destroy(searchd_config.attrdescrp);
	fte_destroy(searchd_config.getfiletypep);
	if (searchd_config.optFastStartup != 1) {
		// NOTE(review): the thesaurus is loaded into 'thesaurus_all' above, but
		// 'thesaurusp' is destroyed here -- confirm which field is intended.
		thesaurus_destroy(searchd_config.thesaurusp);
	}
	// Free the spelling data. Slow, but handy so that valgrind can tell whether anything here is left unfreed.
	// NOTE(review): 'spelling' is only trained when WITH_SPELLING is defined -- confirm untrain() is safe otherwise.
	if (searchd_config.optFastStartup != 1) {
		untrain(&spelling);
	}

	return(0);
}