Esempio n. 1
0
/*
 * Command-line entry point: merges an iindex by calling mergei().
 *
 * Invocation (see the usage text printed below):
 *   ./mergeIIndex fralot tillot type lang subname [bucket]
 *
 *   argv[1]  "fralot"  - parsed with atoi() and passed on as startIndex
 *   argv[2]  "tillot"  - parsed with atoi(); one is added before it is
 *                        passed on as stoppIndex
 *   argv[3]  type      - the usage text lists "Main | Anchor"
 *   argv[4]  language
 *   argv[5]  subname
 *   argv[6]  bucket    - optional; when omitted, buckets 0..63 are all merged
 *
 * The numeric arguments are not validated: atoi() reports no errors.
 *
 * Returns 0 after the merge has run. With any other argument count the
 * usage text is printed and the process exits with status 0.
 */
int main (int argc, char *argv[]) {

	//printf("argc %i\n",argc);

	int DocIDcount = 0;
	int startIndex;
	int stoppIndex;
	char *type;
	char *lang;
	char *subname;

	/* Reject anything but the two supported argument counts up front. */
	if (argc != 6 && argc != 7) {
		printf("Dette programet printer ut en iindex.\n\n");
		printf("\tUse:\n\n\t./mergeIIndex fralot tillot type (Main | Anchor) språk subname [bucket]\n\n");
		exit(0);
	}

	/* The five leading arguments are shared by both invocation forms. */
	startIndex = atoi(argv[1]);
	stoppIndex = atoi(argv[2]) + 1;
	type = argv[3];
	lang = argv[4];
	subname = argv[5];

	if (argc == 7) {

		/* A bucket was given: merge only that one. */
		int bucket = atoi(argv[6]);

		mergei(bucket,startIndex,stoppIndex,type,lang,subname,&DocIDcount);

		printf("DocIDcount: %i\n",DocIDcount);

	}
	else {

		/* No bucket given: merge every bucket, 0 through 63. */
		int i;

		for (i=0;i<=63;i++) {
			#ifdef DEBUG
			printf("bucket: %i\n",i);
			#endif
			mergei(i,startIndex,stoppIndex,type,lang,subname,&DocIDcount);
		}

		printf("DocIDcount: %i (/64)\n",DocIDcount);

	}

	/*
	 * Return an explicit status: before C99, reaching the closing brace of
	 * main without a return leaves the exit status indeterminate.
	 */
	return 0;
}
Esempio n. 2
0
// Dispatches one merge step according to the index number i.
//
//   Z0 - memory buffer; only used when i == 0, where it is handed to merge0().
//   i  - index number; any non-zero value is forwarded to mergei().
void merge(pos_lists_t &Z0, int i)
{
    if (i != 0) {
        // merge Ii with temporary index Zi
        mergei(i);
        return;
    }

    // merge I0 with memory buffer Z0
    merge0(Z0);
}
Esempio n. 3
0
/*
 * Runs the gc pass over one collection and then re-merges its indexes.
 *
 * subname: collection name; forwarded to every helper called from here.
 * gcaopt:  gc options/state. Its keept and gced counters are reset to 0 on
 *          entry; its log and logSummary members are used with blog().
 *
 * Sequence:
 *   1. Unless gcaopt->dontcheckok is non-zero, return early (after writing a
 *      summary log line) when isOkCrawled() reports failure.
 *   2. Acquire the lock through lockcoll(); exit(-1) if that fails.
 *   3. BLACK_BOX builds only: scan each lot's DocumentIndex for the largest
 *      non-zero lastSeen value and log it.
 *   4. Call gcdecide() for lot numbers 1 .. maxLots-1.
 *   5. Call mergei() for buckets 0 .. NrOfDataDirectorys-1, once per index
 *      type ("Main", "acl_allow", "acl_denied", "attributes").
 *   6. Call lot_recache_collection(), print and log the totals, and close
 *      the lock file.
 */
void gc_coll(char subname[], struct gcaoptFormat *gcaopt) {

	/* Index types merged after the gc pass, each with its progress banner. */
	static const struct {
		const char *banner;
		char *type;
	} merge_steps[] = {
		{ "merging Main\n",       "Main"       },
		{ "merging acl_allow\n",  "acl_allow"  },
		{ "merging acl_denied\n", "acl_denied" },
		{ "merging attributes\n", "attributes" },
	};
	const int merge_step_count = (int)(sizeof(merge_steps) / sizeof(merge_steps[0]));

	int lot, idx, step;
	int DocIDcount = 0;
	FILE *lock_file;
	struct reformat *re;
	time_t newest_document = 0;

	gcaopt->keept = 0;
	gcaopt->gced = 0;

	if (!gcaopt->dontcheckok && !isOkCrawled(subname,gcaopt)) {
		blog(gcaopt->logSummary,1,"Skipping \"%s\". Was not correctly crawled.",subname);
		return;
	}

	lock_file = lockcoll(subname);
	if (lock_file == NULL) {
		fprintf(stderr,"Can't lock lockfile!\n");
		exit(-1);
	}

	#ifdef BLACK_BOX

		for (lot = 1; lot < maxLots; lot++) {

			re = reopen(lot, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_READ_ONLY|RE_HAVE_4_BYTES_VERSION_PREFIX|RE_STRETCH);
			if (re == NULL) {
				/* Failing to open a DocumentIndex is taken to mean we are past the last lot. */
				break;
			}

			/* Track the newest (largest non-zero) lastSeen in this lot. */
			for (idx = 0; idx < NrofDocIDsInLot; idx++) {

				if (REN_DocumentIndex(re, idx)->lastSeen == 0) {
					continue;
				}

				if (REN_DocumentIndex(re, idx)->lastSeen > newest_document) {
					newest_document = REN_DocumentIndex(re, idx)->lastSeen;
				}
			}

			reclose(re);
		}

		/*
		 * Note: a hack that overrode newest_document with time(NULL)
		 * (assuming a crawl had just finished) used to sit here and is
		 * disabled.
		 */
		blog(gcaopt->log,1,"Newest document: %s",ctime_s(&newest_document));

	#endif

	for (lot = 1; lot < maxLots; lot++) {
		gcdecide(lot,subname, gcaopt, newest_document);
	}

	/* Merge the indexes: every bucket, for each index type in turn. */
	for (step = 0; step < merge_step_count; step++) {

		printf("%s", merge_steps[step].banner);

		for (idx = 0; idx < NrOfDataDirectorys; idx++) {
			#ifdef DEBUG
			printf("gc_coll: bucket: %i\n",idx);
			#endif
			mergei(idx,0,0,merge_steps[step].type,"aa",subname,&DocIDcount);
		}
	}

	/*
	 * Per the original note: adds the subname to the list and HUPs searchd
	 * so that its cache gets refreshed.
	 */
	printf("Huping searchd to recache \"%s\"\n",subname);
	lot_recache_collection(subname);

	printf("DocIDcount: %i (/64)\n",DocIDcount);

	/* Same totals go to both the detailed log and the summary log. */
	blog(gcaopt->log,1,"gc'ed \"%s\". Keept %i, gced %i",subname,gcaopt->keept,gcaopt->gced);
	blog(gcaopt->logSummary,1,"gc'ed \"%s\". Keept %i, gced %i",subname,gcaopt->keept,gcaopt->gced);

	fclose(lock_file);
}