Ejemplo n.º 1
0
int
gcdecide(int LotNr, char *subname, struct gcaoptFormat *gcaopt, time_t newest_document)
{
	int i;
	struct reformat *re;
	FILE *DOCINDEXFH;
	whisper_t whisper;


	//åpner dokument indeks får å teste at vi har en, hvis ikke kan vi bare avslutte.
        if ( (DOCINDEXFH = lotOpenFileNoCasheByLotNr(LotNr,"DocumentIndex","rb", 's',subname)) == NULL) {
		#ifdef DEBUG
                	printf("lot dont have a DocumentIndex file\n");
		#endif

                return 0;
        }
	fclose(DOCINDEXFH);

	blog(gcaopt->log,1,"Runing gc for collection \"%s\", lot nr %i",subname,LotNr);

	if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_COPYONCLOSE|RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) {
		perror("can't reopen()");
		exit(1);
	}


	whisper = gcwhisper_read(subname);

	//går gjenom alle på jakt etter de som kan slettes
	for (i=0;i<NrofDocIDsInLot;i++) {

	
		if (DIS_isDeleted(REN_DocumentIndex(re, i))) {
			continue;
		}

		#ifdef DEBUG
			#ifdef BLACK_BOX
				printf("dokument \"%s\", lastSeen: %s",
					REN_DocumentIndex(re, i)->Url,
					ctime_s(&REN_DocumentIndex(re, i)->lastSeen));
			#endif
		#endif

		#ifdef BLACK_BOX
		if ((whisper & GCWHISPER_NOTOLD) == 0 &&
		    (((gcaopt->lastSeenHack == 1) && (REN_DocumentIndex(re, i)->lastSeen == 0))
		     || ((REN_DocumentIndex(re, i)->lastSeen != 0) &&
		         (newest_document > (REN_DocumentIndex(re, i)->lastSeen + gcaopt->MaxAgeDiflastSeen))))) {


			//sletter
			DIS_delete(REN_DocumentIndex(re, i));

			//sletter dokumentet i bb spesefike ting.
			bbdocument_delete (REN_DocumentIndex(re, i)->Url, subname);

			blog(gcaopt->log,2,"dokument \"%s\" can be deleted. Last seen: %s, DocID %u",REN_DocumentIndex(re, i)->Url,ctime_s(&REN_DocumentIndex(re, i)->lastSeen),LotDocIDOfset(LotNr) +i);
			++gcaopt->gced;
		
		} 
		else {
			++gcaopt->keept;
		}
		#endif
	}


	//markerer hva vi kan slette.
	gc_reduce(re, LotNr, subname);

	reclose(re);

	//trunkerer reposetoryet.
	gcrepo(LotNr, subname);


	//vasker iindex
        struct IndekserOptFormat IndekserOpt;
        IndekserOpt.optMustBeNewerThen = 0;
        IndekserOpt.optAllowDuplicates = 0;
        IndekserOpt.optValidDocIDs = NULL;
        IndekserOpt.sequenceMode =1;
        IndekserOpt.garbareCollection = 1;

	for (i=0;i<64;i++) {
		Indekser(LotNr,"Main",i,subname,&IndekserOpt);
	}
	for (i=0;i<64;i++) {
		Indekser(LotNr,"acl_allow",i,subname,&IndekserOpt);
	}
	for (i=0;i<64;i++) {
		Indekser(LotNr,"acl_denied",i,subname,&IndekserOpt);
	}
	for (i=0;i<64;i++) {
		Indekser(LotNr,"attributes",i,subname,&IndekserOpt);
	}

	//siden vi nå har lagt til alle andringer fra rev index kan vi nå slettet gced filen også
	//Indekser_deleteGcedFile(LotNr, subname);
	lotDeleteFile("gced", LotNr, subname);


	return 0;
}
Ejemplo n.º 2
0
/*
 * Builds inverted-index ("iindex") files for a lot from its revindex files.
 *
 * Usage: ./LotInvertetIndexMaker type lotnr [ lotPart ]
 *   type     index type; used as a directory name under revindex/ and iindex/
 *   lotnr    lot number (parsed with atoi)
 *   lotPart  optional bucket number; when omitted every bucket is indexed
 *
 * With two arguments the lot must have a non-zero last index time and
 * sufficient disk space (otherwise exit status 1), the iindex directory is
 * created with makePath(), and Indekser() is run for every bucket.
 * With three arguments only the given bucket is indexed.
 *
 * Exit status: 0 on success and after printing the usage text (unchanged
 * from before); 1 when a precondition fails, memory cannot be allocated, or
 * a generated path does not fit in its buffer.
 *
 * NOTE(review): `subname` is not declared in this function; it is assumed to
 * be a file-scope variable defined outside this excerpt -- confirm.
 */
int main (int argc, char *argv[]) {

	/*
	 * Number of buckets per lot. The previous loop bound of 63 left bucket
	 * 63 un-indexed. NOTE(review): 64 is taken from the sibling code, which
	 * iterates buckets 0..63 -- confirm.
	 */
	enum { NR_OF_LOT_PARTS = 64 };

	int lotNr;
	int lotPart;
	int len;
	char path[256];
	char revpath[256];
	char iidir[256];
	char iipath[256];
	unsigned lastIndexTime;
	struct revIndexArrayFomat *revIndexArray;

	/* Validate the argument count before argv[1..3] are touched. */
	if (argc != 3 && argc != 4) {
		printf("usage: ./LotInvertetIndexMaker type lotnr [ lotPart ]\n\n");
		return 0;
	}

	printf("lot %s, %i\n",argv[1],argc);

	revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize);
	if (revIndexArray == NULL) {
		perror("malloc");
		exit(1);
	}

	lotNr = atoi(argv[2]);

	if (argc == 3) {

		/* Find the last indexing time. */
		lastIndexTime = GetLastIndexTimeForLot(lotNr,subname);

		if (lastIndexTime == 0) {
			printf("lastIndexTime is 0\n");
			exit(1);
		}

		/* Check that there is enough disk space. */
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		printf("Indexing all buvkets for lot %i\n",lotNr);

		for (lotPart = 0; lotPart < NR_OF_LOT_PARTS; lotPart++) {

			GetFilPathForLot(path,lotNr,subname);

			len = snprintf(revpath, sizeof revpath, "%srevindex/%s/%i.txt", path, argv[1], lotPart);
			if (len < 0 || (size_t)len >= sizeof revpath)
				goto path_too_long;

			/* ToDo: the language ("aa") must be set somewhere else. */
			len = snprintf(iidir, sizeof iidir, "%siindex/%s/index/aa/", path, argv[1]);
			if (len < 0 || (size_t)len >= sizeof iidir)
				goto path_too_long;

			/* Create the directory path. */
			makePath(iidir);

			/*
			 * Build the file name in a separate buffer: passing the
			 * destination buffer as a source argument to sprintf is
			 * undefined behaviour.
			 */
			len = snprintf(iipath, sizeof iipath, "%s%i.txt", iidir, lotPart);
			if (len < 0 || (size_t)len >= sizeof iipath)
				goto path_too_long;

			Indekser(revpath,iipath,revIndexArray);
		}
	}
	else {
		/* argc == 4: index a single bucket. */
		lotPart = atoi(argv[3]);

		printf("indexint part %i for lot %i\n",lotPart,lotNr);

		GetFilPathForLot(path,lotNr,subname);

		len = snprintf(revpath, sizeof revpath, "%srevindex/%s/%i.txt", path, argv[1], lotPart);
		if (len < 0 || (size_t)len >= sizeof revpath)
			goto path_too_long;

		/* ToDo: the language ("aa") must be set somewhere else. */
		len = snprintf(iipath, sizeof iipath, "%siindex/%s/index/aa/%i.txt", path, argv[1], lotPart);
		if (len < 0 || (size_t)len >= sizeof iipath)
			goto path_too_long;

		Indekser(revpath,iipath,revIndexArray);
	}

	free(revIndexArray);
	return 0;

path_too_long:
	fprintf(stderr, "generated path is too long\n");
	free(revIndexArray);
	return 1;
}
Ejemplo n.º 3
0
/*
 * Builds inverted-index ("iindex") files for a lot.
 *
 * Usage: ./LotInvertetIndexMaker [-n] [-d] type lotnr subname [ lotPart ]
 *   -n       do not rebuild an iindex file that can already be opened: in
 *            all-buckets mode the bucket is skipped, in single-bucket mode
 *            the program exits with status 1
 *   -d       passed to Indekser() as its allow-duplicates argument
 *   type     index type; used as a directory name under iindex/
 *   lotnr    lot number (parsed with atoi)
 *   subname  collection name
 *   lotPart  optional bucket number; when omitted buckets 0..63 are indexed
 *
 * Exit status: 0 on success and after printing the usage text (unchanged
 * from before); 1 on an unknown option, a failed precondition, allocation
 * failure, or a generated path that does not fit in its buffer.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int lotPart;
	int len;
	int nrOfArgs;
	char path[256];
	char iidir[256];
	char iipath[256];
	unsigned lastIndexTime;
	int optMustBeNewerThen = 0;
	int optAllowDuplicates = 0;
	char **args;
	char *type;
	char *subname;
	FILE *existing;
	struct revIndexArrayFomat *revIndexArray;

	extern int optind;
	/*
	 * getopt() returns int. Storing the result in a plain char makes the
	 * comparison with -1 always false where char is unsigned, so the loop
	 * would never terminate.
	 */
	int c;
	while ((c=getopt(argc,argv,"nd"))!=-1) {
		switch (c) {
			case 'n':
				optMustBeNewerThen = 1;
				break;
			case 'd':
				optAllowDuplicates = 1;
				break;
			case 'v':
				/* NOTE(review): unreachable, 'v' is not in the option string. */
				break;
			default:
				exit(1);
		}
	}

	/* Positional arguments that remain after the options. */
	args = argv + optind;
	nrOfArgs = argc - optind;

	/* Validate the count before any positional argument is read. */
	if (nrOfArgs != 3 && nrOfArgs != 4) {
		printf("usage: ./LotInvertetIndexMaker type lotnr subname [ lotPart ]\n\n");
		return 0;
	}

	printf("lot %s, %i\n",argv[1],argc);

	type = args[0];
	lotNr = atoi(args[1]);
	subname = args[2];

	revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize);
	if (revIndexArray == NULL) {
		perror("malloc");
		exit(1);
	}

	if (nrOfArgs == 3) {

		/* Find the last indexing time. */
		lastIndexTime = GetLastIndexTimeForLot(lotNr,subname);

		if (lastIndexTime == 0) {
			printf("lastIndexTime is 0\n");
			exit(1);
		}

		/* Check that there is enough disk space. */
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		printf("Indexing all buvkets for lot %i\n",lotNr);

		for (lotPart=0;lotPart<64;lotPart++) {

			GetFilPathForLot(path,lotNr,subname);

			/* ToDo: the language ("aa") must be set somewhere else. */
			len = snprintf(iidir, sizeof iidir, "%siindex/%s/index/aa/", path, type);
			if (len < 0 || (size_t)len >= sizeof iidir)
				goto path_too_long;

			/* Create the directory path. */
			makePath(iidir);

			/*
			 * Build the file name in a separate buffer: passing the
			 * destination buffer as a source argument to sprintf is
			 * undefined behaviour.
			 */
			len = snprintf(iipath, sizeof iipath, "%s%i.txt", iidir, lotPart);
			if (len < 0 || (size_t)len >= sizeof iipath)
				goto path_too_long;

			if (optMustBeNewerThen != 0) {
				existing = fopen(iipath,"r");
				if (existing != NULL) {
					/* Only probing for existence; close it again. */
					fclose(existing);
					printf("we all redy have a iindex.\n");
					continue;
				}
			}

			Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
		}
	}
	else {
		/* Four positional arguments: index a single bucket. */
		lotPart = atoi(args[3]);

		printf("indexint part %i for lot %i\n",lotPart,lotNr);

		GetFilPathForLot(path,lotNr,subname);

		/* ToDo: the language ("aa") must be set somewhere else. */
		len = snprintf(iidir, sizeof iidir, "%siindex/%s/index/aa/", path, type);
		if (len < 0 || (size_t)len >= sizeof iidir)
			goto path_too_long;

		/* Create the directory path. */
		makePath(iidir);

		len = snprintf(iipath, sizeof iipath, "%s%i.txt", iidir, lotPart);
		if (len < 0 || (size_t)len >= sizeof iipath)
			goto path_too_long;

		printf("iipath: \"%s\n",iipath);

		if (optMustBeNewerThen != 0) {
			existing = fopen(iipath,"r");
			if (existing != NULL) {
				fclose(existing);
				printf("we all redy have a iindex.\n");
				exit(1);
			}
		}

		Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
	}

	free(revIndexArray);
	return 0;

path_too_long:
	fprintf(stderr, "generated path is too long\n");
	free(revIndexArray);
	return 1;
}