Example #1
0
/*
 * Append the DocID of every deleted document in a lot to the lot's "gced"
 * file.
 *
 * Each of the NrofDocIDsInLot slots in the document index is inspected. A slot
 * is recorded when its Url is non-empty and DIS_isDeleted() reports it as
 * deleted. The value written is LotDocIDOfset(LotNr) + slot number, stored as
 * a raw unsigned int.
 *
 * re      - open document index for the lot
 * LotNr   - number of the lot being processed
 * subname - passed through unchanged to the lot file helper
 *
 * There is no return value; failures to open, write or close the gc file are
 * reported with perror(). A failed open makes the function return without
 * recording anything; a failed write is reported and the scan continues.
 */
void gc_reduce(struct reformat *re, int LotNr, char subname[]) {

    FILE *GCEDFH;
    int i;
    unsigned int DocID;

    // records which documents we have deleted
    GCEDFH = lotOpenFileNoCasheByLotNr(LotNr,"gced","a", 'e',subname);
    if (GCEDFH == NULL) {
        // Passing a NULL stream to fwrite()/fclose() is undefined behavior,
        // so give up here instead of crashing further down.
        perror("can't open gc file");
        return;
    }

    for (i=0; i<NrofDocIDsInLot; i++) {

        if ((REN_DocumentIndex(re, i)->Url[0] != '\0') && DIS_isDeleted(REN_DocumentIndex(re, i))) {
#ifdef DEBUG
            printf("Adding url \"%s\" to gc file\n",REN_DocumentIndex(re, i)->Url);
#endif

            DocID = LotDocIDOfset(LotNr) +i;
            if (fwrite(&DocID,sizeof(DocID),1,GCEDFH) != 1) {
                perror("can't write gc file");
            }

        }
    }

    // fclose() flushes buffered DocIDs; a failure here means data was lost.
    if (fclose(GCEDFH) != 0) {
        perror("can't close gc file");
    }

}
Example #2
0
int
gcdecide(int LotNr, char *subname, struct gcaoptFormat *gcaopt, time_t newest_document)
{
	int i;
	struct reformat *re;
	FILE *DOCINDEXFH;
	whisper_t whisper;


	//åpner dokument indeks får å teste at vi har en, hvis ikke kan vi bare avslutte.
        if ( (DOCINDEXFH = lotOpenFileNoCasheByLotNr(LotNr,"DocumentIndex","rb", 's',subname)) == NULL) {
		#ifdef DEBUG
                	printf("lot dont have a DocumentIndex file\n");
		#endif

                return 0;
        }
	fclose(DOCINDEXFH);

	blog(gcaopt->log,1,"Runing gc for collection \"%s\", lot nr %i",subname,LotNr);

	if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_COPYONCLOSE|RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) {
		perror("can't reopen()");
		exit(1);
	}


	whisper = gcwhisper_read(subname);

	//går gjenom alle på jakt etter de som kan slettes
	for (i=0;i<NrofDocIDsInLot;i++) {

	
		if (DIS_isDeleted(REN_DocumentIndex(re, i))) {
			continue;
		}

		#ifdef DEBUG
			#ifdef BLACK_BOX
				printf("dokument \"%s\", lastSeen: %s",
					REN_DocumentIndex(re, i)->Url,
					ctime_s(&REN_DocumentIndex(re, i)->lastSeen));
			#endif
		#endif

		#ifdef BLACK_BOX
		if ((whisper & GCWHISPER_NOTOLD) == 0 &&
		    (((gcaopt->lastSeenHack == 1) && (REN_DocumentIndex(re, i)->lastSeen == 0))
		     || ((REN_DocumentIndex(re, i)->lastSeen != 0) &&
		         (newest_document > (REN_DocumentIndex(re, i)->lastSeen + gcaopt->MaxAgeDiflastSeen))))) {


			//sletter
			DIS_delete(REN_DocumentIndex(re, i));

			//sletter dokumentet i bb spesefike ting.
			bbdocument_delete (REN_DocumentIndex(re, i)->Url, subname);

			blog(gcaopt->log,2,"dokument \"%s\" can be deleted. Last seen: %s, DocID %u",REN_DocumentIndex(re, i)->Url,ctime_s(&REN_DocumentIndex(re, i)->lastSeen),LotDocIDOfset(LotNr) +i);
			++gcaopt->gced;
		
		} 
		else {
			++gcaopt->keept;
		}
		#endif
	}


	//markerer hva vi kan slette.
	gc_reduce(re, LotNr, subname);

	reclose(re);

	//trunkerer reposetoryet.
	gcrepo(LotNr, subname);


	//vasker iindex
        struct IndekserOptFormat IndekserOpt;
        IndekserOpt.optMustBeNewerThen = 0;
        IndekserOpt.optAllowDuplicates = 0;
        IndekserOpt.optValidDocIDs = NULL;
        IndekserOpt.sequenceMode =1;
        IndekserOpt.garbareCollection = 1;

	for (i=0;i<64;i++) {
		Indekser(LotNr,"Main",i,subname,&IndekserOpt);
	}
	for (i=0;i<64;i++) {
		Indekser(LotNr,"acl_allow",i,subname,&IndekserOpt);
	}
	for (i=0;i<64;i++) {
		Indekser(LotNr,"acl_denied",i,subname,&IndekserOpt);
	}
	for (i=0;i<64;i++) {
		Indekser(LotNr,"attributes",i,subname,&IndekserOpt);
	}

	//siden vi nå har lagt til alle andringer fra rev index kan vi nå slettet gced filen også
	//Indekser_deleteGcedFile(LotNr, subname);
	lotDeleteFile("gced", LotNr, subname);


	return 0;
}