void gc_reduce(struct reformat *re, int LotNr, char subname[]) { FILE *GCEDFH; int i; unsigned int DocID; //lagrer hvilkene filer vi har slettet GCEDFH = lotOpenFileNoCasheByLotNr(LotNr,"gced","a", 'e',subname); for (i=0; i<NrofDocIDsInLot; i++) { if ((REN_DocumentIndex(re, i)->Url[0] != '\0') && DIS_isDeleted(REN_DocumentIndex(re, i))) { #ifdef DEBUG printf("Adding url \"%s\" to gc file\n",REN_DocumentIndex(re, i)->Url); #endif DocID = LotDocIDOfset(LotNr) +i; if (fwrite(&DocID,sizeof(DocID),1,GCEDFH) != 1) { perror("can't write gc file"); } } } fclose(GCEDFH); }
int gcdecide(int LotNr, char *subname, struct gcaoptFormat *gcaopt, time_t newest_document) { int i; struct reformat *re; FILE *DOCINDEXFH; whisper_t whisper; //åpner dokument indeks får å teste at vi har en, hvis ikke kan vi bare avslutte. if ( (DOCINDEXFH = lotOpenFileNoCasheByLotNr(LotNr,"DocumentIndex","rb", 's',subname)) == NULL) { #ifdef DEBUG printf("lot dont have a DocumentIndex file\n"); #endif return 0; } fclose(DOCINDEXFH); blog(gcaopt->log,1,"Runing gc for collection \"%s\", lot nr %i",subname,LotNr); if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_COPYONCLOSE|RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) { perror("can't reopen()"); exit(1); } whisper = gcwhisper_read(subname); //går gjenom alle på jakt etter de som kan slettes for (i=0;i<NrofDocIDsInLot;i++) { if (DIS_isDeleted(REN_DocumentIndex(re, i))) { continue; } #ifdef DEBUG #ifdef BLACK_BOX printf("dokument \"%s\", lastSeen: %s", REN_DocumentIndex(re, i)->Url, ctime_s(&REN_DocumentIndex(re, i)->lastSeen)); #endif #endif #ifdef BLACK_BOX if ((whisper & GCWHISPER_NOTOLD) == 0 && (((gcaopt->lastSeenHack == 1) && (REN_DocumentIndex(re, i)->lastSeen == 0)) || ((REN_DocumentIndex(re, i)->lastSeen != 0) && (newest_document > (REN_DocumentIndex(re, i)->lastSeen + gcaopt->MaxAgeDiflastSeen))))) { //sletter DIS_delete(REN_DocumentIndex(re, i)); //sletter dokumentet i bb spesefike ting. bbdocument_delete (REN_DocumentIndex(re, i)->Url, subname); blog(gcaopt->log,2,"dokument \"%s\" can be deleted. Last seen: %s, DocID %u",REN_DocumentIndex(re, i)->Url,ctime_s(&REN_DocumentIndex(re, i)->lastSeen),LotDocIDOfset(LotNr) +i); ++gcaopt->gced; } else { ++gcaopt->keept; } #endif } //markerer hva vi kan slette. gc_reduce(re, LotNr, subname); reclose(re); //trunkerer reposetoryet. gcrepo(LotNr, subname); //vasker iindex struct IndekserOptFormat IndekserOpt; IndekserOpt.optMustBeNewerThen = 0; IndekserOpt.optAllowDuplicates = 0; IndekserOpt.optValidDocIDs = NULL; IndekserOpt.sequenceMode =1; IndekserOpt.garbareCollection = 1; for (i=0;i<64;i++) { Indekser(LotNr,"Main",i,subname,&IndekserOpt); } for (i=0;i<64;i++) { Indekser(LotNr,"acl_allow",i,subname,&IndekserOpt); } for (i=0;i<64;i++) { Indekser(LotNr,"acl_denied",i,subname,&IndekserOpt); } for (i=0;i<64;i++) { Indekser(LotNr,"attributes",i,subname,&IndekserOpt); } //siden vi nå har lagt til alle andringer fra rev index kan vi nå slettet gced filen også //Indekser_deleteGcedFile(LotNr, subname); lotDeleteFile("gced", LotNr, subname); return 0; }