/*
 * Parse one command-line argument as a base-10 int.
 *
 * Unlike atoi(), which yields 0 for unparsable text, this rejects empty
 * strings, trailing garbage and values that do not survive a round trip
 * through int. On rejection it prints a diagnostic naming the argument
 * (what) to stderr and terminates the program with exit status 1.
 */
static int parse_int_arg(const char *text, const char *what)
{
	char *end;
	long value = strtol(text, &end, 10);

	if (end == text || *end != '\0' || (long)(int)value != value) {
		fprintf(stderr, "Invalid %s argument: \"%s\"\n", what, text);
		exit(1);
	}

	return (int)value;
}

/*
 * Command-line entry point.
 *
 *   argv[1]  fralot   first index handed to mergei
 *   argv[2]  tillot   last index; mergei receives this value plus one
 *   argv[3]  type
 *   argv[4]  language
 *   argv[5]  subname
 *   argv[6]  bucket   optional; when given, only that bucket is processed,
 *                     otherwise buckets 0..63 are processed in order
 *
 * With any other argument count the usage text is printed and the program
 * exits with status 0. Prints the final DocIDcount and returns 0.
 */
int main (int argc, char *argv[]) {

	int DocIDcount = 0;
	int startIndex;
	int stoppIndex;
	char *type;
	char *lang;
	char *subname;

	if (argc != 6 && argc != 7) {
		printf("Dette programet printer ut en iindex.\n\n");
		printf("\tUse:\n\n\t./mergeIIndex fralot tillot type (Main | Anchor) språk subname [bucket]\n\n");
		exit(0);
	}

	/* Arguments shared by the single-bucket and all-buckets modes. */
	startIndex = parse_int_arg(argv[1], "fralot");
	/* NOTE(review): the +1 presumably makes the end bound exclusive for mergei - confirm. */
	stoppIndex = parse_int_arg(argv[2], "tillot") + 1;
	type = argv[3];
	lang = argv[4];
	subname = argv[5];

	if (argc == 7) {
		/* Build for just one bucket. */
		int bucket = parse_int_arg(argv[6], "bucket");

		mergei(bucket,startIndex,stoppIndex,type,lang,subname,&DocIDcount);

		printf("DocIDcount: %i\n",DocIDcount);
	} else {
		/* Build for all the buckets. */
		int i;

		for (i=0;i<=63;i++) {
#ifdef DEBUG
			printf("bucket: %i\n",i);
#endif
			mergei(i,startIndex,stoppIndex,type,lang,subname,&DocIDcount);
		}

		printf("DocIDcount: %i (/64)\n",DocIDcount);
	}

	return 0;
}
// Dispatch one merge step by index level: level 0 goes to merge0() together
// with Z0, every other level goes to mergei() with the level number.
void merge(pos_lists_t &Z0, int i) {
    if (i != 0) {
        // merge Ii with temporary index Zi
        mergei(i);
        return;
    }

    // merge I0 with memory buffer Z0
    merge0(Z0);
}
void gc_coll(char subname[], struct gcaoptFormat *gcaopt) { int LotNr, i; int DocIDcount = 0; FILE *LOCK; struct reformat *re; time_t newest_document = 0; gcaopt->keept = 0; gcaopt->gced = 0; if (gcaopt->dontcheckok == 0 && !isOkCrawled(subname,gcaopt)) { blog(gcaopt->logSummary,1,"Skipping \"%s\". Was not correctly crawled.",subname); return; } if ((LOCK = lockcoll(subname)) == NULL) { fprintf(stderr,"Can't lock lockfile!\n"); exit(-1); } #ifdef BLACK_BOX for(LotNr=1;LotNr<maxLots;LotNr++) { if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_READ_ONLY|RE_HAVE_4_BYTES_VERSION_PREFIX|RE_STRETCH)) == NULL) { //når vi ikke lengere kan åpne en DocumentIndex er det forde vi har kommet til siste lot. break; } //finner nyeste dokument for (i=0;i<NrofDocIDsInLot;i++) { if ((REN_DocumentIndex(re, i)->lastSeen != 0) && (newest_document < REN_DocumentIndex(re, i)->lastSeen)) { newest_document = REN_DocumentIndex(re, i)->lastSeen; //printf("newest_document: i: %i, url \"%s\", time %s\n",i,REN_DocumentIndex(re, i)->Url, ctime_s(&REN_DocumentIndex(re, i)->lastSeen)); } } reclose(re); } #endif //hack: setter datoen til i dag. Forutsetter at vi nettopp har kjørt crawling. 
//printf("\n<######################## with runarb newest_document hack###################>\n"); //newest_document = time(NULL); //printf("</######################## with runarb newest_document hack###################>\n\n"); #ifdef BLACK_BOX blog(gcaopt->log,1,"Newest document: %s",ctime_s(&newest_document)); #endif for(LotNr=1;LotNr<maxLots;LotNr++) { gcdecide(LotNr,subname, gcaopt, newest_document); } /***************************/ //merger indexene //skal lage for alle bøttene printf("merging Main\n"); for (i=0;i<NrOfDataDirectorys;i++) { #ifdef DEBUG printf("gc_coll: bucket: %i\n",i); #endif mergei(i,0,0,"Main","aa",subname,&DocIDcount); } printf("merging acl_allow\n"); for (i=0;i<NrOfDataDirectorys;i++) { #ifdef DEBUG printf("gc_coll: bucket: %i\n",i); #endif mergei(i,0,0,"acl_allow","aa",subname,&DocIDcount); } printf("merging acl_denied\n"); for (i=0;i<NrOfDataDirectorys;i++) { #ifdef DEBUG printf("gc_coll: bucket: %i\n",i); #endif mergei(i,0,0,"acl_denied","aa",subname,&DocIDcount); } printf("merging attributes\n"); for (i=0;i<NrOfDataDirectorys;i++) { #ifdef DEBUG printf("gc_coll: bucket: %i\n",i); #endif mergei(i,0,0,"attributes","aa",subname,&DocIDcount); } // legger subnavnet til listen og huper searchd slik at cashen blir frisket opp. printf("Huping searchd to recache \"%s\"\n",subname); lot_recache_collection(subname); printf("DocIDcount: %i (/64)\n",DocIDcount); /***************************/ blog(gcaopt->log,1,"gc'ed \"%s\". Keept %i, gced %i",subname,gcaopt->keept,gcaopt->gced); blog(gcaopt->logSummary,1,"gc'ed \"%s\". Keept %i, gced %i",subname,gcaopt->keept,gcaopt->gced); fclose(LOCK); }