Beispiel #1
0
/*
 * Records which documents of a lot have been deleted.
 *
 * Walks slots 0 .. NrofDocIDsInLot-1 of the lot's DocumentIndex and, for every
 * entry that has a non-empty Url and is flagged as deleted, appends its DocID
 * (LotDocIDOfset(LotNr) + slot) as a raw unsigned int to the lot's "gced" file.
 *
 * re      - DocumentIndex accessor for the lot
 * LotNr   - number of the lot being processed
 * subname - collection the lot belongs to
 *
 * If the "gced" file cannot be opened the error is reported and nothing is
 * written. A failed write is reported and the scan continues.
 */
void gc_reduce(struct reformat *re, int LotNr, char subname[]) {

    FILE *GCEDFH;
    int i;
    unsigned int DocID;

    // keeps track of which documents we have deleted
    GCEDFH =  lotOpenFileNoCasheByLotNr(LotNr,"gced","a", 'e',subname);
    if (GCEDFH == NULL) {
        // without this guard fwrite()/fclose() below would get a NULL stream
        perror("can't open gc file");
        return;
    }

    for (i=0; i<NrofDocIDsInLot; i++) {

        if ((REN_DocumentIndex(re, i)->Url[0] != '\0') && DIS_isDeleted(REN_DocumentIndex(re, i))) {
#ifdef DEBUG
            printf("Adding url \"%s\" to gc file\n",REN_DocumentIndex(re, i)->Url);
#endif

            DocID = LotDocIDOfset(LotNr) +i;
            if (fwrite(&DocID,sizeof(DocID),1,GCEDFH) != 1) {
                perror("can't write gc file");
            }

        }
    }

    // fclose() flushes buffered records, so a failure here means lost data
    if (fclose(GCEDFH) != 0) {
        perror("can't close gc file");
    }

}
Beispiel #2
0
/*
 * Reads the "gcwhisper" file of lot 1 for the given collection and returns the
 * OR of the flags of every whisper string found in it.
 *
 * Each line of the file is compared (after stripping its newline) against the
 * entries of the NULL-terminated `whispers` table. Lines that match no entry
 * are reported with warnx() and otherwise ignored.
 *
 * Returns 0 when the file cannot be opened.
 */
whisper_t
gcwhisper_read(char *subname)
{
	char line[2048];
	whisper_t result = 0;
	FILE *in = lotOpenFileNoCasheByLotNr(1, "gcwhisper", "r", 's', subname);

	if (in == NULL)
		return 0;

	while (fgets(line, sizeof(line), in) != NULL) {
		char *newline = strrchr(line, '\n');
		int idx = 0;
		int known = 0;

		/* drop the trailing newline, if any */
		if (newline != NULL)
			*newline = '\0';

		/* linear search through the table; stop at the first match */
		while (!known && whispers[idx].str != NULL) {
			if (strcmp(whispers[idx].str, line) == 0) {
				result |= whispers[idx].flag;
				known = 1;
			} else {
				idx++;
			}
		}

		if (!known)
			warnx("Trying to read unknown whisper string: '%s'", line);
	}

	fclose(in);

	return result;
}
Beispiel #3
0
/*
 * Writes the IPAddress field of every DocumentIndex entry of one lot to the
 * lot's "ipdb" file, one unsigned int per entry, in the order DIGetNext()
 * delivers them.
 *
 * argv[1] is the lot number.
 *
 * Exits with 0 after printing the usage text, with 1 when the lot has no
 * DocumentIndex or the "ipdb" file cannot be opened, written or closed.
 *
 * NOTE(review): `subname` is not declared in this function; presumably it is a
 * file-level variable or macro - confirm.
 */
int main (int argc, char *argv[]) {

	struct DocumentIndexFormat DocumentIndexPost;
	int LotNr;
	unsigned int DocID;
	FILE *LOTIPDB;

	if (argc < 2) {
		printf("Dette programet leser en DocumentIndex. Gi det et lot nr. \n\n\tUsage: ./readDocumentIndex 1\n");
		exit(0);
	}

	LotNr = atoi(argv[1]);

	if (DIHaveIndex(LotNr,subname) == 0) {
		printf("dosent hav DIindex\n");
		exit(1);
	}

	LOTIPDB = lotOpenFileNoCasheByLotNr(LotNr,"ipdb","wb", 'e',subname);
	if (LOTIPDB == NULL) {
		perror("can't open ipdb file");
		exit(1);
	}

	DocID = 0;
	while (DIGetNext (&DocumentIndexPost,LotNr,&DocID,subname)) {

		// one record is written per entry, so a dropped record would shift
		// every following one; stop instead of producing a misaligned file
		if (fwrite(&DocumentIndexPost.IPAddress,sizeof(unsigned int),1,LOTIPDB) != 1) {
			perror("can't write ipdb file");
			fclose(LOTIPDB);
			exit(1);
		}
	}

	if (fclose(LOTIPDB) != 0) {
		perror("can't close ipdb file");
		exit(1);
	}

	return 0;
}
/*
 * Deletes a collection from disk.
 *
 * 1. Starting at lot 1, removes the directory of every consecutive lot that
 *    has a "reposetory" file; stops at the first lot without one.
 * 2. Unlinks the "Main"/"aa" index and dictionary files for buckets 0..63.
 *    A missing file (ENOENT) is not treated as an error.
 * 3. Removes the collection from the userToSubname database.
 *
 * collection - name of the collection (subname) to delete
 *
 * Always returns 1; individual failures are only reported on stderr/stdout.
 */
int bbdocument_deletecoll(char collection[]) {

	int LotNr;
	int i;
	char FilePath[512];
	char IndexPath[512];
	char DictionaryPath[512];
	FILE *fh;

	debug("Deleting collection: \"%s\"\n",collection);

	LotNr = 1;
	while((fh =lotOpenFileNoCasheByLotNr(LotNr,"reposetory","r",'s',collection)) != NULL) {
		GetFilPathForLot(FilePath,LotNr,collection);

		// the handle was only needed as an existence probe
		fclose(fh);

		rrmdir(FilePath);

		++LotNr;
	}

	for (i=0; i < 64; i++) {
		GetFilePathForIindex(FilePath,IndexPath,i,"Main","aa",collection);
		#ifdef DEBUG
		printf("FilePath: %s\nIndexPath: %s\n",FilePath,IndexPath);
		#endif

		// unlink() returns 0 on success and -1 on failure. The former "!= 1"
		// test was always true, so errno was inspected even after a successful
		// unlink. ENOENT (no such file) is completely normal and is not reported.
		if ((unlink(IndexPath) != 0) && (errno != ENOENT)) {
			perror("remove IndexPath");
		}


		GetFilePathForIDictionary(FilePath,DictionaryPath,i,"Main","aa",collection);
		#ifdef DEBUG
		printf("FilePath: %s\nDictionaryPath: %s\n",FilePath,DictionaryPath);
		#endif

		// ENOENT (no such file) is completely normal and is not reported
		if ((unlink(DictionaryPath) != 0) && (errno != ENOENT)) {
			perror("remove DictionaryPath");
		}
	}

	// remove the collection from userToSubname.db
	struct userToSubnameDbFormat userToSubnameDb;

	if (!userToSubname_open(&userToSubnameDb,'w')) {
		printf("can't open users.db\n");
	}
	else {
		userToSubname_deletecol(&userToSubnameDb,collection);

		userToSubname_close(&userToSubnameDb);
	}

	return 1;
}
Beispiel #5
0
/*
 * Dumps the "crc32attr.map" file of lot 1 for a collection.
 *
 * argv[1] is the collection (subname). The file is mapped read-only and every
 * record is printed as "crc32 <number>, text <string>".
 *
 * Returns 1 after a successful dump and -1 when the file cannot be opened,
 * inspected or mapped, or when it is empty. Exits with 1 on a usage error.
 *
 * NOTE(review): the element count is derived from
 * sizeof(unsigned int) + MAX_ATTRIB_LEN while indexing uses
 * sizeof(struct Crc32attrMapFormat); these are assumed to be equal - confirm.
 */
int main (int argc, char *argv[]) {

	if (argc < 2) {
		printf("Error ingen subna,e spesifisert.\n\nEksempel på bruk for å lese lot 2:\n\trread www\n");
		exit(1);
	}


	char *subname = argv[1];
	FILE            *f_crc32_words = NULL;
	int             crc32_words_size = 0;
	struct stat     inode;
	int             attr_crc32_words_blocksize = sizeof(unsigned int) + sizeof(char)*MAX_ATTRIB_LEN;
	struct Crc32attrMapFormat            *m_crc32_words = NULL;
	int nelements;
	int i;

	if ((f_crc32_words = lotOpenFileNoCasheByLotNr(1, "crc32attr.map", "r", 's', subname)) == NULL) {
		perror("Can't open thecrc32attr.map file for lot");
		return -1;
	}

	// inode is only valid when fstat() succeeds
	if (fstat(fileno(f_crc32_words), &inode) != 0) {
		perror("Can't fstat the crc32attr.map file");
		fclose(f_crc32_words);
		return -1;
	}
	crc32_words_size = inode.st_size;

	if (crc32_words_size==0) {
		printf("Map is 0 bytes. Skipping\n");
		fclose(f_crc32_words);
		return -1;
	}

	if ((m_crc32_words=mmap(NULL, crc32_words_size, PROT_READ, MAP_SHARED, fileno(f_crc32_words), 0)) == MAP_FAILED)
	{
		perror("Can't mmap");
		fclose(f_crc32_words);
		return -1;
	}

	nelements = crc32_words_size / attr_crc32_words_blocksize;

	printf("Hvae %d elements of sise %d\n",nelements, attr_crc32_words_blocksize);

	for(i=0;i<nelements;i++) {
		printf("crc32 %u, text %s\n",m_crc32_words[i].crc32, m_crc32_words[i].text);
	}

	munmap(m_crc32_words,crc32_words_size);
	fclose(f_crc32_words);


	return 1;
}
Beispiel #6
0
/*
 * Writes one byte per DocumentIndex entry of a lot to the lot's "AdultWeight"
 * file: 1 when the entry's AdultWeight is at least AdultWeightForXXX, else 0.
 * Entries are written in the order DIGetNext() delivers them.
 *
 * argv[1] is the lot number.
 *
 * Exits with 0 after printing the usage text, with 1 when the lot has no
 * DocumentIndex or the output file cannot be opened, written or closed.
 *
 * NOTE(review): `subname` is not declared in this function; presumably it is a
 * file-level variable or macro - confirm.
 */
int main (int argc, char *argv[]) {

	struct DocumentIndexFormat DocumentIndexPost;
	int LotNr;
	unsigned int DocID;
	FILE *ADULTWEIGHTFH;
	unsigned char awvalue;

	if (argc < 2) {
		printf("Dette programet leser en DocumentIndex. Gi det et lot nr. \n\n\tUsage: ./readDocumentIndex 1");
		exit(0);
	}

	LotNr = atoi(argv[1]);

	if (DIHaveIndex(LotNr,subname) == 0) {
		printf("dosent hav DIindex\n");
		exit(1);
	}

	ADULTWEIGHTFH = lotOpenFileNoCasheByLotNr(LotNr,"AdultWeight","wb", 'e',subname);
	if (ADULTWEIGHTFH == NULL) {
		perror("can't open AdultWeight file");
		exit(1);
	}

	DocID = 0;
	while (DIGetNext (&DocumentIndexPost,LotNr,&DocID,subname)) {

		// 1 marks the document as adult, 0 as not adult
		awvalue = (DocumentIndexPost.AdultWeight >= AdultWeightForXXX) ? 1 : 0;

		// one byte is written per entry, so a dropped byte would shift every
		// following one; stop instead of producing a misaligned file
		if (fwrite(&awvalue,sizeof(awvalue),1,ADULTWEIGHTFH) != 1) {
			perror("can't write AdultWeight file");
			fclose(ADULTWEIGHTFH);
			exit(1);
		}

	}

	if (fclose(ADULTWEIGHTFH) != 0) {
		perror("can't close AdultWeight file");
		exit(1);
	}

	return 0;
}
Beispiel #7
0
/*
 * Closes the current cache set and re-opens the "filtypes", "dates" and
 * "crc32map" caches for the lots of every collection, until roughly
 * MAX_PREOPEM_FILE caches have been opened.
 *
 * Only lots 1 .. maxLots-1 that have a "DocumentIndex" file are touched. This
 * matters because the caches are opened with RE_CREATE_AND_STRETCH, i.e. the
 * files are created when they do not exist.
 *
 * Logs a warning when the limit was reached.
 */
void
preopen(void)
{
	DIR *colls;
	FILE *docindex;
	char *subname;
	int lot;
	int opened = 0;

	reclose_cache();

	colls = listAllColl_start();
	if (colls == NULL) {
		bblog(ERROR, "Can't listAllColl_start()");
		return;
	}

	for (;;) {
		subname = listAllColl_next(colls);
		if (subname == NULL || opened >= MAX_PREOPEM_FILE) {
			break;
		}

		bblog(DEBUGINFO, "subname: %s", subname);

		for (lot = 1; lot < maxLots; lot++) {
			// the DocumentIndex handle is only an existence probe
			docindex = lotOpenFileNoCasheByLotNr(lot,"DocumentIndex","rb", 'r', subname);
			if (docindex != NULL) {
				reopen_cache(lot,4, "filtypes",subname,RE_READ_ONLY|RE_STARTS_AT_0|RE_POPULATE|RE_CREATE_AND_STRETCH);
				reopen_cache(lot,sizeof(int), "dates",subname,RE_READ_ONLY|RE_STARTS_AT_0|RE_POPULATE|RE_CREATE_AND_STRETCH);
				reopen_cache(lot,sizeof(unsigned int), "crc32map",subname,RE_READ_ONLY|RE_POPULATE|RE_CREATE_AND_STRETCH);

				fclose(docindex);

				if (opened > MAX_PREOPEM_FILE) {
					break;
				}
				// +3: one each for filtypes, dates and crc32map
				opened += 3;
			}
		}
	}
	listAllColl_close(colls);

	if (opened >= MAX_PREOPEM_FILE) {
		bblog(WARN, "can't preopen any more. Did hit MAX_PREOPEM limit of %d files", MAX_PREOPEM_FILE);
	}
}
Beispiel #8
0
/*
 * Adds whisper flags to the "gcwhisper" file of lot 1 for a collection.
 *
 * For every entry of the NULL-terminated `whispers` table whose flag is set in
 * `whisper` but not yet present in the file (as reported by gcwhisper_read()),
 * the entry's string is written as one line.
 *
 * subname - collection to update
 * whisper - flags that should be present afterwards
 *
 * If the file cannot be opened the error is reported and nothing is written.
 */
void
gcwhisper_write(char *subname, whisper_t whisper)
{
	FILE *fp;
	whisper_t has;
	int i;

	has = gcwhisper_read(subname);

	fp = lotOpenFileNoCasheByLotNr(1, "gcwhisper", ">>", 'e', subname);
	if (fp == NULL) {
		/* without this guard fprintf()/fclose() below would get a NULL stream */
		perror("can't open gcwhisper file");
		return;
	}

	for (i = 0; whispers[i].str != NULL; i++) {
		/* wanted and not already recorded */
		if ((whispers[i].flag & whisper) && (has & whispers[i].flag) == 0) {
			fprintf(fp, "%s\n", whispers[i].str);
		}
	}
	fclose(fp);

}
Beispiel #9
0
/*
 * Sorts the records of the "crc32attr.map" file of lot 1 in place.
 *
 * argv[1] is the collection (subname). The file is mapped read/write and its
 * records, each sizeof(unsigned int) + MAX_ATTRIB_LEN bytes, are sorted with
 * qsort() using attr_crc32_words_block_compare.
 *
 * Returns 0 on success, including the case of an empty file where there is
 * nothing to sort. Exits with 1 on a usage error and with -1 when the file
 * cannot be opened, inspected or mapped.
 */
int main (int argc, char *argv[]) {

	if (argc < 2) {
		printf("Error ingen subna,e spesifisert.\n\nEksempel på bruk for å lese lot 2:\n\trread www\n");
		exit(1);
	}

	char *subname = argv[1];
	FILE            *f_crc32_words = NULL;
	int             crc32_words_size = 0;
	struct stat     inode;
	int             attr_crc32_words_blocksize = sizeof(unsigned int) + sizeof(char)*MAX_ATTRIB_LEN;
	void            *m_crc32_words = NULL;

	if ((f_crc32_words = lotOpenFileNoCasheByLotNr(1, "crc32attr.map", "r+", 's', subname)) == NULL) {
		perror("Can't open thecrc32attr.map file for lot");
		exit(-1);
	}

	// inode is only valid when fstat() succeeds
	if (fstat(fileno(f_crc32_words), &inode) != 0) {
		perror("Can't fstat the crc32attr.map file");
		fclose(f_crc32_words);
		exit(-1);
	}
	crc32_words_size = inode.st_size;

	// a zero-length mmap() fails, and an empty map is already sorted
	if (crc32_words_size==0) {
		printf("Map is 0 bytes. Skipping\n");
		fclose(f_crc32_words);
		return 0;
	}

	// never hand MAP_FAILED to qsort()/munmap()
	if ((m_crc32_words=mmap(NULL, crc32_words_size, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(f_crc32_words), 0)) == MAP_FAILED)
	{
		perror("Can't mmap");
		fclose(f_crc32_words);
		exit(-1);
	}


	printf("Will sort %d elements of sise %d\n",(crc32_words_size / attr_crc32_words_blocksize), attr_crc32_words_blocksize);

	qsort(m_crc32_words,(crc32_words_size / attr_crc32_words_blocksize),attr_crc32_words_blocksize, attr_crc32_words_block_compare);

	munmap(m_crc32_words,crc32_words_size);
	fclose(f_crc32_words);

	printf("Done\n");

	return 0;
}
Beispiel #10
0
// Gives callers access to lot files and caches the open file handle.
//
// The lot is derived from DocID with rLotForDOCid(). The cache lookup is
// currently disabled: slot 0 of OpenFiles is always used. When slot 0 already
// holds the same lot, subname, type and resource, its handle is returned.
// Otherwise any handle held in slot 0 is closed and the requested file is
// opened with lotOpenFileNoCasheByLotNr() and remembered in slot 0.
//
// Returns the stream, or NULL when the file cannot be opened. A cached entry
// whose handle is NULL is also returned as NULL (after printing an error).
FILE *lotOpenFile(unsigned int DocID,char resource[],char type[], char lock,char subname[]) {

	char lotDir[128];
	char fullName[128];
	int lot;
	int slot;

	// first call: mark every cache slot as unused
	if (!LotFilesInalisert) {
		for (slot = 0; slot < MaxOpenFiles; slot++) {
			OpenFiles[slot].LotNr = -1;
		}

		LotFilesInalisert = 1;
	}

	fullName[0] = '\0';

	// find which lot we are going to read from
	lot = rLotForDOCid(DocID);

	// scan the cache until we run out of slots or find the wanted lot
	for (slot = 0; slot < MaxOpenFiles; slot++) {
		if (OpenFiles[slot].LotNr == lot) {
			break;
		}
	}
	// temp: the scan result is discarded and slot 0 is used instead.
	// type and subname were added later and the scan does not consider them.
	slot = 0;

	// not what is cached in the slot: (re)open and return
	if (OpenFiles[slot].LotNr != lot
		|| (strcmp(OpenFiles[slot].subname,subname) != 0)
		|| (strcmp(OpenFiles[slot].type,type) != 0)
		|| (strcmp(OpenFiles[slot].resource,resource) != 0)
	) {

		// a handle that is still open in this slot has to be closed first
		if (OpenFiles[slot].LotNr != -1) {
			printf("lotOpenFile: closeing: i %i\n",slot);
			fclose(OpenFiles[slot].FILEHANDLER);
			OpenFiles[slot].LotNr = -1;
		}

		OpenFiles[slot].FILEHANDLER = lotOpenFileNoCasheByLotNr( lot, resource,type, lock,subname);
		if (OpenFiles[slot].FILEHANDLER == NULL) {
			printf("lotOpenFileNoCashe: can't open file\n");
			return NULL;
		}

		// remember what the slot now refers to
		GetFilPathForLot(lotDir,lot,subname);
		strscpy(fullName,lotDir,sizeof(fullName));
		strlcat(fullName,resource,sizeof(fullName));

		strscpy(OpenFiles[slot].filename,fullName,sizeof(OpenFiles[slot].filename));
		strscpy(OpenFiles[slot].resource,resource,sizeof(OpenFiles[slot].resource));
		strscpy(OpenFiles[slot].subname,subname,sizeof(OpenFiles[slot].subname));
		strscpy(OpenFiles[slot].type,type,sizeof(OpenFiles[slot].type));

		printf("lotOpenFile: opening file \"%s\" for %s\n",fullName,type);

		OpenFiles[slot].LotNr = lot;

		return OpenFiles[slot].FILEHANDLER;
	}

	// found in the cache: hand back the stored handle
	#ifdef DEBUG
	printf("lotOpenFile: fant en tildigere åpnet fil, returnerer den.\n");
	printf("lotOpenFile: returnerer: i %i, subname \"%s\", type \"%s\", LotNr %i\n",slot,OpenFiles[slot].subname,OpenFiles[slot].type,OpenFiles[slot].LotNr);
	printf("lotOpenFile: file is \"%s\"\n",OpenFiles[slot].filename);
	printf("lotOpenFile: returning file handler %p\n",OpenFiles[slot].FILEHANDLER);
	#endif

	if (OpenFiles[slot].FILEHANDLER == NULL) {
		printf("Error: FILEHANDLER is NULL\n");
		#ifdef DEBUG
			exit(-1);
		#endif
	}

	return OpenFiles[slot].FILEHANDLER;
}
Beispiel #11
0
/*
 * Opens a lot file for the lot that contains DocID, without using the handle
 * cache. The lot number is obtained from rLotForDOCid(); all other arguments
 * are passed unchanged to lotOpenFileNoCasheByLotNr(), whose result is
 * returned.
 */
FILE *lotOpenFileNoCashe(unsigned int DocID,char resource[],char type[], char lock,char subname[]) {

	int lot = rLotForDOCid(DocID);

	return lotOpenFileNoCasheByLotNr(lot, resource, type, lock, subname);
}
Beispiel #12
0
/*
 * Garbage collects the repository of one lot.
 *
 * Reads every post of the lot's "reposetory" file. A post whose address
 * differs from the RepositoryPointer stored in the DocumentIndex is dropped.
 * Every other post is appended to "repo.wip" and the DocumentIndex entry is
 * updated with the new offset. Afterwards "repo.wip" is renamed to
 * "reposetory" and "DocumentIndex.wip" to "DocumentIndex".
 *
 * LotNr   - lot to process
 * subname - collection the lot belongs to
 *
 * Always returns 0. Failures are reported on stderr.
 *
 * NOTE(review): htmlbuffer and imagebuffer put 1 MiB on the stack.
 */
int
gcrepo(int LotNr, char *subname)
{
	struct ReposetoryHeaderFormat ReposetoryHeader;

	char htmlbuffer[524288];
	char imagebuffer[524288];
	char *acl_allow;
	char *acl_deny;
	char *url, *attributes;
	unsigned long int raddress;
	char lotdir[1024];
	char wipname[1024 + 32];
	char finalname[1024 + 32];
	FILE *FNREPO;
	struct reformat *re;

	int keept = 0;
	int gced = 0;

	container *attrkeys = ropen();


	if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_HAVE_4_BYTES_VERSION_PREFIX|RE_COPYONCLOSE)) == NULL) {
		perror("reopen DocumentIndex");
		// do not leak the attribute-key container on the error path
		rclose(attrkeys);
		return 0;
	}


	if ( (FNREPO = lotOpenFileNoCasheByLotNr(LotNr,"reposetory","rb", 's',subname)) == NULL) {
		#ifdef DEBUG
			printf("lot dont have a reposetory file\n");
		#endif
		// do not leak the attribute-key container on the error path
		rclose(attrkeys);
		// NOTE(review): `re` is still left open here, as before. It was opened
		// with RE_COPYONCLOSE, so closing it may have side effects - confirm.
		return 0;
	}


	while (rGetNext_fh(LotNr,&ReposetoryHeader,htmlbuffer,sizeof(htmlbuffer),imagebuffer,&raddress,0,0,subname,&acl_allow,&acl_deny, FNREPO ,&url, &attributes)) {


		#ifdef DEBUG
		printf("dokument \"%s\", DocID %u.\n",
			RE_DocumentIndex(re,ReposetoryHeader.DocID)->Url,
			ReposetoryHeader.DocID);
		#endif

		if (raddress != RE_DocumentIndex(re,ReposetoryHeader.DocID)->RepositoryPointer) {
			// the DocumentIndex points elsewhere: this post is garbage
			#ifdef DEBUG
			// raddress is an unsigned long, so it needs %lu
			printf("Garbage collecting %d at %lu. docindex has %u\n", ReposetoryHeader.DocID, raddress,RE_DocumentIndex(re,ReposetoryHeader.DocID)->RepositoryPointer);
			#endif
			++gced;
		}
		else {
			// live post: copy it to repo.wip and record where it ended up
			unsigned long int offset;
			offset = rApendPost(&ReposetoryHeader, htmlbuffer, imagebuffer, subname, acl_allow, acl_deny, "repo.wip", url, attributes, attrkeys);
			RE_DocumentIndex(re,ReposetoryHeader.DocID)->RepositoryPointer = offset;
			#ifdef DEBUG
			printf("Writing DocID: %d\n", ReposetoryHeader.DocID);
			#endif
			++keept;

		}
	}
	fclose(FNREPO);

	rclose(attrkeys);

	printf("keept %i\ngced %i\n",keept,gced);

	reclose(re);


	/* And we have a race... */
	GetFilPathForLot(lotdir, LotNr, subname);

	// the name buffers are larger than lotdir, so these cannot truncate
	snprintf(wipname, sizeof(wipname), "%srepo.wip", lotdir);
	snprintf(finalname, sizeof(finalname), "%sreposetory", lotdir);
	if (rename(wipname, finalname) != 0) {
		perror("rename repo.wip");
	}

	snprintf(wipname, sizeof(wipname), "%sDocumentIndex.wip", lotdir);
	snprintf(finalname, sizeof(finalname), "%sDocumentIndex", lotdir);
	if (rename(wipname, finalname) != 0) {
		perror("rename DocumentIndex.wip");
	}

	#ifdef DI_FILE_CASHE
		closeDICache();
	#endif


	return 0;
}
Beispiel #13
0
/*
 * Builds the "Anchor" reverse index files for one lot from its anchor texts.
 *
 * argv[1] is the lot number, argv[2] the collection (subname).
 *
 * Every anchor text is lower-cased and split on spaces. For each word that is
 * not empty, not "www" and not a stop word, one record is appended to the
 * reverse index file selected by crc32(word) % NrOfDataDirectorys:
 *     DocID (unsigned int), language (unsigned char, always 0),
 *     WordID (unsigned long), number of hits (unsigned long, always 1),
 *     hit position (unsigned short, capped at 65535).
 * The hit position is a per-document counter that starts at 2000 and grows
 * with every anchor text, every "www" and every indexed word.
 *
 * Exits with 1 on a usage error, when the lot has no "anchors" file or when
 * memory cannot be allocated.
 *
 * NOTE(review): the reverse index files are not closed explicitly here.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int i;
	unsigned int DocID;
	char text[50];
	unsigned int radress;
	unsigned int rsize;
	char **Data;
	int TokCount;
	unsigned short hits;
	unsigned long WordID;
	int bucket;
	// written with sizeof(unsigned long); as an int the write over-read it
	unsigned long nr;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	unsigned char lang;
	FILE *FH;
	unsigned int DocIDPlace;
	int *nrOfLinkWordsToDocID;

	// check that we were told which lot to use
	if (argc < 3) {
		printf("Usage: ./anchorread lotnr subname\n\n");
		exit(1);
	}

	lotNr = atoi(argv[1]);
	char *subname = argv[2];

	nrOfLinkWordsToDocID = malloc(sizeof(int) * NrofDocIDsInLot);
	if (nrOfLinkWordsToDocID == NULL) {
		perror("malloc");
		exit(1);
	}

	for (i=0;i<NrofDocIDsInLot;i++) {
		// starts at 2000 so these are easy to tell apart from other hits
		nrOfLinkWordsToDocID[i] = 2000;
	}

	// the handle is only an existence probe
	if ( (FH = lotOpenFileNoCasheByLotNr(lotNr,"anchors","rb", 's',subname)) == NULL) {
		printf("lot dont have a anchors file\n");
		exit(1);
	}
	fclose(FH);

	revindexFilesOpenLocal(revindexFilesHa,lotNr,"Anchor","wb",subname);

	while (anchorGetNext(lotNr,&DocID,text,sizeof(text),&radress,&rsize,subname) ) {

			DocIDPlace = (DocID - LotDocIDOfset(rLotForDOCid(DocID)));
			++nrOfLinkWordsToDocID[DocIDPlace];

			convert_to_lowercase((unsigned char *)text);


			#ifdef DEBUG
			if (DocID == 4999999) {
				printf("DocID %i, text: \"%s\", DocIDPlace %i, nrOfLinkWordsToDocID %i\n",DocID,text,DocIDPlace,nrOfLinkWordsToDocID[DocIDPlace]);
			}
			#endif

			if ((TokCount = split(text, " ", &Data)) == -1) {
				printf("canæt splitt \"%s\"\n",text);
				// Data cannot be trusted after a failed split
				continue;
			}

			i=0;
			while (Data[i] != NULL) {

				if (Data[i][0] == '\0') {
					#ifdef DEBUG
						if (DocID == 4999999) {

							printf("emty data element\n");
						}
					#endif
				}
				else if (strcmp(Data[i],"www") == 0) {
					#ifdef DEBUG
						if (DocID == 4999999) {
							printf("www\n");
						}
					#endif
					++nrOfLinkWordsToDocID[DocIDPlace];
				}
				else if (isStoppWord(Data[i])) {
					// stop words are skipped without advancing the position
					#ifdef DEBUG
						if (DocID == 4999999) {
							printf("stopword \"%s\"\n",Data[i]);
						}
					#endif
				}
				else {

					#ifdef DEBUG
						if (DocID == 4999999) {
							printf("\t\"%s\" %i\n",Data[i],nrOfLinkWordsToDocID[DocIDPlace]);
						}
					#endif

					WordID = crc32boitho(Data[i]);

					if (WordID == 0) {
						printf("got 0 as word id for \"%s\". Somthing may be wrong.\n",Data[i]);
					}

					bucket = WordID % NrOfDataDirectorys;

					// the position is stored as an unsigned short: clamp it
					if (nrOfLinkWordsToDocID[DocIDPlace] > 65535) {
						hits = 65535;
					}
					else {
						hits = nrOfLinkWordsToDocID[DocIDPlace];

					}

					#ifdef DEBUG
						if (DocID == 4999999) {
							printf("\thits %i: \"%s\": %hu, bucket %i\n",i,Data[i],hits,bucket);
						}
					#endif


					if (fwrite(&DocID,sizeof(unsigned int),1,revindexFilesHa[bucket]) != 1) {
						perror("fwrite DocID");
					}
					// runarb, 13 May 2007: we switched to using a number for the
					// language. It should come from the DocumentIndex if present,
					// but it is not stored there; check IndexRes for how it is
					// done there.
					lang = 0;
					nr = 1;
					if(fwrite(&lang,sizeof(unsigned char),1,revindexFilesHa[bucket]) != 1) {
						perror("fwrite lang");
					}


					if(fwrite(&WordID,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
						perror("fwrite WordID");
					}

					if(fwrite(&nr,sizeof(nr),1,revindexFilesHa[bucket]) != 1) {
						perror("fwrite nr");
					}


					if(fwrite(&hits,sizeof(unsigned short),1,revindexFilesHa[bucket]) != 1) {
						perror("fwrite hits");
					}


					++nrOfLinkWordsToDocID[DocIDPlace];

				}


				++i;
			}
			FreeSplitList(Data);


			#ifdef DEBUG
				if (DocID == 4999999) {
				printf("\n");
				}
			#endif
	}

	free(nrOfLinkWordsToDocID);

	return 0;
}
Beispiel #14
0
/*
 * Runs garbage collection for one lot of a collection.
 *
 * Steps, in order:
 *   1. Return 0 right away when the lot has no "DocumentIndex" file.
 *   2. (BLACK_BOX builds only) Mark documents as deleted when the collection
 *      has no GCWHISPER_NOTOLD whisper and the document either was never seen
 *      (lastSeen == 0, only with gcaopt->lastSeenHack == 1) or was last seen
 *      more than gcaopt->MaxAgeDiflastSeen before newest_document. The
 *      gcaopt->gced and gcaopt->keept counters are updated.
 *   3. Record the deleted DocIDs with gc_reduce().
 *   4. Rewrite the repository with gcrepo().
 *   5. Re-run Indekser() for buckets 0..63 of the "Main", "acl_allow",
 *      "acl_denied" and "attributes" indexes with garbage collection on.
 *   6. Delete the lot's "gced" file.
 *
 * Returns 0. Exits the process with 1 when the DocumentIndex cannot be
 * reopened.
 */
int
gcdecide(int LotNr, char *subname, struct gcaoptFormat *gcaopt, time_t newest_document)
{
	char *indexTypes[] = { "Main", "acl_allow", "acl_denied", "attributes" };
	struct IndekserOptFormat IndekserOpt;
	struct reformat *re;
	FILE *DOCINDEXFH;
	whisper_t whisper;
	int slot;
	int t;
	int bucket;

	// probe for a document index; without one there is nothing to do
	DOCINDEXFH = lotOpenFileNoCasheByLotNr(LotNr,"DocumentIndex","rb", 's',subname);
	if (DOCINDEXFH == NULL) {
		#ifdef DEBUG
			printf("lot dont have a DocumentIndex file\n");
		#endif

		return 0;
	}
	fclose(DOCINDEXFH);

	blog(gcaopt->log,1,"Runing gc for collection \"%s\", lot nr %i",subname,LotNr);

	re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_COPYONCLOSE|RE_HAVE_4_BYTES_VERSION_PREFIX);
	if (re == NULL) {
		perror("can't reopen()");
		exit(1);
	}

	whisper = gcwhisper_read(subname);

	// walk every slot looking for documents that can be deleted
	for (slot = 0; slot < NrofDocIDsInLot; slot++) {

		if (DIS_isDeleted(REN_DocumentIndex(re, slot))) {
			continue;
		}

		#if defined(DEBUG) && defined(BLACK_BOX)
			printf("dokument \"%s\", lastSeen: %s",
				REN_DocumentIndex(re, slot)->Url,
				ctime_s(&REN_DocumentIndex(re, slot)->lastSeen));
		#endif

		#ifdef BLACK_BOX
		{
			int tooOld =
			    (whisper & GCWHISPER_NOTOLD) == 0 &&
			    (((gcaopt->lastSeenHack == 1) && (REN_DocumentIndex(re, slot)->lastSeen == 0))
			     || ((REN_DocumentIndex(re, slot)->lastSeen != 0) &&
			         (newest_document > (REN_DocumentIndex(re, slot)->lastSeen + gcaopt->MaxAgeDiflastSeen))));

			if (!tooOld) {
				++gcaopt->keept;
				continue;
			}

			// delete
			DIS_delete(REN_DocumentIndex(re, slot));

			// delete the document from the bb-specific structures
			bbdocument_delete (REN_DocumentIndex(re, slot)->Url, subname);

			blog(gcaopt->log,2,"dokument \"%s\" can be deleted. Last seen: %s, DocID %u",REN_DocumentIndex(re, slot)->Url,ctime_s(&REN_DocumentIndex(re, slot)->lastSeen),LotDocIDOfset(LotNr) +slot);
			++gcaopt->gced;
		}
		#endif
	}

	// record what can be deleted
	gc_reduce(re, LotNr, subname);

	reclose(re);

	// truncate the repository
	gcrepo(LotNr, subname);

	// clean the iindex
	IndekserOpt.garbareCollection = 1;
	IndekserOpt.sequenceMode = 1;
	IndekserOpt.optValidDocIDs = NULL;
	IndekserOpt.optAllowDuplicates = 0;
	IndekserOpt.optMustBeNewerThen = 0;

	for (t = 0; t < 4; t++) {
		for (bucket = 0; bucket < 64; bucket++) {
			Indekser(LotNr,indexTypes[t],bucket,subname,&IndekserOpt);
		}
	}

	// all changes from the rev index have been applied, so the gced file
	// can be deleted as well
	lotDeleteFile("gced", LotNr, subname);

	return 0;
}
Beispiel #15
0
void connectHandler(int socket) {
    struct packedHedderFormat packedHedder;

    int i,n;
    int LotNr;
    char lotPath[512];
    char buf[100];
    unsigned int FilterTime;
    int filnamelen;
    FILE *FH;
    struct stat inode;      // lager en struktur for fstat å returnere.
    off_t filesize;
    char c;

    struct DocumentIndexFormat DocumentIndexPost;
    int DocID;

    struct ReposetoryHeaderFormat ReposetoryHeader;
    unsigned int radress;

    char htmlbuffer[524288];
    int destLeng;
    char dest[512];

    off_t fileBloks,filerest;
    char *filblocbuff;


    //while ((i=read(socket, &packedHedder, sizeof(struct packedHedderFormat))) > 0) {
    while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) {

        //printf("command: %i\n",packedHedder.command);
        //printf("i er %i\n",i);
        printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
        //printf("subname: %s\n",packedHedder.subname);
        //lar size reflektere hva som er igjen av pakken
        packedHedder.size = packedHedder.size - sizeof(packedHedder);

        if (packedHedder.command == C_rmkdir) {

            printf("C_rmkdir\n");

            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant read destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant read dest");
                exit(1);
            }

            GetFilPathForLot(lotPath,LotNr,packedHedder.subname);

            sprintf(lotPath,"%s%s",lotPath,dest);

            printf("mkdir %s\n",lotPath);

            makePath(lotPath);

            printf("~C_rmkdir\n");


        }
        else if (packedHedder.command == C_rComand) {


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant read destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant read dest");
                exit(1);
            }

            printf("run command %s\n",dest);

            system(dest);

        }
        else if (packedHedder.command == C_getLotToIndex) {
            printf("fikk C_getLotToIndex\n");

            int dirty;

            if ((i=recv(socket, &dirty, sizeof(dirty),MSG_WAITALL)) == -1) {
                perror("Cant read dirty");
                exit(1);
            }

            printf("dirty: %i\n",dirty);

            LotNr = findLotToIndex(packedHedder.subname,dirty);

            printf("sending respons\n");
            sendall(socket,&LotNr, sizeof(LotNr));

        }
        else if (packedHedder.command == C_getlotHasSufficientSpace) {
            printf("fikk C_getLotToIndex\n");

            int needSpace;
            int response;

            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=recv(socket, &needSpace, sizeof(needSpace),MSG_WAITALL)) == -1) {
                perror("Cant read dirty");
                exit(1);
            }


            printf("needSpace: %i, LotNr %i\n",needSpace,LotNr);


            response = lotHasSufficientSpace(LotNr, needSpace, packedHedder.subname);


            printf("sending respons\n");
            sendall(socket,&response, sizeof(response));

        }
        else if (packedHedder.command == C_rGetSize) {
            printf("fikk C_rGetSize\n");


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            if (filnamelen > sizeof(buf)) {
                printf("filname to long\n");
            };

            if ((i=read(socket, buf, filnamelen)) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            printf("filname %s\n",buf);

            if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) {
                perror(buf);
                //sending that he fil is emty
                fileBloks = 0;

                sendall(socket,&fileBloks, sizeof(fileBloks));

            }
            else {
                //finner og sender il størelse
                fstat(fileno(FH),&inode);
                //filesize = inode.st_size;
                //sendall(socket,&filesize, sizeof(filesize));

                fileBloks = inode.st_size;

                printf("size is %" PRId64 "\n",fileBloks);

                sendall(socket,&fileBloks, sizeof(fileBloks));

                fclose(FH);
            }
        }
        else if (packedHedder.command == C_rGetFile) {
            printf("fikk C_rGetFile\n");


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            if (filnamelen > sizeof(buf)) {
                printf("filname to long\n");
            };

            if ((i=read(socket, buf, filnamelen)) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            printf("filname %s\n",buf);

            if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) {
                perror(buf);
                //sending that the fil is emty
                fileBloks = 0;
                filerest = 0;

                sendall(socket,&fileBloks, sizeof(fileBloks));
                sendall(socket,&filerest, sizeof(filerest));

            }
            else {
                //finner og sender fil størelse
                fstat(fileno(FH),&inode);
                //filesize = inode.st_size;
                //sendall(socket,&filesize, sizeof(filesize));

                fileBloks = (int)floor(inode.st_size / rNetTrabsferBlok);
                filerest = inode.st_size - (fileBloks * rNetTrabsferBlok);

                sendall(socket,&fileBloks, sizeof(fileBloks));
                sendall(socket,&filerest, sizeof(filerest));

                printf("sending fil. fileBloks %"PRId64", filerest %"PRId64"\n",fileBloks,filerest);


                filblocbuff = (char *)malloc(rNetTrabsferBlok);
                for(i=0; i < fileBloks; i++) {

                    //fread(filblocbuff,sizeof(c),rNetTrabsferBlok,FH);
                    //fread_all(const void *buf, size_t size, FILE *stream)
                    fread_all(filblocbuff,rNetTrabsferBlok,FH, 4096);

                    if ((n=sendall(socket, filblocbuff, rNetTrabsferBlok)) == -1) {
                        perror("Cant recv dest");
                        exit(1);
                    }

                }

                printf("did recv %i fileBloks\n",i);


                fread(filblocbuff,sizeof(c),filerest,FH);

                if ((n=sendall(socket, filblocbuff, filerest)) == -1) {
                    perror("Cant recv filerest");
                    exit(1);
                }

                free(filblocbuff);


                /*
                   for (i=0;i<filesize;i++) {
                   fread(&c,sizeof(char),1,FH);
                   send(socket, &c, sizeof(char), 0);
                //printf("%i\n",(int)c);
                }
                 */
                printf("send file end\n");

                fclose(FH);
            }

        }
        else if (packedHedder.command == C_rGetNext) {
            printf("fikk C_rGetNext\n");

            printf("støttes ikke lengere");
            exit(1);
            /*
            		//leser data. Det skal væren en unigned int som sier hvilken lot vi vil ha
            		//har deklarert den som int her ???
            		if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
            			perror("Cant read lotnr");
            			exit(1);
            		}
            		printf("leser FilterTime\n");
            		//leser filtertime
            		if ((i=read(socket, &FilterTime, sizeof(FilterTime))) == -1) {
            			perror("Cant read lotnr");
            			exit(1);
            		}

            		printf("lotnr %i FilterTime %u\n",LotNr,FilterTime);

            		//henter inn data om den lotten
            		if (rGetNext(LotNr,&ReposetoryHeader,htmlbuffer,NULL,&radress,FilterTime,0)) {

            			//printf("DocId: %i url: %s\n",ReposetoryHeader.DocID,ReposetoryHeader.url);

            			//sender pakke hedder
            			sendpacked(socket,C_rLotData,PROTOCOLVERSION, ReposetoryHeader.htmlSize + sizeof(ReposetoryHeader) +sizeof(radress), NULL,packedHedder.subname);

            			//sennder ReposetoryHeader'en
            			sendall(socket,&ReposetoryHeader, sizeof(ReposetoryHeader));

            			//sender htmlen
            			sendall(socket,&htmlbuffer, ReposetoryHeader.htmlSize);

            			//sender adressen
            			sendall(socket,&radress,sizeof(radress));
            			//printf("data sent\n");

            			//printf("rGetNext: %i\n",ReposetoryHeader.DocID);

            		}
            		else {
            			sendpacked(socket,C_rEOF,PROTOCOLVERSION, 0, NULL,packedHedder.subname);
            			printf("ferdig\n");
            		}
            */
        }
        else if (packedHedder.command == C_DIWrite) {


            if ((i=recv(socket, &DocumentIndexPost, sizeof(struct DocumentIndexFormat),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            DIWrite(&DocumentIndexPost,DocID,packedHedder.subname, NULL);

            //printf("DIWrite: %i\n",DocID);

        }
        else if (packedHedder.command == C_DIRead) {

            int DocID;
            struct DocumentIndexFormat DocumentIndexPost;

            printf("got commane C_DIRead. sise %i hsize %i ds %i\n",packedHedder.size, sizeof(packedHedder), sizeof(DocID));

            if ((i=recv(socket, &DocID, sizeof(DocID),0)) == -1) {
                perror("recv");
                exit(1);
            }
            //printf("DocID %i\n",DocID);

            //leser inn datan
            //int DIRead (struct DocumentIndexFormat *DocumentIndexPost, int DocID);
            DIRead(&DocumentIndexPost,DocID,packedHedder.subname);

            sendall(socket,&DocumentIndexPost, sizeof(struct DocumentIndexFormat));
        }
        else if (packedHedder.command == C_rGetIndexTime) {

            int Lotnr;
            unsigned int IndexTime;
            if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) {
                perror("recv");
                exit(1);
            }

            IndexTime = GetLastIndexTimeForLot(LotNr,packedHedder.subname);

            sendall(socket,&IndexTime, sizeof(IndexTime));

        }
        else if (packedHedder.command == C_rSetIndexTime) {

            int Lotnr;
            if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) {
                perror("recv");
                exit(1);
            }

            setLastIndexTimeForLot(LotNr,NULL,packedHedder.subname);

        }
        else if (packedHedder.command == C_rSendFile) {
            //skal mota en fil for lagring i reposetoryet
            //char FilePath[156];
            FILE *FILEHANDLER;
            char c;
            char opentype[2];
            //char *filblocbuff;
            //off_t fileBloks,filerest;

            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant recv lotnr");
                exit(1);
            }

            printf("lotNr %i\n",LotNr);


            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant recv destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant recv dest");
                exit(1);
            }

            printf("coping %s as length %i in to lot %i\n",dest,destLeng,LotNr);

            if ((i=recv(socket, &opentype, sizeof(char) +1,MSG_WAITALL)) == -1) {
                perror("Cant recv opentype");
                exit(1);
            }
            printf("opentype \"%s\"\n",opentype);


            //GetFilPathForLot(FilePath,LotNr,packedHedder.subname);

            //legger til filnavnet
            //strncat(FilePath,dest,sizeof(FilePath));

            //leser inn filstørelsen
            if ((i=recv(socket, &fileBloks, sizeof(fileBloks),MSG_WAITALL)) == -1) {
                perror("Cant recv fileBloks");
                exit(1);
            }

            if ((i=recv(socket, &filerest, sizeof(filerest),MSG_WAITALL)) == -1) {
                perror("Cant recv filerest");
                exit(1);
            }

            printf("fileBloks: %" PRId64 ", filerest: %" PRId64 "\n",fileBloks,filerest);

            //åpner filen
            if ((FILEHANDLER = lotOpenFileNoCasheByLotNr(LotNr,dest,opentype,'e',packedHedder.subname)) == NULL) {
                perror(dest);
            }

            filblocbuff = (char *)malloc(rNetTrabsferBlok);
            for(i=0; i < fileBloks; i++) {

                if ((n=recv(socket, filblocbuff, rNetTrabsferBlok,MSG_WAITALL)) == -1) {
                    perror("Cant recv dest");
                    exit(1);
                }

                fwrite(filblocbuff,sizeof(c),rNetTrabsferBlok,FILEHANDLER);
            }

            printf("did recv %i fileBloks\n",i);


            if ((n=recv(socket, filblocbuff, filerest,MSG_WAITALL)) == -1) {
                perror("Cant recv filerest");
                exit(1);
            }

            fwrite(filblocbuff,sizeof(c),filerest,FILEHANDLER);


            free(filblocbuff);

            fclose(FILEHANDLER);

            printf("\n");
        }
        else if (packedHedder.command == C_DIGetIp) {


            unsigned int DocID;
            struct DocumentIndexFormat DocumentIndexPost;

            //printf("got command C_DIGetIp\n");

            if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            //printf("DocID %u\n",DocID);

            DIRead(&DocumentIndexPost,DocID,packedHedder.subname);

            //printf("ipadress: %u\n",DocumentIndexPost.IPAddress);

            sendall(socket,&DocumentIndexPost.IPAddress, sizeof(DocumentIndexPost.IPAddress));


        }
        else if (packedHedder.command == C_anchorAdd) {
            size_t textlen;
            unsigned int DocID;
            char *text;

            printf("Add anchor....\n");
            if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            } else if ((i = recv(socket, &textlen, sizeof(textlen), MSG_WAITALL)) == -1) {
                perror("recv(textlen)");
                exit(1);
            }
            text = malloc(textlen+1);
            text[textlen] = '\0';
            if ((i = recv(socket, text, textlen, MSG_WAITALL)) == -1) {
                perror("recv(text)");
                exit(1);
            }

            anchoraddnew(DocID, text, textlen, packedHedder.subname, NULL);
            printf("Text for %d: %s\n", DocID, text);

            free(text);
        }
        else if (packedHedder.command == C_anchorGet) {
            size_t len;
            char *text;
            int LotNr;
            unsigned int DocID;
            printf("Get anchor...\n");

            if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }
            printf("got DocID %u\n",DocID);
            LotNr = rLotForDOCid(DocID);
            printf("trying to read anchor\n");

            len = anchorRead(LotNr, packedHedder.subname, DocID, NULL, -1);
            printf("got anchor of length %i\n",len);

            sendall(socket, &len, sizeof(len));
            text = malloc(len+1);

            printf("readint it again\n");
            anchorRead(LotNr, packedHedder.subname, DocID, text, len+1);
            sendall(socket, text, len);
        }
        else if (packedHedder.command == C_readHTML) {
            /*
            unsigned int DocID;
            unsigned int len;
            char *text;
            char *acla, *acld;
            struct DocumentIndexFormat DocIndex;
            struct ReposetoryHeaderFormat ReposetoryHeader;

            if ((i = recv(socket, &DocID, sizeof(DocID), MSG_WAITALL)) == -1) {
            	perror("recv");
            	exit(1);
            }

            if ((i = recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) {

            	perror("recv(len)");
            	exit(1);
            }
            printf("len %u\n",len);
            text = malloc(len);

            if (text == NULL)
            	exit(1);

            DIRead(&DocIndex, DocID, packedHedder.subname);


            if (!rReadHtml(
            		text,
            		&len,
            		DocIndex.RepositoryPointer,
            		DocIndex.htmlSize,
            		DocID,
            		packedHedder.subname,
            		&ReposetoryHeader,
            		&acla,
            		&acld,
            		DocIndex.imageSize)) {
            	len = 0;
            	sendall(socket, &len, sizeof(len));
            } else {
            	++len; // \0
            	#ifdef DEBUG
            	printf("docID %u\n",DocID);
            	printf("Got: (len %i, real %i) ########################\n%s\n#####################\n", len, strlen(text), text);
            	#endif
            	sendall(socket, &len, sizeof(len));
            	sendall(socket, text, len);
            	sendall(socket, &ReposetoryHeader,sizeof(ReposetoryHeader));
            }

            free(text);
            */
        }
        /*
        runarb: 06 des 2007: vi har gåt bort fra denne metoden for nå, og bruker heller index over smb. Men tar vare på den da vi kan trenge den siden

        else if (packedHedder.command == C_urltodocid) {
        	char cmd;
        	int alloclen;
        	char *urlbuf;

        	if (urltodociddb == NULL) {
        		cmd = C_DOCID_NODB;
        		sendall(socket, &cmd, sizeof(cmd));
        		exit(1);
        	} else {
        		cmd = C_DOCID_READY;
        		sendall(socket, &cmd, sizeof(cmd));
        	}
        	cmd = C_DOCID_NEXT;

        	alloclen = 1024;
        	urlbuf = malloc(alloclen);

        	do {
        		unsigned int DocID;
        		size_t len;
        		if ((i = recv(socket, &cmd, sizeof(cmd), MSG_WAITALL)) == -1) {
        			err(1, "recv(cmd)");
        		}
        		if (cmd == C_DOCID_DONE)
        			break;

        		if ((i == recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) {
        			err(1, "recv(len)");
        		}
        		if (alloclen < len+1) {
        			free(urlbuf);
        			alloclen *= 2;
        			urlbuf = malloc(alloclen);
        		}
        		if ((i == recv(socket, urlbuf, len, MSG_WAITALL)) == -1) {
        			err(1, "recv(len)");
        		}
        		urlbuf[len] = '\0';

        		if (!getDocIDFromUrl(urltodociddb, urlbuf, &DocID)) {
        			cmd = C_DOCID_NOTFOUND;
        			sendall(socket, &cmd, sizeof(cmd));
        		} else {
        			cmd = C_DOCID_FOUND;
        			sendall(socket, &cmd, sizeof(cmd));
        			sendall(socket, &DocID, sizeof(DocID));
        		}
        	} while (1);

        	free(urlbuf);
        }
        */
        else {
            printf("unnown comand. %i\n", packedHedder.command);
        }
        //printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
    } //while

}
Beispiel #16
0
/*
 * Sort the crc32attr.map file of a collection and rewrite it without
 * duplicate crc32 values.
 *
 * Usage: ./sortCrc32attrMap subname
 *
 * The existing map (lot 1 of the given subname) is mmap'ed MAP_SHARED and
 * sorted in place with qsort(), so the original file is modified too. The
 * unique records are then written to "crc32attr.map.new", which is finally
 * renamed over "crc32attr.map".
 *
 * Returns 0 on success (including an empty map, which is skipped) and -1 on
 * any failure. Exits with status 1 when called without a subname.
 */
int main (int argc, char *argv[]) {

        if (argc < 2) {
                printf("Program to sort a crc32attr.map\n\nUsage:\n\t./sortCrc32attrMap subname\n");
                exit(1);
        }

	char				*subname = argv[1];
	FILE				*f_crc32_words = NULL, *f_crc32_words_new = NULL;
	size_t				crc32_words_size = 0;
	struct stat			inode;
	/* NOTE(review): qsort() uses this hand-computed record size while the loop
	 * below indexes and writes with sizeof(struct Crc32attrMapFormat). The two
	 * only agree if the struct has no padding -- confirm against its definition. */
	int				attr_crc32_words_blocksize = sizeof(unsigned int) + sizeof(char)*MAX_ATTRIB_LEN;
	struct Crc32attrMapFormat	*m_crc32_words = NULL;
	size_t				i;
	size_t				nrOfElements;
	int				rc = -1;

	if ((f_crc32_words = lotOpenFileNoCasheByLotNr(1, "crc32attr.map", "r+", 's', subname)) == NULL) {
		perror("Can't open the crc32attr.map file.");
		goto out;
	}

	if (fstat(fileno(f_crc32_words), &inode) != 0) {
		perror("Can't fstat crc32attr.map");
		goto out;
	}
	crc32_words_size = inode.st_size;

	if (crc32_words_size == 0) {
		printf("crc32attr.map is 0 bytes. Skipping\n");
		rc = 0;
		goto out;
	}

	m_crc32_words = mmap(NULL, crc32_words_size, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(f_crc32_words), 0);
	if (m_crc32_words == MAP_FAILED) {
		perror("Can't mmap");
		/* Nothing is mapped; keep the cleanup code from calling munmap(). */
		m_crc32_words = NULL;
		goto out;
	}

	nrOfElements = (crc32_words_size / attr_crc32_words_blocksize);
	printf("Will sort %zu elements of sise %d\n",nrOfElements, attr_crc32_words_blocksize);

	qsort(m_crc32_words,nrOfElements,attr_crc32_words_blocksize, attr_crc32_words_block_compare);


	/************************************************************************************
	 Now when we have it sorted we will print out only uniq elements in a new file.
	************************************************************************************/
	if ((f_crc32_words_new = lotOpenFileNoCasheByLotNr(1, "crc32attr.map.new", "wb", 'e', subname)) == NULL) {
		perror("Can't open thecrc32attr.map.new file for lot");
		goto out;
	}

	for (i = 0; i < nrOfElements; i++) {

		/*
		 * Equal crc32 values are expected to be adjacent after the sort, so a
		 * record is a duplicate when it matches its predecessor. Comparing
		 * with the previous record (instead of a "last = 0" sentinel) keeps a
		 * leading record whose crc32 happens to be 0.
		 */
		if (i != 0 && m_crc32_words[i].crc32 == m_crc32_words[i - 1].crc32) {
			continue;
		}

		#ifdef DEBUG
		printf("crc32 %u, text %s\n",m_crc32_words[i].crc32, m_crc32_words[i].text);
		#endif

		if (fwrite(&m_crc32_words[i], sizeof(struct Crc32attrMapFormat), 1, f_crc32_words_new) != 1) {
			perror("fwrite crc32attr.map.new");
			goto out;
		}
	}

	munmap(m_crc32_words,crc32_words_size);
	m_crc32_words = NULL;

	fclose(f_crc32_words);
	f_crc32_words = NULL;

	/*
	 * fclose() flushes the buffered records. If that fails the new file is
	 * incomplete and must not replace the existing map.
	 */
	{
		int closeStatus = fclose(f_crc32_words_new);

		f_crc32_words_new = NULL;
		if (closeStatus != 0) {
			perror("fclose crc32attr.map.new");
			goto out;
		}
	}

	// Swap the files
	if (lotRename(1, subname, "crc32attr.map.new", "crc32attr.map") != 0) {
		perror("rename crc32attr.map.new crc32attr.map");
		goto out;
	}

	printf("Done\n");
	rc = 0;

out:
	/* Release whatever is still held; on the success path everything is already closed. */
	if (m_crc32_words != NULL) {
		munmap(m_crc32_words,crc32_words_size);
	}
	if (f_crc32_words_new != NULL) {
		fclose(f_crc32_words_new);
	}
	if (f_crc32_words != NULL) {
		fclose(f_crc32_words);
	}

	return rc;
}
/*
 * Allocate the buffer for a length-prefixed string field sent by a client.
 *
 * len  - the length that was read off the socket for this field.
 * what - name of the field, used only in the error message.
 *
 * Returns a buffer of len+1 bytes whose last byte is already '\0', so the
 * field is a valid C string once len bytes have been received into it, even
 * if the peer did not send a terminator. A negative length or a failed
 * allocation terminates the process, like the other protocol errors in
 * connectHandler().
 */
static char *bbdn_alloc_field(int len, const char *what) {
	char *buf;

	if (len < 0) {
		fprintf(stderr, "Invalid length %i for %s\n", len, what);
		exit(1);
	}

	buf = malloc((size_t)len + 1);
	if (buf == NULL) {
		perror("malloc");
		exit(1);
	}
	buf[len] = '\0';

	return buf;
}

/*
 * Serve one client connection.
 *
 * Reads packedHedderFormat headers from the socket until recv() returns 0 or
 * an error, and dispatches on packedHedder.command. The client must send
 * bbc_askToAuthenticate first; any other command before that makes the
 * process exit. Supported commands after authentication: bbc_docadd,
 * bbc_opencollection, bbc_closecollection, bbc_deleteuri,
 * bbc_deletecollection, bbc_addwhisper and bbc_HasSufficientSpace.
 *
 * String fields are sent as an int length followed by that many bytes.
 * Receive/send failures are fatal: the function calls exit(1) / err(1, ...).
 */
void connectHandler(int socket) {
	struct packedHedderFormat packedHedder;
	int isAuthenticated = 0;
	char tkeyForTest[32];
	int i,n;
	int intrespons;
	int count = 0;
	container *attrkeys = NULL;

	#ifdef DEBUG_TIME
		struct timeval start_time, end_time;
		struct timeval tot_start_time, tot_end_time;
		gettimeofday(&tot_start_time, NULL);
	#endif

	ionice_benice();

while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) {

	#ifdef DEBUG
	printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
	#endif
	packedHedder.size = packedHedder.size - sizeof(packedHedder);

	/* (Re)open the attribute keys; bbc_closecollection resets them to NULL. */
	if (attrkeys == NULL) {
		attrkeys = ropen();
	}

	if (packedHedder.command == bbc_askToAuthenticate) {
		if ((i=recv(socket, tkeyForTest, sizeof(tkeyForTest),MSG_WAITALL)) == -1) {
			perror("Cant read tkeyForTest");
			exit(1);
		}
		/* NOTE(review): the received key is never inspected, so every client is
		 * accepted and the failure branch below is unreachable. */
		if (1) {
			printf("authenticated\n");
			intrespons = bbc_authenticate_ok;

			bbdocument_init(NULL);

			isAuthenticated = 1;
		}
		else {
			printf("authenticate faild\n");
			intrespons = bbc_authenticate_feiled;

		}

		if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) {
			perror("Cant recv filerest");
			exit(1);
		}

	}
	else {
		if (!isAuthenticated) {
			printf("user not autentikated\n");
			exit(1);
		}


		if (packedHedder.command == bbc_docadd) {
			#ifdef DEBUG
			printf("bbc_docadd\n");
			#endif

			char *subname,*documenturi,*documenttype,*document,*acl_allow,*acl_denied,*title,*doctype;
			char *attributes;
			int dokument_size;
			unsigned int lastmodified;

			#ifdef DEBUG_TIME
				gettimeofday(&start_time, NULL);
			#endif

			//subname
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recvall(socket, subname, intrespons)) == 0) {
				perror("Cant read subname");
				exit(1);
			}

			//documenturi
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			documenturi = bbdn_alloc_field(intrespons, "documenturi");
			if ((i=recvall(socket, documenturi, intrespons)) == 0) {
				perror("Cant read documenturi");
				exit(1);
			}

			//documenttype
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			documenttype = bbdn_alloc_field(intrespons, "documenttype");
			if ((i=recvall(socket, documenttype, intrespons)) == 0) {
				perror("Cant read documenttype");
				exit(1);
			}

			//document
			//dokument_size
			if ((i=recvall(socket, &dokument_size, sizeof(dokument_size))) == 0) {
				perror("Cant read dokument_size");
				exit(1);
			}

			/* An empty document has no payload on the wire; the buffer is
			 * already the empty string in that case. */
			document = bbdn_alloc_field(dokument_size, "document");
			if (dokument_size != 0) {
				if ((i=recvall(socket, document, dokument_size)) == 0) {
					fprintf(stderr,"Can't read document of size %i\n",dokument_size);
					perror("recvall");
					exit(1);
				}
			}

			//lastmodified
			if ((i=recvall(socket, &lastmodified, sizeof(lastmodified))) == 0) {
				perror("Cant read lastmodified");
				exit(1);
			}

			//acl_allow
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			acl_allow = bbdn_alloc_field(intrespons, "acl_allow");
			if ((i=recvall(socket, acl_allow, intrespons)) == 0) {
				perror("Cant read acl_allow");
				exit(1);
			}

			//acl_denied
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			acl_denied = bbdn_alloc_field(intrespons, "acl_denied");
			if ((i=recvall(socket, acl_denied, intrespons)) == 0) {
				perror("Cant read acl_denied");
				exit(1);
			}

			//title
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			title = bbdn_alloc_field(intrespons, "title");
			if ((i=recvall(socket, title, intrespons)) == 0) {
				perror("Cant read title");
				exit(1);
			}

			//doctype
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			doctype = bbdn_alloc_field(intrespons, "doctype");
			if ((i=recvall(socket, doctype, intrespons)) == 0) {
				perror("Cant read doctype");
				exit(1);
			}

			// Attribute list
			if ((i = recvall(socket, &intrespons, sizeof(intrespons))) == 0)
				err(1, "Can't receive attribute list len");
			attributes = bbdn_alloc_field(intrespons, "attributes");
			if ((i=recvall(socket, attributes, intrespons)) == 0)
				err(1, "Can't receive attribute list");

			#ifdef DEBUG_TIME
				gettimeofday(&end_time, NULL);
				printf("Time debug: bbdn_docadd recv data time: %f\n",getTimeDifference(&start_time, &end_time));
			#endif

			printf("\n");
			printf("########################################################\n");
			printf("Url: %s\n",documenturi);
			printf("got subname \"%s\": title \"%s\". Nr %i, dokument_size %i attrib: %s\n",subname,title,count,dokument_size, attributes);
			printf("########################################################\n");
			printf("calling bbdocument_add():\n");
			#ifdef DEBUG_TIME
				gettimeofday(&start_time, NULL);
			#endif

			intrespons = bbdocument_add(subname,documenturi,documenttype,document,dokument_size,lastmodified,acl_allow,acl_denied,title,doctype, attributes, attrkeys);

			printf(":bbdocument_add end\n");
			printf("########################################################\n");

			#ifdef DEBUG_TIME
				gettimeofday(&end_time, NULL);
				printf("Time debug: bbdn_docadd runing bbdocument_add() time: %f\n",getTimeDifference(&start_time, &end_time));
			#endif
			free(subname);
			free(documenturi);
			free(documenttype);
			free(document);
			free(acl_allow);
			free(acl_denied);
			free(title);
			free(doctype);
			free(attributes);

			// send status
			if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) {
				perror("Cant recv filerest");
				exit(1);
			}

		}
		else if (packedHedder.command == bbc_opencollection) {
			char *subname;
			char path[PATH_MAX];

			printf("open collection\n");

			if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1)
				err(1, "Cant read intrespons");
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1)
				err(1, "Cant read subname");

			/* Remove the "fullyCrawled" marker in lot 1 of the collection. */
			GetFilPathForLot(path, 1, subname);
			strcat(path, "fullyCrawled");

			unlink(path);

			free(subname);
		}
		else if (packedHedder.command == bbc_closecollection) {
			printf("closecollection\n");
			char *subname;
			//subname
			if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) {
				perror("Cant read intrespons");
				exit(1);
			}
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) {
				perror("Cant read subname");
				exit(1);
			}

			bbdocument_close(attrkeys);
			attrkeys = NULL;

			// TODO: should use subname, and C rather than perl, here
			printf("cleanin lots start\n");
			char command[PATH_MAX];
			/* NOTE(review): subname comes straight from the client and is pasted
			 * into a shell command line; quotes or other shell metacharacters in
			 * it would be interpreted by the shell. Should be replaced by an
			 * exec-style call with an explicit argv. */
			snprintf(command,sizeof(command),"perl %s -l -s \"%s\"",bfile("perl/cleanLots.pl"),subname);

			printf("running \"%s\"\n",command);
			intrespons = system(command);
			printf("cleanin lots end\n");

			// adds the subname to the list of pending subnames and HUPs searchd.
			lot_recache_collection(subname);


			/* We are done crawling  */
			{
				int fd = lotOpenFileNoCasheByLotNrl(1, "fullyCrawled", ">>", '\0', subname);

				if (fd == -1) {
					warn("Unable to write fullyCrawled file");
				} else {
					close(fd);
				}
			}

			free(subname);

			/* The reply is the exit status returned by system() above. */
			if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) {
				perror("Cant recv filerest");
				exit(1);
			}

		}
		else if (packedHedder.command == bbc_deleteuri) {
			printf("deleteuri\n");
			char *subname, *uri;
			//subname
			if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) {
				perror("Cant read intrespons");
				exit(1);
			}
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) {
				perror("Cant read subname");
				exit(1);
			}
			//uri
			if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) {
				perror("Cant read intrespons");
				exit(1);
			}
			uri = bbdn_alloc_field(intrespons, "uri");
			if ((i=recv(socket, uri, intrespons,MSG_WAITALL)) == -1) {
				perror("Cant read uri");
				exit(1);
			}

			printf("going to delete: %s from %s\n", uri, subname);

			/* Add docid to the gced file */
			{
				FILE *fh;
				unsigned int DocID, lastmodified;
				unsigned int lotNr;
				/* Counts failed steps; each later step runs only while it is 0. */
				int failed = 0;

				if (uriindex_get(uri, &DocID, &lastmodified, subname) == 0) {
					fprintf(stderr,"Unable to get uri info. uri=\"%s\",subname=\"%s\".",uri,subname);
					perror("Unable to get uri info");
					failed++;
				}
				if (!failed) {
					lotNr = rLotForDOCid(DocID);

					if ((fh = lotOpenFileNoCasheByLotNr(lotNr,"gced","a", 'e',subname)) == NULL) {
						perror("can't open gced file");
						failed++;
					} else {
						/* Report a short write the same way gc_reduce() does. */
						if (fwrite(&DocID, sizeof(DocID), 1, fh) != 1) {
							perror("can't write gc file");
						}
						fclose(fh);
					}
				}
				if (!failed) {
					struct reformat *re;

					if((re = reopen(rLotForDOCid(DocID), sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) {
						perror("can't reopen()");
						failed++;
					} else {
						DIS_delete(RE_DocumentIndex(re, DocID));
						reclose(re);
					}
				}
				// mark the lot as dirty
				if (!failed) {
					FILE *dirtfh;
					dirtfh = lotOpenFileNoCashe(DocID,"dirty","ab",'e',subname);
					if (dirtfh == NULL) {
						/* Previously a failed open was passed straight to fwrite(). */
						perror("can't open dirty file");
						failed++;
					} else {
						fwrite("1",1,1,dirtfh);
						fclose(dirtfh);
					}
				}
				if (failed == 0)
					bbdocument_delete(uri, subname);
			}
			free(subname);
			free(uri);

			intrespons = 1; // Always return ok for now
			if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) {
				perror("Cant recv filerest");
				exit(1);
			}

		}
		else if (packedHedder.command == bbc_deletecollection) {
			printf("deletecollection\n");
			char *subname;
			//subname
			if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) {
				perror("Cant read intrespons");
				exit(1);
			}
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) {
				perror("Cant read subname");
				exit(1);
			}


			printf("going to delete collection: %s\n", subname);

			intrespons = bbdocument_deletecoll(subname);

			if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) {
				perror("Cant recv filerest");
				exit(1);
			}


			free(subname);
		}
		else if (packedHedder.command == bbc_addwhisper) {
			whisper_t add;
			char *subname;

			if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1)
				err(1, "Cant read intrespons");
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) {
				perror("Cant read subname");
				exit(1);
			}
			if ((i=recv(socket, &add, sizeof(add),MSG_WAITALL)) == -1)
				err(1, "Cant read add whisper");

			gcwhisper_write(subname, add);
			free(subname);

		}
		else if (packedHedder.command == bbc_HasSufficientSpace) {

			char *subname;
			//subname
			if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) {
				perror("Cant read intrespons");
				exit(1);
			}
			subname = bbdn_alloc_field(intrespons, "subname");
			if ((i=recvall(socket, subname, intrespons)) == 0) {
				perror("Cant read subname");
				exit(1);
			}

			// Only tests lot 1 here. Must also check other lots once we start supporting several disks on ES.
			intrespons = lotHasSufficientSpace(1, 4096, subname);

			if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) {
				perror("Cant recv filerest");
				exit(1);
			}

			printf("~Asked for HasSufficientSpace for subname \"%s\". Returnerer %d\n",subname, intrespons);

			free(subname);
		}
		else {
			printf("unnown comand. %i\n", packedHedder.command);
		}
	}

	/* Number of packets handled so far; shown as "Nr" in the bbc_docadd log line. */
	++count;

}

	#ifdef DEBUG_TIME
		gettimeofday(&tot_end_time, NULL);
		printf("Time debug: bbdn total time time: %f\n",getTimeDifference(&tot_start_time, &tot_end_time));
	#endif

}
Beispiel #18
0
struct userToSubnameDbFormat;

/*
 * Register every user listed in one lot file as having access to subname.
 *
 * The file `filename` in lot `lotNr` of `subname` is read line by line; each
 * line is passed through chomp() and added to the user-to-subname database
 * with userToSubname_add(). A line equal to the one just before it is skipped,
 * so runs of identical user names are added only once. If the file cannot be
 * opened the reason is reported with perror() and nothing is added.
 */
static void mergeUsersFromLotFile(struct userToSubnameDbFormat *userToSubnameDb, int lotNr, char *filename, char *subname) {
	FILE *fp;
	char username[MAX_USER_NAME_LEN], username_last[MAX_USER_NAME_LEN];

	if ((fp = lotOpenFileNoCasheByLotNr(lotNr,filename,"rb", 's',subname) ) == NULL) {
		perror(filename);
		return;
	}

	username_last[0] = '\0';
	while(fgets(username,sizeof(username),fp) != NULL) {
		chomp(username);
		if (strcmp(username_last,username) != 0) {
			printf("username \"%s\"\n",username);

			userToSubname_add(userToSubnameDb,username,subname);
			/* Both buffers have the same size, so the copy cannot overflow. */
			strcpy(username_last,username);
		}
	}

	fclose(fp);
}

/*
 * Merge the users named in a lot's "acllist" and "aclcollectionlist" files
 * into the user-to-subname database.
 *
 * Usage: ./mergeUserToSubname lotnr subname
 *
 * Exits with status 1 on wrong usage or if the database cannot be opened;
 * a missing list file is reported but is not fatal. Returns 0 otherwise.
 */
int main (int argc, char *argv[]) {
	struct userToSubnameDbFormat userToSubnameDb;

	if (argc != 3) {
		printf("usgae ./mergeUserToSubname lotnr subname\n");
		exit(1);
	}

	int lotNr = atoi(argv[1]);
	char *subname = argv[2];

	if (!userToSubname_open(&userToSubnameDb,'w')) {
		perror("userToSubname_open");
		exit(1);
	}

	mergeUsersFromLotFile(&userToSubnameDb, lotNr, "acllist", subname);
	mergeUsersFromLotFile(&userToSubnameDb, lotNr, "aclcollectionlist", subname);

	userToSubname_close(&userToSubnameDb);

	return 0;

}