예제 #1
0
/*
 * Thread worker: pulls DocIDs from NexDocID() until it returns 0 and, for each
 * one, tries to read its DocumentIndex record and (when the record has a
 * non-zero htmlSize) its stored HTML. Prints how many of each read succeeded.
 *
 * arg is a struct thargsF * handed through by the thread-creation call.
 * Always returns NULL; the result is only the printed counters.
 */
void *generatePagesResults(void *arg)
{
	struct thargsF *thargs = (struct thargsF *)arg;

	struct DocumentIndexFormat DocumentIndex;
	struct ReposetoryHeaderFormat ReposetoryHeader;

	int canDIRead = 0;
	int canrReadHtml = 0;

	unsigned int htmlBufferSize;
	unsigned int DocID;
	char *htmlBuffer;

	if ((htmlBuffer = malloc(max_html_size)) == NULL) {
		perror("can't malloc");
		/* a void * function must return a value; NULL signals "no result" */
		return NULL;
	}

	printf("in thread\n");

	while ((DocID = NexDocID(thargs)) != 0) {

		/* read the DocumentIndex record */
		if (!DIRead_fmode(&DocumentIndex,DocID,subname,'r')) {
			/* for whatever reason the record could not be read; report and move on */
			printf("Can't read DI post for %u-%i\n",DocID,rLotForDOCid(DocID));
			continue;
		}
		++canDIRead;

		printf("url: \"%s\"\n",DocumentIndex.Url);

		/* rReadHtml() takes the buffer capacity by pointer, so reset it per document */
		htmlBufferSize = max_html_size;

		if (DocumentIndex.htmlSize == 0) {
			/* no HTML stored for this document: nothing to read, nothing to count */
			continue;
		}

		if (rReadHtml(htmlBuffer,&htmlBufferSize,DocumentIndex.RepositoryPointer,DocumentIndex.htmlSize,DocID,
				subname,&ReposetoryHeader,NULL,NULL,DocumentIndex.imageSize) != 1) {
			printf("Can't read html post for %u-%i\n",DocID,rLotForDOCid(DocID));
			continue;
		}
		++canrReadHtml;
	}

	printf("canDIRead %i\n",canDIRead);
	printf("canrReadHtml %i\n",canrReadHtml);

	/* the buffer is private to this worker; release it before the thread ends */
	free(htmlBuffer);
	return NULL;
}
예제 #2
0
/*
 * Splits the global rank file SHORTPOPFILE (one byte per DocID, indexed by
 * DocID) into one "Brank" file per lot. Each lot's bytes are collected in a
 * temporary file and handed to rSendFileByOpenHandler() when the lot changes.
 *
 * Returns 0 on success, 1 if the input cannot be opened/read or a temporary
 * file cannot be created or written.
 */
int main() {
	FILE *FH;
	FILE *LOTFILE = NULL;	/* NULL until the first lot is started */
	struct stat inode;	/* filled in by fstat() */
	int nrOfElements;
	int LotNr, DocIDPlace, oldLotNr, i;
	unsigned char rank;	/* exactly one byte is read and written per DocID */

	if ( (FH = fopen(SHORTPOPFILE,"rb")) == NULL ) {
		perror("open");
		return 1;
	}

	if (fstat(fileno(FH),&inode) != 0) {
		perror("fstat");
		fclose(FH);
		return 1;
	}

	/* one byte per document, so the file size is the number of DocIDs */
	nrOfElements = inode.st_size;

	oldLotNr = -1;
	for (i=0;i<nrOfElements;i++) {
		/* fread() returns the number of items read, never -1 */
		if (fread(&rank,sizeof(unsigned char),1,FH) != 1) {
			perror("read");
			/* do not publish a half-written lot */
			if (LOTFILE != NULL) {
				fclose(LOTFILE);
			}
			fclose(FH);
			return 1;
		}

		/* find lot and offset within the lot */
		LotNr = rLotForDOCid(i);
		DocIDPlace = (i - LotDocIDOfset(LotNr));

		if (LotNr != oldLotNr) {
			/* entering a new lot: ship the finished one first */
			if (LOTFILE != NULL) {
				rSendFileByOpenHandler(LOTFILE,"Brank",oldLotNr,"w",subname);
				/* NOTE(review): assumes rSendFileByOpenHandler() leaves the stream open,
				   as the original close() call implied -- confirm */
				fclose(LOTFILE);
			}

			/* create a temporary file to hold this lot's data */
			if ((LOTFILE = tmpfile()) == NULL) {
				perror("tmpfile");
				fclose(FH);
				return 1;
			}

			printf("lot %i\n",LotNr);
			oldLotNr = LotNr;
		}

		/* seek to the document's slot and write its rank byte */
		if (fseek(LOTFILE,DocIDPlace,SEEK_SET) != 0
		    || fwrite(&rank,sizeof(unsigned char),1,LOTFILE) != 1) {
			perror("write");
			fclose(LOTFILE);
			fclose(FH);
			return 1;
		}
	}

	/* ship the last lot, if the input contained any documents at all */
	if (LOTFILE != NULL) {
		rSendFileByOpenHandler(LOTFILE,"Brank",oldLotNr,"w",subname);
		fclose(LOTFILE);
	}
	fclose(FH);

	return 0;
}
예제 #3
0
/*
 * Looks up the lot that DocID belongs to (rLotForDOCid) and lets
 * GetFilPathForLot() fill FilePath for that lot and subname.
 */
void GetFilPathForLotByDocID(char *FilePath,int DocID,char subname[]) {
	GetFilPathForLot(FilePath, rLotForDOCid(DocID), subname);
}
/*
 * Reads the DocumentIndex record for DocID into *DocumentIndexPost.
 *
 * The file handle comes from GetFileHandler(DocID, filemode, subname, NULL)
 * and the record is read with DIRead_post_fh(). Unless DI_FILE_CASHE is
 * defined the handle is closed again before returning. With DISK_PROTECTOR
 * defined the whole operation runs between dp_lock()/dp_unlock() for the lot.
 *
 * On a successful read a non-zero htmlSize is mirrored into htmlSize2.
 *
 * Returns 1 when a record was read, 0 when the file could not be opened or
 * the read failed. On failure *DocumentIndexPost is not inspected, since the
 * caller may have passed an uninitialised struct.
 */
int DIRead_fmode (struct DocumentIndexFormat *DocumentIndexPost, int DocID,char subname[], char filemode) {

	FILE *file;
	int forReturn = 0;

	#ifdef DEBUG
		printf("DIRead_fmode(DocID=%i, subname=\"%s\")\n",DocID,subname);
	#endif

	#ifdef DISK_PROTECTOR
		dp_lock(rLotForDOCid(DocID));
	#endif

	if ((file = GetFileHandler(DocID,filemode,subname, NULL)) != NULL) {

		if (DIRead_post_fh(DocumentIndexPost,file)) {
			forReturn = 1;
		}

		/* without DI_FILE_CASHE nobody else keeps the handle, so close it here */
		#ifndef DI_FILE_CASHE
			fclose(file);
		#endif

	}
	else {
		/* cast: the format is %u while DocID is a signed int */
		printf("can't open DocumentIndexPost for DocID %u.\n",(unsigned int)DocID);
	}

	/* only touch the record when it was actually filled in */
	if (forReturn && DocumentIndexPost->htmlSize != 0) {
		DocumentIndexPost->htmlSize2 = DocumentIndexPost->htmlSize;
	}

	#ifdef DISK_PROTECTOR
		dp_unlock(rLotForDOCid(DocID));
	#endif

	return forReturn;

}
예제 #5
0
/*
 * Opens the file `resource` inside the lot directory of DocID for direct
 * (O_DIRECT) read-only access and optionally takes an flock() on it.
 *
 * type  must be "r" or "rb"; any other mode terminates the program via errx().
 * lock  'e' takes an exclusive lock, 's' a shared lock, anything else no lock.
 *
 * Returns the open file descriptor, or -1 if the path does not fit in
 * PATH_MAX or the file cannot be opened. The caller closes the descriptor.
 */
int
lotOpenFileNoCache_direct(unsigned int DocID, char *resource, char *type, char lock, char *subname)
{
	unsigned int LotNr = rLotForDOCid(DocID);
	char FilePath[PATH_MAX];
	char File [PATH_MAX];
	int pathLen;
	int fd;

	printf("lotOpenFileNoCache_direct(subname: \"%s\", resource %s)\n",subname,resource);
	GetFilPathForLot(FilePath,LotNr,subname);

	/* strncat()'s bound is the number of bytes to append, not the destination size,
	   so the old strcpy+strncat(…, PATH_MAX) could overflow File; snprintf is bounded */
	pathLen = snprintf(File, sizeof(File), "%s%s", FilePath, resource);
	if (pathLen < 0 || (size_t)pathLen >= sizeof(File)) {
		warnx("lotOpenFileNoCache_direct: path too long for resource \"%s\"", resource);
		return -1;
	}

#ifdef DEBUG
	printf("lotOpenFileNoCasheByLotNr: opening file \"%s\" for %s\n",File,type);
#endif

	/* for reading there is no point in trying to create the path: the file still would not exist */
	if ((strcmp(type,"rb") == 0) || (strcmp(type,"r") == 0)) {
		if ((fd = open64(File, O_RDONLY|O_DIRECT|O_LARGEFILE)) == -1) {
			warn("open64: %d", fd);
#ifdef DEBUG
			perror(File);
#endif
			return -1;
		}
	} else {
		errx(1, "We can only open this for reading right now");
	}

#ifdef DEBUG
	printf("lotOpenFile: tryint to obtain lock \"%c\"\n",lock);
#endif
	/* handle locking; a failed lock is reported but the descriptor is still returned */
	if (lock == 'e') {
		if (flock(fd, LOCK_EX) == -1) {
			warn("flock(LOCK_EX)");
		}
	}
	else if (lock == 's') {
		if (flock(fd, LOCK_SH) == -1) {
			warn("flock(LOCK_SH)");
		}
	}
#ifdef DEBUG
	printf("lotOpenFile: lock obtained\n");
#endif

#ifdef DEBUG
	printf("lotOpenFileNoCasheByLotNr: finished\n");
#endif
	return fd;

}
/*
 * Reads the DocumentIndex record for DocID into *DocumentIndexPost using an
 * already open DocumentIndex file.
 *
 * file == NULL  falls back to DIRead_fmode(..., 'r'), which opens the file itself.
 * file != NULL  seeks to DIPostAdress(DocID) and reads the record from there;
 *               a failed seek terminates the program with exit(1).
 *
 * On a successful read a non-zero htmlSize is mirrored into htmlSize2.
 * Returns 1 when a record was read, 0 otherwise. On failure the record is not
 * inspected, since it may never have been initialised.
 */
int DIRead_fh(struct DocumentIndexFormat *DocumentIndexPost, int DocID,char subname[], FILE *file) {

	int forReturn = 0;

	if (file == NULL) {
		#ifdef DEBUG
			printf("DIRead_fh: file isent open.\n");
		#endif
		forReturn = DIRead_fmode(DocumentIndexPost,DocID,subname,'r');
	}
	else {
		#ifdef DISK_PROTECTOR
			dp_lock(rLotForDOCid(DocID));
		#endif

		/* seek to the record for this DocID */
		if (fseek(file,DIPostAdress(DocID),SEEK_SET) != 0) {
			perror("Can't seek");
			exit(1);
		}

		if (DIRead_post_fh(DocumentIndexPost,file)) {
			forReturn = 1;
		}
		#ifdef DISK_PROTECTOR
			dp_unlock(rLotForDOCid(DocID));
		#endif

	}

	/* only touch the record when it was actually filled in */
	if (forReturn && DocumentIndexPost->htmlSize != 0) {
		DocumentIndexPost->htmlSize2 = DocumentIndexPost->htmlSize;
	}

	return forReturn;
}
예제 #7
0
/*
 * Decides whether CurentSider should be filtered out because its domain is
 * already represented among the first `showabal` entries of Sider.
 *
 * Entries flagged `deletet` are skipped. The comparison is an exact strcmp()
 * on the `domain` field.
 *
 * Returns 1 when two or more non-deleted entries share CurentSider's domain,
 * 0 otherwise. A page with an empty domain is never filtered.
 */
int filterSameDomain(int showabal,struct SiderFormat *CurentSider, struct SiderFormat *Sider) {

	int i;
	int count = 0;

	/* do not filter pages we have no domain for; typically ppc ads that
	   point to an out.cgi page on the same domain */
	if (CurentSider->domain[0] == '\0') {
		/* the message used to sit after the return and could never be printed */
		#ifdef DEBUG
			printf("Warn: domain is blank, wont try to filter it.\n");
		#endif
		return 0;
	}

	for (i=0;i<showabal;i++) {

		if (Sider[i].deletet) {
			continue;
		}

		if (strcmp(CurentSider->domain,Sider[i].domain) == 0) {
			#ifdef DEBUG
			if (count < 2) {
			printf("domain is the same. Urls Url \"%s\" (domain \"%s\", DociD %u-%i, DomainID %ho) == \"%s\" (domain \"%s\", DocID %u-%i, DomainID %ho)\n",
				Sider[i].DocumentIndex.Url,Sider[i].domain,Sider[i].iindex.DocID,rLotForDOCid(Sider[i].iindex.DocID),Sider[i].DomainID,
				(*CurentSider).DocumentIndex.Url,(*CurentSider).domain,(*CurentSider).iindex.DocID,rLotForDOCid((*CurentSider).iindex.DocID),(*CurentSider).DomainID);
			}
			#endif

			++count;
		}
	}

	#ifdef DEBUG
	printf("have a total of %i from this domain\n",count);
	#endif

	return (count < 2) ? 0 : 1;
}
예제 #8
0
/*
 * Builds the full path of the thumbnail image for DocID in FileName:
 *   <lot path>images/<DocID % 512>/<DocID>.jpg
 *
 * The lot path is produced by GetFilPathForLot() for the lot DocID belongs to.
 * FileName must be large enough for the lot path plus the appended suffix;
 * the size is not known here, so no bound is enforced.
 */
void GetFilPathForThumbnaleByDocID(char *FileName,int DocID,char subname[]) {

	int LotNr;
	int ImageBucket;
	char *tail;

	/* images are spread over 512 bucket directories; plain integer modulo
	   gives the same result as the former fmod() call */
	ImageBucket = DocID % 512;

	/* find the lot path */
	LotNr = rLotForDOCid(DocID);
	GetFilPathForLot(FileName,LotNr,subname);

	/* Append in place. Passing FileName as both the destination and a %s
	   argument of sprintf() is undefined behaviour (overlapping objects). */
	tail = FileName;
	while (*tail != '\0') {
		++tail;
	}
	sprintf(tail,"images/%i/%i.jpg",ImageBucket,DocID);
}
예제 #9
0
/*
 * Looks up the rank stored for DocID in the in-memory table popMemArray,
 * which is indexed first by lot number and then by the document's position
 * within the lot. Returns 0 when no table is present for the lot.
 */
int popRankForDocIDMemArray(unsigned int DocID) {
	int lot;
	int slot;

	/* locate the lot and the offset inside it */
	lot = rLotForDOCid(DocID);
	slot = (DocID - LotDocIDOfset(lot));

	if (popMemArray[lot] == 0) {
		/* nothing loaded for this lot */
		return 0;
	}

	#ifdef DEBUG
		printf("have rank %u, i:%i, y:%i\n",(unsigned int)popMemArray[lot][slot],lot,slot);
	#endif
	return popMemArray[lot][slot];
}
예제 #10
0
/*
 * Computes the byte offset of DocID's record inside its lot's DocumentIndex
 * file: record size times the document's position within the lot.
 * With BLACK_BOX defined each record is followed by one extra unsigned int.
 */
int DIPostAdress(unsigned int DocID) {

	/* size of one stored record, depending on build flavour */
	#ifdef BLACK_BOX
		const size_t postSize = sizeof(struct DocumentIndexFormat) + sizeof(unsigned int);
	#else
		const size_t postSize = sizeof(struct DocumentIndexFormat);
	#endif

	/* lot this DocID lives in */
	int lot = rLotForDOCid(DocID);

	return postSize * (DocID - LotDocIDOfset(lot));
}
예제 #11
0
/*
 * Looks up the adult weight stored for DocID in the in-memory table
 * adultWeightMemArray (indexed by lot, then by position within the lot).
 *
 * Returns -3 for a negative DocID, 0 when no table is present for the lot,
 * otherwise the stored value.
 */
int adultWeightForDocIDMemArray(int DocID) {
	int lot;
	int slot;

	/* a negative DocID means something is wrong upstream */
	if (DocID < 0) {
		return -3;
	}

	/* locate the lot and the offset inside it */
	lot = rLotForDOCid(DocID);
	slot = (DocID - LotDocIDOfset(lot));

	if (adultWeightMemArray[lot] == 0) {
		return 0;
	}

	return adultWeightMemArray[lot][slot];
}
예제 #12
0
/*
 * addanchors: reads fixed-size struct anchorfileFormat records from the file
 * named in argv[1] and passes each record whose lot is local
 * (lotlistIsLocal) to anchoradd().
 *
 * Exits with status 1 on a usage error or when the file cannot be opened;
 * returns 0 otherwise.
 */
int main (int argc, char *argv[]) {

	FILE *UPDATEFILE;
	struct anchorfileFormat anchorfileData;

	/* check that we were told which file to use */
	if (argc < 2) {
		printf("Usage: ./addanchors anchorfile\n\n\tanchorfile, fil med tekster på linker\n\n");
		exit(1);
	}

	if ((UPDATEFILE = fopen(argv[1],"rb")) == NULL) {
		printf("Cant read anchorfile ");
		perror(argv[1]);
		exit(1);
	}

	lotlistLoad();
	/* NOTE(review): the next line is a function prototype, not a call, so no
	   lots are marked local here. It looks like a call with a server name
	   was intended -- confirm and replace. */
	void lotlistMarkLocals(char server[]);

	/* Loop on the fread() result: testing feof() before reading makes the
	   loop run once more after the last record and reprocess stale data. */
	while (fread(&anchorfileData,sizeof(struct anchorfileFormat),1,UPDATEFILE) == 1) {

		/* check whether this is a local lot (marked "temp: untested" by the author) */
		if (lotlistIsLocal(rLotForDOCid(anchorfileData.DocID))) {
			anchoradd(anchorfileData.DocID,anchorfileData.text,sizeof(anchorfileData.text));
		}
		else {
			printf("lot is not locale");
		}
	}

	if (ferror(UPDATEFILE)) {
		perror(argv[1]);
	}
	fclose(UPDATEFILE);

	return 0;
}
/*
 * Writes one page of search results to stdout as an RSS 2.0 document with
 * OpenSearch 1.1 extensions.
 *
 * total_res      total number of hits, printed and used as an upper bound
 * results        result array; entries flagged `deletet` are skipped
 * queryNodeHeder supplies MaxsHits; MaxsHits * num_servers bounds the array index
 * start          1-based page number
 * res_per_page   number of normal results per page
 * query_escaped  query text, printed as given (the caller must have escaped it)
 *
 * Only entries of type siderType_normal count towards the page size; other
 * non-deleted entries are printed without consuming a slot.
 */
void disp_out_opensearch(int total_res, struct SiderFormat *results, struct queryNodeHederFormat *queryNodeHeder, int num_servers, int start, int res_per_page, char *query_escaped) {
	int i, x;
	const char *host = getenv("HTTP_HOST");

	/* getenv() returns NULL when the variable is unset; passing NULL to %s is undefined */
	if (host == NULL) {
		host = "localhost";
	}

	printf("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
	printf("<rss version=\"2.0\" xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n");
	printf("  <channel>\n");
	printf("    <title>%s - Searchdaimon results</title>\n", query_escaped);
	printf("    <description>Results for \"%s\".</description>\n", query_escaped);
	printf("    <opensearch:totalResults>%i</opensearch:totalResults>\n", total_res);
	printf("    <opensearch:startIndex>%i</opensearch:startIndex>\n", start);
	printf("    <opensearch:itemsPerPage>%i</opensearch:itemsPerPage>\n", res_per_page);
	printf("    <atom:link rel=\"search\" type=\"application/opensearchdescription+xml\" href=\"http://%s/webclient/opensearchdescription.xml\"/>\n",
			host);

	/* index of the first result on the requested page */
	i = res_per_page * (start -1);
	if (i < 0) {
		/* start < 1 would otherwise index before the beginning of results */
		i = 0;
	}
	x = i;

	/* x counts normal results, i walks the array */
	while ((x<(res_per_page * start)) && ( x < total_res) && (i < (queryNodeHeder->MaxsHits * num_servers))) {

		if (!results[i].deletet) {

			printf("<item>\n");
			printf("\t<docid>%i-%i</docid>\n",results[i].iindex.DocID, rLotForDOCid(results[i].iindex.DocID));
			printf("\t<title><![CDATA[%s]]></title>\n", results[i].title);
			printf("\t<link><![CDATA[%s]]></link>\n", results[i].url);
			printf("\t<description>%s</description>\n", results[i].description);
			printf("</item>\n");

			/* only normal pages count towards the page size */
			if (results[i].type == siderType_normal) {
				++x;
			}
		}
		++i;
	}

	printf("  </channel>\n</rss>\n");
}
예제 #14
0
/*
 * Indexes one lot. argv[1] is the lot number.
 *
 * The network branch is disabled by `if (0)`, so only the local branch runs:
 * it walks the lot's repository with rGetNext(), skips records whose DocID
 * does not map back to this lot and URLs that do not contain ".no/", and
 * hands the rest to handelPage(). Writing the DocumentIndex record (DIWrite)
 * and updating the lot's last index time are commented out in this branch.
 *
 * At the end it prints a histogram of the HTTP response codes collected in
 * httpResponsCodes and the number of pages handled.
 */
int main (int argc, char *argv[]) {

        int lotNr;
	char lotServer[64];
	int pageCount;
	int i;

        unsigned int FiltetTime;
        unsigned int FileOffset;

        char htmlcompressdbuffer[524288];  //0.5 mb
        char imagebuffer[524288];  //0.5 mb

	int httpResponsCodes[nrOfHttpResponsCodes];

	struct ReposetoryHeaderFormat ReposetoryHeader;
	struct DocumentIndexFormat DocumentIndexPost;
	unsigned long int radress;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	struct adultFormat adult;
	unsigned int lastIndexTime;
        if (argc < 2) {
                printf("Dette programet indekserer en lot. Gi det et lot nummer\n");
                exit(0);
        }

	// start every response-code counter at zero
	for(i=0;i<nrOfHttpResponsCodes;i++) {
		httpResponsCodes[i] = 0;
	}

	lotNr = atoi(argv[1]);



	//find server based on lotnr
	lotlistLoad();
	lotlistGetServer(lotServer,lotNr);


	printf("vil index lot nr %i at %s\n",lotNr,lotServer);

	adultLoad(&adult);


	//temp: must be fetched from the slot server or a file
	FiltetTime = 0;
	FileOffset = 0;

	pageCount = 0;

	// network mode is switched off: this branch is never taken
	if (0) {


		printf("will ges pages by net\n");

		revindexFilesOpenNET(revindexFilesHa);

		while (rGetNextNET(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset)) {
			
			global_curentDocID = ReposetoryHeader.DocID;		
			// a '?' in the URL marks it as dynamic
			if (strchr(ReposetoryHeader.url,'?') == 0) {
				global_curentUrlIsDynamic = 0; 
			}
			else {
				global_curentUrlIsDynamic = 1;
			}

			
			handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult);
			//the data must be copied over in any case
			//copy the DI data over
			copyRepToDi(&DocumentIndexPost,&ReposetoryHeader);

			DocumentIndexPost.RepositoryPointer = radress;


			//write to the DocumentIndex
			DIWriteNET(lotServer,&DocumentIndexPost,ReposetoryHeader.DocID);


			++pageCount;
		
			//temp: 
			//if(pageCount > 1000) {
			//	break;
			//}

		}

		printf("Sending pages\n");

		revindexFilesSendNET(revindexFilesHa,lotNr);

	}
	else {
		printf("Wil acess files localy\n");

		//find the last indexing time (the value is not used further down)
		lastIndexTime =  GetLastIndexTimeForLot(lotNr);

		//temp:
		/***********************************************************/
		//if(lastIndexTime != 0) {
		//	printf("lastIndexTime is not 0, but %i\n",lastIndexTime);
		//	exit(1);
		//}

		//FiltetTime = lastIndexTime;
		//if(lastIndexTime == 0) {
		//	printf("lastIndexTime is not 0, but %i\n",lastIndexTime);
		//	exit(1);
		//}
		/***********************************************************/
		
		revindexFilesOpenLocal(revindexFilesHa,lotNr);

		

		while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset)) {
		
			//printf("D: %lu, R: %lu\n",ReposetoryHeader.DocID, radress);

			//the page may be corrupt: check that the DocID maps to the same lot as the one we are reading
			if (rLotForDOCid(ReposetoryHeader.DocID) != lotNr) {
				printf("bad DocID %i\n",ReposetoryHeader.DocID);
			}
			//only index .no pages
			else if (strstr(ReposetoryHeader.url,".no/") == 0){
				//not .no
			}
			else {
				global_curentDocID = ReposetoryHeader.DocID;
				// a '?' in the URL marks it as dynamic
				if (strchr(ReposetoryHeader.url,'?') == 0) {
					global_curentUrlIsDynamic = 0; 
				}
				else {
					global_curentUrlIsDynamic = 1;
				}

				handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult);

				//printf("%s %i\n",ReposetoryHeader.url,DocumentIndexPost.AdultWeight);
				
				//the data must be copied over in any case
				//copy the DI data over
				copyRepToDi(&DocumentIndexPost,&ReposetoryHeader);
				DocumentIndexPost.RepositoryPointer = radress;



				//write to the DocumentIndex
				//not writing for now: DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID);
				

			++pageCount;

			}
		
			//temp: 
			//if(pageCount > 10) {
			//	break;
			//}

		}

		//write the correct index time to the lot
		//temp: setLastIndexTimeForLot(lotNr);

		// no need to copy the revindex files since we work directly on the local ones
	}

	//print a summary of the http responses we came across
	printf("http responses:\n");
	for(i=0;i<nrOfHttpResponsCodes;i++) {
		if (httpResponsCodes[i] != 0) {
			printf("%i: %i\n",i,httpResponsCodes[i]);
		}
        }

	printf("indexed %i pages\n",pageCount);

}
예제 #15
0
/*
 * Receives one length-prefixed string: a native int byte count followed by
 * that many bytes. The result is malloc'ed, always NUL-terminated, and owned
 * by the caller.
 *
 * use_recvall  non-zero reads with recvall() (failure is a 0 return, as in the
 *              original call sites); zero reads with recv(MSG_WAITALL) and
 *              treats anything but a complete read as failure.
 * lenmsg/bodymsg  perror() prefixes for a failed length / payload read.
 *
 * Any failure terminates the process with exit(1), matching how
 * connectHandler treats protocol errors.
 */
static char *bbdn_recv_string(int socket, int use_recvall, const char *lenmsg, const char *bodymsg)
{
	int len = 0;
	char *buf;

	if (use_recvall) {
		if (recvall(socket, &len, sizeof(len)) == 0) {
			perror(lenmsg);
			exit(1);
		}
	}
	else if (recv(socket, &len, sizeof(len), MSG_WAITALL) != (int)sizeof(len)) {
		perror(lenmsg);
		exit(1);
	}

	/* the length comes from the peer: never hand a negative value to malloc() */
	if (len < 0) {
		fprintf(stderr, "%s: invalid length %i\n", lenmsg, len);
		exit(1);
	}

	if ((buf = malloc((size_t)len + 1)) == NULL) {
		perror("malloc");
		exit(1);
	}

	if (use_recvall) {
		if (recvall(socket, buf, len) == 0) {
			perror(bodymsg);
			exit(1);
		}
	}
	else if (recv(socket, buf, len, MSG_WAITALL) != len) {
		perror(bodymsg);
		exit(1);
	}

	/* the payload is sent without a terminator; callers use it as a C string */
	buf[len] = '\0';
	return buf;
}

/* Sends one int status word to the peer; terminates the process if that fails. */
static void bbdn_send_status(int socket, int status)
{
	if (sendall(socket, &status, sizeof(status)) == -1) {
		perror("Cant send response");
		exit(1);
	}
}

/*
 * Returns 1 when s contains none of the characters that keep their special
 * meaning inside a double-quoted sh word (" \ $ `), i.e. when it can be
 * embedded in "..." on a shell command line without being interpreted.
 */
static int bbdn_is_shell_safe(const char *s)
{
	return strpbrk(s, "\"\\$`") == NULL;
}

/*
 * Serves one client connection: reads packedHedderFormat headers in a loop
 * and dispatches on the command until recv() on the header returns <= 0.
 *
 * The first useful command must be bbc_askToAuthenticate; any other command
 * before that terminates the process. Protocol and I/O errors terminate the
 * process with exit(1).
 *
 * Commands handled: bbc_docadd, bbc_opencollection, bbc_closecollection,
 * bbc_deleteuri, bbc_deletecollection, bbc_addwhisper, bbc_HasSufficientSpace.
 */
void connectHandler(int socket) {
	struct packedHedderFormat packedHedder;
	int isAuthenticated = 0;
	char tkeyForTest[32];
	int i;
	int intrespons;
	int count = 0;
	container *attrkeys = NULL;

	#ifdef DEBUG_TIME
		struct timeval start_time, end_time;
		struct timeval tot_start_time, tot_end_time;
		gettimeofday(&tot_start_time, NULL);
	#endif

	ionice_benice();

	while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) {

		#ifdef DEBUG
		printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
		#endif
		packedHedder.size = packedHedder.size - sizeof(packedHedder);

		/* opened lazily and re-opened after a closecollection */
		if (attrkeys == NULL) {
			attrkeys = ropen();
		}

		if (packedHedder.command == bbc_askToAuthenticate) {
			if (recv(socket, tkeyForTest, sizeof(tkeyForTest),MSG_WAITALL) != (int)sizeof(tkeyForTest)) {
				perror("Cant read tkeyForTest");
				exit(1);
			}
			/* NOTE(review): the key is read but never checked, so every
			   client is accepted; the failure branch is currently dead. */
			if (1) {
				printf("authenticated\n");
				intrespons = bbc_authenticate_ok;

				bbdocument_init(NULL);

				isAuthenticated = 1;
			}
			else {
				printf("authenticate faild\n");
				intrespons = bbc_authenticate_feiled;
			}

			bbdn_send_status(socket, intrespons);
		}
		else {
			if (!isAuthenticated) {
				printf("user not autentikated\n");
				exit(1);
			}

			if (packedHedder.command == bbc_docadd) {
				#ifdef DEBUG
				printf("bbc_docadd\n");
				#endif

				char *subname,*documenturi,*documenttype,*document,*acl_allow,*acl_denied,*title,*doctype;
				char *attributes;
				int dokument_size;
				unsigned int lastmodified;

				#ifdef DEBUG_TIME
					gettimeofday(&start_time, NULL);
				#endif

				subname = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read subname");
				documenturi = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read documenturi");
				documenttype = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read documenttype");

				/* the document body: size first, then the raw bytes */
				if (recvall(socket, &dokument_size, sizeof(dokument_size)) == 0) {
					perror("Cant read dokument_size");
					exit(1);
				}
				if (dokument_size < 0) {
					fprintf(stderr,"Invalid dokument_size %i\n",dokument_size);
					exit(1);
				}
				if ((document = malloc((size_t)dokument_size +1)) == NULL) {
					perror("malloc");
					exit(1);
				}
				/* an empty document is not read from the socket at all */
				if (dokument_size != 0) {
					if (recvall(socket, document, dokument_size) == 0) {
						fprintf(stderr,"Can't read document of size %i\n",dokument_size);
						perror("recvall");
						exit(1);
					}
				}
				document[dokument_size] = '\0';

				if (recvall(socket, &lastmodified, sizeof(lastmodified)) == 0) {
					perror("Cant read lastmodified");
					exit(1);
				}

				acl_allow = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read acl_allow");
				acl_denied = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read acl_denied");
				title = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read title");
				doctype = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read doctype");
				attributes = bbdn_recv_string(socket, 1, "Can't receive attribute list len", "Can't receive attribute list");

				#ifdef DEBUG_TIME
					gettimeofday(&end_time, NULL);
					printf("Time debug: bbdn_docadd recv data time: %f\n",getTimeDifference(&start_time, &end_time));
				#endif

				printf("\n");
				printf("########################################################\n");
				printf("Url: %s\n",documenturi);
				printf("got subname \"%s\": title \"%s\". Nr %i, dokument_size %i attrib: %s\n",subname,title,count,dokument_size, attributes);
				printf("########################################################\n");
				printf("calling bbdocument_add():\n");
				#ifdef DEBUG_TIME
					gettimeofday(&start_time, NULL);
				#endif

				intrespons = bbdocument_add(subname,documenturi,documenttype,document,dokument_size,lastmodified,acl_allow,acl_denied,title,doctype, attributes, attrkeys);

				printf(":bbdocument_add end\n");
				printf("########################################################\n");

				#ifdef DEBUG_TIME
					gettimeofday(&end_time, NULL);
					printf("Time debug: bbdn_docadd runing bbdocument_add() time: %f\n",getTimeDifference(&start_time, &end_time));
				#endif
				free(subname);
				free(documenturi);
				free(documenttype);
				free(document);
				free(acl_allow);
				free(acl_denied);
				free(title);
				free(doctype);
				free(attributes);

				/* send status */
				bbdn_send_status(socket, intrespons);
			}
			else if (packedHedder.command == bbc_opencollection) {
				char *subname;
				char path[PATH_MAX];

				printf("open collection\n");

				subname = bbdn_recv_string(socket, 0, "Cant read intrespons", "Cant read subname");

				/* a collection being (re)crawled is no longer "fully crawled" */
				GetFilPathForLot(path, 1, subname);
				strcat(path, "fullyCrawled");

				unlink(path);

				free(subname);
				/* no status word is sent for this command */
			}
			else if (packedHedder.command == bbc_closecollection) {
				char *subname;
				char command[PATH_MAX];

				printf("closecollection\n");

				subname = bbdn_recv_string(socket, 0, "Cant read intrespons", "Cant read subname");

				bbdocument_close(attrkeys);
				attrkeys = NULL;

				/* todo (from the author): use subname, and C instead of perl, here */
				printf("cleanin lots start\n");
				if (bbdn_is_shell_safe(subname)) {
					snprintf(command,sizeof(command),"perl %s -l -s \"%s\"",bfile("perl/cleanLots.pl"),subname);

					printf("running \"%s\"\n",command);
					intrespons = system(command);
				}
				else {
					/* subname comes straight off the socket; refuse to let it
					   break out of the quotes on the shell command line */
					fprintf(stderr,"closecollection: refusing to run cleanLots for unsafe subname\n");
					intrespons = -1;
				}
				printf("cleanin lots end\n");

				/* queue the subname for re-caching and signal searchd */
				lot_recache_collection(subname);

				/* We are done crawling  */
				{
					int fd = lotOpenFileNoCasheByLotNrl(1, "fullyCrawled", ">>", '\0', subname);

					if (fd == -1) {
						warn("Unable to write fullyCrawled file");
					} else {
						close(fd);
					}
				}

				free(subname);

				bbdn_send_status(socket, intrespons);
			}
			else if (packedHedder.command == bbc_deleteuri) {
				char *subname, *uri;
				FILE *fh;
				unsigned int DocID, lastmodified;
				int failed = 0;

				printf("deleteuri\n");

				subname = bbdn_recv_string(socket, 0, "Cant read intrespons", "Cant read subname");
				uri = bbdn_recv_string(socket, 0, "Cant read intrespons", "Cant read uri");

				printf("going to delete: %s from %s\n", uri, subname);

				if (uriindex_get(uri, &DocID, &lastmodified, subname) == 0) {
					fprintf(stderr,"Unable to get uri info. uri=\"%s\",subname=\"%s\".",uri,subname);
					perror("Unable to get uri info");
					failed = 1;
				}

				/* Add docid to the gced file */
				if (!failed) {
					if ((fh = lotOpenFileNoCasheByLotNr(rLotForDOCid(DocID),"gced","a", 'e',subname)) == NULL) {
						perror("can't open gced file");
						failed = 1;
					} else {
						fwrite(&DocID, sizeof(DocID), 1, fh);
						fclose(fh);
					}
				}

				/* mark the record deleted in the DocumentIndex */
				if (!failed) {
					struct reformat *re;

					if((re = reopen(rLotForDOCid(DocID), sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) {
						perror("can't reopen()");
						failed = 1;
					} else {
						DIS_delete(RE_DocumentIndex(re, DocID));
						reclose(re);
					}
				}

				/* mark the lot as dirty */
				if (!failed) {
					FILE *dirtfh = lotOpenFileNoCashe(DocID,"dirty","ab",'e',subname);

					if (dirtfh == NULL) {
						/* reported but not fatal: the delete itself still goes ahead */
						perror("can't open dirty file");
					} else {
						fwrite("1",1,1,dirtfh);
						fclose(dirtfh);
					}
				}

				if (!failed) {
					bbdocument_delete(uri, subname);
				}

				free(uri);
				free(subname);

				/* Always return ok for now */
				bbdn_send_status(socket, 1);
			}
			else if (packedHedder.command == bbc_deletecollection) {
				char *subname;

				printf("deletecollection\n");

				subname = bbdn_recv_string(socket, 0, "Cant read intrespons", "Cant read subname");

				printf("going to delete collection: %s\n", subname);

				intrespons = bbdocument_deletecoll(subname);

				bbdn_send_status(socket, intrespons);

				free(subname);
			}
			else if (packedHedder.command == bbc_addwhisper) {
				whisper_t add;
				char *subname;

				subname = bbdn_recv_string(socket, 0, "Cant read intrespons", "Cant read subname");

				if (recv(socket, &add, sizeof(add),MSG_WAITALL) != (int)sizeof(add))
					err(1, "Cant read add whisper");

				gcwhisper_write(subname, add);
				free(subname);
				/* no status word is sent for this command */
			}
			else if (packedHedder.command == bbc_HasSufficientSpace) {
				char *subname;

				subname = bbdn_recv_string(socket, 1, "Cant read intrespons", "Cant read subname");

				/* only lot 1 is tested here; other lots must be checked too once
				   several disks are supported */
				intrespons = lotHasSufficientSpace(1, 4096, subname);

				bbdn_send_status(socket, intrespons);

				printf("~Asked for HasSufficientSpace for subname \"%s\". Returnerer %d\n",subname, intrespons);

				free(subname);
			}
			else {
				printf("unnown comand. %i\n", packedHedder.command);
			}
		}

		++count;
	}

	#ifdef DEBUG_TIME
		gettimeofday(&tot_end_time, NULL);
		printf("Time debug: bbdn total time time: %f\n",getTimeDifference(&tot_start_time, &tot_end_time));
	#endif

}
예제 #16
0
/*
 * Opens the DocumentIndex file (or the index file named by diname) of the lot
 * that DocID belongs to and seeks to the record for DocID, so the caller can
 * read or write that record directly.
 *
 * DocID    document whose record is wanted
 * type     open mode selector:
 *            'c', 'r'  open the existing file with "r+b"
 *            's'       open the existing file with "rb" (a true read-only open)
 *            'w'       open with "r+b"; if that fails, create the lot path
 *                      with makePath() and open with "w+b"
 * subname  passed on to GetFilPathForLot()
 * diname   file name inside the lot directory, NULL means "DocumentIndex"
 *
 * Returns the positioned stream, or NULL when the file cannot be opened, when
 * the file name does not fit in the local buffer, or when type is not one of
 * the characters above. A failing seek terminates the process with exit(1).
 *
 * When DI_FILE_CASHE is defined the stream is kept in DocumentIndexHA, which is
 * declared outside this function, and is reused as long as the same lot and
 * name are asked for.
 */
FILE *GetFileHandler (unsigned int DocID,char type,char subname[], char *diname) {

	#ifndef DI_FILE_CASHE
		FILE *DocumentIndexHA = NULL;
	#endif
	const char *indexName = (diname == NULL) ? "DocumentIndex" : diname;
	int LotNr;
	int written;
	char FileName[128];
	char FilePath[128];

	//find the lot for this DocID
	LotNr = rLotForDOCid(DocID);

	//open the file unless the cached stream already is the one we want
	#ifdef DI_FILE_CASHE
	//NOTE(review): a NULL diname matches whatever name is cached for this lot - confirm that is intended
	if ((LotNr == openDocumentIndex) && (diname == NULL || strcmp(openName,diname) == 0)) {

	}
	#else
	if(0) {

	}
	#endif
	else {

		GetFilPathForLot(FilePath,LotNr,subname);

		//snprintf never writes past FileName and always terminates it,
		//which the earlier strncpy/strncat pair did not guarantee
		written = snprintf(FileName,sizeof(FileName),"%s%s",FilePath,indexName);
		if (written < 0 || (size_t)written >= sizeof(FileName)) {
			printf("%d: file name for lot %i is to long\n", __LINE__, LotNr);
			return NULL;
		}

		#ifdef DI_FILE_CASHE
			printf("openig di file \"%s\"\n",FileName);

			//close the cached stream and forget it right away, so that a failure
			//further down can't leave a closed stream registered as the open one
			if (openDocumentIndex != -1) {
				if (DocumentIndexHA != NULL) {
					fclose(DocumentIndexHA);
					DocumentIndexHA = NULL;
				}
				openDocumentIndex = -1;
			}
		#endif

		if (type == 'c' || type == 'r') {
			//temp: opened with r+ so DIRead and DIwrite can work on the same file.
			//this makes search fail on the web because of file permissions :-(
			if ((DocumentIndexHA = fopen(FileName,"r+b")) == NULL) {
				printf("%d: cant open file %s for %c\n", __LINE__, FileName, type);
				perror(FileName);
				return NULL;
			}
		}
		else if (type == 's') {
			//a true read-only open
			if ((DocumentIndexHA = fopen(FileName,"rb")) == NULL) {
				printf("%d: cant open file %s for rb\n", __LINE__, FileName);
				perror(FileName);
				return NULL;
			}
		}
		else if (type == 'w') {
			if ((DocumentIndexHA = fopen(FileName,"r+b")) == NULL) {
				//the file is not there yet: create the path and the file
				makePath(FilePath);
				if ((DocumentIndexHA = fopen(FileName,"w+b")) == NULL) {
					perror(FileName);

					return NULL;
				}
			}
		}
		else {
			//no stream was opened, so there is nothing to seek in
			printf("%d: unknown open type '%c' for file %s\n", __LINE__, type, FileName);
			return NULL;
		}

		#ifdef DI_FILE_CASHE
			openDocumentIndex = LotNr;
			strscpy(openName,indexName,sizeof(openName));
		#endif
	}

	//seek to the record of this DocID
	if (fseek(DocumentIndexHA,DIPostAdress(DocID),SEEK_SET) != 0) {
		perror("Can't seek");
		exit(1);
	}


	return DocumentIndexHA;

}
예제 #17
0
/*
 * Builds the "Anchor" reverse index files of one lot from the lot's anchors
 * file.
 *
 * Usage: ./anchorread lotnr subname
 *
 * Every anchor text is lower-cased and split on spaces. Empty tokens and stop
 * words are skipped, "www" only advances the word position, and every other
 * word is appended to the reverse index file selected by
 * WordID % NrOfDataDirectorys as the fields
 *     DocID (unsigned int), lang (unsigned char, always 0),
 *     WordID (unsigned long), nr (unsigned long, always 1),
 *     hits (unsigned short)
 * where hits is the running word position of the document, which starts at
 * 2000 and is capped at 65535.
 *
 * The reverse index streams are not closed here; they are flushed when the
 * program terminates normally.
 *
 * Returns 0 when done, exits with 1 on usage errors, a missing anchors file or
 * when memory runs out.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int i;
	unsigned int DocID;
	char text[50];
	unsigned int radress;
	unsigned int rsize;
	char **Data;
	int TokCount;
	unsigned short hits;
	unsigned long WordID;
	//written with sizeof(unsigned long) below, so it has to be that wide
	unsigned long nr;
	int bucket;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	unsigned char lang;
	FILE *FH;
	unsigned int DocIDPlace;
	int *nrOfLinkWordsToDocID;
	char *subname;

	//check that we were told which lot to use
	if (argc < 3) {
		printf("Usage: ./anchorread lotnr subname\n\n");
		exit(1);
	}

	lotNr = atoi(argv[1]);
	subname = argv[2];

	nrOfLinkWordsToDocID = malloc(sizeof(int) * NrofDocIDsInLot);
	if (nrOfLinkWordsToDocID == NULL) {
		perror("malloc nrOfLinkWordsToDocID");
		exit(1);
	}

	for (i=0;i<NrofDocIDsInLot;i++) {
		//starts at 2000 so these are easy to tell apart visually from other hits
		nrOfLinkWordsToDocID[i] = 2000;
	}

	//only used to find out whether the lot has an anchors file at all
	if ( (FH = lotOpenFileNoCasheByLotNr(lotNr,"anchors","rb", 's',subname)) == NULL) {
		printf("lot dont have a anchors file\n");
		exit(1);
	}
	fclose(FH);

	revindexFilesOpenLocal(revindexFilesHa,lotNr,"Anchor","wb",subname);

	while (anchorGetNext(lotNr,&DocID,text,sizeof(text),&radress,&rsize,subname) ) {

		DocIDPlace = (DocID - LotDocIDOfset(rLotForDOCid(DocID)));

		//never index outside the counter table
		if (DocIDPlace >= (unsigned int)NrofDocIDsInLot) {
			printf("DocID %u is outside the lot (place %u), skipping\n",DocID,DocIDPlace);
			continue;
		}

		++nrOfLinkWordsToDocID[DocIDPlace];

		convert_to_lowercase((unsigned char *)text);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("DocID %i, text: \"%s\", DocIDPlace %i, nrOfLinkWordsToDocID %i\n",DocID,text,DocIDPlace,nrOfLinkWordsToDocID[DocIDPlace]);
		}
		#endif

		if ((TokCount = split(text, " ", &Data)) == -1) {
			printf("canæt splitt \"%s\"\n",text);
			//Data holds nothing usable after a failed split
			continue;
		}

		for (i = 0; Data[i] != NULL; i++) {

			if (Data[i][0] == '\0') {
				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("emty data element\n");
					}
				#endif
			}
			else if (strcmp(Data[i],"www") == 0) {
				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("www\n");
					}
				#endif
				++nrOfLinkWordsToDocID[DocIDPlace];
			}
			else if (isStoppWord(Data[i])) {
				//stop words do not advance the word position
				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("stopword \"%s\"\n",Data[i]);
					}
				#endif
			}
			else {

				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("\t\"%s\" %i\n",Data[i],nrOfLinkWordsToDocID[DocIDPlace]);
					}
				#endif

				WordID = crc32boitho(Data[i]);

				if (WordID == 0) {
					printf("got 0 as word id for \"%s\". Somthing may be wrong.\n",Data[i]);
				}

				bucket = WordID % NrOfDataDirectorys;

				//hits is an unsigned short, so cap the position at its maximum
				if (nrOfLinkWordsToDocID[DocIDPlace] > 65535) {
					hits = 65535;
				}
				else {
					hits = nrOfLinkWordsToDocID[DocIDPlace];
				}

				#ifdef DEBUG
					if (DocID == 4999999) {
						printf("\thits %i: \"%s\": %hu, bucket %i\n",i,Data[i],hits,bucket);
					}
				#endif

				if (fwrite(&DocID,sizeof(unsigned int),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite DocID");
				}

				//runarb: 13 May 2007. we have switched to using a number for the language.
				//it should then be taken from DocumentIndex if it exists, but it is not stored there
				lang = 0;
				nr = 1;
				if (fwrite(&lang,sizeof(unsigned char),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite lang");
				}

				if (fwrite(&WordID,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite WordID");
				}

				if (fwrite(&nr,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite nr");
				}

				if (fwrite(&hits,sizeof(unsigned short),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite hits");
				}

				++nrOfLinkWordsToDocID[DocIDPlace];
			}
		}
		FreeSplitList(Data);

		#ifdef DEBUG
			if (DocID == 4999999) {
			printf("\n");
			}
		#endif
	}

	free(nrOfLinkWordsToDocID);

	return 0;
}
예제 #18
0
    /*
     * Search client, usable as a CGI program or from the command line.
     *
     * The query is taken from the CGI entry "query" when QUERY_STRING is set,
     * otherwise from the command line arguments joined with spaces. It is
     * lower-cased, sent to the search server at hostName:PORT in a
     * queryNodeHederFormat, and the answer (one SiderHederFormat followed by
     * showabal SiderFormat records) is printed to stdout as XML. Finally the
     * query and the total number of hits are appended to the query log.
     *
     * Returns 0 on success. Exits with 1 on a missing command line query, a
     * too long query, network set-up errors or a missing answer header, and
     * with 0 when the CGI library can't be initialised.
     *
     * NOTE(review): the query and the fields received from the server are
     * printed without XML escaping (only \" in the query and & in the url are
     * handled) - confirm that the consumers of this output can cope with that.
     */
    int main(int argc, char *argv[])
    {
        int sockfd, n;
        int i, y;
        char *strpointer;
        int res;
        struct hostent *he;
        struct sockaddr_in their_addr; // connector's address information
        FILE *LOGFILE;
        char hostName[] = "bbs-001.boitho.com";
        struct SiderFormat Sider[MaxsHits];
        struct SiderHederFormat SiderHeder;
        struct in_addr ipaddr;
        struct QueryDataForamt QueryData;
        struct queryNodeHederFormat queryNodeHeder;
        char escaped[sizeof(QueryData.query)]; //work buffer for the escape step
        const char *cgiQuery;
        size_t used, qlen, pos, out;

        //send out an HTTP header:
        printf("Content-type: text/xml\n\n");

        //start from an empty query, so it is a valid string on every path below
        memset(&QueryData, 0, sizeof(QueryData));
        memset(&SiderHeder, 0, sizeof(SiderHeder));

        //without QUERY_STRING we are run from the command line, and the arguments are the query
        if (getenv("QUERY_STRING") == NULL) {
            if (argc < 2) {
                printf("Error ingen query spesifisert.\n\nEksempel på bruk for å søke på boitho:\n\tsearchkernel boitho\n\n\n");
                exit(1);
            }

            //every argument is appended as " arg", with the remaining room checked each time
            used = 0;
            for (i = 1; i < argc; i++) {
                n = snprintf(QueryData.query + used, sizeof(QueryData.query) - used, " %s", argv[i]);
                if (n < 0 || (size_t)n >= sizeof(QueryData.query) - used) {
                    printf("query to long\n");
                    exit(1);
                }
                used += (size_t)n;
            }
            printf("query %s\n",QueryData.query);
        }
        else {
            // Initialize the CGI lib
            res = cgi_init();

            // Was there an error initializing the CGI???
            if (res != CGIERR_NONE) {
                printf("Error # %d: %s<p>\n", res, cgi_strerror(res));
                exit(0);
            }

            cgiQuery = cgi_getentrystr("query");
            if (cgiQuery == NULL) {
                //we carry on with the empty query
                perror("Did'n receive any query.");
            }
            else {
                n = snprintf(QueryData.query, sizeof(QueryData.query), "%s", cgiQuery);
                if (n < 0 || (size_t)n >= sizeof(QueryData.query)) {
                    printf("query to long\n");
                    exit(1);
                }
            }
        }

        qlen = strlen(QueryData.query);
        if (qlen > MaxQueryLen -1) {
            printf("query to long\n");
            exit(1);
        }

        //convert to lower case. tolower() needs an unsigned char value
        for (pos = 0; pos < qlen; pos++) {
            QueryData.query[pos] = (char)tolower((unsigned char)QueryData.query[pos]);
        }

        if ((he=gethostbyname(hostName)) == NULL) {  // get the host info
            perror("gethostbyname");
            exit(1);
        }

        if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
            perror("socket");
            exit(1);
        }

        memset(&their_addr, 0, sizeof(their_addr));
        their_addr.sin_family = AF_INET;    // host byte order
        their_addr.sin_port = htons(PORT);  // short, network byte order
        their_addr.sin_addr = *((struct in_addr *)he->h_addr);

        if (connect(sockfd, (struct sockaddr *)&their_addr,
                                              sizeof(struct sockaddr)) == -1) {
            perror("connect");
            exit(1);
        }

        //the whole struct goes over the wire, so no byte of it may be left uninitialised
        memset(&queryNodeHeder, 0, sizeof(queryNodeHeder));
        snprintf(queryNodeHeder.query, sizeof(queryNodeHeder.query), "%s", QueryData.query);

        //send the request
        if (sendall(sockfd, &queryNodeHeder, sizeof(queryNodeHeder)) == -1) {
            perror("sendall");
            exit(1);
        }

        //receive the header of the answer. Without all of it there is nothing sensible to print
        n = recv(sockfd, &SiderHeder, sizeof(SiderHeder), MSG_WAITALL);
        if (n != (int)sizeof(SiderHeder)) {
            if (n == -1) {
                perror("recv");
            }
            else {
                fprintf(stderr, "recv: short read of the answer header\n");
            }
            close(sockfd);
            exit(1);
        }

        //never receive more hits than Sider has room for
        if (SiderHeder.showabal > MaxsHits) {
            fprintf(stderr, "server announced more hits than MaxsHits, truncating\n");
            SiderHeder.showabal = MaxsHits;
        }

        for (i = 0; i < SiderHeder.showabal; i++) {
            n = recv(sockfd, &Sider[i], sizeof(struct SiderFormat), MSG_WAITALL);
            if (n != (int)sizeof(struct SiderFormat)) {
                if (n == -1) {
                    perror("recv");
                }
                //only the hits that arrived in full are printed
                SiderHeder.showabal = i;
                break;
            }
        }

        close(sockfd);

        //removes characters escaped with \, e.g. \" becomes &quot;
        //a backslash and the character after it are both consumed; only \" produces output
        out = 0;
        qlen = strlen(QueryData.query);
        for (pos = 0; pos < qlen; pos++) {
            if (QueryData.query[pos] == '\\') {
                pos++;
                if (pos < qlen && QueryData.query[pos] == '"') {
                    if (out + 6 >= sizeof(escaped)) {
                        break;
                    }
                    memcpy(escaped + out, "&quot;", 6);
                    out += 6;
                }
            }
            else {
                if (out + 1 >= sizeof(escaped)) {
                    break;
                }
                escaped[out++] = QueryData.query[pos];
            }
        }
        escaped[out] = '\0';
        memcpy(QueryData.query, escaped, out + 1);

        printf("<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?> \n");
        printf("<!DOCTYPE family SYSTEM \"http://www.boitho.com/xml/search.dtd\"> \n");

        printf("<search>\n");

        printf("<treff-info totalt=\"%i\" query=\"%s\" hilite=\"%s\" tid=\"%f\" filtered=\"%i\" showabal=\"%i\"/>\n",SiderHeder.TotaltTreff,QueryData.query,SiderHeder.hiliteQuery,SiderHeder.total_usecs,SiderHeder.filtered,SiderHeder.showabal);

        for (i = 0; i < SiderHeder.showabal; i++) {

            if (Sider[i].deletet) {
                continue;
            }

            //filters out characters that are not allowed in xml
            while ((strpointer = strchr(Sider[i].DocumentIndex.Url,'&')) != NULL) {
                (*strpointer) = 'a';
            }

            printf("<treff>\n");

            printf("\t<DocID>%i-%i</DocID>\n",Sider[i].iindex.DocID,rLotForDOCid(Sider[i].iindex.DocID));
            printf("\t<POSISJON>%i</POSISJON>\n",i +1);

            //DocumentIndex
            printf("\t<Url>%s</Url>\n",Sider[i].DocumentIndex.Url);
            printf("\t<Title>%s</Title>\n",Sider[i].title);
            printf("\t<AdultWeight>%hu</AdultWeight>\n",Sider[i].DocumentIndex.AdultWeight);
            printf("\t<Sprok>%s</Sprok>\n",Sider[i].DocumentIndex.Sprok);
            //temp: strange characters show up here
            printf("\t<Dokumenttype>%s</Dokumenttype>\n",Sider[i].DocumentIndex.Dokumenttype);

            printf("\t<RepositorySize>%u</RepositorySize>\n",Sider[i].DocumentIndex.htmlSize);

            printf("\t<THUMBNALE>%s</THUMBNALE>\n",Sider[i].thumbnale);

            printf("\t<CACHE>%s</CACHE>\n",Sider[i].cacheLink);
            printf("\t<IMAGEWIDTH>100</IMAGEWIDTH>\n");
            printf("\t<IMAGEHEIGHT>100</IMAGEHEIGHT>\n");

            printf("\t<METADESCRIPTION></METADESCRIPTION>\n");
            printf("\t<CATEGORY></CATEGORY>\n");
            printf("\t<OFFENSIVE_CODE>FALSE</OFFENSIVE_CODE>\n");

            printf("\t<beskrivelse>%s</beskrivelse>\n",Sider[i].description);
            printf("\t<TermRank>%i</TermRank>\n",Sider[i].iindex.TermRank);
            printf("\t<PopRank>%i</PopRank>\n",Sider[i].iindex.PopRank);
            printf("\t<allrank>%i</allrank>\n",Sider[i].iindex.allrank);

            ipaddr.s_addr = Sider[i].DocumentIndex.IPAddress;

            printf("\t<IPAddress>%s</IPAddress>\n",inet_ntoa(ipaddr));

            printf("\t<RESPONSE>%hu</RESPONSE>\n",Sider[i].DocumentIndex.response);

            printf("\t<NrOfHits>%i</NrOfHits>\n",Sider[i].iindex.TermAntall);

            //prints the hits (where in the document the word is found)
            printf("\t<hits>");
            for (y=0; (y < Sider[i].iindex.TermAntall) && (y < MaxTermHit); y++) {
                printf("%hu ",Sider[i].iindex.hits[y]);
            }
            printf("</hits>\n");

            printf("</treff>\n");
        }

        printf("</search>\n");

        //ToDo: needs locking here
        if ((LOGFILE = fopen("/home/boitho/config/query.log","a")) == NULL) {
            perror("logfile");
        }
        else {
            //only touch the log when it could be opened
            fprintf(LOGFILE,"%s %i\n",queryNodeHeder.query,SiderHeder.TotaltTreff);
            fclose(LOGFILE);
        }

        return 0;
    }
예제 #19
0
int main (int argc, char *argv[]) {

	FILE *LINKDBFILE;
	FILE *INDEXFILE;

	unsigned int ranged;
	struct linkdb_block linkdbPost;
	off64_t offset;
	int lastLotNr, lotNr = -1;


	unsigned int lastDocID;


        if (argc < 3) {
                printf("Dette programet tar inn en linkdb fil og gjør den søkbar\n\n\tUsage: ./BrankCalculate linkdb indexfile\n");
                exit(0);
        }

	if ((LINKDBFILE = (FILE *)fopen64(argv[1],"rb")) == NULL) {
                printf("Cant read linkdb ");
                perror(argv[1]);
                exit(1);
        }
	if ((INDEXFILE = (FILE *)fopen64(argv[2],"wb")) == NULL) {
                printf("Cant read index ");
                perror(argv[2]);
                exit(1);
        }

	ranged = 0;
	lastDocID = 0;
	while (!feof(LINKDBFILE)) {


			fread(&linkdbPost,sizeof(linkdbPost),1,LINKDBFILE);


				//
				lotNr = rLotForDOCid(linkdbPost.DocID_to);
				if (lastLotNr != lotNr) {
					printf("%i\n",lotNr);
				}
				lastLotNr = lotNr;



			if (linkdbPost.DocID_to != lastDocID) {
				//printf("\nnew\n");

				//tar vare på ofsett // -sizeof(offset) da vi skal ha starten. Vi har jo allerede lest en
				//offset = (ftello64(LINKDBFILE) - sizeof(linkdbPost));
				offset = ftello64(LINKDBFILE);
				//printf("offset %li\n",offset);
				//søker oss til riktig plass
				fseeko64(INDEXFILE,linkdbPost.DocID_to * sizeof(offset),SEEK_SET);
				//for så å skrive dette til fil
				fwrite(&offset,sizeof(offset),1,INDEXFILE);

			}

			//printf("%u -> %u\n",linkdbPost.DocID_from,linkdbPost.DocID_to);

			
			lastDocID = linkdbPost.DocID_to;

			//if (ranged > 500) {
			//	break;
			//}

		++ranged;

	}	



	fclose(LINKDBFILE);
	fclose(INDEXFILE);
	printf("Rangerte %lu linker\n",ranged);
}
예제 #20
0
/*
 * Gives callers access to files inside a lot and keeps the opened handle in
 * the OpenFiles table so it can be handed out again.
 *
 * DocID     any DocID inside the wanted lot
 * resource  file name inside the lot
 * type      fopen style mode string
 * lock      passed on to lotOpenFileNoCasheByLotNr()
 * subname   passed on to the path and open helpers
 *
 * Returns the stored handle when slot 0 already holds the same lot, subname,
 * type and resource, otherwise a freshly opened handle (the handle that was in
 * the slot is closed first). Returns NULL when the file can't be opened.
 *
 * The search through the table is switched off for now: its result is thrown
 * away and slot 0 is always the one used.
 */
FILE *lotOpenFile(unsigned int DocID,char resource[],char type[], char lock,char subname[]) {

	char lotDir[128];
	char fullName[128];
	int LotNr;
	int slot;
	int cached;

	//first call: mark every slot in the table as free
	if (!LotFilesInalisert) {
		slot = 0;
		while (slot < MaxOpenFiles) {
			OpenFiles[slot].LotNr = -1;
			++slot;
		}

		LotFilesInalisert = 1;
	}

	fullName[0] = '\0';

	//which lot are we going to read from
	LotNr = rLotForDOCid(DocID);

	//look through the table until the lot is found or the table ends
	for (slot = 0; slot < MaxOpenFiles; slot++) {
		if (OpenFiles[slot].LotNr == LotNr) {
			break;
		}
	}
	//temp: the search is turned off by always using slot 0.
	//type and subname were also added without the search taking them into account
	slot = 0;

	cached = (OpenFiles[slot].LotNr == LotNr)
		&& (strcmp(OpenFiles[slot].subname,subname) == 0)
		&& (strcmp(OpenFiles[slot].type,type) == 0)
		&& (strcmp(OpenFiles[slot].resource,resource) == 0);

	if (cached) {
		//the slot already holds this file, hand it out again
		#ifdef DEBUG
		printf("lotOpenFile: fant en tildigere åpnet fil, returnerer den.\n");
		printf("lotOpenFile: returnerer: i %i, subname \"%s\", type \"%s\", LotNr %i\n",slot,OpenFiles[slot].subname,OpenFiles[slot].type,OpenFiles[slot].LotNr);
		printf("lotOpenFile: file is \"%s\"\n",OpenFiles[slot].filename);
		printf("lotOpenFile: returning file handler %p\n",OpenFiles[slot].FILEHANDLER);
		#endif

		if (OpenFiles[slot].FILEHANDLER == NULL) {
			printf("Error: FILEHANDLER is NULL\n");
			#ifdef DEBUG
				exit(-1);
			#endif
		}
		return OpenFiles[slot].FILEHANDLER;
	}

	//the slot holds another open file, which has to be closed first
	if (OpenFiles[slot].LotNr != -1) {
		printf("lotOpenFile: closeing: i %i\n",slot);
		fclose(OpenFiles[slot].FILEHANDLER);
		OpenFiles[slot].LotNr = -1;
	}

	OpenFiles[slot].FILEHANDLER = lotOpenFileNoCasheByLotNr( LotNr, resource,type, lock,subname);
	if (OpenFiles[slot].FILEHANDLER == NULL) {
		printf("lotOpenFileNoCashe: can't open file\n");
		return NULL;
	}

	//full file name = lot directory followed by the resource name
	GetFilPathForLot(lotDir,LotNr,subname);
	strscpy(fullName,lotDir,sizeof(fullName));
	strlcat(fullName,resource,sizeof(fullName));

	//remember what the slot now holds
	strscpy(OpenFiles[slot].filename,fullName,sizeof(OpenFiles[slot].filename));
	strscpy(OpenFiles[slot].resource,resource,sizeof(OpenFiles[slot].resource));
	strscpy(OpenFiles[slot].subname,subname,sizeof(OpenFiles[slot].subname));
	strscpy(OpenFiles[slot].type,type,sizeof(OpenFiles[slot].type));

	printf("lotOpenFile: opening file \"%s\" for %s\n",fullName,type);

	OpenFiles[slot].LotNr = LotNr;

	return OpenFiles[slot].FILEHANDLER;
}
예제 #21
0
/*
 * Convenience wrapper around lotOpenFileNoCasheByLotNrl() for callers that
 * have a DocID instead of a lot number: the DocID is mapped to its lot with
 * rLotForDOCid() and all other arguments are passed on unchanged.
 *
 * Returns whatever lotOpenFileNoCasheByLotNrl() returns.
 */
int lotOpenFileNoCashel(unsigned int DocID,char resource[],char type[], char lock,char subname[]) {

	const int lotNr = rLotForDOCid(DocID);

	return lotOpenFileNoCasheByLotNrl(lotNr,resource,type,lock,subname);
}
예제 #22
0
void connectHandler(int socket) {
    struct packedHedderFormat packedHedder;

    int i,n;
    int LotNr;
    char lotPath[512];
    char buf[100];
    unsigned int FilterTime;
    int filnamelen;
    FILE *FH;
    struct stat inode;      // lager en struktur for fstat å returnere.
    off_t filesize;
    char c;

    struct DocumentIndexFormat DocumentIndexPost;
    int DocID;

    struct ReposetoryHeaderFormat ReposetoryHeader;
    unsigned int radress;

    char htmlbuffer[524288];
    int destLeng;
    char dest[512];

    off_t fileBloks,filerest;
    char *filblocbuff;


    //while ((i=read(socket, &packedHedder, sizeof(struct packedHedderFormat))) > 0) {
    while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) {

        //printf("command: %i\n",packedHedder.command);
        //printf("i er %i\n",i);
        printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
        //printf("subname: %s\n",packedHedder.subname);
        //lar size reflektere hva som er igjen av pakken
        packedHedder.size = packedHedder.size - sizeof(packedHedder);

        if (packedHedder.command == C_rmkdir) {

            printf("C_rmkdir\n");

            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant read destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant read dest");
                exit(1);
            }

            GetFilPathForLot(lotPath,LotNr,packedHedder.subname);

            sprintf(lotPath,"%s%s",lotPath,dest);

            printf("mkdir %s\n",lotPath);

            makePath(lotPath);

            printf("~C_rmkdir\n");


        }
        else if (packedHedder.command == C_rComand) {


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant read destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant read dest");
                exit(1);
            }

            printf("run command %s\n",dest);

            system(dest);

        }
        else if (packedHedder.command == C_getLotToIndex) {
            printf("fikk C_getLotToIndex\n");

            int dirty;

            if ((i=recv(socket, &dirty, sizeof(dirty),MSG_WAITALL)) == -1) {
                perror("Cant read dirty");
                exit(1);
            }

            printf("dirty: %i\n",dirty);

            LotNr = findLotToIndex(packedHedder.subname,dirty);

            printf("sending respons\n");
            sendall(socket,&LotNr, sizeof(LotNr));

        }
        else if (packedHedder.command == C_getlotHasSufficientSpace) {
            printf("fikk C_getLotToIndex\n");

            int needSpace;
            int response;

            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=recv(socket, &needSpace, sizeof(needSpace),MSG_WAITALL)) == -1) {
                perror("Cant read dirty");
                exit(1);
            }


            printf("needSpace: %i, LotNr %i\n",needSpace,LotNr);


            response = lotHasSufficientSpace(LotNr, needSpace, packedHedder.subname);


            printf("sending respons\n");
            sendall(socket,&response, sizeof(response));

        }
        else if (packedHedder.command == C_rGetSize) {
            printf("fikk C_rGetSize\n");


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            if (filnamelen > sizeof(buf)) {
                printf("filname to long\n");
            };

            if ((i=read(socket, buf, filnamelen)) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            printf("filname %s\n",buf);

            if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) {
                perror(buf);
                //sending that he fil is emty
                fileBloks = 0;

                sendall(socket,&fileBloks, sizeof(fileBloks));

            }
            else {
                //finner og sender il størelse
                fstat(fileno(FH),&inode);
                //filesize = inode.st_size;
                //sendall(socket,&filesize, sizeof(filesize));

                fileBloks = inode.st_size;

                printf("size is %" PRId64 "\n",fileBloks);

                sendall(socket,&fileBloks, sizeof(fileBloks));

                fclose(FH);
            }
        }
        else if (packedHedder.command == C_rGetFile) {
            printf("fikk C_rGetFile\n");


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            if (filnamelen > sizeof(buf)) {
                printf("filname to long\n");
            };

            if ((i=read(socket, buf, filnamelen)) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            printf("filname %s\n",buf);

            if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) {
                perror(buf);
                //sending that the fil is emty
                fileBloks = 0;
                filerest = 0;

                sendall(socket,&fileBloks, sizeof(fileBloks));
                sendall(socket,&filerest, sizeof(filerest));

            }
            else {
                //finner og sender fil størelse
                fstat(fileno(FH),&inode);
                //filesize = inode.st_size;
                //sendall(socket,&filesize, sizeof(filesize));

                fileBloks = (int)floor(inode.st_size / rNetTrabsferBlok);
                filerest = inode.st_size - (fileBloks * rNetTrabsferBlok);

                sendall(socket,&fileBloks, sizeof(fileBloks));
                sendall(socket,&filerest, sizeof(filerest));

                printf("sending fil. fileBloks %"PRId64", filerest %"PRId64"\n",fileBloks,filerest);


                filblocbuff = (char *)malloc(rNetTrabsferBlok);
                for(i=0; i < fileBloks; i++) {

                    //fread(filblocbuff,sizeof(c),rNetTrabsferBlok,FH);
                    //fread_all(const void *buf, size_t size, FILE *stream)
                    fread_all(filblocbuff,rNetTrabsferBlok,FH, 4096);

                    if ((n=sendall(socket, filblocbuff, rNetTrabsferBlok)) == -1) {
                        perror("Cant recv dest");
                        exit(1);
                    }

                }

                printf("did recv %i fileBloks\n",i);


                fread(filblocbuff,sizeof(c),filerest,FH);

                if ((n=sendall(socket, filblocbuff, filerest)) == -1) {
                    perror("Cant recv filerest");
                    exit(1);
                }

                free(filblocbuff);


                /*
                   for (i=0;i<filesize;i++) {
                   fread(&c,sizeof(char),1,FH);
                   send(socket, &c, sizeof(char), 0);
                //printf("%i\n",(int)c);
                }
                 */
                printf("send file end\n");

                fclose(FH);
            }

        }
        else if (packedHedder.command == C_rGetNext) {
            printf("fikk C_rGetNext\n");

            printf("støttes ikke lengere");
            exit(1);
            /*
            		//leser data. Det skal væren en unigned int som sier hvilken lot vi vil ha
            		//har deklarert den som int her ???
            		if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
            			perror("Cant read lotnr");
            			exit(1);
            		}
            		printf("leser FilterTime\n");
            		//leser filtertime
            		if ((i=read(socket, &FilterTime, sizeof(FilterTime))) == -1) {
            			perror("Cant read lotnr");
            			exit(1);
            		}

            		printf("lotnr %i FilterTime %u\n",LotNr,FilterTime);

            		//henter inn data om den lotten
            		if (rGetNext(LotNr,&ReposetoryHeader,htmlbuffer,NULL,&radress,FilterTime,0)) {

            			//printf("DocId: %i url: %s\n",ReposetoryHeader.DocID,ReposetoryHeader.url);

            			//sender pakke hedder
            			sendpacked(socket,C_rLotData,PROTOCOLVERSION, ReposetoryHeader.htmlSize + sizeof(ReposetoryHeader) +sizeof(radress), NULL,packedHedder.subname);

            			//sennder ReposetoryHeader'en
            			sendall(socket,&ReposetoryHeader, sizeof(ReposetoryHeader));

            			//sender htmlen
            			sendall(socket,&htmlbuffer, ReposetoryHeader.htmlSize);

            			//sender adressen
            			sendall(socket,&radress,sizeof(radress));
            			//printf("data sent\n");

            			//printf("rGetNext: %i\n",ReposetoryHeader.DocID);

            		}
            		else {
            			sendpacked(socket,C_rEOF,PROTOCOLVERSION, 0, NULL,packedHedder.subname);
            			printf("ferdig\n");
            		}
            */
        }
        else if (packedHedder.command == C_DIWrite) {


            if ((i=recv(socket, &DocumentIndexPost, sizeof(struct DocumentIndexFormat),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            DIWrite(&DocumentIndexPost,DocID,packedHedder.subname, NULL);

            //printf("DIWrite: %i\n",DocID);

        }
        else if (packedHedder.command == C_DIRead) {

            int DocID;
            struct DocumentIndexFormat DocumentIndexPost;

            printf("got commane C_DIRead. sise %i hsize %i ds %i\n",packedHedder.size, sizeof(packedHedder), sizeof(DocID));

            if ((i=recv(socket, &DocID, sizeof(DocID),0)) == -1) {
                perror("recv");
                exit(1);
            }
            //printf("DocID %i\n",DocID);

            //leser inn datan
            //int DIRead (struct DocumentIndexFormat *DocumentIndexPost, int DocID);
            DIRead(&DocumentIndexPost,DocID,packedHedder.subname);

            sendall(socket,&DocumentIndexPost, sizeof(struct DocumentIndexFormat));
        }
        else if (packedHedder.command == C_rGetIndexTime) {

            int Lotnr;
            unsigned int IndexTime;
            if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) {
                perror("recv");
                exit(1);
            }

            IndexTime = GetLastIndexTimeForLot(LotNr,packedHedder.subname);

            sendall(socket,&IndexTime, sizeof(IndexTime));

        }
        else if (packedHedder.command == C_rSetIndexTime) {

            int Lotnr;
            if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) {
                perror("recv");
                exit(1);
            }

            setLastIndexTimeForLot(LotNr,NULL,packedHedder.subname);

        }
        else if (packedHedder.command == C_rSendFile) {
            //skal mota en fil for lagring i reposetoryet
            //char FilePath[156];
            FILE *FILEHANDLER;
            char c;
            char opentype[2];
            //char *filblocbuff;
            //off_t fileBloks,filerest;

            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant recv lotnr");
                exit(1);
            }

            printf("lotNr %i\n",LotNr);


            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant recv destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant recv dest");
                exit(1);
            }

            printf("coping %s as length %i in to lot %i\n",dest,destLeng,LotNr);

            if ((i=recv(socket, &opentype, sizeof(char) +1,MSG_WAITALL)) == -1) {
                perror("Cant recv opentype");
                exit(1);
            }
            printf("opentype \"%s\"\n",opentype);


            //GetFilPathForLot(FilePath,LotNr,packedHedder.subname);

            //legger til filnavnet
            //strncat(FilePath,dest,sizeof(FilePath));

            //leser inn filstørelsen
            if ((i=recv(socket, &fileBloks, sizeof(fileBloks),MSG_WAITALL)) == -1) {
                perror("Cant recv fileBloks");
                exit(1);
            }

            if ((i=recv(socket, &filerest, sizeof(filerest),MSG_WAITALL)) == -1) {
                perror("Cant recv filerest");
                exit(1);
            }

            printf("fileBloks: %" PRId64 ", filerest: %" PRId64 "\n",fileBloks,filerest);

            //åpner filen
            if ((FILEHANDLER = lotOpenFileNoCasheByLotNr(LotNr,dest,opentype,'e',packedHedder.subname)) == NULL) {
                perror(dest);
            }

            filblocbuff = (char *)malloc(rNetTrabsferBlok);
            for(i=0; i < fileBloks; i++) {

                if ((n=recv(socket, filblocbuff, rNetTrabsferBlok,MSG_WAITALL)) == -1) {
                    perror("Cant recv dest");
                    exit(1);
                }

                fwrite(filblocbuff,sizeof(c),rNetTrabsferBlok,FILEHANDLER);
            }

            printf("did recv %i fileBloks\n",i);


            if ((n=recv(socket, filblocbuff, filerest,MSG_WAITALL)) == -1) {
                perror("Cant recv filerest");
                exit(1);
            }

            fwrite(filblocbuff,sizeof(c),filerest,FILEHANDLER);


            free(filblocbuff);

            fclose(FILEHANDLER);

            printf("\n");
        }
        else if (packedHedder.command == C_DIGetIp) {


            unsigned int DocID;
            struct DocumentIndexFormat DocumentIndexPost;

            //printf("got command C_DIGetIp\n");

            if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            //printf("DocID %u\n",DocID);

            DIRead(&DocumentIndexPost,DocID,packedHedder.subname);

            //printf("ipadress: %u\n",DocumentIndexPost.IPAddress);

            sendall(socket,&DocumentIndexPost.IPAddress, sizeof(DocumentIndexPost.IPAddress));


        }
        else if (packedHedder.command == C_anchorAdd) {
            size_t textlen;
            unsigned int DocID;
            char *text;

            printf("Add anchor....\n");
            if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            } else if ((i = recv(socket, &textlen, sizeof(textlen), MSG_WAITALL)) == -1) {
                perror("recv(textlen)");
                exit(1);
            }
            text = malloc(textlen+1);
            text[textlen] = '\0';
            if ((i = recv(socket, text, textlen, MSG_WAITALL)) == -1) {
                perror("recv(text)");
                exit(1);
            }

            anchoraddnew(DocID, text, textlen, packedHedder.subname, NULL);
            printf("Text for %d: %s\n", DocID, text);

            free(text);
        }
        else if (packedHedder.command == C_anchorGet) {
            size_t len;
            char *text;
            int LotNr;
            unsigned int DocID;
            printf("Get anchor...\n");

            if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }
            printf("got DocID %u\n",DocID);
            LotNr = rLotForDOCid(DocID);
            printf("trying to read anchor\n");

            len = anchorRead(LotNr, packedHedder.subname, DocID, NULL, -1);
            printf("got anchor of length %i\n",len);

            sendall(socket, &len, sizeof(len));
            text = malloc(len+1);

            printf("readint it again\n");
            anchorRead(LotNr, packedHedder.subname, DocID, text, len+1);
            sendall(socket, text, len);
        }
        else if (packedHedder.command == C_readHTML) {
            /*
            unsigned int DocID;
            unsigned int len;
            char *text;
            char *acla, *acld;
            struct DocumentIndexFormat DocIndex;
            struct ReposetoryHeaderFormat ReposetoryHeader;

            if ((i = recv(socket, &DocID, sizeof(DocID), MSG_WAITALL)) == -1) {
            	perror("recv");
            	exit(1);
            }

            if ((i = recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) {

            	perror("recv(len)");
            	exit(1);
            }
            printf("len %u\n",len);
            text = malloc(len);

            if (text == NULL)
            	exit(1);

            DIRead(&DocIndex, DocID, packedHedder.subname);


            if (!rReadHtml(
            		text,
            		&len,
            		DocIndex.RepositoryPointer,
            		DocIndex.htmlSize,
            		DocID,
            		packedHedder.subname,
            		&ReposetoryHeader,
            		&acla,
            		&acld,
            		DocIndex.imageSize)) {
            	len = 0;
            	sendall(socket, &len, sizeof(len));
            } else {
            	++len; // \0
            	#ifdef DEBUG
            	printf("docID %u\n",DocID);
            	printf("Got: (len %i, real %i) ########################\n%s\n#####################\n", len, strlen(text), text);
            	#endif
            	sendall(socket, &len, sizeof(len));
            	sendall(socket, text, len);
            	sendall(socket, &ReposetoryHeader,sizeof(ReposetoryHeader));
            }

            free(text);
            */
        }
        /*
        runarb: 06 des 2007: vi har gåt bort fra denne metoden for nå, og bruker heller index over smb. Men tar vare på den da vi kan trenge den siden

        else if (packedHedder.command == C_urltodocid) {
        	char cmd;
        	int alloclen;
        	char *urlbuf;

        	if (urltodociddb == NULL) {
        		cmd = C_DOCID_NODB;
        		sendall(socket, &cmd, sizeof(cmd));
        		exit(1);
        	} else {
        		cmd = C_DOCID_READY;
        		sendall(socket, &cmd, sizeof(cmd));
        	}
        	cmd = C_DOCID_NEXT;

        	alloclen = 1024;
        	urlbuf = malloc(alloclen);

        	do {
        		unsigned int DocID;
        		size_t len;
        		if ((i = recv(socket, &cmd, sizeof(cmd), MSG_WAITALL)) == -1) {
        			err(1, "recv(cmd)");
        		}
        		if (cmd == C_DOCID_DONE)
        			break;

        		if ((i == recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) {
        			err(1, "recv(len)");
        		}
        		if (alloclen < len+1) {
        			free(urlbuf);
        			alloclen *= 2;
        			urlbuf = malloc(alloclen);
        		}
        		if ((i == recv(socket, urlbuf, len, MSG_WAITALL)) == -1) {
        			err(1, "recv(len)");
        		}
        		urlbuf[len] = '\0';

        		if (!getDocIDFromUrl(urltodociddb, urlbuf, &DocID)) {
        			cmd = C_DOCID_NOTFOUND;
        			sendall(socket, &cmd, sizeof(cmd));
        		} else {
        			cmd = C_DOCID_FOUND;
        			sendall(socket, &cmd, sizeof(cmd));
        			sendall(socket, &DocID, sizeof(DocID));
        		}
        	} while (1);

        	free(urlbuf);
        }
        */
        else {
            printf("unnown comand. %i\n", packedHedder.command);
        }
        //printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
    } //while

}
예제 #23
0
/*
 * PageInfo: prints what the document index (DI) and the repository hold for
 * a single DocID in a collection.
 *
 * Invocation:
 *   - Command line (QUERY_STRING not set): option flags followed by exactly
 *     two positional arguments, the DocID and the collection (subname).
 *       -h  dump the stored html          -w  run the html parser (words)
 *       -s  print the summary body        -a  print the anchor text
 *       -r  print the resource            -p  print the pop ranks
 *       -d  overwrite the DI record with zeros and stop
 *       -u  compute the adult weight
 *   - CGI (QUERY_STRING set): a text/plain header is printed and "subname"
 *     and "DocID" are fetched through the cgi lib.
 *
 * The adult, anchor and pop rank sections are compiled only when BLACK_BOX
 * is not defined.
 *
 * Returns 0 on success, 1 if the html could not be read, -1 on CGI errors.
 * Usage errors and allocation failures terminate with exit(1).
 */
int main (int argc, char *argv[]) {

    struct DocumentIndexFormat DocumentIndexPost;
    /* Pre-set so the combined printout below never reads an indeterminate
       value when one of the rank files can't be opened. */
    int PopRankextern = 0;
    int PopRankintern = 0;
    int PopRanknoc = 0;
    int PopRanindex;
    char ShortRank;
    FILE *FH;
    struct popl popextern;
    struct popl popintern;
    struct popl popnoc;
    struct popl popindex;
    uLong htmlBufferSize = 0;
    char *htmlBuffer = NULL;
    char *acl_allowbuffer = NULL;
    char *acl_deniedbuffer = NULL;

    /* Declared at function scope because the adult section further down uses
       them after the DI read block has been closed. */
    struct ReposetoryHeaderFormat ReposetoryHeader;
    char *title = NULL;
    char *body = NULL;

    char timebuf[26];

    int optShowhtml = 0;
    int optShowWords = 0;
    int optSummary = 0;
    int optAnchor = 0;
    int optResource = 0;
    int optPopRank = 0;
    int optDelete = 0;
    int optAdult = 0;

    unsigned int DocID;
    char *subname;

    memset(&ReposetoryHeader,'\0',sizeof(ReposetoryHeader));

    if (getenv("QUERY_STRING") == NULL) {

        extern char *optarg;
        extern int optind, opterr, optopt;
        /* getopt() returns an int; keeping it in a char makes the -1
           comparison fail where plain char is unsigned. */
        int c;
        while ((c=getopt(argc,argv,"hwsarpdu"))!=-1) {
            switch (c) {
            case 'h':
                optShowhtml = 1;
                break;
            case 'u':
                optAdult = 1;
                break;
            case 'w':
                optShowWords = 1;
                break;
            case 's':
                optSummary = 1;
                break;
            case 'a':
                optAnchor = 1;
                break;
            case 'p':
                optPopRank = 1;
                break;
            case 'r':
                optResource = 1;
                break;
            case 'd':
                optDelete = 1;
                break;
            default:
                exit(1);
            }
        }
        /* Step back one so the positional arguments sit at 1+optind and
           2+optind; exactly two of them must remain. */
        --optind;

#ifdef DEBUG
        printf("argc %i, optind %i\n",argc,optind);
#endif

        if ((argc - optind)!= 3) {
            printf("Dette programet gir info om en DocID\n\n\tUsage PageInfo DocID collection\n");
            exit(1);
        }


        DocID = atol(argv[1 +optind]);
        subname = argv[2 +optind];


    }
    else {
        printf("Content-type: text/plain\n\n");
        int res;
        // Initialize the CGI lib
        res = cgi_init();

        // Was there an error initializing the CGI???
        if (res != CGIERR_NONE) {
            printf("Error # %d: %s<p>\n", res, cgilib_strerror(res));
            fprintf(stderr,"Cgi-lib error.");
            return -1;
        }

        if (cgi_getentrystr("subname") == NULL) {
            fprintf(stderr,"Didn't recieve any subname.");
            return -1;
        }
        else {
            subname = cgi_getentrystr("subname");
        }

        if (cgi_getentrystr("DocID") == NULL) {
            fprintf(stderr,"Didn't recieve any DocID.");
            return -1;
        }
        else {
            DocID = atol( cgi_getentrystr("DocID") );
        }

    }

    html_parser_init();

    printf("Showing data for Collection \"%s\", DocID %u\n\n",subname,DocID);


    printf("Lot: %i\n",rLotForDOCid(DocID));

    /* Delete: write an all-zero DI record for this DocID and stop. */
    if (optDelete) {
        memset(&DocumentIndexPost,'\0',sizeof(DocumentIndexPost));
        DIWrite(&DocumentIndexPost,DocID,subname,NULL);

        return 0;
    }

    if (DIRead_fmode(&DocumentIndexPost,DocID,subname,'s')) {

        printf("Url: \"%s\"\nLanguage: %s (id: %s)\nOffensive code: %hu\nDocument type: %s\nTime tested sins last good crawl: %hu\nAdult weight: %hu\nResource size: %u\nIP Address: %u\nHtml size: %i\nImage size: %i\nUser ID: %i\nCrawler version: %f\nRepository pointer: %u\n",

               DocumentIndexPost.Url,
               getLangCode2(atoi(DocumentIndexPost.Sprok)),
               DocumentIndexPost.Sprok,
               DocumentIndexPost.Offensive_code,
               DocumentIndexPost.Dokumenttype,
               DocumentIndexPost.AntallFeiledeCrawl,
               DocumentIndexPost.AdultWeight,
               DocumentIndexPost.ResourceSize,
               DocumentIndexPost.IPAddress,
               DocumentIndexPost.htmlSize2,
               DocumentIndexPost.imageSize,
               DocumentIndexPost.userID,
               DocumentIndexPost.clientVersion,
               DocumentIndexPost.RepositoryPointer);

        /* Anything other than 200 is highlighted in red (ANSI escape). */
        if (DocumentIndexPost.response == 200) {
            printf("HTTP response: %hu\n",DocumentIndexPost.response);
        }
        else {
            printf("HTTP response: \033[1;31m%hu\033[0m\n",DocumentIndexPost.response);

        }


        /* Copy into a real time_t instead of casting the field's address:
           the field is printed with %u below, so it may be narrower than
           time_t and ctime_r would read past it. */
        time_t crawlTime = (time_t)DocumentIndexPost.CrawleDato;
        ctime_r(&crawlTime,timebuf);
        timebuf[24] = '\0';     /* drop the trailing newline */


        printf("Last crawled time: %u\n",DocumentIndexPost.CrawleDato);
        printf("Last crawled time ISO: %s\n",timebuf);

        printf("crc32: %u\n",DocumentIndexPost.crc32);

#ifdef BLACK_BOX
        printf("Last seen Unix: %u\n",DocumentIndexPost.lastSeen);
        printf("Last seen ISO: %s", ctime(&DocumentIndexPost.lastSeen));
#endif

        printf("Nr of out links: %u\n",(unsigned int)DocumentIndexPost.nrOfOutLinks);


        char *metadesc;
        if (DocumentIndexPost.SummarySize == 0) {
            printf("Summary: Don't have pre-parsed summery (summary size is 0)\n");

        }
        else if (rReadSummary(DocID,&metadesc, &title, &body,DocumentIndexPost.SummaryPointer,DocumentIndexPost.SummarySize,subname)) {
            printf("\nSummary:\n");
            printf("\tSummary pointer: %u\n\tSummary size: %hu\n",DocumentIndexPost.SummaryPointer,DocumentIndexPost.SummarySize);

            printf("\tTitle from summary:  \"%s\"\n\tMeta description from summary: \"%s\"\n",title,metadesc);
            if (optSummary) {
                printf("Summary body\n*******************\n%s\n*******************\n\n",body);
            }
        }
        else {
            printf("Don't have pre-parsed summery\n");
        }




        char *url, *attributes;

        if (!rReadHtml(&htmlBuffer,&htmlBufferSize,DocumentIndexPost.RepositoryPointer,DocumentIndexPost.htmlSize2,DocID,subname,&ReposetoryHeader,&acl_allowbuffer,&acl_deniedbuffer,DocumentIndexPost.imageSize, &url, &attributes)) {
            printf("rReadHtml: did not returne true!\n");
            /* main returns int; report the failure through the exit status. */
            return 1;
        }
        printf("Entire url: %s\n", url);

#ifdef BLACK_BOX
        printf("acl allow raw: \"%s\"\n",acl_allowbuffer);
        printf("acl denied raw: \"%s\"\n",acl_deniedbuffer);

        printf("acl allow resolved: \"%s\"\n",aclResolv(acl_allowbuffer));
        printf("acl denied resolved: \"%s\"\n",aclResolv(acl_deniedbuffer));

        printf("PopRank: %d\n", ReposetoryHeader.PopRank);
#endif

        if (optShowhtml) {

            /* htmlBufferSize is a uLong, so it needs an unsigned long
               conversion rather than %i. */
            printf("html uncompresed size %lu\n",(unsigned long)htmlBufferSize);
            printf("html buff:\n*******************************\n");
            fwrite(htmlBuffer,htmlBufferSize,1,stdout);
            printf("\n*******************************\n\n");


        }
        if (optShowWords) {
            printf("words:\n");
            /* Separate out-parameters so the parser run does not overwrite
               the title/body obtained from the summary. */
            char *title, *body;
            html_parser_run(url,htmlBuffer, htmlBufferSize,&title, &body,fn,NULL );
        }
        if (optResource) {
            char buf[500000];
            printf("Resource:\n");
            printf("Ptr: 0x%x Len: %x\n", DocumentIndexPost.ResourcePointer, DocumentIndexPost.ResourceSize);
            if (getResource(rLotForDOCid(DocID), subname, DocID, buf, sizeof(buf)) == 0) {
                printf("\tDid not get any resource\n");
                warn("");
            } else {
                printf("%s\n", buf);
            }
        }

        printf("attributes:\"%s\"\n", attributes);

        free(url);
        free(attributes);
        free(acl_allowbuffer);
        free(acl_deniedbuffer);
    }
    else {
        printf("Cant read post\n");
    }

#ifndef BLACK_BOX

    if (optAdult) {
        struct adultFormat *adult;
        struct pagewordsFormat *pagewords;
        int AdultWeight;
        unsigned char langnr;

        if ((pagewords = malloc(sizeof(struct pagewordsFormat))) == NULL) {
            perror("malloc pagewords");
            exit(1);
        }
        if ((adult = malloc(sizeof(struct adultFormat))) == NULL) {
            perror("malloc argstruct.adult");
            exit(1);
        }

        wordsInit(pagewords);
        langdetectInit();
        adultLoad(adult);

        /* Start from -1 (was the no-op expression "AdultWeight -1;"). */
        AdultWeight = -1;

        handelPage(pagewords,&ReposetoryHeader,htmlBuffer,htmlBufferSize,&title,&body);

        wordsMakeRevIndex(pagewords,adult,&AdultWeight,&langnr);

        printf("adult %i\n",AdultWeight);
    }

    if (optAnchor) {
        int anchorBufferSize;
        char *anchorBuffer;

        /* First call with a NULL buffer only asks for the length; one extra
           byte is then allocated before the text itself is read. */
        anchorBufferSize = anchorRead(rLotForDOCid(DocID),subname,DocID,NULL,-1);
        anchorBufferSize += 1;
        if ((anchorBuffer = malloc(anchorBufferSize)) == NULL) {
            perror("malloc anchorBuffer");
            exit(1);
        }
        anchorRead(rLotForDOCid(DocID),subname,DocID,anchorBuffer,anchorBufferSize);

        printf("#######################################\nanchors:\n%s\n#######################################\n",anchorBuffer);

        free(anchorBuffer);
    }



    if (optPopRank) {
        /* Guarded like the other rank files; a failed open is skipped. */
        if (popopen (&popindex,"/home/boitho/config/popindex")) {
            PopRanindex = popRankForDocID(&popindex,DocID);
            popclose(&popindex);
            printf("popindex %i\n",PopRanindex);
        }

        if (popopen (&popextern,"/home/boitho/config/popextern")) {
            PopRankextern =  popRankForDocID(&popextern,DocID);
            printf("PopRankextern: %i\n",PopRankextern);
            popclose(&popextern);
        }
        if (popopen (&popintern,"/home/boitho/config/popintern")) {
            PopRankintern =  popRankForDocID(&popintern,DocID);
            printf("PopRankintern %i\n",PopRankintern);
            popclose(&popintern);
        }
        if (popopen (&popnoc,"/home/boitho/config/popnoc")) {
            PopRanknoc =  popRankForDocID(&popnoc,DocID);
            printf("PopRanknoc %i\n",PopRanknoc);
            popclose(&popnoc);
        }
        if (popopen (&popindex,"/home/boitho/config/popindex")) {
            PopRanindex = popRankForDocID(&popindex,DocID);
            printf("popindex %i\n",PopRanindex);
            popclose(&popindex);
        }



        /* Ranks whose file could not be opened are shown as 0 here. */
        printf("PopRankextern: %i\nPopRankintern %i\nPopRanknoc %i\n",PopRankextern,PopRankintern,PopRanknoc);


        int brank;
        popopenMemArray_oneLot(subname,rLotForDOCid(DocID));
        brank = popRankForDocIDMemArray(DocID);
        printf("brank %i\n",brank);
        /* Short rank: one byte per DocID, at offset DocID in SHORTPOPFILE. */
        if ( (FH = fopen(SHORTPOPFILE,"rb")) == NULL ) {
            perror("open");
        }
        else {
            if ((fseek(FH,DocID* sizeof(ShortRank),SEEK_SET) == 0) && (fread(&ShortRank,sizeof(ShortRank),1,FH) != 0)) {

                printf("Short rank %u\n",(unsigned char)ShortRank);
            }
            else {
                printf("no hort rank avalibal\n");
            };

            fclose(FH);
        }
    } // if optPopRank
#endif

    return 0;
}