示例#1
0
/*
 * Terminates the program when an snprintf() result shows that the formatted
 * path did not fit in a buffer of `cap` bytes (or that formatting failed).
 */
static void requirePathFits(int written, size_t cap, const char *what) {
	if (written < 0 || (size_t)written >= cap) {
		fprintf(stderr,"%s path is too long\n",what);
		exit(1);
	}
}

/*
 * Builds inverted index files for a lot from its reverse index files.
 *
 * Usage: ./LotInvertetIndexMaker type lotnr [ lotPart ]
 *   - with "type lotnr"         every part 0..62 of the lot is indexed, after
 *                               checking the lot's last index time and free space
 *   - with "type lotnr lotPart" only that part is indexed
 *   - anything else prints the usage text
 *
 * `subname` is not declared in this function; it is expected to be visible
 * at file scope.
 *
 * Returns 0; exits with status 1 on a failed precondition.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int lotPart;
	int dirLen;
	int written;
	char path[256];
	char revpath[256];
	char iipath[256];
	unsigned lastIndexTime;

	struct revIndexArrayFomat *revIndexArray;
	revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize);
	if (revIndexArray == NULL) {
		perror("malloc revIndexArray");
		exit(1);
	}

	// argv[1] is NULL when no argument was given; never hand NULL to %s
	printf("lot %s, %i\n",(argc > 1) ? argv[1] : "(null)",argc);


	if (argc == 3) {
		lotNr = atoi(argv[2]);

		// find the last time this lot was indexed
		lastIndexTime =  GetLastIndexTimeForLot(lotNr,subname);

		if(lastIndexTime == 0) {
			printf("lastIndexTime is 0\n");
			exit(1);
		}

		// check that there is enough disk space
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		printf("Indexing all buvkets for lot %i\n",lotNr);

		// NOTE(review): the bound 63 is kept from the original; confirm that
		// part 63 is not supposed to be indexed as well.
		for (lotPart=0;lotPart<63;lotPart++) {

			GetFilPathForLot(path,lotNr,subname);

			written = snprintf(revpath,sizeof(revpath),"%srevindex/%s/%i.txt",path,argv[1],lotPart);
			requirePathFits(written,sizeof(revpath),"revindex");

			// ToDo: the language ("aa") should be set somewhere else
			dirLen = snprintf(iipath,sizeof(iipath),"%siindex/%s/index/aa/",path,argv[1]);
			requirePathFits(dirLen,sizeof(iipath),"iindex directory");

			// create the directory
			makePath(iipath);

			// append the file name in place; the old
			// sprintf(iipath,"%s%i.txt",iipath,...) read and wrote the same
			// buffer, which is undefined behaviour
			written = snprintf(iipath + dirLen,sizeof(iipath) - (size_t)dirLen,"%i.txt",lotPart);
			requirePathFits(written,sizeof(iipath) - (size_t)dirLen,"iindex file");

			Indekser(revpath,iipath,revIndexArray);

			// the revindex file is intentionally left in place
		}
	}
	else if (argc == 4) {
		lotNr = atoi(argv[2]);
		lotPart = atoi(argv[3]);

		printf("indexint part %i for lot %i\n",lotPart,lotNr);

		GetFilPathForLot(path,lotNr,subname);

		written = snprintf(revpath,sizeof(revpath),"%srevindex/%s/%i.txt",path,argv[1],lotPart);
		requirePathFits(written,sizeof(revpath),"revindex");

		// ToDo: the language ("aa") should be set somewhere else
		written = snprintf(iipath,sizeof(iipath),"%siindex/%s/index/aa/%i.txt",path,argv[1],lotPart);
		requirePathFits(written,sizeof(iipath),"iindex file");

		Indekser(revpath,iipath,revIndexArray);

	}
	else {
		printf("usage: ./LotInvertetIndexMaker type lotnr [ lotPart ]\n\n");

	}

	free(revIndexArray);

	return 0;
}
示例#2
0
/*
 * Indexes all pages of one lot.
 *
 * argv[1] is the lot number. Pages are fetched one at a time, handed to
 * handelPage(), and counted. Network mode is present in the code but
 * switched off, so the lot is always read from local files. In local mode
 * only pages whose DocID maps back to this lot and whose URL contains
 * ".no/" are handled, and the prepared DocumentIndex record is not written.
 *
 * A per-response-code tally and the number of handled pages are printed at
 * the end.
 */
int main (int argc, char *argv[]) {

	/* 0 = read the lot from local files, non-zero = fetch it from lotServer */
	const int readOverNetwork = 0;

	int lotNr;
	int code;
	int pagesHandled = 0;
	char lotServer[64];

	/* temp: these should come from the lot server or a file */
	unsigned int FiltetTime = 0;
	unsigned int FileOffset = 0;
	unsigned int lastIndexTime;
	unsigned long int radress;

	char htmlcompressdbuffer[524288];  //0.5 mb
	char imagebuffer[524288];  //0.5 mb

	int httpResponsCodes[nrOfHttpResponsCodes];
	FILE *revindexFilesHa[NrOfDataDirectorys];

	struct ReposetoryHeaderFormat ReposetoryHeader;
	struct DocumentIndexFormat DocumentIndexPost;
	struct adultFormat adult;

	if (argc < 2) {
		printf("Dette programet indekserer en lot. Gi det et lot nummer\n");
		exit(0);
	}

	/* start every response-code counter at zero */
	code = 0;
	while (code < nrOfHttpResponsCodes) {
		httpResponsCodes[code] = 0;
		code++;
	}

	lotNr = atoi(argv[1]);

	/* find the server responsible for this lot */
	lotlistLoad();
	lotlistGetServer(lotServer,lotNr);

	printf("vil index lot nr %i at %s\n",lotNr,lotServer);

	adultLoad(&adult);

	if (!readOverNetwork) {
		printf("Wil acess files localy\n");

		/* look up the last index time; the value is currently not used */
		lastIndexTime =  GetLastIndexTimeForLot(lotNr);

		revindexFilesOpenLocal(revindexFilesHa,lotNr);

		while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset)) {

			/* the page may be corrupt: its DocID must map back to the lot being read */
			if (rLotForDOCid(ReposetoryHeader.DocID) != lotNr) {
				printf("bad DocID %i\n",ReposetoryHeader.DocID);
				continue;
			}

			/* only pages whose URL contains ".no/" are indexed */
			if (strstr(ReposetoryHeader.url,".no/") == 0) {
				continue;
			}

			global_curentDocID = ReposetoryHeader.DocID;
			/* a '?' in the URL marks the page as dynamic */
			global_curentUrlIsDynamic = (strchr(ReposetoryHeader.url,'?') == 0) ? 0 : 1;

			handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult);

			/* the repository data is copied into the DocumentIndex record regardless */
			copyRepToDi(&DocumentIndexPost,&ReposetoryHeader);
			DocumentIndexPost.RepositoryPointer = radress;

			/* the DocumentIndex record is not written for now (DIWrite is disabled) */

			++pagesHandled;
		}

		/* the lot's index time is not updated for now (setLastIndexTimeForLot is disabled) */
		/* the revindex files need no copying, the local ones were written directly */
	}
	else {
		printf("will ges pages by net\n");

		revindexFilesOpenNET(revindexFilesHa);

		while (rGetNextNET(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset)) {

			global_curentDocID = ReposetoryHeader.DocID;
			/* a '?' in the URL marks the page as dynamic */
			global_curentUrlIsDynamic = (strchr(ReposetoryHeader.url,'?') == 0) ? 0 : 1;

			handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult);

			/* the repository data is copied into the DocumentIndex record regardless */
			copyRepToDi(&DocumentIndexPost,&ReposetoryHeader);
			DocumentIndexPost.RepositoryPointer = radress;

			/* write the record to the DocumentIndex on the server */
			DIWriteNET(lotServer,&DocumentIndexPost,ReposetoryHeader.DocID);

			++pagesHandled;
		}

		printf("Sending pages\n");

		revindexFilesSendNET(revindexFilesHa,lotNr);
	}

	/* summary of the http response codes that were counted */
	printf("http responses:\n");
	for (code = 0; code < nrOfHttpResponsCodes; code++) {
		if (httpResponsCodes[code] == 0) {
			continue;
		}
		printf("%i: %i\n",code,httpResponsCodes[code]);
	}

	printf("indexed %i pages\n",pagesHandled);

}
示例#3
0
void connectHandler(int socket) {
    struct packedHedderFormat packedHedder;

    int i,n;
    int LotNr;
    char lotPath[512];
    char buf[100];
    unsigned int FilterTime;
    int filnamelen;
    FILE *FH;
    struct stat inode;      // lager en struktur for fstat å returnere.
    off_t filesize;
    char c;

    struct DocumentIndexFormat DocumentIndexPost;
    int DocID;

    struct ReposetoryHeaderFormat ReposetoryHeader;
    unsigned int radress;

    char htmlbuffer[524288];
    int destLeng;
    char dest[512];

    off_t fileBloks,filerest;
    char *filblocbuff;


    //while ((i=read(socket, &packedHedder, sizeof(struct packedHedderFormat))) > 0) {
    while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) {

        //printf("command: %i\n",packedHedder.command);
        //printf("i er %i\n",i);
        printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
        //printf("subname: %s\n",packedHedder.subname);
        //lar size reflektere hva som er igjen av pakken
        packedHedder.size = packedHedder.size - sizeof(packedHedder);

        if (packedHedder.command == C_rmkdir) {

            printf("C_rmkdir\n");

            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant read destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant read dest");
                exit(1);
            }

            GetFilPathForLot(lotPath,LotNr,packedHedder.subname);

            sprintf(lotPath,"%s%s",lotPath,dest);

            printf("mkdir %s\n",lotPath);

            makePath(lotPath);

            printf("~C_rmkdir\n");


        }
        else if (packedHedder.command == C_rComand) {


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant read destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant read dest");
                exit(1);
            }

            printf("run command %s\n",dest);

            system(dest);

        }
        else if (packedHedder.command == C_getLotToIndex) {
            printf("fikk C_getLotToIndex\n");

            int dirty;

            if ((i=recv(socket, &dirty, sizeof(dirty),MSG_WAITALL)) == -1) {
                perror("Cant read dirty");
                exit(1);
            }

            printf("dirty: %i\n",dirty);

            LotNr = findLotToIndex(packedHedder.subname,dirty);

            printf("sending respons\n");
            sendall(socket,&LotNr, sizeof(LotNr));

        }
        else if (packedHedder.command == C_getlotHasSufficientSpace) {
            printf("fikk C_getLotToIndex\n");

            int needSpace;
            int response;

            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=recv(socket, &needSpace, sizeof(needSpace),MSG_WAITALL)) == -1) {
                perror("Cant read dirty");
                exit(1);
            }


            printf("needSpace: %i, LotNr %i\n",needSpace,LotNr);


            response = lotHasSufficientSpace(LotNr, needSpace, packedHedder.subname);


            printf("sending respons\n");
            sendall(socket,&response, sizeof(response));

        }
        else if (packedHedder.command == C_rGetSize) {
            printf("fikk C_rGetSize\n");


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            if (filnamelen > sizeof(buf)) {
                printf("filname to long\n");
            };

            if ((i=read(socket, buf, filnamelen)) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            printf("filname %s\n",buf);

            if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) {
                perror(buf);
                //sending that he fil is emty
                fileBloks = 0;

                sendall(socket,&fileBloks, sizeof(fileBloks));

            }
            else {
                //finner og sender il størelse
                fstat(fileno(FH),&inode);
                //filesize = inode.st_size;
                //sendall(socket,&filesize, sizeof(filesize));

                fileBloks = inode.st_size;

                printf("size is %" PRId64 "\n",fileBloks);

                sendall(socket,&fileBloks, sizeof(fileBloks));

                fclose(FH);
            }
        }
        else if (packedHedder.command == C_rGetFile) {
            printf("fikk C_rGetFile\n");


            //leser data. Det skal væren en int som sier hvilken lot vi vil ha
            if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
                perror("Cant read lotnr");
                exit(1);
            }

            if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            if (filnamelen > sizeof(buf)) {
                printf("filname to long\n");
            };

            if ((i=read(socket, buf, filnamelen)) == -1) {
                perror("Cant read filnamelen");
                exit(1);
            }

            printf("filname %s\n",buf);

            if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) {
                perror(buf);
                //sending that the fil is emty
                fileBloks = 0;
                filerest = 0;

                sendall(socket,&fileBloks, sizeof(fileBloks));
                sendall(socket,&filerest, sizeof(filerest));

            }
            else {
                //finner og sender fil størelse
                fstat(fileno(FH),&inode);
                //filesize = inode.st_size;
                //sendall(socket,&filesize, sizeof(filesize));

                fileBloks = (int)floor(inode.st_size / rNetTrabsferBlok);
                filerest = inode.st_size - (fileBloks * rNetTrabsferBlok);

                sendall(socket,&fileBloks, sizeof(fileBloks));
                sendall(socket,&filerest, sizeof(filerest));

                printf("sending fil. fileBloks %"PRId64", filerest %"PRId64"\n",fileBloks,filerest);


                filblocbuff = (char *)malloc(rNetTrabsferBlok);
                for(i=0; i < fileBloks; i++) {

                    //fread(filblocbuff,sizeof(c),rNetTrabsferBlok,FH);
                    //fread_all(const void *buf, size_t size, FILE *stream)
                    fread_all(filblocbuff,rNetTrabsferBlok,FH, 4096);

                    if ((n=sendall(socket, filblocbuff, rNetTrabsferBlok)) == -1) {
                        perror("Cant recv dest");
                        exit(1);
                    }

                }

                printf("did recv %i fileBloks\n",i);


                fread(filblocbuff,sizeof(c),filerest,FH);

                if ((n=sendall(socket, filblocbuff, filerest)) == -1) {
                    perror("Cant recv filerest");
                    exit(1);
                }

                free(filblocbuff);


                /*
                   for (i=0;i<filesize;i++) {
                   fread(&c,sizeof(char),1,FH);
                   send(socket, &c, sizeof(char), 0);
                //printf("%i\n",(int)c);
                }
                 */
                printf("send file end\n");

                fclose(FH);
            }

        }
        else if (packedHedder.command == C_rGetNext) {
            printf("fikk C_rGetNext\n");

            printf("støttes ikke lengere");
            exit(1);
            /*
            		//leser data. Det skal væren en unigned int som sier hvilken lot vi vil ha
            		//har deklarert den som int her ???
            		if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) {
            			perror("Cant read lotnr");
            			exit(1);
            		}
            		printf("leser FilterTime\n");
            		//leser filtertime
            		if ((i=read(socket, &FilterTime, sizeof(FilterTime))) == -1) {
            			perror("Cant read lotnr");
            			exit(1);
            		}

            		printf("lotnr %i FilterTime %u\n",LotNr,FilterTime);

            		//henter inn data om den lotten
            		if (rGetNext(LotNr,&ReposetoryHeader,htmlbuffer,NULL,&radress,FilterTime,0)) {

            			//printf("DocId: %i url: %s\n",ReposetoryHeader.DocID,ReposetoryHeader.url);

            			//sender pakke hedder
            			sendpacked(socket,C_rLotData,PROTOCOLVERSION, ReposetoryHeader.htmlSize + sizeof(ReposetoryHeader) +sizeof(radress), NULL,packedHedder.subname);

            			//sennder ReposetoryHeader'en
            			sendall(socket,&ReposetoryHeader, sizeof(ReposetoryHeader));

            			//sender htmlen
            			sendall(socket,&htmlbuffer, ReposetoryHeader.htmlSize);

            			//sender adressen
            			sendall(socket,&radress,sizeof(radress));
            			//printf("data sent\n");

            			//printf("rGetNext: %i\n",ReposetoryHeader.DocID);

            		}
            		else {
            			sendpacked(socket,C_rEOF,PROTOCOLVERSION, 0, NULL,packedHedder.subname);
            			printf("ferdig\n");
            		}
            */
        }
        else if (packedHedder.command == C_DIWrite) {


            if ((i=recv(socket, &DocumentIndexPost, sizeof(struct DocumentIndexFormat),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            DIWrite(&DocumentIndexPost,DocID,packedHedder.subname, NULL);

            //printf("DIWrite: %i\n",DocID);

        }
        else if (packedHedder.command == C_DIRead) {

            int DocID;
            struct DocumentIndexFormat DocumentIndexPost;

            printf("got commane C_DIRead. sise %i hsize %i ds %i\n",packedHedder.size, sizeof(packedHedder), sizeof(DocID));

            if ((i=recv(socket, &DocID, sizeof(DocID),0)) == -1) {
                perror("recv");
                exit(1);
            }
            //printf("DocID %i\n",DocID);

            //leser inn datan
            //int DIRead (struct DocumentIndexFormat *DocumentIndexPost, int DocID);
            DIRead(&DocumentIndexPost,DocID,packedHedder.subname);

            sendall(socket,&DocumentIndexPost, sizeof(struct DocumentIndexFormat));
        }
        else if (packedHedder.command == C_rGetIndexTime) {

            int Lotnr;
            unsigned int IndexTime;
            if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) {
                perror("recv");
                exit(1);
            }

            IndexTime = GetLastIndexTimeForLot(LotNr,packedHedder.subname);

            sendall(socket,&IndexTime, sizeof(IndexTime));

        }
        else if (packedHedder.command == C_rSetIndexTime) {

            int Lotnr;
            if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) {
                perror("recv");
                exit(1);
            }

            setLastIndexTimeForLot(LotNr,NULL,packedHedder.subname);

        }
        else if (packedHedder.command == C_rSendFile) {
            //skal mota en fil for lagring i reposetoryet
            //char FilePath[156];
            FILE *FILEHANDLER;
            char c;
            char opentype[2];
            //char *filblocbuff;
            //off_t fileBloks,filerest;

            if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) {
                perror("Cant recv lotnr");
                exit(1);
            }

            printf("lotNr %i\n",LotNr);


            //leser destinasjonelengden
            if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) {
                perror("Cant recv destLeng");
                exit(1);
            }

            if (destLeng > sizeof(dest)) {
                printf("dest filname is to long at %i\n",destLeng);
                exit(1);
            }

            //leser destinasjonene
            if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) {
                perror("Cant recv dest");
                exit(1);
            }

            printf("coping %s as length %i in to lot %i\n",dest,destLeng,LotNr);

            if ((i=recv(socket, &opentype, sizeof(char) +1,MSG_WAITALL)) == -1) {
                perror("Cant recv opentype");
                exit(1);
            }
            printf("opentype \"%s\"\n",opentype);


            //GetFilPathForLot(FilePath,LotNr,packedHedder.subname);

            //legger til filnavnet
            //strncat(FilePath,dest,sizeof(FilePath));

            //leser inn filstørelsen
            if ((i=recv(socket, &fileBloks, sizeof(fileBloks),MSG_WAITALL)) == -1) {
                perror("Cant recv fileBloks");
                exit(1);
            }

            if ((i=recv(socket, &filerest, sizeof(filerest),MSG_WAITALL)) == -1) {
                perror("Cant recv filerest");
                exit(1);
            }

            printf("fileBloks: %" PRId64 ", filerest: %" PRId64 "\n",fileBloks,filerest);

            //åpner filen
            if ((FILEHANDLER = lotOpenFileNoCasheByLotNr(LotNr,dest,opentype,'e',packedHedder.subname)) == NULL) {
                perror(dest);
            }

            filblocbuff = (char *)malloc(rNetTrabsferBlok);
            for(i=0; i < fileBloks; i++) {

                if ((n=recv(socket, filblocbuff, rNetTrabsferBlok,MSG_WAITALL)) == -1) {
                    perror("Cant recv dest");
                    exit(1);
                }

                fwrite(filblocbuff,sizeof(c),rNetTrabsferBlok,FILEHANDLER);
            }

            printf("did recv %i fileBloks\n",i);


            if ((n=recv(socket, filblocbuff, filerest,MSG_WAITALL)) == -1) {
                perror("Cant recv filerest");
                exit(1);
            }

            fwrite(filblocbuff,sizeof(c),filerest,FILEHANDLER);


            free(filblocbuff);

            fclose(FILEHANDLER);

            printf("\n");
        }
        else if (packedHedder.command == C_DIGetIp) {


            unsigned int DocID;
            struct DocumentIndexFormat DocumentIndexPost;

            //printf("got command C_DIGetIp\n");

            if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }

            //printf("DocID %u\n",DocID);

            DIRead(&DocumentIndexPost,DocID,packedHedder.subname);

            //printf("ipadress: %u\n",DocumentIndexPost.IPAddress);

            sendall(socket,&DocumentIndexPost.IPAddress, sizeof(DocumentIndexPost.IPAddress));


        }
        else if (packedHedder.command == C_anchorAdd) {
            size_t textlen;
            unsigned int DocID;
            char *text;

            printf("Add anchor....\n");
            if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            } else if ((i = recv(socket, &textlen, sizeof(textlen), MSG_WAITALL)) == -1) {
                perror("recv(textlen)");
                exit(1);
            }
            text = malloc(textlen+1);
            text[textlen] = '\0';
            if ((i = recv(socket, text, textlen, MSG_WAITALL)) == -1) {
                perror("recv(text)");
                exit(1);
            }

            anchoraddnew(DocID, text, textlen, packedHedder.subname, NULL);
            printf("Text for %d: %s\n", DocID, text);

            free(text);
        }
        else if (packedHedder.command == C_anchorGet) {
            size_t len;
            char *text;
            int LotNr;
            unsigned int DocID;
            printf("Get anchor...\n");

            if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) {
                perror("recv");
                exit(1);
            }
            printf("got DocID %u\n",DocID);
            LotNr = rLotForDOCid(DocID);
            printf("trying to read anchor\n");

            len = anchorRead(LotNr, packedHedder.subname, DocID, NULL, -1);
            printf("got anchor of length %i\n",len);

            sendall(socket, &len, sizeof(len));
            text = malloc(len+1);

            printf("readint it again\n");
            anchorRead(LotNr, packedHedder.subname, DocID, text, len+1);
            sendall(socket, text, len);
        }
        else if (packedHedder.command == C_readHTML) {
            /*
            unsigned int DocID;
            unsigned int len;
            char *text;
            char *acla, *acld;
            struct DocumentIndexFormat DocIndex;
            struct ReposetoryHeaderFormat ReposetoryHeader;

            if ((i = recv(socket, &DocID, sizeof(DocID), MSG_WAITALL)) == -1) {
            	perror("recv");
            	exit(1);
            }

            if ((i = recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) {

            	perror("recv(len)");
            	exit(1);
            }
            printf("len %u\n",len);
            text = malloc(len);

            if (text == NULL)
            	exit(1);

            DIRead(&DocIndex, DocID, packedHedder.subname);


            if (!rReadHtml(
            		text,
            		&len,
            		DocIndex.RepositoryPointer,
            		DocIndex.htmlSize,
            		DocID,
            		packedHedder.subname,
            		&ReposetoryHeader,
            		&acla,
            		&acld,
            		DocIndex.imageSize)) {
            	len = 0;
            	sendall(socket, &len, sizeof(len));
            } else {
            	++len; // \0
            	#ifdef DEBUG
            	printf("docID %u\n",DocID);
            	printf("Got: (len %i, real %i) ########################\n%s\n#####################\n", len, strlen(text), text);
            	#endif
            	sendall(socket, &len, sizeof(len));
            	sendall(socket, text, len);
            	sendall(socket, &ReposetoryHeader,sizeof(ReposetoryHeader));
            }

            free(text);
            */
        }
        /*
        runarb: 06 des 2007: vi har gåt bort fra denne metoden for nå, og bruker heller index over smb. Men tar vare på den da vi kan trenge den siden

        else if (packedHedder.command == C_urltodocid) {
        	char cmd;
        	int alloclen;
        	char *urlbuf;

        	if (urltodociddb == NULL) {
        		cmd = C_DOCID_NODB;
        		sendall(socket, &cmd, sizeof(cmd));
        		exit(1);
        	} else {
        		cmd = C_DOCID_READY;
        		sendall(socket, &cmd, sizeof(cmd));
        	}
        	cmd = C_DOCID_NEXT;

        	alloclen = 1024;
        	urlbuf = malloc(alloclen);

        	do {
        		unsigned int DocID;
        		size_t len;
        		if ((i = recv(socket, &cmd, sizeof(cmd), MSG_WAITALL)) == -1) {
        			err(1, "recv(cmd)");
        		}
        		if (cmd == C_DOCID_DONE)
        			break;

        		if ((i == recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) {
        			err(1, "recv(len)");
        		}
        		if (alloclen < len+1) {
        			free(urlbuf);
        			alloclen *= 2;
        			urlbuf = malloc(alloclen);
        		}
        		if ((i == recv(socket, urlbuf, len, MSG_WAITALL)) == -1) {
        			err(1, "recv(len)");
        		}
        		urlbuf[len] = '\0';

        		if (!getDocIDFromUrl(urltodociddb, urlbuf, &DocID)) {
        			cmd = C_DOCID_NOTFOUND;
        			sendall(socket, &cmd, sizeof(cmd));
        		} else {
        			cmd = C_DOCID_FOUND;
        			sendall(socket, &cmd, sizeof(cmd));
        			sendall(socket, &DocID, sizeof(DocID));
        		}
        	} while (1);

        	free(urlbuf);
        }
        */
        else {
            printf("unnown comand. %i\n", packedHedder.command);
        }
        //printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command);
    } //while

}
示例#4
0
/*
 * Writes the path of the inverted index file for one part of a lot into
 * `iipath` and creates the directory that holds it.
 *
 * The result is "<lot path>iindex/<type>/index/aa/<lotPart>.txt".
 * Terminates the program when the path does not fit in `iipathSize` bytes.
 */
static void prepareIindexPath(char *iipath, size_t iipathSize, int lotNr, char *type, char *subname, int lotPart) {
	char path[256];
	int dirLen;
	int nameLen;

	GetFilPathForLot(path,lotNr,subname);

	// ToDo: the language ("aa") should be set somewhere else
	dirLen = snprintf(iipath,iipathSize,"%siindex/%s/index/aa/",path,type);
	if (dirLen < 0 || (size_t)dirLen >= iipathSize) {
		fprintf(stderr,"iindex directory path is too long\n");
		exit(1);
	}

	// create the directory
	makePath(iipath);

	// append the file name in place; sprintf(iipath,"%s%i.txt",iipath,...)
	// read and wrote the same buffer, which is undefined behaviour
	nameLen = snprintf(iipath + dirLen,iipathSize - (size_t)dirLen,"%i.txt",lotPart);
	if (nameLen < 0 || (size_t)nameLen >= iipathSize - (size_t)dirLen) {
		fprintf(stderr,"iindex file path is too long\n");
		exit(1);
	}
}

/* Returns non-zero when `filePath` can be opened for reading. */
static int canOpenForReading(const char *filePath) {
	FILE *probe = fopen(filePath,"r");

	if (probe == NULL) {
		return 0;
	}
	fclose(probe);
	return 1;
}

/*
 * Builds inverted index files for a lot.
 *
 * Usage: ./LotInvertetIndexMaker [-n] [-d] type lotnr subname [ lotPart ]
 *   -n  skip a part whose iindex file already exists (with an explicit
 *       lotPart the program exits with status 1 instead)
 *   -d  passed on to Indekser() as the "allow duplicates" flag
 *
 * Without lotPart all 64 parts are indexed, after checking the lot's last
 * index time and free disk space. Any other argument count prints the usage
 * text.
 *
 * Returns 0; exits with status 1 on a failed precondition.
 */
int main (int argc, char *argv[]) {


	int lotNr;
	int lotPart;
	int positional;
	char iipath[256];
	unsigned lastIndexTime;
	int optMustBeNewerThen = 0;
	int optAllowDuplicates = 0;
	char *type;
	char *subname;

	struct revIndexArrayFomat *revIndexArray;

	extern char *optarg;
	extern int optind, opterr, optopt;
	// getopt() returns int; a plain char never equals -1 where char is unsigned
	int c;

	revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize);
	if (revIndexArray == NULL) {
		perror("malloc revIndexArray");
		exit(1);
	}

	while ((c=getopt(argc,argv,"nd"))!=-1) {
		switch (c) {
			case 'n':
				optMustBeNewerThen = 1;
				break;
			case 'd':
				optAllowDuplicates = 1;
				break;
			default:
				exit(1);
		}
	}
	// after this, argv[1 + optind] is the first non-option argument
	--optind;

	// argv[1] is NULL when no argument was given; never hand NULL to %s
	printf("lot %s, %i\n",(argc > 1) ? argv[1] : "(null)",argc);

	// check the count before touching argv, so no element past the end is read
	positional = argc - optind;
	if (positional != 4 && positional != 5) {
		printf("usage: ./LotInvertetIndexMaker type lotnr subname [ lotPart ]\n\n");
		free(revIndexArray);
		return 0;
	}

	type = argv[1 +optind];
	lotNr = atoi(argv[2 +optind]);
	subname = argv[3 +optind];

	if (positional == 4) {

		// find the last time this lot was indexed
		lastIndexTime =  GetLastIndexTimeForLot(lotNr,subname);

		if(lastIndexTime == 0) {
			printf("lastIndexTime is 0\n");
			exit(1);
		}

		// check that there is enough disk space
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		printf("Indexing all buvkets for lot %i\n",lotNr);

		for (lotPart=0;lotPart<64;lotPart++) {

			prepareIindexPath(iipath,sizeof(iipath),lotNr,type,subname,lotPart);

			if (optMustBeNewerThen != 0 && canOpenForReading(iipath)) {
				printf("we all redy have a iindex.\n");
				continue;
			}

			Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
		}
	}
	else {
		lotPart = atoi(argv[4 +optind]);

		printf("indexint part %i for lot %i\n",lotPart,lotNr);

		prepareIindexPath(iipath,sizeof(iipath),lotNr,type,subname,lotPart);

		printf("iipath: \"%s\n",iipath);

		if (optMustBeNewerThen != 0 && canOpenForReading(iipath)) {
			printf("we all redy have a iindex.\n");
			exit(1);
		}

		Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
	}

	free(revIndexArray);

	return 0;
}
示例#5
0
/*
 * Indexes every page stored in one lot.
 *
 * Usage: IndexerLot lotNr subname
 *
 *   argv[1]  lot number (parsed with atoi)
 *   argv[2]  subname, copied into the file-level `subname` buffer
 *
 * For each record returned from the repository the page is passed to
 * handelPage(), the repository header is copied into a DocumentIndex
 * record, the record's RepositoryPointer is set to the address reported
 * by the reader, and the record is written to the DocumentIndex.
 *
 * Two code paths exist: one that fetches pages over the network and one
 * that reads the lot from local files. The network path is disabled by
 * the constant `if (0)` below, so only the local path ever runs.
 *
 * Exit status: 0 on success or when called with too few arguments,
 * 1 when lotHasSufficientSpace() fails or when the lot already has a
 * non-zero last index time.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	char lotServer[64];
	int pageCount;
	int i;

	unsigned int FiltetTime;
	unsigned int FileOffset;

	char htmlcompressdbuffer[524288];  // 0.5 MB
	char imagebuffer[524288];          // 0.5 MB

	int httpResponsCodes[nrOfHttpResponsCodes];

	struct ReposetoryHeaderFormat ReposetoryHeader;
	struct DocumentIndexFormat DocumentIndexPost;
	unsigned long int radress;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	struct adultFormat adult;
	unsigned int lastIndexTime;

	if (argc < 3) {
		printf("Dette programet indekserer en lot. Usage:\n\tIndexerLot lotNr subname\n");
		exit(0);
	}

	// Start every HTTP response code counter at zero.
	for (i = 0; i < nrOfHttpResponsCodes; i++) {
		httpResponsCodes[i] = 0;
	}

	lotNr = atoi(argv[1]);
	// Copies at most sizeof(subname)-1 bytes.
	// NOTE(review): termination relies on the last byte of `subname`
	// already being 0 (true for a zero-initialised file-scope array) - confirm.
	strncpy(subname,argv[2],sizeof(subname) -1);

	// Find the server based on the lot number.
	lotlistLoad();
	lotlistGetServer(lotServer,lotNr);

	printf("vil index lot nr %i at %s\n",lotNr,lotServer);

	adultLoad(&adult);

	langdetectInit();

	// temp: these should be fetched from the slot server or from a file.
	FiltetTime = 0;
	FileOffset = 0;

	pageCount = 0;

	if (0) {
		// Network path: deliberately disabled by the constant condition.

		printf("will ges pages by net\n");

		revindexFilesOpenNET(revindexFilesHa);

		while (rGetNextNET(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset,subname)) {

			// Publish the current document in the file-level globals;
			// a URL counts as dynamic when it contains a '?'.
			global_curentDocID = ReposetoryHeader.DocID;
			global_curentUrlIsDynamic = (strchr(ReposetoryHeader.url,'?') != NULL);

			handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult);

			// The data is copied over regardless of what handelPage did.
			copyRepToDi(&DocumentIndexPost,&ReposetoryHeader);

			DocumentIndexPost.RepositoryPointer = radress;

			// Write to the DocumentIndex.
			DIWriteNET(lotServer,&DocumentIndexPost,ReposetoryHeader.DocID,subname);

			++pageCount;
		}

		printf("Sending pages\n");

		revindexFilesSendNET(revindexFilesHa,lotNr);

	}
	else {
		printf("Wil acess files localy\n");

		// Check that we have enough disk space.
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		// Find the last indexing time for the lot.
		lastIndexTime =  GetLastIndexTimeForLot(lotNr,subname);

		// Refuse to run when a last index time is already recorded
		// (presumably meaning the lot was indexed before - confirm).
		if (lastIndexTime != 0) {
			// %u: lastIndexTime is unsigned; %i would print large values as negative.
			printf("lastIndexTime is not 0, but %u\n",lastIndexTime);
			exit(1);
		}

		revindexFilesOpenLocal(revindexFilesHa,lotNr,"Main","wb",subname);

		while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,sizeof(htmlcompressdbuffer),imagebuffer,&radress,FiltetTime,FileOffset,subname)) {

			// Publish the current document in the file-level globals;
			// a URL counts as dynamic when it contains a '?'.
			global_curentDocID = ReposetoryHeader.DocID;
			global_curentUrlIsDynamic = (strchr(ReposetoryHeader.url,'?') != NULL);

			printf("%s\n",ReposetoryHeader.url);

			handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult);

			// The data is copied over regardless of what handelPage did.
			copyRepToDi(&DocumentIndexPost,&ReposetoryHeader);

			DocumentIndexPost.RepositoryPointer = radress;

			// Write to the DocumentIndex.
			DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID,subname);

			++pageCount;
		}

		// Record the index time for the lot.
		setLastIndexTimeForLot(lotNr,httpResponsCodes,subname);

		// The revindex files need no copying: we worked on the local ones directly.
	}

	langdetectDestroy();

	printf("indexed %i pages\n\n\n",pageCount);

	return 0;
}
示例#6
0
/*
 * Rewrites the RepositoryPointer of the DocumentIndex records of one lot.
 *
 * Usage: <program> lotNr
 *
 *   argv[1]  lot number (parsed with atoi)
 *
 * For every record that rGetNext() returns, the matching DocumentIndex
 * entry is read with DIRead(), its RepositoryPointer is set to the
 * address reported by the reader, and the entry is written back with
 * DIWrite().
 *
 * The file-level `subname` is passed to the helpers but is never set
 * from the command line here.
 * NOTE(review): confirm it is initialised elsewhere before this runs.
 *
 * Exit status: 0 on success or when called without a lot number,
 * 1 when the lot's last index time is 0.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	char lotServer[64];
	int pageCount;

	unsigned int FiltetTime;
	unsigned int FileOffset;

	char htmlcompressdbuffer[524288];  // 0.5 MB
	char imagebuffer[524288];          // 0.5 MB

	struct ReposetoryHeaderFormat ReposetoryHeader;
	struct DocumentIndexFormat DocumentIndexPost;
	unsigned long int radress;
	unsigned int lastIndexTime;

	if (argc < 2) {
		printf("Dette programet indekserer en lot. Gi det et lot nummer\n");
		exit(0);
	}

	lotNr = atoi(argv[1]);

	// Find the server based on the lot number; it is only used in the log line below.
	lotlistLoad();
	lotlistGetServer(lotServer,lotNr);

	printf("vil index lot nr %i at %s\n",lotNr,lotServer);

	// Find the last indexing time for the lot.
	lastIndexTime =  GetLastIndexTimeForLot(lotNr,subname);

	// A last index time of 0 makes the program skip the lot.
	if (lastIndexTime == 0) {
		printf("lastIndexTime is 0, skiping.\n");
		exit(1);
	}

	// temp: these should be fetched from the slot server or from a file.
	FiltetTime = 0;
	// Hard-coded offset handed to rGetNext().
	// NOTE(review): presumably a one-off resume point near the 2 GiB mark - confirm.
	FileOffset = 2140483648;

	pageCount = 0;

	printf("Wil acess files localy\n");

	while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset,subname)) {

		DIRead(&DocumentIndexPost,ReposetoryHeader.DocID,subname);

		DocumentIndexPost.RepositoryPointer = radress;

		// Write back to the DocumentIndex.
		DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID,subname);

		++pageCount;
	}

	printf("indexed %i pages\n\n\n",pageCount);

	return 0;
}