int main (int argc, char *argv[]) { int lotNr; int lotPart; char path[256]; char revpath[256]; char iipath[256]; unsigned lastIndexTime; struct revIndexArrayFomat *revIndexArray; revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize); if (argc < 2) { } printf("lot %s, %i\n",argv[1],argc); if (argc == 3) { lotNr = atoi(argv[2]); //finner siste indekseringstid lastIndexTime = GetLastIndexTimeForLot(lotNr,subname); if(lastIndexTime == 0) { printf("lastIndexTime is 0\n"); exit(1); } //sjekker om vi har nokk palss if (!lotHasSufficientSpace(lotNr,4096,subname)) { printf("insufficient disk space\n"); exit(1); } printf("Indexing all buvkets for lot %i\n",lotNr); for (lotPart=0;lotPart<63;lotPart++) { //printf("indexint part %i for lot %i\n",lotPart,lotNr); //"$revindexPath/$revindexFilNr.txt"; GetFilPathForLot(path,lotNr,subname); sprintf(revpath,"%srevindex/%s/%i.txt",path,argv[1],lotPart); //ToDo: må sette språk annen plass sprintf(iipath,"%siindex/%s/index/aa/",path,argv[1]); //oppretter paths makePath(iipath); sprintf(iipath,"%s%i.txt",iipath,lotPart); Indekser(revpath,iipath,revIndexArray); //sletter revindex. Ingen vits i å ha den fylle opp plass //remove(revpath); } } else if (argc == 4) { lotNr = atoi(argv[2]); lotPart = atoi(argv[3]); printf("indexint part %i for lot %i\n",lotPart,lotNr); //"$revindexPath/$revindexFilNr.txt"; GetFilPathForLot(path,lotNr,subname); sprintf(revpath,"%srevindex/%s/%i.txt",path,argv[1],lotPart); //ToDo: må sette språk annen plass sprintf(iipath,"%siindex/%s/index/aa/%i.txt",path,argv[1],lotPart); Indekser(revpath,iipath,revIndexArray); } else { printf("usage: ./LotInvertetIndexMaker type lotnr [ lotPart ]\n\n"); } //GetFilPathForLot(lotNr); }
int main (int argc, char *argv[]) { int lotNr; char lotServer[64]; int pageCount; int i; unsigned int FiltetTime; unsigned int FileOffset; char htmlcompressdbuffer[524288]; //0.5 mb char imagebuffer[524288]; //0.5 mb int httpResponsCodes[nrOfHttpResponsCodes]; struct ReposetoryHeaderFormat ReposetoryHeader; struct DocumentIndexFormat DocumentIndexPost; unsigned long int radress; FILE *revindexFilesHa[NrOfDataDirectorys]; struct adultFormat adult; unsigned int lastIndexTime; if (argc < 2) { printf("Dette programet indekserer en lot. Gi det et lot nummer\n"); exit(0); } for(i=0;i<nrOfHttpResponsCodes;i++) { httpResponsCodes[i] = 0; } lotNr = atoi(argv[1]); //find server based on lotnr lotlistLoad(); lotlistGetServer(lotServer,lotNr); printf("vil index lot nr %i at %s\n",lotNr,lotServer); adultLoad(&adult); //temp: må hente dette fra slot server eller fil FiltetTime = 0; FileOffset = 0; pageCount = 0; if (0) { printf("will ges pages by net\n"); revindexFilesOpenNET(revindexFilesHa); while (rGetNextNET(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset)) { global_curentDocID = ReposetoryHeader.DocID; if (strchr(ReposetoryHeader.url,'?') == 0) { global_curentUrlIsDynamic = 0; } else { global_curentUrlIsDynamic = 1; } handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult); //datta skal uansett kopieres over //kopierer over di data copyRepToDi(&DocumentIndexPost,&ReposetoryHeader); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex DIWriteNET(lotServer,&DocumentIndexPost,ReposetoryHeader.DocID); ++pageCount; //temp: //if(pageCount > 1000) { // break; //} } printf("Sending pages\n"); revindexFilesSendNET(revindexFilesHa,lotNr); } else { printf("Wil acess files localy\n"); //finner siste indekseringstid lastIndexTime = GetLastIndexTimeForLot(lotNr); //temp: 
/***********************************************************/ //if(lastIndexTime != 0) { // printf("lastIndexTime is not 0, but %i\n",lastIndexTime); // exit(1); //} //FiltetTime = lastIndexTime; //if(lastIndexTime == 0) { // printf("lastIndexTime is not 0, but %i\n",lastIndexTime); // exit(1); //} /***********************************************************/ revindexFilesOpenLocal(revindexFilesHa,lotNr); while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset)) { //printf("D: %lu, R: %lu\n",ReposetoryHeader.DocID, radress); //kan være siden er korupt, sjekker at docID gir samme lot som den vi leser if (rLotForDOCid(ReposetoryHeader.DocID) != lotNr) { printf("bad DocID %i\n",ReposetoryHeader.DocID); } //indekserer bare .no sider else if (strstr(ReposetoryHeader.url,".no/") == 0){ //ikke no } else { global_curentDocID = ReposetoryHeader.DocID; if (strchr(ReposetoryHeader.url,'?') == 0) { global_curentUrlIsDynamic = 0; } else { global_curentUrlIsDynamic = 1; } handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult); //printf("%s %i\n",ReposetoryHeader.url,DocumentIndexPost.AdultWeight); //datta skal uansett kopieres over //kopierer over di data copyRepToDi(&DocumentIndexPost,&ReposetoryHeader); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex //skriver ikke for nå: DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID); ++pageCount; } //temp: //if(pageCount > 10) { // break; //} } //skriver riktig indexstide til lotten //temp: setLastIndexTimeForLot(lotNr); // vi må ikke kopiere revindex filene da vi jobber på de lokale direkte } //skriver ut en oversikt over hvilkene http responser vi kom over printf("http responses:\n"); for(i=0;i<nrOfHttpResponsCodes;i++) { if (httpResponsCodes[i] != 0) { printf("%i: %i\n",i,httpResponsCodes[i]); } } printf("indexed %i pages\n",pageCount); }
void connectHandler(int socket) { struct packedHedderFormat packedHedder; int i,n; int LotNr; char lotPath[512]; char buf[100]; unsigned int FilterTime; int filnamelen; FILE *FH; struct stat inode; // lager en struktur for fstat å returnere. off_t filesize; char c; struct DocumentIndexFormat DocumentIndexPost; int DocID; struct ReposetoryHeaderFormat ReposetoryHeader; unsigned int radress; char htmlbuffer[524288]; int destLeng; char dest[512]; off_t fileBloks,filerest; char *filblocbuff; //while ((i=read(socket, &packedHedder, sizeof(struct packedHedderFormat))) > 0) { while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) { //printf("command: %i\n",packedHedder.command); //printf("i er %i\n",i); printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); //printf("subname: %s\n",packedHedder.subname); //lar size reflektere hva som er igjen av pakken packedHedder.size = packedHedder.size - sizeof(packedHedder); if (packedHedder.command == C_rmkdir) { printf("C_rmkdir\n"); //leser data. Det skal væren en int som sier hvilken lot vi vil ha if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant read lotnr"); exit(1); } //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant read destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant read dest"); exit(1); } GetFilPathForLot(lotPath,LotNr,packedHedder.subname); sprintf(lotPath,"%s%s",lotPath,dest); printf("mkdir %s\n",lotPath); makePath(lotPath); printf("~C_rmkdir\n"); } else if (packedHedder.command == C_rComand) { //leser data. 
Det skal væren en int som sier hvilken lot vi vil ha if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant read lotnr"); exit(1); } //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant read destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant read dest"); exit(1); } printf("run command %s\n",dest); system(dest); } else if (packedHedder.command == C_getLotToIndex) { printf("fikk C_getLotToIndex\n"); int dirty; if ((i=recv(socket, &dirty, sizeof(dirty),MSG_WAITALL)) == -1) { perror("Cant read dirty"); exit(1); } printf("dirty: %i\n",dirty); LotNr = findLotToIndex(packedHedder.subname,dirty); printf("sending respons\n"); sendall(socket,&LotNr, sizeof(LotNr)); } else if (packedHedder.command == C_getlotHasSufficientSpace) { printf("fikk C_getLotToIndex\n"); int needSpace; int response; if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=recv(socket, &needSpace, sizeof(needSpace),MSG_WAITALL)) == -1) { perror("Cant read dirty"); exit(1); } printf("needSpace: %i, LotNr %i\n",needSpace,LotNr); response = lotHasSufficientSpace(LotNr, needSpace, packedHedder.subname); printf("sending respons\n"); sendall(socket,&response, sizeof(response)); } else if (packedHedder.command == C_rGetSize) { printf("fikk C_rGetSize\n"); //leser data. 
Det skal væren en int som sier hvilken lot vi vil ha if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) { perror("Cant read filnamelen"); exit(1); } if (filnamelen > sizeof(buf)) { printf("filname to long\n"); }; if ((i=read(socket, buf, filnamelen)) == -1) { perror("Cant read filnamelen"); exit(1); } printf("filname %s\n",buf); if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) { perror(buf); //sending that he fil is emty fileBloks = 0; sendall(socket,&fileBloks, sizeof(fileBloks)); } else { //finner og sender il størelse fstat(fileno(FH),&inode); //filesize = inode.st_size; //sendall(socket,&filesize, sizeof(filesize)); fileBloks = inode.st_size; printf("size is %" PRId64 "\n",fileBloks); sendall(socket,&fileBloks, sizeof(fileBloks)); fclose(FH); } } else if (packedHedder.command == C_rGetFile) { printf("fikk C_rGetFile\n"); //leser data. Det skal væren en int som sier hvilken lot vi vil ha if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) { perror("Cant read filnamelen"); exit(1); } if (filnamelen > sizeof(buf)) { printf("filname to long\n"); }; if ((i=read(socket, buf, filnamelen)) == -1) { perror("Cant read filnamelen"); exit(1); } printf("filname %s\n",buf); if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) { perror(buf); //sending that the fil is emty fileBloks = 0; filerest = 0; sendall(socket,&fileBloks, sizeof(fileBloks)); sendall(socket,&filerest, sizeof(filerest)); } else { //finner og sender fil størelse fstat(fileno(FH),&inode); //filesize = inode.st_size; //sendall(socket,&filesize, sizeof(filesize)); fileBloks = (int)floor(inode.st_size / rNetTrabsferBlok); filerest = inode.st_size - (fileBloks * rNetTrabsferBlok); sendall(socket,&fileBloks, sizeof(fileBloks)); 
sendall(socket,&filerest, sizeof(filerest)); printf("sending fil. fileBloks %"PRId64", filerest %"PRId64"\n",fileBloks,filerest); filblocbuff = (char *)malloc(rNetTrabsferBlok); for(i=0; i < fileBloks; i++) { //fread(filblocbuff,sizeof(c),rNetTrabsferBlok,FH); //fread_all(const void *buf, size_t size, FILE *stream) fread_all(filblocbuff,rNetTrabsferBlok,FH, 4096); if ((n=sendall(socket, filblocbuff, rNetTrabsferBlok)) == -1) { perror("Cant recv dest"); exit(1); } } printf("did recv %i fileBloks\n",i); fread(filblocbuff,sizeof(c),filerest,FH); if ((n=sendall(socket, filblocbuff, filerest)) == -1) { perror("Cant recv filerest"); exit(1); } free(filblocbuff); /* for (i=0;i<filesize;i++) { fread(&c,sizeof(char),1,FH); send(socket, &c, sizeof(char), 0); //printf("%i\n",(int)c); } */ printf("send file end\n"); fclose(FH); } } else if (packedHedder.command == C_rGetNext) { printf("fikk C_rGetNext\n"); printf("støttes ikke lengere"); exit(1); /* //leser data. Det skal væren en unigned int som sier hvilken lot vi vil ha //har deklarert den som int her ??? 
if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } printf("leser FilterTime\n"); //leser filtertime if ((i=read(socket, &FilterTime, sizeof(FilterTime))) == -1) { perror("Cant read lotnr"); exit(1); } printf("lotnr %i FilterTime %u\n",LotNr,FilterTime); //henter inn data om den lotten if (rGetNext(LotNr,&ReposetoryHeader,htmlbuffer,NULL,&radress,FilterTime,0)) { //printf("DocId: %i url: %s\n",ReposetoryHeader.DocID,ReposetoryHeader.url); //sender pakke hedder sendpacked(socket,C_rLotData,PROTOCOLVERSION, ReposetoryHeader.htmlSize + sizeof(ReposetoryHeader) +sizeof(radress), NULL,packedHedder.subname); //sennder ReposetoryHeader'en sendall(socket,&ReposetoryHeader, sizeof(ReposetoryHeader)); //sender htmlen sendall(socket,&htmlbuffer, ReposetoryHeader.htmlSize); //sender adressen sendall(socket,&radress,sizeof(radress)); //printf("data sent\n"); //printf("rGetNext: %i\n",ReposetoryHeader.DocID); } else { sendpacked(socket,C_rEOF,PROTOCOLVERSION, 0, NULL,packedHedder.subname); printf("ferdig\n"); } */ } else if (packedHedder.command == C_DIWrite) { if ((i=recv(socket, &DocumentIndexPost, sizeof(struct DocumentIndexFormat),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } DIWrite(&DocumentIndexPost,DocID,packedHedder.subname, NULL); //printf("DIWrite: %i\n",DocID); } else if (packedHedder.command == C_DIRead) { int DocID; struct DocumentIndexFormat DocumentIndexPost; printf("got commane C_DIRead. 
sise %i hsize %i ds %i\n",packedHedder.size, sizeof(packedHedder), sizeof(DocID)); if ((i=recv(socket, &DocID, sizeof(DocID),0)) == -1) { perror("recv"); exit(1); } //printf("DocID %i\n",DocID); //leser inn datan //int DIRead (struct DocumentIndexFormat *DocumentIndexPost, int DocID); DIRead(&DocumentIndexPost,DocID,packedHedder.subname); sendall(socket,&DocumentIndexPost, sizeof(struct DocumentIndexFormat)); } else if (packedHedder.command == C_rGetIndexTime) { int Lotnr; unsigned int IndexTime; if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) { perror("recv"); exit(1); } IndexTime = GetLastIndexTimeForLot(LotNr,packedHedder.subname); sendall(socket,&IndexTime, sizeof(IndexTime)); } else if (packedHedder.command == C_rSetIndexTime) { int Lotnr; if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) { perror("recv"); exit(1); } setLastIndexTimeForLot(LotNr,NULL,packedHedder.subname); } else if (packedHedder.command == C_rSendFile) { //skal mota en fil for lagring i reposetoryet //char FilePath[156]; FILE *FILEHANDLER; char c; char opentype[2]; //char *filblocbuff; //off_t fileBloks,filerest; if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant recv lotnr"); exit(1); } printf("lotNr %i\n",LotNr); //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant recv destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant recv dest"); exit(1); } printf("coping %s as length %i in to lot %i\n",dest,destLeng,LotNr); if ((i=recv(socket, &opentype, sizeof(char) +1,MSG_WAITALL)) == -1) { perror("Cant recv opentype"); exit(1); } printf("opentype \"%s\"\n",opentype); //GetFilPathForLot(FilePath,LotNr,packedHedder.subname); //legger til filnavnet //strncat(FilePath,dest,sizeof(FilePath)); //leser inn filstørelsen if ((i=recv(socket, &fileBloks, 
sizeof(fileBloks),MSG_WAITALL)) == -1) { perror("Cant recv fileBloks"); exit(1); } if ((i=recv(socket, &filerest, sizeof(filerest),MSG_WAITALL)) == -1) { perror("Cant recv filerest"); exit(1); } printf("fileBloks: %" PRId64 ", filerest: %" PRId64 "\n",fileBloks,filerest); //åpner filen if ((FILEHANDLER = lotOpenFileNoCasheByLotNr(LotNr,dest,opentype,'e',packedHedder.subname)) == NULL) { perror(dest); } filblocbuff = (char *)malloc(rNetTrabsferBlok); for(i=0; i < fileBloks; i++) { if ((n=recv(socket, filblocbuff, rNetTrabsferBlok,MSG_WAITALL)) == -1) { perror("Cant recv dest"); exit(1); } fwrite(filblocbuff,sizeof(c),rNetTrabsferBlok,FILEHANDLER); } printf("did recv %i fileBloks\n",i); if ((n=recv(socket, filblocbuff, filerest,MSG_WAITALL)) == -1) { perror("Cant recv filerest"); exit(1); } fwrite(filblocbuff,sizeof(c),filerest,FILEHANDLER); free(filblocbuff); fclose(FILEHANDLER); printf("\n"); } else if (packedHedder.command == C_DIGetIp) { unsigned int DocID; struct DocumentIndexFormat DocumentIndexPost; //printf("got command C_DIGetIp\n"); if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } //printf("DocID %u\n",DocID); DIRead(&DocumentIndexPost,DocID,packedHedder.subname); //printf("ipadress: %u\n",DocumentIndexPost.IPAddress); sendall(socket,&DocumentIndexPost.IPAddress, sizeof(DocumentIndexPost.IPAddress)); } else if (packedHedder.command == C_anchorAdd) { size_t textlen; unsigned int DocID; char *text; printf("Add anchor....\n"); if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } else if ((i = recv(socket, &textlen, sizeof(textlen), MSG_WAITALL)) == -1) { perror("recv(textlen)"); exit(1); } text = malloc(textlen+1); text[textlen] = '\0'; if ((i = recv(socket, text, textlen, MSG_WAITALL)) == -1) { perror("recv(text)"); exit(1); } anchoraddnew(DocID, text, textlen, packedHedder.subname, NULL); printf("Text for %d: %s\n", DocID, text); free(text); } else if (packedHedder.command == 
C_anchorGet) { size_t len; char *text; int LotNr; unsigned int DocID; printf("Get anchor...\n"); if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } printf("got DocID %u\n",DocID); LotNr = rLotForDOCid(DocID); printf("trying to read anchor\n"); len = anchorRead(LotNr, packedHedder.subname, DocID, NULL, -1); printf("got anchor of length %i\n",len); sendall(socket, &len, sizeof(len)); text = malloc(len+1); printf("readint it again\n"); anchorRead(LotNr, packedHedder.subname, DocID, text, len+1); sendall(socket, text, len); } else if (packedHedder.command == C_readHTML) { /* unsigned int DocID; unsigned int len; char *text; char *acla, *acld; struct DocumentIndexFormat DocIndex; struct ReposetoryHeaderFormat ReposetoryHeader; if ((i = recv(socket, &DocID, sizeof(DocID), MSG_WAITALL)) == -1) { perror("recv"); exit(1); } if ((i = recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) { perror("recv(len)"); exit(1); } printf("len %u\n",len); text = malloc(len); if (text == NULL) exit(1); DIRead(&DocIndex, DocID, packedHedder.subname); if (!rReadHtml( text, &len, DocIndex.RepositoryPointer, DocIndex.htmlSize, DocID, packedHedder.subname, &ReposetoryHeader, &acla, &acld, DocIndex.imageSize)) { len = 0; sendall(socket, &len, sizeof(len)); } else { ++len; // \0 #ifdef DEBUG printf("docID %u\n",DocID); printf("Got: (len %i, real %i) ########################\n%s\n#####################\n", len, strlen(text), text); #endif sendall(socket, &len, sizeof(len)); sendall(socket, text, len); sendall(socket, &ReposetoryHeader,sizeof(ReposetoryHeader)); } free(text); */ } /* runarb: 06 des 2007: vi har gåt bort fra denne metoden for nå, og bruker heller index over smb. 
Men tar vare på den da vi kan trenge den siden else if (packedHedder.command == C_urltodocid) { char cmd; int alloclen; char *urlbuf; if (urltodociddb == NULL) { cmd = C_DOCID_NODB; sendall(socket, &cmd, sizeof(cmd)); exit(1); } else { cmd = C_DOCID_READY; sendall(socket, &cmd, sizeof(cmd)); } cmd = C_DOCID_NEXT; alloclen = 1024; urlbuf = malloc(alloclen); do { unsigned int DocID; size_t len; if ((i = recv(socket, &cmd, sizeof(cmd), MSG_WAITALL)) == -1) { err(1, "recv(cmd)"); } if (cmd == C_DOCID_DONE) break; if ((i == recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) { err(1, "recv(len)"); } if (alloclen < len+1) { free(urlbuf); alloclen *= 2; urlbuf = malloc(alloclen); } if ((i == recv(socket, urlbuf, len, MSG_WAITALL)) == -1) { err(1, "recv(len)"); } urlbuf[len] = '\0'; if (!getDocIDFromUrl(urltodociddb, urlbuf, &DocID)) { cmd = C_DOCID_NOTFOUND; sendall(socket, &cmd, sizeof(cmd)); } else { cmd = C_DOCID_FOUND; sendall(socket, &cmd, sizeof(cmd)); sendall(socket, &DocID, sizeof(DocID)); } } while (1); free(urlbuf); } */ else { printf("unnown comand. %i\n", packedHedder.command); } //printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); } //while }
/*
 * Builds the inverted-index file path for one part of a lot and makes sure
 * its directory exists.
 *
 * The directory is "<lot path>iindex/<type>/index/aa/" and the file is
 * "<lotPart>.txt" inside it. The result is written to `dst` (capacity
 * `dstSize`).
 *
 * Returns 0 on success, -1 when a path does not fit its buffer.
 */
static int iindexPartPath(char *dst, size_t dstSize, int lotNr, char *type, char *subname, int lotPart) {

	char lotPath[256];
	char dir[256];
	int len;

	GetFilPathForLot(lotPath,lotNr,subname);

	/* ToDo: the language ("aa") should be set somewhere else */
	len = snprintf(dir,sizeof(dir),"%siindex/%s/index/aa/",lotPath,type);
	if (len < 0 || (size_t)len >= sizeof(dir)) {
		return -1;
	}

	/* create the directory */
	makePath(dir);

	/* the file name goes into a separate buffer; sprintf of a buffer into itself is undefined */
	len = snprintf(dst,dstSize,"%s%i.txt",dir,lotPart);
	if (len < 0 || (size_t)len >= dstSize) {
		return -1;
	}

	return 0;
}

/*
 * Returns 1 when `path` can be opened for reading, 0 otherwise. The probe
 * handle is closed again.
 */
static int iindexFileExists(const char *path) {

	FILE *probe = fopen(path,"r");

	if (probe == NULL) {
		return 0;
	}
	fclose(probe);
	return 1;
}

/*
 * Builds inverted-index files for one lot.
 *
 * Usage: ./LotInvertetIndexMaker [-n] [-d] type lotnr subname [ lotPart ]
 *
 *   -n       do not rebuild a part whose index file already exists: in
 *            all-parts mode the part is skipped, in single-part mode the
 *            program exits with status 1
 *   -d       passed to Indekser() as its allow-duplicates argument
 *   lotPart  optional; without it parts 0..63 are processed after checking
 *            the lot's last index time and free disk space
 *
 * Returns 0 on normal completion (including the usage message); exits with
 * status 1 on an unknown option, failed precondition, allocation failure
 * or over-long path.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int lotPart;
	int nrOfArgs;
	char iipath[256];
	unsigned lastIndexTime;
	int optMustBeNewerThen = 0;
	int optAllowDuplicates = 0;
	struct revIndexArrayFomat *revIndexArray;
	char *type;
	char *subname;

	extern char *optarg;
	extern int optind, opterr, optopt;
	int c;	/* getopt() returns int; a plain char cannot hold -1 where char is unsigned */

	while ((c=getopt(argc,argv,"nd"))!=-1) {
		switch (c) {
			case 'n':
				optMustBeNewerThen = 1;
				break;
			case 'd':
				optAllowDuplicates = 1;
				break;
			default:
				exit(1);
		}
	}

	/* number of positional arguments left after the options */
	nrOfArgs = argc - optind;

	if (argc >= 2) {
		printf("lot %s, %i\n",argv[1],argc);
	}

	/* validate before touching argv[optind..] */
	if (nrOfArgs != 3 && nrOfArgs != 4) {
		printf("usage: ./LotInvertetIndexMaker type lotnr subname [ lotPart ]\n\n");
		return 0;
	}

	type = argv[optind];
	lotNr = atoi(argv[optind +1]);
	subname = argv[optind +2];

	revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize);
	if (revIndexArray == NULL) {
		perror("malloc revIndexArray");
		exit(1);
	}

	if (nrOfArgs == 3) {

		/* find the last indexing time; a lot that was never indexed is refused */
		lastIndexTime = GetLastIndexTimeForLot(lotNr,subname);
		if (lastIndexTime == 0) {
			printf("lastIndexTime is 0\n");
			exit(1);
		}

		/* check that there is enough disk space */
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		printf("Indexing all buvkets for lot %i\n",lotNr);

		for (lotPart=0;lotPart<64;lotPart++) {

			if (iindexPartPath(iipath,sizeof(iipath),lotNr,type,subname,lotPart) != 0) {
				fprintf(stderr,"iindex path too long for lot %i part %i\n",lotNr,lotPart);
				exit(1);
			}

			if (optMustBeNewerThen != 0 && iindexFileExists(iipath)) {
				printf("we all redy have a iindex.\n");
				continue;
			}

			Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
		}
	}
	else {
		lotPart = atoi(argv[optind +3]);

		printf("indexint part %i for lot %i\n",lotPart,lotNr);

		if (iindexPartPath(iipath,sizeof(iipath),lotNr,type,subname,lotPart) != 0) {
			fprintf(stderr,"iindex path too long for lot %i part %i\n",lotNr,lotPart);
			exit(1);
		}

		printf("iipath: \"%s\n",iipath);

		if (optMustBeNewerThen != 0 && iindexFileExists(iipath)) {
			printf("we all redy have a iindex.\n");
			exit(1);
		}

		Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
	}

	free(revIndexArray);

	return 0;
}
int main (int argc, char *argv[]) { int lotNr; char lotServer[64]; int pageCount; int i; unsigned int FiltetTime; unsigned int FileOffset; char htmlcompressdbuffer[524288]; //0.5 mb char imagebuffer[524288]; //0.5 mb int httpResponsCodes[nrOfHttpResponsCodes]; struct ReposetoryHeaderFormat ReposetoryHeader; struct DocumentIndexFormat DocumentIndexPost; unsigned long int radress; FILE *revindexFilesHa[NrOfDataDirectorys]; struct adultFormat adult; unsigned int lastIndexTime; if (argc < 3) { printf("Dette programet indekserer en lot. Usage:\n\tIndexerLot lotNr subname\n"); exit(0); } for(i=0;i<nrOfHttpResponsCodes;i++) { httpResponsCodes[i] = 0; } lotNr = atoi(argv[1]); strncpy(subname,argv[2],sizeof(subname) -1); //find server based on lotnr lotlistLoad(); lotlistGetServer(lotServer,lotNr); printf("vil index lot nr %i at %s\n",lotNr,lotServer); adultLoad(&adult); langdetectInit(); //temp: må hente dette fra slot server eller fil FiltetTime = 0; FileOffset = 0; pageCount = 0; if (0) { printf("will ges pages by net\n"); revindexFilesOpenNET(revindexFilesHa); while (rGetNextNET(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset,subname)) { global_curentDocID = ReposetoryHeader.DocID; if (strchr(ReposetoryHeader.url,'?') == 0) { global_curentUrlIsDynamic = 0; } else { global_curentUrlIsDynamic = 1; } handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult); //datta skal uansett kopieres over //kopierer over di data copyRepToDi(&DocumentIndexPost,&ReposetoryHeader); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex DIWriteNET(lotServer,&DocumentIndexPost,ReposetoryHeader.DocID,subname); ++pageCount; //temp: //if(pageCount > 999) { // printf("Exeting after only %i docs\n",pageCount); // break; //} } printf("Sending pages\n"); revindexFilesSendNET(revindexFilesHa,lotNr); } else { printf("Wil acess files 
localy\n"); //sjekker om vi har nokk palss if (!lotHasSufficientSpace(lotNr,4096,subname)) { printf("insufficient disk space\n"); exit(1); } //finner siste indekseringstid lastIndexTime = GetLastIndexTimeForLot(lotNr,subname); if(lastIndexTime != 0) { printf("lastIndexTime is not 0, but %i\n",lastIndexTime); exit(1); } revindexFilesOpenLocal(revindexFilesHa,lotNr,"Main","wb",subname); //temp:Søker til problemområdet //FileOffset = 334603785; while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,sizeof(htmlcompressdbuffer),imagebuffer,&radress,FiltetTime,FileOffset,subname)) { //printf("D: %u, R: %lu\n",ReposetoryHeader.DocID, radress); global_curentDocID = ReposetoryHeader.DocID; if (strchr(ReposetoryHeader.url,'?') == 0) { global_curentUrlIsDynamic = 0; } else { global_curentUrlIsDynamic = 1; } printf("%s\n",ReposetoryHeader.url); handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult); //datta skal kopieres over uanset hva som skjer //kopierer over di data copyRepToDi(&DocumentIndexPost,&ReposetoryHeader); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID,subname); ++pageCount; //if(pageCount > 9999) { // printf("Exeting after only %i docs\n",pageCount); // //break; // exit(1); //} } //skriver riktig indexstide til lotten setLastIndexTimeForLot(lotNr,httpResponsCodes,subname); // vi må ikke kopiere revindex filene da vi jobber på de lokale direkte } langdetectDestroy(); printf("indexed %i pages\n\n\n",pageCount); return 0; }
int main (int argc, char *argv[]) { int lotNr; char lotServer[64]; int pageCount; int i; unsigned int FiltetTime; unsigned int FileOffset; char htmlcompressdbuffer[524288]; //0.5 mb char imagebuffer[524288]; //0.5 mb int httpResponsCodes[nrOfHttpResponsCodes]; struct ReposetoryHeaderFormat ReposetoryHeader; struct DocumentIndexFormat DocumentIndexPost; unsigned long int radress; FILE *revindexFilesHa[NrOfDataDirectorys]; struct adultFormat adult; unsigned int lastIndexTime; if (argc < 2) { printf("Dette programet indekserer en lot. Gi det et lot nummer\n"); exit(0); } for(i=0;i<nrOfHttpResponsCodes;i++) { httpResponsCodes[i] = 0; } lotNr = atoi(argv[1]); //find server based on lotnr lotlistLoad(); lotlistGetServer(lotServer,lotNr); printf("vil index lot nr %i at %s\n",lotNr,lotServer); //finner siste indekseringstid lastIndexTime = GetLastIndexTimeForLot(lotNr,subname); if(lastIndexTime == 0) { printf("lastIndexTime is 0, skiping.\n"); exit(1); } //temp: må hente dette fra slot server eller fil FiltetTime = 0; FileOffset = 2140483648; //FileOffset = 1997015914; pageCount = 0; printf("Wil acess files localy\n"); while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset,subname)) { DIRead(&DocumentIndexPost,ReposetoryHeader.DocID,subname); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID,subname); ++pageCount; } printf("indexed %i pages\n\n\n",pageCount); return 0; }