int main (int argc, char *argv[]) { int lotNr; int lotPart; char path[256]; char revpath[256]; char iipath[256]; unsigned lastIndexTime; struct revIndexArrayFomat *revIndexArray; revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize); if (argc < 2) { } printf("lot %s, %i\n",argv[1],argc); if (argc == 3) { lotNr = atoi(argv[2]); //finner siste indekseringstid lastIndexTime = GetLastIndexTimeForLot(lotNr,subname); if(lastIndexTime == 0) { printf("lastIndexTime is 0\n"); exit(1); } //sjekker om vi har nokk palss if (!lotHasSufficientSpace(lotNr,4096,subname)) { printf("insufficient disk space\n"); exit(1); } printf("Indexing all buvkets for lot %i\n",lotNr); for (lotPart=0;lotPart<63;lotPart++) { //printf("indexint part %i for lot %i\n",lotPart,lotNr); //"$revindexPath/$revindexFilNr.txt"; GetFilPathForLot(path,lotNr,subname); sprintf(revpath,"%srevindex/%s/%i.txt",path,argv[1],lotPart); //ToDo: må sette språk annen plass sprintf(iipath,"%siindex/%s/index/aa/",path,argv[1]); //oppretter paths makePath(iipath); sprintf(iipath,"%s%i.txt",iipath,lotPart); Indekser(revpath,iipath,revIndexArray); //sletter revindex. Ingen vits i å ha den fylle opp plass //remove(revpath); } } else if (argc == 4) { lotNr = atoi(argv[2]); lotPart = atoi(argv[3]); printf("indexint part %i for lot %i\n",lotPart,lotNr); //"$revindexPath/$revindexFilNr.txt"; GetFilPathForLot(path,lotNr,subname); sprintf(revpath,"%srevindex/%s/%i.txt",path,argv[1],lotPart); //ToDo: må sette språk annen plass sprintf(iipath,"%siindex/%s/index/aa/%i.txt",path,argv[1],lotPart); Indekser(revpath,iipath,revIndexArray); } else { printf("usage: ./LotInvertetIndexMaker type lotnr [ lotPart ]\n\n"); } //GetFilPathForLot(lotNr); }
void connectHandler(int socket) { struct packedHedderFormat packedHedder; int isAuthenticated = 0; char tkeyForTest[32]; int i,n; int intrespons; int count = 0; container *attrkeys = NULL; #ifdef DEBUG_TIME struct timeval start_time, end_time; struct timeval tot_start_time, tot_end_time; gettimeofday(&tot_start_time, NULL); #endif ionice_benice(); while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) { #ifdef DEBUG printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); #endif packedHedder.size = packedHedder.size - sizeof(packedHedder); if (attrkeys == NULL) { attrkeys = ropen(); } if (packedHedder.command == bbc_askToAuthenticate) { if ((i=recv(socket, tkeyForTest, sizeof(tkeyForTest),MSG_WAITALL)) == -1) { perror("Cant read tkeyForTest"); exit(1); } if (1) { printf("authenticated\n"); intrespons = bbc_authenticate_ok; bbdocument_init(NULL); isAuthenticated = 1; } else { printf("authenticate faild\n"); intrespons = bbc_authenticate_feiled; } if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else { if (!isAuthenticated) { printf("user not autentikated\n"); exit(1); } if (packedHedder.command == bbc_docadd) { #ifdef DEBUG printf("bbc_docadd\n"); #endif char *subname,*documenturi,*documenttype,*document,*acl_allow,*acl_denied,*title,*doctype; char *attributes; int dokument_size; unsigned int lastmodified; #ifdef DEBUG_TIME gettimeofday(&start_time, NULL); #endif //subname if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recvall(socket, subname, intrespons)) == 0) { perror("Cant read subname"); exit(1); } //documenturi if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } documenturi = malloc(intrespons +1); if ((i=recvall(socket, documenturi, intrespons)) == 0) { 
perror("Cant read documenturi"); exit(1); } //documenttype if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } documenttype = malloc(intrespons +1); if ((i=recvall(socket, documenttype, intrespons)) == 0) { perror("Cant read documenttype"); exit(1); } //document //dokument_size if ((i=recvall(socket, &dokument_size, sizeof(dokument_size))) == 0) { perror("Cant read dokument_size"); exit(1); } document = malloc(dokument_size +1); if (dokument_size == 0) { document[0] = '\0'; } else { if ((i=recvall(socket, document, dokument_size)) == 0) { fprintf(stderr,"Can't read document of size %i\n",dokument_size); perror("recvall"); exit(1); } } //lastmodified if ((i=recvall(socket, &lastmodified, sizeof(lastmodified))) == 0) { perror("Cant read lastmodified"); exit(1); } //acl_allow if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } acl_allow = malloc(intrespons +1); if ((i=recvall(socket, acl_allow, intrespons)) == 0) { perror("Cant read acl_allow"); exit(1); } //acl_denied if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } acl_denied = malloc(intrespons +1); if ((i=recvall(socket, acl_denied, intrespons)) == 0) { perror("Cant read acl_denied"); exit(1); } //title if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } title = malloc(intrespons +1); if ((i=recvall(socket, title, intrespons)) == 0) { perror("Cant read title"); exit(1); } //doctype if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } doctype = malloc(intrespons +1); if ((i=recvall(socket, doctype, intrespons)) == 0) { perror("Cant read doctype"); exit(1); } // Attribute list if ((i = recvall(socket, &intrespons, sizeof(intrespons))) == 0) err(1, "Can't receive attribute list len"); attributes = malloc(intrespons +1); if 
((i=recvall(socket, attributes, intrespons)) == 0) err(1, "Can't receive attribute list"); #ifdef DEBUG_TIME gettimeofday(&end_time, NULL); printf("Time debug: bbdn_docadd recv data time: %f\n",getTimeDifference(&start_time, &end_time)); #endif printf("\n"); printf("########################################################\n"); printf("Url: %s\n",documenturi); printf("got subname \"%s\": title \"%s\". Nr %i, dokument_size %i attrib: %s\n",subname,title,count,dokument_size, attributes); printf("########################################################\n"); printf("calling bbdocument_add():\n"); #ifdef DEBUG_TIME gettimeofday(&start_time, NULL); #endif intrespons = bbdocument_add(subname,documenturi,documenttype,document,dokument_size,lastmodified,acl_allow,acl_denied,title,doctype, attributes, attrkeys); printf(":bbdocument_add end\n"); printf("########################################################\n"); #ifdef DEBUG_TIME gettimeofday(&end_time, NULL); printf("Time debug: bbdn_docadd runing bbdocument_add() time: %f\n",getTimeDifference(&start_time, &end_time)); #endif free(subname); free(documenturi); free(documenttype); free(document); free(acl_allow); free(acl_denied); free(title); free(doctype); free(attributes); // send status if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else if (packedHedder.command == bbc_opencollection) { char *subname; char path[PATH_MAX]; printf("open collection\n"); if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) err(1, "Cant read intrespons"); subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) err(1, "Cant read subname"); GetFilPathForLot(path, 1, subname); strcat(path, "fullyCrawled"); unlink(path); free(subname); } else if (packedHedder.command == bbc_closecollection) { printf("closecollection\n"); char *subname; //subname if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { 
perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } bbdocument_close(attrkeys); attrkeys = NULL; //toDo må bruke subname, og C ikke perl her printf("cleanin lots start\n"); char command[PATH_MAX]; snprintf(command,sizeof(command),"perl %s -l -s \"%s\"",bfile("perl/cleanLots.pl"),subname); printf("running \"%s\"\n",command); intrespons = system(command); printf("cleanin lots end\n"); // legger subnamet til listen over ventene subnavn, og huper searchd. lot_recache_collection(subname); /* We are done crawling */ { int fd = lotOpenFileNoCasheByLotNrl(1, "fullyCrawled", ">>", '\0', subname); if (fd == -1) { warn("Unable to write fullyCrawled file"); } else { close(fd); } } free(subname); if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else if (packedHedder.command == bbc_deleteuri) { printf("deleteuri\n"); char *subname, *uri; //subname if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } subname[intrespons] = '\0'; if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { perror("Cant read intrespons"); exit(1); } uri = malloc(intrespons +1); if ((i=recv(socket, uri, intrespons,MSG_WAITALL)) == -1) { perror("Cant read uri"); exit(1); } uri[intrespons] = '\0'; printf("going to delete: %s from %s\n", uri, subname); /* Add docid to the gced file */ { FILE *fh; unsigned int DocID, lastmodified; unsigned int lotNr; int err = 0; if (uriindex_get(uri, &DocID, &lastmodified, subname) == 0) { fprintf(stderr,"Unable to get uri info. 
uri=\"%s\",subname=\"%s\".",uri,subname); perror("Unable to get uri info"); err++; } if (!err) { lotNr = rLotForDOCid(DocID); if ((fh = lotOpenFileNoCasheByLotNr(lotNr,"gced","a", 'e',subname)) == NULL) { perror("can't open gced file"); err++; } else { fwrite(&DocID, sizeof(DocID), 1, fh); fclose(fh); } } if (!err) { struct reformat *re; if((re = reopen(rLotForDOCid(DocID), sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) { perror("can't reopen()"); err++; } else { DIS_delete(RE_DocumentIndex(re, DocID)); reclose(re); } } //markerer at den er skitten if (!err) { FILE *dirtfh; dirtfh = lotOpenFileNoCashe(DocID,"dirty","ab",'e',subname); fwrite("1",1,1,dirtfh); fclose(dirtfh); } if (err == 0) bbdocument_delete(uri, subname); } free(subname); intrespons = 1; // Always return ok for now if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else if (packedHedder.command == bbc_deletecollection) { printf("deletecollection\n"); char *subname, *uri; //subname if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } subname[intrespons] = '\0'; printf("going to delete collection: %s\n", subname); intrespons = bbdocument_deletecoll(subname); if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } free(subname); } else if (packedHedder.command == bbc_addwhisper) { whisper_t add; char *subname; if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) err(1, "Cant read intrespons"); subname = malloc(intrespons+1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } subname[intrespons] = '\0'; if ((i=recv(socket, &add, sizeof(add),MSG_WAITALL)) == -1) err(1, 
"Cant read add whisper"); gcwhisper_write(subname, add); free(subname); } else if (packedHedder.command == bbc_HasSufficientSpace) { char *subname; //subname if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recvall(socket, subname, intrespons)) == 0) { perror("Cant read subname"); exit(1); } // tester bare i lot 1 her. Må også sjekke andre loter når vi begynner å støtte frlere disker på ES. intrespons = lotHasSufficientSpace(1, 4096, subname); if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } printf("~Asked for HasSufficientSpace for subname \"%s\". Returnerer %d\n",subname, intrespons); free(subname); } else { printf("unnown comand. %i\n", packedHedder.command); } } ++count; // #ifdef DEBUG_BREAK_AFTER // if (count >= DEBUG_BREAK_AFTER) { // printf("exeting after %i docoments\n",count); // exit(1); // } // #endif } #ifdef DEBUG_TIME gettimeofday(&tot_end_time, NULL); printf("Time debug: bbdn total time time: %f\n",getTimeDifference(&tot_start_time, &tot_end_time)); #endif }
/*
 * Builds inverted index files for a lot.
 *
 * Usage: ./LotInvertetIndexMaker [-n] [-d] type lotnr subname [ lotPart ]
 *
 *   -n  skip (all-parts mode) or abort (single-part mode) when the target
 *       index file can already be opened for reading
 *   -d  passed on to Indekser() as its last (allow duplicates) argument
 *
 * Without lotPart all parts 0..63 are indexed, after checking the lot's last
 * index time and the available disk space. With lotPart only that part is
 * indexed.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int lotPart;
	int len;
	int nargs;
	char path[256];
	char iidir[256];
	char iipath[256];
	unsigned lastIndexTime;
	int optMustBeNewerThen = 0;
	int optAllowDuplicates = 0;
	struct revIndexArrayFomat *revIndexArray;
	FILE *existing;

	extern char *optarg;
	extern int optind, opterr, optopt;
	// getopt() returns int; storing it in a char breaks the -1 test where
	// plain char is unsigned
	int c;

	while ((c=getopt(argc,argv,"nd"))!=-1) {
		switch (c) {
			case 'n':
				optMustBeNewerThen = 1;
				break;
			case 'd':
				optAllowDuplicates = 1;
				break;
			case 'v':
				// NOTE(review): 'v' is not in the option string above, so
				// getopt() never returns it.
				break;
			default:
				exit(1);
		}
	}
	// make optind an offset so that argv[1 +optind] is the first non-option argument
	--optind;

	// argv[1] is NULL when no argument was given; never pass that to %s
	printf("lot %s, %i\n",(argc > 1) ? argv[1] : "(null)",argc);

	// validate the argument count before touching argv[2..4 +optind]
	nargs = argc - optind;
	if ((nargs != 4) && (nargs != 5)) {
		printf("usage: ./LotInvertetIndexMaker type lotnr subname [ lotPart ]\n\n");
		return 0;
	}

	char *type = argv[1 +optind];
	lotNr = atoi(argv[2 +optind]);
	char *subname = argv[3 +optind];

	revIndexArray = malloc(sizeof(struct revIndexArrayFomat) * revIndexArraySize);
	if (revIndexArray == NULL) {
		perror("malloc revIndexArray");
		exit(1);
	}

	if (nargs == 4) {

		// find the last indexing time
		lastIndexTime = GetLastIndexTimeForLot(lotNr,subname);

		if(lastIndexTime == 0) {
			printf("lastIndexTime is 0\n");
			exit(1);
		}

		// check that we have enough disk space
		if (!lotHasSufficientSpace(lotNr,4096,subname)) {
			printf("insufficient disk space\n");
			exit(1);
		}

		printf("Indexing all buvkets for lot %i\n",lotNr);

		for (lotPart=0;lotPart<64;lotPart++) {

			GetFilPathForLot(path,lotNr,subname);

			// ToDo: the language ("aa") should be set somewhere else
			len = snprintf(iidir,sizeof(iidir),"%siindex/%s/index/aa/",path,type);
			if ((len < 0) || ((size_t)len >= sizeof(iidir))) {
				printf("iindex path is to long\n");
				exit(1);
			}

			// create the directories
			makePath(iidir);

			// separate buffer: sprintf() with the destination also used as a
			// source argument is undefined behaviour
			len = snprintf(iipath,sizeof(iipath),"%s%i.txt",iidir,lotPart);
			if ((len < 0) || ((size_t)len >= sizeof(iipath))) {
				printf("iindex path is to long\n");
				exit(1);
			}

			if (optMustBeNewerThen != 0) {
				if ((existing = fopen(iipath,"r")) != NULL) {
					// only an existence probe; do not leak the handle
					fclose(existing);
					printf("we all redy have a iindex.\n");
					continue;
				}
			}

			Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
		}
	}
	else {
		// nargs == 5: index one part only
		lotPart = atoi(argv[4 +optind]);

		printf("indexint part %i for lot %i\n",lotPart,lotNr);

		GetFilPathForLot(path,lotNr,subname);

		// ToDo: the language ("aa") should be set somewhere else
		len = snprintf(iidir,sizeof(iidir),"%siindex/%s/index/aa/",path,type);
		if ((len < 0) || ((size_t)len >= sizeof(iidir))) {
			printf("iindex path is to long\n");
			exit(1);
		}

		// create the directories
		makePath(iidir);

		len = snprintf(iipath,sizeof(iipath),"%s%i.txt",iidir,lotPart);
		if ((len < 0) || ((size_t)len >= sizeof(iipath))) {
			printf("iindex path is to long\n");
			exit(1);
		}

		printf("iipath: \"%s\n",iipath);

		if (optMustBeNewerThen != 0) {
			if ((existing = fopen(iipath,"r")) != NULL) {
				fclose(existing);
				printf("we all redy have a iindex.\n");
				exit(1);
			}
		}

		Indekser(iipath,revIndexArray,lotNr,type,lotPart,subname,optAllowDuplicates);
	}

	free(revIndexArray);

	return 0;
}
void connectHandler(int socket) { struct packedHedderFormat packedHedder; int i,n; int LotNr; char lotPath[512]; char buf[100]; unsigned int FilterTime; int filnamelen; FILE *FH; struct stat inode; // lager en struktur for fstat å returnere. off_t filesize; char c; struct DocumentIndexFormat DocumentIndexPost; int DocID; struct ReposetoryHeaderFormat ReposetoryHeader; unsigned int radress; char htmlbuffer[524288]; int destLeng; char dest[512]; off_t fileBloks,filerest; char *filblocbuff; //while ((i=read(socket, &packedHedder, sizeof(struct packedHedderFormat))) > 0) { while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) { //printf("command: %i\n",packedHedder.command); //printf("i er %i\n",i); printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); //printf("subname: %s\n",packedHedder.subname); //lar size reflektere hva som er igjen av pakken packedHedder.size = packedHedder.size - sizeof(packedHedder); if (packedHedder.command == C_rmkdir) { printf("C_rmkdir\n"); //leser data. Det skal væren en int som sier hvilken lot vi vil ha if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant read lotnr"); exit(1); } //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant read destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant read dest"); exit(1); } GetFilPathForLot(lotPath,LotNr,packedHedder.subname); sprintf(lotPath,"%s%s",lotPath,dest); printf("mkdir %s\n",lotPath); makePath(lotPath); printf("~C_rmkdir\n"); } else if (packedHedder.command == C_rComand) { //leser data. 
Det skal væren en int som sier hvilken lot vi vil ha if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant read lotnr"); exit(1); } //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant read destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant read dest"); exit(1); } printf("run command %s\n",dest); system(dest); } else if (packedHedder.command == C_getLotToIndex) { printf("fikk C_getLotToIndex\n"); int dirty; if ((i=recv(socket, &dirty, sizeof(dirty),MSG_WAITALL)) == -1) { perror("Cant read dirty"); exit(1); } printf("dirty: %i\n",dirty); LotNr = findLotToIndex(packedHedder.subname,dirty); printf("sending respons\n"); sendall(socket,&LotNr, sizeof(LotNr)); } else if (packedHedder.command == C_getlotHasSufficientSpace) { printf("fikk C_getLotToIndex\n"); int needSpace; int response; if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=recv(socket, &needSpace, sizeof(needSpace),MSG_WAITALL)) == -1) { perror("Cant read dirty"); exit(1); } printf("needSpace: %i, LotNr %i\n",needSpace,LotNr); response = lotHasSufficientSpace(LotNr, needSpace, packedHedder.subname); printf("sending respons\n"); sendall(socket,&response, sizeof(response)); } else if (packedHedder.command == C_rGetSize) { printf("fikk C_rGetSize\n"); //leser data. 
Det skal væren en int som sier hvilken lot vi vil ha if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) { perror("Cant read filnamelen"); exit(1); } if (filnamelen > sizeof(buf)) { printf("filname to long\n"); }; if ((i=read(socket, buf, filnamelen)) == -1) { perror("Cant read filnamelen"); exit(1); } printf("filname %s\n",buf); if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) { perror(buf); //sending that he fil is emty fileBloks = 0; sendall(socket,&fileBloks, sizeof(fileBloks)); } else { //finner og sender il størelse fstat(fileno(FH),&inode); //filesize = inode.st_size; //sendall(socket,&filesize, sizeof(filesize)); fileBloks = inode.st_size; printf("size is %" PRId64 "\n",fileBloks); sendall(socket,&fileBloks, sizeof(fileBloks)); fclose(FH); } } else if (packedHedder.command == C_rGetFile) { printf("fikk C_rGetFile\n"); //leser data. Det skal væren en int som sier hvilken lot vi vil ha if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) { perror("Cant read filnamelen"); exit(1); } if (filnamelen > sizeof(buf)) { printf("filname to long\n"); }; if ((i=read(socket, buf, filnamelen)) == -1) { perror("Cant read filnamelen"); exit(1); } printf("filname %s\n",buf); if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) { perror(buf); //sending that the fil is emty fileBloks = 0; filerest = 0; sendall(socket,&fileBloks, sizeof(fileBloks)); sendall(socket,&filerest, sizeof(filerest)); } else { //finner og sender fil størelse fstat(fileno(FH),&inode); //filesize = inode.st_size; //sendall(socket,&filesize, sizeof(filesize)); fileBloks = (int)floor(inode.st_size / rNetTrabsferBlok); filerest = inode.st_size - (fileBloks * rNetTrabsferBlok); sendall(socket,&fileBloks, sizeof(fileBloks)); 
sendall(socket,&filerest, sizeof(filerest)); printf("sending fil. fileBloks %"PRId64", filerest %"PRId64"\n",fileBloks,filerest); filblocbuff = (char *)malloc(rNetTrabsferBlok); for(i=0; i < fileBloks; i++) { //fread(filblocbuff,sizeof(c),rNetTrabsferBlok,FH); //fread_all(const void *buf, size_t size, FILE *stream) fread_all(filblocbuff,rNetTrabsferBlok,FH, 4096); if ((n=sendall(socket, filblocbuff, rNetTrabsferBlok)) == -1) { perror("Cant recv dest"); exit(1); } } printf("did recv %i fileBloks\n",i); fread(filblocbuff,sizeof(c),filerest,FH); if ((n=sendall(socket, filblocbuff, filerest)) == -1) { perror("Cant recv filerest"); exit(1); } free(filblocbuff); /* for (i=0;i<filesize;i++) { fread(&c,sizeof(char),1,FH); send(socket, &c, sizeof(char), 0); //printf("%i\n",(int)c); } */ printf("send file end\n"); fclose(FH); } } else if (packedHedder.command == C_rGetNext) { printf("fikk C_rGetNext\n"); printf("støttes ikke lengere"); exit(1); /* //leser data. Det skal væren en unigned int som sier hvilken lot vi vil ha //har deklarert den som int her ??? 
if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } printf("leser FilterTime\n"); //leser filtertime if ((i=read(socket, &FilterTime, sizeof(FilterTime))) == -1) { perror("Cant read lotnr"); exit(1); } printf("lotnr %i FilterTime %u\n",LotNr,FilterTime); //henter inn data om den lotten if (rGetNext(LotNr,&ReposetoryHeader,htmlbuffer,NULL,&radress,FilterTime,0)) { //printf("DocId: %i url: %s\n",ReposetoryHeader.DocID,ReposetoryHeader.url); //sender pakke hedder sendpacked(socket,C_rLotData,PROTOCOLVERSION, ReposetoryHeader.htmlSize + sizeof(ReposetoryHeader) +sizeof(radress), NULL,packedHedder.subname); //sennder ReposetoryHeader'en sendall(socket,&ReposetoryHeader, sizeof(ReposetoryHeader)); //sender htmlen sendall(socket,&htmlbuffer, ReposetoryHeader.htmlSize); //sender adressen sendall(socket,&radress,sizeof(radress)); //printf("data sent\n"); //printf("rGetNext: %i\n",ReposetoryHeader.DocID); } else { sendpacked(socket,C_rEOF,PROTOCOLVERSION, 0, NULL,packedHedder.subname); printf("ferdig\n"); } */ } else if (packedHedder.command == C_DIWrite) { if ((i=recv(socket, &DocumentIndexPost, sizeof(struct DocumentIndexFormat),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } DIWrite(&DocumentIndexPost,DocID,packedHedder.subname, NULL); //printf("DIWrite: %i\n",DocID); } else if (packedHedder.command == C_DIRead) { int DocID; struct DocumentIndexFormat DocumentIndexPost; printf("got commane C_DIRead. 
sise %i hsize %i ds %i\n",packedHedder.size, sizeof(packedHedder), sizeof(DocID)); if ((i=recv(socket, &DocID, sizeof(DocID),0)) == -1) { perror("recv"); exit(1); } //printf("DocID %i\n",DocID); //leser inn datan //int DIRead (struct DocumentIndexFormat *DocumentIndexPost, int DocID); DIRead(&DocumentIndexPost,DocID,packedHedder.subname); sendall(socket,&DocumentIndexPost, sizeof(struct DocumentIndexFormat)); } else if (packedHedder.command == C_rGetIndexTime) { int Lotnr; unsigned int IndexTime; if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) { perror("recv"); exit(1); } IndexTime = GetLastIndexTimeForLot(LotNr,packedHedder.subname); sendall(socket,&IndexTime, sizeof(IndexTime)); } else if (packedHedder.command == C_rSetIndexTime) { int Lotnr; if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) { perror("recv"); exit(1); } setLastIndexTimeForLot(LotNr,NULL,packedHedder.subname); } else if (packedHedder.command == C_rSendFile) { //skal mota en fil for lagring i reposetoryet //char FilePath[156]; FILE *FILEHANDLER; char c; char opentype[2]; //char *filblocbuff; //off_t fileBloks,filerest; if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant recv lotnr"); exit(1); } printf("lotNr %i\n",LotNr); //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant recv destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant recv dest"); exit(1); } printf("coping %s as length %i in to lot %i\n",dest,destLeng,LotNr); if ((i=recv(socket, &opentype, sizeof(char) +1,MSG_WAITALL)) == -1) { perror("Cant recv opentype"); exit(1); } printf("opentype \"%s\"\n",opentype); //GetFilPathForLot(FilePath,LotNr,packedHedder.subname); //legger til filnavnet //strncat(FilePath,dest,sizeof(FilePath)); //leser inn filstørelsen if ((i=recv(socket, &fileBloks, 
sizeof(fileBloks),MSG_WAITALL)) == -1) { perror("Cant recv fileBloks"); exit(1); } if ((i=recv(socket, &filerest, sizeof(filerest),MSG_WAITALL)) == -1) { perror("Cant recv filerest"); exit(1); } printf("fileBloks: %" PRId64 ", filerest: %" PRId64 "\n",fileBloks,filerest); //åpner filen if ((FILEHANDLER = lotOpenFileNoCasheByLotNr(LotNr,dest,opentype,'e',packedHedder.subname)) == NULL) { perror(dest); } filblocbuff = (char *)malloc(rNetTrabsferBlok); for(i=0; i < fileBloks; i++) { if ((n=recv(socket, filblocbuff, rNetTrabsferBlok,MSG_WAITALL)) == -1) { perror("Cant recv dest"); exit(1); } fwrite(filblocbuff,sizeof(c),rNetTrabsferBlok,FILEHANDLER); } printf("did recv %i fileBloks\n",i); if ((n=recv(socket, filblocbuff, filerest,MSG_WAITALL)) == -1) { perror("Cant recv filerest"); exit(1); } fwrite(filblocbuff,sizeof(c),filerest,FILEHANDLER); free(filblocbuff); fclose(FILEHANDLER); printf("\n"); } else if (packedHedder.command == C_DIGetIp) { unsigned int DocID; struct DocumentIndexFormat DocumentIndexPost; //printf("got command C_DIGetIp\n"); if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } //printf("DocID %u\n",DocID); DIRead(&DocumentIndexPost,DocID,packedHedder.subname); //printf("ipadress: %u\n",DocumentIndexPost.IPAddress); sendall(socket,&DocumentIndexPost.IPAddress, sizeof(DocumentIndexPost.IPAddress)); } else if (packedHedder.command == C_anchorAdd) { size_t textlen; unsigned int DocID; char *text; printf("Add anchor....\n"); if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } else if ((i = recv(socket, &textlen, sizeof(textlen), MSG_WAITALL)) == -1) { perror("recv(textlen)"); exit(1); } text = malloc(textlen+1); text[textlen] = '\0'; if ((i = recv(socket, text, textlen, MSG_WAITALL)) == -1) { perror("recv(text)"); exit(1); } anchoraddnew(DocID, text, textlen, packedHedder.subname, NULL); printf("Text for %d: %s\n", DocID, text); free(text); } else if (packedHedder.command == 
C_anchorGet) { size_t len; char *text; int LotNr; unsigned int DocID; printf("Get anchor...\n"); if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } printf("got DocID %u\n",DocID); LotNr = rLotForDOCid(DocID); printf("trying to read anchor\n"); len = anchorRead(LotNr, packedHedder.subname, DocID, NULL, -1); printf("got anchor of length %i\n",len); sendall(socket, &len, sizeof(len)); text = malloc(len+1); printf("readint it again\n"); anchorRead(LotNr, packedHedder.subname, DocID, text, len+1); sendall(socket, text, len); } else if (packedHedder.command == C_readHTML) { /* unsigned int DocID; unsigned int len; char *text; char *acla, *acld; struct DocumentIndexFormat DocIndex; struct ReposetoryHeaderFormat ReposetoryHeader; if ((i = recv(socket, &DocID, sizeof(DocID), MSG_WAITALL)) == -1) { perror("recv"); exit(1); } if ((i = recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) { perror("recv(len)"); exit(1); } printf("len %u\n",len); text = malloc(len); if (text == NULL) exit(1); DIRead(&DocIndex, DocID, packedHedder.subname); if (!rReadHtml( text, &len, DocIndex.RepositoryPointer, DocIndex.htmlSize, DocID, packedHedder.subname, &ReposetoryHeader, &acla, &acld, DocIndex.imageSize)) { len = 0; sendall(socket, &len, sizeof(len)); } else { ++len; // \0 #ifdef DEBUG printf("docID %u\n",DocID); printf("Got: (len %i, real %i) ########################\n%s\n#####################\n", len, strlen(text), text); #endif sendall(socket, &len, sizeof(len)); sendall(socket, text, len); sendall(socket, &ReposetoryHeader,sizeof(ReposetoryHeader)); } free(text); */ } /* runarb: 06 des 2007: vi har gåt bort fra denne metoden for nå, og bruker heller index over smb. 
Men tar vare på den da vi kan trenge den siden else if (packedHedder.command == C_urltodocid) { char cmd; int alloclen; char *urlbuf; if (urltodociddb == NULL) { cmd = C_DOCID_NODB; sendall(socket, &cmd, sizeof(cmd)); exit(1); } else { cmd = C_DOCID_READY; sendall(socket, &cmd, sizeof(cmd)); } cmd = C_DOCID_NEXT; alloclen = 1024; urlbuf = malloc(alloclen); do { unsigned int DocID; size_t len; if ((i = recv(socket, &cmd, sizeof(cmd), MSG_WAITALL)) == -1) { err(1, "recv(cmd)"); } if (cmd == C_DOCID_DONE) break; if ((i == recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) { err(1, "recv(len)"); } if (alloclen < len+1) { free(urlbuf); alloclen *= 2; urlbuf = malloc(alloclen); } if ((i == recv(socket, urlbuf, len, MSG_WAITALL)) == -1) { err(1, "recv(len)"); } urlbuf[len] = '\0'; if (!getDocIDFromUrl(urltodociddb, urlbuf, &DocID)) { cmd = C_DOCID_NOTFOUND; sendall(socket, &cmd, sizeof(cmd)); } else { cmd = C_DOCID_FOUND; sendall(socket, &cmd, sizeof(cmd)); sendall(socket, &DocID, sizeof(DocID)); } } while (1); free(urlbuf); } */ else { printf("unnown comand. %i\n", packedHedder.command); } //printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); } //while }
int main (int argc, char *argv[]) { int lotNr; char lotServer[64]; int pageCount; int i; unsigned int FiltetTime; unsigned int FileOffset; char htmlcompressdbuffer[524288]; //0.5 mb char imagebuffer[524288]; //0.5 mb int httpResponsCodes[nrOfHttpResponsCodes]; struct ReposetoryHeaderFormat ReposetoryHeader; struct DocumentIndexFormat DocumentIndexPost; unsigned long int radress; FILE *revindexFilesHa[NrOfDataDirectorys]; struct adultFormat adult; unsigned int lastIndexTime; if (argc < 3) { printf("Dette programet indekserer en lot. Usage:\n\tIndexerLot lotNr subname\n"); exit(0); } for(i=0;i<nrOfHttpResponsCodes;i++) { httpResponsCodes[i] = 0; } lotNr = atoi(argv[1]); strncpy(subname,argv[2],sizeof(subname) -1); //find server based on lotnr lotlistLoad(); lotlistGetServer(lotServer,lotNr); printf("vil index lot nr %i at %s\n",lotNr,lotServer); adultLoad(&adult); langdetectInit(); //temp: må hente dette fra slot server eller fil FiltetTime = 0; FileOffset = 0; pageCount = 0; if (0) { printf("will ges pages by net\n"); revindexFilesOpenNET(revindexFilesHa); while (rGetNextNET(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,&radress,FiltetTime,FileOffset,subname)) { global_curentDocID = ReposetoryHeader.DocID; if (strchr(ReposetoryHeader.url,'?') == 0) { global_curentUrlIsDynamic = 0; } else { global_curentUrlIsDynamic = 1; } handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult); //datta skal uansett kopieres over //kopierer over di data copyRepToDi(&DocumentIndexPost,&ReposetoryHeader); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex DIWriteNET(lotServer,&DocumentIndexPost,ReposetoryHeader.DocID,subname); ++pageCount; //temp: //if(pageCount > 999) { // printf("Exeting after only %i docs\n",pageCount); // break; //} } printf("Sending pages\n"); revindexFilesSendNET(revindexFilesHa,lotNr); } else { printf("Wil acess files 
localy\n"); //sjekker om vi har nokk palss if (!lotHasSufficientSpace(lotNr,4096,subname)) { printf("insufficient disk space\n"); exit(1); } //finner siste indekseringstid lastIndexTime = GetLastIndexTimeForLot(lotNr,subname); if(lastIndexTime != 0) { printf("lastIndexTime is not 0, but %i\n",lastIndexTime); exit(1); } revindexFilesOpenLocal(revindexFilesHa,lotNr,"Main","wb",subname); //temp:Søker til problemområdet //FileOffset = 334603785; while (rGetNext(lotNr,&ReposetoryHeader,htmlcompressdbuffer,sizeof(htmlcompressdbuffer),imagebuffer,&radress,FiltetTime,FileOffset,subname)) { //printf("D: %u, R: %lu\n",ReposetoryHeader.DocID, radress); global_curentDocID = ReposetoryHeader.DocID; if (strchr(ReposetoryHeader.url,'?') == 0) { global_curentUrlIsDynamic = 0; } else { global_curentUrlIsDynamic = 1; } printf("%s\n",ReposetoryHeader.url); handelPage(lotServer,lotNr,&ReposetoryHeader,htmlcompressdbuffer,imagebuffer,revindexFilesHa,&DocumentIndexPost,ReposetoryHeader.DocID,httpResponsCodes,&adult); //datta skal kopieres over uanset hva som skjer //kopierer over di data copyRepToDi(&DocumentIndexPost,&ReposetoryHeader); DocumentIndexPost.RepositoryPointer = radress; //skiver til DocumentIndex DIWrite(&DocumentIndexPost,ReposetoryHeader.DocID,subname); ++pageCount; //if(pageCount > 9999) { // printf("Exeting after only %i docs\n",pageCount); // //break; // exit(1); //} } //skriver riktig indexstide til lotten setLastIndexTimeForLot(lotNr,httpResponsCodes,subname); // vi må ikke kopiere revindex filene da vi jobber på de lokale direkte } langdetectDestroy(); printf("indexed %i pages\n\n\n",pageCount); return 0; }