void gc_reduce(struct reformat *re, int LotNr, char subname[]) { FILE *GCEDFH; int i; unsigned int DocID; //lagrer hvilkene filer vi har slettet GCEDFH = lotOpenFileNoCasheByLotNr(LotNr,"gced","a", 'e',subname); for (i=0; i<NrofDocIDsInLot; i++) { if ((REN_DocumentIndex(re, i)->Url[0] != '\0') && DIS_isDeleted(REN_DocumentIndex(re, i))) { #ifdef DEBUG printf("Adding url \"%s\" to gc file\n",REN_DocumentIndex(re, i)->Url); #endif DocID = LotDocIDOfset(LotNr) +i; if (fwrite(&DocID,sizeof(DocID),1,GCEDFH) != 1) { perror("can't write gc file"); } } } fclose(GCEDFH); }
/*
 * Reads the "gcwhisper" file from lot 1 of the given collection and returns
 * the bitwise OR of the flags of every line that matches an entry in the
 * whispers[] table.
 *
 * Returns 0 when the file cannot be opened. Lines that match no table entry
 * are reported with warnx() and otherwise ignored.
 */
whisper_t gcwhisper_read(char *subname) {

	char line[2048];
	whisper_t result;
	FILE *in = lotOpenFileNoCasheByLotNr(1, "gcwhisper", "r", 's', subname);

	if (in == NULL)
		return 0;

	result = 0;

	while (fgets(line, sizeof(line), in) != NULL) {
		int idx = 0;
		char *nl = strrchr(line, '\n');

		/* Strip the trailing newline so the line compares equal to table strings. */
		if (nl != NULL)
			*nl = '\0';

		/* Walk the NULL-terminated table until a match or the sentinel. */
		while (whispers[idx].str != NULL && strcmp(whispers[idx].str, line) != 0)
			idx++;

		if (whispers[idx].str != NULL)
			result |= whispers[idx].flag;
		else
			warnx("Trying to read unknown whisper string: '%s'", line);
	}

	fclose(in);

	return result;
}
/*
 * Writes the IPAddress field of every DocumentIndex entry of one lot to the
 * lot's "ipdb" file, in the order DIGetNext() yields the entries.
 *
 * argv[1] is the lot number. `subname` is not declared in this function; it
 * is taken from the enclosing file scope.
 *
 * Exit status: 0 on success or when only the usage text is printed, 1 when
 * the lot has no DocumentIndex or the output file cannot be opened/written.
 */
int main (int argc, char *argv[]) {

	struct DocumentIndexFormat DocumentIndexPost;
	int LotNr;
	unsigned int DocID;
	FILE *LOTIPDB;
	int status = 0;

	if (argc < 2) {
		printf("Dette programet leser en DocumentIndex. Gi det et lot nr. \n\n\tUsage: ./readDocumentIndex 1\n");
		exit(0);
	}

	LotNr = atoi(argv[1]);

	if (DIHaveIndex(LotNr,subname) == 0) {
		printf("dosent hav DIindex\n");
		exit(1);
	}

	LOTIPDB = lotOpenFileNoCasheByLotNr(LotNr,"ipdb","wb", 'e',subname);
	if (LOTIPDB == NULL) {
		/* Previously unchecked: fwrite() would have received a NULL stream. */
		perror("can't open ipdb file");
		exit(1);
	}

	DocID = 0;

	while (DIGetNext (&DocumentIndexPost,LotNr,&DocID,subname)) {

		if (fwrite(&DocumentIndexPost.IPAddress,sizeof(unsigned int),1,LOTIPDB) != 1) {
			/* A gap would shift every later record, so stop at the first failure. */
			perror("can't write ipdb file");
			status = 1;
			break;
		}
	}

	if (fclose(LOTIPDB) != 0) {
		perror("can't close ipdb file");
		status = 1;
	}

	return status;
}
int bbdocument_deletecoll(char collection[]) { int LotNr; int i; char FilePath[512]; char IndexPath[512]; char DictionaryPath[512]; FILE *fh; debug("Deleting collection: \"%s\"\n",collection); LotNr = 1; while((fh =lotOpenFileNoCasheByLotNr(LotNr,"reposetory","r",'s',collection)) != NULL) { GetFilPathForLot(FilePath,LotNr,collection); fclose(fh); rrmdir(FilePath); ++LotNr; } for (i=0; i < 64; i++) { GetFilePathForIindex(FilePath,IndexPath,i,"Main","aa",collection); #ifdef DEBUG printf("FilePath: %s\nIndexPath: %s\n",FilePath,IndexPath); #endif if ((unlink(IndexPath) != 1) && (errno != ENOENT)) { //ENOENT=No such file or directory. Viser ikke feil hvis filen ikke fantes. Det er helt normalt perror("remove IndexPath"); } GetFilePathForIDictionary(FilePath,DictionaryPath,i,"Main","aa",collection); #ifdef DEBUG printf("FilePath: %s\nDictionaryPath: %s\n",FilePath,DictionaryPath); #endif if ((unlink(DictionaryPath) != 0) && (errno != ENOENT)) {//ENOENT=No such file or directory. Viser ikke feil hvis filen ikke fantes. Det er helt normalt perror("remove DictionaryPath"); } } //sletter i userToSubname.db struct userToSubnameDbFormat userToSubnameDb; if (!userToSubname_open(&userToSubnameDb,'w')) { printf("can't open users.db\n"); } else { userToSubname_deletecol(&userToSubnameDb,collection); userToSubname_close(&userToSubnameDb); } return 1; }
int main (int argc, char *argv[]) { if (argc < 2) { printf("Error ingen subna,e spesifisert.\n\nEksempel på bruk for å lese lot 2:\n\trread www\n"); exit(1); } char *subname = argv[1]; FILE *f_crc32_words = NULL; int crc32_words_size = 0; struct stat inode; int attr_crc32_words_blocksize = sizeof(unsigned int) + sizeof(char)*MAX_ATTRIB_LEN; //void *m_crc32_words = NULL; struct Crc32attrMapFormat *m_crc32_words = NULL; int i; if ((f_crc32_words = lotOpenFileNoCasheByLotNr(1, "crc32attr.map", "r", 's', subname)) == NULL) { perror("Can't open thecrc32attr.map file for lot"); return -1; } fstat(fileno(f_crc32_words), &inode); crc32_words_size = inode.st_size; if (crc32_words_size==0) { printf("Map is 0 bytes. Skipping\n"); return -1; } if ((m_crc32_words=mmap(NULL, crc32_words_size, PROT_READ, MAP_SHARED, fileno(f_crc32_words), 0)) == MAP_FAILED) { perror("Can't mmap"); return -1; } printf("Hvae %d elements of sise %d\n",(crc32_words_size / attr_crc32_words_blocksize), attr_crc32_words_blocksize); for(i=0;i<(crc32_words_size / attr_crc32_words_blocksize);i++) { printf("crc32 %u, text %s\n",m_crc32_words[i].crc32, m_crc32_words[i].text); } munmap(m_crc32_words,crc32_words_size); fclose(f_crc32_words); return 1; }
/*
 * Writes one byte per DocumentIndex entry of a lot to the lot's "AdultWeight"
 * file: 1 when the entry's AdultWeight is >= AdultWeightForXXX, otherwise 0.
 * Entries are written in the order DIGetNext() yields them.
 *
 * argv[1] is the lot number. `subname` is not declared in this function; it
 * is taken from the enclosing file scope.
 *
 * Exit status: 0 on success or when only the usage text is printed, 1 when
 * the lot has no DocumentIndex or the output file cannot be opened/written.
 */
int main (int argc, char *argv[]) {

	struct DocumentIndexFormat DocumentIndexPost;
	int LotNr;
	unsigned int DocID;
	FILE *ADULTWEIGHTFH;
	unsigned char awvalue;
	int status = 0;

	if (argc < 2) {
		printf("Dette programet leser en DocumentIndex. Gi det et lot nr. \n\n\tUsage: ./readDocumentIndex 1");
		exit(0);
	}

	LotNr = atoi(argv[1]);

	if (DIHaveIndex(LotNr,subname) == 0) {
		printf("dosent hav DIindex\n");
		exit(1);
	}

	ADULTWEIGHTFH = lotOpenFileNoCasheByLotNr(LotNr,"AdultWeight","wb", 'e',subname);
	if (ADULTWEIGHTFH == NULL) {
		/* Previously unchecked: fwrite() would have received a NULL stream. */
		perror("can't open AdultWeight file");
		exit(1);
	}

	DocID = 0;

	while (DIGetNext (&DocumentIndexPost,LotNr,&DocID,subname)) {

		/* 1 marks the document as adult, 0 as not adult. */
		awvalue = (DocumentIndexPost.AdultWeight >= AdultWeightForXXX) ? 1 : 0;

		if (fwrite(&awvalue,sizeof(awvalue),1,ADULTWEIGHTFH) != 1) {
			/* A gap would shift every later record, so stop at the first failure. */
			perror("can't write AdultWeight file");
			status = 1;
			break;
		}
	}

	if (fclose(ADULTWEIGHTFH) != 0) {
		perror("can't close AdultWeight file");
		status = 1;
	}

	return status;
}
void preopen(void) { int i; DIR *dirh; FILE *FH; int count = 0; reclose_cache(); if ((dirh = listAllColl_start()) == NULL) { bblog(ERROR, "Can't listAllColl_start()"); return; } char * subname; while (((subname = listAllColl_next(dirh)) != NULL) && (count < MAX_PREOPEM_FILE)) { bblog(DEBUGINFO, "subname: %s", subname); for(i=1;i<maxLots;i++) { // vi åpner kun lotter som har DocumentIndex. Dette er spesielt viktig da vi oppretter // filene hvis de ikke finnes. if ((FH = lotOpenFileNoCasheByLotNr(i,"DocumentIndex","rb", 'r', subname)) == NULL) { continue; } reopen_cache(i,4, "filtypes",subname,RE_READ_ONLY|RE_STARTS_AT_0|RE_POPULATE|RE_CREATE_AND_STRETCH); reopen_cache(i,sizeof(int), "dates",subname,RE_READ_ONLY|RE_STARTS_AT_0|RE_POPULATE|RE_CREATE_AND_STRETCH); reopen_cache(i,sizeof(unsigned int), "crc32map",subname,RE_READ_ONLY|RE_POPULATE|RE_CREATE_AND_STRETCH); fclose(FH); if (count > MAX_PREOPEM_FILE) { break; } // +3 da vi øker med filtypes, dates, og crc32map count += 3; } } listAllColl_close(dirh); if (count >= MAX_PREOPEM_FILE) { bblog(WARN, "can't preopen any more. Did hit MAX_PREOPEM limit of %d files", MAX_PREOPEM_FILE); } }
/*
 * Adds whisper strings to the "gcwhisper" file in lot 1 of the collection.
 *
 * For every whispers[] table entry whose flag is set in `whisper` but not
 * already present in the file (as reported by gcwhisper_read()), the entry's
 * string is written followed by a newline.
 *
 * Errors opening or closing the file are reported with perror(); the
 * function returns nothing.
 */
void gcwhisper_write(char *subname, whisper_t whisper) {

	FILE *fp;
	whisper_t has;
	int i;

	has = gcwhisper_read(subname);

	/*
	 * NOTE(review): ">>" is not a standard fopen() mode; presumably
	 * lotOpenFileNoCasheByLotNr() gives it append semantics — confirm.
	 */
	fp = lotOpenFileNoCasheByLotNr(1, "gcwhisper", ">>", 'e', subname);
	if (fp == NULL) {
		/* Previously unchecked: fprintf()/fclose() would have received a NULL stream. */
		perror("can't open gcwhisper file");
		return;
	}

	for (i = 0; whispers[i].str != NULL; i++) {
		/* Wanted by the caller and not yet in the file. */
		if ((whispers[i].flag & whisper) && (has & whispers[i].flag) == 0) {
			fprintf(fp, "%s\n", whispers[i].str);
		}
	}

	if (fclose(fp) != 0) {
		perror("can't close gcwhisper file");
	}
}
main (int argc, char *argv[]) { if (argc < 2) { printf("Error ingen subna,e spesifisert.\n\nEksempel på bruk for å lese lot 2:\n\trread www\n"); exit(1); } char *subname = argv[1]; FILE *f_crc32_words = NULL; int crc32_words_size = 0; struct stat inode; int attr_crc32_words_blocksize = sizeof(unsigned int) + sizeof(char)*MAX_ATTRIB_LEN; void *m_crc32_words = NULL; if ((f_crc32_words = lotOpenFileNoCasheByLotNr(1, "crc32attr.map", "r+", 's', subname)) == NULL) { perror("Can't open thecrc32attr.map file for lot"); exit(-1); } fstat(fileno(f_crc32_words), &inode); crc32_words_size = inode.st_size; if (crc32_words_size==0) { printf("Map is 0 bytes. Skipping\n"); } if ((m_crc32_words=mmap(NULL, crc32_words_size, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(f_crc32_words), 0)) == MAP_FAILED) { perror("Can't mmap"); } printf("Will sort %d elements of sise %d\n",(crc32_words_size / attr_crc32_words_blocksize), attr_crc32_words_blocksize); qsort(m_crc32_words,(crc32_words_size / attr_crc32_words_blocksize),attr_crc32_words_blocksize, attr_crc32_words_block_compare); munmap(m_crc32_words,crc32_words_size); fclose(f_crc32_words); printf("Done\n"); }
//gir andre tilgan til lot filer. Casher opne filhandlere FILE *lotOpenFile(unsigned int DocID,char resource[],char type[], char lock,char subname[]) { int LotNr; int i; char FilePath[128]; char File [128]; if (!LotFilesInalisert) { for(i=0; i < MaxOpenFiles; i++) { OpenFiles[i].LotNr = -1; } LotFilesInalisert = 1; } File[0] = '\0'; //finner i hvilken lot vi skal lese fra LotNr = rLotForDOCid(DocID); //printf("LotNr: %i, DocID: %i\n",LotNr,DocID); //begynner med å søke cashen. Lopper til vi enten er ferdig, eller til vi har funne ønskede i cashen i = 0; while ((i < MaxOpenFiles) && (OpenFiles[i].LotNr != LotNr)) { i++; } //temp: skrur av søking her med i=0 //type of og subname er også lagt til uten at det tar hensyn til det i søket i = 0; //hvis vi fant i casehn returnerer vi den if (OpenFiles[i].LotNr == LotNr && (strcmp(OpenFiles[i].subname,subname) == 0) && (strcmp(OpenFiles[i].type,type)==0) && (strcmp(OpenFiles[i].resource,resource)==0) ) { #ifdef DEBUG printf("lotOpenFile: fant en tildigere åpnet fil, returnerer den.\n"); printf("lotOpenFile: returnerer: i %i, subname \"%s\", type \"%s\", LotNr %i\n",i,OpenFiles[i].subname,OpenFiles[i].type,OpenFiles[i].LotNr); printf("lotOpenFile: file is \"%s\"\n",OpenFiles[i].filename); printf("lotOpenFile: returning file handler %p\n",OpenFiles[i].FILEHANDLER); #endif if (OpenFiles[i].FILEHANDLER == NULL) { printf("Error: FILEHANDLER is NULL\n"); #ifdef DEBUG exit(-1); #endif } return OpenFiles[i].FILEHANDLER; } //hvis ikke åpner vi og returnerer else { //hvis dette er en åpen filhånterer, må vi lukke den if (OpenFiles[i].LotNr != -1) { printf("lotOpenFile: closeing: i %i\n",i); fclose(OpenFiles[i].FILEHANDLER); OpenFiles[i].LotNr = -1; } if ((OpenFiles[i].FILEHANDLER = lotOpenFileNoCasheByLotNr( LotNr, resource,type, lock,subname)) == NULL) { printf("lotOpenFileNoCashe: can't open file\n"); return NULL; } GetFilPathForLot(FilePath,LotNr,subname); strscpy(File,FilePath,sizeof(File)); strlcat(File,resource,sizeof(File)); 
strscpy(OpenFiles[i].filename,File,sizeof(OpenFiles[i].filename)); strscpy(OpenFiles[i].resource,resource,sizeof(OpenFiles[i].resource)); strscpy(OpenFiles[i].subname,subname,sizeof(OpenFiles[i].subname)); strscpy(OpenFiles[i].type,type,sizeof(OpenFiles[i].type)); //#ifdef DEBUG printf("lotOpenFile: opening file \"%s\" for %s\n",File,type); //#endif OpenFiles[i].LotNr = LotNr; return OpenFiles[i].FILEHANDLER; } }
/*
 * Opens `resource` in the lot that holds DocID, without going through the
 * handle cache: the lot number is resolved with rLotForDOCid() and every
 * other argument is forwarded unchanged to lotOpenFileNoCasheByLotNr().
 *
 * Returns whatever lotOpenFileNoCasheByLotNr() returns.
 */
FILE *lotOpenFileNoCashe(unsigned int DocID,char resource[],char type[], char lock,char subname[]) {

	const int lot = rLotForDOCid(DocID);

	return lotOpenFileNoCasheByLotNr(lot, resource, type, lock, subname);
}
int gcrepo(int LotNr, char *subname) { int i; struct ReposetoryHeaderFormat ReposetoryHeader; char htmlbuffer[524288]; char imagebuffer[524288]; char *acl_allow; char *acl_deny; char *url, *attributes; unsigned long int raddress; char path[1024]; char path2[1024]; char path3[1024]; FILE *FNREPO; struct reformat *re; int keept = 0; int gced = 0; container *attrkeys = ropen(); if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_HAVE_4_BYTES_VERSION_PREFIX|RE_COPYONCLOSE)) == NULL) { perror("reopen DocumentIndex"); return 0; } if ( (FNREPO = lotOpenFileNoCasheByLotNr(LotNr,"reposetory","rb", 's',subname)) == NULL) { #ifdef DEBUG printf("lot dont have a reposetory file\n"); #endif return 0; } while (rGetNext_fh(LotNr,&ReposetoryHeader,htmlbuffer,sizeof(htmlbuffer),imagebuffer,&raddress,0,0,subname,&acl_allow,&acl_deny, FNREPO ,&url, &attributes)) { #ifdef DEBUG printf("dokument \"%s\", DocID %u.\n", RE_DocumentIndex(re,ReposetoryHeader.DocID)->Url, ReposetoryHeader.DocID); #endif //printf("%p\n", docindex.RepositoryPointer); if (raddress != RE_DocumentIndex(re,ReposetoryHeader.DocID)->RepositoryPointer) { #ifdef DEBUG printf("Garbage collecting %d at %u. docindex has %u\n", ReposetoryHeader.DocID, raddress,RE_DocumentIndex(re,ReposetoryHeader.DocID)->RepositoryPointer); #endif ++gced; } else { unsigned long int offset; offset = rApendPost(&ReposetoryHeader, htmlbuffer, imagebuffer, subname, acl_allow, acl_deny, "repo.wip", url, attributes, attrkeys); RE_DocumentIndex(re,ReposetoryHeader.DocID)->RepositoryPointer = offset; #ifdef DEBUG printf("Writing DocID: %d\n", ReposetoryHeader.DocID); #endif ++keept; } } fclose(FNREPO); //lokker filen repo.wip //lotCloseFiles(); rclose(attrkeys); printf("keept %i\ngced %i\n",keept,gced); reclose(re); /* And we have a race... 
*/ GetFilPathForLot(path, LotNr, subname); strcpy(path2, path); strcpy(path3, path); strcat(path, "repo.wip"); strcat(path2, "reposetory"); rename(path, path2); strcpy(path, path3); strcat(path, "DocumentIndex.wip"); strcat(path3, "DocumentIndex"); rename(path, path3); #ifdef DI_FILE_CASHE closeDICache(); #endif return 0; }
/*
 * Builds the "Anchor" reverse-index files for one lot from its "anchors"
 * file.
 *
 * Every anchor text is lower-cased and split on spaces. For each word that
 * is not empty, not "www" and not a stop word, one record is appended to the
 * bucket file chosen by WordID % NrOfDataDirectorys:
 *
 *     DocID (unsigned int), lang (unsigned char, always 0),
 *     WordID (unsigned long), nr (unsigned long, always 1),
 *     hits (unsigned short)
 *
 * `hits` is a running per-document position counter that starts at 2000 and
 * is capped at 65535. It advances once per anchor, once per "www" and once
 * per indexed word.
 *
 * Usage: ./anchorread lotnr subname
 * Exit status: 0 on success, 1 on bad arguments, a missing anchors file or
 * an allocation failure.
 */
int main (int argc, char *argv[]) {

	int lotNr;
	int i;
	unsigned int DocID;
	char text[50];
	unsigned int radress;
	unsigned int rsize;
	char **Data;
	int TokCount;
	unsigned short hits;
	unsigned long WordID;
	int bucket;
	/*
	 * Written with sizeof(unsigned long) below. It used to be an int, so on
	 * platforms where long is wider than int the write read past the variable.
	 */
	unsigned long nr;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	unsigned char lang;
	FILE *FH;
	unsigned int DocIDPlace;
	int *nrOfLinkWordsToDocID;

	/* Check that we were told which lot to use. */
	if (argc < 3) {
		printf("Usage: ./anchorread lotnr subname\n\n");
		exit(1);
	}

	lotNr = atoi(argv[1]);
	char *subname = argv[2];

	nrOfLinkWordsToDocID = malloc(sizeof(int) * NrofDocIDsInLot);
	if (nrOfLinkWordsToDocID == NULL) {
		perror("malloc nrOfLinkWordsToDocID");
		exit(1);
	}

	for (i=0;i<NrofDocIDsInLot;i++) {
		/* Starts at 2000 so these are easy to tell apart visually from other hits. */
		nrOfLinkWordsToDocID[i] = 2000;
	}

	/* Opened only to verify that the lot has an anchors file. */
	if ( (FH = lotOpenFileNoCasheByLotNr(lotNr,"anchors","rb", 's',subname)) == NULL) {
		printf("lot dont have a anchors file\n");
		exit(1);
	}
	fclose(FH);

	revindexFilesOpenLocal(revindexFilesHa,lotNr,"Anchor","wb",subname);

	while (anchorGetNext(lotNr,&DocID,text,sizeof(text),&radress,&rsize,subname) ) {

		DocIDPlace = (DocID - LotDocIDOfset(rLotForDOCid(DocID)));
		++nrOfLinkWordsToDocID[DocIDPlace];

		convert_to_lowercase((unsigned char *)text);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("DocID %i, text: \"%s\", DocIDPlace %i, nrOfLinkWordsToDocID %i\n",DocID,text,DocIDPlace,nrOfLinkWordsToDocID[DocIDPlace]);
		}
		#endif

		if ((TokCount = split(text, " ", &Data)) == -1) {
			printf("canæt splitt \"%s\"\n",text);
			/* Data is not usable after a failed split; the old code dereferenced it anyway. */
			continue;
		}

		i=0;
		while (Data[i] != NULL) {

			if (Data[i][0] == '\0') {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("emty data element\n");
				}
				#endif
			}
			else if (strcmp(Data[i],"www") == 0) {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("www\n");
				}
				#endif
				/* Not indexed, but it still takes up a position. */
				++nrOfLinkWordsToDocID[DocIDPlace];
			}
			else if (isStoppWord(Data[i])) {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("stopword \"%s\"\n",Data[i]);
				}
				#endif
				/* Stop words neither get indexed nor take up a position. */
			}
			else {

				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("\t\"%s\" %i\n",Data[i],nrOfLinkWordsToDocID[DocIDPlace]);
				}
				#endif

				WordID = crc32boitho(Data[i]);

				if (WordID == 0) {
					printf("got 0 as word id for \"%s\". Somthing may be wrong.\n",Data[i]);
				}

				bucket = WordID % NrOfDataDirectorys;

				/* hits is 16 bits wide on disk, so cap the counter. */
				if (nrOfLinkWordsToDocID[DocIDPlace] > 65535) {
					hits = 65535;
				}
				else {
					hits = nrOfLinkWordsToDocID[DocIDPlace];
				}

				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("\thits %i: \"%s\": %hu, bucket %i\n",i,Data[i],hits,bucket);
				}
				#endif

				if (fwrite(&DocID,sizeof(unsigned int),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite DocID");
				}

				/*
				 * runarb, 13 May 2007: we switched to using a number for the
				 * language. It should come from DocumentIndex when available,
				 * but it is not stored there; see IndexRes for how it is
				 * handled there.
				 */
				lang = 0;
				nr = 1;

				if(fwrite(&lang,sizeof(unsigned char),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite lang");
				}
				if(fwrite(&WordID,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite WordID");
				}
				if(fwrite(&nr,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite nr");
				}
				if(fwrite(&hits,sizeof(unsigned short),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite hits");
				}

				++nrOfLinkWordsToDocID[DocIDPlace];
			}

			++i;
		}

		FreeSplitList(Data);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("\n");
		}
		#endif
	}

	free(nrOfLinkWordsToDocID);

	return 0;
}
int gcdecide(int LotNr, char *subname, struct gcaoptFormat *gcaopt, time_t newest_document) { int i; struct reformat *re; FILE *DOCINDEXFH; whisper_t whisper; //åpner dokument indeks får å teste at vi har en, hvis ikke kan vi bare avslutte. if ( (DOCINDEXFH = lotOpenFileNoCasheByLotNr(LotNr,"DocumentIndex","rb", 's',subname)) == NULL) { #ifdef DEBUG printf("lot dont have a DocumentIndex file\n"); #endif return 0; } fclose(DOCINDEXFH); blog(gcaopt->log,1,"Runing gc for collection \"%s\", lot nr %i",subname,LotNr); if((re = reopen(LotNr, sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_COPYONCLOSE|RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) { perror("can't reopen()"); exit(1); } whisper = gcwhisper_read(subname); //går gjenom alle på jakt etter de som kan slettes for (i=0;i<NrofDocIDsInLot;i++) { if (DIS_isDeleted(REN_DocumentIndex(re, i))) { continue; } #ifdef DEBUG #ifdef BLACK_BOX printf("dokument \"%s\", lastSeen: %s", REN_DocumentIndex(re, i)->Url, ctime_s(&REN_DocumentIndex(re, i)->lastSeen)); #endif #endif #ifdef BLACK_BOX if ((whisper & GCWHISPER_NOTOLD) == 0 && (((gcaopt->lastSeenHack == 1) && (REN_DocumentIndex(re, i)->lastSeen == 0)) || ((REN_DocumentIndex(re, i)->lastSeen != 0) && (newest_document > (REN_DocumentIndex(re, i)->lastSeen + gcaopt->MaxAgeDiflastSeen))))) { //sletter DIS_delete(REN_DocumentIndex(re, i)); //sletter dokumentet i bb spesefike ting. bbdocument_delete (REN_DocumentIndex(re, i)->Url, subname); blog(gcaopt->log,2,"dokument \"%s\" can be deleted. Last seen: %s, DocID %u",REN_DocumentIndex(re, i)->Url,ctime_s(&REN_DocumentIndex(re, i)->lastSeen),LotDocIDOfset(LotNr) +i); ++gcaopt->gced; } else { ++gcaopt->keept; } #endif } //markerer hva vi kan slette. gc_reduce(re, LotNr, subname); reclose(re); //trunkerer reposetoryet. 
gcrepo(LotNr, subname); //vasker iindex struct IndekserOptFormat IndekserOpt; IndekserOpt.optMustBeNewerThen = 0; IndekserOpt.optAllowDuplicates = 0; IndekserOpt.optValidDocIDs = NULL; IndekserOpt.sequenceMode =1; IndekserOpt.garbareCollection = 1; for (i=0;i<64;i++) { Indekser(LotNr,"Main",i,subname,&IndekserOpt); } for (i=0;i<64;i++) { Indekser(LotNr,"acl_allow",i,subname,&IndekserOpt); } for (i=0;i<64;i++) { Indekser(LotNr,"acl_denied",i,subname,&IndekserOpt); } for (i=0;i<64;i++) { Indekser(LotNr,"attributes",i,subname,&IndekserOpt); } //siden vi nå har lagt til alle andringer fra rev index kan vi nå slettet gced filen også //Indekser_deleteGcedFile(LotNr, subname); lotDeleteFile("gced", LotNr, subname); return 0; }
void connectHandler(int socket) { struct packedHedderFormat packedHedder; int i,n; int LotNr; char lotPath[512]; char buf[100]; unsigned int FilterTime; int filnamelen; FILE *FH; struct stat inode; // lager en struktur for fstat å returnere. off_t filesize; char c; struct DocumentIndexFormat DocumentIndexPost; int DocID; struct ReposetoryHeaderFormat ReposetoryHeader; unsigned int radress; char htmlbuffer[524288]; int destLeng; char dest[512]; off_t fileBloks,filerest; char *filblocbuff; //while ((i=read(socket, &packedHedder, sizeof(struct packedHedderFormat))) > 0) { while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) { //printf("command: %i\n",packedHedder.command); //printf("i er %i\n",i); printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); //printf("subname: %s\n",packedHedder.subname); //lar size reflektere hva som er igjen av pakken packedHedder.size = packedHedder.size - sizeof(packedHedder); if (packedHedder.command == C_rmkdir) { printf("C_rmkdir\n"); //leser data. Det skal væren en int som sier hvilken lot vi vil ha if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant read lotnr"); exit(1); } //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant read destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant read dest"); exit(1); } GetFilPathForLot(lotPath,LotNr,packedHedder.subname); sprintf(lotPath,"%s%s",lotPath,dest); printf("mkdir %s\n",lotPath); makePath(lotPath); printf("~C_rmkdir\n"); } else if (packedHedder.command == C_rComand) { //leser data. 
Det skal væren en int som sier hvilken lot vi vil ha if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant read lotnr"); exit(1); } //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant read destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant read dest"); exit(1); } printf("run command %s\n",dest); system(dest); } else if (packedHedder.command == C_getLotToIndex) { printf("fikk C_getLotToIndex\n"); int dirty; if ((i=recv(socket, &dirty, sizeof(dirty),MSG_WAITALL)) == -1) { perror("Cant read dirty"); exit(1); } printf("dirty: %i\n",dirty); LotNr = findLotToIndex(packedHedder.subname,dirty); printf("sending respons\n"); sendall(socket,&LotNr, sizeof(LotNr)); } else if (packedHedder.command == C_getlotHasSufficientSpace) { printf("fikk C_getLotToIndex\n"); int needSpace; int response; if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=recv(socket, &needSpace, sizeof(needSpace),MSG_WAITALL)) == -1) { perror("Cant read dirty"); exit(1); } printf("needSpace: %i, LotNr %i\n",needSpace,LotNr); response = lotHasSufficientSpace(LotNr, needSpace, packedHedder.subname); printf("sending respons\n"); sendall(socket,&response, sizeof(response)); } else if (packedHedder.command == C_rGetSize) { printf("fikk C_rGetSize\n"); //leser data. 
Det skal væren en int som sier hvilken lot vi vil ha if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) { perror("Cant read filnamelen"); exit(1); } if (filnamelen > sizeof(buf)) { printf("filname to long\n"); }; if ((i=read(socket, buf, filnamelen)) == -1) { perror("Cant read filnamelen"); exit(1); } printf("filname %s\n",buf); if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) { perror(buf); //sending that he fil is emty fileBloks = 0; sendall(socket,&fileBloks, sizeof(fileBloks)); } else { //finner og sender il størelse fstat(fileno(FH),&inode); //filesize = inode.st_size; //sendall(socket,&filesize, sizeof(filesize)); fileBloks = inode.st_size; printf("size is %" PRId64 "\n",fileBloks); sendall(socket,&fileBloks, sizeof(fileBloks)); fclose(FH); } } else if (packedHedder.command == C_rGetFile) { printf("fikk C_rGetFile\n"); //leser data. Det skal væren en int som sier hvilken lot vi vil ha if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } if ((i=read(socket, &filnamelen, sizeof(filnamelen))) == -1) { perror("Cant read filnamelen"); exit(1); } if (filnamelen > sizeof(buf)) { printf("filname to long\n"); }; if ((i=read(socket, buf, filnamelen)) == -1) { perror("Cant read filnamelen"); exit(1); } printf("filname %s\n",buf); if ((FH = lotOpenFileNoCasheByLotNr(LotNr,buf,"rb",'s',packedHedder.subname)) == NULL) { perror(buf); //sending that the fil is emty fileBloks = 0; filerest = 0; sendall(socket,&fileBloks, sizeof(fileBloks)); sendall(socket,&filerest, sizeof(filerest)); } else { //finner og sender fil størelse fstat(fileno(FH),&inode); //filesize = inode.st_size; //sendall(socket,&filesize, sizeof(filesize)); fileBloks = (int)floor(inode.st_size / rNetTrabsferBlok); filerest = inode.st_size - (fileBloks * rNetTrabsferBlok); sendall(socket,&fileBloks, sizeof(fileBloks)); 
sendall(socket,&filerest, sizeof(filerest)); printf("sending fil. fileBloks %"PRId64", filerest %"PRId64"\n",fileBloks,filerest); filblocbuff = (char *)malloc(rNetTrabsferBlok); for(i=0; i < fileBloks; i++) { //fread(filblocbuff,sizeof(c),rNetTrabsferBlok,FH); //fread_all(const void *buf, size_t size, FILE *stream) fread_all(filblocbuff,rNetTrabsferBlok,FH, 4096); if ((n=sendall(socket, filblocbuff, rNetTrabsferBlok)) == -1) { perror("Cant recv dest"); exit(1); } } printf("did recv %i fileBloks\n",i); fread(filblocbuff,sizeof(c),filerest,FH); if ((n=sendall(socket, filblocbuff, filerest)) == -1) { perror("Cant recv filerest"); exit(1); } free(filblocbuff); /* for (i=0;i<filesize;i++) { fread(&c,sizeof(char),1,FH); send(socket, &c, sizeof(char), 0); //printf("%i\n",(int)c); } */ printf("send file end\n"); fclose(FH); } } else if (packedHedder.command == C_rGetNext) { printf("fikk C_rGetNext\n"); printf("støttes ikke lengere"); exit(1); /* //leser data. Det skal væren en unigned int som sier hvilken lot vi vil ha //har deklarert den som int her ??? 
if ((i=read(socket, &LotNr, sizeof(LotNr))) == -1) { perror("Cant read lotnr"); exit(1); } printf("leser FilterTime\n"); //leser filtertime if ((i=read(socket, &FilterTime, sizeof(FilterTime))) == -1) { perror("Cant read lotnr"); exit(1); } printf("lotnr %i FilterTime %u\n",LotNr,FilterTime); //henter inn data om den lotten if (rGetNext(LotNr,&ReposetoryHeader,htmlbuffer,NULL,&radress,FilterTime,0)) { //printf("DocId: %i url: %s\n",ReposetoryHeader.DocID,ReposetoryHeader.url); //sender pakke hedder sendpacked(socket,C_rLotData,PROTOCOLVERSION, ReposetoryHeader.htmlSize + sizeof(ReposetoryHeader) +sizeof(radress), NULL,packedHedder.subname); //sennder ReposetoryHeader'en sendall(socket,&ReposetoryHeader, sizeof(ReposetoryHeader)); //sender htmlen sendall(socket,&htmlbuffer, ReposetoryHeader.htmlSize); //sender adressen sendall(socket,&radress,sizeof(radress)); //printf("data sent\n"); //printf("rGetNext: %i\n",ReposetoryHeader.DocID); } else { sendpacked(socket,C_rEOF,PROTOCOLVERSION, 0, NULL,packedHedder.subname); printf("ferdig\n"); } */ } else if (packedHedder.command == C_DIWrite) { if ((i=recv(socket, &DocumentIndexPost, sizeof(struct DocumentIndexFormat),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } DIWrite(&DocumentIndexPost,DocID,packedHedder.subname, NULL); //printf("DIWrite: %i\n",DocID); } else if (packedHedder.command == C_DIRead) { int DocID; struct DocumentIndexFormat DocumentIndexPost; printf("got commane C_DIRead. 
sise %i hsize %i ds %i\n",packedHedder.size, sizeof(packedHedder), sizeof(DocID)); if ((i=recv(socket, &DocID, sizeof(DocID),0)) == -1) { perror("recv"); exit(1); } //printf("DocID %i\n",DocID); //leser inn datan //int DIRead (struct DocumentIndexFormat *DocumentIndexPost, int DocID); DIRead(&DocumentIndexPost,DocID,packedHedder.subname); sendall(socket,&DocumentIndexPost, sizeof(struct DocumentIndexFormat)); } else if (packedHedder.command == C_rGetIndexTime) { int Lotnr; unsigned int IndexTime; if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) { perror("recv"); exit(1); } IndexTime = GetLastIndexTimeForLot(LotNr,packedHedder.subname); sendall(socket,&IndexTime, sizeof(IndexTime)); } else if (packedHedder.command == C_rSetIndexTime) { int Lotnr; if ((i=recv(socket, &LotNr, sizeof(LotNr),0)) == -1) { perror("recv"); exit(1); } setLastIndexTimeForLot(LotNr,NULL,packedHedder.subname); } else if (packedHedder.command == C_rSendFile) { //skal mota en fil for lagring i reposetoryet //char FilePath[156]; FILE *FILEHANDLER; char c; char opentype[2]; //char *filblocbuff; //off_t fileBloks,filerest; if ((i=recv(socket, &LotNr, sizeof(LotNr),MSG_WAITALL)) == -1) { perror("Cant recv lotnr"); exit(1); } printf("lotNr %i\n",LotNr); //leser destinasjonelengden if ((i=recv(socket, &destLeng, sizeof(destLeng),MSG_WAITALL)) == -1) { perror("Cant recv destLeng"); exit(1); } if (destLeng > sizeof(dest)) { printf("dest filname is to long at %i\n",destLeng); exit(1); } //leser destinasjonene if ((i=recv(socket, &dest, destLeng,MSG_WAITALL)) == -1) { perror("Cant recv dest"); exit(1); } printf("coping %s as length %i in to lot %i\n",dest,destLeng,LotNr); if ((i=recv(socket, &opentype, sizeof(char) +1,MSG_WAITALL)) == -1) { perror("Cant recv opentype"); exit(1); } printf("opentype \"%s\"\n",opentype); //GetFilPathForLot(FilePath,LotNr,packedHedder.subname); //legger til filnavnet //strncat(FilePath,dest,sizeof(FilePath)); //leser inn filstørelsen if ((i=recv(socket, &fileBloks, 
sizeof(fileBloks),MSG_WAITALL)) == -1) { perror("Cant recv fileBloks"); exit(1); } if ((i=recv(socket, &filerest, sizeof(filerest),MSG_WAITALL)) == -1) { perror("Cant recv filerest"); exit(1); } printf("fileBloks: %" PRId64 ", filerest: %" PRId64 "\n",fileBloks,filerest); //åpner filen if ((FILEHANDLER = lotOpenFileNoCasheByLotNr(LotNr,dest,opentype,'e',packedHedder.subname)) == NULL) { perror(dest); } filblocbuff = (char *)malloc(rNetTrabsferBlok); for(i=0; i < fileBloks; i++) { if ((n=recv(socket, filblocbuff, rNetTrabsferBlok,MSG_WAITALL)) == -1) { perror("Cant recv dest"); exit(1); } fwrite(filblocbuff,sizeof(c),rNetTrabsferBlok,FILEHANDLER); } printf("did recv %i fileBloks\n",i); if ((n=recv(socket, filblocbuff, filerest,MSG_WAITALL)) == -1) { perror("Cant recv filerest"); exit(1); } fwrite(filblocbuff,sizeof(c),filerest,FILEHANDLER); free(filblocbuff); fclose(FILEHANDLER); printf("\n"); } else if (packedHedder.command == C_DIGetIp) { unsigned int DocID; struct DocumentIndexFormat DocumentIndexPost; //printf("got command C_DIGetIp\n"); if ((i=recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } //printf("DocID %u\n",DocID); DIRead(&DocumentIndexPost,DocID,packedHedder.subname); //printf("ipadress: %u\n",DocumentIndexPost.IPAddress); sendall(socket,&DocumentIndexPost.IPAddress, sizeof(DocumentIndexPost.IPAddress)); } else if (packedHedder.command == C_anchorAdd) { size_t textlen; unsigned int DocID; char *text; printf("Add anchor....\n"); if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } else if ((i = recv(socket, &textlen, sizeof(textlen), MSG_WAITALL)) == -1) { perror("recv(textlen)"); exit(1); } text = malloc(textlen+1); text[textlen] = '\0'; if ((i = recv(socket, text, textlen, MSG_WAITALL)) == -1) { perror("recv(text)"); exit(1); } anchoraddnew(DocID, text, textlen, packedHedder.subname, NULL); printf("Text for %d: %s\n", DocID, text); free(text); } else if (packedHedder.command == 
C_anchorGet) { size_t len; char *text; int LotNr; unsigned int DocID; printf("Get anchor...\n"); if ((i = recv(socket, &DocID, sizeof(DocID),MSG_WAITALL)) == -1) { perror("recv"); exit(1); } printf("got DocID %u\n",DocID); LotNr = rLotForDOCid(DocID); printf("trying to read anchor\n"); len = anchorRead(LotNr, packedHedder.subname, DocID, NULL, -1); printf("got anchor of length %i\n",len); sendall(socket, &len, sizeof(len)); text = malloc(len+1); printf("readint it again\n"); anchorRead(LotNr, packedHedder.subname, DocID, text, len+1); sendall(socket, text, len); } else if (packedHedder.command == C_readHTML) { /* unsigned int DocID; unsigned int len; char *text; char *acla, *acld; struct DocumentIndexFormat DocIndex; struct ReposetoryHeaderFormat ReposetoryHeader; if ((i = recv(socket, &DocID, sizeof(DocID), MSG_WAITALL)) == -1) { perror("recv"); exit(1); } if ((i = recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) { perror("recv(len)"); exit(1); } printf("len %u\n",len); text = malloc(len); if (text == NULL) exit(1); DIRead(&DocIndex, DocID, packedHedder.subname); if (!rReadHtml( text, &len, DocIndex.RepositoryPointer, DocIndex.htmlSize, DocID, packedHedder.subname, &ReposetoryHeader, &acla, &acld, DocIndex.imageSize)) { len = 0; sendall(socket, &len, sizeof(len)); } else { ++len; // \0 #ifdef DEBUG printf("docID %u\n",DocID); printf("Got: (len %i, real %i) ########################\n%s\n#####################\n", len, strlen(text), text); #endif sendall(socket, &len, sizeof(len)); sendall(socket, text, len); sendall(socket, &ReposetoryHeader,sizeof(ReposetoryHeader)); } free(text); */ } /* runarb: 06 des 2007: vi har gåt bort fra denne metoden for nå, og bruker heller index over smb. 
Men tar vare på den da vi kan trenge den siden else if (packedHedder.command == C_urltodocid) { char cmd; int alloclen; char *urlbuf; if (urltodociddb == NULL) { cmd = C_DOCID_NODB; sendall(socket, &cmd, sizeof(cmd)); exit(1); } else { cmd = C_DOCID_READY; sendall(socket, &cmd, sizeof(cmd)); } cmd = C_DOCID_NEXT; alloclen = 1024; urlbuf = malloc(alloclen); do { unsigned int DocID; size_t len; if ((i = recv(socket, &cmd, sizeof(cmd), MSG_WAITALL)) == -1) { err(1, "recv(cmd)"); } if (cmd == C_DOCID_DONE) break; if ((i == recv(socket, &len, sizeof(len), MSG_WAITALL)) == -1) { err(1, "recv(len)"); } if (alloclen < len+1) { free(urlbuf); alloclen *= 2; urlbuf = malloc(alloclen); } if ((i == recv(socket, urlbuf, len, MSG_WAITALL)) == -1) { err(1, "recv(len)"); } urlbuf[len] = '\0'; if (!getDocIDFromUrl(urltodociddb, urlbuf, &DocID)) { cmd = C_DOCID_NOTFOUND; sendall(socket, &cmd, sizeof(cmd)); } else { cmd = C_DOCID_FOUND; sendall(socket, &cmd, sizeof(cmd)); sendall(socket, &DocID, sizeof(DocID)); } } while (1); free(urlbuf); } */ else { printf("unnown comand. %i\n", packedHedder.command); } //printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); } //while }
int main (int argc, char *argv[]) { if (argc < 2) { printf("Program to sort a crc32attr.map\n\nUsage:\n\t./sortCrc32attrMap subname\n"); exit(1); } char *subname = argv[1]; FILE *f_crc32_words = NULL, *f_crc32_words_new = NULL; size_t crc32_words_size = 0; struct stat inode; int attr_crc32_words_blocksize = sizeof(unsigned int) + sizeof(char)*MAX_ATTRIB_LEN; struct Crc32attrMapFormat *m_crc32_words = NULL; unsigned int last; size_t i; size_t nrOfElements; if ((f_crc32_words = lotOpenFileNoCasheByLotNr(1, "crc32attr.map", "r+", 's', subname)) == NULL) { perror("Can't open the crc32attr.map file."); return -1; } if (fstat(fileno(f_crc32_words), &inode) != 0) { perror("Can't fstat crc32attr.map"); return -1; } crc32_words_size = inode.st_size; if (crc32_words_size==0) { printf("crc32attr.map is 0 bytes. Skipping\n"); return 0; } if ((m_crc32_words=mmap(NULL, crc32_words_size, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(f_crc32_words), 0)) == MAP_FAILED) { perror("Can't mmap"); return -1; } nrOfElements = (crc32_words_size / attr_crc32_words_blocksize); printf("Will sort %zu elements of sise %d\n",nrOfElements, attr_crc32_words_blocksize); qsort(m_crc32_words,nrOfElements,attr_crc32_words_blocksize, attr_crc32_words_block_compare); /************************************************************************************ Now when we have it sorted we will print out only uniq elements in a new file. 
************************************************************************************/ if ((f_crc32_words_new = lotOpenFileNoCasheByLotNr(1, "crc32attr.map.new", "wb", 'e', subname)) == NULL) { perror("Can't open thecrc32attr.map.new file for lot"); return -1; } last = 0; for(i=0;i<nrOfElements;i++) { if (m_crc32_words[i].crc32 != last) { #ifdef DEBUG printf("crc32 %u, text %s\n",m_crc32_words[i].crc32, m_crc32_words[i].text); #endif if (fwrite(&m_crc32_words[i], sizeof(struct Crc32attrMapFormat), 1, f_crc32_words_new) != 1) { perror("fwrite crc32attr.map.new"); return -1; } } last = m_crc32_words[i].crc32; } munmap(m_crc32_words,crc32_words_size); fclose(f_crc32_words); fclose(f_crc32_words_new); // Swap the files if (lotRename(1, subname, "crc32attr.map.new", "crc32attr.map") != 0) { perror("rename crc32attr.map.new crc32attr.map"); return -1; } printf("Done\n"); return 0; }
void connectHandler(int socket) { struct packedHedderFormat packedHedder; int isAuthenticated = 0; char tkeyForTest[32]; int i,n; int intrespons; int count = 0; container *attrkeys = NULL; #ifdef DEBUG_TIME struct timeval start_time, end_time; struct timeval tot_start_time, tot_end_time; gettimeofday(&tot_start_time, NULL); #endif ionice_benice(); while ((i=recv(socket, &packedHedder, sizeof(struct packedHedderFormat),MSG_WAITALL)) > 0) { #ifdef DEBUG printf("size is: %i\nversion: %i\ncommand: %i\n",packedHedder.size,packedHedder.version,packedHedder.command); #endif packedHedder.size = packedHedder.size - sizeof(packedHedder); if (attrkeys == NULL) { attrkeys = ropen(); } if (packedHedder.command == bbc_askToAuthenticate) { if ((i=recv(socket, tkeyForTest, sizeof(tkeyForTest),MSG_WAITALL)) == -1) { perror("Cant read tkeyForTest"); exit(1); } if (1) { printf("authenticated\n"); intrespons = bbc_authenticate_ok; bbdocument_init(NULL); isAuthenticated = 1; } else { printf("authenticate faild\n"); intrespons = bbc_authenticate_feiled; } if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else { if (!isAuthenticated) { printf("user not autentikated\n"); exit(1); } if (packedHedder.command == bbc_docadd) { #ifdef DEBUG printf("bbc_docadd\n"); #endif char *subname,*documenturi,*documenttype,*document,*acl_allow,*acl_denied,*title,*doctype; char *attributes; int dokument_size; unsigned int lastmodified; #ifdef DEBUG_TIME gettimeofday(&start_time, NULL); #endif //subname if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recvall(socket, subname, intrespons)) == 0) { perror("Cant read subname"); exit(1); } //documenturi if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } documenturi = malloc(intrespons +1); if ((i=recvall(socket, documenturi, intrespons)) == 0) { 
perror("Cant read documenturi"); exit(1); } //documenttype if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } documenttype = malloc(intrespons +1); if ((i=recvall(socket, documenttype, intrespons)) == 0) { perror("Cant read documenttype"); exit(1); } //document //dokument_size if ((i=recvall(socket, &dokument_size, sizeof(dokument_size))) == 0) { perror("Cant read dokument_size"); exit(1); } document = malloc(dokument_size +1); if (dokument_size == 0) { document[0] = '\0'; } else { if ((i=recvall(socket, document, dokument_size)) == 0) { fprintf(stderr,"Can't read document of size %i\n",dokument_size); perror("recvall"); exit(1); } } //lastmodified if ((i=recvall(socket, &lastmodified, sizeof(lastmodified))) == 0) { perror("Cant read lastmodified"); exit(1); } //acl_allow if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } acl_allow = malloc(intrespons +1); if ((i=recvall(socket, acl_allow, intrespons)) == 0) { perror("Cant read acl_allow"); exit(1); } //acl_denied if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } acl_denied = malloc(intrespons +1); if ((i=recvall(socket, acl_denied, intrespons)) == 0) { perror("Cant read acl_denied"); exit(1); } //title if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } title = malloc(intrespons +1); if ((i=recvall(socket, title, intrespons)) == 0) { perror("Cant read title"); exit(1); } //doctype if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } doctype = malloc(intrespons +1); if ((i=recvall(socket, doctype, intrespons)) == 0) { perror("Cant read doctype"); exit(1); } // Attribute list if ((i = recvall(socket, &intrespons, sizeof(intrespons))) == 0) err(1, "Can't receive attribute list len"); attributes = malloc(intrespons +1); if 
((i=recvall(socket, attributes, intrespons)) == 0) err(1, "Can't receive attribute list"); #ifdef DEBUG_TIME gettimeofday(&end_time, NULL); printf("Time debug: bbdn_docadd recv data time: %f\n",getTimeDifference(&start_time, &end_time)); #endif printf("\n"); printf("########################################################\n"); printf("Url: %s\n",documenturi); printf("got subname \"%s\": title \"%s\". Nr %i, dokument_size %i attrib: %s\n",subname,title,count,dokument_size, attributes); printf("########################################################\n"); printf("calling bbdocument_add():\n"); #ifdef DEBUG_TIME gettimeofday(&start_time, NULL); #endif intrespons = bbdocument_add(subname,documenturi,documenttype,document,dokument_size,lastmodified,acl_allow,acl_denied,title,doctype, attributes, attrkeys); printf(":bbdocument_add end\n"); printf("########################################################\n"); #ifdef DEBUG_TIME gettimeofday(&end_time, NULL); printf("Time debug: bbdn_docadd runing bbdocument_add() time: %f\n",getTimeDifference(&start_time, &end_time)); #endif free(subname); free(documenturi); free(documenttype); free(document); free(acl_allow); free(acl_denied); free(title); free(doctype); free(attributes); // send status if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else if (packedHedder.command == bbc_opencollection) { char *subname; char path[PATH_MAX]; printf("open collection\n"); if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) err(1, "Cant read intrespons"); subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) err(1, "Cant read subname"); GetFilPathForLot(path, 1, subname); strcat(path, "fullyCrawled"); unlink(path); free(subname); } else if (packedHedder.command == bbc_closecollection) { printf("closecollection\n"); char *subname; //subname if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { 
perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } bbdocument_close(attrkeys); attrkeys = NULL; //toDo må bruke subname, og C ikke perl her printf("cleanin lots start\n"); char command[PATH_MAX]; snprintf(command,sizeof(command),"perl %s -l -s \"%s\"",bfile("perl/cleanLots.pl"),subname); printf("running \"%s\"\n",command); intrespons = system(command); printf("cleanin lots end\n"); // legger subnamet til listen over ventene subnavn, og huper searchd. lot_recache_collection(subname); /* We are done crawling */ { int fd = lotOpenFileNoCasheByLotNrl(1, "fullyCrawled", ">>", '\0', subname); if (fd == -1) { warn("Unable to write fullyCrawled file"); } else { close(fd); } } free(subname); if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else if (packedHedder.command == bbc_deleteuri) { printf("deleteuri\n"); char *subname, *uri; //subname if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } subname[intrespons] = '\0'; if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { perror("Cant read intrespons"); exit(1); } uri = malloc(intrespons +1); if ((i=recv(socket, uri, intrespons,MSG_WAITALL)) == -1) { perror("Cant read uri"); exit(1); } uri[intrespons] = '\0'; printf("going to delete: %s from %s\n", uri, subname); /* Add docid to the gced file */ { FILE *fh; unsigned int DocID, lastmodified; unsigned int lotNr; int err = 0; if (uriindex_get(uri, &DocID, &lastmodified, subname) == 0) { fprintf(stderr,"Unable to get uri info. 
uri=\"%s\",subname=\"%s\".",uri,subname); perror("Unable to get uri info"); err++; } if (!err) { lotNr = rLotForDOCid(DocID); if ((fh = lotOpenFileNoCasheByLotNr(lotNr,"gced","a", 'e',subname)) == NULL) { perror("can't open gced file"); err++; } else { fwrite(&DocID, sizeof(DocID), 1, fh); fclose(fh); } } if (!err) { struct reformat *re; if((re = reopen(rLotForDOCid(DocID), sizeof(struct DocumentIndexFormat), "DocumentIndex", subname, RE_HAVE_4_BYTES_VERSION_PREFIX)) == NULL) { perror("can't reopen()"); err++; } else { DIS_delete(RE_DocumentIndex(re, DocID)); reclose(re); } } //markerer at den er skitten if (!err) { FILE *dirtfh; dirtfh = lotOpenFileNoCashe(DocID,"dirty","ab",'e',subname); fwrite("1",1,1,dirtfh); fclose(dirtfh); } if (err == 0) bbdocument_delete(uri, subname); } free(subname); intrespons = 1; // Always return ok for now if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } } else if (packedHedder.command == bbc_deletecollection) { printf("deletecollection\n"); char *subname, *uri; //subname if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } subname[intrespons] = '\0'; printf("going to delete collection: %s\n", subname); intrespons = bbdocument_deletecoll(subname); if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } free(subname); } else if (packedHedder.command == bbc_addwhisper) { whisper_t add; char *subname; if ((i=recv(socket, &intrespons, sizeof(intrespons),MSG_WAITALL)) == -1) err(1, "Cant read intrespons"); subname = malloc(intrespons+1); if ((i=recv(socket, subname, intrespons,MSG_WAITALL)) == -1) { perror("Cant read subname"); exit(1); } subname[intrespons] = '\0'; if ((i=recv(socket, &add, sizeof(add),MSG_WAITALL)) == -1) err(1, 
"Cant read add whisper"); gcwhisper_write(subname, add); free(subname); } else if (packedHedder.command == bbc_HasSufficientSpace) { char *subname; //subname if ((i=recvall(socket, &intrespons, sizeof(intrespons))) == 0) { perror("Cant read intrespons"); exit(1); } subname = malloc(intrespons +1); if ((i=recvall(socket, subname, intrespons)) == 0) { perror("Cant read subname"); exit(1); } // tester bare i lot 1 her. Må også sjekke andre loter når vi begynner å støtte frlere disker på ES. intrespons = lotHasSufficientSpace(1, 4096, subname); if ((n=sendall(socket, &intrespons, sizeof(intrespons))) == -1) { perror("Cant recv filerest"); exit(1); } printf("~Asked for HasSufficientSpace for subname \"%s\". Returnerer %d\n",subname, intrespons); free(subname); } else { printf("unnown comand. %i\n", packedHedder.command); } } ++count; // #ifdef DEBUG_BREAK_AFTER // if (count >= DEBUG_BREAK_AFTER) { // printf("exeting after %i docoments\n",count); // exit(1); // } // #endif } #ifdef DEBUG_TIME gettimeofday(&tot_end_time, NULL); printf("Time debug: bbdn total time time: %f\n",getTimeDifference(&tot_start_time, &tot_end_time)); #endif }
int main (int argc, char *argv[]) { FILE *fp; char username[MAX_USER_NAME_LEN], username_last[MAX_USER_NAME_LEN]; DB *dbp = NULL; DBT key, data; int ret; //int *dbpp; struct userToSubnameDbFormat userToSubnameDb; if (argc != 3) { printf("usgae ./mergeUserToSubname lotnr subname\n"); exit(1); } int lotNr = atoi(argv[1]); char *subname = argv[2]; if (!userToSubname_open(&userToSubnameDb,'w')) { perror("userToSubname_open"); exit(1); } if ((fp = lotOpenFileNoCasheByLotNr(lotNr,"acllist","rb", 's',subname) ) == NULL) { perror("acllist"); } else { username_last[0] = '\0'; while(fgets(username,sizeof(username),fp) != NULL) { chomp(username); if (strcmp(username_last,username) != 0) { printf("username \"%s\"\n",username); userToSubname_add(&userToSubnameDb,username,subname); strcpy(username_last,username); } } fclose(fp); } if ((fp = lotOpenFileNoCasheByLotNr(lotNr,"aclcollectionlist","rb", 's',subname) ) == NULL) { perror("aclcollectionlist"); } else { username_last[0] = '\0'; while(fgets(username,sizeof(username),fp) != NULL) { chomp(username); if (strcmp(username_last,username) != 0) { printf("username \"%s\"\n",username); userToSubname_add(&userToSubnameDb,username,subname); strcpy(username_last,username); } } fclose(fp); } userToSubname_close(&userToSubnameDb); /* //temp userToSubname_open(&dbpp); char buf[128] = "*****"; userToSubname_getsubnamesAsString(&dbpp,"Everyone",buf); printf("aa subnames \"%s\"\n",buf); userToSubname_close(&dbpp); */ return 0; }