/*
 * Parse the textual output of libextractor and store whitelisted attributes
 * in the hashtable `dst`.
 *
 * `output` is split on newlines. Every line containing a '-' is treated as
 * "key - value": the text before the dash (minus the character directly in
 * front of it, normally a space) is the key, and the text following the
 * two-character "- " separator is the value. Lines without a dash are ignored.
 *
 * The key is looked up with in_list(key, whitelist); when that returns NULL
 * the attribute is dropped. Otherwise heap copies (strdup) of the string
 * returned by in_list() and of the value are inserted into `dst`.
 */
void parse_libextractor_output(struct hashtable *dst, const char *output, char **whitelist)
{
	char **lines;
	int n_lines = split(output, "\n", &lines);
	char key[MAX_ATTRIB_LEN];
	char val[MAX_ATTRIB_LEN];
	int i;

	printf("parse_libextractor_output: n_lines %i\n", n_lines);

	for (i = 0; i < n_lines; i++) {
		char *line = lines[i];
		char *remain = strchr(line, '-');
		const char *ret;
		int keylen;

		if (remain == NULL)
			continue;

		keylen = remain - line;
		if (keylen > MAX_ATTRIB_LEN)
			keylen = MAX_ATTRIB_LEN;

		/*
		 * strlcpy() copies at most keylen-1 bytes, which deliberately
		 * drops the character in front of the dash. With keylen == 0
		 * (line starts with '-') strlcpy() writes nothing at all, so
		 * the key must be cleared by hand or it would keep the value
		 * from the previous line (or be uninitialised).
		 */
		if (keylen < 1)
			key[0] = '\0';
		else
			strlcpy(key, line, keylen);

		/*
		 * Skip the "- " separator, but never step past the string
		 * terminator when the dash is the last character of the line.
		 */
		remain += (remain[1] != '\0') ? 2 : 1;
		strlcpy(val, remain, sizeof val);

		ret = in_list(key, whitelist);
		if (ret == NULL) {
			/* Attribute is not whitelisted. */
			continue;
		}

		/* NOTE(review): strdup() results are not checked for NULL. */
		hashtable_insert(dst, strdup(ret), strdup(val));
	}

	FreeSplitList(lines);
	printf("parse_libextractor_output: done\n");
}
void suggest_1(char *host, char *arg, char *user, char *collection) { CLIENT *clnt; numbest_res *result_1; struct senddata args; char first[1024]; char suggeston[1024]; char **wordlist; int splitn; splitn = split(arg, " ", &wordlist); if (splitn == 0) return; args.word = wordlist[splitn-1]; convert_to_lowercase(args.word); args.user = user; args.collection = (collection ? collection : ""); #ifndef DEBUG clnt = clnt_create (host, SUGGEST, SUGGESTVERS, "udp"); if (clnt == NULL) { clnt_pcreateerror (host); exit (1); } #endif /* DEBUG */ /* XXX: set lower timeout */ result_1 = get_best_results_2(&args, clnt); if (!result_1) { #if 1 //clnt_perror (clnt, "call failed"); #endif } else { if (result_1->_errno == 0) { namelist nl; int i; if (strlen(args.word) > 0) { for (nl = result_1->numbest_res_u.list; nl != NULL; nl = nl->next) { for (i = 0; i < splitn-1; i++) printf("%s ", wordlist[i]); printf("%s\n", nl->name); } } else { for (i = 0; i < splitn-1; i++) printf("%s ", wordlist[i]); puts(""); } } } FreeSplitList(wordlist); #ifndef DEBUG clnt_destroy (clnt); #endif /* DEBUG */ }
void shortenurl(char *url,int urllen) { char **Data; int Count, TokCount; #ifdef BLACK_BOKS unsigned char newurl[128]; #else unsigned char newurl[201]; #endif int added, suburllen; int i; char slash[2]; int len; char *p; char proto[128]; char origurl[urllen+1]; bblog(DEBUGINFO, "shortenurl: inn url %s", url); newurl[0] = '\0'; proto[0] = '\0'; //tar bort proto:// først p = strstr(url, "://"); if (p != NULL && p > url) { p += 3; /* Skip past :// */ strncpy(proto, url, p-url); proto[p-url] = '\0'; while (*p == '/') p++; len = strlen(p); memmove(url, p, len); url[len] = '\0'; } else if (strncmp(url, "outlook:", 8) == 0) { shortenurl_outlook(url, urllen); return; } else { len = strlen(url); } strcpy(origurl, url); #ifdef DEBUG bblog(DEBUGINFO, "shortenurl: after proto \"%s\"", url); #endif //hvis den er kort kan vi bare returnere if (len < TARGET_VISIBLE_URL_LEN) { #ifdef DEBUG bblog(DEBUGINFO, "shortenurl: url is short enough. Don't need to shorten"); #endif snprintf(url, urllen, "%s%s", proto, origurl); return; } if ((TokCount = split(url, "/", &Data)) > 1) { #ifdef DEBUG bblog(DEBUGINFO, "seperator: / "); #endif strcpy(slash,"/"); } else if ((TokCount = split(url, "\\", &Data)) > 1) { #ifdef DEBUG bblog(DEBUGINFO, "seperator: \\ "); #endif strcpy(slash,"\\"); } else { bblog(ERROR, "can't split url"); snprintf(url, urllen, "%s%s", proto, origurl); return; } /* Count = 0; while( (Data[Count] != NULL) ) { printf("\t\t%d\t\"%s\"\n", Count, Data[Count]); printf("\n"); Count++; } */ --TokCount; //split ser ut til å begynner på 1, ikke på 0 #ifdef DEBUG bblog(DEBUGINFO, "\tfound %d token(s):", TokCount); #endif Count = 0; added = 0; suburllen = 0; while( (Data[Count] != NULL) ) { bblog(DEBUGINFO, "a: \t\t%d\t\"%s\"", Count, Data[Count]); suburllen = strlen(Data[Count]); if ((added + suburllen) < (TARGET_VISIBLE_URL_LEN * 0.3)) { strlcat(newurl,Data[Count],sizeof(newurl)); strlcat(newurl,slash,sizeof(newurl)); } else { break; } added += suburllen; ++Count; } strlcat(newurl, 
"...", sizeof(newurl)); //printf("rev:\n"); Count = TokCount; added = 0; suburllen = 0; while( (Count > 0) ) { bblog(DEBUGINFO, "b: \t\t%d\t\"%s\"", Count, Data[Count]); suburllen = strlen(Data[Count]); if ((added + suburllen) < (TARGET_VISIBLE_URL_LEN * 0.7)) { #ifdef DEBUG bblog(DEBUGINFO, "candidate %s",Data[Count]); #endif } else { break; } added += suburllen; --Count; } bblog(DEBUGINFO, "TokCount %i, count %i",TokCount,Count); //hvis også siste navn er for langt, hånterer vi det spesifikt. if (TokCount == Count) { bblog(DEBUGINFO, "bb"); strlcat(newurl,slash,sizeof(newurl)); strlcat(newurl,Data[Count],sizeof(newurl)); } else { //printf("addint last part:\n"); for (i=Count+1;i<TokCount+1;i++) { bblog(DEBUGINFO, "c: \t\t%d\t\"%s\"", i, Data[i]); bblog(DEBUGINFO, "newurl: len %i, \"%s\"",strlen((char*)newurl),newurl); strlcat(newurl,slash,sizeof(newurl)); strlcat(newurl,Data[i],sizeof(newurl)); } } bblog(DEBUGINFO, "shortenurl 1: newurl \"%s\"",newurl); //runarb 27 mai //Hvis den har et utf 8 tegn der vi slutter å kopierer for vi med bare halve tegnet, og bryter da xml'en bblog(DEBUGINFO, "strlen %i, size %i",strlen((char*)newurl),sizeof(newurl)); i = strlen((char*)newurl) -1; while(i!=0 && newurl[i] > 127) { bblog(DEBUGINFO, "removing char %c",newurl[i]); newurl[i] = '\0'; --i; } //if ((strlen(newurl) == (sizeof(newurl) -1) ) // // && ( (newurl[sizeof(newurl) -1] > 127) || (newurl[sizeof(newurl) -1] < 10)) // ) { // newurl[sizeof(newurl) -1] = 'X'; //} bblog(DEBUGINFO, "shortenurl 2: newurl \"%s\"",newurl); FreeSplitList(Data); //strscpy(url,newurl,urllen); snprintf(url, urllen, "%s%s", proto, newurl); }
int bbdocument_init(container **attrkeys) { DIR *dirp; FILE *filep; char buf[512]; char path[512]; struct dirent *dp; char lines[512]; char **splitdata; int TokCount; struct fileFilterFormat *fileFilter = NULL; char fileFilterName[] = "fileFilter"; perl_embed_init(NULL, 1); //chtbl_init(&htbl, PRIME_TBLSIZ, bbdocument_h, bbdocument_hmatch, free); h_fileFilter = create_hashtable(PRIME_TBLSIZ, bbdocument_h, bbdocument_hmatch); printf("opening %s\n",bfile(fileFilterName)); if ((dirp = opendir(bfile(fileFilterName))) == NULL) { fprintf(stderr,"warn: cant open fileFilter \"%s\". Cant use fileFilters\n",bfile(fileFilterName)); return 1; } while ((dp = readdir(dirp)) != NULL) { if (dp->d_name[0] == '.') { continue; } sprintf(path,"%s/%s/",bfile(fileFilterName),dp->d_name); sprintf(buf,"%sruninfo",path); printf("%s\n",buf); if ((filep = fopen(buf,"r")) == NULL) { printf("no runinfo file for \"%s\"\n",dp->d_name); continue; } printf("loading \"%s\"\n",dp->d_name); while ((!feof(filep)) && (fgets(lines,sizeof(lines) -1,filep) != NULL)) { //blanke linjer og komentarer som starter på # if ((lines[0] == '\n') || (lines[0] == '#')) { continue; } //void chomp(char string[]) chomp(lines); //printf("line %s\n",lines); TokCount = split(lines, ": ", &splitdata); //printf("\tfound %d token(s):\n", TokCount); /* if (TokCount != 2) { printf("bad config line \"%s\". 
Splitet in %i elements\n",lines,TokCount); continue; } */ if (strcmp(splitdata[0],"documentstype") == 0) { //legger til det gamle filteret if (fileFilter != NULL) { if (NULL != hashtable_search(h_fileFilter,fileFilter->documentstype )) { printf("####################### BUG ################################\n"); printf("allredy have a filter for \"%s\"!\n",fileFilter->documentstype); printf("#######################/BUG ################################\n"); } //add to hash printf("inserting %s\n",(*fileFilter).documentstype); //chtbl_insert(&htbl,(void *)fileFilter); if (!hashtable_insert(h_fileFilter,fileFilter->documentstype,fileFilter) ) { printf("cant insert\n"); exit(-1); } printf("end inserting\n"); } //begynner på et nytt filter fileFilter = malloc(sizeof(struct fileFilterFormat)); fileFilter->attrwhitelist = NULL; //ikke alle filfiltere har sat alle opsjoner, så vi nulstiller alt, slik at det er lett og strcmp()'e //etter en verdi, uten at vi må tenke på at den kansje ikke er satt. memset(fileFilter,'\0',sizeof(struct fileFilterFormat)); // default til FILTER_EXEOC fileFilter->filtertype = FILTER_EXEOC; strcpy((*fileFilter).documentstype,splitdata[1]); strlcpy(fileFilter->path, path, sizeof fileFilter->path); } else if (strcmp(splitdata[0],"command") == 0) { //vi kan ha : i komandoen. 
Kopierer derfor først inn hele, så fjerner vi command: //strcpy((*fileFilter).command,splitdata[1]); strscpy((*fileFilter).command,lines,sizeof((*fileFilter).command)); strcasesandr((*fileFilter).command,sizeof((*fileFilter).command),"command: ",""); //leger til path der vi har sakt vi skal ha lokal path ( ./ ) strcasesandr((*fileFilter).command,sizeof((*fileFilter).command),"./",path); printf(".command %s\n",(*fileFilter).command); } else if (strcmp(splitdata[0],"comment") == 0) { strscpy((*fileFilter).comment,splitdata[1],sizeof((*fileFilter).comment)); } else if (strcmp(splitdata[0],"format") == 0) { strscpy((*fileFilter).format,splitdata[1],sizeof((*fileFilter).format)); } else if (strcmp(splitdata[0],"outputtype") == 0) { //stdio, file, osv,, strcpy((*fileFilter).outputtype,splitdata[1]); } else if (strcmp(splitdata[0],"outputformat") == 0) { //text, html strcpy((*fileFilter).outputformat,splitdata[1]); } else if (strcmp(splitdata[0], "filtertype") == 0) { if (strcmp(splitdata[1], FILTER_EXEOC_STR) == 0) fileFilter->filtertype = FILTER_EXEOC; else if (strcmp(splitdata[1], FILTER_PERL_PLUGIN_STR) == 0) fileFilter->filtertype = FILTER_PERL_PLUGIN; else errx(1, "Unknown filtertype %s\n", splitdata[1]); } else if (strcmp(splitdata[0], "attrwhitelist") == 0) { // TODO: Free fileFilter->attrwhitelist if (!split(splitdata[1], ",", &fileFilter->attrwhitelist)) warnx("attrwhitelist was empty."); } else { printf("unknown command \"%s\"\n",lines); } //clean FreeSplitList(splitdata); } if (fileFilter != NULL) { //add to hash printf("inserting %s\n",(*fileFilter).documentstype); //chtbl_insert(&htbl,(void *)fileFilter); if (!hashtable_insert(h_fileFilter,fileFilter->documentstype,fileFilter) ) { printf("cant insert\n"); exit(-1); } printf("end inserting\n"); } //markerer at vi har lagt det til fileFilter = NULL; fclose(filep); } closedir(dirp); if (attrkeys != NULL) { *attrkeys = ropen(); } return 1; }
void shortenurl(char *url,int urllen) { char **Data; size_t *datalen, protolen; int Count, TokCount; #ifdef BLACK_BOKS unsigned char newurl[128]; #else unsigned char newurl[201]; #endif char slash[2]; int len; #ifdef DEBUG bblog(DEBUG, "shortenurl: inn url %s", url); #endif char *p; char proto[128]; char origurl[urllen+1]; newurl[0] = '\0'; proto[0] = '\0'; //tar bort proto:// først p = strstr(url, "://"); if (p != NULL && p > url) { p += 3; /* Skip past :// */ strncpy(proto, url, p-url); proto[p-url] = '\0'; while (*p == '/') p++; len = strlen(p); memmove(url, p, len); url[len] = '\0'; } else { len = strlen(url); } strcpy(origurl, url); #ifdef DEBUG bblog(DEBUG, "shortenurl: after proto \"%s\"",url); #endif //hvis den er kort kan vi bare returnere if (len < TARGET_VISIBLE_URL_LEN) { #ifdef DEBUG bblog(DEBUG, "shortenurl: url is short enough. Don't need to shorten"); #endif snprintf(url, urllen, "%s%s", proto, origurl); return; } if ((TokCount = split(url, "/", &Data)) > 1) { #ifdef DEBUG bblog(DEBUG, "seperator: /"); #endif strcpy(slash,"/"); } else if ((TokCount = split(url, "\\", &Data)) > 1) { #ifdef DEBUG bblog(DEBUG, "seperator: \\"); #endif strcpy(slash,"\\"); } else { bblog(ERROR, "can't split url: %s", origurl); snprintf(url, urllen, "%s%s", proto, origurl); return; } Count = 0; datalen = malloc(sizeof(size_t) * TokCount); while( (Data[Count] != NULL) ) { //printf("\t\t%d\t\"%s\"\n", Count, Data[Count]); //printf("\n"); datalen[Count] = strlen(Data[Count]); Count++; } --TokCount; //split ser ut til å begynner på 1, ikke på 0 #ifdef DEBUG bblog(DEBUG, "found %d token(s):", TokCount); { int a; for (a = 0; Data[a] != NULL; a++) { bblog(DEBUG, " %s (%d)", Data[a], strlen(Data[a])); } } #endif /* Minmum length of filename */ #define PRESERVE_SPACE 20 /* XXX: Find suitable number */ protolen = strlen(proto); /* We really want the first part (alias, or ip), first directory and first part of file name */ if (TokCount >= 3 && (protolen + datalen[0] + datalen[1] + 
datalen[TokCount-1]) > TARGET_VISIBLE_URL_LEN) { /* Shorten last element enough */ if (datalen[TokCount-1] > PRESERVE_SPACE) { Data[TokCount-1][PRESERVE_SPACE] = '\0'; Data[TokCount-1][PRESERVE_SPACE-3] = '\xE2'; Data[TokCount-1][PRESERVE_SPACE-2] = '\x80'; Data[TokCount-1][PRESERVE_SPACE-1] = '\xA6'; datalen[TokCount-1] = PRESERVE_SPACE; } if (protolen + datalen[0] + datalen[1] + datalen[TokCount-1] <= TARGET_VISIBLE_URL_LEN) { snprintf(url, urllen, /*proto*/"%s" /*ip*/"%s" /*slash*/ "%s" /*firstdir*/ "%s" /*slash*/"%s" /*dot*/"%s" /*slash*/"%s" /*file*/"%s", proto, Data[0], slash, Data[1], slash, "\xE2\x80\xA6", slash, Data[TokCount-1]); goto shortenurllongdone; } if (datalen[1] > PRESERVE_SPACE) { Data[1][PRESERVE_SPACE] = '\0'; Data[1][PRESERVE_SPACE-3] = '\xE2'; Data[1][PRESERVE_SPACE-2] = '\x80'; Data[1][PRESERVE_SPACE-1] = '\xA6'; datalen[1] = PRESERVE_SPACE; } if (protolen + datalen[0] + datalen[1] + datalen[TokCount-1] <= TARGET_VISIBLE_URL_LEN) { snprintf(url, urllen, /*proto*/"%s" /*ip*/"%s" /*slash*/ "%s" /*firstdir*/ "%s" /*slash*/"%s" /*dot*/"%s" /*slash*/"%s" /*file*/"%s", proto, Data[0], slash, Data[1], slash, "\xE2\x80\xA6", slash, Data[TokCount-1]); goto shortenurllongdone; } if (datalen[0] > PRESERVE_SPACE) { Data[0][PRESERVE_SPACE] = '\0'; Data[0][PRESERVE_SPACE-3] = '\xE2'; Data[0][PRESERVE_SPACE-2] = '\x80'; Data[0][PRESERVE_SPACE-1] = '\xA6'; datalen[0] = PRESERVE_SPACE; } if (protolen + datalen[0] + datalen[1] + datalen[TokCount-1] <= TARGET_VISIBLE_URL_LEN) { snprintf(url, urllen, /*proto*/"%s" /*ip*/"%s" /*slash*/ "%s" /*firstdir*/ "%s" /*slash*/"%s" /*dot*/"%s" /*slash*/"%s" /*file*/"%s", proto, Data[0], slash, Data[1], slash, "\xE2\x80\xA6", slash, Data[TokCount-1]); goto shortenurllongdone; } snprintf(url, urllen, "%.*s", PRESERVE_SPACE, origurl); shortenurllongdone: ; } else { int n = 0; size_t totlen, rtotlen; size_t bp, fp; char *p; int cur; int lastdots; totlen = snprintf(url, urllen-totlen, "%s", proto); //printf("url: %s proto: %s 
%d\n", url, proto, totlen); totlen += snprintf(url+totlen, urllen-totlen, "%s%s", Data[0], slash); //printf("url: %s proto: %s %d\n", url, proto, totlen); bp = TokCount - 1; fp = 1; lastdots = 0; rtotlen = totlen; while (totlen+PRESERVE_SPACE < TARGET_VISIBLE_URL_LEN && bp >= fp && n < TokCount) { //printf("%d < %d && %d >= %d && %d < %d\n", totlen+20, TARGET_VISIBLE_URL_LEN, bp, fp, n, TokCount); if ((n & 1) == 0) { if ((lastdots & 1) == 1) { n++; //printf("dot dot 1\n"); continue; } cur = fp; } else { if ((lastdots & 2) == 2) { n++; //printf("dot dot 2\n"); continue; } cur = bp; } //printf("Itr: %d fp: %d bp: %d cur: %d\n", n, fp, bp, cur); //printf("trying: %s %d\n", Data[cur], strlen(Data[cur])); //printf("%d + %d + %d < %d\n", totlen, PRESERVE_SPACE, snprintf(NULL, 0, "%s%s", Data[cur], slash), TARGET_VISIBLE_URL_LEN); if (totlen+PRESERVE_SPACE+snprintf(NULL, 0, "%s%s", Data[cur], slash) < TARGET_VISIBLE_URL_LEN) { //printf("Wanted: %d\n", cur); totlen += snprintf(NULL, 0, "%s%s", Data[cur], slash); if ((n & 1) == 0) fp++; else bp--; } else { lastdots = 1; if (lastdots == 0) totlen += snprintf(NULL, 0, "...%s", slash); if ((n & 1) == 0) { lastdots |= 1; //printf("setting dot 1...\n"); } else { lastdots |= 2; //printf("setting dot 2...\n"); } } n++; } //printf("Total length: %d\n", totlen); //printf("bp: %d, fp: %d\n", bp, fp); /* Get fp */ for (cur = 1; cur < fp; cur++) { //printf("cur: %d totlen: %d urllen: %d\n", cur, rtotlen, urllen); //printf("before: %s %s %d\n", url, Data[cur], urllen-rtotlen); rtotlen += snprintf(url+rtotlen, urllen-rtotlen, "%s%s", Data[cur], slash); //printf("after: %s %s\n", url); } if (bp >= fp) { rtotlen += snprintf(url+rtotlen, urllen-rtotlen, "%s%s", "\xE2\x80\xA6", slash); //printf("%s\n", url); } for (cur = bp+1; cur < TokCount; cur++) { //printf("2 cur: %d totlen: %d\n", cur, rtotlen); rtotlen += snprintf(url+rtotlen, urllen-rtotlen, "%s%s", Data[cur], slash); //printf("%s %s\n", url, Data[cur]); } //printf("%d %s\n", 
strlen(Data[TokCount]), Data[TokCount]); if (strlen(Data[TokCount]) > TARGET_VISIBLE_URL_LEN-rtotlen) { size_t endlen; char *end = strrchr(Data[TokCount], '.'); if (end == NULL) endlen = 0; else endlen = strlen(end); //printf("File will be %d long\n", TARGET_VISIBLE_URL_LEN-rtotlen-endlen-3); { int i = TARGET_VISIBLE_URL_LEN-rtotlen-endlen-2; Data[TokCount][i] = '\0'; i--; /* Don't cut in the middle of a utf-8 character */ while (Data[TokCount][i] & 0x80) { Data[TokCount][i] = '\0'; i--; } } rtotlen += snprintf(url+rtotlen, urllen-rtotlen, "%s\xE2\x80\xA6%s", Data[TokCount], end); } else { rtotlen += snprintf(url+rtotlen, urllen-rtotlen, "%s", Data[TokCount]); } } FreeSplitList(Data); }
/*
 * anchorread: build the "Anchor" reverse index for one lot.
 *
 * Usage: ./anchorread lotnr subname
 *
 * Reads every anchor text of the lot with anchorGetNext(), lower-cases it
 * and splits it on spaces. For every word that is neither empty, "www" nor
 * a stop word, one record is appended to the reverse-index file selected by
 * (WordID % NrOfDataDirectorys):
 *
 *     DocID (unsigned int), lang (unsigned char, always 0),
 *     WordID (unsigned long), nr (unsigned long, always 1),
 *     hits (unsigned short)
 *
 * `hits` is a per-document running position counter that starts at 2000 and
 * is capped at 65535.
 */
int main (int argc, char *argv[])
{
	int lotNr;
	int i;
	unsigned int DocID;
	char text[50];
	unsigned int radress;
	unsigned int rsize;
	char **Data;
	int TokCount;
	unsigned short hits;
	unsigned long WordID;
	/* Written with sizeof(unsigned long) below, so it must be that wide. */
	unsigned long nr;
	int bucket;
	FILE *revindexFilesHa[NrOfDataDirectorys];
	unsigned char lang;
	FILE *FH;
	unsigned int DocIDPlace;
	int *nrOfLinkWordsToDocID;
	char *subname;

	/* Check that we were told which lot to use. */
	if (argc < 3) {
		printf("Usage: ./anchorread lotnr subname\n\n");
		exit(1);
	}

	lotNr = atoi(argv[1]);
	subname = argv[2];

	nrOfLinkWordsToDocID = malloc(sizeof(*nrOfLinkWordsToDocID) * NrofDocIDsInLot);
	if (nrOfLinkWordsToDocID == NULL) {
		perror("malloc nrOfLinkWordsToDocID");
		exit(1);
	}
	for (i = 0; i < NrofDocIDsInLot; i++) {
		/* Start at 2000 so these are easy to tell apart visually from other hits. */
		nrOfLinkWordsToDocID[i] = 2000;
	}

	if ( (FH = lotOpenFileNoCasheByLotNr(lotNr,"anchors","rb", 's',subname)) == NULL) {
		printf("lot dont have a anchors file\n");
		exit(1);
	}
	fclose(FH);

	revindexFilesOpenLocal(revindexFilesHa,lotNr,"Anchor","wb",subname);

	while (anchorGetNext(lotNr,&DocID,text,sizeof(text),&radress,&rsize,subname) ) {

		DocIDPlace = (DocID - LotDocIDOfset(rLotForDOCid(DocID)));
		/* Never index outside the per-lot counter array. */
		if (DocIDPlace >= (unsigned int)NrofDocIDsInLot) {
			fprintf(stderr, "DocID %u is outside the lot (place %u). Skipping\n", DocID, DocIDPlace);
			continue;
		}
		++nrOfLinkWordsToDocID[DocIDPlace];

		convert_to_lowercase((unsigned char *)text);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("DocID %i, text: \"%s\", DocIDPlace %i, nrOfLinkWordsToDocID %i\n",DocID,text,DocIDPlace,nrOfLinkWordsToDocID[DocIDPlace]);
		}
		#endif

		if ((TokCount = split(text, " ", &Data)) == -1) {
			printf("canæt splitt \"%s\"\n",text);
			/* Data is not usable after a failed split. */
			continue;
		}

		for (i = 0; Data[i] != NULL; i++) {

			if (Data[i][0] == '\0') {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("emty data element\n");
				}
				#endif
			}
			else if (strcmp(Data[i],"www") == 0) {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("www\n");
				}
				#endif
				/* "www" is not indexed but still takes up a position. */
				++nrOfLinkWordsToDocID[DocIDPlace];
			}
			else if (isStoppWord(Data[i])) {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("stopword \"%s\"\n",Data[i]);
				}
				#endif
			}
			else {
				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("\t\"%s\" %i\n",Data[i],nrOfLinkWordsToDocID[DocIDPlace]);
				}
				#endif

				WordID = crc32boitho(Data[i]);

				if (WordID == 0) {
					printf("got 0 as word id for \"%s\". Somthing may be wrong.\n",Data[i]);
				}

				bucket = WordID % NrOfDataDirectorys;

				/* hits is 16 bits wide; saturate instead of wrapping. */
				if (nrOfLinkWordsToDocID[DocIDPlace] > 65535) {
					hits = 65535;
				}
				else {
					hits = nrOfLinkWordsToDocID[DocIDPlace];
				}

				#ifdef DEBUG
				if (DocID == 4999999) {
					printf("\thits %i: \"%s\": %hu, bucket %i\n",i,Data[i],hits,bucket);
				}
				#endif

				if (fwrite(&DocID,sizeof(unsigned int),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite DocID");
				}

				/*
				 * runarb, 13 May 2007: language is now stored as a
				 * number. It should come from DocumentIndex when
				 * available, but it is not stored there; see how
				 * IndexRes handles it.
				 */
				lang = 0;
				nr = 1;

				if (fwrite(&lang,sizeof(unsigned char),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite lang");
				}
				if (fwrite(&WordID,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite WordID");
				}
				if (fwrite(&nr,sizeof(unsigned long),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite nr");
				}
				if (fwrite(&hits,sizeof(unsigned short),1,revindexFilesHa[bucket]) != 1) {
					perror("fwrite hits");
				}

				++nrOfLinkWordsToDocID[DocIDPlace];
			}
		}

		FreeSplitList(Data);

		#ifdef DEBUG
		if (DocID == 4999999) {
			printf("\n");
		}
		#endif
	}

	free(nrOfLinkWordsToDocID);

	return 0;
}