/*
 * Initialize the lookup tables held by a hash_struct.
 *
 * hash        structure to fill in; its sav, adrfil and former_adrfil
 *             members are overwritten with freshly created tables.
 * normalized  value stored verbatim in hash->normalized.
 *
 * Every table gets the same duplicate/free key handlers and receives
 * `hash` itself as the handler argument; only the hashing and equality
 * callbacks differ from one table to the next.
 */
void hash_init(hash_struct * hash, int normalized) {
  hash->normalized = normalized;

  /* Case-insensitive comparison ; keys are direct char* filenames */
  hash->sav = inthash_new(0);
  inthash_value_set_key_handler(hash->sav,
                                key_duphandler,
                                key_freehandler,
                                key_sav_hashes,
                                key_sav_equals,
                                hash);

  /* URL-style comparison ; keys are lien_url structure pointers
     casted to char* */
  hash->adrfil = inthash_new(0);
  inthash_value_set_key_handler(hash->adrfil,
                                key_duphandler,
                                key_freehandler,
                                key_adrfil_hashes,
                                key_adrfil_equals,
                                hash);

  /* Same key convention as adrfil, with the "former" hash/equality
     callbacks */
  hash->former_adrfil = inthash_new(0);
  inthash_value_set_key_handler(hash->former_adrfil,
                                key_duphandler,
                                key_freehandler,
                                key_former_adrfil_hashes,
                                key_former_adrfil_equals,
                                hash);
}
HTSEXT_API char* hts_getcategories(char* path, int type) { String categ = STRING_EMPTY; String profiles = STRING_EMPTY; char* rpath = path; find_handle h; inthash hashCateg = NULL; if (rpath[0]) { if (rpath[strlen(rpath)-1]=='/') { rpath[strlen(rpath)-1]='\0'; /* note: patching stored (inhash) value */ } } h = hts_findfirst(rpath); if (h) { String iname = STRING_EMPTY; if (type == 1) { hashCateg = inthash_new(127); StringCat(categ, "Test category 1"); StringCat(categ, "\r\nTest category 2"); } do { if (hts_findisdir(h)) { char BIGSTK line2[1024]; StringCopy(iname,rpath); StringCat(iname,"/"); StringCat(iname,hts_findgetname(h)); StringCat(iname,"/hts-cache/winprofile.ini"); if (fexist(StringBuff(iname))) { if (type == 1) { FILE* fp = fopen(StringBuff(iname), "rb"); if (fp != NULL) { int done=0; while(!feof(fp) && !done) { int n = linput(fp, line2, sizeof(line2) - 2); if (n > 0) { if (strfield(line2, "category=")) { if (*(line2+9)) { if (!inthash_read(hashCateg, line2+9, NULL)) { inthash_write(hashCateg, line2+9, 0); if (StringLength(categ) > 0) { StringCat(categ, "\r\n"); } unescapehttp(line2+9, &categ); } } done=1; } } } line2[0] = '\0'; fclose(fp); } } else { if (StringLength(profiles) > 0) { StringCat(profiles, "\r\n"); } StringCat(profiles, hts_findgetname(h)); } } } } while(hts_findnext(h)); hts_findclose(h); StringFree(iname); } if (hashCateg) { inthash_delete(&hashCateg); hashCateg = NULL; } if (type == 1) return StringBuffRW(categ); else return StringBuffRW(profiles); }
/*
 * Indexing system
 * A little bit dirty, (quick'n dirty, in fact)
 * But should be okay on most cases
 * Tags and javascript handled (ignored)
 *
 * Extracts keywords from one downloaded document and appends one line per
 * distinct keyword ("<word> <sortkey> <file>") to fp_tmpproject.
 *
 * html_data  document bytes; scanned from offset 0 up to `size`.
 * size       number of bytes to scan.
 * mime       MIME type; only HTML-like types, SVG (and ASF when HTS_USEMMS
 *            is set), javascript and CSS are processed.
 * filename   name written in the index line; when it begins with
 *            `indexpath`, that prefix is stripped first.
 * indexpath  directory prefix of the index files.
 *
 * Returns 0 when a parameter is missing, the MIME type is not handled, or
 * the temporary file / hash table cannot be created; returns 1 otherwise
 * (including when HTS_MAKE_KEYWORD_INDEX is disabled).
 */
int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
#if HTS_MAKE_KEYWORD_INDEX
  char catbuff[CATBUFF_SIZE];
  int intag=0,inscript=0,incomment=0;
  char keyword[KEYW_LEN+32];
  /* Same type as `size`, so the scan index cannot overflow on large input */
  LLint i=0;
  int WordIndexSize=1024;
  inthash WordIndexHash=NULL;
  FILE *tmpfp=NULL;

  // Check parameters
  if (!html_data)
    return 0;
  if (!size)
    return 0;
  if (!mime)
    return 0;
  if (!filename)
    return 0;

  // Init ? (first call: remove index files left by a previous run)
  if (hts_index_init) {
    remove(concat(catbuff,indexpath,"index.txt"));
    remove(concat(catbuff,indexpath,"sindex.html"));
    hts_index_init=0;
  }

  // Check MIME type
  if (is_html_mime_type(mime)) {
    inscript=0;
  }
  // FIXME - temporary fix for image/svg+xml (svg)
  // "IN XML" (html like, in fact :) )
  else if (
    (strfield2(mime,"image/svg+xml"))
    || (strfield2(mime,"image/svg-xml"))
#if HTS_USEMMS
    || strfield2(mime,"video/x-ms-asf")
#endif
    ) {
    inscript=0;
  }
  else if (
    (strfield2(mime,"application/x-javascript"))
    || (strfield2(mime,"text/css"))
    ) {
    // Start in "script" state: nothing is indexed until a "</script"
    inscript=1;
  }
  // Note: "text/vnd.wap.wml" is deliberately not handled
  // (would not work in many cases)
  else
    return 0;

  // Temporary file: receives each distinct keyword of this page
  tmpfp = tmpfile();
  if (!tmpfp)
    return 0;

  // Create hash structure
  WordIndexHash=inthash_new(WordIndexSize);
  if (!WordIndexHash) {
    // Do not leak the temporary stream on failure
    fclose(tmpfp);
    return 0;
  }

  // Start indexing this page
  keyword[0]='\0';
  while(i<size) {
    if (strfield(html_data + i , "<script")) {
      inscript=1;
    }
    else if (strfield(html_data + i , "<!--")) {
      incomment=1;
    }
    else if (strfield(html_data + i , "</script")) {
      if (!incomment)
        inscript=0;
    }
    else if (strfield(html_data + i , "-->")) {
      incomment=0;
    }
    else if (html_data[i]=='<') {
      if (!inscript)
        intag=1;
    }
    else if (html_data[i]=='>') {
      intag=0;
    }
    else {
      // Okay, parse keywords
      if ( (!inscript) && (!incomment) && (!intag) ) {
        char cchar=html_data[i];
        int pos;
        int len = (int) strlen(keyword);

        // Replace (ignore case, and so on..)
        if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0)
          cchar=KEYW_TRANSCODE_TO[pos];

        if (strchr(KEYW_ACCEPT,cchar)) {
          /* Ignore some characters at beginning */
          if ((len>0) || (!strchr(KEYW_IGNORE_BEG,cchar))) {
            keyword[len++]=cchar;
            keyword[len]='\0';
          }
        }
        else if ( (strchr(KEYW_SPACE,cchar)) || (!cchar) ) {
          /* Separator: the current word (if any) is complete */

          /* Avoid these words */
          if (len>0) {
            if (strchr(KEYW_NOT_BEG,keyword[0])) {
              keyword[(len=0)]='\0';
            }
          }

          /* Strip ending . and so */
          {
            int ok=0;
            while((len = (int) strlen(keyword)) && (!ok)) {
              if (strchr(KEYW_STRIP_END,keyword[len-1])) {
                /* strip it */
                keyword[len-1]='\0';
              } else
                ok=1;
            }
          }

          /* Store it ? */
          if (len >= KEYW_MIN_LEN ) {
            hts_primindex_words++;
            if (inthash_inc(WordIndexHash,keyword)) {
              /* added new: remember it once in the temporary file */
              fprintf(tmpfp,"%s\n",keyword);
            }
          }
          keyword[(len=0)]='\0';
        }
        else {
          /* Invalid character: drop the word being built */
          keyword[(len=0)]='\0';
        }

        /* Word too long: drop it (keeps keyword[] within bounds) */
        if (len>KEYW_LEN) {
          keyword[(len=0)]='\0';
        }
      }
    }
    i++;
  }

  // Reset temp file
  fseek(tmpfp,0,SEEK_SET);

  // Process indexing for this page
  if (fp_tmpproject) {
    while(!feof(tmpfp)) {
      char line[KEYW_LEN + 32];
      linput(tmpfp,line,KEYW_LEN + 2);
      if (strnotempty(line)) {
        intptr_t e=0;
        if (inthash_read(WordIndexHash,line,&e)) {
          char BIGSTK savelst[HTS_URLMAXSIZE*2];
          e++;    /* 0 means "once" */

          // Strip the index path prefix from the file name when present
          if (strncmp((const char*)fslash(catbuff,(char*)indexpath),filename,strlen(indexpath))==0) {
            strcpybuff(savelst,filename+strlen(indexpath));
          } else {
            strcpybuff(savelst,filename);
          }

          // Add entry for this file and word
          fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
          hts_primindex_size++;
        }
      }
    }
  }

  // Delete temp file
  fclose(tmpfp);
  tmpfp=NULL;

  // Clear hash table
  inthash_delete(&WordIndexHash);
#endif
  return 1;
}