Beispiel #1
0
/*
 * Set up the three lookup tables held by a hash_struct.
 *
 * hash        structure whose sav, adrfil and former_adrfil tables are
 *             created here (each one through inthash_new(0))
 * normalized  flag copied verbatim into hash->normalized
 *
 * All three tables share the same key duplicate/free handlers; only the
 * hashing and equality callbacks differ.  The hash_struct itself is given
 * as the last argument of every registration (presumably the context that
 * is handed back to the callbacks -- confirm against the inthash API).
 */
void hash_init(hash_struct * hash, int normalized) {
  hash->normalized = normalized;

  /* Saved-filename table: case-insensitive comparison, keys are plain
     char* filenames */
  hash->sav = inthash_new(0);
  inthash_value_set_key_handler(hash->sav,
                                key_duphandler, key_freehandler,
                                key_sav_hashes, key_sav_equals,
                                hash);

  /* Address/file table: URL-style comparison, keys are lien_url structure
     pointers cast to char* */
  hash->adrfil = inthash_new(0);
  inthash_value_set_key_handler(hash->adrfil,
                                key_duphandler, key_freehandler,
                                key_adrfil_hashes, key_adrfil_equals,
                                hash);

  /* Former address/file table: same key handlers, with its own hashing and
     equality callbacks */
  hash->former_adrfil = inthash_new(0);
  inthash_value_set_key_handler(hash->former_adrfil,
                                key_duphandler, key_freehandler,
                                key_former_adrfil_hashes,
                                key_former_adrfil_equals,
                                hash);
}
Beispiel #2
0
/*
 * Walk the sub-directories of 'path' that contain a
 * "hts-cache/winprofile.ini" file and build a "\r\n"-separated list.
 *
 * path  directory to scan; one trailing '/' is removed IN PLACE, so the
 *       caller's buffer is modified
 * type  1: list the distinct, unescaped "category=" values read from the
 *          profiles (two built-in test categories are put first whenever
 *          the directory can be opened);
 *       any other value: list the names of the sub-directories that hold
 *          a profile
 *
 * Returns the raw buffer (StringBuffRW) of the list that was built.
 * NOTE(review): that buffer is not released here, so ownership presumably
 * passes to the caller -- confirm.
 */
HTSEXT_API char* hts_getcategories(char* path, int type) {
  String categ = STRING_EMPTY;
  String profiles = STRING_EMPTY;
  inthash hashCateg = NULL;
  char* rpath = path;
  find_handle h;

  /* Strip a single trailing slash (note: patching stored (inhash) value) */
  if (rpath[0]) {
    const size_t last = strlen(rpath) - 1;

    if (rpath[last] == '/') {
      rpath[last] = '\0';
    }
  }

  h = hts_findfirst(rpath);
  if (h) {
    String iname = STRING_EMPTY;

    if (type == 1) {
      hashCateg = inthash_new(127);
      StringCat(categ, "Test category 1");
      StringCat(categ, "\r\nTest category 2");
    }

    do {
      char BIGSTK line2[1024];

      /* Only directories can hold a project */
      if (!(hts_findisdir(h))) {
        continue;
      }

      /* <rpath>/<entry>/hts-cache/winprofile.ini */
      StringCopy(iname, rpath);
      StringCat(iname, "/");
      StringCat(iname, hts_findgetname(h));
      StringCat(iname, "/hts-cache/winprofile.ini");
      if (!(fexist(StringBuff(iname)))) {
        continue;
      }

      if (type != 1) {
        /* Profile list: one directory name per entry */
        if (StringLength(profiles) > 0) {
          StringCat(profiles, "\r\n");
        }
        StringCat(profiles, hts_findgetname(h));
        continue;
      }

      /* Category list: only the first "category=" line of a profile counts */
      {
        FILE* fp = fopen(StringBuff(iname), "rb");

        if (fp != NULL) {
          while (!feof(fp)) {
            char* value;
            int n = linput(fp, line2, sizeof(line2) - 2);

            if (n <= 0) {
              continue;
            }
            if (!(strfield(line2, "category="))) {
              continue;
            }

            /* Text following the 9-character "category=" prefix */
            value = line2 + 9;
            if (*value) {
              /* The hash table is used to emit each category only once */
              if (!inthash_read(hashCateg, value, NULL)) {
                inthash_write(hashCateg, value, 0);
                if (StringLength(categ) > 0) {
                  StringCat(categ, "\r\n");
                }
                unescapehttp(value, &categ);
              }
            }
            break;
          }
          line2[0] = '\0';
          fclose(fp);
        }
      }
    } while (hts_findnext(h));

    hts_findclose(h);
    StringFree(iname);
  }

  if (hashCateg) {
    inthash_delete(&hashCateg);
    hashCateg = NULL;
  }

  return (type == 1) ? StringBuffRW(categ) : StringBuffRW(profiles);
}
Beispiel #3
0
/*
   Indexing system
   A little bit dirty (quick'n dirty, in fact), but should be okay in most
   cases.  Tags and javascript are handled (ignored).

   Extracts the keywords of one document and, when fp_tmpproject is open,
   appends one "<word> <sort key> <file>" line per distinct word to it.

   html_data  document contents; 'size' bytes are scanned
   size       number of bytes to scan
   mime       MIME type of the document.  HTML and SVG (plus ASF when
              HTS_USEMMS is set) are scanned as markup; javascript and CSS
              start in the "inside script" state, so nothing is indexed
              until a "</script" is met; any other type is rejected
   filename   name recorded for the document; when it starts with the
              fslash()-converted indexpath, that prefix is cut off
   indexpath  directory of the index files ("index.txt" and "sindex.html"
              are removed from it while hts_index_init is set)

   Returns 0 when a required parameter is missing, the MIME type is not
   handled, or the temporary file / hash table cannot be created; returns 1
   otherwise (including when HTS_MAKE_KEYWORD_INDEX is disabled).
*/
int index_keyword(const char* html_data,LLint size,const char* mime,const char* filename,const char* indexpath) {
#if HTS_MAKE_KEYWORD_INDEX
  char catbuff[CATBUFF_SIZE];
  int intag=0,inscript=0,incomment=0;
  char keyword[KEYW_LEN+32];
  /* Same type as 'size': an int index could overflow on very large input */
  LLint i=0;
  int WordIndexSize=1024;
  inthash WordIndexHash=NULL;
  FILE *tmpfp=NULL;

  // Check parameters
  if (!html_data)
    return 0;
  if (!size)
    return 0;
  if (!mime)
    return 0;
  if (!filename)
    return 0;

  // Init ? (first call: drop the index files of a previous run)
  if (hts_index_init) {
    remove(concat(catbuff,indexpath,"index.txt"));
    remove(concat(catbuff,indexpath,"sindex.html"));
    hts_index_init=0;
  }

  // Check MIME type
  if (is_html_mime_type(mime)) {
    inscript=0;
  }
  // FIXME - temporary fix for image/svg+xml (svg)
  // "IN XML" (html like, in fact :) )
  else if (
    (strfield2(mime,"image/svg+xml"))
    ||
    (strfield2(mime,"image/svg-xml"))
#if HTS_USEMMS
    ||
    strfield2(mime,"video/x-ms-asf")
#endif
    ) {
    inscript=0;
  }
  else if (
    (strfield2(mime,"application/x-javascript"))
    || (strfield2(mime,"text/css"))
    ) {
    inscript=1;
  } else {
    // text/vnd.wap.wml is deliberately not indexed (won't work in many cases)
    return 0;
  }

  // Temporary file: keeps the distinct words in order of first appearance
  tmpfp = tmpfile();
  if (!tmpfp)
    return 0;

  // Create hash structure (word -> occurrence counter)
  WordIndexHash=inthash_new(WordIndexSize);
  if (!WordIndexHash) {
    // Do not leak the temporary file when the hash table cannot be created
    fclose(tmpfp);
    return 0;
  }

  // Start indexing this page
  keyword[0]='\0';
  while(i<size) {
    if (strfield(html_data + i , "<script")) {
      inscript=1;
    }
    else if (strfield(html_data + i , "<!--")) {
      incomment=1;
    }
    else if (strfield(html_data + i , "</script")) {
      if (!incomment)
        inscript=0;
    }
    else if (strfield(html_data + i , "-->")) {
      incomment=0;
    }
    else if (html_data[i]=='<') {
      if (!inscript)
        intag=1;
    }
    else if (html_data[i]=='>') {
      intag=0;
    }
    else {
      // Okay, parse keywords (plain text only)
      if ( (!inscript) && (!incomment) && (!intag) ) {
        char cchar=html_data[i];
        int pos;
        int len = (int) strlen(keyword);

        // Replace (ignore case, and so on..)
        if ((pos=strcpos(KEYW_TRANSCODE_FROM,cchar))>=0)
          cchar=KEYW_TRANSCODE_TO[pos];

        /* NOTE(review): strchr() also matches the terminating NUL, so a NUL
           byte takes this first branch and the (!cchar) test below looks
           unreachable -- confirm whether NUL was meant to end the word. */
        if (strchr(KEYW_ACCEPT,cchar)) {
          /* Ignore some characters at the beginning of a word */
          if ((len>0) || (!strchr(KEYW_IGNORE_BEG,cchar))) {
            keyword[len++]=cchar;
            keyword[len]='\0';
          }
        } else if ( (strchr(KEYW_SPACE,cchar)) || (!cchar) ) {

          /* Avoid words starting with one of these characters */
          if (len>0) {
            if (strchr(KEYW_NOT_BEG,keyword[0])) {
              keyword[(len=0)]='\0';
            }
          }

          /* Strip ending . and so */
          {
            int ok=0;
            while((len = (int) strlen(keyword)) && (!ok)) {
              if (strchr(KEYW_STRIP_END,keyword[len-1])) {      /* strip it */
                keyword[len-1]='\0';
              } else
                ok=1;
            }
          }

          /* Store it ? */
          if (len >= KEYW_MIN_LEN ) {
            hts_primindex_words++;
            if (inthash_inc(WordIndexHash,keyword)) {   /* added new */
              fprintf(tmpfp,"%s\n",keyword);
            }
          }
          keyword[(len=0)]='\0';
        } else      /* Invalid character: discard the current word */
          keyword[(len=0)]='\0';

        /* Too long to be a keyword: discard */
        if (len>KEYW_LEN) {
          keyword[(len=0)]='\0';
        }
      }

    }

    i++;
  }

  // Reset temp file
  fseek(tmpfp,0,SEEK_SET);

  // Process indexing for this page
  if (fp_tmpproject) {
    while(!feof(tmpfp)) {
      char line[KEYW_LEN + 32];
      linput(tmpfp,line,KEYW_LEN + 2);
      if (strnotempty(line)) {
        intptr_t e=0;
        if (inthash_read(WordIndexHash,line,&e)) {
          char BIGSTK savelst[HTS_URLMAXSIZE*2];
          e++;          /* 0 means "once" */

          // Cut the index path off the file name when it is its prefix
          if (strncmp((const char*)fslash(catbuff,(char*)indexpath),filename,strlen(indexpath))==0)
            strcpybuff(savelst,filename+strlen(indexpath));
          else
            strcpybuff(savelst,filename);

          // Add entry for this file and word
          fprintf(fp_tmpproject,"%s %d %s\n",line,(int) (KEYW_SORT_MAXCOUNT - e),savelst);
          hts_primindex_size++;
        }
      }
    }
  }

  // Delete temp file
  fclose(tmpfp);
  tmpfp=NULL;

  // Clear hash table
  inthash_delete(&WordIndexHash);
#endif
  return 1;
}