コード例 #1
0
ファイル: env.c プロジェクト: BackupGGCode/dataparksearch
DPS_ENV *DpsEnvInit(DPS_ENV *Conf){
#ifdef MECAB
  const char *mecab_argv[8] = {"mecab", "-F", "%m ", "-B", " ", "-E", " ", NULL};
#endif
	if(!Conf){
		Conf=(DPS_ENV *)DpsMalloc(sizeof(DPS_ENV));
		if (Conf == NULL) return NULL;
		bzero((void*)Conf, sizeof(*Conf));
		Conf->freeme=1;
	}else{
		bzero((void*)Conf, sizeof(*Conf));
	}
	
	Conf->Flags.OptimizeAtUpdate = 1;
	Conf->Flags.do_excerpt = 1;
	Conf->Flags.PopRankNeoIterations = 3;
	Conf->Flags.GuesserBytes = 512;
	Conf->Flags.robots_period = 604800;   /* one week */
	Conf->Flags.URLInfoSQL = 1;
	Conf->Flags.SRVInfoSQL = 1;
	Conf->Flags.CheckInsertSQL = 1;
	Conf->Flags.mark_for_index = 1;
	Conf->Flags.MaxSiteLevel = 2;
	Conf->Flags.SEASentences = 32;
	Conf->Flags.SEASentenceMinLength = 64;
	Conf->Flags.PagesInGroup = 1;
	Conf->Flags.SubDocCnt = 5;
	Conf->Flags.MaxCrawlDelay = 300;
	Conf->Flags.rel_nofollow = 1;
	Conf->Flags.bind_addr.sin_family = AF_INET;
	Conf->Flags.use_meta = 1;
	Conf->WordParam.min_word_len = 1;
	Conf->WordParam.max_word_len = 32;
	Conf->WordParam.correct_factor = 1;
	Conf->WordParam.incorrect_factor = 1;
	Conf->url_number = 0x7FFFFFFF;
	Conf->lcs=DpsGetCharSet("latin1");
	Conf->bcs=DpsGetCharSet("latin1");
	Conf->CharsToEscape = DpsStrdup("\"&<>");
#ifdef MECAB
/*	Conf->mecab = mecab_new2 ("mecab -F \"%m \" -B \" \" -E \" \"");*/
	Conf->mecab = mecab_new(7, (char**)mecab_argv);
#endif
	
	return(Conf);
}
コード例 #2
0
ファイル: chinese.c プロジェクト: invokerj/dpsearch-4.53
int DpsChineseListLoad(DPS_AGENT *Agent, DPS_CHINALIST *List, const char *charset, const char *fname) {
     struct stat     sb;
     char *str, *data = NULL, *cur_n = NULL;
     DPS_CHINAWORD chinaword;
     char word[PATH_MAX];
     dpsunicode_t uword[256];
     DPS_CHARSET *sys_int, *fcs;
     DPS_CONV to_uni;
     int             fd;
     char            savebyte;

     sys_int = DpsGetCharSet("sys-int");
     if (!(fcs = DpsGetCharSet(charset))) {
       if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Charset '%s' not found or not supported", charset);
       else fprintf(stderr, "Charset '%s' not found or not supported", charset);
       return DPS_ERROR;
     }
     DpsConvInit(&to_uni, fcs, sys_int, Agent->Conf->CharsToEscape, DPS_RECODE_HTML);

     if (*fname != '/') {
       dps_snprintf(word, sizeof(word), "%s/%s", DpsVarListFindStr(&Agent->Conf->Vars, "EtcDir", DPS_CONF_DIR), fname);
       fname = word;
     }

     if (stat(fname, &sb)) {
       if (Agent->Conf->is_log_open) 
	    DpsLog(Agent, DPS_LOG_ERROR, "Unable to stat FreqDic file '%s': %s", fname, strerror(errno));
       else fprintf(stderr, "Unable to stat FrecDic file '%s': %s", fname, strerror(errno));
       return DPS_ERROR;
     }
     if ((fd = open(fname, O_RDONLY)) <= 0) {
       if (Agent->Conf->is_log_open) 
	    DpsLog(Agent, DPS_LOG_ERROR, "Unable to open FreqDic file '%s': %s", fname, strerror(errno));
       else fprintf(stderr, "Unable to open FreqDic file '%s': %s", fname, strerror(errno));
       return DPS_ERROR;
     }
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       if (Agent->Conf->is_log_open) 
	 DpsLog(Agent, DPS_LOG_ERROR, "Unable to alloc %d bytes", sb.st_size);
       else fprintf(stderr, "Unable to alloc %ld bytes", (long)sb.st_size);
       close(fd);
       return DPS_ERROR;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       if (Agent->Conf->is_log_open) 
	 DpsLog(Agent, DPS_LOG_ERROR, "Unable to read FreqDic file '%s': %s", fname, strerror(errno));
       else fprintf(stderr, "Unable to read FreqDic file '%s': %s", fname, strerror(errno));
       DPS_FREE(data);
       close(fd);
       return DPS_ERROR;
     }
     data[sb.st_size] = '\0';
     str = data;
     cur_n = strchr(str, NL_INT);
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }
     close(fd);

     bzero((void*)&chinaword, sizeof(chinaword));
     chinaword.word = uword;
     while(str != NULL) {
          if(!str[0]) goto loop_continue;
          if(str[0]=='#') goto loop_continue;
          sscanf(str, "%d %63s ", &chinaword.freq, word );
	  DpsConv(&to_uni, (char*)uword, sizeof(uword), word, sizeof(word));
          DpsChineseListAdd(List, &chinaword);
     loop_continue:
	  str = cur_n;
	  if (str != NULL) {
	    *str = savebyte;
	    cur_n = strchr(str, NL_INT);
	    if (cur_n != NULL) {
	      cur_n++;
	      savebyte = *cur_n;
	      *cur_n = '\0';
	    }
	  }
     }
     DPS_FREE(data);
     DpsChineseListSort(List);
     { register size_t i, j = 0;
       for (i = 1; i < List->nwords; i++) {
	 if (cmpchinese(&List->ChiWord[j], &List->ChiWord[i]) == 0) {
	   List->ChiWord[j].freq += List->ChiWord[i].freq;
	 } else { j++;
	 }
       }
       for (i = j + 1; i < List->nwords; i++) {
	 DPS_FREE(List->ChiWord[i].word);
       }
       List->nwords = j + 1;
     }
     return DPS_OK;
}
コード例 #3
0
ファイル: spell.c プロジェクト: github188/SimpleCode
__C_LINK int __DPSCALL DpsImportDictionary(DPS_ENV * Conf, const char *lang, const char *charset,
				   const char *filename, int skip_noflag, const char *first_letters){
        struct stat     sb;
	char *str, *data = NULL, *cur_n = NULL;	
	char *lstr;	
	dpsunicode_t *ustr;	
	DPS_CHARSET *sys_int;
	DPS_CHARSET *dict_charset;
	DPS_CONV touni;
	DPS_CONV fromuni;
	int             fd;
	char            savebyte;

	if ((lstr = (char*) DpsMalloc(2048)) == NULL) {
	  DPS_FREE(str);
	  return DPS_ERROR; 
	}
	if ((ustr = (dpsunicode_t*) DpsMalloc(8192)) == NULL) {
	  DPS_FREE(lstr);
	  return DPS_ERROR; 
	}

	dict_charset = DpsGetCharSet(charset);
	sys_int = DpsGetCharSet("sys-int");
	if ((dict_charset == NULL) || (sys_int == NULL)) {
	  DPS_FREE(lstr);
	  DPS_FREE(ustr);
	  return DPS_ERROR;
	}
	
	DpsConvInit(&touni, dict_charset, sys_int, Conf->CharsToEscape, 0);
	DpsConvInit(&fromuni, sys_int, dict_charset, Conf->CharsToEscape, 0);
	
	if (stat(filename, &sb)) {
	  fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
	  DPS_FREE(lstr);
	  DPS_FREE(ustr);
	  return DPS_ERROR;
	}
	if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
	  fprintf(stderr, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
	  return DPS_ERROR;
	}
	if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
	  fprintf(stderr, "Unable to alloc %ld bytes", (long)sb.st_size);
	  DpsClose(fd);
	  DPS_FREE(lstr);
	  DPS_FREE(ustr);
	  return DPS_ERROR;
	}
	if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
	  fprintf(stderr, "Unable to read synonym file '%s': %s", filename, strerror(errno));
	  DPS_FREE(data);
	  DpsClose(fd);
	  DPS_FREE(lstr);
	  DPS_FREE(ustr);
	  return DPS_ERROR;
	}
	data[sb.st_size] = '\0';
	str = data;
	cur_n = strchr(str, '\n');
	if (cur_n != NULL) {
	  cur_n++;
	  savebyte = *cur_n;
	  *cur_n = '\0';
	}

	DpsClose(fd);
	while(str != NULL) {
		char *s;
		const char *flag;
		int res;
		
	        flag = NULL;
		s = str;
		while(*s){
			if(*s == '\r') *s = '\0';
			if(*s == '\n') *s = '\0';
			s++;
		}
		if((s=strchr(str,'/'))){
			*s=0;
			s++;flag=s;
			while(*s){
				if(((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))s++;
				else{
					*s=0;
					break;
				}
			}
		}else{
			if(skip_noflag)	goto loop_continue;
			flag="";
		}

		res = DpsConv(&touni, (char*)ustr, 8192, str, 1024);
		DpsUniStrToLower(ustr);

		/* Dont load words if first letter is not required */
		/* It allows to optimize loading at  search time   */
		if(*first_letters) {
			DpsConv(&fromuni, lstr, 2048, ((const char*)ustr),(size_t)res);
			if(!strchr(first_letters,lstr[0]))
				goto loop_continue;
		}
		res = DpsSpellAdd(&Conf->Spells,ustr,flag,lang);
		if (res != DPS_OK) {
		  DPS_FREE(lstr);
		  DPS_FREE(ustr); DPS_FREE(data);
		  return res;
		}
		if (Conf->Flags.use_accentext) {
		  dpsunicode_t *af_uwrd = DpsUniAccentStrip(ustr);
		  if (DpsUniStrCmp(af_uwrd, ustr) != 0) {
		    res = DpsSpellAdd(&Conf->Spells, af_uwrd, flag, lang);
		    if (res != DPS_OK) {
		      DPS_FREE(lstr);
		      DPS_FREE(ustr); DPS_FREE(data); DPS_FREE(af_uwrd);
		      return res;
		    }
		  }
		  DPS_FREE(af_uwrd);
		  if (strncasecmp(lang, "de", 2) == 0) {
		    dpsunicode_t *de_uwrd = DpsUniGermanReplace(ustr);
		    if (DpsUniStrCmp(de_uwrd, ustr) != 0) {
		      res = DpsSpellAdd(&Conf->Spells, de_uwrd, flag, lang);
		      if (res != DPS_OK) {
			DPS_FREE(lstr);
			DPS_FREE(ustr); DPS_FREE(data); DPS_FREE(de_uwrd);
			return res;
		      }
		    }
		    DPS_FREE(de_uwrd);
		  }
		}
	loop_continue:
		str = cur_n;
		if (str != NULL) {
		  *str = savebyte;
		  cur_n = strchr(str, '\n');
		  if (cur_n != NULL) {
		    cur_n++;
		    savebyte = *cur_n;
		    *cur_n = '\0';
		  }
		}
	}
	DPS_FREE(data);
	DPS_FREE(lstr);
	DPS_FREE(ustr);
	return DPS_OK;
}
コード例 #4
0
ファイル: spell.c プロジェクト: github188/SimpleCode
static int DpsUniRegExec(const DPS_UNIREG_EXP *reg, const dpsunicode_t *string) {
	const dpsunicode_t *start = string;
	int match=0;
#ifdef DEBUG_UNIREG
	DPS_CHARSET *k = DpsGetCharSet("koi8-r");
	DPS_CHARSET *sy = DpsGetCharSet("sys-int");
	DPS_CONV fromuni;
	char sstr[1024];
	char rstr[1024];
	
	DpsConvInit(&fromuni, sy, k, NULL, 0);
#endif
	
	for(start=string;*start;start++){
		const dpsunicode_t *tstart=start;
		size_t i;
		
		for(i=0;i<reg->ntokens;i++){
			const dpsunicode_t *s;
			int inc=DPS_UNIREG_INC;
#ifdef DEBUG_UNIREG

			DpsConv(&fromuni, sstr, 1024, (char*)tstart, 1024);
			DpsConv(&fromuni, rstr, 1024, (char*)reg->Token[i].str, 1024);
			printf("t:%d tstart='%s'\ttok='%s'\t", i, sstr, rstr);
#endif
			switch(reg->Token[i].str[0]){
				case '^':
					if(string!=tstart){
						match=0;
					}else{	
						match=1;
					}
					break;
				case '[':
					match=0;
					for(s=reg->Token[i].str+1;*s;s++){
						if(*s==']'){
						}else
						if(*s=='^'){
							inc=DPS_UNIREG_EXC;
							match=1;
						}else{
							if((*tstart==*s)&&(inc==DPS_UNIREG_EXC)){
								match=0;
								break;
							}
							if((*tstart==*s)&&(inc==DPS_UNIREG_INC)){
								match=1;
								break;
							}
						}
					}
					tstart++;
					break;
				case '$':
					if(*tstart!=0){
						match=0;
					}else{
						match=1;
					}
					break;
				default:
					match=1;
					for(s=reg->Token[i].str;(*s)&&(*tstart);s++,tstart++){
						if(*s=='.'){
							/* Any char */
						}else
						if((*s)!=(*tstart)){
							match=0;
							break;
						}
					}
					if((*s)&&(!*tstart))match=0;
					break;
			}
#ifdef DEBUG_UNIREG
			printf("match=%d\n",match);
#endif
			if(!match)break;
		}
		if(match)break;
	}

#ifdef DEBUG_UNIREG
	printf("return match=%d\n",match);
#endif
	return match;

}
コード例 #5
0
ファイル: filler.c プロジェクト: Maxime2/dataparksearch
int main(int argc, char ** argv, char **envp) {
	const char	*env, *bcharset, *lcharset, *conf_dir;
	char		template_name[PATH_MAX+6]="";
	char            *template_filename = NULL;
	char		*query_string = NULL;
	char		self[1024]="";
	char		*url = NULL;
	const char      *ResultContentType;
	int		res,httpd=0;
	size_t          catcolumns = 0;
	int		page_size,page_number;
	DPS_ENV		*Env;
	DPS_AGENT	*Agent;
	DPS_VARLIST	query_vars;
	
	/* Output Content-type if under HTTPD	 */
	/* Some servers do not pass QUERY_STRING */
	/* if the query was empty, so check	 */
	/* REQUEST_METHOD too     to be safe     */
	
	httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD"));
	if (!(conf_dir=getenv("DPS_ETC_DIR")))
		conf_dir=DPS_CONF_DIR;
	
	
	DpsInit(argc, argv, envp);
	Env=DpsEnvInit(NULL);
	if (Env == NULL) {
	  if(httpd){
	    printf("Content-Type: text/plain\r\n\r\n");
	  }
	  printf("Can't alloc Env\n");
	  exit(0);
	}
	DpsVarListInit(&query_vars);
	Agent = DpsAgentInit(NULL, Env, 0);
	if (Agent == NULL) {
	  if(httpd){
	    printf("Content-Type: text/plain\r\n\r\n");
	  }
	  printf("Can't alloc Agent\n");
	  exit(0);
	}
	DpsVarListAddEnviron(&Env->Vars,"ENV");
	
	/* Detect self and template name */
	if((env = getenv("DPSEARCH_TEMPLATE")))
		dps_strncpy(template_name, env, sizeof(template_name) - 1);
	else if((env = getenv("PATH_INFO")) && env[0])
		dps_strncpy(template_name, env + 1, sizeof(template_name) - 1);
	
	if((env=getenv("DPSEARCH_SELF")))
		dps_strncpy(self,env,sizeof(self)-1);
	
	if((env=getenv("QUERY_STRING"))){
	        query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2);
		if (query_string == NULL) {
		  if(httpd){
		    printf("Content-Type: text/plain\r\n\r\n");
		  }
		  printf("Can't alloc query_string\n");
		  exit(0);
		}
		dps_strncpy(query_string, env, dps_strlen(env) + 1);

		/* Hack for Russian Apache from apache.lexa.ru  */
		/* QUERY_STRING is already converted to server  */
		/* character set. We must print original query  */
		/* string instead however. Under usual apache   */ 
		/* we'll use QUERY_STRING. Note that query_vars */
		/* list will contain not unescaped values, so   */
		/* we don't have to escape them when displaying */
		env = getenv("CHARSET_SAVED_QUERY_STRING");
		DpsParseQStringUnescaped(&query_vars,env?env:query_string);
	
		/* Unescape and save variables from QUERY_STRING */
		/* Env->Vars will have unescaped values however  */
		DpsParseQueryString(Agent,&Env->Vars,query_string);
	
		template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", ""));
	
		if((env=getenv("REDIRECT_STATUS"))){

			/* Check Apache internal redirect  */
			/* via   "AddHandler" and "Action" */
			if(!self[0]){
				dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1);
			}
			if(!template_name[0]){
				dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1);
			}
			if (*template_filename == '\0') { 
			  DPS_FREE(template_filename); 
			  template_filename = (char*)DpsStrdup("filler.htm"); 
			}
		}else{
			/* CGI executed without Apache internal redirect */

			/* Detect $Self variable with OS independant SLASHES */
			if(!self[0]){
				dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1);
			}

			if(!template_name[0]){
				char *s,*e;
				
				/*This is with OS specific SLASHES */
				env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi");

				if(strcmp(conf_dir,".")){
					/* Take from the config directory */
					dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", 
						     conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self));
				}else{
					/* Take from the current directory */
					dps_strncpy(template_name,env,sizeof(template_name)-1);
				}

				/* Find right slash if it presents */
				s=((s=strrchr(template_name,DPSSLASH))?s:template_name);

				if (*template_filename == '\0') {

				  /* Find .cgi substring */
				  if ((e = strstr(s, ".cgi")) != NULL) {
					/* Replace ".cgi" with ".htm" */
					e[1]='h';e[2]='t';e[3]='m';
				  } else {
				        dps_strcat(s, ".htm");
				  }
				  e = strrchr(s, '/');
				  DPS_FREE(template_filename);
				  template_filename = (char*)DpsStrdup(e + 1);
				} else {
				  dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 2);
				}
			}
		}
	}else{
		/* Executed from command line     */
		/* or under server which does not */
		/* pass an empty QUERY_STRING var */
		if(argv[1]) {
		  query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10);
		  if (query_string == NULL) {
		    if(httpd){
		      printf("Content-Type: text/plain\r\n\r\n");
		    }
		    printf("Can't realloc query_string\n");
		    exit(0);
		  }
		  sprintf(query_string, "q=%s", argv[1]);
		} else {
		  query_string = (char*)DpsRealloc(query_string, 1024);
		  if (query_string == NULL) {
		    if(httpd){
		      printf("Content-Type: text/plain\r\n\r\n");
		    }
		    printf("Can't realloc query_string\n");
		    exit(0);
		  }
		  sprintf(query_string, "q=");
		}

		/* Hack for Russian Apache from apache.lexa.ru  */
		/* QUERY_STRING is already converted to server  */
		/* character set. We must print original query  */
		/* string instead however. Under usual apache   */ 
		/* we'll use QUERY_STRING. Note that query_vars */
		/* list will contain not unescaped values, so   */
		/* we don't have to escape them when displaying */
		env = getenv("CHARSET_SAVED_QUERY_STRING");
		DpsParseQStringUnescaped(&query_vars,env?env:query_string);
	
		/* Unescape and save variables from QUERY_STRING */
		/* Env->Vars will have unescaped values however  */
		DpsParseQueryString(Agent,&Env->Vars,query_string);

		DPS_FREE(template_filename);
		template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", ""));
		if (*template_filename == '\0') { 
		  DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); 
		}
	
		/*// Get template name from command line variable &tmplt */
		if(!template_name[0])
			dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename);
	}
	
	DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename);
	DPS_FREE(template_filename);

	Agent->tmpl.Env_Vars = &Env->Vars;
	
	DpsURLNormalizePath(template_name);
	
	if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) 
	    || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) {
	  if (strcmp(template_name, "filler.htm")) { /* trying load default template */
	    fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr);
	    DPS_FREE(template_filename);
	    template_filename = (char*)DpsStrdup("filler.htm");
	    dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename);

	    if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) {

		if(httpd)printf("Content-Type: text/plain\r\n\r\n");
		printf("%s\n",Env->errstr);
		DpsVarListFree(&query_vars);
		DpsEnvFree(Env);
		DPS_FREE(query_string);
		DpsAgentFree(Agent);
		return(0);
	    }
	  } else {
		if(httpd)printf("Content-Type: text/plain\r\n\r\n");
		printf("%s\n",Env->errstr);
		DpsVarListFree(&query_vars);
		DpsEnvFree(Env);
		DPS_FREE(query_string);
		DpsAgentFree(Agent);
		return(0);
	  }
	}

	/* set locale if specified */
	if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) {
	  setlocale(LC_ALL, url);
/*#ifdef HAVE_ASPELL*/
	  { char *p;
	    if ((p = strchr(url, '.')) != NULL) {
	      *p = '\0';
	      DpsVarListReplaceStr(&Env->Vars, "g-lc", url);
	      *p = '.';
	    }
	  }
/*#endif*/
	  url = NULL;
	}
	
	/* Call again to load search Limits if need */
	DpsParseQueryString(Agent, &Env->Vars, query_string);
	Agent->Flags = Env->Flags;
	Agent->flags |= DPS_FLAG_UNOCON;
	Env->flags |= DPS_FLAG_UNOCON;
	DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0));
	DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? "yes" : "no"), "yes"));
	DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name);
		DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR));
		DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d",
		       Env->Affixes.naffixes,Env->Spells.nspell,
		       Env->Synonyms.nsynonyms,
		       Env->Acronyms.nacronyms,
		       Env->StopWords.nstopwords);
		DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords);
		DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords);
		DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords);
	DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*");
	Agent->tmpl.Env_Vars = &Agent->Vars;
/*	DpsVarListAddEnviron(&Agent->Vars, "ENV");*/
/****************************************************************************************************************************************/
	/* This is for query tracking */
	DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string);
	DpsVarListAddStr(&Agent->Vars, "self", self);
	env = getenv("HTTP_X_FORWARDER_FOR");
	if (env) {
	  DpsVarListAddStr(&Agent->Vars, "IP", env);
	} else {
	  env = getenv("REMOTE_ADDR");
	  DpsVarListAddStr(&Agent->Vars, "IP", env ? env : "localhost");
	}
	
	bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1");
	Env->bcs=DpsGetCharSet(bcharset);
	lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1");
	Env->lcs=DpsGetCharSet(lcharset);

	ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html");
	
	if(httpd){
		if(!Env->bcs){
			printf("Content-Type: text/plain\r\n\r\n");
			printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name);
			exit(0);
		}else if(!Env->lcs){
			printf("Content-Type: text/plain\r\n\r\n");
			printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name);
			exit(0);
		}else{
		  printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset);
		}
	}else{
		if(!Env->bcs){
			printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name);
			exit(0);
		}
		if(!Env->lcs){
			printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name);
			exit(0);
		}
	}
	
	/* These parameters taken from "variable section of template"*/
	
	res         = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS);
	page_size   = dps_min(res, MAX_PS);
	page_number = DpsVarListFindInt(&Agent->Vars, "p", 0);
	if (page_number == 0) {
	  page_number = DpsVarListFindInt(&Agent->Vars, "np", 0);
	  DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1);
	} else page_number--;
	
	res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size;
	DpsVarListAddInt(&Agent->Vars, "pn", res);
	
	catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", ""));


	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top");

	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop");
	
	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res");

	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot");
	
	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom");
	
	DpsVarListFree(&query_vars);
	DpsAgentFree(Agent);
	DpsEnvFree(Env);
	DPS_FREE(query_string);
	DPS_FREE(url);
	if (httpd) fflush(NULL); else fclose(stdout);
	
#ifdef EFENCE
	fprintf(stderr, "Memory leaks checking\n");
	DpsEfenceCheckLeaks();
#endif
#ifdef FILENCE
	fprintf(stderr, "FD leaks checking\n");
	DpsFilenceCheckLeaks(NULL);
#endif

	return DPS_OK;
}
コード例 #6
0
ファイル: synonym.c プロジェクト: github188/SimpleCode
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
     struct stat     sb;
     char      *str, *data = NULL, *cur_n = NULL;
     char      lang[64]="";
     DPS_CHARSET    *cs=NULL;
     DPS_CHARSET    *sys_int=DpsGetCharSet("sys-int");
     DPS_CONV  file_uni;
     DPS_WIDEWORD    *ww = NULL;
     size_t key = 1;
     int flag_th = 0;
     int             fd;
     char            savebyte;
     
     if (stat(filename, &sb)) {
       fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", sb.st_size);
       DpsClose(fd);
       return DPS_ERROR;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
       DPS_FREE(data);
       DpsClose(fd);
       return DPS_ERROR;
     }
     data[sb.st_size] = '\0';
     str = data;
     cur_n = strchr(str, '\n');
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }

     while(str != NULL) {
          if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue;
          
          if(!strncasecmp(str,"Charset:",8)){
               char * lasttok;
               char * charset;
               if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
                    cs=DpsGetCharSet(charset);
                    if(!cs){
                         dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                                   charset, filename);
                         DPS_FREE(data);
			 DpsClose(fd);
                         return DPS_ERROR;
                    }
                    DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
               }
          }else
          if(!strncasecmp(str,"Language:",9)){
               char * lasttok;
               char * l;
               if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
                    dps_strncpy(lang, l, sizeof(lang)-1);
               }
          }else
          if(!strncasecmp(str, "Thesaurus:", 10)) {
               char * lasttok;
	       char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
	       flag_th = (strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
          }else{
               char      *av[255];
               size_t         ac, i, j;
	       dpsunicode_t *t;

               if(!cs){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
                    DpsClose(fd); DPS_FREE(data);
                    return DPS_ERROR;
               }
               if(!lang[0]){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
                    DpsClose(fd); DPS_FREE(data);
                    return DPS_ERROR;
               }

               ac = DpsGetArgs(str, av, 255);
               if (ac < 2) goto loop_continue;

               if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) return DPS_ERROR;

               for (i = 0; i < ac; i++) {
                 ww[i].word = av[i];
                 ww[i].len = dps_strlen(av[i]);
                 ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
		 if (ww[i].uword == NULL) return DPS_ERROR;
                 DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
                 DpsUniStrToLower(ww[i].uword);
		 ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
		 DPS_FREE(t);
               }

               for (i = 0; i < ac - 1; i++) {
                 for (j = i + 1; j < ac; j++) {

                   if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
                    Env->Synonyms.msynonyms += 64;
                    Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, 
                                                   sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
		    if (Env->Synonyms.Synonym == NULL) {
		      Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
		      return DPS_ERROR;
  		    }
                   }
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                   /* Add direct order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword);
		   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
		     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                   /* Add reverse order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword);
		   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
		     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
                 }
               }

               for (i = 0; i < ac; i++) {
                 DPS_FREE(ww[i].uword);
               }
               do { key++; } while (key == 0);
          }
     loop_continue:
	  str = cur_n;
	  if (str != NULL) {
	    *str = savebyte;
	    cur_n = strchr(str, '\n');
	    if (cur_n != NULL) {
	      cur_n++;
	      savebyte = *cur_n;
	      *cur_n = '\0';
	    }
	  }
     }
     DPS_FREE(data);
     DPS_FREE(ww);
     DpsClose(fd);
     return DPS_OK;
}