예제 #1
0
static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) {
    DPS_ENV *Conf = Indexer->Conf;
    DPS_UINT4URLIDLIST  L;
    size_t    k,prev;
    urlid_t   *data = NULL;
    DPS_UINT4_POS_LEN *ind=NULL;
    size_t    mind=1000,nind=0;
    char fname[PATH_MAX];
    int  dat_fd=0, ind_fd=0, rc;
    const char	*vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);

    bzero(&L, sizeof(DPS_UINT4URLIDLIST));

    rc = DpsLimit4(Indexer, &L, field, type, db);

    if(rc != DPS_OK) {
        DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__);
        goto err1;
    }

    if(!L.Item)return(1);

    if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4);

    data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data));
    if(!data) {
        fprintf(stderr,"Error1: %s\n",strerror(errno));
        goto err1;
    }
    ind=(DPS_UINT4_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT4_POS_LEN));
    if(!ind) {
        fprintf(stderr,"Error2: %s\n",strerror(errno));
        goto err1;
    }
    prev=0;
    for(k=0; k<L.nitems; k++) {
        data[k]=L.Item[k].url_id;
        if((k==L.nitems-1) || (L.Item[k].val!=L.Item[prev].val)) {
            if(nind==mind) {
                mind+=1000;
                ind=(DPS_UINT4_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT4_POS_LEN));
                if(!ind) {
                    fprintf(stderr,"Error3: %s\n",strerror(errno));
                    goto err1;
                }
            }
            /* Fill index */
            ind[nind].val=L.Item[prev].val;
            ind[nind].pos = prev * sizeof(*data);
            if (k == L.nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data);
            else ind[nind].len = (k - prev) * sizeof(*data);
            DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len);
            nind++;

            prev=k;
        }
    }
    if (L.mapped) {
#ifdef HAVE_SYS_MMAN_H
        if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#elif defined(HAVE_SYS_SHM_H)
        if (shmdt(L.Item)) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#endif
        unlink(L.shm_name);
    } else {
        DPS_FREE(L.Item);
    }

    dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
    if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
        fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsWriteLock(dat_fd);
    if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) {
        fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsUnLock(dat_fd);
    DpsClose(dat_fd);
    DPS_FREE(data);

    dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
    if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
        fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsWriteLock(ind_fd);
    if((nind*sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT4_POS_LEN))) {
        fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsUnLock(ind_fd);
    DpsClose(ind_fd);
    DPS_FREE(ind);

    return(0);

err1:
    if (L.mapped) {
#ifdef HAVE_SYS_MMAN_H
        if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#elif defined(HAVE_SYS_SHM_H)
        if (shmdt(L.Item)) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#endif
        unlink(L.shm_name);
    } else {
        DPS_FREE(L.Item);
    }
    DPS_FREE(data);
    DPS_FREE(ind);
    if(dat_fd) DpsClose(dat_fd);
    if(ind_fd) DpsClose(ind_fd);
    return(1);
}
예제 #2
0
int main(int argc,char **argv, char **envp) {
  int ch, sleeps = 1, optimize = 0, obi = 0;
  unsigned int from = 0, to = 0xFFF, p_to = 0;
	DPS_ENV * Env;
	const char * config_name = DPS_CONF_DIR "/cached.conf";

	DpsInit(argc, argv, envp); /* Initialize library */
	
	DpsInitMutexes();
	Env=DpsEnvInit(NULL);
	if (Env == NULL) exit(1);
	DpsSetLockProc(Env, DpsLockProc);

/*#ifndef HAVE_SETPROCTITLE*/
	ARGV = argv;
	ARGC = argc;
/*#endif*/
	while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){
		switch (ch) {
			case 'f':
				sscanf(optarg, "%x", &from);
				break;	
			case 't': 
				sscanf(optarg, "%x", &p_to);
				break;
			case 'w':
			        DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg);
				break;
                        case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break;
                        case 'b': obi++; break;
                        case 'o': optimize++; break;
                        case 'p': sleeps = atoi(optarg); break;
			case 'h':
			case '?':
			default:
			  usage();
			  DpsEnvFree(Env);
			  DpsDeInit();
			  DpsDestroyMutexes();
				return 1;
				break;
		}
	}
	argc -= optind;
	argv += optind;

	if(argc > 1) {
		usage();
		DpsEnvFree(Env);
		DpsDeInit();
		DpsDestroyMutexes();
		return 1;
	} else if (argc == 1) {
	        config_name = argv[0];
	}
	{
		DPS_LOGDEL *del_buf=NULL;
		size_t del_count = 0, log, bytes, n = 0;
		int dd, log_fd;
		struct stat sb;
		char dname[PATH_MAX] = "";
		DPS_BASE_PARAM P;
		DPS_LOGWORD *log_buf = NULL;
		DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0);

		log2stderr = 1;
		if (Indexer == NULL) {
		  fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__);
		  exit(DPS_ERROR);
		}
		
		if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){
		  fprintf(stderr, "%s\n", DpsEnvErrMsg(Env));
		  DpsEnvFree(Env);
		  DpsDeInit();
		  DpsDestroyMutexes();
		  return DPS_ERROR;
		}
		DpsOpenLog("splitter", Env, log2stderr);
		Indexer->flags = Env->flags = DPS_FLAG_UNOCON;
		DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*");

		bzero(&P, sizeof(P));
		P.subdir = DPS_TREEDIR;
		P.basename = "wrd";
		P.indname = "wrd";
		P.mode = DPS_WRITE_LOCK;
		P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300);
		P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, "VarDir", DPS_VAR_DIR));
		P.A = Indexer;
		if (p_to != 0) to = p_to;
		else to = P.NFiles - 1;
#ifdef HAVE_ZLIB
		P.zlib_method = Z_DEFLATED;
		P.zlib_level = 9;
		P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
		P.zlib_memLevel = 9;
		P.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif

		/* Open del log file */
		dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH);
		if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) {
		  dps_strerror(NULL, 0, "Can't open del log '%s'", dname);
		  exit(DPS_ERROR);
		}

		DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles);

		/* Allocate del buffer */
		fstat(dd, &sb);
		if (sb.st_size != 0) {
		  del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1);
		  if (del_buf == NULL) {
		    fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__);
		    exit(0);
		  }
		  del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL);
		}
		DpsClose(dd);

		/* Remove duplicates URLs in DEL log     */
		/* Keep only oldest records for each URL */
		if (del_count > 0) {
		  DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count);
		  if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog);
		    DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups");
		  del_count = DpsRemoveDelLogDups(del_buf, del_count);
		}

		DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to);

		for(log = from; log <= to; log++) {

		  /* Open log file */
		  dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log);
		  if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){
		    if (errno == ENOENT) {
		      dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname);
		      n = 0;
/*		      continue;*/
		    } else {
		      dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname);
		      continue;
		    }
		  } else {
		    DpsWriteLock(log_fd); 
		    DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log);
		    fstat(log_fd, &sb);
		    log_buf = (sb.st_size > 0) ? (DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL;
		    if (log_buf != NULL) {
		      unlink(dname);
		      bytes = read(log_fd,log_buf,(size_t)sb.st_size);
		      (void)ftruncate(log_fd, (off_t)0);
		      DpsUnLock(log_fd);
		      DpsClose(log_fd);
		      
		      n = bytes / sizeof(DPS_LOGWORD);
		      DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n);
		      if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog);
		      DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords");
		      n = DpsRemoveOldWords(log_buf, n, del_buf, del_count);
		      if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd);
		      
		    } else {
		      n = 0;
		      DpsUnLock(log_fd);
		      DpsClose(log_fd);
		    }
		  }

		  DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize);
		  if (obi) DpsBaseOptimize(&P, log);
		  DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count);
		  if (optimize) DpsBaseOptimize(&P, log);
		  DpsBaseClose(&P);
		  DPS_FREE(log_buf);

		  DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to);
		  DPSSLEEP(sleeps);
		}
		DPS_FREE(del_buf);
		DpsAgentFree(Indexer);
		DPS_FREE(P.vardir);
	}

	fprintf(stderr, "Splitting done.\n");
	
	DpsEnvFree(Env);
	DpsDeInit();
	DpsDestroyMutexes();

#ifdef EFENCE
	fprintf(stderr, "Memory leaks checking\n");
	DpsEfenceCheckLeaks();
#endif
#ifdef FILENCE
	fprintf(stderr, "FD leaks checking\n");
	DpsFilenceCheckLeaks(NULL);
#endif
	return 0;
}