示例#1
0
/*
 * Connect to a Unix-domain stream socket.
 *
 * unix_socket is expanded with DpsRelVarName() into a local path buffer and
 * that path is used as the socket address.
 *
 * Returns the connected descriptor on success, or DPS_NET_CANT_CONNECT when
 * the expanded name is too long, socket() fails or connect() fails.  On a
 * connect() failure the descriptor is closed before returning, so nothing is
 * leaked to the caller.
 */
static int open_socket(DPS_AGENT *A, char *unix_socket) {
  char unix_path[128];
  struct sockaddr_un unix_addr;
  int sockfd;

  /* Reject names of 105 characters or more (same limit as before). */
  if (DpsRelVarName(A->Conf, unix_path, sizeof(unix_path), unix_socket) >= 105) {
    DpsLog(A, DPS_LOG_ERROR, "Unix socket name '%s' is too large", unix_path);
    return(DPS_NET_CANT_CONNECT);
  }

  if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
    DpsLog(A, DPS_LOG_ERROR, "unix socket() error %d", errno);
    return(DPS_NET_CANT_CONNECT);
  }
  DpsSockOpt(A, sockfd);

  bzero((void*)&unix_addr, sizeof(unix_addr));
  unix_addr.sun_family = AF_UNIX;
  dps_strncpy(unix_addr.sun_path, unix_path, sizeof(unix_addr.sun_path));

  if (connect(sockfd, (struct sockaddr *)&unix_addr, sizeof(unix_addr))) {
    /* Report first: close() may overwrite errno. */
    dps_strerror(A, DPS_LOG_ERROR, "unix socket '%s' connect() error", unix_path);
    close(sockfd);
    return(DPS_NET_CANT_CONNECT);
  }

  return sockfd;
}
示例#2
0
int main(int argc,char **argv, char **envp) {
  int ch, sleeps = 1, optimize = 0, obi = 0;
  unsigned int from = 0, to = 0xFFF, p_to = 0;
	DPS_ENV * Env;
	const char * config_name = DPS_CONF_DIR "/cached.conf";

	DpsInit(argc, argv, envp); /* Initialize library */
	
	DpsInitMutexes();
	Env=DpsEnvInit(NULL);
	if (Env == NULL) exit(1);
	DpsSetLockProc(Env, DpsLockProc);

/*#ifndef HAVE_SETPROCTITLE*/
	ARGV = argv;
	ARGC = argc;
/*#endif*/
	while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){
		switch (ch) {
			case 'f':
				sscanf(optarg, "%x", &from);
				break;	
			case 't': 
				sscanf(optarg, "%x", &p_to);
				break;
			case 'w':
			        DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg);
				break;
                        case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break;
                        case 'b': obi++; break;
                        case 'o': optimize++; break;
                        case 'p': sleeps = atoi(optarg); break;
			case 'h':
			case '?':
			default:
			  usage();
			  DpsEnvFree(Env);
			  DpsDeInit();
			  DpsDestroyMutexes();
				return 1;
				break;
		}
	}
	argc -= optind;
	argv += optind;

	if(argc > 1) {
		usage();
		DpsEnvFree(Env);
		DpsDeInit();
		DpsDestroyMutexes();
		return 1;
	} else if (argc == 1) {
	        config_name = argv[0];
	}
	{
		DPS_LOGDEL *del_buf=NULL;
		size_t del_count = 0, log, bytes, n = 0;
		int dd, log_fd;
		struct stat sb;
		char dname[PATH_MAX] = "";
		DPS_BASE_PARAM P;
		DPS_LOGWORD *log_buf = NULL;
		DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0);

		log2stderr = 1;
		if (Indexer == NULL) {
		  fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__);
		  exit(DPS_ERROR);
		}
		
		if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){
		  fprintf(stderr, "%s\n", DpsEnvErrMsg(Env));
		  DpsEnvFree(Env);
		  DpsDeInit();
		  DpsDestroyMutexes();
		  return DPS_ERROR;
		}
		DpsOpenLog("splitter", Env, log2stderr);
		Indexer->flags = Env->flags = DPS_FLAG_UNOCON;
		DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*");

		bzero(&P, sizeof(P));
		P.subdir = DPS_TREEDIR;
		P.basename = "wrd";
		P.indname = "wrd";
		P.mode = DPS_WRITE_LOCK;
		P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300);
		P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, "VarDir", DPS_VAR_DIR));
		P.A = Indexer;
		if (p_to != 0) to = p_to;
		else to = P.NFiles - 1;
#ifdef HAVE_ZLIB
		P.zlib_method = Z_DEFLATED;
		P.zlib_level = 9;
		P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
		P.zlib_memLevel = 9;
		P.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif

		/* Open del log file */
		dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH);
		if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) {
		  dps_strerror(NULL, 0, "Can't open del log '%s'", dname);
		  exit(DPS_ERROR);
		}

		DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles);

		/* Allocate del buffer */
		fstat(dd, &sb);
		if (sb.st_size != 0) {
		  del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1);
		  if (del_buf == NULL) {
		    fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__);
		    exit(0);
		  }
		  del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL);
		}
		DpsClose(dd);

		/* Remove duplicates URLs in DEL log     */
		/* Keep only oldest records for each URL */
		if (del_count > 0) {
		  DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count);
		  if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog);
		    DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups");
		  del_count = DpsRemoveDelLogDups(del_buf, del_count);
		}

		DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to);

		for(log = from; log <= to; log++) {

		  /* Open log file */
		  dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log);
		  if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){
		    if (errno == ENOENT) {
		      dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname);
		      n = 0;
/*		      continue;*/
		    } else {
		      dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname);
		      continue;
		    }
		  } else {
		    DpsWriteLock(log_fd); 
		    DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log);
		    fstat(log_fd, &sb);
		    log_buf = (sb.st_size > 0) ? (DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL;
		    if (log_buf != NULL) {
		      unlink(dname);
		      bytes = read(log_fd,log_buf,(size_t)sb.st_size);
		      (void)ftruncate(log_fd, (off_t)0);
		      DpsUnLock(log_fd);
		      DpsClose(log_fd);
		      
		      n = bytes / sizeof(DPS_LOGWORD);
		      DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n);
		      if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog);
		      DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords");
		      n = DpsRemoveOldWords(log_buf, n, del_buf, del_count);
		      if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd);
		      
		    } else {
		      n = 0;
		      DpsUnLock(log_fd);
		      DpsClose(log_fd);
		    }
		  }

		  DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize);
		  if (obi) DpsBaseOptimize(&P, log);
		  DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count);
		  if (optimize) DpsBaseOptimize(&P, log);
		  DpsBaseClose(&P);
		  DPS_FREE(log_buf);

		  DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to);
		  DPSSLEEP(sleeps);
		}
		DPS_FREE(del_buf);
		DpsAgentFree(Indexer);
		DPS_FREE(P.vardir);
	}

	fprintf(stderr, "Splitting done.\n");
	
	DpsEnvFree(Env);
	DpsDeInit();
	DpsDestroyMutexes();

#ifdef EFENCE
	fprintf(stderr, "Memory leaks checking\n");
	DpsEfenceCheckLeaks();
#endif
#ifdef FILENCE
	fprintf(stderr, "FD leaks checking\n");
	DpsFilenceCheckLeaks(NULL);
#endif
	return 0;
}
示例#3
0
/*
 * Open the index (.i) and storage (.s) files of the base that P->rec_id maps
 * to, and position P->Item on the index entry for P->rec_id.
 *
 * - A base that is already open (P->opened) is closed first.
 * - P->NFiles == 0 is replaced by the "BaseFiles" variable (default 0x100).
 * - With mode == DPS_READ_LOCK the files are opened read-only and a missing
 *   file is an error; otherwise they are opened read-write and created when
 *   missing.  A newly created index file is filled with a hash table of
 *   DPS_HASH_PRIME items obtained from DpsXmalloc().
 * - Unless P->A->Flags.cold_var is set, the per-file lock is taken, both
 *   descriptors are locked according to mode, and P->locked is set.
 * - The hash slot of P->rec_id is read, then the chain is followed through
 *   Item.next until the rec_id matches or the chain ends.  P->mishash is 1
 *   when the slot holds a different, non-zero rec_id.  A chain link that
 *   reads zero bytes is cut off (the previous item's next is reset to 0 on
 *   disk) and the lookup is restarted, at most three times in total.
 *
 * On success sets P->opened = 1, P->mode = mode and returns DPS_OK.
 * On failure frees P->Ifilename / P->Sfilename and returns DPS_ERROR.
 *
 * NOTE(review): error paths taken after a file has been opened do not close
 * the descriptor here; presumably the caller's DpsBaseClose() is expected to
 * clean up -- confirm against DpsBaseClose().
 */
__C_LINK int __DPSCALL DpsBaseOpen(DPS_BASE_PARAM *P, int mode) {
  const size_t table_bytes = sizeof(DPS_BASEITEM) * DPS_HASH_PRIME;
  unsigned int hash;
  size_t filenamelen, z;
  ssize_t wr;
  DPS_BASEITEM  *hTable;
#ifdef DEBUG_SEARCH
  unsigned long total_ticks, stop_ticks, start_ticks = DpsStartTimer();
#endif

  TRACE_IN(P->A, "DpsBaseOpen");

  if (P->opened) DpsBaseClose(P);

  if (P->NFiles == 0) P->NFiles = DpsVarListFindUnsigned(&P->A->Vars, "BaseFiles", 0x100);
  P->FileNo =  DPS_FILENO(P->rec_id, P->NFiles);

  hash = DPS_HASH(P->rec_id);
  filenamelen = dps_strlen(P->vardir) + dps_strlen(P->subdir) + dps_strlen(P->indname) + dps_strlen(P->basename) +  48;
  if (
      ((P->Ifilename = (char *)DpsMalloc(filenamelen)) == NULL) ||
      ((P->Sfilename = (char *)DpsMalloc(filenamelen)) == NULL)            ) {
    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
    DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error 2x%d bytes %s:%d", (int)filenamelen, __FILE__, __LINE__);
    TRACE_OUT(P->A);
    return DPS_ERROR;
  }
  /* Bounded by the allocation size; the cast makes the argument match %zx. */
  snprintf(P->Sfilename, filenamelen, "%s/%s/%s%04zx.s", P->vardir, P->subdir, P->basename, (size_t)P->FileNo);
  snprintf(P->Ifilename, filenamelen, "%s/%s/%s%04zx.i", P->vardir, P->subdir, P->indname, (size_t)P->FileNo);

  if ((P->Ifd = DpsOpen2(P->Ifilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY)) < 0) {
    /* The index file could not be opened: give up for readers, create it
       for writers. */
    if ((mode == DPS_READ_LOCK) ||
        ((P->Ifd = DpsOpen3(P->Ifilename, O_RDWR | O_CREAT | DPS_BINARY,
                            S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH)) < 0)) {
      dps_strerror(P->A, (mode == DPS_READ_LOCK && errno == ENOENT) ? DPS_LOG_DEBUG : DPS_LOG_ERROR,
                   "Can't open/create file %s for %s [%s:%d]",
                   P->Ifilename, (mode == DPS_READ_LOCK) ? "read" : "write", __FILE__, __LINE__);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }

    /* Freshly created index: write the initial hash table under lock. */
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
    DpsWriteLock(P->Ifd);
    if ((hTable = (DPS_BASEITEM *)DpsXmalloc(table_bytes)) == NULL) {
      DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", (int)table_bytes);
      DpsUnLock(P->Ifd);
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    if ((wr = write(P->Ifd, hTable, table_bytes)) != (ssize_t)table_bytes) {
      /* Arguments are cast to the types the format string asks for. */
      dps_strerror(P->A, DPS_LOG_ERROR, "Can't set new index for file %s\nwritten %d bytes of %d\nIfd:%d hTable:%x",
                   P->Ifilename, (int)wr, (int)table_bytes, P->Ifd, (unsigned int)(size_t)hTable);
      DPS_FREE(hTable);
      DpsUnLock(P->Ifd);
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    DpsUnLock(P->Ifd);
    DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
    DPS_FREE(hTable);
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
  }

  if (!P->A->Flags.cold_var) {
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
    switch (mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Ifd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Ifd);
      break;
    }
    P->locked = 1;
  }

  if ((P->Sfd = DpsOpen2(P->Sfilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY)) < 0) {
    if ((mode == DPS_READ_LOCK) ||
        ((P->Sfd = DpsOpen3(P->Sfilename, O_RDWR | O_CREAT | DPS_BINARY,
                            S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH)) < 0)) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open/create file %s", P->Sfilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
  }
  if (!P->A->Flags.cold_var) {
    switch (mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Sfd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Sfd);
      break;
    }
  }

#ifdef DEBUG_SEARCH
  stop_ticks = DpsStartTimer();
  total_ticks = stop_ticks - start_ticks;
  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase1 %03X in %.5f sec.", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif

  /* Up to three lookup attempts; an attempt is repeated only after a broken
     chain link has been repaired (goto search_again). */
  for (z = 0; z < 3; z++) {

    /* search rec_id */
    if ((P->CurrentItemPos = (dps_uint8)lseek(P->Ifd, (off_t)(hash * sizeof(DPS_BASEITEM)), SEEK_SET)) == (dps_uint8)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    if (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
      /* Not treated as fatal: the item is zeroed and handled as an empty slot. */
      DpsLog(P->A, DPS_LOG_ERROR, "{%s:%d} Can't read index for file %s seek:%ld hash: %u (%d)",
             __FILE__, __LINE__, P->Ifilename, (long)P->CurrentItemPos, hash, (int)hash);
      bzero(&P->Item, sizeof(P->Item));
    }

#ifdef DEBUG_SEARCH
    stop_ticks = DpsStartTimer();
    total_ticks = stop_ticks - start_ticks;
    DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase2 %03X in %.5f sec.", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif

    if (P->Item.rec_id == P->rec_id || P->Item.rec_id == 0) P->mishash = 0;
    else P->mishash = 1;
    P->PreviousItemPos = P->CurrentItemPos;
    if (P->mishash) {
      while ((P->Item.next != 0) && (P->Item.rec_id != P->rec_id)) {
        P->PreviousItemPos = P->CurrentItemPos;
        P->CurrentItemPos = P->Item.next;
        if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
          DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
          DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
          TRACE_OUT(P->A);
          return DPS_ERROR;
        }
        if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
          if (wr == 0) {
            /* The link points past the end of the file: re-read the previous
               item, clear its next pointer on disk and start over. */
            DpsLog(P->A, DPS_LOG_ERROR, "Possible corrupted hash chain for file %s, trying to restore (%s:%d)",
                   P->Ifilename, __FILE__, __LINE__);
            if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
              DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
              DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
              TRACE_OUT(P->A);
              return DPS_ERROR;
            }
            if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
              DpsLog(P->A, DPS_LOG_ERROR, "Can't read previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
              DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
              TRACE_OUT(P->A);
              return DPS_ERROR;
            }
            P->Item.next = 0;
            if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
              DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
              DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
              TRACE_OUT(P->A);
              return DPS_ERROR;
            }
            if ((wr = write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
              DpsLog(P->A, DPS_LOG_ERROR, "Can't write previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
              DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
              TRACE_OUT(P->A);
              return DPS_ERROR;
            }
            goto search_again;

          } else {
            DpsLog(P->A, DPS_LOG_ERROR, "Can't read hash chain for file %s %d of %d bytes (%s:%d)",
                   P->Ifilename, (int)wr, (int)sizeof(DPS_BASEITEM), __FILE__, __LINE__);
            DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
            TRACE_OUT(P->A);
            return DPS_ERROR;
          }
        }
#ifdef DEBUG_SEARCH
        stop_ticks = DpsStartTimer();
        total_ticks = stop_ticks - start_ticks;
        DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase3 %03X in %.5f sec.", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif
      }
    }
    break;
  search_again:;
  }

  P->opened = 1;
  P->mode = mode;
#ifdef DEBUG_SEARCH
  stop_ticks = DpsStartTimer();
  total_ticks = stop_ticks - start_ticks;
  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase4 %03X in %.5f sec.\n", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif
  TRACE_OUT(P->A);
  return DPS_OK;
}
示例#4
0
/*
 * Defragment base files.
 *
 * sbase == -1 processes every base in [0, P->NFiles); any other value
 * processes only base number sbase.
 *
 * For each base:
 *  1. Open it with DPS_WRITE_LOCK, scan the whole index file to count items
 *     and to sum the stored (ActualSize) and original (OriginalSize) sizes of
 *     live records, and truncate the index to a whole number of items.
 *  2. Compute the defragmentation ratio dr.  When dr >= "OptimizeRatio"
 *     (default 15), or there is no live data but the storage file is not
 *     empty, compact the base:
 *       - load the live items, sort them with cmpsi (presumably by storage
 *         offset -- the moves below rely on ascending offsets; confirm),
 *       - move every record towards the start of the storage file so that
 *         records become contiguous, fixing up sizes/offsets that disagree
 *         with the file (each fix-up increments error_cnt),
 *       - truncate the storage file and rebuild the index from scratch.
 *  3. When fix-ups were made (error_cnt != 0) the same base is processed
 *     again on the next loop iteration.
 *
 * Returns DPS_OK, DPS_ERROR on I/O or allocation failure, or the failing
 * result of DpsBaseSeek().
 */
extern __C_LINK int __DPSCALL DpsBaseOptimize(DPS_BASE_PARAM *P, int sbase) {
  const size_t table_bytes = sizeof(DPS_BASEITEM) * DPS_HASH_PRIME;
  struct stat sb;
  urlid_t base, base_from, base_to;
  long unsigned ActualSize, OriginalSize, i, nitems;
  off_t pos, posold, NewItemPos, SSize;
  dps_uint8 diff, gain;
  double dr = 0.0, cr = 0.0;
  ssize_t nread;
  size_t rsize;
  ssize_t wr;
  int OptimizeRatio, res, error_cnt;
  char buffer[BUFSIZ];
  DPS_BASEITEM *hTable;
  DPS_SORTBASEITEM *si = NULL;

  OptimizeRatio = DpsVarListFindInt(&P->A->Vars, "OptimizeRatio", 15);

  P->mode = DPS_WRITE_LOCK;
  if (sbase == -1) {
    base_from = 0; base_to = (urlid_t)P->NFiles;
  } else {
    base_from = sbase; base_to = sbase + 1;
  }

  for (base = base_from; base < base_to; base++) {

    error_cnt = 0;
    gain = (dps_uint8)0;
    P->rec_id = ((base & DPS_BASE_MASK) << DPS_BASE_BITS);
    if (DpsBaseOpen(P, DPS_WRITE_LOCK) != DPS_OK) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open base %s/%s {%s:%d}", P->subdir, P->basename, __FILE__, __LINE__);
      DpsBaseClose(P);
      return DPS_ERROR;
    }
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
      DpsBaseClose(P);
      return DPS_ERROR;
    }

    /* Size of the storage file: fstat() first, lseek() to the end as fallback. */
    if (fstat(P->Sfd, &sb) == 0) {
      SSize = sb.st_size;
    } else {
      if ((SSize = (off_t)lseek(P->Sfd, (off_t)0, SEEK_END)) == (off_t)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
        DpsBaseClose(P);
        return DPS_ERROR;
      }
    }

    /* Pass 1: count index items and sum the sizes of live records. */
    nitems = 0;
    ActualSize = 0;
    OriginalSize = 0;
    while (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      nitems++;
      if ((P->Item.rec_id != 0) && ((dps_uint8)P->Item.offset < (dps_uint8)SSize) && (P->Item.size > 0)) {
        ActualSize += (long unsigned)P->Item.size;
        OriginalSize += (long unsigned)(P->Item.orig_size ? P->Item.orig_size : P->Item.size);
      }
    }
    /* Drop a trailing partial item, if any. */
    if (ftruncate(P->Ifd, (off_t)(nitems * sizeof(DPS_BASEITEM))) != 0) {
      dps_strerror(P->A, DPS_LOG_EXTRA, "ftruncate error (pos:%ld) [%s:%d]", (long)(nitems * sizeof(DPS_BASEITEM)), __FILE__, __LINE__);
    }

    dr = (nitems) ? fabs(100.0 * ((long unsigned)SSize - ActualSize) / ((double)SSize + 1.0)) : 0.0;
    cr = (nitems) ? fabs(100.0 * ActualSize / (OriginalSize + 1)) : 0.0;

    DpsLog(P->A, DPS_LOG_EXTRA, "Optimize: %s/%s base 0x%X, %ld recs defrag: %.2f%% Ratio: %.2f%% Data: %ld File: %ld",
           P->subdir, P->basename, (unsigned int)P->FileNo, (long)nitems, dr, cr, (long)ActualSize, (long)SSize);

    if ((dr >= (double)OptimizeRatio) || (ActualSize == 0 && SSize != 0)) {

      si = (DPS_SORTBASEITEM*)DpsMalloc((nitems + 1) * sizeof(DPS_SORTBASEITEM));

      if (si == NULL) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't alloc si (%d bytes) at {%s:%d}", (int)((nitems + 1) * sizeof(DPS_SORTBASEITEM)), __FILE__, __LINE__);
        DpsBaseClose(P);
        return DPS_ERROR;
      }
      if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
        DpsBaseClose(P);
        DPS_FREE(si);
        return DPS_ERROR;
      }

      /* Pass 2: keep only live, plausible items.  si[i] is overwritten by the
         next read unless the item is accepted.  A record may be as large as
         the total of all live records (that is exactly the case when the base
         holds a single live record), hence "<=": with "<" that lone record
         was discarded and the base truncated to nothing. */
      for (i = 0; (i < nitems) && (read(P->Ifd, &si[i].Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)); ) {
        if (si[i].Item.rec_id != 0 && ((dps_uint8)si[i].Item.offset < (dps_uint8)SSize) && (si[i].Item.size > 0) && (si[i].Item.size <= ActualSize)) {
          i++;
        }
      }

      if (i < nitems) nitems = i;
      if (nitems > 1) DpsSort((void*)si, (size_t)nitems, sizeof(DPS_SORTBASEITEM), cmpsi);

      gain = (dps_uint8)0;
      pos = (off_t)0;
      posold = (off_t)0;
      if (nitems > 0) {
        if ((long unsigned)si[0].Item.offset < (long unsigned)SSize) {
          posold = (off_t)si[0].Item.offset;
        } else {
          si[0].Item.offset = (off_t)0;
          si[0].Item.size = 0;
        }
      }
      if (nitems > 1) {
        /* A record cannot extend past the start of the next one. */
        if (si[0].Item.size > (rsize = (size_t)(si[1].Item.offset - si[0].Item.offset))) {
          DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by offset: %ld -> %ld", (long)si[0].Item.size, (long)rsize);
          si[0].Item.size = rsize;
          error_cnt++;
        }
      }
      /* Move the first record to offset 0 when it does not start there. */
      if ((diff = (dps_uint8)posold) > 0) {
        for (lseek(P->Sfd, posold, SEEK_SET), rsize = 0;
             (rsize < si[0].Item.size) &&
               ((nread = read(P->Sfd, buffer,
                              (rsize + BUFSIZ < si[0].Item.size) ? BUFSIZ : (si[0].Item.size - rsize))) > 0);
             lseek(P->Sfd, posold, SEEK_SET)) {
          lseek(P->Sfd, pos, SEEK_SET);
          (void)write(P->Sfd, buffer, (size_t)nread);
          rsize += (size_t)nread;
          posold += (off_t)nread;
          pos += (off_t)nread;
        }
        si[0].Item.offset = 0;
        if (rsize != si[0].Item.size) {
          DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by size: %ld -> %ld", (long)si[0].Item.size, (long)rsize);
          si[0].Item.size = rsize;
          error_cnt++;
        }
        gain += diff;
      }

      /* Move every following record right behind its predecessor. */
      if (nitems > 0) {
        for (i = 0; i < nitems - 1; i++) {
          if ((long unsigned)si[i + 1].Item.offset > (long unsigned)SSize) {
            DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", (long)i, (long)si[i + 1].Item.offset, (long)SSize);
            si[i + 1].Item.size = 0;
            si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size;
            error_cnt++;
          } else {
            pos = (off_t)(si[i].Item.offset + si[i].Item.size);
            posold = (off_t)si[i + 1].Item.offset;
            if (i < nitems - 2) {
              if (si[i + 1].Item.size > (rsize = (size_t)(si[i + 2].Item.offset - si[i + 1].Item.offset))) {
                DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by offset: %ld -> %ld", (long)(i + 1), (long)si[i + 1].Item.size, (long)rsize);
                si[i + 1].Item.size = rsize;
                error_cnt++;
              }
            }
            if ((diff = (dps_uint8)posold - (dps_uint8)pos) > 0) {
              for (lseek(P->Sfd, posold, SEEK_SET), rsize = 0;
                   (rsize < si[i + 1].Item.size) &&
                     ((nread = read(P->Sfd, buffer,
                                    (rsize + BUFSIZ < si[i + 1].Item.size) ? BUFSIZ : (si[i + 1].Item.size - rsize))) > 0);
                   lseek(P->Sfd, posold, SEEK_SET)) {
                lseek(P->Sfd, pos, SEEK_SET);
                (void)write(P->Sfd, buffer, (size_t)nread);
                rsize += (size_t)nread;
                posold += (off_t)nread;
                pos += (off_t)nread;
              }
              if (rsize != si[i + 1].Item.size) {
                DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by size: %ld -> %ld", (long)(i + 1), (long)si[i + 1].Item.size, (long)rsize);
                si[i + 1].Item.size = rsize;
                error_cnt++;
              }
              si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size;
              gain += diff;
            }
          }
        }
      }

      /* Cut the storage file right after the last record. */
      posold = SSize;
      pos = (nitems) ? (off_t)(si[nitems - 1].Item.offset + si[nitems - 1].Item.size) : (off_t)0;
      if (ftruncate(P->Sfd, (off_t)(pos)) != 0) {
        dps_strerror(P->A, DPS_LOG_ERROR, "ftruncate error (pos:%ld) [%s:%d]", (long)pos, __FILE__, __LINE__);
      }
      SSize = pos;

      if (posold > pos) {
        gain += ((dps_uint8)posold - (dps_uint8)pos);
      }

      /* Rebuild the index unconditionally: empty it, write a fresh hash
         table, then re-insert every kept item. */
      {
        posold = lseek(P->Ifd, (off_t)0, SEEK_END);
        (void)ftruncate(P->Ifd, (off_t)0);
        lseek(P->Ifd, (off_t)0, SEEK_SET);

        if ((hTable = (DPS_BASEITEM *)DpsXmalloc(table_bytes)) == NULL) {
          DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", (int)table_bytes);
          DpsBaseClose(P);
          DPS_FREE(si);
          return DPS_ERROR;
        }
        if ((wr = write(P->Ifd, hTable, table_bytes)) != (ssize_t)table_bytes) {
          dps_strerror(P->A, DPS_LOG_ERROR, "[%s:%d] Can't set new index for file %s\nwritten %d bytes of %d",
                       __FILE__, __LINE__, P->Ifilename, (int)wr, (int)table_bytes);
          DPS_FREE(hTable);
          DpsBaseClose(P);
          DPS_FREE(si);
          return DPS_ERROR;
        }
        DPS_FREE(hTable);

        for (i = 0; i < nitems; i++) {
          if (si[i].Item.rec_id == 0 || si[i].Item.size == 0) continue;
          if ((long)si[i].Item.offset > (long)SSize) {
            DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", (long)i, (long)si[i].Item.offset, (long)SSize);
            error_cnt++;
            continue;
          }
          P->rec_id = si[i].Item.rec_id;
          if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) {
            DpsBaseClose(P);
            DPS_FREE(si);
            return res;
          }
          if (P->Item.rec_id != P->rec_id) {
            if (P->mishash && P->Item.rec_id != 0) {
              /* Slot taken by another rec_id: link a new item at the end of
                 the index file behind the current chain item. */
              if ((P->Item.next = (dps_uint8)(NewItemPos = lseek(P->Ifd, (off_t)0, SEEK_END))) == (dps_uint8)-1) {
                DpsBaseClose(P);
                DPS_FREE(si);
                return DPS_ERROR;
              }
              if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
                DpsBaseClose(P);
                DPS_FREE(si);
                return DPS_ERROR;
              }
              if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
                DpsBaseClose(P);
                DPS_FREE(si);
                return DPS_ERROR;
              }
              P->CurrentItemPos = (dps_uint8)NewItemPos;
            }
          }
          P->Item = si[i].Item;
          P->Item.next = (off_t)0;
          if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
            DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
            DpsBaseClose(P);
            DPS_FREE(si);
            return DPS_ERROR;
          }
          if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
            DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
            DpsBaseClose(P);
            DPS_FREE(si);
            return DPS_ERROR;
          }
        }
        pos = lseek(P->Ifd, (off_t)0, SEEK_END);
        gain += ((dps_uint8)posold - (dps_uint8)pos);

        DpsLog(P->A, DPS_LOG_DEBUG, "Optimize: %s/%s base 0x%X cleaned, %ld bytes freed", P->subdir, P->basename, (unsigned int)base, (long)gain);
      }

      DPS_FREE(si);
    }

    /* Fix-ups were applied: run the same base once more. */
    if (error_cnt) base--;
    DpsBaseClose(P);
  }
  return DPS_OK;
}
示例#5
0
/*
 * Store len bytes from buffer as the record P->rec_id.
 *
 * With HAVE_ZLIB and P->zlib_method == Z_DEFLATED the data is deflated first
 * and the original length is recorded in Item.orig_size; if the compressor
 * cannot be initialised, or does not produce a complete stream, the data is
 * stored as is with orig_size == 0.
 *
 * The record is located with DpsBaseSeek(P, DPS_WRITE_LOCK):
 *  - existing record large enough: overwritten in place at Item.offset;
 *  - existing record too small: the data is appended to the storage file;
 *  - new record: when the hash slot is occupied by another rec_id a new index
 *    item is chained at the end of the index file; the data is appended to
 *    the storage file.
 * Finally the index item is written back with the new size / orig_size.
 *
 * Returns DPS_OK on success, DPS_ERROR (or the DpsBaseSeek() result) on
 * failure.
 */
__C_LINK int __DPSCALL DpsBaseWrite(DPS_BASE_PARAM *P, void *buffer, size_t len) {
  dps_uint8 NewItemPos;
  int res = DPS_OK;
  size_t size = len;
  size_t orig_size = 0;
  void *data = buffer;

#ifdef HAVE_ZLIB
  z_stream zstream;
  Byte *CData = NULL;
  int zrc;

  bzero(&zstream, sizeof(zstream));

  zstream.zalloc = Z_NULL;
  zstream.zfree = Z_NULL;
  zstream.opaque = Z_NULL;
  zstream.next_in = buffer;

  if ( (P->zlib_method == Z_DEFLATED)
       && (deflateInit2(&zstream, P->zlib_level, Z_DEFLATED, P->zlib_windowBits, P->zlib_memLevel, P->zlib_strategy) == Z_OK) ) {

    zstream.avail_in = (uInt)len;
    zstream.avail_out = (uInt)(/*sizeof(gz_header) +*/ 4096 + 2 * len);
    CData = zstream.next_out = (Byte *) DpsMalloc(zstream.avail_out);
    if (zstream.next_out == NULL) {
      /* Release the state allocated by deflateInit2() before bailing out. */
      deflateEnd(&zstream);
      return DPS_ERROR;
    }
    zrc = deflate(&zstream, Z_FINISH);
    deflateEnd(&zstream);
    if (zrc == Z_STREAM_END) {
      orig_size = len;
      size = zstream.total_out;
      data = CData;
    }
    /* Otherwise the stream is incomplete: keep data/size pointing at the raw
       buffer so the record is stored uncompressed (orig_size stays 0). */
  }

#endif

  if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) {
    goto DpsBaseWrite_exit;
  }

  if (P->Item.rec_id == P->rec_id) {
    if (P->Item.size < size) {
      /* Does not fit into the old place: append. */
      if ((P->Item.offset = (dps_uint8)lseek(P->Sfd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
        res = DPS_ERROR;
        goto DpsBaseWrite_exit;
      }
    } else {
      if (lseek(P->Sfd, (off_t)P->Item.offset, SEEK_SET) == (off_t)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s offset %ld {%s:%d}", P->Sfilename, (long)P->Item.offset, __FILE__, __LINE__);
        res = DPS_ERROR;
        goto DpsBaseWrite_exit;
      }
    }
  } else { /* new rec_id added */
    if (P->mishash && P->Item.rec_id != 0) {
      /* Chain a new index item at the end of the index file and make the
         current item point to it. */
      if ((P->Item.next = NewItemPos = (dps_uint8)lseek(P->Ifd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
        res = DPS_ERROR;
        goto DpsBaseWrite_exit;
      }
      if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
        res = DPS_ERROR;
        goto DpsBaseWrite_exit;
      }
      if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
        res = DPS_ERROR;
        goto DpsBaseWrite_exit;
      }
      P->CurrentItemPos = NewItemPos;
      P->Item.next = (off_t)0;
    }
    P->Item.rec_id = P->rec_id;
    if ((P->Item.offset = (dps_uint8)lseek(P->Sfd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
      res = DPS_ERROR;
      goto DpsBaseWrite_exit;
    }
  }

  if (write(P->Sfd, data, size) != (ssize_t)size) {
    dps_strerror(P->A, DPS_LOG_ERROR, "Can't write %ld bytes at %ld of file %s {%s:%d}",
                 (long)size, (long)P->Item.offset, P->Sfilename, __FILE__, __LINE__);
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }
  if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
    DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }

  P->Item.size = size;
  P->Item.orig_size = orig_size;
  if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
    DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
    /* Without the index item the record cannot be found again: report it. */
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }
#ifdef DEBUG_SEARCH
  DpsLog(P->A, DPS_LOG_DEBUG, "[%s/%s] Stored rec_id: %x Size: %d", P->subdir, P->basename, P->rec_id, (int)len);
#endif

 DpsBaseWrite_exit:

#ifdef HAVE_ZLIB
  DPS_FREE(CData);
#endif
  return res;
}