/*
 * Open a stream connection to a Unix-domain socket.
 *
 *   A            agent; A->Conf is used to expand the socket name and A is
 *                the target of every log message.
 *   unix_socket  socket name, expanded into a path by DpsRelVarName().
 *
 * Returns the connected descriptor on success.  Returns
 * DPS_NET_CANT_CONNECT when the expanded name is reported as 105 or longer,
 * when socket() fails, or when connect() fails.  No descriptor is left open
 * on any failure path.
 */
static int open_socket(DPS_AGENT *A, char *unix_socket) {
  char unix_path[128];
  struct sockaddr_un unix_addr;
  int sockfd;

  if (DpsRelVarName(A->Conf, unix_path, sizeof(unix_path), unix_socket) >= 105) {
    DpsLog(A, DPS_LOG_ERROR, "Unix socket name '%s' is too large", unix_path);
    return(DPS_NET_CANT_CONNECT);
  }

  if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
    DpsLog(A, DPS_LOG_ERROR, "unix socket() error %d", errno);
    return(DPS_NET_CANT_CONNECT);
  }
  DpsSockOpt(A, sockfd);

  bzero((void*)&unix_addr, sizeof(unix_addr));
  unix_addr.sun_family = AF_UNIX;
  dps_strncpy(unix_addr.sun_path, unix_path, sizeof(unix_addr.sun_path));

  if (connect(sockfd, (struct sockaddr *)&unix_addr, sizeof (unix_addr))) {
    /* Log first: close() below may overwrite errno. */
    dps_strerror(A, DPS_LOG_ERROR, "unix socket '%s' connect() error", unix_path);
    /* The descriptor is useless once connect() failed; do not leak it. */
    close(sockfd);
    return(DPS_NET_CANT_CONNECT);
  }
  return sockfd;
}
int main(int argc,char **argv, char **envp) { int ch, sleeps = 1, optimize = 0, obi = 0; unsigned int from = 0, to = 0xFFF, p_to = 0; DPS_ENV * Env; const char * config_name = DPS_CONF_DIR "/cached.conf"; DpsInit(argc, argv, envp); /* Initialize library */ DpsInitMutexes(); Env=DpsEnvInit(NULL); if (Env == NULL) exit(1); DpsSetLockProc(Env, DpsLockProc); /*#ifndef HAVE_SETPROCTITLE*/ ARGV = argv; ARGC = argc; /*#endif*/ while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){ switch (ch) { case 'f': sscanf(optarg, "%x", &from); break; case 't': sscanf(optarg, "%x", &p_to); break; case 'w': DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg); break; case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break; case 'b': obi++; break; case 'o': optimize++; break; case 'p': sleeps = atoi(optarg); break; case 'h': case '?': default: usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; break; } } argc -= optind; argv += optind; if(argc > 1) { usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; } else if (argc == 1) { config_name = argv[0]; } { DPS_LOGDEL *del_buf=NULL; size_t del_count = 0, log, bytes, n = 0; int dd, log_fd; struct stat sb; char dname[PATH_MAX] = ""; DPS_BASE_PARAM P; DPS_LOGWORD *log_buf = NULL; DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0); log2stderr = 1; if (Indexer == NULL) { fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__); exit(DPS_ERROR); } if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){ fprintf(stderr, "%s\n", DpsEnvErrMsg(Env)); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return DPS_ERROR; } DpsOpenLog("splitter", Env, log2stderr); Indexer->flags = Env->flags = DPS_FLAG_UNOCON; DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*"); bzero(&P, sizeof(P)); P.subdir = DPS_TREEDIR; P.basename = "wrd"; P.indname = "wrd"; P.mode = DPS_WRITE_LOCK; P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300); P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, 
"VarDir", DPS_VAR_DIR)); P.A = Indexer; if (p_to != 0) to = p_to; else to = P.NFiles - 1; #ifdef HAVE_ZLIB P.zlib_method = Z_DEFLATED; P.zlib_level = 9; P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS; P.zlib_memLevel = 9; P.zlib_strategy = DPS_BASE_WRD_STRATEGY; #endif /* Open del log file */ dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH); if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) { dps_strerror(NULL, 0, "Can't open del log '%s'", dname); exit(DPS_ERROR); } DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles); /* Allocate del buffer */ fstat(dd, &sb); if (sb.st_size != 0) { del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1); if (del_buf == NULL) { fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__); exit(0); } del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL); } DpsClose(dd); /* Remove duplicates URLs in DEL log */ /* Keep only oldest records for each URL */ if (del_count > 0) { DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count); if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups"); del_count = DpsRemoveDelLogDups(del_buf, del_count); } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to); for(log = from; log <= to; log++) { /* Open log file */ dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log); if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){ if (errno == ENOENT) { dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname); n = 0; /* continue;*/ } else { dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname); continue; } } else { DpsWriteLock(log_fd); DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log); fstat(log_fd, &sb); log_buf = (sb.st_size > 0) ? 
(DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL; if (log_buf != NULL) { unlink(dname); bytes = read(log_fd,log_buf,(size_t)sb.st_size); (void)ftruncate(log_fd, (off_t)0); DpsUnLock(log_fd); DpsClose(log_fd); n = bytes / sizeof(DPS_LOGWORD); DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords"); n = DpsRemoveOldWords(log_buf, n, del_buf, del_count); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd); } else { n = 0; DpsUnLock(log_fd); DpsClose(log_fd); } } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize); if (obi) DpsBaseOptimize(&P, log); DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count); if (optimize) DpsBaseOptimize(&P, log); DpsBaseClose(&P); DPS_FREE(log_buf); DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to); DPSSLEEP(sleeps); } DPS_FREE(del_buf); DpsAgentFree(Indexer); DPS_FREE(P.vardir); } fprintf(stderr, "Splitting done.\n"); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return 0; }
/*
 * Open the pair of files (index ".i" and data ".s") that hold P->rec_id and
 * position P on the index slot for that record.
 *
 *   P     base descriptor; vardir, subdir, basename, indname, rec_id and A
 *         must be set by the caller.  If P is already opened it is closed
 *         first.  When P->NFiles is 0 it is taken from the "BaseFiles"
 *         variable (default 0x100).
 *   mode  DPS_READ_LOCK or DPS_WRITE_LOCK.  Files are created only when the
 *         mode is not DPS_READ_LOCK.
 *
 * On success P->Ifd/P->Sfd are open, P->Item holds the index slot found (the
 * record itself, an empty slot, or the last element of the hash chain),
 * P->CurrentItemPos/P->PreviousItemPos give its position, P->mishash tells
 * whether the head slot belongs to another record, P->opened is 1 and
 * P->mode is set.  Unless P->A->Flags.cold_var is set, both files are locked
 * according to mode and P->locked is 1.
 *
 * Returns DPS_OK or DPS_ERROR.  On error both file names are freed.
 * NOTE(review): on the error paths the descriptors already opened and the
 * locks already taken are NOT released here; whether DpsBaseClose() copes
 * with that when P->opened is 0 is not visible from this function.
 */
__C_LINK int __DPSCALL DpsBaseOpen(DPS_BASE_PARAM *P, int mode) {
  unsigned int hash;
  size_t filenamelen, z;
  ssize_t wr;
  DPS_BASEITEM *hTable;
#ifdef DEBUG_SEARCH
  unsigned long total_ticks, stop_ticks, start_ticks = DpsStartTimer();
#endif

  TRACE_IN(P->A, "DpsBaseOpen");

  if (P->opened) DpsBaseClose(P);

  if (P->NFiles == 0) P->NFiles = DpsVarListFindUnsigned(&P->A->Vars, "BaseFiles", 0x100);
  P->FileNo = DPS_FILENO(P->rec_id, P->NFiles);
  hash = DPS_HASH(P->rec_id);

  /* One length covers both names: it sums basename and indname, plus room
     for the separators, the hex file number and the extension. */
  filenamelen = dps_strlen(P->vardir) + dps_strlen(P->subdir) + dps_strlen(P->indname) + dps_strlen(P->basename) + 48;
  if (   ((P->Ifilename = (char *)DpsMalloc(filenamelen)) == NULL)
      || ((P->Sfilename = (char *)DpsMalloc(filenamelen)) == NULL) ) {
    DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error 2x%d bytes %s:%d", (int)filenamelen, __FILE__, __LINE__);
    goto DpsBaseOpen_fail;
  }
  sprintf(P->Sfilename, "%s/%s/%s%04zx.s", P->vardir, P->subdir, P->basename, P->FileNo);
  sprintf(P->Ifilename, "%s/%s/%s%04zx.i", P->vardir, P->subdir, P->indname, P->FileNo);

  /* ---- index file: open it, or create and initialise it ---- */
  if ((P->Ifd = DpsOpen2(P->Ifilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY)) < 0) {
    if ((mode == DPS_READ_LOCK) || ((P->Ifd = DpsOpen3(P->Ifilename, O_RDWR | O_CREAT | DPS_BINARY
/*#ifdef O_DIRECT
						       | O_DIRECT
#endif*/
						       , S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH )) < 0)) {
      /* A missing file is routine for readers, hence only DEBUG level. */
      dps_strerror(P->A, (mode == DPS_READ_LOCK && errno == ENOENT) ? DPS_LOG_DEBUG : DPS_LOG_ERROR,
		   "Can't open/create file %s for %s [%s:%d]", P->Ifilename, (mode == DPS_READ_LOCK) ? "read" : "write", __FILE__, __LINE__);
      goto DpsBaseOpen_fail;
    }
    /* Freshly created index: write the initial table of DPS_HASH_PRIME
       slots under the write lock. */
#if 1
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    DpsWriteLock(P->Ifd);
    if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) {
      DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", (int)(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME));
      DpsUnLock(P->Ifd);
#if 1
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
      goto DpsBaseOpen_fail;
    }
    if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) {
      /* Arguments are cast so they match the conversions in the message. */
      dps_strerror(P->A, DPS_LOG_ERROR, "Can't set new index for file %s\nwritten %d bytes of %d\nIfd:%d hTable:%x",
		   P->Ifilename, (int)wr, (int)(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME), P->Ifd, (unsigned int)(size_t)hTable);
      DPS_FREE(hTable);
      DpsUnLock(P->Ifd);
#if 1
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
      goto DpsBaseOpen_fail;
    }
    DpsUnLock(P->Ifd);
#if 1
    DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    DPS_FREE(hTable);
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
      goto DpsBaseOpen_fail;
    }
  }

  /* Take the working lock on the index file (skipped for cold_var). */
  if (!P->A->Flags.cold_var) {
#if 1
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    switch (mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Ifd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Ifd);
      break;
    }
    P->locked = 1;
  }

  /* ---- data file: open it, or create it ---- */
  if ((P->Sfd = DpsOpen2(P->Sfilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY
/*#ifdef O_DIRECT
			 | O_DIRECT
#endif*/
			 )) < 0) {
    if ((mode == DPS_READ_LOCK) || ((P->Sfd = DpsOpen3(P->Sfilename, O_RDWR | O_CREAT | DPS_BINARY
/*#ifdef O_DIRECT
						       | O_DIRECT
#endif*/
						       , S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH )) < 0)) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open/create file %s", P->Sfilename);
      goto DpsBaseOpen_fail;
    }
  }
  if (!P->A->Flags.cold_var) {
    switch (mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Sfd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Sfd);
      break;
    }
  }

#ifdef DEBUG_SEARCH
  stop_ticks = DpsStartTimer();
  total_ticks = stop_ticks - start_ticks;
  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase1 %03X in %.5f sec.", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif

  /* ---- locate rec_id ----
     At most three attempts: an attempt is restarted (via search_again) after
     a truncated hash chain has been cut at its last readable element.  If
     the third attempt also ends in a repair, the loop simply runs out and
     the function still reports success with whatever P->Item then holds. */
  for (z = 0; z < 3; z++) {

    /* search rec_id */
    if ( (P->CurrentItemPos = (dps_uint8)lseek(P->Ifd, (off_t)(hash * sizeof(DPS_BASEITEM)), SEEK_SET)) == (dps_uint8)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
      goto DpsBaseOpen_fail;
    }
    if (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
      /* Deliberately not fatal: the slot is treated as empty. */
      DpsLog(P->A, DPS_LOG_ERROR, "{%s:%d} Can't read index for file %s seek:%ld hash: %u (%d)",
	     __FILE__, __LINE__, P->Ifilename, (long)P->CurrentItemPos, hash, (int)hash);
      bzero(&P->Item, sizeof(P->Item));
    }

#ifdef DEBUG_SEARCH
    stop_ticks = DpsStartTimer();
    total_ticks = stop_ticks - start_ticks;
    DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase2 %03X in %.5f sec.", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif

    /* mishash: the head slot is occupied by a different record. */
    if (P->Item.rec_id == P->rec_id || P->Item.rec_id == 0) P->mishash = 0;
    else P->mishash = 1;
    P->PreviousItemPos = P->CurrentItemPos;

    if (P->mishash) {
      /* Walk the chain until the record is found or the chain ends. */
      while ((P->Item.next != 0) && (P->Item.rec_id != P->rec_id)) {
	P->PreviousItemPos = P->CurrentItemPos;
	P->CurrentItemPos = P->Item.next;
	if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
	  DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
	  goto DpsBaseOpen_fail;
	}
	if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
	  if (wr == 0) {
	    /* The link points past the end of the file: reread the previous
	       element, clear its next pointer, write it back and retry. */
	    DpsLog(P->A, DPS_LOG_ERROR, "Possible corrupted hash chain for file %s, trying to restore (%s:%d)",
		   P->Ifilename, __FILE__, __LINE__);
	    if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
	      goto DpsBaseOpen_fail;
	    }
	    if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't read previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
	      goto DpsBaseOpen_fail;
	    }
	    P->Item.next = 0;
	    if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
	      goto DpsBaseOpen_fail;
	    }
	    if ((wr = write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't write previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
	      goto DpsBaseOpen_fail;
	    }
	    goto search_again;
	  } else {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't read hash chain for file %s %d of %d bytes (%s:%d)",
		   P->Ifilename, (int)wr, (int)sizeof(DPS_BASEITEM), __FILE__, __LINE__);
	    goto DpsBaseOpen_fail;
	  }
	}
#ifdef DEBUG_SEARCH
	stop_ticks = DpsStartTimer();
	total_ticks = stop_ticks - start_ticks;
	DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase3 %03X in %.5f sec.", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif
      }
    }
    break;          /* search finished without a repair */
search_again:;      /* repair done: run the next attempt */
  }

  P->opened = 1;
  P->mode = mode;

#ifdef DEBUG_SEARCH
  stop_ticks = DpsStartTimer();
  total_ticks = stop_ticks - start_ticks;
  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase4 %03X in %.5f sec.\n", (unsigned int)P->FileNo, (float)total_ticks / 1000);
#endif

  TRACE_OUT(P->A);
  return DPS_OK;

DpsBaseOpen_fail:
  /* Common error exit: drop both file names and report failure. */
  DPS_FREE(P->Ifilename);
  DPS_FREE(P->Sfilename);
  TRACE_OUT(P->A);
  return DPS_ERROR;
}
extern __C_LINK int __DPSCALL DpsBaseOptimize(DPS_BASE_PARAM *P, int sbase) { struct stat sb; urlid_t base, base_from, base_to; long unsigned ActualSize, OriginalSize, i, nitems; off_t pos, posold, NewItemPos, SSize; dps_uint8 diff, gain; double dr = 0.0, cr = 0.0; ssize_t nread; size_t rsize; ssize_t wr; int OptimizeRatio, res, error_cnt; char buffer[BUFSIZ]; DPS_BASEITEM *hTable; DPS_SORTBASEITEM *si = NULL; OptimizeRatio = DpsVarListFindInt(&P->A->Vars, "OptimizeRatio", 15); P->mode = DPS_WRITE_LOCK; if (sbase == -1) { base_from = 0; base_to = (urlid_t)P->NFiles; } else { base_from = sbase; base_to = sbase + 1; } for (base = base_from; base < base_to; base++) { error_cnt = 0; gain = (dps_uint8)0; P->rec_id = ((base & DPS_BASE_MASK) << DPS_BASE_BITS); if (DpsBaseOpen(P, DPS_WRITE_LOCK) != DPS_OK) { DpsLog(P->A, DPS_LOG_ERROR, "Can't open base %s/%s {%s:%d}", P->subdir, P->basename, __FILE__, __LINE__); DpsBaseClose(P); return DPS_ERROR; } if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__); DpsBaseClose(P); return DPS_ERROR; } if (fstat(P->Sfd, &sb) == 0) { SSize = sb.st_size; } else { if ((SSize = (off_t)lseek(P->Sfd, (off_t)0, SEEK_END)) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Sfilename, __FILE__, __LINE__); DpsBaseClose(P); return DPS_ERROR; } } nitems = 0; ActualSize = 0; OriginalSize = 0; while(read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) { nitems++; if ((P->Item.rec_id != 0) && ((dps_uint8)P->Item.offset < (dps_uint8)SSize) && (P->Item.size > 0)) { ActualSize += (long unsigned)P->Item.size; OriginalSize += (long unsigned)(P->Item.orig_size ? P->Item.orig_size : P->Item.size); } } if (ftruncate(P->Ifd, (off_t)(nitems * sizeof(DPS_BASEITEM))) != 0) { dps_strerror(P->A, DPS_LOG_EXTRA, "ftruncate error (pos:%ld) [%s:%d]", (off_t)(nitems * sizeof(DPS_BASEITEM)), __FILE__, __LINE__); } dr = (nitems) ? 
fabs(100.0 * ((long unsigned)SSize - ActualSize) / ((double)SSize + 1.0)) : 0.0; cr = (nitems) ? fabs(100.0 * ActualSize / (OriginalSize + 1)) : 0.0; DpsLog(P->A, DPS_LOG_EXTRA, "Optimize: %s/%s base 0x%X, %ld recs defrag: %.2f%% Ratio: %.2f%% Data: %ld File: %ld", P->subdir, P->basename, P->FileNo, nitems, dr, cr, ActualSize, (long)SSize); if ((dr >= (double)OptimizeRatio) || (ActualSize == 0 && SSize != 0)) { si = (DPS_SORTBASEITEM*)DpsMalloc((nitems + 1) * sizeof(DPS_SORTBASEITEM)); if (si == NULL) { DpsLog(P->A, DPS_LOG_ERROR, "Can't alloc si (%d bytes) at {%s:%d}", (nitems + 1) * sizeof(DPS_SORTBASEITEM), __FILE__, __LINE__); DpsBaseClose(P); return DPS_ERROR; } if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__); DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } for (i = 0; (i < nitems) && (read(P->Ifd, &si[i].Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)); ) { if(si[i].Item.rec_id != 0 && ((dps_uint8)si[i].Item.offset < (dps_uint8)SSize) && (si[i].Item.size > 0) && (si[i].Item.size < ActualSize) ) { i++; } } if (i < nitems) nitems = i; if (nitems > 1) DpsSort((void*)si, (size_t)nitems, sizeof(DPS_SORTBASEITEM), cmpsi); gain = (dps_uint8)0; pos = (off_t)0; posold = (off_t)0; if (nitems > 0) { if ((long unsigned)si[0].Item.offset < (long unsigned)SSize) { posold = (off_t)si[0].Item.offset; } else { si[0].Item.offset = (off_t)0; si[0].Item.size = 0; } } if (nitems > 1) { if (si[0].Item.size > (rsize = (size_t)(si[1].Item.offset - si[0].Item.offset))) { DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by offset: %ld -> %ld", (long)si[0].Item.size, (long)rsize); si[0].Item.size = rsize; error_cnt++; } } if ((diff = (dps_uint8)posold) > 0) { for( lseek(P->Sfd, posold, SEEK_SET), rsize = 0; (rsize < si[0].Item.size) && ((nread = read(P->Sfd, buffer, (rsize + BUFSIZ < si[0].Item.size) ? 
BUFSIZ : (si[0].Item.size - rsize) )) > 0); lseek(P->Sfd, posold, SEEK_SET) ) { lseek(P->Sfd, pos, SEEK_SET); (void)write(P->Sfd, buffer, (size_t)nread); rsize += (size_t)nread; posold += (off_t)nread; pos += (off_t)nread; } si[0].Item.offset = 0; if (rsize != si[0].Item.size) { DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by size: %ld -> %ld", (long)si[0].Item.size, (long)rsize); si[0].Item.size = rsize; error_cnt++; } gain += diff; } if (nitems > 0) for (i = 0; i < nitems - 1; i++) { if ((long unsigned)si[i + 1].Item.offset > (long unsigned)SSize) { DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", i , (long)si[i + 1].Item.offset, (long)SSize); si[i + 1].Item.size = 0; si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size; error_cnt++; } else { pos = (off_t)(si[i].Item.offset + si[i].Item.size); posold = (off_t)si[i + 1].Item.offset; if (i < nitems - 2) { if (si[i + 1].Item.size > (rsize = (size_t)(si[i + 2].Item.offset - si[i + 1].Item.offset))) { DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by offset: %ld -> %ld", i + 1, (long)si[i + 1].Item.size, (long)rsize ); si[i + 1].Item.size = rsize; error_cnt++; } } if ((diff = (dps_uint8)posold - (dps_uint8)pos) > 0) { for( lseek(P->Sfd, posold, SEEK_SET), rsize = 0; (rsize < si[i + 1].Item.size) && ((nread = read(P->Sfd, buffer, (rsize + BUFSIZ < si[i + 1].Item.size) ? BUFSIZ : (si[i + 1].Item.size - rsize) )) > 0); lseek(P->Sfd, posold, SEEK_SET) ) { lseek(P->Sfd, pos, SEEK_SET); (void)write(P->Sfd, buffer, (size_t)nread); rsize += (size_t)nread; posold += (off_t)nread; pos += (off_t)nread; } if (rsize != si[i + 1].Item.size) { DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by size: %ld -> %ld", i + 1, (long)si[i + 1].Item.size, (long)rsize); si[i + 1].Item.size = rsize; error_cnt++; } si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size; gain += diff; } } } posold = SSize; pos = (nitems) ? 
(off_t)(si[nitems - 1].Item.offset + si[nitems - 1].Item.size) : (off_t)0; if (ftruncate(P->Sfd, (off_t)(pos)) != 0) { dps_strerror(P->A, DPS_LOG_ERROR, "ftruncate error (pos:%ld) [%s:%d]", pos, __FILE__, __LINE__); } SSize = pos; if (posold > pos) { gain += ((dps_uint8)posold - (dps_uint8)pos); } /*if (gain != 0 || OptimizeRatio == 0 || error_cnt > 0)*/ { posold = lseek(P->Ifd, (off_t)0, SEEK_END); (void)ftruncate(P->Ifd, (off_t)0); lseek(P->Ifd, (off_t)0, SEEK_SET); if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) { DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", sizeof(DPS_BASEITEM) * DPS_HASH_PRIME); DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) { dps_strerror(P->A, DPS_LOG_ERROR, "[%s:%d] Can't set new index for file %s\nwritten %d bytes of %d", __FILE__, __LINE__, P->Ifilename, wr, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME); DPS_FREE(hTable); DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } DPS_FREE(hTable); for (i = 0; i < nitems; i++) { if (si[i].Item.rec_id == 0 || si[i].Item.size == 0) continue; if ((long)si[i].Item.offset > (long)SSize) { DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", i , (long)si[i].Item.offset, (long)SSize); error_cnt++; continue; } P->rec_id = si[i].Item.rec_id; if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) { DpsBaseClose(P); DPS_FREE(si); return res; } if (P->Item.rec_id != P->rec_id) { if (P->mishash && P->Item.rec_id != 0) { if ((P->Item.next = (dps_uint8)(NewItemPos = lseek(P->Ifd, (off_t)0, SEEK_END))) == (dps_uint8)-1) { DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) { DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) { DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } 
P->CurrentItemPos = (dps_uint8)NewItemPos; } } P->Item = si[i].Item; P->Item.next = (off_t)0; if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__); DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) { DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__); DpsBaseClose(P); DPS_FREE(si); return DPS_ERROR; } } pos = lseek(P->Ifd, (off_t)0, SEEK_END); gain += ((dps_uint8)posold - (dps_uint8)pos); DpsLog(P->A, DPS_LOG_DEBUG, "Optimize: %s/%s base 0x%X cleaned, %ld bytes freed", P->subdir, P->basename, base, gain); } DPS_FREE(si); } if (error_cnt) base--; DpsBaseClose(P); } return DPS_OK; }
/*
 * Store `len` bytes from `buffer` as the record P->rec_id.
 *
 *   P       base descriptor; DpsBaseSeek() is called with DPS_WRITE_LOCK to
 *           open/position it on the record's index slot.
 *   buffer  data to store.
 *   len     number of bytes in buffer.
 *
 * With HAVE_ZLIB and P->zlib_method == Z_DEFLATED the data is deflated first;
 * the index then records the compressed size in Item.size and `len` in
 * Item.orig_size (orig_size stays 0 for data stored as is).
 *
 * An existing record is overwritten in place when the new data fits into its
 * current size, otherwise the data is appended to the data file.  A new
 * record is appended, and a new index slot is chained in when the hash slot
 * is occupied by another record.
 *
 * Returns DPS_OK on success, the DpsBaseSeek() error, or DPS_ERROR on any
 * allocation, compression, seek or write failure -- including a failure to
 * write the index slot.
 */
__C_LINK int __DPSCALL DpsBaseWrite(DPS_BASE_PARAM *P, void *buffer, size_t len) {
  dps_uint8 NewItemPos;
  int res = DPS_OK;
  size_t size = len;
  size_t orig_size = 0;
  void *data = buffer;
#ifdef HAVE_ZLIB
  z_stream zstream;
  Byte *CData = NULL;
  int zrc;

  bzero(&zstream, sizeof(zstream));
  zstream.zalloc = Z_NULL;
  zstream.zfree = Z_NULL;
  zstream.opaque = Z_NULL;
  zstream.next_in = buffer;

  if (   (P->zlib_method == Z_DEFLATED)
      && (deflateInit2(&zstream, P->zlib_level, Z_DEFLATED, P->zlib_windowBits, P->zlib_memLevel, P->zlib_strategy) == Z_OK) ) {

    zstream.avail_in = (uInt)len;
    zstream.avail_out = (uInt)(/*sizeof(gz_header) +*/ 4096 + 2 * len);
    CData = zstream.next_out = (Byte *) DpsMalloc(zstream.avail_out);
    if (zstream.next_out == NULL) {
      /* The stream was initialised above; release its internal state. */
      deflateEnd(&zstream);
      return DPS_ERROR;
    }
    zrc = deflate(&zstream, Z_FINISH);
    deflateEnd(&zstream);
    if (zrc != Z_STREAM_END) {
      /* An unfinished stream must not be stored as if it were complete. */
      DpsLog(P->A, DPS_LOG_ERROR, "Can't compress %ld bytes {%s:%d}", (long)len, __FILE__, __LINE__);
      res = DPS_ERROR;
      goto DpsBaseWrite_exit;
    }
    orig_size = len;
    size = zstream.total_out;
    data = CData;
  }
#endif

  if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) {
    goto DpsBaseWrite_exit;
  }

  if (P->Item.rec_id == P->rec_id) {
    /* Record exists: reuse its place when the new data fits, else append. */
    if (P->Item.size < size) {
      if ((P->Item.offset = (dps_uint8)lseek(P->Sfd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
    } else {
      if (lseek(P->Sfd, (off_t)P->Item.offset, SEEK_SET) == (off_t)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s offset %ld {%s:%d}", P->Sfilename, (long)P->Item.offset, __FILE__, __LINE__);
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
    }
  } else { /* new rec_id added */
    if (P->mishash && P->Item.rec_id != 0) {
      /* Hash slot (or chain tail) belongs to another record: make it point
	 to a new slot at the end of the index file and continue there. */
      if ((P->Item.next = NewItemPos = (dps_uint8)lseek(P->Ifd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
	DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
      if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
      if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
	res = DPS_ERROR;
	goto DpsBaseWrite_exit;
      }
      P->CurrentItemPos = NewItemPos;
      P->Item.next = (off_t)0;
    }
    P->Item.rec_id = P->rec_id;
    if ((P->Item.offset = (dps_uint8)lseek(P->Sfd, (off_t)0, SEEK_END)) == (dps_uint8)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
      res = DPS_ERROR;
      goto DpsBaseWrite_exit;
    }
  }

  /* Data first, index slot second. */
  if (write(P->Sfd, data, size) != (ssize_t)size) {
    dps_strerror(P->A, DPS_LOG_ERROR, "Can't write %ld bytes at %ld of file %s {%s:%d}",
		 (long)size, (long)P->Item.offset, P->Sfilename, __FILE__, __LINE__);
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }

  if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
    DpsLog(P->A, DPS_LOG_ERROR, "Can't seek file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }
  P->Item.size = size;
  P->Item.orig_size = orig_size;
  if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
    DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
    /* Without its index slot the record is not stored: tell the caller. */
    res = DPS_ERROR;
    goto DpsBaseWrite_exit;
  }
  /* DpsBaseFsync(P->A, P);*/

#ifdef DEBUG_SEARCH
  DpsLog(P->A, DPS_LOG_DEBUG, "[%s/%s] Stored rec_id: %x Size: %d", P->subdir, P->basename, (unsigned int)P->rec_id, (int)len);
#endif

DpsBaseWrite_exit:
#ifdef HAVE_ZLIB
  DPS_FREE(CData);
#endif
  return res;
}