static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) { DPS_ENV *Conf = Indexer->Conf; DPS_UINT4URLIDLIST L; size_t k,prev; urlid_t *data = NULL; DPS_UINT4_POS_LEN *ind=NULL; size_t mind=1000,nind=0; char fname[PATH_MAX]; int dat_fd=0, ind_fd=0, rc; const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR); bzero(&L, sizeof(DPS_UINT4URLIDLIST)); rc = DpsLimit4(Indexer, &L, field, type, db); if(rc != DPS_OK) { DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__); goto err1; } if(!L.Item)return(1); if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4); data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data)); if(!data) { fprintf(stderr,"Error1: %s\n",strerror(errno)); goto err1; } ind=(DPS_UINT4_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error2: %s\n",strerror(errno)); goto err1; } prev=0; for(k=0; k<L.nitems; k++) { data[k]=L.Item[k].url_id; if((k==L.nitems-1) || (L.Item[k].val!=L.Item[prev].val)) { if(nind==mind) { mind+=1000; ind=(DPS_UINT4_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error3: %s\n",strerror(errno)); goto err1; } } /* Fill index */ ind[nind].val=L.Item[prev].val; ind[nind].pos = prev * sizeof(*data); if (k == L.nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data); else ind[nind].len = (k - prev) * sizeof(*data); DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len); nind++; prev=k; } } if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(dat_fd); if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(dat_fd); DpsClose(dat_fd); DPS_FREE(data); dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(ind_fd); if((nind*sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT4_POS_LEN))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(ind_fd); DpsClose(ind_fd); DPS_FREE(ind); return(0); err1: if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } DPS_FREE(data); DPS_FREE(ind); if(dat_fd) DpsClose(dat_fd); if(ind_fd) DpsClose(ind_fd); return(1); }
static int MakeNestedIndex(DPS_AGENT *Indexer, DPS_UINT8URLIDLIST *L, const char *lim_name, DPS_DB *db) { DPS_ENV *Conf = Indexer->Conf; size_t k, prev; urlid_t *data=NULL; DPS_UINT8_POS_LEN *ind=NULL; size_t mind=1000, nind=0, ndata; char fname[PATH_MAX]; int dat_fd=0, ind_fd=0; int rc=DPS_OK; const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR); if(!L->Item)return(1); if (L->nitems > 1) DpsSort(L->Item, L->nitems, sizeof(DPS_UINT8URLID), (qsort_cmp)cmp_ind8); data = (urlid_t*)DpsMalloc((L->nitems + 1) * sizeof(urlid_t)); if(!data){ DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (L->nitems + 1) * sizeof(urlid_t), __FILE__, __LINE__); goto err1; } ind=(DPS_UINT8_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT8_POS_LEN)); if(!ind){ DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", mind * sizeof(DPS_UINT8_POS_LEN), __FILE__, __LINE__); goto err1; } prev=0; for(k=0; k < L->nitems; k++) { data[k] = L->Item[k].url_id; if((k == L->nitems-1) || (L->Item[k].hi != L->Item[prev].hi) || (L->Item[k].lo != L->Item[prev].lo)) { if(nind==mind){ mind+=1000; ind=(DPS_UINT8_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT8_POS_LEN)); if(!ind) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", mind * sizeof(DPS_UINT8_POS_LEN), __FILE__, __LINE__); goto err1; } } /* Fill index */ ind[nind].hi = L->Item[prev].hi; ind[nind].lo = L->Item[prev].lo; ind[nind].pos = prev * sizeof(*data); if (k == L->nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data); else ind[nind].len = (k - prev) * sizeof(*data); DpsLog(Indexer, DPS_LOG_DEBUG, "%08X%08X - %d %d\n", ind[nind].hi, ind[nind].lo, (int)ind[nind].pos, ind[nind].len); nind++; prev=k; } } ndata = L->nitems; ClearIndex8(L); dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR,DPSSLASH, lim_name); if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsWriteLock(dat_fd); if((ndata * sizeof(*data)) != (size_t)write(dat_fd, data, ndata * sizeof(*data))) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsUnLock(dat_fd); DpsClose(dat_fd); DPS_FREE(data); dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.ind", vardir, DPSSLASH,DPS_TREEDIR, DPSSLASH, lim_name); if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsWriteLock(ind_fd); if((nind*sizeof(DPS_UINT8_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT8_POS_LEN))){ DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__); goto err1; } DpsUnLock(ind_fd); DpsClose(ind_fd); DPS_FREE(ind); return(0); err1: ClearIndex8(L); DPS_FREE(data); DPS_FREE(ind); if(dat_fd) DpsClose(dat_fd); if(ind_fd) DpsClose(ind_fd); return(1); }
__C_LINK int __DPSCALL DpsBaseOpen(DPS_BASE_PARAM *P, int mode) { unsigned int hash; size_t filenamelen, z; ssize_t wr; DPS_BASEITEM *hTable; #ifdef DEBUG_SEARCH unsigned long total_ticks, stop_ticks, start_ticks = DpsStartTimer(); #endif TRACE_IN(P->A, "DpsBaseOpen"); if (P->opened) DpsBaseClose(P); if (P->NFiles == 0) P->NFiles = DpsVarListFindUnsigned(&P->A->Vars, "BaseFiles", 0x100); P->FileNo = DPS_FILENO(P->rec_id, P->NFiles); hash = DPS_HASH(P->rec_id); filenamelen = dps_strlen(P->vardir) + dps_strlen(P->subdir) + dps_strlen(P->indname) + dps_strlen(P->basename) + 48; if ( ((P->Ifilename = (char *)DpsMalloc(filenamelen)) == NULL) || ((P->Sfilename = (char *)DpsMalloc(filenamelen)) == NULL) ) { DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error 2x%d bytes %s:%d", filenamelen, __FILE__, __LINE__); TRACE_OUT(P->A); return DPS_ERROR; } sprintf(P->Sfilename, "%s/%s/%s%04zx.s", P->vardir, P->subdir, P->basename, P->FileNo); sprintf(P->Ifilename, "%s/%s/%s%04zx.i", P->vardir, P->subdir, P->indname, P->FileNo); if ((P->Ifd = DpsOpen2(P->Ifilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY)) < 0) { if ((mode == DPS_READ_LOCK) || ((P->Ifd = DpsOpen3(P->Ifilename, O_RDWR | O_CREAT | DPS_BINARY /*#ifdef O_DIRECT | O_DIRECT #endif*/ , S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH )) < 0)) { dps_strerror(P->A, (mode == DPS_READ_LOCK && errno == ENOENT) ? DPS_LOG_DEBUG : DPS_LOG_ERROR, "Can't open/create file %s for %s [%s:%d]", P->Ifilename, (mode == DPS_READ_LOCK) ? "read" : "write", __FILE__, __LINE__); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } #if 1 DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo)); #endif DpsWriteLock(P->Ifd); if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) { DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", sizeof(DPS_BASEITEM) * DPS_HASH_PRIME); DpsUnLock(P->Ifd); #if 1 DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo)); #endif DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) { dps_strerror(P->A, DPS_LOG_ERROR, "Can't set new index for file %s\nwritten %d bytes of %d\nIfd:%d hTable:%x", P->Ifilename, wr, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME, P->Ifd, hTable); DPS_FREE(hTable); DpsUnLock(P->Ifd); #if 1 DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo)); #endif DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } DpsUnLock(P->Ifd); #if 1 DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo)); #endif DPS_FREE(hTable); if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } } if (!P->A->Flags.cold_var) { #if 1 DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo)); #endif switch (mode) { case DPS_READ_LOCK: DpsReadLock(P->Ifd); break; case DPS_WRITE_LOCK: DpsWriteLock(P->Ifd); break; } P->locked = 1; } if ((P->Sfd = DpsOpen2(P->Sfilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY /*#ifdef O_DIRECT | O_DIRECT #endif*/ )) < 0) { if ((mode == DPS_READ_LOCK) || ((P->Sfd = DpsOpen3(P->Sfilename, O_RDWR | O_CREAT | DPS_BINARY /*#ifdef O_DIRECT | O_DIRECT #endif*/ , S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH )) < 0)) { DpsLog(P->A, DPS_LOG_ERROR, "Can't open/create file %s", P->Sfilename); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } } if (!P->A->Flags.cold_var) { switch(mode) { case DPS_READ_LOCK: DpsReadLock(P->Sfd); break; case DPS_WRITE_LOCK: DpsWriteLock(P->Sfd); break; } } #ifdef DEBUG_SEARCH stop_ticks = DpsStartTimer(); total_ticks = stop_ticks - start_ticks; DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase1 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000); #endif for (z = 0; z < 3; z++) { /* search rec_id */ if ( (P->CurrentItemPos = (dps_uint8)lseek(P->Ifd, (off_t)(hash * sizeof(DPS_BASEITEM)), SEEK_SET)) == (dps_uint8)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } if (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) { DpsLog(P->A, DPS_LOG_ERROR, "{%s:%d} Can't read index for file %s seek:%ld hash: %u (%d)", __FILE__, __LINE__, P->Ifilename, P->CurrentItemPos, hash, hash); bzero(&P->Item, sizeof(P->Item)); /* DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; */ } #ifdef DEBUG_SEARCH stop_ticks = DpsStartTimer(); total_ticks = stop_ticks - start_ticks; DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase2 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000); #endif if (P->Item.rec_id == P->rec_id || P->Item.rec_id == 0) P->mishash = 0; else P->mishash = 1; P->PreviousItemPos = P->CurrentItemPos; if (P->mishash) while((P->Item.next != 0) && (P->Item.rec_id != P->rec_id)) { P->PreviousItemPos = P->CurrentItemPos; P->CurrentItemPos = P->Item.next; if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) { if (wr == 0) { DpsLog(P->A, DPS_LOG_ERROR, "Possible corrupted hash chain for file %s, trying to restore (%s:%d)", P->Ifilename, __FILE__, __LINE__); if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) { DpsLog(P->A, DPS_LOG_ERROR, "Can't read previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } P->Item.next = 0; if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) { DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } if ((wr = write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) { DpsLog(P->A, DPS_LOG_ERROR, "Can't write previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } goto search_again; } else { DpsLog(P->A, DPS_LOG_ERROR, "Can't read hash chain for file %s %d of %d bytes (%s:%d)", P->Ifilename, wr, sizeof(DPS_BASEITEM), __FILE__, __LINE__); DPS_FREE(P->Ifilename); DPS_FREE(P->Sfilename); TRACE_OUT(P->A); return DPS_ERROR; } } #ifdef DEBUG_SEARCH stop_ticks = DpsStartTimer(); total_ticks = stop_ticks - start_ticks; DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase3 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000); #endif } break; search_again:; } P->opened = 1; P->mode = mode; #ifdef DEBUG_SEARCH stop_ticks = DpsStartTimer(); total_ticks = stop_ticks - start_ticks; DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase4 %03X in %.5f sec.\n", P->FileNo, (float)total_ticks / 1000); #endif /* fprintf(stderr, "Sfd:0x%x - %s\n", P->Sfd, P->Sfilename); fprintf(stderr, "Ifd:0x%x - %s\n", P->Ifd, P->Ifilename);*/ TRACE_OUT(P->A); return DPS_OK; }