Пример #1
0
/*
 * Build the on-disk "linear" limit index for one 4-byte field.
 *
 * The (value, url_id) pairs are obtained from DpsLimit4(), sorted with the
 * cmp_ind4 comparator (runs of equal values are assumed to be contiguous
 * after sorting) and written as two files under <vardir>/DPS_TREEDIR/:
 *
 *   <lim_name>.dat  the url_id of every item, in sorted order;
 *   <lim_name>.ind  one DPS_UINT4_POS_LEN record per run of equal values:
 *                   the value, plus the byte offset (pos) and byte length
 *                   (len) of that run inside the .dat file.
 *
 * Indexer   agent used for configuration and logging.
 * field     field name passed through to DpsLimit4().
 * lim_name  base name of the two output files.
 * type      limit type passed through to DpsLimit4().
 * db        database handle; db->vardir overrides the "VarDir" setting.
 *
 * Returns 0 on success, 1 on any failure or when DpsLimit4() yields no items.
 */
static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) {
    DPS_ENV *Conf = Indexer->Conf;
    DPS_UINT4URLIDLIST  L;
    size_t    k, prev;
    urlid_t   *data = NULL;
    DPS_UINT4_POS_LEN *ind = NULL;
    size_t    mind = 1000, nind = 0;
    char fname[PATH_MAX];
    /* -1 means "not open": 0 is a valid descriptor and a failed open yields a negative value */
    int  dat_fd = -1, ind_fd = -1, rc;
    const char	*vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);

    bzero(&L, sizeof(DPS_UINT4URLIDLIST));

    rc = DpsLimit4(Indexer, &L, field, type, db);

    if (rc != DPS_OK) {
        DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__);
        goto err1;
    }

    if (!L.Item) return(1);

    if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4);

    data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data));
    if (!data) {
        fprintf(stderr,"Error1: %s\n",strerror(errno));
        goto err1;
    }
    ind = (DPS_UINT4_POS_LEN*)DpsMalloc(mind * sizeof(DPS_UINT4_POS_LEN));
    if (!ind) {
        fprintf(stderr,"Error2: %s\n",strerror(errno));
        goto err1;
    }

    /*
     * Walk the sorted items; `prev` is the first item of the current run.
     * A run is closed at item k when k is the last item or the NEXT item
     * carries a different value.  Looking ahead (instead of comparing item k
     * itself with the run start) guarantees that a final item with a value of
     * its own gets a separate record instead of being merged into the
     * preceding run.
     */
    prev = 0;
    for (k = 0; k < L.nitems; k++) {
        data[k] = L.Item[k].url_id;
        if ((k + 1 < L.nitems) && (L.Item[k + 1].val == L.Item[prev].val)) continue;

        if (nind == mind) {
            mind += 1000;
            ind = (DPS_UINT4_POS_LEN*)DpsRealloc(ind, mind * sizeof(DPS_UINT4_POS_LEN));
            if (!ind) {
                fprintf(stderr,"Error3: %s\n",strerror(errno));
                goto err1;
            }
        }
        /* Fill index: run [prev .. k] */
        ind[nind].val = L.Item[prev].val;
        ind[nind].pos = prev * sizeof(*data);
        ind[nind].len = (k - prev + 1) * sizeof(*data);
        DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len);
        nind++;

        prev = k + 1;
    }

    /*
     * The source list is no longer needed, release it before doing file I/O.
     * It is explicitly marked as released so that the err1 path cannot
     * unmap/unlink/free it a second time.
     */
    if (L.mapped) {
#ifdef HAVE_SYS_MMAN_H
        if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#elif defined(HAVE_SYS_SHM_H)
        if (shmdt(L.Item)) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#endif
        unlink(L.shm_name);
        L.mapped = 0;
    } else {
        DPS_FREE(L.Item);
    }
    L.Item = NULL;

    /* Write the url_id array */
    dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
    if ((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
        fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsWriteLock(dat_fd);
    if ((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) {
        fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsUnLock(dat_fd);
    DpsClose(dat_fd);
    dat_fd = -1;               /* closed: keep err1 from closing it again */
    DPS_FREE(data);
    data = NULL;

    /* Write the per-value index */
    dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
    if ((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
        fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsWriteLock(ind_fd);
    if ((nind * sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd, ind, nind * sizeof(DPS_UINT4_POS_LEN))) {
        fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsUnLock(ind_fd);
    DpsClose(ind_fd);
    DPS_FREE(ind);

    return(0);

err1:
    /* Release whatever is still held; everything already released is NULL / -1 / unmapped. */
    if (L.mapped) {
#ifdef HAVE_SYS_MMAN_H
        if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#elif defined(HAVE_SYS_SHM_H)
        if (shmdt(L.Item)) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#endif
        unlink(L.shm_name);
    } else {
        DPS_FREE(L.Item);
    }
    DPS_FREE(data);
    DPS_FREE(ind);
    if (dat_fd >= 0) DpsClose(dat_fd);
    if (ind_fd >= 0) DpsClose(ind_fd);
    return(1);
}
Пример #2
0
/*
 * Build the on-disk "nested" limit index from an already collected list of
 * 8-byte (hi, lo) keys.
 *
 * The items of L are sorted with the cmp_ind8 comparator (runs of equal
 * (hi, lo) keys are assumed to be contiguous after sorting) and written as
 * two files under <vardir>/DPS_TREEDIR/:
 *
 *   <lim_name>.dat  the url_id of every item, in sorted order;
 *   <lim_name>.ind  one DPS_UINT8_POS_LEN record per run of equal keys:
 *                   hi, lo, plus the byte offset (pos) and byte length (len)
 *                   of that run inside the .dat file.
 *
 * Indexer   agent used for configuration and logging.
 * L         item list; it is handed to ClearIndex8() before this function
 *           returns, except on the early return taken when L->Item is NULL.
 * lim_name  base name of the two output files.
 * db        database handle; db->vardir overrides the "VarDir" setting.
 *
 * Returns 0 on success, 1 on failure or when L holds no items.
 */
static int MakeNestedIndex(DPS_AGENT *Indexer, DPS_UINT8URLIDLIST *L, const char *lim_name, DPS_DB *db) {
     DPS_ENV   *Conf = Indexer->Conf;
     size_t    k, prev;
     urlid_t   *data = NULL;
     DPS_UINT8_POS_LEN *ind = NULL;
     size_t    mind = 1000, nind = 0, ndata;
     char fname[PATH_MAX];
     /* -1 means "not open": 0 is a valid descriptor and a failed open yields a negative value */
     int  dat_fd = -1, ind_fd = -1;
     int  cleared = 0;          /* set once ClearIndex8(L) has been called */
     const char	*vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);

     if (!L->Item) return(1);

     if (L->nitems > 1) DpsSort(L->Item, L->nitems, sizeof(DPS_UINT8URLID), (qsort_cmp)cmp_ind8);

     data = (urlid_t*)DpsMalloc((L->nitems + 1) * sizeof(urlid_t));
     if (!data) {
       /* size_t is cast so that the argument really matches the "%d" conversion */
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (int)((L->nitems + 1) * sizeof(urlid_t)), __FILE__, __LINE__);
       goto err1;
     }
     ind = (DPS_UINT8_POS_LEN*)DpsMalloc(mind * sizeof(DPS_UINT8_POS_LEN));
     if (!ind) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (int)(mind * sizeof(DPS_UINT8_POS_LEN)), __FILE__, __LINE__);
       goto err1;
     }

     /*
      * Walk the sorted items; `prev` is the first item of the current run.
      * A run is closed at item k when k is the last item or the NEXT item
      * carries a different key.  Looking ahead (instead of comparing item k
      * itself with the run start) guarantees that a final item with a key of
      * its own gets a separate record instead of being merged into the
      * preceding run.
      */
     prev = 0;
     for (k = 0; k < L->nitems; k++) {
          data[k] = L->Item[k].url_id;
          if ((k + 1 < L->nitems)
              && (L->Item[k + 1].hi == L->Item[prev].hi)
              && (L->Item[k + 1].lo == L->Item[prev].lo)) continue;

          if (nind == mind) {
               mind += 1000;
               ind = (DPS_UINT8_POS_LEN*)DpsRealloc(ind, mind * sizeof(DPS_UINT8_POS_LEN));
               if (!ind) {
                    DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (int)(mind * sizeof(DPS_UINT8_POS_LEN)), __FILE__, __LINE__);
                    goto err1;
               }
          }
          /* Fill index: run [prev .. k] */
          ind[nind].hi = L->Item[prev].hi;
          ind[nind].lo = L->Item[prev].lo;
          ind[nind].pos = prev * sizeof(*data);
          ind[nind].len = (k - prev + 1) * sizeof(*data);
          DpsLog(Indexer, DPS_LOG_DEBUG, "%08X%08X - %d %d\n", ind[nind].hi, ind[nind].lo, (int)ind[nind].pos, ind[nind].len);
          nind++;

          prev = k + 1;
     }

     /* Remember the item count, then release the list before doing file I/O. */
     ndata = L->nitems;
     ClearIndex8(L);
     cleared = 1;

     /* Write the url_id array */
     dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR,DPSSLASH, lim_name);
     if ((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsWriteLock(dat_fd);
     if ((ndata * sizeof(*data)) != (size_t)write(dat_fd, data, ndata * sizeof(*data))) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsUnLock(dat_fd);
     DpsClose(dat_fd);
     dat_fd = -1;               /* closed: keep err1 from closing it again */
     DPS_FREE(data);
     data = NULL;

     /* Write the per-key index */
     dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.ind", vardir, DPSSLASH,DPS_TREEDIR, DPSSLASH, lim_name);
     if ((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsWriteLock(ind_fd);
     if ((nind * sizeof(DPS_UINT8_POS_LEN)) != (size_t)write(ind_fd, ind, nind * sizeof(DPS_UINT8_POS_LEN))) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsUnLock(ind_fd);
     DpsClose(ind_fd);
     DPS_FREE(ind);

     return(0);

err1:
     /* Release whatever is still held; the list is cleared exactly once. */
     if (!cleared) ClearIndex8(L);
     DPS_FREE(data);
     DPS_FREE(ind);
     if (dat_fd >= 0) DpsClose(dat_fd);
     if (ind_fd >= 0) DpsClose(ind_fd);
     return(1);
}
Пример #3
0
/*
 * Open the pair of base files that hold record P->rec_id and position the
 * index on that record's hash-chain entry.
 *
 * Steps, in order:
 *   1. Close P first if it is already opened.
 *   2. Derive P->FileNo and the hash slot from P->rec_id (P->NFiles defaults
 *      to the "BaseFiles" variable, 0x100 if unset).
 *   3. Build the storage (".s") and index (".i") file names.
 *   4. Open the index file; when it cannot be opened and mode is not
 *      DPS_READ_LOCK, create it and write an initial table of
 *      DPS_HASH_PRIME items.
 *   5. Unless P->A->Flags.cold_var is set, take the per-file lock and the
 *      file lock matching `mode`, and set P->locked.
 *   6. Open (or, for non-read modes, create) the storage file and lock it.
 *   7. Read the hash slot into P->Item and follow Item.next until the record
 *      is found or the chain ends, recording P->CurrentItemPos,
 *      P->PreviousItemPos and P->mishash.
 *
 * P     base descriptor; updated in place.
 * mode  DPS_READ_LOCK or DPS_WRITE_LOCK (other values take no file lock).
 *
 * Returns DPS_OK on success (P->opened = 1, P->mode = mode) or DPS_ERROR.
 *
 * NOTE(review): the error returns free the two file names but do not close
 * P->Ifd / P->Sfd nor release locks taken in step 5 - presumably the caller
 * is expected to clean up (e.g. via DpsBaseClose); confirm.
 */
__C_LINK int __DPSCALL DpsBaseOpen(DPS_BASE_PARAM *P, int mode) {
  unsigned int hash;
  size_t filenamelen, z;
  ssize_t wr;
  DPS_BASEITEM  *hTable;
#ifdef DEBUG_SEARCH
  unsigned long total_ticks, stop_ticks, start_ticks = DpsStartTimer();
#endif

  TRACE_IN(P->A, "DpsBaseOpen");

  /* Re-opening: drop the previously opened base first. */
  if (P->opened) DpsBaseClose(P);

  if (P->NFiles == 0) P->NFiles = DpsVarListFindUnsigned(&P->A->Vars, "BaseFiles", 0x100);
  P->FileNo =  DPS_FILENO(P->rec_id, P->NFiles);

  hash = DPS_HASH(P->rec_id);
  /* 48 extra bytes leave room for the separators, the hex file number, the suffix and the terminator. */
  filenamelen = dps_strlen(P->vardir) + dps_strlen(P->subdir) + dps_strlen(P->indname) + dps_strlen(P->basename) +  48;
  if (
      ((P->Ifilename = (char *)DpsMalloc(filenamelen)) == NULL) ||
      ((P->Sfilename = (char *)DpsMalloc(filenamelen)) == NULL)            ) {
    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
    DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error 2x%d bytes %s:%d", filenamelen, __FILE__, __LINE__);
    TRACE_OUT(P->A);
    return DPS_ERROR;
  }
  sprintf(P->Sfilename, "%s/%s/%s%04zx.s", P->vardir, P->subdir, P->basename, P->FileNo);
  sprintf(P->Ifilename, "%s/%s/%s%04zx.i", P->vardir, P->subdir, P->indname, P->FileNo);

  /* Index file: try to open an existing one; fall back to creating it unless we are only reading. */
  if ((P->Ifd = DpsOpen2(P->Ifilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY)) < 0) {
    if ((mode == DPS_READ_LOCK) || ((P->Ifd = DpsOpen3(P->Ifilename, O_RDWR | O_CREAT | DPS_BINARY
/*#ifdef O_DIRECT
		     | O_DIRECT
#endif*/
						   ,
						   S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH
						   )) < 0)) {
      /* A missing file while reading is only logged at debug level. */
      dps_strerror(P->A, (mode == DPS_READ_LOCK && errno == ENOENT) ? DPS_LOG_DEBUG : DPS_LOG_ERROR, "Can't open/create file %s for %s [%s:%d]", 
	     P->Ifilename, (mode == DPS_READ_LOCK) ? "read" : "write", __FILE__, __LINE__);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    /* The index file was just created: write its initial hash table under both locks. */
#if 1
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    DpsWriteLock(P->Ifd);
    /* NOTE(review): DpsXmalloc presumably returns zero-filled memory, giving an empty table - confirm. */
    if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) {
      DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", sizeof(DPS_BASEITEM) * DPS_HASH_PRIME);
      DpsUnLock(P->Ifd); 
#if 1
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) {
      dps_strerror(P->A, DPS_LOG_ERROR, "Can't set new index for file %s\nwritten %d bytes of %d\nIfd:%d hTable:%x", 
	     P->Ifilename, wr, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME, P->Ifd, hTable);
      DPS_FREE(hTable);
      DpsUnLock(P->Ifd); 
#if 1
      DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
    DpsUnLock(P->Ifd); 
#if 1
    DPS_RELEASELOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    DPS_FREE(hTable);
    /* Rewind after writing the table. */
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
  }
  /* Locks for the caller's session; skipped entirely when the cold_var flag is set. */
  if (!P->A->Flags.cold_var) {
#if 1
    DPS_GETLOCK(P->A, DPS_LOCK_BASE_N(P->FileNo));
#endif
    switch (mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Ifd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Ifd);
      break;
    }
    P->locked = 1;
  }

  /* Storage file: same open-or-create policy as the index file. */
  if ((P->Sfd = DpsOpen2(P->Sfilename, ((mode == DPS_READ_LOCK) ? O_RDONLY : O_RDWR) | DPS_BINARY
/*#ifdef O_DIRECT
		     | O_DIRECT
#endif*/
		     )) < 0) {
    if ((mode == DPS_READ_LOCK) || ((P->Sfd = DpsOpen3(P->Sfilename, O_RDWR | O_CREAT | DPS_BINARY
/*#ifdef O_DIRECT
		     | O_DIRECT
#endif*/
						   , 
						   S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH
						   )) < 0)) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open/create file %s", P->Sfilename);
      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
      TRACE_OUT(P->A);
      return DPS_ERROR;
    }
  }
  if (!P->A->Flags.cold_var) {
    switch(mode) {
    case DPS_READ_LOCK:
      DpsReadLock(P->Sfd);
      break;
    case DPS_WRITE_LOCK:
      DpsWriteLock(P->Sfd);
      break;
    }
  }

#ifdef DEBUG_SEARCH
    stop_ticks = DpsStartTimer();
    total_ticks = stop_ticks - start_ticks;
    DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase1 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000);
#endif

    /*
     * Look the record up.  The lookup is attempted at most three times: an
     * attempt is repeated (via search_again) only after a broken hash chain
     * has been cut off below.
     */
    for (z = 0; z < 3; z++) {

	/* search rec_id */
	if ( (P->CurrentItemPos = (dps_uint8)lseek(P->Ifd, (off_t)(hash * sizeof(DPS_BASEITEM)), SEEK_SET)) == (dps_uint8)-1) {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s", P->Ifilename);
	    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	    TRACE_OUT(P->A);
	    return DPS_ERROR;
	}
      /* A failed or short read of the slot is logged but not fatal: the item is treated as all-zero. */
      if (read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
	DpsLog(P->A, DPS_LOG_ERROR, "{%s:%d} Can't read index for file %s seek:%ld hash: %u (%d)", 
	       __FILE__, __LINE__, P->Ifilename, P->CurrentItemPos, hash, hash);
	bzero(&P->Item, sizeof(P->Item));
/*	DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	TRACE_OUT(P->A);
	return DPS_ERROR;
*/
      }

#ifdef DEBUG_SEARCH
      stop_ticks = DpsStartTimer();
      total_ticks = stop_ticks - start_ticks;
      DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase2 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000);
#endif

      /* mishash == 1: the slot holds a different, non-zero rec_id, so the chain must be followed. */
      if (P->Item.rec_id == P->rec_id || P->Item.rec_id == 0) P->mishash = 0;
      else P->mishash = 1;
      P->PreviousItemPos = P->CurrentItemPos;
      if (P->mishash)
	/* Follow Item.next until the record is found or the chain ends. */
	while((P->Item.next != 0) && (P->Item.rec_id != P->rec_id)) {
	  P->PreviousItemPos = P->CurrentItemPos;
	  P->CurrentItemPos = P->Item.next;
	  if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
	    DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s", P->Ifilename);
	    DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	    TRACE_OUT(P->A);
	    return DPS_ERROR;
	  }
	  if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
	    if (wr == 0) {
	      /*
	       * End of file where a chain item was expected: the link points past
	       * the data.  Repair by re-reading the previous item, resetting its
	       * `next` to 0, writing it back, and restarting the lookup.
	       */
	      DpsLog(P->A, DPS_LOG_ERROR, "Possible corrupted hash chain for file %s, trying to restore (%s:%d)", 
		     P->Ifilename, __FILE__, __LINE__);
	      if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't seek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      if ((wr = read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't read previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      P->Item.next = 0;
	      if (lseek(P->Ifd, (off_t)P->PreviousItemPos, SEEK_SET) == (off_t)-1) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't seeek for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      if ((wr = write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM))) != sizeof(DPS_BASEITEM)) {
		DpsLog(P->A, DPS_LOG_ERROR, "Can't write previous pos for file %s (%s:%d)", P->Ifilename, __FILE__, __LINE__);
		DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
		TRACE_OUT(P->A);
		return DPS_ERROR;
	      }
	      goto search_again;
	
	    } else {
	      /* Read error or partial item: give up. */
	      DpsLog(P->A, DPS_LOG_ERROR, "Can't read hash chain for file %s %d of %d bytes (%s:%d)", 
		     P->Ifilename, wr, sizeof(DPS_BASEITEM), __FILE__, __LINE__);
	      DPS_FREE(P->Ifilename);    DPS_FREE(P->Sfilename);
	      TRACE_OUT(P->A);
	      return DPS_ERROR;
	    }
	  }
#ifdef DEBUG_SEARCH
	  stop_ticks = DpsStartTimer();
	  total_ticks = stop_ticks - start_ticks;
	  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase3 %03X in %.5f sec.", P->FileNo, (float)total_ticks / 1000);
#endif
	}
      /* Lookup finished without a chain repair: leave the retry loop. */
      break;
    search_again:;
    }
  P->opened = 1;
  P->mode = mode;
#ifdef DEBUG_SEARCH
  stop_ticks = DpsStartTimer();
  total_ticks = stop_ticks - start_ticks;
  DpsLog(P->A, DPS_LOG_EXTRA, "OpenBase4 %03X in %.5f sec.\n", P->FileNo, (float)total_ticks / 1000);
#endif
/*  fprintf(stderr, "Sfd:0x%x - %s\n", P->Sfd, P->Sfilename);
  fprintf(stderr, "Ifd:0x%x - %s\n", P->Ifd, P->Ifilename);*/
  TRACE_OUT(P->A);
  return DPS_OK;
}