Пример #1
0
/*
 * readSuffixarray
 *
 * Loads the tables of a suffix array from the index file `idxfilename`.
 * Only the first part of the definition is visible in this excerpt: it
 * concatenates the input sequences, opens the index file, reads the
 * suffix table and the flag byte and, when LCP_TAB_STORED is set in the
 * flags, the lcpc table and the llv table.
 *
 * space       - handle passed through to the allocator and helper routines
 * idxfilename - path of the index file to read
 * seqs        - array of sequences that are concatenated into one
 *               MultiCharSeq
 * len         - passed to concatCharSequences together with seqs
 *               (presumably the number of sequences - TODO confirm)
 *
 * The process is terminated with exit(-1) if the file cannot be opened.
 *
 * NOTE(review): none of the fread() return values are checked, so a
 * truncated or corrupt index file goes unnoticed here.
 */
Suffixarray *
readSuffixarray(void *space, 
                char *idxfilename, 
                CharSequence **seqs,
                Uint len) {
  FILE *fp; 
  char **suffixptr; 
  Uint     nmemb = 0,
           idvmemb = 0,
           llvmemb = 0,
           numofsuffixes,
           *suftab = NULL,
           *suflink = NULL;
  childtab *chldtab = NULL; 
  unsigned char flags=0,
                *lcpctab = NULL;
  signed char   *id = NULL;
  PairUint *llvtab = NULL;
  PairSint *idvtab = NULL;
  MultiCharSeq *mseq;
  Suffixarray *s;

  /* (char)254 and (char)127 are handed to both helpers; presumably they
   * are the delimiter and sentinel characters - TODO confirm. */
  mseq = concatCharSequences(space, seqs, len, (char)254, (char)127);
  numofsuffixes = (mseq->totallength - mseq->numofsequences)+1; 
  suffixptr = getSuffixPtr(space, mseq, (char)254, (char)127);

  /* NOTE(review): the file holds raw binary data but is opened in text
   * mode ("r"); platforms that distinguish the modes need "rb". */
  fp = fopen(idxfilename, "r");
  if (fp == NULL) {
    fprintf(stderr,"Couldn't open file '%s'. Exit forced.\n", idxfilename);
    exit(-1);
  }

  /* File layout as read here: element count, suffix table, flag byte. */
  fread(&nmemb, sizeof(Uint), 1, fp);
  /* NOTE(review): suftab is allocated with NULL as the first argument,
   * while every other table in this function passes `space`. */
  suftab = ALLOCMEMORY(NULL, NULL, Uint, nmemb);
  fread(suftab, sizeof(Uint), nmemb, fp);
  fread(&flags, sizeof(char), 1, fp);

  if (flags & LCP_TAB_STORED) {
    fprintf(stderr, "reading lcpc/vtab\n");
    lcpctab = ALLOCMEMORY(space, NULL, unsigned char, nmemb);
    fread(lcpctab, sizeof(unsigned char), nmemb, fp);
    
    /* NOTE(review): llvtab is sized with nmemb elements but llvmemb
     * elements are read into it; this overflows if llvmemb > nmemb. */
    fread(&llvmemb, sizeof(Uint), 1, fp);
    llvtab = ALLOCMEMORY(space, NULL, PairUint, nmemb);
    fread(llvtab, sizeof(PairUint), llvmemb, fp);
  }
Пример #2
0
  /*
   * constructSufArr
   *
   * Builds a Suffixarray for the concatenation of the given sequences.
   * The suffix table is produced by alurusort() and the inverse suffix
   * table is derived from it. Progress messages are written to stderr.
   *
   * space    - handle passed through to the allocator and helper routines
   * s        - array of sequences to concatenate
   * len      - passed to concatCharSequences together with s
   * alphabet - not referenced by this implementation
   *
   * Returns a newly allocated Suffixarray with seq, numofsuffixes,
   * suffixptr, suftab and inv_suftab filled in.
   */
  Suffixarray*
constructSufArr(void *space, CharSequence **s, Uint len,
    FAlphabet* alphabet)
{
  MultiCharSeq *multiseq;
  Suffixarray *result;
  char **suffixes;
  Uint *suftab;
  Uint *inverse;
  Uint count;
  Uint rank;

  multiseq = concatCharSequences(space, s, len, (char)254, (char)127);
  count = (multiseq->totallength - multiseq->numofsequences) + 1;

  fprintf(stderr, "alphabet of size (%d): %s\n",
      multiseq->mapsize, multiseq->map);

  /* Both buffers are requested before sorting, in this order. */
  inverse = ALLOCMEMORY(space, NULL, Uint, count);
  result = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  fprintf(stderr, "constructing suftab.\n");
  suffixes = getSuffixPtr(space, multiseq, (char)254, (char)127);

  /* alurusort receives the suffix count by address. */
  suftab = alurusort(space, multiseq->sequences, &count);

  fprintf(stderr, "constructing inv_suftab.\n");
  /* inverse[p] is the rank of the suffix starting at position p. */
  rank = 0;
  while (rank < count) {
    inverse[suftab[rank]] = rank;
    rank++;
  }

  result->seq = multiseq;
  result->numofsuffixes = count;
  result->suffixptr = suffixes;
  result->suftab = suftab;
  result->inv_suftab = inverse;

  return result;
}
  /*
   * constructSufArr
   *
   * Builds a Suffixarray for the concatenation of the given sequences.
   * The suffix table is produced by quickSortMultikey() over the suffix
   * pointers and the inverse suffix table is derived from it. Progress
   * messages are written to stdout.
   *
   * space    - handle passed through to the allocator and helper routines
   * s        - array of sequences to concatenate
   * len      - passed to concatCharSequences together with s
   * alphabet - not referenced by this implementation
   *
   * Returns a newly allocated Suffixarray with seq, numofsuffixes,
   * suffixptr, suftab and inv_suftab filled in.
   */
  Suffixarray*
constructSufArr(void *space, 
    CharSequence **s, 
    Uint len, 
    FAlphabet* alphabet)
{
  Uint i, numofsuffixes,
  *sorted, 
  *inv_suftab;
  char **suffixptr;
  MultiCharSeq *mseq;
  Suffixarray *arr;

  mseq = concatCharSequences(space, s, len, (char)254, (char)127);
  numofsuffixes = (mseq->totallength - mseq->numofsequences)+1; 

  /* The byte count is a size_t expression (it involves sizeof), so it is
   * printed with %zu; the suffix count is widened explicitly so that the
   * conversion specifier matches whatever width Uint has. */
  printf("allocating space for %lu suffixes (%zu bytes)\n",
      (unsigned long)numofsuffixes,
      (size_t)2*numofsuffixes*sizeof(Uint));
  inv_suftab = ALLOCMEMORY(space, NULL, Uint , numofsuffixes);
  arr = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  printf("constructing suftab.\n");
  suffixptr = getSuffixPtr(space, mseq, (char)254, (char)127);
  sorted = quickSortMultikey (space, suffixptr, numofsuffixes, 
      cmpCharSequence, numofsuffixes-1, NULL);

  printf("constructing inv_suftab.\n");
  /* inv_suftab[p] is the rank of the suffix starting at position p. */
  for (i=0; i < numofsuffixes; i++) {
    inv_suftab[sorted[i]]=i;
  }

  arr->seq = mseq;
  arr->numofsuffixes = numofsuffixes;
  arr->suffixptr = suffixptr;
  arr->suftab = sorted;
  arr->inv_suftab = inv_suftab;

  return arr;
}
Пример #4
0
/*
 * readSuffixarray
 *
 * Loads the tables of a suffix array from the index file `idxfilename`.
 * Only the first part of the definition is visible in this excerpt. The
 * visible part reads, in file order: the element count, the suffix table,
 * a flag byte, and then - depending on the flag bits - the lcpc/llv
 * tables, the child table, the suffix link and id tables together with
 * the idv table, and a 16 byte checksum.
 *
 * space       - handle passed through to the allocator and helper routines
 * idxfilename - path of the index file to read
 * seqs        - array of sequences that are concatenated into one
 *               MultiCharSeq
 * len         - passed to concatCharSequences together with seqs
 * silent      - when non-zero the progress messages (MSG) are suppressed
 *
 * The suffix link and id tables are obtained in one of three ways that
 * are selected at compile time: memory mapped (SUFLINK_MMAP), left on
 * disk with only their file offsets recorded (SUFLINK_DISKACC), or read
 * into memory (default).
 *
 * The process is terminated if the file cannot be opened or mapped.
 *
 * NOTE(review): none of the fread() return values are checked, so a
 * truncated or corrupt index file goes unnoticed here.
 */
Suffixarray *
readSuffixarray(void *space, 
    char *idxfilename, 
    CharSequence **seqs,
    Uint len,
    unsigned char silent) {
  FILE *fp; 
  Uint     nmemb = 0,
           idvmemb = 0,
           llvmemb = 0,
           numofsuffixes,
           *suftab = NULL,
           idvi =0;
  childtab *chldtab = NULL; 
  unsigned char flags=0,
                *lcpctab = NULL;
  unsigned char *mdfive=NULL,
                *check=NULL;
  PairUint *llvtab = NULL;
  PairLSint *idvtab = NULL;
  PairSint *idvutab = NULL;

  MultiCharSeq *mseq;
  Suffixarray *s;

  /* NOTE(review): `#elif SUFLINK_DISKACC` evaluates the macro's value,
   * unlike `#ifdef`; it only selects this branch when the macro expands
   * to a non-zero expression. */
#ifdef SUFLINK_MMAP
  int fd;
  signed char   *id = NULL;
  long curiopos, offset;
  struct stat sb;
  char *suflinkptr;
  int pagediff_id;
  int pagediff_sl;
#elif SUFLINK_DISKACC
  int fd;
  off_t off_sl;
  off_t off_id;
#else
  signed char   *id = NULL;
  Uint *suflink = NULL;
#endif
  
  /* (char)126 and (char)127 are handed to the concatenation helper;
   * presumably delimiter and sentinel characters - TODO confirm. */
  mseq = concatCharSequences(space, seqs, len, (char)126, (char)127);
  numofsuffixes = mseq->totallength; 

  /* NOTE(review): the file holds raw binary data but is opened in text
   * mode ("r"); platforms that distinguish the modes need "rb". */
  fp = fopen(idxfilename, "r");
  if (fp == NULL) {
    DBG("Couldn't open file '%s'. Exit forced.\n", idxfilename);
    exit(-1);
  }

  /* Element count, suffix table and flag byte come first in the file. */
  fread(&nmemb, sizeof(Uint), 1, fp);
  /* NOTE(review): suftab is allocated with NULL as the first argument,
   * while every other table in this function passes `space`. */
  suftab = ALLOCMEMORY(NULL, NULL, Uint, nmemb);
  fread(suftab, sizeof(Uint), nmemb, fp);
  fread(&flags, sizeof(char), 1, fp);

  if (flags & LCP_TAB_STORED) {
    if (!silent) MSG("reading lcpc/vtab.\n");
    lcpctab = ALLOCMEMORY(space, NULL, unsigned char, nmemb);
    fread(lcpctab, sizeof(unsigned char), nmemb, fp);

    /* NOTE(review): llvtab is sized with nmemb elements but llvmemb
     * elements are read into it; this overflows if llvmemb > nmemb. */
    fread(&llvmemb, sizeof(Uint), 1, fp);
    llvtab = ALLOCMEMORY(space, NULL, PairUint, nmemb);
    fread(llvtab, sizeof(PairUint), llvmemb, fp);
  }

  if (flags & CHLD_TAB_STORED) {
    if(!silent) MSG("reading childtab.\n");
    chldtab = ALLOCMEMORY(space, NULL, childtab, nmemb);
    fread(chldtab, sizeof(childtab), nmemb, fp);
  }

  if ((flags & SUFLINK_TAB_STORED)) {
    if(!silent) MSG("reading suflinks.\n");

    /* --- suffix link table: nmemb entries of type Uint --- */
#ifdef SUFLINK_MMAP 
    /* Remember where the table starts in the stdio stream and open a
     * second, raw descriptor on the same file for mmap(). */
    curiopos = ftell(fp);
    fd = open(idxfilename, O_RDONLY);
    if (fd == -1) {
      perror("open");
      exit(EXIT_FAILURE);
    }

    if (fstat(fd, &sb) == -1) {       
      perror("fstat");
      exit(EXIT_FAILURE);
    }

    /* Round the table position down to a multiple of the page size (the
     * mask assumes a power-of-two page size); pagediff_sl is the distance
     * from that page start to the first table byte. */
    offset = curiopos & ~(sysconf(_SC_PAGE_SIZE) - 1);
    if (curiopos >= sb.st_size) {
      fprintf(stderr, "offset is past end of file\n");
      exit(EXIT_FAILURE);
    }
    
    /* The mapping begins at the page start, so the table itself begins
     * pagediff_sl bytes into suflinkptr. */
    pagediff_sl = curiopos - offset;   
    suflinkptr = mmap(0, nmemb*sizeof(Uint) + pagediff_sl, PROT_READ, MAP_SHARED, fd, offset);

    if (suflinkptr == MAP_FAILED) {
      perror("mmap");
      exit(EXIT_FAILURE);
    }
#elif SUFLINK_DISKACC 
    /* Only the file position of the table is recorded.
     * NOTE(review): sl_diskacc is not declared in this function
     * (presumably a file-level variable), and the result of open() is
     * not checked in this branch. */
    sl_diskacc = 1;
    off_sl = ftell(fp);
    fd = open(idxfilename, O_RDONLY);
#else
    suflink = ALLOCMEMORY(space, NULL, Uint, nmemb);
    fread(suflink, sizeof(Uint), nmemb, fp);
#endif

    /* --- id table: nmemb entries of type signed char, stored directly
     * behind the suffix link table --- */
#ifdef SUFLINK_MMAP
    offset = (curiopos+(nmemb*sizeof(Uint))) & ~(sysconf(_SC_PAGE_SIZE) - 1);
    /* NOTE(review): this repeats the test on curiopos; the start of the
     * id table (curiopos + nmemb*sizeof(Uint)) is not what is compared
     * against the file size. */
    if (curiopos >= sb.st_size) {
      fprintf(stderr, "offset is past end of file\n");
      exit(EXIT_FAILURE);
    }
    
    /* As above: the id data begins pagediff_id bytes into the mapping. */
    pagediff_id = (curiopos+(nmemb*sizeof(Uint))) - offset;   
    id = mmap(0, nmemb*sizeof(signed char) + pagediff_id, PROT_READ, MAP_SHARED, fd, offset);

    if (id == MAP_FAILED) {
      perror("mmap");
      exit(EXIT_FAILURE);
    }
    /* Nothing was consumed through fp in this branch, so skip both
     * tables to position the stream at the idv table count. */
    fseek(fp, nmemb*(sizeof(Uint)+sizeof(signed char)), SEEK_CUR); 

#elif SUFLINK_DISKACC
    /* Record the id table position and skip both tables in the stream. */
    off_id = off_sl+(nmemb*sizeof(Uint));
    fseek(fp, nmemb*(sizeof(Uint)+sizeof(signed char)), SEEK_CUR);    
#else   
    id = ALLOCMEMORY(space, NULL, signed char, nmemb);
    fread(id, sizeof(signed char), nmemb, fp);
#endif

    /* --- idv table: idvmemb pairs, always held in memory as PairLSint --- */
    fread(&idvmemb, sizeof(Uint), 1, fp);
    idvtab = ALLOCMEMORY(space, NULL, PairLSint, idvmemb);
    if ((flags & LINT_SUFLINKS)) {
      /* The file already stores PairLSint entries; read them directly. */
      if(!silent) MSG("reading lsint id.\n");
      fread(idvtab, sizeof(PairLSint), idvmemb, fp);
    } else { 
      /* The file stores the narrower pair type: read into a temporary
       * buffer and copy element by element into idvtab.
       * NOTE(review): the buffer is declared as PairSint but the read
       * uses sizeof(PairUint) as element size; this is only safe if both
       * types have the same size - confirm. */
      idvutab = ALLOCMEMORY(space, NULL, PairSint, idvmemb);
      if(!silent) MSG("reading uint id.\n");
      fread(idvutab, sizeof(PairUint), idvmemb, fp);
      for(idvi=0; idvi < idvmemb; idvi++) {
        idvtab[idvi].a = idvutab[idvi].a;
        idvtab[idvi].b = idvutab[idvi].b;
      }
      /* NOTE(review): the buffer came from ALLOCMEMORY(space, ...) but is
       * released with plain free(); verify that this matches the
       * allocator behind ALLOCMEMORY. */
      free(idvutab);
    }
  }

  if ((flags & MD5_STORED)) {
    /* A 16 byte checksum is stored at this position in the file. */
    mdfive = ALLOCMEMORY(space, NULL, unsigned char, 16);
    fread(mdfive, sizeof(unsigned char), 16, fp);
  }
Пример #5
0
  /*
   * constructSufArr
   *
   * Builds a Suffixarray for the concatenation of the given sequences.
   * A 16 byte checksum of the concatenated text is stored alongside the
   * suffix table and the inverse suffix table; all remaining table
   * pointers of the result are initialised to NULL.
   *
   * space    - handle passed through to the allocator and helper routines
   * s        - array of sequences to concatenate
   * len      - passed to concatCharSequences together with s
   * alphabet - not referenced by this implementation
   * silent   - when non-zero the progress messages (NFO/MSG) are
   *            suppressed
   *
   * Returns a newly allocated Suffixarray.
   */
  Suffixarray*
constructSufArr(void *space, 
    CharSequence **s, 
    Uint len, 
    FAlphabet* alphabet,
    unsigned char silent)
{

  Uint i, numofsuffixes,
  *sorted, 
  *inv_suftab;
  MultiCharSeq *mseq; 
  Suffixarray *arr;
  unsigned char *temp,
                *mdfive=NULL;

  mseq = concatCharSequences(space, s, len, (char)126, (char)127);
  numofsuffixes = mseq->totallength;

  /* Keep a private copy of the 16 checksum bytes returned by MD5R
   * (presumably an MD5 digest of the concatenated text - TODO confirm). */
  mdfive  = ALLOCMEMORY(space, NULL, unsigned char, 16);
  temp = MD5R((unsigned char*)mseq->sequences, numofsuffixes, NULL);
  memmove(mdfive, temp, 16);

  if(!silent) NFO("alphabet of size (%d): %s\n", mseq->mapsize, mseq->map);
  if(!silent) NFO("size of db sequence: %u\n", numofsuffixes);
  inv_suftab = ALLOCMEMORY(space, NULL, Uint , numofsuffixes);
  arr = ALLOCMEMORY(space, NULL, Suffixarray, 1);

  if(!silent) MSG("constructing suftab.\n");

  /* NOTE(review): alurusort receives numofsuffixes by address; if it can
   * change the count, inv_suftab (sized above) would have to be allocated
   * after the sort - confirm. */
#ifdef SUF_MKQUICKSORT
  sorted = quickSortMultikey (space, mseq->sequences, numofsuffixes, 
      cmpCharSequence, numofsuffixes-1, NULL);     
#else
  sorted = alurusort(space, mseq->sequences, 
      &(numofsuffixes));
#endif

  if (!silent)NFO("constructing inv_suftab (%u).\n", numofsuffixes);
  /* inv_suftab[p] is the rank of the suffix starting at position p. */
  for (i=0; i < numofsuffixes; i++) {
    /* Valid positions are 0 .. numofsuffixes-1. Anything else is reported
     * and skipped, because storing it would write past the end of
     * inv_suftab. */
    if (sorted[i] >= numofsuffixes) {
      fprintf(stderr, "construction error? %u: %u\n",i, sorted[i]);
      continue;
    }
    inv_suftab[sorted[i]]=i;
  }
  if (!silent)MSG("inv_suftab constructed.\n");

  arr->seq = mseq;
  arr->numofsuffixes = numofsuffixes;
  arr->suftab = sorted;
  arr->inv_suftab = inv_suftab;
  arr->mdfive = mdfive;

  /* Tables that are not built here start out empty. */
  arr->lcpctab = NULL;
  arr->llvtab = NULL;
  
  arr->id = NULL;
  arr->idvtab = NULL;
  arr->chldtab = NULL;
  arr->bcktab = NULL;

  arr->suflink = NULL;
  arr->suflink_l = NULL;
  arr->suflink_r = NULL;
  arr->llint = 1;
  return arr;
}