Exemplo n.º 1
0
/*
 * Build the Psi / BWT structure selected by csa.id from `fname` and, when
 * idx_id >= 0, write the header and the sampled SA / ISA tables to `fidx`.
 *
 * Parameters:
 *   csa     - descriptor passed BY VALUE; every field written here (k, m, K,
 *             CtoA, AtoC, SA, ISA) lands in the local copy only. Fields read:
 *             id, D, D2, C[], n, psi, LF.
 *   fname   - input name, forwarded unchanged to the *_makeindex routine.
 *   fidx    - path of the index file (opened with "wb"). It is released with
 *             free() before returning, so the caller must pass a heap pointer
 *             and must not use it afterwards.
 *   psi_id  - NOTE(review): the incoming value is ignored; it is overwritten
 *             with csa.id on entry. Kept for interface compatibility.
 *   idx_id  - the index file is produced only when this is >= 0.
 *   coded   - forwarded to lf_dna_makeindex() only.
 *
 * Terminates the process via exit() on an unknown psi id (status 1), when
 * D >= n or D2 >= n (status 0), or when `fidx` cannot be opened (status 1).
 */
void csa_new_from_bwt(CSA csa, char *fname, char *fidx, int psi_id, int idx_id, bool coded)
{
	int k;
	i64 i,j,v,m;
	FILE *f2;
	i64 psize,isize;
	i64 n;

	/* The structure id stored in the descriptor wins over the argument. */
	psi_id = csa.id;
	if (psi_id >= 0) {
		printf("create psi: id=%d\n",psi_id);
	}
	if (idx_id >= 0) {
		printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2);
	}

	psize = 0;

	/* Build the main structure; the low 6 bits of the id pick the encoder. */
	if (psi_id >= 0) {
		switch (psi_id & 0x3f) {
			case ID_DIFF_GAMMA:
			case ID_DIFF_GAMMA_RL:
			case ID_DIFF_GAMMA_SPARSE:
			case ID_DIFF_GAMMA_RL_SPARSE:
				psize = psi1_makeindex(&csa, fname);
				printf("n     %ld\n",csa.n);
				printf("Psi   %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
			case ID_DIFF_GAMMA_RR:
				psize = psi12_makeindex(&csa, fname);
				printf("n     %ld\n",csa.n);
				printf("Psi   %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
			case ID_BWT_DNA:
				psize = lf_dna_makeindex(&csa, fname, coded);
				printf("n     %ld\n",csa.n);
				printf("BW    %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
			case ID_BWT_BIT:
				psize = lf_bit_makeindex(&csa, fname);
				printf("n     %ld\n",csa.n);
				printf("BW    %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
			case ID_BWT_WT:
			case ID_BWT_WT_HUF:
			case ID_BWT_WT_DENSE:
			case ID_BWT_WT_SPARSE4:
			case ID_BWT_WT_RR:
				psize = lf_wt_makeindex(&csa, fname);
				printf("n     %ld\n",csa.n);
				printf("BW    %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
#if 0
			case ID_BWT_HUF:
				psize = lf_bwt_makeindex(&csa, fname);
				printf("n     %ld\n",csa.n);
				printf("BW    %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
#endif
			case ID_SPARSE4:
				psize = psi2_makeindex(&csa, fname);
				printf("n     %ld\n",csa.n);
				printf("Psi   %ld bytes (%1.3f bpc)\n",
						psize,(double)psize*8/csa.n);
				break;
			default:
				printf("psi_id = %d\n",psi_id);
				exit(1);
		}
	}

	/* k is the per-entry width handed to putuint() and used to size SA/ISA;
	 * presumably the number of bytes needed for a value in [0, n]. */
	csa.k = (blog(csa.n+1)+1+8-1)/8;

	/* Compact the alphabet: symbols with C[i] > 0 get consecutive codes
	 * 0..m-1 (CtoA / AtoC), and K[] receives the running totals of C
	 * starting from 1. */
	for (i=0; i<SIGMA; i++) csa.CtoA[i] = -1;
	//  csa.K[-1+1] = 0;
	csa.K[-1+1] = 1;
	for (m=0,v=1,i=0; i<SIGMA; i++) {
		if (csa.C[i]>0) {
			csa.AtoC[m] = i;
			csa.CtoA[i] = m;
			csa.K[m+1] = v;
			//      printf("i=%ld v = %ld C[i] = %ld\n",i,v,csa.C[i]);
			v += csa.C[i];
			m++;
		}
	}
	csa.K[m+1] = v;
	csa.m = m;

	/* Sampling intervals must be smaller than the text length. */
	if (csa.D >= csa.n) {
		printf("D=%d >= n=%ld\n",csa.D,csa.n);
		exit(0);
	}
	if (csa.D2 >= csa.n) {
		printf("D2=%d >= n=%ld\n",csa.D2,csa.n);
		exit(0);
	}

	if (idx_id >= 0) {
		n = csa.n;
		k = csa.k;
		////  compute SA and ISA
		/* SA is sampled every D positions, ISA every D2; a zero interval
		 * disables (and does not allocate) the corresponding table. */
		if (csa.D > 0) csa.SA = (uchar *) mymalloc(((n-1)/csa.D+1+1)*k);
		if (csa.D2 > 0) csa.ISA = (uchar *) mymalloc(((n-1)/csa.D2+1+1)*k);
		if (csa.D == 0 && csa.D2 == 0) goto brk;

		switch (psi_id & 0x3f) {
			case ID_DIFF_GAMMA:
			case ID_DIFF_GAMMA_RL:
			case ID_DIFF_GAMMA_SPARSE:
			case ID_DIFF_GAMMA_RL_SPARSE:
			case ID_SPARSE4:
			case ID_DIFF_GAMMA_RR:
				/* Follow psi from rank 0, visiting text positions in
				 * increasing order. */
				j = 0;
				for (i=0; i<=n; i++) {
					display_progressbar("making sa ",i,n);
					j = csa.psi(&csa,j);
					//  sa[j] = i;
					if (csa.D > 0 && j % csa.D == 0) {
						putuint(csa.SA,j / csa.D,i,k);
					}
					if (csa.D2 > 0 && i % csa.D2 == 0) {
						putuint(csa.ISA,i / csa.D2,j,k);
					}
				}
				//      putuint(csa.SA,0,n,k);
				break;
			case ID_BWT_DNA:
			case ID_BWT_BIT:
			case ID_BWT_WT:
			case ID_BWT_WT_HUF:
			case ID_BWT_WT_DENSE:
			case ID_BWT_WT_SPARSE4:
			case ID_BWT_WT_RR:
			case ID_BWT_HUF:
				/* Follow LF from rank 0, visiting text positions in
				 * decreasing order. */
				j = 0;
				for (i=n-1; i>=0; i--) {
					display_progressbar("making sa ",i,n);
					v = csa.LF(&csa,j);
					//        printf("LF[%ld] = %ld\n",j,v);
					j = v;
					if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k);
					if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k);
				}
				/* csa.SA exists only when D > 0; without this guard the
				 * D == 0, D2 > 0 configuration wrote through an
				 * unallocated pointer. */
				if (csa.D > 0) putuint(csa.SA,0,n,k);
				break;
			default:
				break;
		}
brk:
		////      write idx
		f2 = fopen(fidx,"wb"); /* directory */
		if (f2 == NULL) {
			perror("csa2_new1: ");
			exit(1);
		}

		/* isize tracks the number of bytes emitted to the index file. */
		isize = 0;

		writeint(4,VERSION,f2); /* version */
		isize += 4;

		writeint(1,ID_HEADER,f2); // header ID
		isize += 1;
		isize = write_header(&csa, f2, isize);

		if (csa.D > 0) {
			writeint(1,ID_SA,f2);
			isize += 1;
			isize = write_sa(&csa, f2, isize);
		}

		if (csa.D2 > 0) {
			writeint(1,ID_ISA,f2);
			isize += 1;
			isize = write_isa(&csa, f2, isize);
		}


		fclose(f2);

		if (csa.D > 0) free(csa.SA);
		if (csa.D2 > 0) free(csa.ISA);

		printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize),
				(double)(psize+isize)*8/csa.n);
	}
	/* Ownership of fidx is taken by this function (see header comment). */
	free(fidx);
}
Exemplo n.º 2
0
/*
 * Command-line driver: parse options, build the Psi / BWT structure for the
 * given input file and, when -I was supplied, write "<input>.idx" containing
 * the header and the sampled SA / ISA tables.
 *
 * Recognised arguments (option letter is case-insensitive):
 *   -I[n]:[D]:[D2]  request the index file; the text after the letter is
 *                   handed to csa_options().
 *   -P[n]:[L]       the text after the letter is handed to psi_options().
 *   -C[s]           the text after the letter is handed to sigma_options().
 *   anything else   taken as the input file name; if several are given the
 *                   last one is used.
 *
 * Terminates the process via exit() on an unknown option (status 1), a
 * missing input file (status 0), an unknown psi id (status 1), D >= n or
 * D2 >= n (status 0), or when the index file cannot be opened (status 1).
 *
 * NOTE(review): `csa` is a local that is not zero-initialised; csa.id, csa.D
 * and csa.D2 are read below and are presumably set by psi_options() /
 * csa_options() -- confirm what happens when -P or -I is omitted.
 */
void csa_new_from_bwt(int argc, char *argv[])
{
  i64 i,j,v,m;
  FILE *f2;
  i64 psize,isize;
  i64 n;
  int k;
  char *fname,*fidx;
  char *p;
  int psi_id, idx_id;
  CSA csa;
  int sigma;

  csa.sigma = 256; /* default alphabet size */
  csa.k2 = 1;

//  for (i=0; i<SIGMA+2; i++) csa.C[i] = 0;
//  for (i=0; i<SIGMA; i++) csa.C[i] = 0;

  fname = NULL;  fidx = NULL;
  psi_id = idx_id = -1;
  for (i=1; i<argc; i++) {
    p = argv[i];
    if (p[0] == '-') {
      p++;
      /* <ctype.h> functions require a value representable as unsigned char;
         a plain char may be negative for non-ASCII bytes. */
      switch (toupper((unsigned char)p[0])) {
      case 'I':
      // -I[n]:[D]:[D2]
        p++;
        idx_id = 0;
        csa_options(&csa, p);
        break;
      case 'P':
      // -P[n]:[L]
        p++;
        psi_id = 0;
        psi_options(&csa, p);
        break;
      case 'C':
      // -C[s]
        p++;
        sigma_options(&csa, p);
        break;
      default:
        printf("??? no such option %s\n",argv[i]);
        exit(1);
      }
    } else {
      fname = argv[i];
      k = strlen(fname);
      /* Release the name built for an earlier file argument so repeated
         file names do not leak; fidx starts out NULL, which free() accepts. */
      free(fidx);
      fidx = mymalloc(k+5); /* room for ".idx" plus the terminator */
      sprintf(fidx,"%s.idx",fname);
    }
  }
  if (fname == NULL) {
    printf("no input file.\n");
    exit(0);
  }
  printf("sigma = %d k2 = %d\n", csa.sigma, csa.k2);
  sigma = csa.sigma;

  /* Per-symbol tables sized by the (possibly overridden) alphabet size. */
  csa.C = mymalloc(sizeof(*csa.C)*sigma); //
  csa.CtoA = mymalloc(sizeof(*csa.CtoA)*sigma); //
  csa.AtoC = mymalloc(sizeof(*csa.AtoC)*sigma); //
  csa.K = mymalloc(sizeof(*csa.K)*(sigma+2)); //
  for (i=0; i<sigma; i++) csa.C[i] = 0;


  /* The id stored in the descriptor replaces the 0 / -1 flag set above. */
  psi_id = csa.id;
  if (psi_id >= 0) {
    printf("create psi: id=%d\n",psi_id);
  }
  if (idx_id >= 0) {
    printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2);
  }

  psize = 0;

  /* Build the main structure; the low 6 bits of the id pick the encoder. */
  if (psi_id >= 0) {
    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
      psize = psi1_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("Psi   %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_DIFF_GAMMA_RR:
      psize = psi12_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("Psi   %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA:
      psize = lf_dna_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA2:
      psize = lf_dna2_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_BIT:
      psize = lf_bit_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
      psize = lf_wt_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
#if 0
    case ID_BWT_HUF:
      psize = lf_bwt_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
#endif
    case ID_SPARSE4:
      psize = psi2_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("Psi   %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    default:
      printf("psi_id = %d\n",psi_id);
      exit(1);
    }
  }

  /* k is the per-entry width handed to putuint() and used to size SA/ISA;
     presumably the number of bytes needed for a value in [0, n]. */
  csa.k = (blog(csa.n+1)+1+8-1)/8;

  /* Compact the alphabet: symbols with C[i] > 0 get consecutive codes
     0..m-1 (CtoA / AtoC), and K[] receives the running totals of C
     starting from 1. */
  for (i=0; i<sigma; i++) csa.CtoA[i] = -1;
  csa.K[-1+1] = 1;
  for (m=0,v=1,i=0; i<sigma; i++) {
    if (csa.C[i]>0) {
      csa.AtoC[m] = i;
      csa.CtoA[i] = m;
      csa.K[m+1] = v;
//      printf("i=%ld v = %ld C[i] = %ld\n",i,v,csa.C[i]);
      v += csa.C[i];
      m++;
    }
  }
  csa.K[m+1] = v;
  csa.m = m;

  /* Sampling intervals must be smaller than the text length. */
  if (csa.D >= csa.n) {
    printf("D=%d >= n=%ld\n",csa.D,csa.n);
    exit(0);
  }
  if (csa.D2 >= csa.n) {
    printf("D2=%d >= n=%ld\n",csa.D2,csa.n);
    exit(0);
  }

  if (idx_id >= 0) {
    n = csa.n;
    k = csa.k;
////  compute SA and ISA
    /* SA is sampled every D positions, ISA every D2; a zero interval
       disables (and does not allocate) the corresponding table. */
    if (csa.D > 0) csa.SA = mymalloc(((n-1)/csa.D+1+1)*k);
    if (csa.D2 > 0) csa.ISA = mymalloc(((n-1)/csa.D2+1+1)*k);
    if (csa.D == 0 && csa.D2 == 0) goto brk;

    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
    case ID_SPARSE4:
    case ID_DIFF_GAMMA_RR:
      /* Follow psi from rank 0, visiting text positions in increasing order. */
      j = 0;
      for (i=0; i<=n; i++) {
        display_progressbar("making sa ",i,n);
        j = csa.psi(&csa,j);
  //  sa[j] = i;
        if (csa.D > 0 && j % csa.D == 0) {
          putuint(csa.SA,j / csa.D,i,k);
        }
        if (csa.D2 > 0 && i % csa.D2 == 0) {
          putuint(csa.ISA,i / csa.D2,j,k);
        }
      }
//      putuint(csa.SA,0,n,k);
      break;
    case ID_BWT_DNA:
    case ID_BWT_DNA2:
    case ID_BWT_BIT:
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
    case ID_BWT_HUF:
      /* Follow LF from rank 0, visiting text positions in decreasing order. */
      j = 0;
      for (i=n-1; i>=0; i--) {
        display_progressbar("making sa ",i,n);
        v = csa.LF(&csa,j);
//        printf("LF[%ld] = %ld\n",j,v);
        j = v;
        if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k);
        if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k);
      }
//      putuint(csa.SA,0,n,k);
      /* Slot 0 of SA gets n; guarded because SA exists only when D > 0. */
      if (csa.D > 0) putuint(csa.SA,0,n,k); // 2011-12-20
      break;
    default:
      break;
    }
brk:
////      write idx
    f2 = fopen(fidx,"wb"); /* directory */
    if (f2 == NULL) {
      perror("csa2_new1: ");
      exit(1);
    }

    /* isize tracks the number of bytes emitted to the index file. */
    isize = 0;

    writeint(4,VERSION,f2); /* version */
    isize += 4;

    writeint(1,ID_HEADER,f2); // header ID
    isize += 1;
    isize = write_header(&csa, f2, isize);

    if (csa.D > 0) {
      writeint(1,ID_SA,f2);
      isize += 1;
      isize = write_sa(&csa, f2, isize);
    }

    if (csa.D2 > 0) {
      writeint(1,ID_ISA,f2);
      isize += 1;
      isize = write_isa(&csa, f2, isize);
    }


    fclose(f2);

    if (csa.D > 0) free(csa.SA);
    if (csa.D2 > 0) free(csa.ISA);

    printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize),
                (double)(psize+isize)*8/csa.n);
  }
  free(fidx);
}