Ejemplo n.º 1
0
/*
 * Command-line front end for csa_new_from_bwt().
 *
 * Recognised arguments (option letters are case-insensitive):
 *   -I<opts>   request the index; <opts> is passed to csa_options()
 *   -P<opts>   request psi;       <opts> is passed to psi_options()
 *   <file>     input file name; the index file name is "<file>.idx"
 *
 * An unknown option prints a diagnostic and exits with status 1.  A missing
 * file name prints "no input file." and exits with status 0.  When several
 * file names are given the last one wins.
 */
void csa_new_from_bwt_wrapper(int argc, char *argv[]) {
	i64 i;
	char *p;
	size_t len;
	int psi_id, idx_id;
	char *fname,*fidx;
	CSA csa;

	for (i=0; i<SIGMA+2; i++) csa.C[i] = 0;

	fname = NULL;
	fidx = NULL;
	psi_id = idx_id = -1;

	for (i=1; i<argc; i++) {
		p = argv[i];
		if (p[0] == '-') {
			p++;
			/* <ctype.h> functions need a value representable as
			 * unsigned char; a plain (possibly negative) char is UB. */
			switch (toupper((unsigned char) p[0])) {
				case 'I':
					// -I[n]:[D]:[D2]
					p++;
					idx_id = 0;
					csa_options(&csa, p);
					break;
				case 'P':
					// -P[n]:[L]
					p++;
					psi_id = 0;
					psi_options(&csa, p);
					break;
				default:
					printf("??? %s\n",argv[i]);
					exit(1);
			}
		} else {
			fname = argv[i];
			len = strlen(fname);
			/* Drop the name built for an earlier file argument.
			 * NOTE(review): assumes mymalloc() memory is free()-compatible. */
			free(fidx);
			/* ".idx" is 4 characters, plus the terminating NUL. */
			fidx = (char *) mymalloc(len+5);
			snprintf(fidx, len+5, "%s.idx", fname);
		}
	}

	if (fname == NULL) {
		printf("no input file.\n");
		exit(0);
	}

	/* fidx is non-NULL here: it is set together with fname. */
	csa_new_from_bwt(csa, fname, fidx, psi_id, idx_id, false);

}
Ejemplo n.º 2
0
/*
 * Build the index for "<directory>/<name>" with fixed options.
 *
 * directory  directory containing the input; must not be NULL
 * name       file name inside directory, or NULL to use "output"
 *
 * The option strings ":64:64" and "3:64" are passed to csa_options() and
 * psi_options() respectively, then csa_new_from_bwt() is called with the
 * input path and "<input path>.idx".
 *
 * The two path buffers are heap-allocated and are not freed here.
 * NOTE(review): it is not visible from this function whether
 * csa_new_from_bwt() keeps or releases them - confirm ownership.
 * Exits with status 1 if the buffers cannot be allocated.
 */
void csa_new_from_bwt_gnu_bwt_wrapper(const char *directory, const char *name) {
	/* Historical buffer size; kept as a lower bound in case the callee
	 * relies on spare room behind the path (not verifiable from here). */
	enum { MIN_PATH_BUF = 500 };

	i64 i;
	int psi_id, idx_id;
	char *fname, *fidx;
	const char *leaf;
	size_t fname_len, fname_cap, fidx_cap;
	CSA csa;

	for (i=0; i<SIGMA+2; i++) csa.C[i] = 0;

	idx_id = 0;
	psi_id = 0;

	/* Mutable copies: the option parsers take a non-const char *. */
	char csa_opt[10];
	strcpy(csa_opt, ":64:64");
	char psi_opt[10];
	strcpy(psi_opt, "3:64");

	csa_options(&csa, csa_opt);
	psi_options(&csa, psi_opt);

	leaf = (name == NULL) ? "output" : name;

	/* Size the buffers from the actual path length so that a long
	 * directory or name can no longer overflow them. */
	fname_len = strlen(directory) + 1 + strlen(leaf);	/* dir + '/' + leaf */
	fname_cap = fname_len + 1;
	fidx_cap = fname_len + sizeof(".idx");			/* includes the NUL */
	if (fname_cap < MIN_PATH_BUF) fname_cap = MIN_PATH_BUF;
	if (fidx_cap < MIN_PATH_BUF) fidx_cap = MIN_PATH_BUF;

	fname = (char *) malloc(fname_cap);
	fidx = (char *) malloc(fidx_cap);
	if (fname == NULL || fidx == NULL) {
		free(fname);
		free(fidx);
		exit(1);
	}

	strcpy(fname, directory);
	strcat(fname, "/");
	strcat(fname, leaf);

	strcpy(fidx, fname);
	strcat(fidx, ".idx");

	csa_new_from_bwt(csa, fname, fidx, psi_id, idx_id, true);
}
Ejemplo n.º 3
0
void csa_new_from_bwt(int argc, char *argv[])
{
  i64 i,j,v,m;
  FILE *f2;
  i64 psize,isize;
  i64 n;
  int k;
  char *fname,*fidx;
  char *p;
  int psi_id, idx_id;
  CSA csa;
  int sigma;

  csa.sigma = 256; /* default alphabet size */
  csa.k2 = 1;

//  for (i=0; i<SIGMA+2; i++) csa.C[i] = 0;
//  for (i=0; i<SIGMA; i++) csa.C[i] = 0;

  fname = NULL;  fidx = NULL;
  psi_id = idx_id = -1;
  for (i=1; i<argc; i++) {
    p = argv[i];
    if (p[0] == '-') {
      p++;
      switch (toupper(p[0])) {
      case 'I':
      // -I[n]:[D]:[D2]
        p++;
        idx_id = 0;
        csa_options(&csa, p);
        break;
      case 'P':
      // -P[n]:[L]
        p++;
        psi_id = 0;
        psi_options(&csa, p);
        break;
      case 'C':
      // -C[s]
        p++;
        sigma_options(&csa, p);
        break;
      default:
        printf("??? no such option %s\n",argv[i]);
        exit(1);
      }
    } else {
      fname = argv[i];
      k = strlen(fname);
      fidx = mymalloc(k+5);
      sprintf(fidx,"%s.idx",fname);
    }
  }
  if (fname == NULL) {
    printf("no input file.\n");
    exit(0);
  }
  printf("sigma = %d k2 = %d\n", csa.sigma, csa.k2);
  sigma = csa.sigma;

  csa.C = mymalloc(sizeof(*csa.C)*sigma); //
  csa.CtoA = mymalloc(sizeof(*csa.CtoA)*sigma); //
  csa.AtoC = mymalloc(sizeof(*csa.AtoC)*sigma); //
  csa.K = mymalloc(sizeof(*csa.K)*(sigma+2)); //
  for (i=0; i<sigma; i++) csa.C[i] = 0;


  psi_id = csa.id;
  if (psi_id >= 0) {
    printf("create psi: id=%d\n",psi_id);
  }
  if (idx_id >= 0) {
    printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2);
  }

  psize = 0;

  if (psi_id >= 0) {
    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
      psize = psi1_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("Psi   %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_DIFF_GAMMA_RR:
      psize = psi12_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("Psi   %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA:
      psize = lf_dna_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA2:
      psize = lf_dna2_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_BIT:
      psize = lf_bit_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
      psize = lf_wt_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
#if 0
    case ID_BWT_HUF:
      psize = lf_bwt_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("BW    %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
#endif
    case ID_SPARSE4:
      psize = psi2_makeindex(&csa, fname);
      printf("n     %ld\n",csa.n);
      printf("Psi   %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    default:
      printf("psi_id = %d\n",psi_id);
      exit(1);
    }
  }

  csa.k = (blog(csa.n+1)+1+8-1)/8;

  for (i=0; i<sigma; i++) csa.CtoA[i] = -1;
  csa.K[-1+1] = 1;
  for (m=0,v=1,i=0; i<sigma; i++) {
    if (csa.C[i]>0) {
      csa.AtoC[m] = i;
      csa.CtoA[i] = m;
      csa.K[m+1] = v;
//      printf("i=%ld v = %ld C[i] = %ld\n",i,v,csa.C[i]);
      v += csa.C[i];
      m++;
    }
  }
  csa.K[m+1] = v;
  csa.m = m;

  if (csa.D >= csa.n) {
    printf("D=%d >= n=%ld\n",csa.D,csa.n);
    exit(0);
  }
  if (csa.D2 >= csa.n) {
    printf("D2=%d >= n=%ld\n",csa.D2,csa.n);
    exit(0);
  }

  if (idx_id >= 0) {
    n = csa.n;
    k = csa.k;
////  compute SA and ISA
    if (csa.D > 0) csa.SA = mymalloc(((n-1)/csa.D+1+1)*k);
    if (csa.D2 > 0) csa.ISA = mymalloc(((n-1)/csa.D2+1+1)*k);
    if (csa.D == 0 && csa.D2 == 0) goto brk;

    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
    case ID_SPARSE4:
    case ID_DIFF_GAMMA_RR:
      j = 0;
      for (i=0; i<=n; i++) {
        display_progressbar("making sa ",i,n);
        j = csa.psi(&csa,j);
  //  sa[j] = i;
        if (csa.D > 0 && j % csa.D == 0) {
          putuint(csa.SA,j / csa.D,i,k);
        }
        if (csa.D2 > 0 && i % csa.D2 == 0) {
          putuint(csa.ISA,i / csa.D2,j,k);
        }
      }
//      putuint(csa.SA,0,n,k);
      break;
    case ID_BWT_DNA:
    case ID_BWT_DNA2:
    case ID_BWT_BIT:
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
    case ID_BWT_HUF:
      j = 0;
      for (i=n-1; i>=0; i--) {
        display_progressbar("making sa ",i,n);
        v = csa.LF(&csa,j);
//        printf("LF[%ld] = %ld\n",j,v);
        j = v;
        if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k);
        if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k);
      }
//      putuint(csa.SA,0,n,k);
      if (csa.D > 0) putuint(csa.SA,0,n,k); // 2011-12-20
      break;
    default:
      break;
    }
brk:
////      write idx
    f2 = fopen(fidx,"wb"); /* directory */
    if (f2 == NULL) {
      perror("csa2_new1: ");
      exit(1);
    }

    isize = 0;

    writeint(4,VERSION,f2); /* version */
    isize += 4;

    writeint(1,ID_HEADER,f2); // header ID
    isize += 1;
    isize = write_header(&csa, f2, isize);

    if (csa.D > 0) {
      writeint(1,ID_SA,f2);
      isize += 1;
      isize = write_sa(&csa, f2, isize);
    }

    if (csa.D2 > 0) {
      writeint(1,ID_ISA,f2);
      isize += 1;
      isize = write_isa(&csa, f2, isize);
    }


    fclose(f2);

    if (csa.D > 0) free(csa.SA);
    if (csa.D2 > 0) free(csa.ISA);

    printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize),
                (double)(psize+isize)*8/csa.n);
  }
  free(fidx);
}