Exemplo n.º 1
0
// TODO: optimize this
/*
 * Copy one four-line record from one stream to another.
 *
 * from   - input stream; it is repositioned to the absolute byte position
 *          `offset` (SEEK_SET) before reading.
 * offset - byte position in `from` where the record starts.
 * to     - output stream that receives the four lines, written with fputs.
 *
 * Lines are fetched with READ_LINE, which yields NULL when no further line
 * can be read.  A failed seek or a record with fewer than four lines is
 * reported on stderr and terminates the program with status 1, instead of
 * passing NULL to fputs (undefined behaviour).
 */
void copy_read(long offset,FILE *from,FILE* to) {
  if (fseek(from,offset,SEEK_SET)!=0) {
    fprintf(stderr,"Unable to seek to offset %ld\n",offset);
    exit(1);
  }
  for (int i=0;i<4;i++) {
    char *line=READ_LINE(from);
    if (line==NULL) {
      fprintf(stderr,"file truncated in record at offset %ld\n",offset);
      exit(1);
    }
    fputs(line,to);
  }
}
Exemplo n.º 2
0
/*
 * Index every four-line record of a file by its header line.
 *
 * filename     - path of the file to index; opened here with fopen(..., "r").
 * index        - hash table that receives one entry per record (created
 *                through new_indexentry).
 * start_offset - currently unused.
 * length       - must be <= 0; indexing a sub-range is not implemented and
 *                a positive value terminates the program.
 *
 * For each record the byte position of its header line (from ftell) is
 * stored together with the trimmed header.  Trimming drops the leading '@'
 * and the last two characters of the line (presumably the newline and the
 * mate digit, so both mates share a key - TODO confirm against the callers).
 *
 * Any error (cannot open, malformed header, truncated record, failed entry
 * creation) is reported on stderr and terminates the program with status 1.
 */
void index_file(char *filename,hashtable index,long start_offset,long length) {
  (void)start_offset; // reserved for the unimplemented sub-range mode
  FILE *fd1=fopen(filename,"r");
  if (fd1==NULL) {
    fprintf(stderr,"Unable to open %s\n",filename);
    exit(1);
  }
  // indexing only part of the file is not supported yet
  if(length>0) {
    fprintf(stderr, " Not implemented\n");
    exit(1);
  }
  long cline=1;
  // index creation could be done in parallel
  while(!feof(fd1)) {
    long start_pos=ftell(fd1);
    char *hdr=READ_LINE(fd1);

    if ( hdr==NULL) break;
    if ( hdr[0]!='@' ) {
      fprintf(stderr,"line %ld: error in header %s",cline,hdr);
      exit(1);
    }
    // discard @ (one less byte)
    hdr=&hdr[1];
    int len=strlen(hdr);
    // the trimming below writes to hdr[len-2]; refuse headers too short
    // for that index to stay inside the string
    if ( len<2 ) {
      fprintf(stderr,"line %ld: header too short\n",cline);
      exit(1);
    }
    len--;
    hdr[len-1]='\0'; // cut the last two characters of the line
    // len is now the trimmed name length plus one
    if ( new_indexentry(index,hdr,len,start_pos)==NULL) {
      fprintf(stderr,"line %ld: malloc failed?",cline);
      exit(1);
    }
    // skip the remaining three lines of the record
    char *seq=READ_LINE(fd1);
    char *hdr2=READ_LINE(fd1);
    char *qual=READ_LINE(fd1);

    if (seq==NULL || hdr2==NULL || qual==NULL ) {
      fprintf(stderr,"line %ld: file truncated",cline);
      exit(1);
    }

    PRINT_READS_PROCESSED(cline/4);
    cline+=4;
  }

  fclose(fd1);
  return;
}
Exemplo n.º 3
0
/*
 * Check whether the first line of a file matches the Casava 1.8 read-name
 * pattern.
 *
 * f - path handed to open_fastq.
 *
 * Returns 1 when the first line matches the basic regular expression
 * "[A-Z0-9:]* [12]:[YN]:[0-9]*:.*", and 0 otherwise - including when no
 * line could be read (READ_LINE returned NULL).
 * Terminates the program with status 2 if the expression fails to compile.
 */
int is_casava_1_8(char *f) {
  regex_t regex;
  int is_casava_1_8=0;
  int reti = regcomp(&regex,"[A-Z0-9:]* [12]:[YN]:[0-9]*:.*",0);
  if ( reti ) {
    fprintf(stderr, "Internal error: Could not compile regex\n");
    exit(2);
  }
  FILE *fd1=open_fastq(f);
  // NOTE(review): hdr is used after fclose, so READ_LINE presumably returns
  // a buffer that is not owned by the stream - confirm.
  char *hdr=READ_LINE(fd1);
  fclose(fd1);
  // An empty file yields no header; passing NULL to regexec is undefined.
  if ( hdr!=NULL && regexec(&regex, hdr, 0, NULL, 0)==0 ) {
    is_casava_1_8=1;
  }
  regfree(&regex);
  return is_casava_1_8;
}
Exemplo n.º 4
0
/*
 * fastq_filter_n: write to stdout every four-line record whose sequence
 * line contains neither 'N' nor 'n'.
 *
 * Usage: fastq_filter_n fastq1
 *
 * Exit status: 0 on success; 1 on a usage error, on a header line that does
 * not start with '@', or on a record with fewer than four lines.
 */
int main(int argc, char **argv ) {

  if (argc!=2) {
    fprintf(stderr,"Usage: fastq_filter_n fastq1\n");
    exit(1);
  }
  FILE *fd1=open_fastq(argv[1]);
  // ************************************************************
  unsigned long cline=1;
  while(!feof(fd1)) {
    char *hdr=READ_LINE(fd1,1);
    if ( hdr==NULL) break;
    if ( hdr[0]!='@' ) {
      fprintf(stderr,"line %lu: error in header %s",cline,hdr);
      return 1;
    }
    // read the remaining three lines; the sequence is inspected below
    char *seq=READ_LINE(fd1,2);
    if ( seq==NULL || READ_LINE(fd1,3)==NULL || READ_LINE(fd1,4)==NULL ) {
      fprintf(stderr,"line %lu: file truncated\n",cline);
      return 1;
    }

    short n_found=0;
    int k;
    for ( k=0;k<MAX_READ_LENGTH;k++) {
      // stop at the end of the line; '\0' covers a last line that has no
      // trailing newline, so stale buffer contents are never inspected
      if (seq[k]=='\n' || seq[k]=='\0') break;
      if (seq[k]=='N' || seq[k]=='n' ) {
        n_found=1; break;
      }
    }
    if ( ! n_found ) {
      WRITE_READ(stdout);
    }
    cline+=4;
  }
  fclose(fd1);
  exit(0);
}
Exemplo n.º 5
0
/*
 * Split two files of four-line records into paired and unpaired reads.
 *
 * Arguments (five are required):
 *   argv[1]  first input file; it is indexed by header with index_file
 *   argv[2]  second input file; read sequentially
 *   argv[3]  output: records from argv[1] whose header is also in argv[2]
 *   argv[4]  output: records from argv[2] whose header is also in argv[1]
 *   argv[5]  output: records found in only one of the two inputs
 *
 * Headers are compared after dropping the leading '@' and the last two
 * characters of the line (the same trimming index_file applies).
 *
 * Exit status: 0 on success; 1 on a usage error, an I/O or format error,
 * or when no pair at all was found.
 *
 * NOTE(review): fd1 and fd2 are not declared in this function; they are
 * presumably file-scope FILE pointers - confirm.
 */
int main(int argc, char **argv ) {
  long paired=0;

  if (argc!=6) {
    const char *prog=(argc>0 && argv[0]!=NULL) ? argv[0] : "fastqinterleaved2pair.c";
    fprintf(stderr,"Usage: %s fastq1 fastq2 paired1 paired2 unpaired\n",prog);
    exit(1);
  }

  // NOTE(review): this handle is never read from; it is kept only because
  // open_fastq may validate the input - confirm and drop if it does not.
  FILE *fd=open_fastq(argv[1]);
  // ************************************************************
  unsigned long cline=1;
  hashtable index=new_hashtable(HASHSIZE);
  index_mem+=sizeof(hashtable);

  index_file(argv[1],index,0,-1);
  printf("\n");
  // print some info
  printf("Reads indexed: %ld\n",index->n_entries);
  printf("Memory used in indexing: %ld MB\n",index_mem/1024/1024);

  char *p1=argv[3];
  char *p2=argv[4];
  char *p3=argv[5];
  fd1=open_fastq(argv[1]);
  fd2=open_fastq(argv[2]);
  FILE *fdw1=fopen(p1,"w");
  FILE *fdw2=fopen(p2,"w");
  FILE *fdw3=fopen(p3,"w");
  unsigned long up2=0; // records of argv[2] without a mate in argv[1]

  if ( fdw1==NULL || fdw2==NULL || fdw3==NULL ) {
    fprintf(stderr,"Unable to create output files\n");
    exit(1);
  }

  // walk argv[2] and look every header up in the index of argv[1]
  cline=1;
  while(!feof(fd2)) {
    long start_pos=ftell(fd2);
    char *hdr=READ_LINE(fd2);

    if ( hdr==NULL) break;
    if ( hdr[0]!='@' ) {
      fprintf(stderr,"line %lu: error in header %s",cline,hdr);
      return 1;
    }
    // discard @ (one less byte)
    hdr=&hdr[1];
    int len=strlen(hdr);
    // the trimming below writes to hdr[len-2]; refuse headers too short
    // for that index to stay inside the string
    if ( len<2 ) {
      fprintf(stderr,"line %lu: header too short\n",cline);
      return 1;
    }
    len--;
    hdr[len-1]='\0'; // cut the last two characters of the line
    // lookup hdr in index
    INDEX_ENTRY* e=lookup_header(index,hdr);
    if (e==NULL) {
      // no mate: the record goes to the unpaired output
      ++up2;
      copy_read(start_pos,fd2,fdw3);
    } else {
      long key=hashit(hdr);
      // pair found: write both mates, each to its own output
      ++paired;
      copy_read(start_pos,fd2,fdw2);
      copy_read(e->entry_start,fd1,fdw1);
      // remove entry from index so that only unpaired entries remain
      if (delete(index,key,e)!=e) {
        fprintf(stderr,"Unable to delete entry from index\n");
        exit(1);
      }
      free_indexentry(e);
    }
    PRINT_READS_PROCESSED(cline/4);

    cline+=4;
  }
  printf("\n");
  printf("Recording %ld unpaired reads from %s\n",index->n_entries,argv[1]);fflush(stdout);
  fclose(fd1);


  // record the unpaired from argv[1]: whatever is still in the index
  fd1=open_fastq(argv[1]);
#ifndef SEQDISKACCESS
  // random access: seek to every entry left in the index
  init_hash_traversal(index);
  INDEX_ENTRY* e;
  cline=1;
  while((e=(INDEX_ENTRY*)next_hash_object(index))!=NULL) {
    copy_read(e->entry_start,fd1,fdw3);
    PRINT_READS_PROCESSED(cline);
    ++cline;
  }
#else
  // sequential disk access: re-read argv[1] and copy the records whose
  // header is still in the index
  cline=1;
  unsigned long remaining=index->n_entries;
  while(!feof(fd1) && remaining ) {
    char *hdr=READ_LINE(fd1);

    if ( hdr==NULL) break;
    if ( hdr[0]!='@' ) {
      fprintf(stderr,"line %lu %s: error in header %s",cline,argv[1],hdr);
      return 1;
    }
    // discard @ (one less byte)
    hdr=&hdr[1];
    int len=strlen(hdr);
    if ( len<2 ) {
      fprintf(stderr,"line %lu %s: header too short\n",cline,argv[1]);
      return 1;
    }
    len--;
    hdr[len-1]='\0'; // cut the last two characters of the line

    // lookup hdr in index
    INDEX_ENTRY* e=lookup_header(index,hdr);
    if (e!=NULL) {
      copy_read(e->entry_start,fd1,fdw3);
      remaining--;
    } else {
      // skip the other three lines of this record
      READ_LINE(fd1);
      READ_LINE(fd1);
      READ_LINE(fd1);
    }
    PRINT_READS_PROCESSED(cline/4);
    cline+=4;
  }
#endif
  // close the inputs in both build variants
  fclose(fd1);
  fclose(fd2);
  fclose(fd);
  printf("\n");
  printf("Unpaired from %s: %ld\n",argv[1],index->n_entries);
  printf("Unpaired from %s: %lu\n",argv[2],up2);
  printf("Paired: %ld\n",paired);
  fclose(fdw1);
  fclose(fdw2);
  fclose(fdw3);
  if ( paired == 0 ) {
    fprintf(stderr,"!!!WARNING!!! 0 paired reads! are the headers ok?\n");
    exit(1);
  }
  exit(0);
}
Exemplo n.º 6
0
/*
 * Load a hash map from `file`.
 *
 * Leading lines that start with "#" or are empty after STRING_trim are
 * skipped.  The next line selects the format version:
 *   ">> HASH MAP BEGIN"      -> 1
 *   ">> HASH MAP V2 BEGIN"   -> 2
 *   ">> HASH MAP V3 BEGIN"   -> 3
 *   other ">> HASH MAP V..." -> the user is asked whether to try anyway;
 *                               if so the data is read as version 3
 * Any other line is reported through GFX_Message and NULL is returned.
 *
 * The line after the marker is the element count handed to HASH_create.
 * Entries follow until one of the "<< HASH MAP ... END" lines.  Each entry
 * is a key line, then (version > 1 only) an array-index line, then a value
 * read by DYN_load.  In version 1, keys starting with "<int hash>" carry
 * the index inside the key and are stored under the empty key.
 *
 * Returns the new hash, or NULL when the header is not recognised, the
 * user declines to load a newer format, or DYN_load reports failure.
 */
hash_t *HASH_load(disk_t *file){

  wchar_t *cur = L"";

  // Skip comment lines and blank lines in front of the header.
  for(;;){
    if(!(STRING_starts_with(cur, "#") || STRING_equals2(STRING_trim(cur), L"")))
      break;
    cur = READ_LINE(file);
  }

  int format_version;

  if(STRING_equals(cur,">> HASH MAP BEGIN"))
    format_version = 1;
  else if (STRING_equals(cur,">> HASH MAP V2 BEGIN"))
    format_version = 2;
  else if (STRING_equals(cur,">> HASH MAP V3 BEGIN"))
    format_version = 3;
  else if (!STRING_starts_with(cur, ">> HASH MAP V")){
    GFX_Message(NULL, "Trying to load something which is not a hash map. First line: \"%S\"", cur);
    return NULL;
  } else {
    // Unknown newer format: let the user decide, then read it as V3.
    format_version = 3;

    vector_t choices = {0};
    int try_anyway = VECTOR_push_back(&choices, "Try anyway (program might crash and/or behave unstable)");
    int ok = VECTOR_push_back(&choices, "Ok");

    int answer = GFX_Message(&choices, "Need a newer version of Radium to load this file");
    if (answer!=try_anyway)
      return NULL;

    (void)ok;
  }

  // Element count line.
  cur = READ_LINE(file);
  hash_t *hash=HASH_create(STRING_get_int(cur));
  hash->version = format_version;

  // One iteration per entry; the step expression fetches the next key line.
  for(cur = READ_LINE(file);
      !(STRING_equals(cur,"<< HASH MAP END") || STRING_equals(cur,"<< HASH MAP V2 END") || STRING_equals(cur,"<< HASH MAP V3 END"));
      cur = READ_LINE(file))
  {
    const char *entry_key = STRING_get_chars(cur);
    int array_index = 0;

    if(format_version > 1){

      // V2 and later store the array index on a line of its own.
      cur = READ_LINE(file);
      array_index = STRING_get_int(cur);

      if(array_index+1 > hash->num_array_elements)
        hash->num_array_elements = array_index+1;

    } else if(strncmp(entry_key,"<int hash>",strlen("<int hash>"))==0) {

      // V1 encodes array elements as "<int hash> N" keys.
      sscanf(entry_key, "<int hash> %d", &array_index);
      entry_key = "";
      hash->num_array_elements++;

    }

    bool loaded;
    dyn_t value = DYN_load(file, &loaded);
    if (!loaded)
      return NULL;

    put_dyn(hash, entry_key, array_index, value);
  }

  return hash;
}
Exemplo n.º 7
0
/*
 * Parse a solver solution from `tl` into `result` and `sensitivity`.
 *
 * lp          - solver state; only lp->parent is read (for the expected
 *               number of columns).
 * tl          - line-oriented input holding the solution text.
 * result      - receives the objective value, the quality derived from the
 *               primal status character, and one solution value per column.
 * sensitivity - receives one shadow price per row.
 * has_integer - selects the line layout: the status line has no dual status
 *               and the row/column lines carry a single value when TRUE.
 *
 * Expected input: one "s ..." status line, then `rows` lines "i <n> ...",
 * then `cols` lines "j <n> ...", where <n> counts up from 1.
 *
 * Returns FALSE on success and TRUE on any parse failure (note the
 * inverted sense).
 *
 * NOTE(review): READ_LINE is defined outside this function; it presumably
 * jumps to the `fail` label when no line is available - confirm before
 * renaming that label.
 */
static gboolean
gnm_glpk_read_solution_458 (GnmGlpk *lp,
			    GsfInputTextline *tl,
			    GnmSolverResult *result,
			    GnmSolverSensitivity *sensitivity,
			    gboolean has_integer)
{
	GnmSubSolver *subsol = lp->parent;
	const char *line;
	unsigned cols, rows, c, r;
	gnm_float val;
	char pstat, dstat;

	READ_LINE (tl, line);

	if (has_integer) {
		if (sscanf (line, "s %*s %u %u %c %" GNM_SCANF_g,
			    &rows, &cols, &pstat, &val) != 4)
			goto fail;
	} else {
		if (sscanf (line, "s %*s %u %u %c %c %" GNM_SCANF_g,
			    &rows, &cols, &pstat, &dstat, &val) != 5)
			goto fail;
	}
	// The column count must match the number of known cells.
	if (cols != g_hash_table_size (subsol->cell_from_name))
		goto fail;

	result->value = val;
	switch (pstat) {
	case 'o':
		result->quality = GNM_SOLVER_RESULT_OPTIMAL;
		break;
	case 'f':
		result->quality = GNM_SOLVER_RESULT_FEASIBLE;
		break;
	case 'u':
	case 'i':
	case 'n':
		result->quality = GNM_SOLVER_RESULT_INFEASIBLE;
		break;
	default:
		goto fail;
	}

	for (r = 0; r < rows; r++) {
		gnm_float pval, dval;
		char rstat;
		unsigned r1, cidx = r;

		READ_LINE (tl, line);

		// r1 is unsigned, so the conversion must be %u, not %d.
		// NOTE(review): with has_integer the only value on the line
		// ends up as the shadow price - confirm this is intended.
		if ((has_integer
		     ? sscanf (line, "i %u %" GNM_SCANF_g,
			       &r1, &dval) != 2
		     : sscanf (line, "i %u %c %" GNM_SCANF_g " %" GNM_SCANF_g,
			       &r1, &rstat, &pval, &dval) != 4) ||
		    r1 != cidx + 1)
			goto fail;
		// rstat?

		sensitivity->constraints[cidx].shadow_price = dval;
	}

	for (c = 0; c < cols; c++) {
		gnm_float pval, dval;
		char cstat;
		unsigned c1, cidx = c;

		READ_LINE (tl, line);

		// c1 is unsigned, so the conversion must be %u, not %d.
		if ((has_integer
		     ? sscanf (line, "j %u %" GNM_SCANF_g,
			       &c1, &pval) != 2
		     : sscanf (line, "j %u %c %" GNM_SCANF_g " %" GNM_SCANF_g,
			       &c1, &cstat, &pval, &dval) != 4) ||
		    c1 != cidx + 1)
			goto fail;
		// cstat?

		result->solution[cidx] = pval;
	}

	// Success
	return FALSE;

fail:
	return TRUE;
}