// TODO: optimize this
/*
 * Copy one four-line record from `from` to `to`.
 *
 * Seeks `from` to the absolute byte position `offset`, then reads four
 * lines with READ_LINE and writes each of them to `to` unchanged.
 *
 * offset  byte position (from the start of `from`) of the record's first line
 * from    stream the record is read from; its position is left just after
 *         the fourth line
 * to      stream the record is appended to
 *
 * Terminates the program with exit status 1 when the seek fails or when
 * fewer than four lines are available; READ_LINE yields NULL in that case
 * and passing NULL to fputs() is undefined behaviour.
 */
void copy_read(long offset,FILE *from,FILE* to) {
  int i;

  if (fseek(from,offset,SEEK_SET)!=0) {
    fprintf(stderr,"Unable to seek to offset %ld\n",offset);
    exit(1);
  }
  for (i=0;i<4;i++) {
    char *line=READ_LINE(from);
    if (line==NULL) {
      fprintf(stderr,"Unexpected end of file while copying the read at offset %ld\n",offset);
      exit(1);
    }
    fputs(line,to);
  }
}
/*
 * Index every record of `filename` by its header.
 *
 * The file is consumed in groups of four lines (header, sequence, second
 * header, quality). For each group the header, without the leading '@',
 * the trailing newline and the character just before that newline, is
 * registered in `index` through new_indexentry() together with the byte
 * offset at which the header line starts.
 *
 * filename      path of the file to index (opened with fopen(..., "r"))
 * index         hashtable that receives the entries
 * start_offset  currently unused
 * length        must be <= 0; a positive value is not implemented and
 *               aborts the program
 *
 * Terminates the program with exit status 1 when the file cannot be
 * opened, a header is malformed, new_indexentry() returns NULL or a record
 * is truncated.
 */
void index_file(char *filename,hashtable index,long start_offset,long length) {
  FILE *fd1=fopen(filename,"r");
  if (fd1==NULL) {
    fprintf(stderr,"Unable to open %s\n",filename);
    exit(1);
  }
  // move to the right position
  if(length>0) {
    fprintf(stderr, " Not implemented\n");
    exit(1);
  }
  long cline=1;
  // index creation could be done in parallel
  while(!feof(fd1)) {
    long start_pos=ftell(fd1);
    char *hdr=READ_LINE(fd1);

    if ( hdr==NULL) break;
    if ( hdr[0]!='@' ) {
      fprintf(stderr,"line %ld: error in header %s",cline,hdr);
      exit(1);
    }
    // The trimming below overwrites the second-to-last character after
    // the '@', so at least two characters must follow it; otherwise the
    // write would land outside the header text.
    int len=strlen(&hdr[1]);
    if ( len<2 ) {
      fprintf(stderr,"line %ld: error in header %s",cline,hdr);
      exit(1);
    }
    // discard @ (one less byte)
    hdr=&hdr[1];
    len--;
    hdr[len-1]='\0';
    // get seq
    if ( new_indexentry(index,hdr,len,start_pos)==NULL) {
      fprintf(stderr,"line %ld: malloc failed?",cline);
      exit(1);
    }
    char *seq=READ_LINE(fd1);
    char *hdr2=READ_LINE(fd1);
    char *qual=READ_LINE(fd1);
    if (seq==NULL || hdr2==NULL || qual==NULL ) {
      fprintf(stderr,"line %ld: file truncated",cline);
      exit(1);
    }
    PRINT_READS_PROCESSED(cline/4);
    // each record spans four lines
    cline+=4;
  }
  fclose(fd1);
  return;
}
// check if the read name format was generated by casava 1.8
/*
 * Reads the first line of the file `f` (opened with open_fastq) and
 * matches it against the POSIX basic regular expression
 * "[A-Z0-9:]* [12]:[YN]:[0-9]*:.*".
 *
 * Returns 1 when the line matches, 0 otherwise (including when no line
 * could be read). Terminates the program with exit status 2 when the
 * pattern cannot be compiled.
 */
int is_casava_1_8(char *f) {
  regex_t regex;
  int matched=0;

  if ( regcomp(&regex,"[A-Z0-9:]* [12]:[YN]:[0-9]*:.*",0) ) {
    fprintf(stderr, "Internal error: Could not compile regex\n");
    exit(2);
  }
  FILE *fd1=open_fastq(f);
  char *hdr=READ_LINE(fd1);
  fclose(fd1);

  /* Execute regular expression; an empty file yields no header to test */
  if ( hdr!=NULL && regexec(&regex, hdr, 0, NULL, 0)==0 ) {
    // match
    matched=1;
  }
  regfree(&regex);
  return matched;
}
int main(int argc, char **argv ) { //printf("%d",sizeof(struct index_entry)); if (argc!=2) { fprintf(stderr,"Usage: fastq_filter_n fastq1\n"); exit(1); } FILE *fd1=open_fastq(argv[1]); // ************************************************************ unsigned long cline=1; unsigned long cur_read=0; //char tmp_buffer[MAX_READ_LENGTH]; // read the entry using another fd cline=1; while(!feof(fd1)) { char *hdr=READ_LINE(fd1,1); if ( hdr==NULL) break; if ( hdr[0]!='@' ) { fprintf(stderr,"line %lu: error in header %s",cline,hdr); return 1; } // char *seq=READ_LINE(fd1,2); READ_LINE(fd1,3); READ_LINE(fd1,4); short n_found=0; int k; for ( k=0;k<MAX_READ_LENGTH;k++) { if (seq[k]=='\n') break; if (seq[k]=='N' || seq[k]=='n' ) { n_found=1; break; } } if ( ! n_found ) WRITE_READ(stdout); cline+=4; cur_read++; } fclose(fd1); exit(0); }
int main(int argc, char **argv ) { long paired=0; //printf("%d",sizeof(struct index_entry)); if (argc!=6) { fprintf(stderr,"Usage: fastqinterleaved2pair.c fastq fastq1 fastq2\n"); //fprintf(stderr,"%d",argc); exit(1); } FILE *fd=open_fastq(argv[1]); // ************************************************************ off_t cur_offset=1; unsigned long cline=1; hashtable index=new_hashtable(HASHSIZE); index_mem+=sizeof(hashtable); index_file(argv[1],index,0,-1); printf("\n"); // print some info printf("Reads indexed: %ld\n",index->n_entries); printf("Memory used in indexing: %ld MB\n",index_mem/1024/1024); // char *p1=argv[3]; char *p2=argv[4]; char *p3=argv[5]; fd1=open_fastq(argv[1]); fd2=open_fastq(argv[2]); FILE *fdw1=fopen(p1,"w"); FILE *fdw2=fopen(p2,"w"); FILE *fdw3=fopen(p3,"w"); unsigned long up2=0; if ( fdw1==NULL || fdw2==NULL || fdw3==NULL ) { fprintf(stderr,"Unable to create output files\n"); exit(1); } // read the entry using another fd cline=1; while(!feof(fd2)) { long start_pos=ftell(fd2); char *hdr=READ_LINE(fd2); if ( hdr==NULL) break; if ( hdr[0]!='@' ) { fprintf(stderr,"line %ul: error in header %s",cline,hdr); return 1; } // discard @ (one less byte) hdr=&hdr[1]; int len=strlen(hdr); len--; hdr[len-1]='\0'; // // lookup hdr in index INDEX_ENTRY* e=lookup_header(index,hdr); if (e==NULL) { ++up2; copy_read(start_pos,fd2,fdw3); } else { long key=hashit(hdr); // pair found ++paired; copy_read(start_pos,fd2,fdw2); copy_read(e->entry_start,fd1,fdw1); // remove entry from index if (delete(index,key,e)!=e) { fprintf(stderr,"Unable to delete entry from index\n"); exit(1); } free_indexentry(e); } PRINT_READS_PROCESSED(cline/4); cline+=4; } printf("\n"); printf("Recording %ld unpaired reads from %s\n",index->n_entries,argv[1]);fflush(stdout); fclose(fd1); // record the unpaired from argv[1] fd1=open_fastq(argv[1]); #ifndef SEQDISKACCESS init_hash_traversal(index); INDEX_ENTRY* e; cline=1; while((e=(INDEX_ENTRY*)next_hash_object(index))!=NULL) { 
copy_read(e->entry_start,fd1,fdw3); PRINT_READS_PROCESSED(cline); ++cline; } // #else //sequential disk access // cline=1; unsigned long remaining=index->n_entries; while(!feof(fd1) && remaining ) { //long start_pos=ftell(fd2); char *hdr=READ_LINE(fd1); if ( hdr==NULL) break; if ( hdr[0]!='@' ) { fprintf(stderr,"line %ld %s: error in header %s",cline,argv[1],hdr); return 1; } // discard @ (one less byte) hdr=&hdr[1]; int len=strlen(hdr); len--; hdr[len-1]='\0'; // // lookup hdr in index INDEX_ENTRY* e=lookup_header(index,hdr); if (e!=NULL) { copy_read(e->entry_start,fd1,fdw3); remaining--; } else { READ_LINE(fd1);//seq READ_LINE(fd1);//qual READ_LINE(fd1);//qual } PRINT_READS_PROCESSED(cline/4); cline+=4; } fclose(fd1); #endif printf("\n"); printf("Unpaired from %s: %ld\n",argv[1],index->n_entries); printf("Unpaired from %s: %ld\n",argv[2],up2); printf("Paired: %ld\n",paired); /*fseek(fd2,start_pos,SEEK_SET); printf("%s",READ_LINE(fd2)); printf("%s",READ_LINE(fd2)); printf("%s",READ_LINE(fd2)); printf("%s",READ_LINE(fd2)); */ fclose(fdw1); fclose(fdw2); fclose(fdw3); if ( paired == 0 ) { fprintf(stderr,"!!!WARNING!!! 0 paired reads! are the headers ok?\n"); exit(1); } exit(0); }
/*
 * Load a hash map from `file`.
 *
 * Leading lines that start with "#" or that are empty after trimming are
 * skipped. The next line must be one of the ">> HASH MAP ... BEGIN"
 * headers, which selects format version 1, 2 or 3. A header starting with
 * ">> HASH MAP V" that is none of the known ones triggers a dialog and is
 * loaded as version 3 only when the user picks "Try anyway".
 *
 * After the header comes the element count passed to HASH_create(), then
 * one entry per iteration until a "<< HASH MAP ... END" line is read. Each
 * entry is a key line, an index line (versions above 1 only) and a value
 * read by DYN_load().
 *
 * Returns the loaded hash, or NULL when the header is not recognised, the
 * user declines loading a newer format, or DYN_load() reports failure.
 */
hash_t *HASH_load(disk_t *file){
  wchar_t *line = L"";

  // Skip comments and blank lines in front of the header.
  for (;;) {
    if (!STRING_starts_with(line, "#") && !STRING_equals2(STRING_trim(line), L""))
      break;
    line = READ_LINE(file);
  }

  int version;

  if (STRING_equals(line,">> HASH MAP BEGIN")) {
    version = 1;
  } else if (STRING_equals(line,">> HASH MAP V2 BEGIN")) {
    version = 2;
  } else if (STRING_equals(line,">> HASH MAP V3 BEGIN")) {
    version = 3;
  } else if (!STRING_starts_with(line, ">> HASH MAP V")) {
    GFX_Message(NULL, "Trying to load something which is not a hash map. First line: \"%S\"", line);
    return NULL;
  } else {
    // Unknown (newer) format: let the user decide whether to go on.
    version = 3;

    vector_t choices = {0};
    int try_anyway = VECTOR_push_back(&choices, "Try anyway (program might crash and/or behave unstable)");
    int ok = VECTOR_push_back(&choices, "Ok");
    (void)ok;

    int answer = GFX_Message(&choices, "Need a newer version of Radium to load this file");
    if (answer != try_anyway)
      return NULL;
  }

  line = READ_LINE(file);
  int num_elements = STRING_get_int(line);

  hash_t *hash = HASH_create(num_elements);
  hash->version = version;

  for (line = READ_LINE(file); ; line = READ_LINE(file)) {

    if (STRING_equals(line,"<< HASH MAP END")
        || STRING_equals(line,"<< HASH MAP V2 END")
        || STRING_equals(line,"<< HASH MAP V3 END"))
      break;

    const char *key = STRING_get_chars(line);
    int index = 0;

    if (version <= 1) {

      // Version 1 encodes array elements in the key itself.
      if (strncmp(key, "<int hash>", strlen("<int hash>")) == 0) {
        sscanf(key, "<int hash> %d", &index);
        key = "";
        hash->num_array_elements++;
      }

    } else {

      // Later versions carry the index on a line of its own.
      line = READ_LINE(file);
      index = STRING_get_int(line);

      int needed_size = index + 1;
      if (needed_size > hash->num_array_elements)
        hash->num_array_elements = needed_size;
    }

    bool success;
    dyn_t value = DYN_load(file, &success);
    if (!success)
      return NULL;

    put_dyn(hash, key, index, value);
  }

  return hash;
}
/*
 * Parse a solver solution from `tl` into `result` and `sensitivity`.
 *
 * Expected input, one record per line:
 *   - a status line starting with "s": a skipped token, the row count, the
 *     column count, a primal status character, a dual status character
 *     (absent when `has_integer` is set) and the objective value;
 *   - one "i" line per row: 1-based row number followed by the values;
 *   - one "j" line per column: 1-based column number followed by the values.
 *
 * The column count must equal the size of the sub-solver's cell_from_name
 * table, and row/column numbers must appear in order starting at 1.
 *
 * On success result->value, result->quality, result->solution[] and
 * sensitivity->constraints[].shadow_price are filled in.
 *
 * Returns FALSE on success and TRUE on any parse or consistency failure.
 *
 * NOTE(review): READ_LINE is defined outside this block; presumably it
 * stores the next line in `line` and jumps to `fail` when none is left -
 * confirm. The row count from the file is not checked against the size of
 * sensitivity->constraints here - verify that the caller guarantees it.
 */
static gboolean
gnm_glpk_read_solution_458 (GnmGlpk *lp, GsfInputTextline *tl,
			    GnmSolverResult *result,
			    GnmSolverSensitivity *sensitivity,
			    gboolean has_integer)
{
	GnmSubSolver *subsol = lp->parent;
	const char *line;
	unsigned cols, rows, c, r;
	gnm_float val;
	char pstat, dstat;

	READ_LINE (tl, line);

	if (has_integer) {
		if (sscanf (line, "s %*s %u %u %c %" GNM_SCANF_g,
			    &rows, &cols, &pstat, &val) != 4)
			goto fail;
	} else {
		if (sscanf (line, "s %*s %u %u %c %c %" GNM_SCANF_g,
			    &rows, &cols, &pstat, &dstat, &val) != 5)
			goto fail;
	}
	if (cols != g_hash_table_size (subsol->cell_from_name))
		goto fail;

	result->value = val;
	switch (pstat) {
	case 'o':
		result->quality = GNM_SOLVER_RESULT_OPTIMAL;
		break;
	case 'f':
		result->quality = GNM_SOLVER_RESULT_FEASIBLE;
		break;
	case 'u':
	case 'i':
	case 'n':
		result->quality = GNM_SOLVER_RESULT_INFEASIBLE;
		break;
	default:
		goto fail;
	}

	for (r = 0; r < rows; r++) {
		gnm_float pval, dval;
		char rstat;
		unsigned r1, cidx = r;

		READ_LINE (tl, line);

		/* r1 is unsigned, so it must be scanned with %u */
		if ((has_integer
		     ? sscanf (line, "i %u %" GNM_SCANF_g,
			       &r1, &dval) != 2
		     : sscanf (line, "i %u %c %" GNM_SCANF_g " %" GNM_SCANF_g,
			       &r1, &rstat, &pval, &dval) != 4) ||
		    r1 != cidx + 1)
			goto fail;
		// rstat?

		sensitivity->constraints[cidx].shadow_price = dval;
	}

	for (c = 0; c < cols; c++) {
		gnm_float pval, dval;
		char cstat;
		unsigned c1, cidx = c;

		READ_LINE (tl, line);

		/* c1 is unsigned, so it must be scanned with %u */
		if ((has_integer
		     ? sscanf (line, "j %u %" GNM_SCANF_g,
			       &c1, &pval) != 2
		     : sscanf (line, "j %u %c %" GNM_SCANF_g " %" GNM_SCANF_g,
			       &c1, &cstat, &pval, &dval) != 4) ||
		    c1 != cidx + 1)
			goto fail;
		// cstat?

		result->solution[cidx] = pval;
	}

	// Success
	return FALSE;

fail:
	return TRUE;
}