/**
 * Decide whether two transcript groups share at least one TreeFam family.
 *
 * Each transcript argument is a '|'-separated list of identifiers. Every
 * identifier is resolved through lookUpTreeFam(); identifiers for which the
 * lookup yields NULL are ignored.
 *
 * @param kgTreeFams   lookup table handed unchanged to lookUpTreeFam()
 * @param transcript1  '|'-separated identifiers of the first group
 * @param transcript2  '|'-separated identifiers of the second group
 * @return 1 if any family id of the first group equals (strEqual) any family
 *         id of the second group, 0 otherwise
 *
 * The two id lists are function-static and re-initialised with
 * textCreateClear() on every call, so the function is not reentrant.
 */
static int isHomologous (Array kgTreeFams, char *transcript1, char *transcript2)
{
  static Texta famIdsFirst = NULL;
  static Texta famIdsSecond = NULL;
  Texta parts;
  char *famId;
  int a, b;
  int shared = 0;

  textCreateClear (famIdsFirst,100);
  textCreateClear (famIdsSecond,100);

  // Resolve the identifiers of the first transcript group
  parts = textFieldtokP (transcript1,"|");
  for (a = 0; a < arrayMax (parts); a++) {
    famId = lookUpTreeFam (kgTreeFams,textItem (parts,a));
    if (famId != NULL) {
      textAdd (famIdsFirst,famId);
    }
  }
  textDestroy (parts);

  // Resolve the identifiers of the second transcript group
  parts = textFieldtokP (transcript2,"|");
  for (b = 0; b < arrayMax (parts); b++) {
    famId = lookUpTreeFam (kgTreeFams,textItem (parts,b));
    if (famId != NULL) {
      textAdd (famIdsSecond,famId);
    }
  }
  textDestroy (parts);

  // Stop at the first pair of identical family ids
  for (a = 0; !shared && a < arrayMax (famIdsFirst); a++) {
    for (b = 0; !shared && b < arrayMax (famIdsSecond); b++) {
      if (strEqual (textItem (famIdsFirst,a),textItem (famIdsSecond,b))) {
        shared = 1;
      }
    }
  }
  return shared;
}
/**
 * Check whether a PSL alignment hits the location encoded in the query name
 * as a perfect, full-length match.
 *
 * The query name is split on '|'; field 1 is compared with the target name,
 * fields 3 and 4 (parsed with atoi) with the target start and end.
 *
 * @param blQ    BLAT query whose qName carries the '|'-separated fields
 * @param entry  PSL entry to compare against
 * @return 1 if target name, start and end agree, the entry has no mismatches
 *         and matches + nCount equals the query size; 0 otherwise, including
 *         when the query name has fewer than five fields
 */
int checkOriginal ( BlatQuery* blQ, PslEntry* entry )
{
  int res = 0;
  Texta t = textFieldtokP( blQ->qName, "|");

  // Fields 1, 3 and 4 are read below: a shorter name cannot match and must
  // not be indexed past its end.
  if( arrayMax( t ) >= 5 &&
      strEqual ( textItem( t, 1 ), entry->tName ) &&
      atoi( textItem( t, 3 ) ) == entry->tStart &&
      atoi( textItem( t, 4 ) ) == entry->tEnd &&
      entry->misMatches == 0 &&
      (entry->matches + entry->nCount) == entry->qSize ) {
    res = 1;
  }
  textDestroy( t );
  return res;
}
/**
 * Print the alignment blocks of one SAM entry to stdout as comma-separated
 * "rname:strand:targetStart:targetEnd:queryStart:queryEnd" records.
 *
 * The CIGAR string is split on the characters 'M' and 'N'. Token 0 is the
 * length of the first block; afterwards tokens are consumed in pairs
 * (gap length at i-1, block length at i), stopping before the last token.
 *
 * @param e        SAM entry providing rname, pos, flags and cigar
 * @param _strand  R_FIRST to derive the strand from S_QUERY_STRAND; any other
 *                 value derives it from S_MATE_STRAND
 */
static void printMrfAlignBlocks (SamEntry *e, int _strand)
{
  Texta cigarParts = textFieldtokP (e->cigar, "MN");
  char strandChar;
  int blockLen, gapLen;
  int queryPos, targetPos;
  int k;

  // '-' when the selected strand bit is set in the flags, '+' otherwise
  strandChar = (_strand == R_FIRST)
    ? ((e->flags & S_QUERY_STRAND) ? '-' : '+')
    : ((e->flags & S_MATE_STRAND) ? '-' : '+');

  // First block: starts at the entry position, query coordinates begin at 1
  blockLen = atoi (textItem (cigarParts, 0));
  targetPos = e->pos;
  queryPos = 1;
  printf ("%s:%c:%d:%d:%d:%d", e->rname, strandChar, e->pos,
          targetPos + blockLen - 1, 1, blockLen);
  targetPos += blockLen - 1;
  queryPos += blockLen;

  // Remaining blocks: each is preceded by a gap on the target
  if (arrayMax (cigarParts) > 2) {
    k = 2;
    while (k < arrayMax (cigarParts) - 1) {
      blockLen = atoi (textItem (cigarParts, k));
      gapLen = atoi (textItem (cigarParts, k - 1));
      targetPos += gapLen + 1;
      printf(",%s:%c:%d:%d:%d:%d", e->rname, strandChar, targetPos,
             targetPos + blockLen - 1, queryPos, queryPos + blockLen - 1);
      targetPos += blockLen - 1;
      queryPos += blockLen;
      k += 2;
    }
  }
  textDestroy (cigarParts);
}
/**
 * Read a tab-separated biokit expression file into an R data.frame.
 *
 * The first line of the file is skipped as a header. Every following line
 * must have at least 7 columns: column 1 becomes the row name and columns
 * 2-7 become RPKM_MultiMap, ReadCount_MultiMap, RPKM_UniqMap,
 * ReadCount_UniqMap, MultiProp and AllMappingReads. Any further columns are
 * returned as character columns Annotation1, Annotation2, ...; rows with
 * fewer extra columns than the widest row get NA there.
 *
 * @param filename  R character vector; element 0 is the path to read
 * @return a list with names, row.names and class "data.frame" set
 *
 * Raises an R error if the file cannot be opened or a line has fewer than
 * 7 columns. All native buffers and the line stream are released before
 * the error is raised.
 */
SEXP c_read_biokit_exprs (SEXP filename)
{
  const int MAND_NCOL=7; // the first column is the row name, and column 2-7 are mandatory
  LineStream ls;
  char* line;
  int add_ncol=0;
  Texta it;
  Texta rnames;
  Array mrpkms, mreads, srpkms, sreads, mprop, allmap, annos;
  Texta anno=NULL; // must have a NULL assigned; otherwise textCreateClear leads to memory error
  Texta rowAnno;
  Stringa str;
  SEXP R_rnames, R_mrpkms, R_mreads, R_srpkms, R_sreads, R_mprop, R_allmap;
  SEXP R_res=NULL;
  SEXP R_colnames, R_class;
  int nprot=0;
  int i=0;
  int j=0;
  int nrow=0;
  int bad_row=0;
  const char* fn=CHAR(STRING_ELT(filename, 0));

  // The name is passed without a heap copy; the previous strdup() was never
  // freed. NOTE(review): assumes ls_createFromFile only reads the name.
  ls = ls_createFromFile((char*) fn);
  if(ls == NULL)
    error("Cannot open file '%s'", fn);

  rnames=textCreate(128);
  mrpkms=arrayCreate(128, double);
  mreads=arrayCreate(128, int);
  srpkms=arrayCreate(128, double);
  sreads=arrayCreate(128, int);
  mprop=arrayCreate(128, double);
  allmap=arrayCreate(128, int);
  annos=arrayCreate(128, Texta);
  str=stringCreate(8);

  ls_nextLine(ls); // skip the first header line
  while((line = ls_nextLine(ls)) != NULL) {
    it = textFieldtokP(line, "\t");
    if(arrayMax(it)<MAND_NCOL) {
      // Defer the R error until the native resources have been released
      textDestroy(it);
      bad_row=1;
      break;
    }
    textAdd(rnames, textItem(it, 0));
    array(mrpkms, arrayMax(mrpkms), double)=atof(textItem(it, 1));
    array(mreads, arrayMax(mreads), int)=atoi(textItem(it, 2));
    array(srpkms, arrayMax(srpkms), double)=atof(textItem(it, 3));
    array(sreads, arrayMax(sreads), int)=atoi(textItem(it, 4));
    array(mprop, arrayMax(mprop), double)=atof(textItem(it, 5));
    array(allmap, arrayMax(allmap), int)=atoi(textItem(it, 6));

    add_ncol = max(arrayMax(it)-MAND_NCOL, add_ncol);
    textCreateClear(anno, arrayMax(it)-MAND_NCOL);
    for(i=MAND_NCOL; i<arrayMax(it); ++i) {
      textAdd(anno, textItem(it, i));
    }
    array(annos, arrayMax(annos), Texta)=textClone(anno);
    textDestroy(it); // the tokens were copied above
    nrow++;
  }
  if(bad_row)
    goto cleanup;

  R_rnames=PROTECT(allocVector(STRSXP, nrow)); nprot++;
  R_mrpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_mreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_srpkms=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_sreads=PROTECT(allocVector(INTSXP, nrow)); nprot++;
  R_mprop=PROTECT(allocVector(REALSXP, nrow)); nprot++;
  R_allmap=PROTECT(allocVector(INTSXP, nrow)); nprot++;

  for(i=0; i<nrow; ++i) {
    SET_STRING_ELT(R_rnames, i, mkChar(textItem(rnames, i)));
    REAL(R_mrpkms)[i]=arru(mrpkms, i, double);
    INTEGER(R_mreads)[i]=arru(mreads, i, int);
    REAL(R_srpkms)[i]=arru(srpkms, i, double);
    INTEGER(R_sreads)[i]=arru(sreads, i, int);
    REAL(R_mprop)[i]=arru(mprop, i, double);
    INTEGER(R_allmap)[i]=arru(allmap, i, int);
  }

  R_res=PROTECT(allocVector(VECSXP, MAND_NCOL+add_ncol-1)); nprot++;
  SET_VECTOR_ELT(R_res, 0, R_mrpkms);
  SET_VECTOR_ELT(R_res, 1, R_mreads);
  SET_VECTOR_ELT(R_res, 2, R_srpkms);
  SET_VECTOR_ELT(R_res, 3, R_sreads);
  SET_VECTOR_ELT(R_res, 4, R_mprop);
  SET_VECTOR_ELT(R_res, 5, R_allmap);

  for(i=0; i<add_ncol; ++i) {
    SEXP R_anno=NULL;
    R_anno=PROTECT(allocVector(STRSXP, nrow));
    for(j=0; j<nrow; ++j) {
      // Separate variable: reusing 'anno' here would orphan the scratch list
      rowAnno=array(annos, j, Texta);
      if(arrayMax(rowAnno)>i) {
        SET_STRING_ELT(R_anno, j, mkChar(textItem(rowAnno, i)));
      } else {
        SET_STRING_ELT(R_anno, j, R_NaString);
      }
    }
    SET_VECTOR_ELT(R_res, i+MAND_NCOL-1, R_anno); // -1 because the first column is row name
    UNPROTECT(1);
  }

  PROTECT(R_colnames=allocVector(STRSXP, MAND_NCOL+add_ncol-1)); nprot++;
  PROTECT(R_class=allocVector(STRSXP, 1)); nprot++;
  SET_STRING_ELT(R_colnames, 0, mkChar("RPKM_MultiMap"));
  SET_STRING_ELT(R_colnames, 1, mkChar("ReadCount_MultiMap"));
  SET_STRING_ELT(R_colnames, 2, mkChar("RPKM_UniqMap"));
  SET_STRING_ELT(R_colnames, 3, mkChar("ReadCount_UniqMap"));
  SET_STRING_ELT(R_colnames, 4, mkChar("MultiProp"));
  SET_STRING_ELT(R_colnames, 5, mkChar("AllMappingReads"));
  for(i=0; i<add_ncol; ++i) {
    stringPrintf(str, "Annotation%d", i+1);
    SET_STRING_ELT(R_colnames, i+MAND_NCOL-1, mkChar(string(str)));
  }
  SET_STRING_ELT(R_class, 0, mkChar("data.frame"));
  setAttrib(R_res, install("names"), R_colnames);
  setAttrib(R_res, install("row.names"), R_rnames);
  setAttrib(R_res, install("class"), R_class);

cleanup:
  for(i=0; i<nrow; ++i) {
    textDestroy(array(annos, i, Texta));
  }
  arrayDestroy(annos);
  if(anno != NULL)
    textDestroy(anno);
  textDestroy(rnames); // textDestroy, not arrayDestroy: the strings are owned by the list
  arrayDestroy(mrpkms);
  arrayDestroy(mreads);
  arrayDestroy(srpkms);
  arrayDestroy(sreads);
  arrayDestroy(mprop);
  arrayDestroy(allmap);
  stringDestroy(str);
  ls_destroy(ls);

  if(bad_row)
    error("Input file must contain no less than %d columns", MAND_NCOL);

  UNPROTECT(nprot);
  return(R_res);
}
int main (int argc, char **argv) { LineStream ls; Texta tokens = NULL; char *line; int hasQual = 0; int hasSeqs = 0; int start=1; ls = ls_createFromFile ("-"); while (line = ls_nextLine (ls)) { // Put all the lines of the SAM header in comments if (line[0] == '@') { printf ("# %s\n", line); continue; } // Parse each SAM entry and store into array tokens = textFieldtokP (line, "\t"); if (arrayMax (tokens) < 11) { textDestroy( tokens ); ls_destroy (ls); die ("Invalid SAM entry: %s", line); } SamEntry *currSamE = NULL; SamEntry *mateSamE = NULL; AllocVar(currSamE ); int ret = generateSamEntry( tokens, currSamE, &hasSeqs, &hasQual ); textDestroy( tokens ); if ( ret==0 ) { if ( isPaired ( currSamE ) ) ls_nextLine( ls ); // discarding next entry too (the mate) destroySamEntry( currSamE ); freeMem( currSamE ); continue; } if ( isPaired( currSamE ) ) { int hasQual2, hasSeq2; AllocVar( mateSamE ); Texta secondEnd = NULL; secondEnd = textFieldtok (ls_nextLine( ls ) , "\t"); ret = generateSamEntry( secondEnd, mateSamE, &hasSeq2, &hasQual2 ); textDestroy( secondEnd ); if( ret == 0 ) { destroySamEntry( currSamE ); destroySamEntry( mateSamE ); freeMem( currSamE ); freeMem( mateSamE ); continue; } if (strcmp (currSamE->qname, mateSamE->qname) != 0) { die ("Please note that for paired-end data, sam2mrf requires the mate pairs to be on subsequent lines. 
You may want to sort the SAM file first.\nEx: sort -r file.sam | sam2mrf > file.mrf\n"); } } // Print MRF headers if( start ) { printf ("%s", MRF_COLUMN_NAME_BLOCKS); if (hasSeqs) printf("\t%s", MRF_COLUMN_NAME_SEQUENCE); if (hasQual) printf("\t%s", MRF_COLUMN_NAME_QUALITY_SCORES); printf ("\t%s\n", MRF_COLUMN_NAME_QUERY_ID); start=0; } // Print AlignmentBlocks printMrfAlignBlocks (currSamE, R_FIRST); if( isPaired ( currSamE ) ) { printf ("|"); printMrfAlignBlocks (mateSamE, R_SECOND); } seq_init(); // Print Sequence if (hasSeqs) { if (!currSamE->seq) die ("Entry missing sequence column\n"); if( currSamE->flags & S_QUERY_STRAND ) seq_reverseComplement( currSamE->seq, strlen(currSamE->seq)); printf ("\t%s", currSamE->seq); if (mateSamE) { if (!mateSamE->seq) die ("Entry missing sequence column\n"); if( mateSamE->flags & S_MATE_STRAND ) seq_reverseComplement( mateSamE->seq, strlen(mateSamE->seq)); printf ("|%s", mateSamE->seq); } } // Print quality scores if (hasQual) { if (!currSamE->qual) die ("Entry missing quality scores column\n"); printf ("\t%s", currSamE->qual); if (mateSamE) { if (!mateSamE->qual) die ("Entry missing quality scores column\n"); printf ("|%s", mateSamE->qual); } } // Print queryID if (mateSamE) { printf ("\t%s|%s", currSamE->qname,"2"); // No need to print out both IDs, but need the pipe symbol for consistency } else { printf ("\t%s", currSamE->qname); } printf("\n"); destroySamEntry( currSamE ); freeMem( currSamE ); if( isPaired( currSamE ) ) { destroySamEntry ( mateSamE ); freeMem( mateSamE ); } } // clean up ls_destroy (ls); return EXIT_SUCCESS; }
/**
 * Write the exon highlight "file = ..." directive to fp and, unless the first
 * region has chromosome 0, build a filtered exon highlight file for the
 * requested regions.
 *
 * "<sdata_dir>/tmp/exons.hlight_s.txt" is always created (truncated). If the
 * first region's chromosome is 0, fp is pointed at
 * "<sdata_dir>/exons.hlight.txt" and nothing is filtered. Otherwise, for each
 * region the per-chromosome file "<sdata_dir>/<chr>/exons.hlight.txt" is read
 * (chromosome 23 maps to "X", 24 to "Y") and every line whose start (field 1)
 * or end (field 2) lies within the region is copied to the filtered file.
 *
 * @param fp         configuration stream receiving the "file = ..." line
 * @param regions    array of SRegion_t
 * @param sdata_dir  base data directory
 *
 * On failure to open the output or a per-chromosome file a message is written
 * to stderr and the function returns without writing the directive.
 */
void incl_getExonHlightFile (FILE *fp, Array regions, char *sdata_dir)
{
  LineStream src = NULL;
  FILE *out;
  char *line;
  Texta entry;
  int i, astart, aend;
  SRegion_t *tmp;
  Stringa buffer = stringCreate (50);

  stringPrintf (buffer, "%s/tmp/exons.hlight_s.txt", sdata_dir);
  if (!(out = fopen (string (buffer), "w"))) {
    fprintf (stderr, "Cannot open exons.hlight_s.txt\n");
    stringDestroy (buffer);
    return;
  }

  tmp = arrayp (regions, 0, SRegion_t);
  if (tmp->chromosome == 0) {
    fprintf (fp, "file = %s/exons.hlight.txt\n", sdata_dir);
  } else {
    for (i = 0; i < arrayMax (regions); i++) {
      tmp = arrayp (regions, i, SRegion_t);
      if (tmp->chromosome == 23) {
        stringPrintf (buffer, "%s/X/exons.hlight.txt", sdata_dir);
      } else if (tmp->chromosome == 24) {
        stringPrintf (buffer, "%s/Y/exons.hlight.txt", sdata_dir);
      } else {
        stringPrintf (buffer, "%s/%i/exons.hlight.txt", sdata_dir, tmp->chromosome);
      }
      if ((src = ls_createFromFile (string (buffer))) == NULL) {
        fprintf (stderr, "Cannot open exons.hlight.txt\n");
        stringDestroy (buffer);
        fclose (out);
        return;
      }
      while ((line = ls_nextLine (src)) != NULL) {
        entry = textFieldtokP (line, " ");
        // Lines without start and end fields cannot be tested; skip them
        if (arrayMax (entry) >= 3) {
          astart = atoi (textItem (entry, 1));
          aend = atoi (textItem (entry, 2));
          // NOTE(review): an exon that fully contains the region matches
          // neither clause and is dropped - confirm this is intended.
          if ((astart >= tmp->start && astart <= tmp->end) ||
              (aend >= tmp->start && aend <= tmp->end)) {
            fprintf (out, "%s\n", line);
          }
        }
        textDestroy (entry);
      }
      // One stream per region: release it before opening the next one
      ls_destroy (src);
    }
    fprintf (fp, "file = %s/tmp/exons.hlight_s.txt\n", sdata_dir);
  }
  stringDestroy (buffer);
  fclose (out);
}