int readOEAs(int countOEA, char *fileName_OEA) { FILE *fp; int OEA_id; char *strOEA_F, *strOEA_R; strOEA_F=(char *) malloc(max_OEA_Length * sizeof(char)); strOEA_R=(char *) malloc(max_OEA_Length * sizeof(char)); fp=fopen(fileName_OEA, "r"); countOEA_Added=0; OEA_id=0; // fprintf(stdout, "I AM HERE 3"); while (countOEA>0) { fscanf(fp,">%i R\n", &OEA_id); fscanf(fp,"%s\n", strOEA_R);// The sequence itself is from forward strand fscanf(fp,">%i F\n", &OEA_id); fscanf(fp,"%s\n", strOEA_F);//The sequence it self is from reverse trand if (strlen(strOEA_F) <= maxLen_Btw_Pairs && strlen(strOEA_R) <= maxLen_Btw_Pairs) { OEAArray[countOEA_Added].OEA_F=(char *) malloc((strlen(strOEA_F)+1)*sizeof(char)); OEAArray[countOEA_Added].OEA_R=(char *) malloc((strlen(strOEA_R)+1)*sizeof(char)); OEAArray[countOEA_Added].OEA_F_len = strlen(strOEA_F); OEAArray[countOEA_Added].OEA_R_len = strlen(strOEA_R); strcpy(OEAArray[countOEA_Added].OEA_R, reverseComp(strOEA_R)); strcpy(OEAArray[countOEA_Added].OEA_F, strOEA_F); OEAArray[countOEA_Added].id=OEA_id; countOEA_Added++; } countOEA--; } //printf("%s %s\n", OEAArray[181].OEA_F, OEAArray[181].OEA_R); }
int clean(const char *devFile, const char *logFile, const char *strR1, const char *strR2, const char *strSE, int tmpforcePairs, int tmpPolyATTrim) { int forcePairs = tmpforcePairs; FILE *f = fopen(devFile, "r"); //FILE *f = stderr; FILE *R1 = NULL; FILE *R2 = NULL; FILE *SE = NULL; FILE *log = NULL; struct reads r; struct stats s; int sum = 0, R1_len = 0, R2_len = 0, SE_len = 0; statsConstruct(&s); PolyATTrim = tmpPolyATTrim; while (grabTab(f, &r, &s)) { if ((r.r2).r_header != NULL && (r.r1).r_header != NULL) { s.pe_kept++; if (R1 == NULL) { R1 = fopen(strR1, "a"); } if (R2 == NULL) { R2 = fopen(strR2, "a"); } if ((r.r1).r_header[0] == '@') { if ((r.r1).r_header[strlen((r.r1).r_header)-1] == '1') { fprintf(R1, "%s\n%s\n+\n%s\n", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); (r.r2).r_header[strlen((r.r2).r_header)-1] = '2'; fprintf(R2, "%s\n%s\n+\n%s", (r.r2).r_header, (r.r2).r_seq, (r.r2).r_qual); } else { fprintf(R1, "%s/1\n%s\n+\n%s\n", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); fprintf(R2, "%s/2\n%s\n+\n%s", (r.r2).r_header, (r.r2).r_seq, (r.r2).r_qual); } } else { fprintf(R1, "@%s/1\n%s\n+\n%s\n", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); fprintf(R2, "@%s/2\n%s\n+\n%s", (r.r2).r_header, (r.r2).r_seq, (r.r2).r_qual); } } else if (forcePairs && (r.r1).r_header != NULL) { s.numForcedPairs++; int loc = (strlen((r.r1).r_seq))/2; char cSeq = (r.r1).r_seq[loc]; char cQual = (r.r1).r_qual[loc]; if (R1 == NULL) { R1 = fopen(strR1, "a"); } if (R2 == NULL) { R2 = fopen(strR2, "a"); } if ((r.r1).r_header[0] == '@') { (r.r1).r_seq[loc] = '\0'; (r.r1).r_qual[loc] = '\0'; fprintf(R1, "%s/1\n%s\n+\n%s\n", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); (r.r1).r_seq[loc] = cSeq; (r.r1).r_qual[loc] = cQual; fprintf(R2, "%s/2\n%s\n+\n%s", (r.r1).r_header, reverseComp(&((r.r1).r_seq)[loc]), reverse(&((r.r1).r_qual)[loc])); } else { (r.r1).r_seq[loc] = '\0'; (r.r1).r_qual[loc] = '\0'; fprintf(R1, "@%s/1\n%s\n+\n%s\n", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); (r.r1).r_seq[loc] = cSeq; (r.r1).r_qual[loc] = cQual; fprintf(R2, "@%s/2\n%s\n+\n%s", (r.r1).r_header, reverseComp(&((r.r1).r_seq)[loc]), reverse(&((r.r1).r_qual)[loc])); } } else if ((r.r1).r_header != NULL) { if (SE == NULL) { SE = fopen(strSE, "a"); } s.se_kept++; if ((r.r1).r_header[0] == '@') { fprintf(SE, "%s\n%s\n+\n%s", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); } else { fprintf(SE, "@%s\n%s\n+\n%s", (r.r1).r_header, (r.r1).r_seq, (r.r1).r_qual); } if ((r.r1).r_qual[strlen((r.r1).r_qual)-1] != '\n') { fprintf(SE, "\n"); } } } if (log == NULL) { log = fopen(logFile, "a"); } for (sum = 0; sum < 700; sum++) { R1_len += (s.R1_length[sum] * sum); R2_len += (s.R2_length[sum] * sum); SE_len += (s.SE_length[sum] * sum); } fprintf(log, "A\tT\tG\tC\tN\tPolyA_Removed_Reads\tPolyT_Removed_Reads\tShort_discarded\tPE_Kept\tSE_Kept\tForced_Pairs\tR1_Ave_Len\tR2_Ave_Len\tSE_Ave_Len\tAverageQual\n"); fprintf(log, "%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t\ %.2f\t%.2f\t%.2f\t%.2f\n", s.A, s.T, s.G, s.C, s.N, s.polyATrimmed, s.polyTTrimmed, s.r1_discarded + s.r2_discarded + s.se_discarded, s.pe_kept, s.se_kept, s.numForcedPairs, (float)R1_len/(float)(s.pe_kept), (float)R2_len/(float)(s.pe_kept), (float)SE_len/(float)(s.se_kept), (float)((float)(s.qualTotal)/(float)(s.A + s.T + s.C + s.G + s.N))); if (f != NULL) { fclose(f); } if (R1 != NULL) { fclose(R1); fclose(R2); } if (SE != NULL) { fclose(SE); } if (log != NULL) { fclose(log); } return 1; }