Example #1
0
/*
 * Read up to countOEA read pairs from the text file fileName_OEA into the
 * global OEAArray, starting at index 0.
 *
 * Each record in the file is expected to be four lines:
 *     >id R
 *     <sequence>
 *     >id F
 *     <sequence>
 * The id stored with a record is the one parsed from the "F" header line
 * (it is scanned last and overwrites the one from the "R" header).
 *
 * A pair is stored only when both sequences are no longer than
 * maxLen_Btw_Pairs; longer pairs are consumed from the file but skipped.
 * For stored pairs, OEA_F holds a copy of the F sequence as read and OEA_R
 * holds a copy of reverseComp() applied to the R sequence.
 *
 * Side effects: resets the global countOEA_Added to 0 and increments it
 * once per stored pair. Reading stops early at end of file or at the first
 * record that does not parse.
 *
 * Returns countOEA_Added on success, or -1 if the file cannot be opened or
 * a memory allocation fails (pairs stored before the failure stay in
 * OEAArray and are counted in countOEA_Added).
 *
 * NOTE(review): the capacity of OEAArray is not visible here; the caller
 * must guarantee it can hold countOEA entries.
 */
int readOEAs(int countOEA, char *fileName_OEA)
{
	FILE *fp;
	int OEA_id = 0;
	int status = 0;
	int bufLen = (int)(max_OEA_Length);
	char *strOEA_F = NULL;
	char *strOEA_R = NULL;
	char seqFormat[32];

	countOEA_Added = 0;

	fp = fopen(fileName_OEA, "r");
	if (fp == NULL)
		return -1;

	/* A width of at least 1 is needed for a valid bounded %s conversion. */
	if (bufLen < 2) {
		status = -1;
		goto cleanup;
	}

	strOEA_F = malloc((size_t)bufLen * sizeof(char));
	strOEA_R = malloc((size_t)bufLen * sizeof(char));
	if (strOEA_F == NULL || strOEA_R == NULL) {
		status = -1;
		goto cleanup;
	}

	/*
	 * Build "%<bufLen-1>s\n" so a sequence longer than the scratch buffer
	 * cannot overflow it. An over-long token is cut short; the following
	 * header scan then fails to match and the loop stops.
	 */
	snprintf(seqFormat, sizeof seqFormat, "%%%ds\n", bufLen - 1);

	while (countOEA > 0) {
		size_t lenF, lenR;

		/*
		 * NOTE(review): %i auto-detects the base, so an id written with a
		 * leading 0 is parsed as octal - confirm ids never have one.
		 */
		if (fscanf(fp, ">%i R\n", &OEA_id) != 1 ||
		    fscanf(fp, seqFormat, strOEA_R) != 1 ||
		    fscanf(fp, ">%i F\n", &OEA_id) != 1 ||
		    fscanf(fp, seqFormat, strOEA_F) != 1)
			break;	/* EOF or malformed record: do not reuse stale buffers */

		/* Measure before reverseComp() runs, in case it edits its argument. */
		lenF = strlen(strOEA_F);
		lenR = strlen(strOEA_R);

		if (lenF <= maxLen_Btw_Pairs && lenR <= maxLen_Btw_Pairs) {
			char *copyF = malloc((lenF + 1) * sizeof(char));
			char *copyR = malloc((lenR + 1) * sizeof(char));

			if (copyF == NULL || copyR == NULL) {
				free(copyF);
				free(copyR);
				status = -1;
				break;
			}

			OEAArray[countOEA_Added].OEA_F = copyF;
			OEAArray[countOEA_Added].OEA_R = copyR;
			OEAArray[countOEA_Added].OEA_F_len = lenF;
			OEAArray[countOEA_Added].OEA_R_len = lenR;
			/*
			 * NOTE(review): ownership of the string returned by
			 * reverseComp() is not visible here; it is not freed.
			 */
			strcpy(OEAArray[countOEA_Added].OEA_R, reverseComp(strOEA_R));
			strcpy(OEAArray[countOEA_Added].OEA_F, strOEA_F);
			OEAArray[countOEA_Added].id = OEA_id;
			countOEA_Added++;
		}
		countOEA--;
	}

cleanup:
	free(strOEA_F);
	free(strOEA_R);
	fclose(fp);

	return (status < 0) ? -1 : countOEA_Added;
}
Example #2
0
/*
 * Open `path` in append mode the first time it is needed and cache the
 * stream in *slot; later calls return the cached stream.
 * Returns NULL if fopen() failed.
 */
static FILE *cleanOpenAppend(FILE **slot, const char *path)
{
    if (*slot == NULL) {
        *slot = fopen(path, "a");
    }
    return *slot;
}

/*
 * Read records from devFile via grabTab() and append them as FASTQ-style
 * entries to the output files, then append a statistics summary to logFile.
 *
 * For each record returned by grabTab():
 *   - r1 and r2 headers both set: r1 goes to strR1, r2 goes to strR2
 *     (counted in s.pe_kept).
 *   - only r1 set and tmpforcePairs non-zero: r1 is split at the middle of
 *     its sequence; the first half goes to strR1 and reverseComp()/reverse()
 *     of the second half goes to strR2 (counted in s.numForcedPairs).
 *   - only r1 set otherwise: r1 goes to strSE (counted in s.se_kept).
 *   - r1 header NULL: the record is ignored.
 * Headers that do not already start with '@' get one prepended. Whether an
 * '@' is prepended to the strR2 header is decided from r1's header.
 *
 * Output files are opened lazily in append mode, so a file is only created
 * when at least one record is written to it.
 *
 * Side effect: sets the global PolyATTrim to tmpPolyATTrim.
 *
 * Returns 1 on success. Returns 0 if devFile, a needed output file or
 * logFile cannot be opened, or if closing an output stream fails; in the
 * output-file case processing stops and no log line is written.
 *
 * NOTE(review): strR2/strSE entries are written without a trailing newline
 * after the quality string (the SE path adds one only when missing);
 * presumably grabTab() leaves the line's newline on that field - confirm.
 */
int clean(const char *devFile, const char *logFile, const char *strR1, const char *strR2, const char *strSE, int tmpforcePairs, int tmpPolyATTrim) {
    FILE *in = fopen(devFile, "r");
    FILE *R1 = NULL;
    FILE *R2 = NULL;
    FILE *SE = NULL;
    FILE *logFp = NULL;

    struct reads r;
    struct stats s;
    /* Wide accumulators: count * length summed over a large run overflows int. */
    unsigned long long R1_len = 0, R2_len = 0, SE_len = 0;
    int bin;
    int status = 1;

    statsConstruct(&s);
    PolyATTrim = tmpPolyATTrim;

    if (in == NULL) {
        return 0;
    }

    while (grabTab(in, &r, &s)) {
        const char *at;

        /* Every output path needs an r1 header. */
        if (r.r1.r_header == NULL) {
            continue;
        }

        /* Prefix that makes the emitted header start with '@'. */
        at = (r.r1.r_header[0] == '@') ? "" : "@";

        if (r.r2.r_header != NULL) {
            size_t h1 = strlen(r.r1.r_header);

            if (cleanOpenAppend(&R1, strR1) == NULL || cleanOpenAppend(&R2, strR2) == NULL) {
                status = 0;
                break;
            }
            s.pe_kept++;

            if (h1 > 0 && r.r1.r_header[0] == '@' && r.r1.r_header[h1 - 1] == '1') {
                /* Header already ends in a mate number: reuse it, and end r2's in '2'. */
                size_t h2 = strlen(r.r2.r_header);

                fprintf(R1, "%s\n%s\n+\n%s\n", r.r1.r_header, r.r1.r_seq, r.r1.r_qual);
                if (h2 > 0) {
                    r.r2.r_header[h2 - 1] = '2';
                }
                fprintf(R2, "%s\n%s\n+\n%s", r.r2.r_header, r.r2.r_seq, r.r2.r_qual);
            } else {
                fprintf(R1, "%s%s/1\n%s\n+\n%s\n", at, r.r1.r_header, r.r1.r_seq, r.r1.r_qual);
                fprintf(R2, "%s%s/2\n%s\n+\n%s", at, r.r2.r_header, r.r2.r_seq, r.r2.r_qual);
            }
        } else if (tmpforcePairs) {
            /*
             * NOTE(review): assumes r_qual is at least as long as half of
             * r_seq; nothing visible here guarantees it.
             */
            size_t loc = strlen(r.r1.r_seq) / 2;
            char cSeq = r.r1.r_seq[loc];
            char cQual = r.r1.r_qual[loc];

            if (cleanOpenAppend(&R1, strR1) == NULL || cleanOpenAppend(&R2, strR2) == NULL) {
                status = 0;
                break;
            }
            s.numForcedPairs++;

            /* Temporarily cut both strings at the midpoint to print the first half. */
            r.r1.r_seq[loc] = '\0';
            r.r1.r_qual[loc] = '\0';
            fprintf(R1, "%s%s/1\n%s\n+\n%s\n", at, r.r1.r_header, r.r1.r_seq, r.r1.r_qual);
            r.r1.r_seq[loc] = cSeq;
            r.r1.r_qual[loc] = cQual;

            /*
             * NOTE(review): ownership of the strings returned by
             * reverseComp()/reverse() is not visible here; they are not freed.
             */
            fprintf(R2, "%s%s/2\n%s\n+\n%s", at, r.r1.r_header, reverseComp(&(r.r1.r_seq)[loc]), reverse(&(r.r1.r_qual)[loc]));
        } else {
            size_t qlen;

            if (cleanOpenAppend(&SE, strSE) == NULL) {
                status = 0;
                break;
            }
            s.se_kept++;

            fprintf(SE, "%s%s\n%s\n+\n%s", at, r.r1.r_header, r.r1.r_seq, r.r1.r_qual);

            /* Terminate the entry if the quality string did not bring its own newline. */
            qlen = strlen(r.r1.r_qual);
            if (qlen == 0 || r.r1.r_qual[qlen - 1] != '\n') {
                fprintf(SE, "\n");
            }
        }
    }

    if (status) {
        logFp = fopen(logFile, "a");
        if (logFp == NULL) {
            status = 0;
        }
    }

    if (logFp != NULL) {
        /* NOTE(review): 700 is taken to be the length of the s.*_length histograms - confirm. */
        for (bin = 0; bin < 700; bin++) {
            R1_len += (unsigned long long)s.R1_length[bin] * (unsigned long long)bin;
            R2_len += (unsigned long long)s.R2_length[bin] * (unsigned long long)bin;
            SE_len += (unsigned long long)s.SE_length[bin] * (unsigned long long)bin;
        }

        fprintf(logFp, "A\tT\tG\tC\tN\tPolyA_Removed_Reads\tPolyT_Removed_Reads\tShort_discarded\tPE_Kept\tSE_Kept\tForced_Pairs\tR1_Ave_Len\tR2_Ave_Len\tSE_Ave_Len\tAverageQual\n");
        /*
         * The 16 spaces before the first %.2f are kept on purpose: they have
         * always been part of the emitted log line, so parsers may expect them.
         * NOTE(review): the averages divide by pe_kept / se_kept / base count,
         * which may be zero for an empty run.
         */
        fprintf(logFp, "%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t"
                "                %.2f\t%.2f\t%.2f\t%.2f\n",
                s.A, s.T, s.G, s.C, s.N, s.polyATrimmed, s.polyTTrimmed, s.r1_discarded + s.r2_discarded + s.se_discarded, s.pe_kept, s.se_kept, s.numForcedPairs,
                (float)R1_len/(float)(s.pe_kept), (float)R2_len/(float)(s.pe_kept), (float)SE_len/(float)(s.se_kept), (float)((float)(s.qualTotal)/(float)(s.A + s.T + s.C + s.G + s.N)));
    }

    fclose(in);

    /* Each stream is checked on its own: any of them may have failed to open. */
    if (R1 != NULL && fclose(R1) != 0) {
        status = 0;
    }
    if (R2 != NULL && fclose(R2) != 0) {
        status = 0;
    }
    if (SE != NULL && fclose(SE) != 0) {
        status = 0;
    }
    if (logFp != NULL && fclose(logFp) != 0) {
        status = 0;
    }

    return status;
}