int main(int argc, char *argv[]) { // set some defaults char *expDir = "."; bool allreads = false; bool unmappedReads = false; bool lookup = false; // process cmd line args int argcc = 1; while (argcc < argc) { switch (argv[argcc][1]) { case 'e': // set exp dir argcc++; expDir = argv[argcc]; break; case 'a': allreads = true; break; case 'u': unmappedReads = true; break; case 'l': argcc++; lookup = true; LookupInit(argv[argcc]); break; case 'L': argcc++; sscanf(argv[argcc], "%d", &lookupLimit); break; } argcc++; } // crazy, but only way to get rows/cols right now is from mask. Mask mask(1,1); char maskPath[MAX_PATH_LENGTH]; sprintf(maskPath, "%s/bfmask.bin", expDir); mask.SetMask(maskPath); Histogram meanHist(1001, -1.0, 3.0); Histogram stdevZeromerHist(1001, 0.0, 3.0); Histogram stdevOnemerHist(1001, 0.0, 3.0); Histogram avgZeromer(1001, -2.0, 1.0); int w = mask.W(); int h = mask.H(); int validReads[w][h]; memset(validReads, 0, sizeof(validReads)); char blastFile[MAX_PATH_LENGTH]; sprintf(blastFile, "%s/keypass.rpt", expDir); FILE *fp = fopen(blastFile, "r"); if (fp) { char line[256]; while (fgets(line, sizeof(line), fp)) { int row, col; sscanf(line, "r%d|c%d", &row, &col); char *ptr = strrchr(line, '|'); ptr++; double qual; int len; sscanf(ptr, "%lf %d", &qual, &len); validReads[col][row] = 0; if (len > 50 && qual > 0.9) { // look at high quality reads validReads[col][row] |= 1; } if (len > 30 && qual > 0.8) { // look at medium quality reads validReads[col][row] |= 2; } if (len > 20 && !(validReads[col][row] & 4)) validReads[col][row] |= 4; } fclose(fp); } RawWells wells(expDir, "1.wells", mask.H(), mask.W()); wells.OpenForRead(); const WellData *data = NULL; int numFlows = wells.NumFlows(); double measured[numFlows]; int keypassCount = 0; int keypassFailCount = 0; const int numFlowsInKey = 7; int keypassLib[numFlowsInKey] = {1, 0, 1, 0, 0, 1, 0}; // int keypassLib[numFlowsInKey] = {0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}; // const int numFlowsInKey = 11; int i; int zeromerCount = 0; int onemerCount = 0; double runningAvg0mer = 0.0; double runningAvg1mer = 0.0; double runningAvgN0mer = 0.0; double runningAvgN1mer = 0.0; int runningAvgCount = 0; for(i=0;i<numFlowsInKey;i++) { if (keypassLib[i] == 0) zeromerCount++; if (keypassLib[i] == 1) onemerCount++; } while ((data = wells.ReadNextRegionData()) != NULL) { // look for all live library reads // 1 0 1 0 0 1 0 N - library TCAG key with TACG flow order if (mask.Match(data->x, data->y, MaskLib)) { bool useit = false; if (unmappedReads && validReads[data->x][data->y] == 0) useit = true; else if (!unmappedReads && (allreads || (validReads[data->x][data->y] > 0))) // look at any (4), medium (2), or high(1) quality reads useit = true; if (lookup) { useit = false; if (LookupFind(data->y, data->x)) useit = true; } if (useit) { // do simple keypass on raw well data: // 1 - subtract avg 0-mer // 2 - normalize to avg 1-mer // 3 - threshold to generate vector, compare to 1st 7 flows of known double avg0mer = 0.0; for(i=0;i<numFlowsInKey;i++) { if (keypassLib[i] == 0) avg0mer += data->flowValues[i]; } avg0mer /= zeromerCount; double avg1mer = 0.0; for(i=0;i<numFlowsInKey;i++) { if (keypassLib[i] == 1) avg1mer += data->flowValues[i]; } avg1mer /= onemerCount; // keep a running avg on 0-mer & 1-mer raw key signal runningAvg0mer += avg0mer; runningAvg1mer += avg1mer; runningAvgCount++; // key normalization... avg0mer = 0.0; // force our algorithm to assume weka was right, and 0-mer on avg is already 0 !!! (need to think on this) int flow; for(flow=0;flow<numFlows;flow++) { measured[flow] = data->flowValues[flow] - avg0mer; } double mult = 1.0/avg1mer; for(flow=0;flow<numFlows;flow++) { measured[flow] *= mult; } // calc avg normalized 0-mers & 1-mers double avgN0mer = 0.0; double avgN1mer = 0.0; for(i=0;i<numFlowsInKey;i++) { if (keypassLib[i] == 0) avgN0mer += measured[i]; if (keypassLib[i] == 1) avgN1mer += measured[i]; } avgN0mer /= zeromerCount; avgN1mer /= onemerCount; runningAvgN0mer += avgN0mer; runningAvgN1mer += avgN1mer; // keypass... int keypassVec[numFlowsInKey]; bool keypass = true; for(flow=0;flow<numFlowsInKey;flow++) { keypassVec[flow] = (int)(measured[flow]+0.5); if (keypassVec[flow] != keypassLib[flow]) keypass = false; } if (keypass) { keypassCount++; } else { keypassFailCount++; } // now, lets generate a few metrics and see how they correlate to mapped reads, interest is in mixed fragments // metric1 - the dist between the avg 0-mer and the avg 1-mer - for this, we can usually call the read without cafie corrections for around 40 flows, so we go ahead and do that, then avg the 0-mer and 1-mer signals, then report the mean dist, and the stdev on the 0-mers and 1-mers int numTestFlows = 12; double onemerSig[40]; double zeromerSig[40]; int onemerCount = 0; int zeromerCount = 0; int base; for(flow=numFlowsInKey+1;flow<numTestFlows+numFlowsInKey+1;flow++) { // note we ignore the key base = (int)(measured[flow]+0.5); if (base == 0) { zeromerSig[zeromerCount] = measured[flow]; zeromerCount++; avgZeromer.Add(measured[flow]); } if (base == 1) { onemerSig[onemerCount] = measured[flow]; onemerCount++; } } // if we have sane counts, calc metrics for this read double avgZeroMer = 0.0; double avgOneMer = 0.0; double onemerStdev = 0.0; double zeromerStdev = 0.0; if (zeromerCount > 2 && onemerCount > 2) { int k; for(k=0;k<zeromerCount;k++) { avgZeroMer += zeromerSig[k]; } avgZeroMer /= (double)zeromerCount; for(k=0;k<onemerCount;k++) { avgOneMer += onemerSig[k]; } avgOneMer /= (double)onemerCount; double delta = 0.0; for(k=0;k<zeromerCount;k++) { delta = avgZeroMer - zeromerSig[k]; zeromerStdev += delta*delta; } zeromerStdev = sqrt(zeromerStdev); for(k=0;k<onemerCount;k++) { delta = avgOneMer - onemerSig[k]; onemerStdev += delta*delta; } onemerStdev = sqrt(onemerStdev); meanHist.Add(avgOneMer - avgZeroMer); stdevZeromerHist.Add(zeromerStdev); stdevOnemerHist.Add(onemerStdev); } } } } wells.Close(); // dump some stats printf("Reads: pass/fail/all %d/%d/%d\n", keypassCount, keypassFailCount, keypassCount + keypassFailCount); printf("Avg signals in key: Raw: 0-mer: %.4lf 1-mer: %.4lf Norm: 0-mer: %.4lf 1-mer: %.4lf\n", runningAvg0mer/runningAvgCount, runningAvg1mer/runningAvgCount, runningAvgN0mer/runningAvgCount, runningAvgN1mer/runningAvgCount); meanHist.Dump("AvgDist.txt", 1); stdevZeromerHist.Dump("ZeromerStdev.txt", 1); stdevOnemerHist.Dump("OnemerStdev.txt", 1); avgZeromer.Dump("Avg0mer.txt", 1); }
void RTFInit () { int i; RTFColor *cp; RTFFont *fp; RTFStyle *sp; RTFStyleElt *eltList, *ep; rtfClass = -1; pushedClass = -1; pushedChar = EOF; rtfLineNum = 0; rtfLinePos = 0; prevChar = EOF; bumpLine = 0; if (rtfTextBuf == (char *) NULL) /* initialize text buffers */ { rtfTextBuf = RTFAlloc (rtfBufSiz); pushedTextBuf = RTFAlloc (rtfBufSiz); if (rtfTextBuf == (char *) NULL || pushedTextBuf == (char *) NULL) RTFPanic ("Cannot allocate text buffers."); rtfTextBuf[0] = pushedTextBuf[0] = '\0'; } RTFFree (inputName); RTFFree (outputName); inputName = outputName = (char *) NULL; /* initialize control symbol lookup table */ LookupInit (); for (i = 0; i < rtfMaxClass; i++) RTFSetClassCallback (i, (RTFFuncPtr) NULL); for (i = 0; i < rtfMaxDestination; i++) RTFSetDestinationCallback (i, (RTFFuncPtr) NULL); /* install built-in destination readers */ RTFSetDestinationCallback (rtfFontTbl, ReadFontTbl); RTFSetDestinationCallback (rtfColorTbl, ReadColorTbl); RTFSetDestinationCallback (rtfStyleSheet, ReadStyleSheet); RTFSetDestinationCallback (rtfInfo, ReadInfoGroup); RTFSetDestinationCallback (rtfPict, ReadPictGroup); RTFSetDestinationCallback (rtfObject, ReadObjGroup); RTFSetReadHook ((RTFFuncPtr) NULL); /* dump old lists if necessary */ while (fontList != (RTFFont *) NULL) { fp = fontList->rtfNextFont; RTFFree (fontList->rtfFName); RTFFree ((char *) fontList); fontList = fp; } while (colorList != (RTFColor *) NULL) { cp = colorList->rtfNextColor; RTFFree ((char *) colorList); colorList = cp; } while (styleList != (RTFStyle *) NULL) { sp = styleList->rtfNextStyle; eltList = styleList->rtfSSEList; while (eltList != (RTFStyleElt *) NULL) { ep = eltList->rtfNextSE; RTFFree (eltList->rtfSEText); RTFFree ((char *) eltList); eltList = ep; } RTFFree (styleList->rtfSName); RTFFree ((char *) styleList); styleList = sp; } CharSetInit (); csTop = 0; }