void fastaToPsl(char *inName, char *outName)
/* fastaToPsl - Convert a FASTA alignment to psl format.
 *   inName  - FASTA file to read.  The first record is used as the query;
 *             every following record is a target aligned against it.
 *   outName - file to create; receives the psl header and then one psl
 *             line per target record that yields an alignment.
 * Sequence sizes passed to pslFromAlign are the countNonDash() lengths of
 * each record, and every alignment is reported on the "+" strand starting
 * at position 0.  Aborts via errAbort if no first record can be read. */
{
struct lineFile *inLF;
FILE *outFh;
struct psl *pslAlign;
DNA *qSeq;
int qSize;
int qSeqLen;
char *qHeader;
DNA *tSeq;
int tSize;
int tSeqLen;
char *tHeader;
int targetCounter;

inLF  = lineFileOpen(inName, TRUE);
outFh = mustOpen(outName, "w");

/* read the query sequence (first record in the file) */
if (!faMixedSpeedReadNext(inLF, &qSeq, &qSize, &qHeader))
    errAbort("Could not read query FASTA entry.");

/* Keep private copies of the query: the same reader is called again for
 * every target below.  NOTE(review): presumably the reader reuses its
 * buffers between calls - confirm against fa.h. */
qSeq    = cloneString(qSeq);
qHeader = cloneString(qHeader);
qSeqLen = countNonDash(qSeq, qSize);

verbose(2, "Query sequence header: %s\n", qHeader);
verbose(3, "Query sequence alignment length: %d\n", qSize);
verbose(3, "Query sequence length: %d\n", qSeqLen);
verbose(4, "Query sequence: %s\n", qSeq);

/* read the rest of the sequences, each one a target */
targetCounter = 1;
pslWriteHead(outFh);
while (faMixedSpeedReadNext(inLF, &tSeq, &tSize, &tHeader))
    {
    tSeqLen = countNonDash(tSeq, tSize);
    verbose(2, "Target sequence (%d) header: %s\n", targetCounter, tHeader);
    verbose(3, "Target sequence (%d) length: %d\n", targetCounter, tSeqLen);
    verbose(4, "Target sequence (%d): %s\n", targetCounter, tSeq);

    pslAlign = pslFromAlign(qHeader, qSeqLen, 0, qSeqLen, qSeq,
                            tHeader, tSeqLen, 0, tSeqLen, tSeq,
                            "+", 0);
    /* NOTE(review): pslFromAlign is understood to return NULL when the
     * two strings share no aligned bases - confirm against psl.h.
     * Skip such records instead of handing NULL to pslTabOut. */
    if (pslAlign != NULL)
        {
        pslTabOut(pslAlign, outFh);
        pslFree(&pslAlign);     /* one psl per target; don't accumulate them */
        }
    else
        verbose(2, "Target sequence (%d) has no alignment to query, skipped\n",
                targetCounter);
    ++targetCounter;
    }

/* release everything this function acquired */
freeMem(qSeq);
freeMem(qHeader);
lineFileClose(&inLF);
carefulClose(&outFh);
}
void outputSubAxt(struct axt *axt, int start, int size, int score, FILE *f) /* Output subset of axt to axt file. */ { struct axt a; a = *axt; a.symCount = size; a.score = score; a.qStart += countNonDash(a.qSym, start); a.qEnd = a.qStart + countNonDash(a.qSym + start, size); a.tStart += countNonDash(a.tSym, start); a.tEnd = a.tStart + countNonDash(a.tSym + start, size); a.qSym += start; a.tSym += start; axtWrite(&a, f); }
struct mafAli *mafSubsetE(struct mafAli *maf, char *componentSource, int newStart, int newEnd, bool getInitialDashes) /* Extract subset of maf that intersects a given range * in a component sequence. The newStart and newEnd * are given in the forward strand coordinates of the * component sequence. The componentSource is typically * something like 'mm3.chr1'. This will return NULL * if maf does not intersect range. The score field * in the returned maf will not be filled in (since * we don't know which scoring scheme to use). */ { struct mafComp *mcMaster = mafFindComponent(maf, componentSource); struct mafAli *subset; struct mafComp *mc, *subMc; char *s, *e; int textStart, textSize; /* Reverse complement input range if necessary. */ if (mcMaster->strand == '-') reverseIntRange(&newStart, &newEnd, mcMaster->srcSize); /* Check if any real intersection and return NULL if not. */ if (newStart >= newEnd) return NULL; if (newStart >= mcMaster->start + mcMaster->size) return NULL; if (newEnd <= mcMaster->start) return NULL; /* Clip to bounds of actual data. */ if (newStart < mcMaster->start) newStart = mcMaster->start; if (newEnd > mcMaster->start + mcMaster->size) newEnd = mcMaster->start + mcMaster->size; /* Translate position in master sequence to position in * multiple alignment. 
*/ s = skipIgnoringDash(mcMaster->text, newStart - mcMaster->start, TRUE); e = skipIgnoringDash(s, newEnd - newStart, TRUE); textStart = s - mcMaster->text; textSize = e - s; if (getInitialDashes && (newStart == mcMaster->start)) { textStart = 0; textSize += s - mcMaster->text; } /* Allocate subset structure and fill it in */ AllocVar(subset); subset->textSize = textSize; for (mc = maf->components; mc != NULL; mc = mc->next) { AllocVar(subMc); subMc->src = cloneString(mc->src); subMc->srcSize = mc->srcSize; subMc->strand = mc->strand; if (mc->size != 0) { subMc->start = mc->start + countNonDash(mc->text, textStart); subMc->size = countNonDash(mc->text+textStart, textSize); subMc->text = cloneStringZ(mc->text + textStart, textSize); if (mc->quality != NULL) subMc->quality = cloneStringZ(mc->quality + textStart, textSize); } else { /* empty row annotation */ subMc->size = 0; subMc->start = mc->start; } subMc->leftStatus = mc->leftStatus; subMc->leftLen = mc->leftLen; subMc->rightStatus = mc->rightStatus; subMc->rightLen = mc->rightLen; slAddHead(&subset->components, subMc); } slReverse(&subset->components); return subset; }