Bits *maskQuerySeq(struct dnaSeq *seq, boolean isProt, boolean maskQuery, boolean lcMask)
/* Prepare a query sequence for searching and optionally capture its
 * case-based mask.
 *
 *   seq       - query sequence; seq->dna is rewritten in place.
 *   isProt    - TRUE: run the residues through faToProtein.
 *               FALSE: run them through faToDna.
 *   maskQuery - for nucleotide queries only: build a mask with
 *               maskFromUpperCaseSeq() before faToDna() normalizes the case.
 *   lcMask    - only consulted when maskQuery is set: toggle the case of
 *               the sequence first (presumably so that lower-case regions
 *               end up being the ones recorded in the mask).
 *
 * Returns the mask built by maskFromUpperCaseSeq(), or NULL when no mask
 * was requested or the query is protein.  A warning is issued when the
 * sequence is longer than qWarnSize.  Note that no poly-A trimming is
 * performed in this routine. */
{
Bits *mask = NULL;

verbose(2, "%s\n", seq->name);

if (!isProt)
    {
    if (maskQuery)
        {
        /* The mask has to be taken before faToDna() changes the case. */
        if (lcMask)
            toggleCase(seq->dna, seq->size);
        mask = maskFromUpperCaseSeq(seq);
        }
    faToDna(seq->dna, seq->size);
    }
else
    {
    faToProtein(seq->dna, seq->size);
    }

if (seq->size > qWarnSize)
    warn("Query sequence %s has size %d, it might take a while.", seq->name, seq->size);

return mask;
}
void faToTwoBit(char *inFiles[], int inFileCount, char *outFile) /* Convert inFiles in fasta format to outfile in 2 bit * format. */ { struct twoBit *twoBitList = NULL, *twoBit; int i; struct hash *uniqHash = newHash(18); FILE *f; for (i=0; i<inFileCount; ++i) { char *fileName = inFiles[i]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dnaSeq seq; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { if (seq.size == 0) { warn("Skipping item %s which has no sequence.\n",seq.name); continue; } /* strip off version number */ if (stripVersion) { char *sp = NULL; sp = strchr(seq.name,'.'); if (sp != NULL) *sp = '\0'; } if (hashLookup(uniqHash, seq.name)) { if (!ignoreDups) errAbort("Duplicate sequence name %s", seq.name); else continue; } hashAdd(uniqHash, seq.name, NULL); if (noMask) faToDna(seq.dna, seq.size); else unknownToN(seq.dna, seq.size); twoBit = twoBitFromDnaSeq(&seq, !noMask); slAddHead(&twoBitList, twoBit); } lineFileClose(&lf); } slReverse(&twoBitList); f = mustOpen(outFile, "wb"); twoBitWriteHeader(twoBitList, f); for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { twoBitWriteOne(twoBit, f); } carefulClose(&f); }
boolean faSomeSpeedReadNext(struct lineFile *lf, DNA **retDna, int *retSize, char **retName, boolean isDna)
/* Fetch the next DNA or peptide FA record from lf.
 *
 * The record is read with faMixedSpeedReadNext(), which fills in *retDna,
 * *retSize and *retName.  The sequence is then normalized in place:
 * faToDna() when isDna is TRUE, faToProtein() otherwise.
 *
 * Returns FALSE when faMixedSpeedReadNext() reports no record (nothing is
 * normalized in that case), TRUE otherwise. */
{
if (!faMixedSpeedReadNext(lf, retDna, retSize, retName))
    return FALSE;

if (isDna)
    faToDna(*retDna, *retSize);
else
    faToProtein(*retDna, *retSize);

return TRUE;
}