Пример #1
0
/**
 * Builds an annotation from the values of one already-split input line.
 *
 * A line whose value count differs from COLUMNS_COUNT is rejected: a warning
 * is added to `os`, `skipLine` is raised and the still-empty annotation is
 * returned. Otherwise the fields are appended one after another; as soon as
 * `skipLine` is found set after a step (presumably raised by the field getter
 * on a malformed value), the partially filled annotation is returned.
 *
 * @param lineValues  the column values of the current line
 * @return the annotation built so far (complete only when skipLine stays false)
 */
SharedAnnotationData Peak2GeneFormatLoader::parseLine(const QStringList &lineValues) {
    SharedAnnotationData annotation(new AnnotationData);

    if (!(lineValues.size() == COLUMNS_COUNT)) {
        skipLine = true;
        os.addWarning(QString("Incorrect columns count at line %1: expect %2, got %3")
                          .arg(currentLineNumber)
                          .arg(COLUMNS_COUNT)
                          .arg(lineValues.size()));
        return annotation;
    }

    // Chromosome name.
    annotation->qualifiers << U2Qualifier("chrom", getChromName(lineValues));
    if (skipLine) {
        return annotation;
    }

    // Annotated region.
    annotation->location->regions << getRegion(lineValues);
    if (skipLine) {
        return annotation;
    }

    // Peak name becomes the annotation name.
    annotation->name = getPeakName(lineValues);
    if (skipLine) {
        return annotation;
    }

    // Remaining columns are stored as qualifiers, in file order.
    annotation->qualifiers << U2Qualifier("score", getPeakScore(lineValues));
    if (skipLine) {
        return annotation;
    }

    annotation->qualifiers << U2Qualifier("NA", getNa(lineValues));
    if (skipLine) {
        return annotation;
    }

    annotation->qualifiers << U2Qualifier("Genes", getGenes(lineValues));
    if (skipLine) {
        return annotation;
    }

    annotation->qualifiers << U2Qualifier("Strand", getStrand(lineValues));
    if (skipLine) {
        return annotation;
    }

    annotation->qualifiers << U2Qualifier("TSS2pCenter", getTss2pCenter(lineValues));
    if (skipLine) {
        return annotation;
    }

    return annotation;
}
Пример #2
0
// Reads this model's parameters from `is`.
// Stream layout consumed here: content type, N, phase, number of models;
// then, per model, an element count followed by that many (string, value)
// pairs. On the forward strand a further model-type token is read and the
// reverse-complement model is constructed from the rest of the stream.
// NOTE(review): no stream-state checks are performed while reading.
void IMM::load(istream &is)
{
  int numModels, numElements;
  BOOM::String str, pStr;
  ContentType contentType;
  
  // Header fields; N and phase are written straight into the members.
  is >> contentType >> N >> phase >> numModels;
  setContentType(contentType);

  for(int i=0 ; i<numModels ; ++i)
    {
      // One string->double table per model, sized via hashTableSize(N).
      models->push_back(new BOOM::StringMap<double>(hashTableSize(N)));
      BOOM::StringMap<double> &model=*(*models)[i];
      is >> numElements;
      for(int j=0 ; j<numElements ; ++j)
	{
	  // The value is read as text and converted with asDouble().
	  is >> str >> pStr;
	  model.lookup(str.c_str(),str.length())=pStr.asDouble();
	}
    }

  if(getStrand()==FORWARD_STRAND)
    {
      // The token read into modelType is consumed but not otherwise used here.
      BOOM::String modelType;
      is >> modelType;
      revComp=new IMM(is,REVERSE_STRAND);
      // Link the reverse model back to this one.
      revComp->revComp=this;
    }
/**
 * Composes the rich-text summary for the weight-matrix search element.
 *
 * The text names the producers feeding the sequence and model ports (or a
 * red "unset" marker when a producer is missing), the similarity score, the
 * strand selection and the result annotation name, with hyperlinks on the
 * editable parameters.
 */
QString PWMatrixSearchPrompter::composeRichDoc() {
    // Resolve the actors connected to the model and sequence inputs.
    IntegralBusPort* modelBus = qobject_cast<IntegralBusPort*>(target->getPort(MODEL_PORT));
    Actor* modelProducer = modelBus->getProducer(PWMatrixWorkerFactory::WMATRIX_SLOT.getId());
    IntegralBusPort* seqBus = qobject_cast<IntegralBusPort*>(target->getPort(BasePorts::IN_SEQ_PORT_ID()));
    Actor* seqProducer = seqBus->getProducer(BaseSlots::DNA_SEQUENCE_SLOT().getId());

    QString unsetStr = "<font color='red'>"+tr("unset")+"</font>";
    QString seqName = tr("For each sequence from <u>%1</u>,").arg(seqProducer ? seqProducer->getLabel() : unsetStr);
    QString modelName = tr("with all profiles provided by <u>%1</u>,").arg(modelProducer ? modelProducer->getLabel() : unsetStr);

    QString resultName = getHyperlink(NAME_ATTR, getRequiredParam(NAME_ATTR));

    // Human-readable strand selection; stays empty for any other value.
    QString strandName;
    switch (getStrand(getParameter(BaseAttributes::STRAND_ATTRIBUTE().getId()).value<QString>())) {
    case 0:
        strandName = PWMatrixSearchWorker::tr("both strands");
        break;
    case 1:
        strandName = PWMatrixSearchWorker::tr("direct strand");
        break;
    case -1:
        strandName = PWMatrixSearchWorker::tr("complement strand");
        break;
    }
    strandName = getHyperlink(BaseAttributes::STRAND_ATTRIBUTE().getId(), strandName);

    QString scoreLink = getHyperlink(SCORE_ATTR, getParameter(SCORE_ATTR).toInt());

    return tr("%1 search transcription factor binding sites (TFBS) %2."
        "<br>Recognize sites with <u>similarity %3%</u>, process <u>%4</u>."
        "<br>Output the list of found regions annotated as <u>%5</u>.")
        .arg(seqName)
        .arg(modelName)
        .arg(scoreLink)
        .arg(strandName)
        .arg(resultName);
}
Пример #4
0
// Scores `length` consecutive positions starting at `begin` by summing the
// per-base scores of the three phase-specific chains.
//
// The chain used for a position is selected by the phase: on the forward
// strand the phase advances with the offset from `begin` (modulo 3); on the
// reverse strand it is posmod(seqPhase - offset). For any other strand value
// nothing is scored and 0 is returned.
//
// The original author flagged this implementation as quick and dirty, to be
// replaced by a suffix-tree-like structure that reads each base only once.
double ThreePeriodicMarkovChain::scoreSubsequence(const Sequence &seq,
						  const BOOM::String &str,
						  int begin,int length,
						  int seqPhase)
{
  double total=0;

  switch(getStrand())
    {
    case FORWARD_STRAND:
      for(int offset=0 ; offset<length ; ++offset)
        {
          const int pos=begin+offset;
          total+=chains[(seqPhase+offset)%3]->
            scoreSingleBase(seq,str,pos,seq[pos],str[pos]);
        }
      break;

    case REVERSE_STRAND:
      for(int offset=0 ; offset<length ; ++offset)
        {
          const int pos=begin+offset;
          total+=chains[posmod(seqPhase-offset)]->
            scoreSingleBase(seq,str,pos,seq[pos],str[pos]);
        }
      break;
    }

  return total;
}
Пример #5
0
// Returns the combined log-probability of the branch-point and acceptor
// sub-models for the window starting at `begin`.
//
// Forward strand: the branch point is scored at `begin` and the acceptor
// right after the branch point's context window.
// Reverse strand: the acceptor is scored at `begin` and the branch point
// right after the acceptor's context window.
// Any other strand value throws the string literal "bad!".
double BranchAcceptor::getLogP(const Sequence &S,const BOOM::String &str,
			       int begin)
{
  switch(getStrand())
    {
    case FORWARD_STRAND:
      return
        branchPoint->getLogP(S,str,begin)+
        acceptor->getLogP(S,str,
                          begin+branchPoint->getContextWindowLength());

    case REVERSE_STRAND:
      return
        branchPoint->getLogP(S,str,begin+
                             acceptor->getContextWindowLength())+
        acceptor->getLogP(S,str,begin);

    default:
      throw "bad!";
    }
}
void ProtocolConnection::onReceived(std::size_t bytesReceived) {
	_protocolParser->notifyNewData(getReceiveBuffer(), bytesReceived);

	ProtocolParser::ProtocolParserStatus status = _protocolParser->internParse();

	if (status != ProtocolParser::NEED_MORE) {
		getStrand().post(boost::bind(&ProtocolConnection::internOnFrameReceived, this, status, _protocolParser->getNewFrame()));
	}
}
Пример #7
0
// Writes this sensor to `os`: a "BranchAcceptor" tag line, a line with the
// signal type, cutoff and strand, a line with the consensus offset, and then
// the branch-point and acceptor sub-models via their own save().
// Always returns true; the results of the nested saves are not inspected.
bool BranchAcceptor::save(ostream &os)
{
  os.precision(8);

  // Tag identifying the model type.
  os << "BranchAcceptor" << endl;

  // Signal type, cutoff and strand share one space-separated line.
  os << getSignalType() << " ";
  os << getCutoff() << " ";
  os << getStrand() << endl;

  os << getConsensusOffset() << endl;

  // Sub-models follow, branch point first.
  branchPoint->save(os);
  acceptor->save(os);
  return true;
}
Пример #8
0
// Creates a new ThreePeriodicMarkovChain for the complementary strand and
// reverse-complemented content type, whose three phase chains are the
// reverse complements of this object's chains (same phase index).
// The returned object is allocated with new.
ContentSensor *ThreePeriodicMarkovChain::reverseComplement()
{
  ThreePeriodicMarkovChain *rc=
    new ThreePeriodicMarkovChain(complement(getStrand()),
				 ::reverseComplement(getContentType()));

  for(int phaseIndex=0 ; phaseIndex<3 ; ++phaseIndex)
    {
      rc->chains[phaseIndex]=
        static_cast<MarkovChain*>(chains[phaseIndex]->reverseComplement());
    }

  return rc;
}
Пример #9
0
// Serializes this annotation to `f` as raw bytes in native representation:
// rank (int), read index (int), position on strand (int), strand (char),
// in that order.
void ReadAnnotation::write(ostream*f){
	// Snapshot all fields first, then emit them.
	int rankValue=getRank();
	int readIndexValue=getReadIndex();
	int positionValue=getPositionOnStrand();
	char strandValue=getStrand();

	f->write(reinterpret_cast<const char*>(&rankValue),sizeof(rankValue));
	f->write(reinterpret_cast<const char*>(&readIndexValue),sizeof(readIndexValue));
	f->write(reinterpret_cast<const char*>(&positionValue),sizeof(positionValue));
	f->write(&strandValue,sizeof(strandValue));
}
Пример #10
0
/**
 * Prepares the worker: looks up its ports, pairs the sequence input with the
 * annotation output as mutual complements, and reads the strand, minimal
 * score and result name from the actor's parameters.
 */
void PWMatrixSearchWorker::init() {
    // Port lookup.
    modelPort = ports.value(MODEL_PORT);
    dataPort = ports.value(BasePorts::IN_SEQ_PORT_ID());
    output = ports.value(BasePorts::OUT_ANNOTATIONS_PORT_ID());

    // Sequence input and annotation output complement each other.
    dataPort->addComplement(output);
    output->addComplement(dataPort);

    // Parameter values evaluated in the current context.
    strand = getStrand(
        actor->getParameter(BaseAttributes::STRAND_ATTRIBUTE().getId())
            ->getAttributeValue<QString>(context));
    cfg.minPSUM = actor->getParameter(SCORE_ATTR)
                      ->getAttributeValue<int>(context);
    resultName = actor->getParameter(NAME_ATTR)
                     ->getAttributeValue<QString>(context);
}
Пример #11
0
 /*! \brief Compute effect of soft and hard clipping on coordinates
  * When offset is applied to either reference position or query position
  * the origin of sequencing can be identified.
  * @return offset of origin of sequencing from start or end
  * @deprecated - Use cigar operation directly to avoid constructing multiple Cigar objects
  */
 int getOriginOffset() const
 {
     assert(m_dataPtr);
     // Now delegated to Cigar
     const Strand & strand = getStrand();
     if(strand == FORWARD) {
         return getCigar().getOriginOffsetForward();
     } else if(strand == REVERSE) {
         return getCigar().getOriginOffsetReverse();
     }
     return 0; // Strand NA?
 }
Пример #12
0
// Copy constructor: duplicates N, phase, alphabet size, content type and
// strand, and deep-copies the tables at indices 0..N of other's model vector.
// revComp starts as NULL; an INTERGENIC model then points revComp at itself,
// otherwise a forward-strand model copies other's reverse-complement model
// when one exists.
IMM::IMM(const IMM &other)
  : N(other.N), phase(other.phase), alphabetSize(other.alphabetSize),
    revComp(NULL), models(new BOOM::Vector<BOOM::StringMap<double>*>)
{
  for(int order=0 ; order<=N ; ++order)
    {
      BOOM::StringMap<double> &source=*(*other.models)[order];
      models->push_back(new BOOM::StringMap<double>(source));
    }

  setContentType(other.getContentType());
  setStrand(other.getStrand());

  if(getContentType()==INTERGENIC)
    {
      revComp=this;
      return;
    }
  if(other.revComp && getStrand()==FORWARD_STRAND)
    {
      revComp=new IMM(*other.revComp);
    }
}
Пример #13
0
// Propagates a phase backward across this edge.
//
// - Intergenic edge: returns 0 when the left signal is on the forward strand,
//   2 otherwise.
// - Non-coding edge: the phase is returned unchanged.
// - Coding edge: the feature length (end - begin) is subtracted from the
//   phase on the forward strand (via posmod) and added modulo 3 on the
//   reverse strand.
//
// Throws a string literal if the edge's strand is neither FORWARD_STRAND nor
// REVERSE_STRAND; previously control fell off the end of the function in
// that case, which is undefined behaviour for a non-void function.
int Edge::propagateBackward(int phase)
{
  if(isIntergenic()) return (left->getStrand()==FORWARD_STRAND ? 0 : 2);
  if(!isCoding()) return phase;
  int length=getFeatureEnd()-getFeatureBegin();
  switch(getStrand())
    {
    case FORWARD_STRAND:
      return posmod(phase-length);
    case REVERSE_STRAND:
      return (phase+length)%3;
    default:
      throw "Edge::propagateBackward: unexpected strand";
    }
}
Пример #14
0
// Looks up the score of the base at `index` of `str`, using the
// longest context (up to order N) for which a table entry is defined.
//
// PLUS_STRAND : the context is taken from the left, i.e. the substring
//               ending at `index`; the order is limited by `index`.
// MINUS_STRAND: the context is taken from the right, i.e. the substring
//               starting at `index` (the model was trained that way); the
//               order is limited by the number of characters after `index`.
//
// Throws BOOM::String (plus strand) or BOOM::Stacktrace (minus strand) when
// no order has a defined entry, and BOOM::String with file/line for any other
// strand. The parameters seq, s and c are not used by this implementation.
double IMM::scoreSingleBase(const Sequence &seq,const BOOM::String &str,
				    int index,Symbol s,char c)
{
  const char *text=str.c_str();
  switch(getStrand())
    {
    case PLUS_STRAND:
      {
        int maxOrder=index;
        if(maxOrder>N) maxOrder=N;

        // Back off from the highest usable order down to order 0.
        for(int order=maxOrder ; order>=0 ; --order)
          {
            BOOM::StringMap<double> &table=*(*models)[order];
            if(!table.isDefined(text,index-order,order+1)) continue;
            return table.lookup(text,index-order,order+1);
          }
        throw BOOM::String("IMM::scoreSingleBase('+',")+
          index+",strlen="+strlen(text)+",str="+
          str.substring(index,maxOrder)+")";
      }

    case MINUS_STRAND:
      {
        int textLen=str.length();
        int maxOrder=textLen-index-1;
        if(maxOrder>N) maxOrder=N;

        // Same back-off, but the context extends to the right of index.
        for(int order=maxOrder ; order>=0 ; --order)
          {
            BOOM::StringMap<double> &table=*(*models)[order];
            if(!table.isDefined(text,index,order+1)) continue;
            return table.lookup(text,index,order+1);
          }
        throw BOOM::Stacktrace(
          BOOM::String("IMM::scoreSingleBase('-',")+
            index+",strlen="+strlen(text)+",str="+
          str.substring(index,maxOrder)+")");
      }

    default:
      throw BOOM::String(__FILE__)+__LINE__;
    }
}
Пример #15
0
// Writes this model to `os`: an "IMM" tag line, the content type, a line with
// N and phase (tab-separated, with a trailing tab), the number of tables, and
// for each table its size followed by every key and value on separate lines.
// A forward-strand model then saves its reverse-complement model to the same
// stream. Always returns true.
bool IMM::save(ostream &os)
{
  os.precision(8);

  // Header.
  os << "IMM" << endl;
  os << getContentType() << endl;
  os << N << "\t" << phase << "\t" << endl;

  // Tables, in the order they are stored.
  int tableCount=models->size();
  os << tableCount << endl;
  for(int t=0 ; t<tableCount ; ++t)
    {
      BOOM::StringMap<double> &table=*(*models)[t];
      os << table.size() << endl;

      BOOM::StringMap<double>::iterator entry=table.begin(), stop=table.end();
      while(entry!=stop)
        {
          os << (*entry).first << endl << (*entry).second << endl;
          ++entry;
        }
    }

  if(getStrand()==FORWARD_STRAND)
    {
      revComp->save(os);
    }
  return true;
}
Пример #16
0
/*
 * Run a pileup over the alignments in config->fp and write methylation calls.
 *
 * For every pileup position that passes the optional region (config->reg) and
 * BED (config->bedName) filters, the reference base is classified as CpG, CHG
 * or CHH. Positions of a context that is not kept are skipped. For each
 * covering read, updateMetrics() decides whether it counts as methylated
 * (rv > 0) or unmethylated (rv < 0). Counts are written per position with
 * writeCall(), or, when config->merge is set, accumulated per CpG/CHG through
 * processLast() (CHH is never merged).
 *
 * All exits go through a single cleanup section, so the header, iterators and
 * every buffer allocated here are released on error paths as well. Earlier
 * versions returned directly and leaked them, and never freed the lastCall
 * records used for merging.
 */
void extractCalls(Config *config) {
    bam_hdr_t *hdr = sam_hdr_read(config->fp);
    bam_mplp_t iter = NULL;
    int ret, tid, pos, i, seqlen, type, rv, o = 0;
    int beg0 = 0, end0 = 1u<<29;
    int n_plp; //This will need to be modified for multiple input files
    int ctid = -1; //The tid of the contig whose sequence is stored in "seq"
    int idxBED = 0, strand;
    uint32_t nmethyl = 0, nunmethyl = 0;
    const bam_pileup1_t **plp = NULL;
    char *seq = NULL, base;
    mplp_data *data = NULL;
    struct lastCall *lastCpG = NULL;
    struct lastCall *lastCHG = NULL;

    if(hdr == NULL) {
        fprintf(stderr, "Couldn't read the header in extractCalls()!\n");
        return;
    }

    //Records of the most recent CpG/CHG, only needed when merging
    if(config->merge) {
        if(config->keepCpG) {
            lastCpG = calloc(1, sizeof(struct lastCall));
            assert(lastCpG);
            lastCpG->tid = -1;
        }
        if(config->keepCHG) {
            lastCHG = calloc(1, sizeof(struct lastCall));
            assert(lastCHG);
            lastCHG->tid = -1;
        }
    }

    data = calloc(1,sizeof(mplp_data));
    if(data == NULL) {
        fprintf(stderr, "Couldn't allocate space for the data structure in extractCalls()!\n");
        goto cleanup;
    }
    data->config = config;
    data->hdr = hdr;
    if (config->reg) {
        if((data->iter = sam_itr_querys(config->bai, hdr, config->reg)) == 0) {
            fprintf(stderr, "failed to parse regions %s", config->reg);
            goto cleanup;
        }
    }
    if(config->bedName) {
        config->bed = parseBED(config->bedName, hdr);
        if(config->bed == NULL) goto cleanup;
    }

    plp = calloc(1, sizeof(bam_pileup1_t *)); //This will have to be modified for multiple input files
    if(plp == NULL) {
        fprintf(stderr, "Couldn't allocate space for the plp structure in extractCalls()!\n");
        goto cleanup;
    }

    //Start the pileup
    iter = bam_mplp_init(1, filter_func, (void **) &data);
    bam_mplp_init_overlaps(iter);
    bam_mplp_set_maxcnt(iter, config->maxDepth);
    while((ret = cust_mplp_auto(iter, &tid, &pos, &n_plp, plp)) > 0) {
        //Do we need to process this position?
        if (config->reg) {
            beg0 = data->iter->beg, end0 = data->iter->end;
            if ((pos < beg0 || pos >= end0)) continue; // out of the region requested
        }
        //Load the reference sequence whenever the contig changes
        if(tid != ctid) {
            if(seq != NULL) free(seq);
            seq = faidx_fetch_seq(config->fai, hdr->target_name[tid], 0, faidx_seq_len(config->fai, hdr->target_name[tid]), &seqlen);
            if(seqlen < 0) {
                fprintf(stderr, "faidx_fetch_seq returned %i while trying to fetch the sequence for tid %i (%s)!\n",
                        seqlen, tid, hdr->target_name[tid]);
                fprintf(stderr, "Note that the output will be truncated!\n");
                goto cleanup;
            }
            ctid = tid;
        }

        if(config->bed) { //Handle -l
            while((o = posOverlapsBED(tid, pos, config->bed, idxBED)) == -1) idxBED++;
            if(o == 0) continue; //Wrong strand
        }

        //Classify the context; type doubles as the index into config->output_fp
        if(isCpG(seq, pos, seqlen)) {
            if(!config->keepCpG) continue;
            type = 0;
        } else if(isCHG(seq, pos, seqlen)) {
            if(!config->keepCHG) continue;
            type = 1;
        } else if(isCHH(seq, pos, seqlen)) {
            if(!config->keepCHH) continue;
            type = 2;
        } else {
            continue;
        }

        nmethyl = nunmethyl = 0;
        base = *(seq+pos);
        for(i=0; i<n_plp; i++) {
            if(plp[0][i].is_del) continue;
            if(plp[0][i].is_refskip) continue;
            if(config->bed) if(!readStrandOverlapsBED(plp[0][i].b, config->bed->region[idxBED])) continue;
            strand = getStrand((plp[0]+i)->b);
            //Reads with an odd strand code only count on a reference C, all others on a G
            if(strand & 1) {
                if(base != 'C' && base != 'c') continue;
            } else {
                if(base != 'G' && base != 'g') continue;
            }
            rv = updateMetrics(config, plp[0]+i);
            if(rv > 0) nmethyl++;
            else if(rv<0) nunmethyl++;
        }

        if(nmethyl+nunmethyl==0) continue;
        if(!config->merge || type==2) {
            writeCall(config->output_fp[type], config, hdr->target_name[tid], pos, 1, nmethyl, nunmethyl);
        } else {
            //Merge into per-CpG/CHG metrics
            if(type==0) {
                if(base=='G' || base=='g') pos--;
                processLast(config->output_fp[0], config, lastCpG, hdr, tid, pos, 2, nmethyl, nunmethyl);
            } else {
                if(base=='G' || base=='g') pos-=2;
                processLast(config->output_fp[1], config, lastCHG, hdr, tid, pos, 3, nmethyl, nunmethyl);
            }
        }
    }

    //Don't forget the last CpG/CHG
    if(config->merge) {
        if(config->keepCpG && lastCpG->tid != -1) {
            processLast(config->output_fp[0], config, lastCpG, hdr, tid, pos, 2, nmethyl, nunmethyl);
        }
        if(config->keepCHG && lastCHG->tid != -1) {
            processLast(config->output_fp[1], config, lastCHG, hdr, tid, pos, 3, nmethyl, nunmethyl);
        }
    }

cleanup:
    //Shared by the normal and the error exits; every pointer may still be NULL here
    bam_hdr_destroy(hdr);
    if(data && data->iter) hts_itr_destroy(data->iter);
    if(iter) bam_mplp_destroy(iter);
    free(data);
    free(plp);
    free(seq);
    free(lastCpG);
    free(lastCHG);
}
Пример #17
0
/**
 * One scheduling step of the ORF worker.
 *
 * Without a pending input message the worker only checks whether the input
 * has ended (then it closes the output and marks itself done) and returns
 * NULL. With a message, the search settings are re-read from the actor's
 * parameters and an ORFFindTask is created for the received sequence.
 *
 * @return the search task; a FailTask when min-length is negative or the
 *         sequence is unsuitable (non-nucleotide alphabet or no amino
 *         translation found); NULL when there is nothing to do or the
 *         sequence object cannot be obtained.
 */
Task* ORFWorker::tick() {
    if (!input->hasMessage()) {
        if (input->isEnded()) {
            output->setEnded();
            setDone();
        }
        return NULL;
    }

    Message inputMessage = getMessageAndSetupScriptValues(input);
    if (inputMessage.isEmpty()) {
        output->put(Message::getEmptyMapMessage());
    }

    // Refresh the search settings from the current parameter values.
    cfg.strand = getStrand(actor->getParameter(BaseAttributes::STRAND_ATTRIBUTE().getId())->getAttributeValue<QString>(context));
    cfg.minLen = actor->getParameter(LEN_ATTR)->getAttributeValue<int>(context);
    cfg.mustFit = actor->getParameter(FIT_ATTR)->getAttributeValue<bool>(context);
    cfg.mustInit = actor->getParameter(INIT_ATTR)->getAttributeValue<bool>(context);
    cfg.allowAltStart = actor->getParameter(ALT_ATTR)->getAttributeValue<bool>(context);
    cfg.includeStopCodon = actor->getParameter(ISC_ATTR)->getAttributeValue<bool>(context);
    cfg.maxResult2Search = actor->getParameter(RES_ATTR)->getAttributeValue<int>(context);

    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>(context);
    if (resultName.isEmpty()) {
        algoLog.error(tr("ORF: result name is empty, default name used"));
        resultName = "misc_feature";
    }

    transId = actor->getParameter(ID_ATTR)->getAttributeValue<QString>(context);

    if (cfg.minLen < 0) {
        algoLog.error(tr("ORF: Incorrect value: min-length must be greater then zero"));
        return new FailTask(tr("Incorrect value: min-length must be greater then zero"));
    }

    // Fetch the sequence referenced by the message.
    SharedDbiDataHandler seqId = inputMessage.getData().toMap().value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
    QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
    if (seqObj.isNull()) {
        return NULL;
    }

    const DNAAlphabet* alphabet = seqObj->getAlphabet();
    const bool isNucleic = alphabet && alphabet->getType() == DNAAlphabet_NUCL;
    if (isNucleic) {
        ORFAlgorithmSettings config(cfg);
        config.searchRegion.length = seqObj->getSequenceLength();

        if (config.strand != ORFAlgorithmStrand_Direct) {
            DNATranslation* compTT = AppContext::getDNATranslationRegistry()->
                                     lookupComplementTranslation(alphabet);
            if (compTT == NULL) {
                // No complement translation: search the direct strand only.
                config.strand = ORFAlgorithmStrand_Direct;
            } else {
                config.complementTT = compTT;
            }
        }

        config.proteinTT = AppContext::getDNATranslationRegistry()->
                           lookupTranslation(alphabet, DNATranslationType_NUCL_2_AMINO, transId);
        if (config.proteinTT) {
            Task* searchTask = new ORFFindTask(config,seqObj->getEntityRef());
            connect(searchTask, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
            return searchTask;
        }
    }

    // Reached for a non-nucleotide sequence or a missing amino translation.
    QString err = tr("Bad sequence supplied to ORFWorker: %1").arg(seqObj->getSequenceName());
    return new FailTask(err);
}
Пример #18
0
 // Reports whether this interval and `b` overlap, i.e. each one starts
 // before the other one stops.
 // When `strand_specific` is true the strands must additionally be equal, or
 // at least one of the two must be BOTH; otherwise the result is false.
 bool Interval::overlaps(const Interval & b, bool strand_specific){
   const bool strandsCompatible =
     getStrand() == b.getStrand() || getStrand() == BOTH || b.getStrand() == BOTH || !strand_specific;
   if(!strandsCompatible){
     return false;
   }
   return (getStart() < b.getStop() && b.getStart() < getStop());
 }
Пример #19
0
/*****************************
 * ORFPrompter
 *****************************/
/**
 * Composes the rich-text summary for the ORF search element: where the
 * sequences come from, which strands and genetic code are used, the minimal
 * ORF length, the optional search modifiers and the result annotation name.
 * Editable parameters are rendered as hyperlinks.
 */
QString ORFPrompter::composeRichDoc() {
    // Producer feeding the sequence input (may be absent).
    IntegralBusPort* input = qobject_cast<IntegralBusPort*>(target->getPort(BasePorts::IN_SEQ_PORT_ID()));
    Actor* producer = input->getProducer(BaseSlots::DNA_SEQUENCE_SLOT().getId());
    QString unsetStr = "<font color='red'>"+tr("unset")+"</font>";
    QString producerName = tr(" from <u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);

    // Current parameter values.
    ORFAlgorithmSettings cfg;
    cfg.strand = getStrand(getParameter(BaseAttributes::STRAND_ATTRIBUTE().getId()).value<QString>());
    cfg.minLen = getParameter(LEN_ATTR).toInt();
    cfg.mustFit = getParameter(FIT_ATTR).toBool();
    cfg.mustInit = getParameter(INIT_ATTR).toBool();
    cfg.allowAltStart = getParameter(ALT_ATTR).toBool();
    cfg.includeStopCodon = getParameter(ISC_ATTR).toBool();

    // Optional modifiers appended after the minimal-length clause.
    QString extra;
    if (cfg.mustInit) {
        if (cfg.allowAltStart) {
            QString altLink = getHyperlink(ALT_ATTR, tr("alternative start codons"));
            extra += tr(", take into account %1").arg(altLink);
        }
    } else {
        QString anyLink = getHyperlink(INIT_ATTR, tr("starting with any codon"));
        extra += tr(", allow ORFs %1 other than terminator").arg(anyLink);
    }
    if (cfg.mustFit) {
        QString mustFitLink = getHyperlink(FIT_ATTR, tr("ignore non-terminated"));
        extra += tr(", %1 ORFs").arg(mustFitLink);
    }

    // Strand selection as text.
    QString strandName;
    switch (cfg.strand) {
    case ORFAlgorithmStrand_Both:
        strandName = ORFWorker::tr("both strands");
        break;
    case ORFAlgorithmStrand_Direct:
        strandName = ORFWorker::tr("direct strand");
        break;
    case ORFAlgorithmStrand_Complement:
        strandName = ORFWorker::tr("complement strand");
        break;
    }
    strandName = getHyperlink(BaseAttributes::STRAND_ATTRIBUTE().getId(), strandName);

    QString resultName = getHyperlink(NAME_ATTR, getRequiredParam(NAME_ATTR));

    // Name of the selected translation table for the default nucleotide alphabet.
    QString transId = getParameter(ID_ATTR).toString();
    QString ttName = AppContext::getDNATranslationRegistry()->
                     lookupTranslation(AppContext::getDNAAlphabetRegistry()->findById(BaseDNAAlphabetIds::NUCL_DNA_DEFAULT()), DNATranslationType_NUCL_2_AMINO, transId)->getTranslationName();
    ttName = getHyperlink(ID_ATTR, ttName);

    QString minLenLink = getHyperlink(LEN_ATTR, cfg.minLen);

    return tr("For each nucleotide sequence%1, find ORFs in <u>%2</u> using the <u>%3</u>."
              "<br>Detect only ORFs <u>not shorter than %4 bps</u>%5."
              "<br>Output the list of found regions annotated as <u>%6</u>.")
           .arg(producerName) // e.g. " from Read Fasta 1"
           .arg(strandName)   // e.g. both strands
           .arg(ttName)       // e.g. Standard Genetic Code
           .arg(minLenLink)   // e.g. 100
           .arg(extra)        // e.g. ", take into account alternative start codons"
           .arg(resultName);
}
Пример #20
0
static struct blastBlock *nextBlock(struct blastFile *bf, struct blastQuery *bq,
                                    struct blastGappedAli *bga, boolean *skipRet)
/* Read in next blast block.  Return NULL at EOF or end of gapped
 * alignment. If an unparsable block is found, set skipRet to TRUE and return
 * NULL.
 *
 * Strands are stored as signed values: +1 for plus, -1 for minus.  The
 * coordinate conversion at the end of this function relies on the sign, so
 * every branch below stores +1/-1 (the wu-tblastn branch used to store the
 * characters '+'/'-', which made a minus target strand look positive). */
{
struct blastBlock *bb;
char *line;
char *words[16];
int wordCount;
char *parts[3];
int partCount;
/* Sequence accumulators, allocated on first use and reused across calls. */
static struct dyString *qString = NULL, *tString = NULL;

verbose(TRACE_LEVEL,  "blastFileNextBlock\n");
*skipRet = FALSE;

/* Seek until get something like:
 *   Score = 8770 bits (4424), Expect = 0.0
 * or something that looks like we're done with this gapped
 * alignment. */
for (;;)
    {
    if (!nextBlockLine(bf, bq, &line))
        return NULL;
    if (startsWith(" Score", line))
        break;
    }
AllocVar(bb);
bb->gappedAli = bga;
wordCount = chopLine(line, words);
if (wordCount < 8 || !sameWord("Score", words[0]) 
    || !isdigit(words[2][0]) || !(isdigit(words[7][0]) || words[7][0] == 'e')
    || !startsWith("Expect", words[5]))
    {
    bfError(bf, "Expecting something like:\n"
             "Score = 8770 bits (4424), Expect = 0.0");
    }
bb->bitScore = atof(words[2]);
bb->eVal = evalToDouble(words[7]);

/* Process something like:
 *   Identities = 8320/9618 (86%), Gaps = 3/9618 (0%)
 *             or
 *   Identities = 8320/9618 (86%)
 *             or
 *   Identities = 10/19 (52%), Positives = 15/19 (78%), Frame = +2
 *     (wu-tblastn)
 *             or
 *   Identities = 256/400 (64%), Positives = 306/400 (76%)
 *   Frame = +1 / -2
 *     (tblastn)
 *
 *   Identities = 1317/10108 (13%), Positives = 2779/10108 (27%), Gaps = 1040/10108
 *   (10%)
 *      - wrap on long lines
 *
 * Handle weird cases where there is only a `Score' line, with no `Identities'
 * lines by skipping the alignment; they seem like small, junky alignments.
 */
line = bfNeedNextLine(bf);
wordCount = chopLine(line, words);
if (wordCount < 3 || !sameWord("Identities", words[0]))
    {
    if (wordCount > 1 || sameWord("Score", words[0]))
        {
        /* ugly hack to skip block with no identities */
        *skipRet = TRUE;
        blastBlockFree(&bb);
        return NULL;
        }
    bfError(bf, "Expecting identity count");
    }
partCount = chopByChar(words[2], '/', parts, ArraySize(parts));
if (partCount != 2 || !isdigit(parts[0][0]) || !isdigit(parts[1][0]))
    bfSyntax(bf);
bb->matchCount = atoi(parts[0]);
bb->totalCount = atoi(parts[1]);
if (wordCount >= 7 && sameWord("Gaps", words[4]))
    {
    if (!isdigit(words[6][0]))
        bfSyntax(bf);
    bb->insertCount = atoi(words[6]);
    }
if ((wordCount >= 11) && sameWord("Frame", words[8]))
    {
    /* wu-tblastn puts the frame on the Identities line; the sign of the
     * frame gives the target strand. */
    bb->qStrand = 1;
    bb->tStrand = (words[10][0] == '-') ? -1 : 1;
    bb->tFrame = atoi(words[10]);
    }

line = bfNeedNextLine(bf);
boolean wrapped = (startsWith("(", line));

/* Process something like:
 *     Strand = Plus / Plus (blastn)
 *     Frame = +1           (tblastn)
 *     Frame = +1 / -2      (tblastx)
 *     <blank line>         (blastp)
 * note that wu-tblastn puts frame on Identities line
 */
if (wrapped)
    line = bfNeedNextLine(bf);   /* skip the wrapped remainder of the Identities line */
wordCount = chopLine(line, words);
if ((wordCount >= 5) && sameWord("Strand", words[0]))
    {
    bb->qStrand = getStrand(bf, words[2]);
    bb->tStrand = getStrand(bf, words[4]);
    }
else if ((wordCount >= 5) && sameWord("Frame", words[0]) && (words[3][0] == '/'))
    {
    // Frame = +1 / -2      (tblastx)
    bb->qStrand = (words[2][0] == '-') ? -1 : 1;
    bb->tStrand = (words[4][0] == '-') ? -1 : 1;
    bb->qFrame = atoi(words[2]);
    bb->tFrame = atoi(words[4]);
    }
else if ((wordCount >= 3) && sameWord("Frame", words[0]))
    {
    // Frame = +1           (tblastn)
    bb->qStrand = 1;
    bb->tStrand = (words[2][0] == '-') ? -1 : 1;
    bb->qFrame = atoi(words[2]);
    bb->tFrame = 1;
    }
else if (wordCount == 0)
    {
    /* if we didn't parse frame, default it */
    if (bb->qStrand == 0)
        {
        bb->qStrand = 1;
        bb->tStrand = 1;
        }
    }
else
    bfError(bf, "Expecting Strand, Frame or blank line");


/* Process alignment lines.  They come in groups of three
 * separated by a blank line - something like:
 * Query: 26429 taccttgacattcctcagtgtgtcatcatcgttctctcctccaaacggcgagagtccgga 26488
 *              |||||| |||||||||| ||| ||||||||||||||||||||||| || || ||||||||
 * Sbjct: 62966 taccttaacattcctcaatgtttcatcatcgttctctcctccaaatggtgaaagtccgga 63025
 */
if (qString == NULL)
    {
    qString = newDyString(50000);
    tString = newDyString(50000);
    }
clearBlastBlock(bb, qString, tString);
for (;;)
    {
    if (!findBlockSeqPair(bf, bq))
        break;
    parseBlockSeqPair(bf, bb, qString, tString);
    }

/* convert to [0..n) and move to strand coords if necessary */
bb->qStart--;
if (bb->qStrand < 0)
    reverseIntRange(&bb->qStart, &bb->qEnd, bq->queryBaseCount);
bb->tStart--;
if (bb->tStrand < 0)
    reverseIntRange(&bb->tStart, &bb->tEnd, bga->targetSize);
/* Hand the block its own copies of the accumulated sequences (including the
 * terminating zero). */
bb->qSym = cloneMem(qString->string, qString->stringSize+1);
bb->tSym = cloneMem(tString->string, tString->stringSize+1);
return bb;
}