Exemplos de esl_abc_ValidateSeq em C++ (Cpp)

Linguagem de programação: C++ (Cpp)

Método / Função: esl_abc_ValidateSeq

Exemplos em hotexamples.com: 2

esl_abc_ValidateSeq em C++ (Cpp) - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de esl_abc_ValidateSeq em C++ (Cpp) extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

Arquivo: esl_translate.c Projeto: ElofssonLab/TOPCONS2

/* Function:  esl_trans_s2p()
 *
 * Purpose:   Translate the nucleotide sequence <in> into a protein
 *            sequence, starting <frameshift> residues into the
 *            sequence, and return the newly created sequence in
 *            <*out>. If <rcFlag> is nonzero, <in> is reverse
 *            complemented before translation and reverse complemented
 *            again afterwards to put it back.
 *
 *            <in> may be in text mode (<in->seq>) or digital mode
 *            (<in->dsq>). Translation stops at the first incomplete
 *            codon, or at the first codon that contains anything other
 *            than one of the four canonical bases (index 0..3).
 *
 *            The new sequence is named "<name>_s<frameshift>", with
 *            "_rc" appended when <rcFlag> is set; the name is silently
 *            truncated if it does not fit in the local 256-byte buffer.
 *
 * Args:      in         - nucleotide sequence to translate
 *            out        - RETURN: new protein sequence; NULL on any failure
 *            frameshift - number of leading residues to skip; 0 <= frameshift < in->n
 *            rcFlag     - nonzero to translate the reverse complement
 *
 * Returns:   <eslOK> on success; caller owns <*out>.
 *            <eslFAIL> if <frameshift> is out of range.
 *            <eslEMEM> on every other failure (historical behavior:
 *            this function has never distinguished the cause).
 */
int esl_trans_s2p(ESL_SQ *in, ESL_SQ **out, int frameshift, int rcFlag)
{
  /* Encoding taken from squid: A=0, C=1, G=2, U/T=3.
   * code[0] is AAA, code[1] is AAC, ... code[4] is ACA, ... code[63] is UUU.
   * The 20 standard amino acids, plus '*' for stop.
   */
  static const char code[64] = {
    'K','N','K','N','T','T','T','T','R','S','R','S',   /* AAx ACx AGx */
    'I','I','M','I','Q','H','Q','H','P','P','P','P',   /* AUx CAx CCx */
    'R','R','R','R','L','L','L','L','E','D','E','D',   /* CGx CUx GAx */
    'A','A','A','A','G','G','G','G','V','V','V','V',   /* GCx GGx GUx */
    '*','Y','*','Y','S','S','S','S','*','C','W','C',   /* UAx UCx UGx */
    'L','F','L','F'                                    /* UUx         */
  };
  ESL_ALPHABET *abc      = NULL;
  char         *aaseq    = NULL;     /* protein sequence being built                      */
  char         *aaptr    = NULL;     /* write position in <aaseq>                         */
  int           reversed = 0;        /* TRUE while <in> is left in reverse complement     */
  int           status   = eslEMEM;  /* ESL_ALLOC() needs this; also the error code       */
  char          errbuf[256];         /* esl_abc_ValidateSeq() demands this                */
  char          namestring[256];

  (*out) = NULL;

  /* Nothing has been allocated yet, so a plain return is safe here. */
  if (frameshift < 0 || frameshift >= in->n) return eslFAIL;

  abc = esl_alphabet_Create(eslDNA);
  if (abc == NULL) goto ERROR;

  /* Make sure we were given a nucleotide sequence. */
  if (in->seq != NULL)
    {
      if (esl_abc_ValidateSeq(abc, in->seq, in->n, errbuf) != eslOK) goto ERROR;
    }
  else if (in->dsq != NULL)
    {
      if (in->abc->type != eslRNA && in->abc->type != eslDNA) goto ERROR;
    }
  else goto ERROR;

  /* Apply the reverse complement, remembering that <in> must be restored. */
  if (rcFlag)
    {
      if (esl_sq_ReverseComplement(in) != eslOK) goto ERROR;
      reversed = 1;
    }

  /* One char per nucleotide is more than enough for one char per codon. */
  ESL_ALLOC(aaseq, (in->n + 1) * sizeof(char));
  aaptr = aaseq;

  if (in->seq != NULL)
    { /* text mode: map each character through the DNA alphabet's input map */
      const char *readseq = in->seq + frameshift;

      /* as long as at least 3 nucleotides are left, translate another codon */
      while (readseq[0] != '\0' && readseq[1] != '\0' && readseq[2] != '\0')
        {
          int b1 = abc->inmap[(int) readseq[0]];
          int b2 = abc->inmap[(int) readseq[1]];
          int b3 = abc->inmap[(int) readseq[2]];

          /* Only canonical bases can index the table; a gap or ambiguity
           * code could otherwise still sum to a value <= 63. */
          if (b1 > 3 || b2 > 3 || b3 > 3) break;

          *aaptr++ = code[b1 * 16 + b2 * 4 + b3];
          readseq += 3;
        }
    }
  else
    { /* digital mode: index 0 is a sentinel, so residues start at 1 */
      int pos = 1 + frameshift;

      /* 255 is the digital sequence sentinel value */
      while (in->dsq[pos] != 255 && in->dsq[pos+1] != 255 && in->dsq[pos+2] != 255)
        {
          int b1 = in->dsq[pos];
          int b2 = in->dsq[pos+1];
          int b3 = in->dsq[pos+2];

          if (b1 > 3 || b2 > 3 || b3 > 3) break;

          *aaptr++ = code[b1 * 16 + b2 * 4 + b3];
          pos += 3;
        }
    }
  *aaptr = '\0';

  /* Modify the name to record any reading frame adjustments. */
  snprintf(namestring, sizeof namestring, "%s_s%d%s", in->name, frameshift, rcFlag ? "_rc" : "");

  *out = esl_sq_CreateFrom(namestring, aaseq, in->desc, in->acc, in->ss);
  if (*out == NULL) goto ERROR;

  /* Return the input to its original state. Clear the flag first so the
   * error path does not try a second time if this attempt fails. */
  if (reversed)
    {
      reversed = 0;
      if (esl_sq_ReverseComplement(in) != eslOK) goto ERROR;
    }

  free(aaseq);
  esl_alphabet_Destroy(abc);
  return eslOK;

 ERROR:
  if (reversed) esl_sq_ReverseComplement(in);   /* best effort; already failing */
  if (*out != NULL) { esl_sq_Destroy(*out); *out = NULL; }
  free(aaseq);
  if (abc != NULL) esl_alphabet_Destroy(abc);
  return status;
}

Exemplo n.º 2

0

Exibir arquivo

Arquivo: profillic-esl_msafile.hpp Projeto: Tsingke/profillic-hmmer

static int profillic_esl_msafile_profile_Read(ESLX_MSAFILE *afp, ESL_MSA **ret_msa, ProfileType * profile_ptr ) { /// \note Right now this isn't actually using the open file pointer; for convenience I just use the profile.fromFile( <filename> ) method. /// \todo Use convenience fns in esl_buffer.h; see eg hmmer-3.1/easel/esl_msafile_stockholm.c for examples... ESL_MSA *msa = NULL; string profile_string; char *buf; long len; int seqidx; int status; char errmsg2[eslERRBUFSIZE]; ESL_DASSERT1((afp->format == eslMSAFILE_PROFILLIC)); const char * const seqname = "Galosh Profile Consensus"; const char * const msaname = "Galosh Profile"; uint32_t profile_length; galosh::Sequence<typename ProfileType::ProfileResidueType> consensus_sequence; stringstream tmp_consensus_output_stream; uint32_t pos_i; if (profile_ptr == NULL) { ESL_EXCEPTION(eslEINCONCEIVABLE, "profile_ptr is NULL in profillic_esl_msafile_profile_Read(..)!"); } //if (feof(afp->bf->fp)) { status = eslEOF; goto ERROR; } afp->errmsg[0] = '\0'; // Read in the galosh profile (from profillic) //fseek( afp->bf->fp, 0, SEEK_END ); // go to the end //len = afp->bf->ftell( afp->bf->fp ); // get the position at the end (length) //fseek( afp->bf->fp, 0, SEEK_SET ); // go to the beginning again. //ESL_ALLOC_CPP( char, buf, sizeof( char ) * len ); //malloc buffer //fread( buf, len, 1, afp->bf->fp ); //read into buffer //profile_string = buf; //profile_ptr->fromString( profile_string ); profile_ptr->fromFile( afp->bf->filename ); //if (buf) free(buf); // \todo WHY WON'T THIS WORK? See HACKs in profillic-hmmbuild.cpp to work around it. //fseek( afp->bf->fp, 0, SEEK_END ); // go to the end (to signal there's no more profiles in the file, the next time we come to this function) // Calculate the consensus sequence. 
profile_length = profile_ptr->length(); consensus_sequence.reinitialize( profile_length ); for( pos_i = 0; pos_i < profile_length; pos_i++ ) { consensus_sequence[ pos_i ] = ( *profile_ptr )[ pos_i ][ galosh::Emission::Match ].maximumValueType(); } tmp_consensus_output_stream << consensus_sequence; /* Allocate a growable MSA, and auxiliary parse data coupled to the MSA allocation */ #ifdef eslAUGMENT_ALPHABET if (afp->abc && (msa = esl_msa_CreateDigital(afp->abc, 16, -1)) == NULL) { status = eslEMEM; goto ERROR; } #endif if (! afp->abc && (msa = esl_msa_Create( 16, -1)) == NULL) { status = eslEMEM; goto ERROR; } // Set first-and-only seq to the consensus. This should set sqlen[0] to the profile's length and set ax to have length 1 and ax[0] to be the sequence itself. Also msa->sqname[0] to the "name" of that consensus sequence. /* if nec, make room for the new seq */ if (msa->nseq >= msa->sqalloc && (status = esl_msa_Expand(msa)) != eslOK) return status; seqidx = msa->nseq; // 0 msa->nseq++; // = 1 status = esl_strdup(seqname, -1, &(msa->sqname[seqidx])); // NOTE: Could add description of this "sequence" here, using esl_msa_SetSeqDescription(msa, seqidx, desc). #ifdef eslAUGMENT_ALPHABET if (msa->flags & eslMSA_DIGITAL) { // NOTE (profillic): There was a bug in this; it had said .."esl_abc_dsqcat(msa->abc, " where it should have said .."esl_abc_dsqcat(msa->abc->inmap, " if((status = esl_abc_dsqcat(msa->abc->inmap, &(msa->ax[seqidx]), &(msa->sqlen[seqidx]), tmp_consensus_output_stream.str().c_str(), profile_length)) != eslOK) { /* invalid char(s), get informative error message */ if (esl_abc_ValidateSeq(msa->abc, tmp_consensus_output_stream.str().c_str(), profile_length, afp->errmsg) != eslOK) ESL_XFAIL(eslEFORMAT, errmsg2, "%s (line %d): %s", msa->sqname[0], afp->linenumber, afp->errmsg); } } #endif if (! 
(msa->flags & eslMSA_DIGITAL)) { status = esl_strcat(&(msa->aseq[seqidx]), 0, tmp_consensus_output_stream.str().c_str(), profile_length); msa->sqlen[seqidx] = profile_length; } msa->alen = profile_length; /// \todo OR read in a fasta file of sequences too. /// \todo (Optional?) Set msa->name to the name of the profile (file?) esl_strdup(msaname, -1, &(msa->name)); /// \todo make sure eslMSA_HASWGTS is FALSE .. OR set it to TRUE and set msa->wgt[idx] to 1.0. /// \note Could have secondary structure (per sequence) too. msa->ss[0]. msa->sslen[0] should be the same as msa->sqlen[0]. /// \todo Investigate what msa->sa and msa->pp are for. /* Give the newly parsed MSA a good * going-over, and finalize the fields of the MSA data structure. * verify_parse will fill in errmsg if it sees a problem. */ //if (verify_parse(msa, afp->errmsg) != eslOK) { status = eslEFORMAT; goto ERROR; } if (( status = esl_msa_SetDefaultWeights(msa)) != eslOK) goto ERROR; if (ret_msa != NULL) *ret_msa = msa; else esl_msa_Destroy(msa); return eslOK; ERROR: if (msa != NULL) esl_msa_Destroy(msa); if (ret_msa != NULL) *ret_msa = NULL; return status; }