/*
 * Reclaim one in-memory instance of the compound type `tsym`, located at
 * reclaimer->memory + reclaimer->offset.
 *
 * The offset is first aligned to the compound's alignment (typ.cmpdalign) and
 * remembered.  Each field listed in tsym->subnodes is then visited in order:
 * the offset is aligned to the field's alignment and bin_reclaim_datar() is
 * called once per array element of the field.  When every field has been
 * processed the offset is set to the remembered start plus tsym->typ.size.
 *
 * Returns NC_NOERR, or the first non-zero status reported by
 * bin_reclaim_datar(); in the error case reclaimer->offset is left wherever
 * the failing call left it.
 */
static int
bin_reclaim_compound(Symbol* tsym, Reclaim* reclaimer)
{
    int stat = NC_NOERR;
    int nfields;
    size_t fid, i, arraycount;
    ptrdiff_t saveoffset;

    reclaimer->offset = read_alignment(reclaimer->offset,tsym->typ.cmpdalign);
    saveoffset = reclaimer->offset;

    /* Get info about each field in turn and reclaim it */
    nfields = listlength(tsym->subnodes);
    for(fid=0;fid<(size_t)nfields;fid++) {
        Symbol* field = (Symbol*)listget(tsym->subnodes,fid);
        int ndims = field->typ.dimset.ndims;

        /* Compute the total number of elements in the field array.  The
           product must restart at 1 for every field: it was previously
           never initialised, so the element count was indeterminate.
           A field with no dimensions therefore counts as one element. */
        arraycount = 1;
        for(i=0;i<(size_t)ndims;i++)
            arraycount *= field->typ.dimset.dimsyms[i]->dim.declsize;

        reclaimer->offset = read_alignment(reclaimer->offset,field->typ.alignment);
        for(i=0;i<arraycount;i++) {
            /* NOTE(review): no explicit advance here, so bin_reclaim_datar
               is presumably responsible for moving reclaimer->offset past
               the element it reclaimed -- confirm. */
            if((stat = bin_reclaim_datar(field->typ.basetype, reclaimer))) goto done;
        }
    }

    /* Step over the whole compound, measured from its aligned start. */
    reclaimer->offset = saveoffset;
    reclaimer->offset += tsym->typ.size;

done:
    return stat;
}
/// Convenience overload that does not take an alphabet name.
///
/// Forwards to the four-argument read_alignment() with "nt" as the alphabet
/// argument; if that call raises AlphabetException, the same request is
/// repeated with "aa" instead.  Any exception from the second attempt
/// propagates to the caller.
shared_ptr<VectorSiteContainer>
SiteContainerBuilder::read_alignment(string filename, string file_format, bool interleaved) throw (Exception)
{
    try
    {
        return read_alignment(filename, file_format, "nt", interleaved);
    }
    catch (const AlphabetException&)
    {
        // The first alphabet was rejected: fall back to the second one.
        return read_alignment(filename, file_format, "aa", interleaved);
    }
}
void read_counts(Tree &T, Counts &data, std::string fname, long nalpha) { int i; unsigned int j; long id; State sta, sta2; std::string str; std::vector<long> d; // The number of letters is fixed to 4 in read_fasta. if (nalpha != 4) { std::cout << "Reading counts only implemented for 4 letters." << std::endl; } // reads in the alignment. The data is in _orbits and _couts. read_alignment(fname); // Now we fill in the Counts structure. data.nalpha = 4; data.nspecies = g_numSpecies; data.nstates = 1; // calculate the power: for (i=0; i < data.nspecies; i++) { data.nstates = data.nstates*data.nalpha; } // Creates a state of a given dimension create_state(sta, data.nspecies, data.nalpha); create_state(sta2, data.nspecies, data.nalpha); // Matches the species in the fasta with the ones in the tree. if (!match_species(T, g_nameSpecies, g_numSpecies, d)) { throw std::length_error( "Could not match species in tree with species in fasta." ); } // Stores the counts in the Counts structure. data.c.resize(data.nstates); data.N = 0; for (j=0; j < _orbitals.size(); j++) { str = transform_adn_chain_val_to_string(_orbitals[j]); string2state(str, sta); permute_state(d, sta, sta2); id = state2index(sta2); data.c[id] = (double) _counts[j]; data.N = data.N + data.c[id]; } // Stores the species names data.species.resize(data.nspecies); for (i=0; i < T.nleaves; i++) { data.species[i] = T.names[i]; } }
/// Aligns `seqs` in place through temporary files.
///
/// The sequences are written as FASTA records (named by their index, empty
/// description, line width 60) to one temporary file, align_file() is run
/// from that file to a second one, and the rows read back from the second
/// file replace the contents of `seqs`.  Both temporary files are removed
/// afterwards unless the "NPGE_DEBUG" option evaluates to true.
void ExternalAligner::align_seqs_impl(Strings& seqs) const {
    std::string fasta_path = tmp_file();
    ASSERT_FALSE(fasta_path.empty());
    std::string aligned_path = tmp_file();
    ASSERT_FALSE(aligned_path.empty());

    {
        // Scoped so the stream handle is released before align_file() runs.
        boost::shared_ptr<std::ostream> sink = name_to_ostream(fasta_path);
        for (int i = 0; i < seqs.size(); i++) {
            write_fasta(*sink, TO_S(i), "", seqs[i], 60);
        }
    }

    align_file(fasta_path, aligned_path);

    Strings aligned_rows;
    read_alignment(aligned_rows, aligned_path);
    // The aligner has to give back exactly one row per input sequence.
    ASSERT_EQ(aligned_rows.size(), seqs.size());
    seqs.swap(aligned_rows);

    const bool keep_files = go("NPGE_DEBUG").as<bool>();
    if (keep_files) {
        return;
    }
    remove_file(fasta_path);
    remove_file(aligned_path);
}
/*
 * Reclaim one vlen instance of type `tsym`, located at
 * reclaimer->memory + reclaimer->offset.
 *
 * If the vlen's data pointer is non-NULL, each of its vl->len entries is
 * reclaimed through bin_reclaim_datar() using a private Reclaim cursor over
 * vl->p, and vl->p itself is then released with efree().
 *
 * On success the caller's offset is advanced by tsym->typ.size whether or
 * not the vlen held data.  Previously the advance happened only for a
 * non-NULL vl->p, so an empty vlen left the caller's cursor pointing at the
 * same slot; bin_reclaim_compound() relies on each reclaim call moving the
 * offset forward and would then have read the following fields from the
 * wrong position.
 *
 * Returns NC_NOERR, or the first non-zero status from bin_reclaim_datar();
 * in that case vl->p is not freed and the caller's offset is not advanced.
 */
static int
bin_reclaim_vlen(Symbol* tsym, Reclaim* reclaimer)
{
    int stat = NC_NOERR;
    size_t i;
    Symbol* basetype = tsym->typ.basetype;
    nc_vlen_t* vl = (nc_vlen_t*)(reclaimer->memory+reclaimer->offset);

    /* Free up each entry in the vlen list */
    if(vl->p != NULL) {
        Reclaim vreclaimer;
        vreclaimer.memory = vl->p;
        vreclaimer.offset = 0;
        for(i=0;i<vl->len;i++) {
            vreclaimer.offset = read_alignment(vreclaimer.offset,basetype->typ.alignment);
            if((stat = bin_reclaim_datar(basetype,&vreclaimer))) goto done;
            /* NOTE(review): if bin_reclaim_datar already advances the offset
               it is given (as this function does for its own caller), this
               extra step skips every other entry -- confirm against
               bin_reclaim_datar before changing. */
            vreclaimer.offset += basetype->typ.size;
        }
        efree(vl->p);
    }

    /* Step over the vlen slot itself, even when it was empty. */
    reclaimer->offset += tsym->typ.size;

done:
    return stat;
}
struct alignment* detect_and_read_sequences(struct alignment* aln,struct parameters* param) { int feature = 0; char **input = 0; unsigned short int* input_type = 0; unsigned short int* input_numseq = 0; int num_input = 0; int i = 0; int j = 0; int c = 0; int a,b; int free_read = 1; unsigned int numseq = get_kalign_context()->numseq; while(free_read == 1 || param->infile[i]){ num_input++; i++; free_read = 0; } numseq = 0; input = malloc(sizeof(char*) * num_input); input_type = malloc(sizeof(unsigned short int) * num_input); input_numseq = malloc(sizeof(unsigned short int) * num_input); for (i = 0; i < num_input;i++){ input[i] = 0; input_type[i] = 0; input_numseq[i] = 0; } free_read = 0; if(param->quiet){ c = 1; }else{ c = 0; } for (i = c; i < num_input;i++){ if(!param->infile[i]){ k_printf("reading from STDIN: "); }else{ k_printf("reading from %s: ",param->infile[i]); } input[i] = get_input_into_string(input[i],param->infile[i]); if(input[i]){ free_read++; if (byg_start("<macsim>",input[i]) != -1){ input_numseq[i] = count_sequences_macsim(input[i]); feature = 1; input_type[i] = 1; }else if (byg_start("<uniprot",input[i]) != -1){ input_numseq[i] = count_sequences_uniprot(input[i]); input_type[i] = 2; }else if(byg_start("This SWISS-PROT",input[i]) != -1){ input_numseq[i] = count_sequences_swissprot(input[i]); input_type[i] = 3; }else if (byg_start("This Swiss-Prot",input[i]) != -1){ input_numseq[i] = count_sequences_swissprot(input[i]); input_type[i] = 3; }else if (byg_start("CLUSTAL W",input[i]) != -1){ input_numseq[i] = count_sequences_clustalw(input[i]); input_type[i] = 4; }else if (byg_start("PileUp",input[i]) != -1){ input_numseq[i] = count_sequences_clustalw(input[i]); input_type[i] = 4; }else if (byg_start("MSF:",input[i]) != -1){ input_numseq[i] = count_sequences_clustalw(input[i]); input_type[i] = 4; }else if (byg_start("STOCKHOLM",input[i]) != -1){ input_numseq[i] = count_sequences_stockholm(input[i]); input_type[i] = 5; }else{ input_numseq[i] = 
count_sequences_fasta(input[i]); input_type[i] = 0; } k_printf("found %d sequences\n",input_numseq[i]); if(input_numseq[i] < 1){ free(input[i]); input[i] = 0; }else{ numseq += input_numseq[i]; } }else{ k_printf("found no sequences.\n"); if(!param->outfile && i){ param->outfile = param->infile[i]; k_printf("-> output file, in "); //try to set format.... if(!param->format){ if (byg_start("msf",param->outfile) != -1){ param->format = "msf"; }else if (byg_start("clustal",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("aln",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("macsim",param->outfile) != -1){ param->format = "macsim"; }else{ param->format = "fasta"; } if(param->reformat){ k_printf("unaligned fasta format\n"); }else if(param->format){ k_printf("%s format\n",param->format); }else{ k_printf("fasta format\n"); } } } k_printf("\n"); } } if(numseq < 2){ k_printf("%s\n", usage); if(!numseq){ k_printf("\nWARNING: No sequences found.\n\n"); }else{ k_printf("\nWARNING: Only one sequence found.\n\n"); } for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_param(param); exit(0); } if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ if( free_read < 2){ k_printf("\nWARNING: You are trying to perform a profile - profile alignment but ony one input file was detected.\n\n"); param->alignment_type = "default"; } } if (param->feature_type && !feature){ for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_param(param); throwKalignException(k_printf("\nWARNING: You are trying to perform a feature alignment but the input format(s) do not contain feature information.\n")); } get_kalign_context()->numprofiles = (numseq << 1) - 1; aln = aln_alloc(aln); //numseq = 0; if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ j = 0; for (i = 0; i < num_input;i++){ if(input[i]){ switch(input_type[i]){ case 0: 
aln = read_alignment(aln,input[i]); break; case 1: aln = read_alignment_macsim_xml(aln,input[i]); break; case 2: aln = read_alignment_uniprot_xml(aln,input[i]); break; case 3: aln = read_alignment_from_swissprot(aln, input[i]); break; case 4: aln = read_alignment_clustal(aln,input[i]); break; case 5: aln = read_alignment_stockholm(aln,input[i]); break; default: aln = read_alignment(aln,input[i]); break; } input[i] = 0; //create partial profile.... aln->nsip[numseq+j] = input_numseq[i]; aln->sip[numseq+j] = malloc(sizeof(int)*aln->nsip[numseq+j]); //k_printf("%d %d\n",numseq+j,aln->sl[numseq+j]); j++; } } num_input = j; c = 0; for (i = 0;i < num_input;i++){ // for ( j = 0; j < aln->nsip[numseq+i];j++){ aln->sip[numseq+i][j] = c; c++; // k_printf("%d ",aln->sip[numseq+i][j]); } aln->sl[numseq+i] = aln->sl[aln->sip[numseq+i][0]]; // k_printf("PROFILE:%d contains: %d long:%d\n",i+numseq,aln->nsip[numseq+i],aln->sl[numseq+i]); // k_printf("\n"); } //sanity check -are all input for (i = 0;i < num_input;i++){ for ( j = 0; j < aln->nsip[numseq+i]-1;j++){ a = aln->sip[numseq+i][j]; a = aln->sl[a]; for (c = j+1; j < aln->nsip[numseq+i];j++){ b = aln->sip[numseq+i][c]; b = aln->sl[b]; if(a != b){ for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_aln(aln); free_param(param); throwKalignException(k_printf("Unaligned sequences in input %s.\n",param->infile[i])); } } } } //exit(0); /*for (i = 0; i < numseq;i++){ k_printf("len%d:%d\n",i,aln->sl[i]); for ( j =0 ; j < aln->sl[i];j++){ //if(aln->s[i][j]> 23 || aln->s[i][j] < 0){ // aln->s[i][j] = -1; //} k_printf("%d ",aln->s[i][j]); } // k_printf("\n"); } exit(0);*/ }else{ for (i = 0; i < num_input;i++){ if(input[i]){ switch(input_type[i]){ case 0: aln = read_sequences(aln,input[i]); break; case 1: aln = read_sequences_macsim_xml(aln,input[i]); break; case 2: aln = read_sequences_uniprot_xml(aln,input[i]); break; case 3: aln = read_sequences_from_swissprot(aln, input[i]); 
break; case 4: aln = read_sequences_clustal(aln,input[i]); break; case 5: aln = read_sequences_stockholm(aln,input[i]); break; default: aln = read_sequences(aln,input[i]); break; } /*if (byg_start("<macsim>",input[i]) != -1){ aln = read_sequences_macsim_xml(aln,input[i]); }else if (byg_start("<uniprot",input[i]) != -1){ aln = read_sequences_uniprot_xml(aln,input[i]); }else if(byg_start("This SWISS-PROT entry is copyright.",input[i]) != -1){ aln = read_sequences_from_swissprot(aln, input[i]); }else if (byg_start("This Swiss-Prot entry is copyright.",input[i]) != -1){ aln = read_sequences_from_swissprot(aln, input[i]); }else if (byg_start("CLUSTAL W",input[i]) != -1){ aln = read_sequences_clustal(aln,input[i]); }else if (byg_start("PileUp",input[i]) != -1){ aln = read_sequences_clustal(aln,input[i]); }else if (byg_start("MSF:",input[i]) != -1){ aln = read_sequences_clustal(aln,input[i]); }else if (byg_start("STOCKHOLM",input[i]) != -1){ aln = read_sequences_stockholm(aln,input[i]); }else{ aln = read_sequences(aln,input[i]); }*/ input[i] = 0; } } } if(numseq < 2){ free_param(param); throwKalignException(k_printf("\nNo sequences could be read.\n")); } if(!param->format && param->outfile){ if (byg_start("msf",param->outfile) != -1){ param->format = "msf"; }else if (byg_start("clustal",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("aln",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("macsim",param->outfile) != -1){ param->format = "macsim"; } k_printf("Output file: %s, in %s format.\n",param->outfile,param->format); } free(input); free(input_type); free(input_numseq); return aln; }