cell * prepare_dictionary(int *argcp, char *(*argvp[])) { u_char *origin; u_char *here; u_char *xlimit; int dict_size; u_char *extension; char *dictionary_file = ""; // Allocate space for the Forth dictionary and read its initial contents origin = aln_alloc(MAXDICT, variables); xlimit = &origin[MAXDICT]; if(*argcp < 2 || (extension = strrchr((*argvp)[1],'.')) == NULL || strcmp(extension, ".dic") != 0 ) { dictionary_file = is_readable("app.dic") ? "app.dic" : DEFAULT_EXE; } else { dictionary_file = (*argvp)[1]; *argcp -= 1; *argvp += 1; } dict_size = read_dictionary(dictionary_file, origin, variables); here = &origin[dict_size]; init_compiler(origin, xlimit, 0xfffe, here, xlimit, variables); return variables; }
/*
 * Program entry point.
 *
 * Requires exactly two command-line arguments: the input file name and the
 * output file name, which are stored in `infile` and `outfile`.  With any
 * other argument count a usage line is written to stderr and the process
 * exits with status 1.
 *
 * It then allocates a MAXDICT-byte dictionary, zeroes its first token,
 * and calls init_compiler() followed by init_dictionary().  Returns 0.
 */
int
main(int argc, char **argv)
{
    extern void init_dictionary(cell *up);
    u_char *dict_base;
    u_char *dict_limit;

    if (argc != 3) {
        fprintf(stderr, "usage: meta input-file-name output-file-name\n");
        exit(1);
    }

    infile = argv[1];
    outfile = argv[2];

    dict_base = aln_alloc(MAXDICT, variables);
    dict_limit = dict_base + MAXDICT;

    /* The dictionary starts out with a single zero token. */
    *(token_t *)dict_base = 0;

    /* Empty dictionary: the compile pointer starts at the base. */
    init_compiler(dict_base, dict_limit, 0xfffe, dict_base, dict_limit, variables);
    init_dictionary(variables);

    return 0;
}
struct alignment* detect_and_read_sequences(struct alignment* aln,struct parameters* param) { int feature = 0; char **input = 0; unsigned short int* input_type = 0; unsigned short int* input_numseq = 0; int num_input = 0; int i = 0; int j = 0; int c = 0; int a,b; int free_read = 1; unsigned int numseq = get_kalign_context()->numseq; while(free_read == 1 || param->infile[i]){ num_input++; i++; free_read = 0; } numseq = 0; input = malloc(sizeof(char*) * num_input); input_type = malloc(sizeof(unsigned short int) * num_input); input_numseq = malloc(sizeof(unsigned short int) * num_input); for (i = 0; i < num_input;i++){ input[i] = 0; input_type[i] = 0; input_numseq[i] = 0; } free_read = 0; if(param->quiet){ c = 1; }else{ c = 0; } for (i = c; i < num_input;i++){ if(!param->infile[i]){ k_printf("reading from STDIN: "); }else{ k_printf("reading from %s: ",param->infile[i]); } input[i] = get_input_into_string(input[i],param->infile[i]); if(input[i]){ free_read++; if (byg_start("<macsim>",input[i]) != -1){ input_numseq[i] = count_sequences_macsim(input[i]); feature = 1; input_type[i] = 1; }else if (byg_start("<uniprot",input[i]) != -1){ input_numseq[i] = count_sequences_uniprot(input[i]); input_type[i] = 2; }else if(byg_start("This SWISS-PROT",input[i]) != -1){ input_numseq[i] = count_sequences_swissprot(input[i]); input_type[i] = 3; }else if (byg_start("This Swiss-Prot",input[i]) != -1){ input_numseq[i] = count_sequences_swissprot(input[i]); input_type[i] = 3; }else if (byg_start("CLUSTAL W",input[i]) != -1){ input_numseq[i] = count_sequences_clustalw(input[i]); input_type[i] = 4; }else if (byg_start("PileUp",input[i]) != -1){ input_numseq[i] = count_sequences_clustalw(input[i]); input_type[i] = 4; }else if (byg_start("MSF:",input[i]) != -1){ input_numseq[i] = count_sequences_clustalw(input[i]); input_type[i] = 4; }else if (byg_start("STOCKHOLM",input[i]) != -1){ input_numseq[i] = count_sequences_stockholm(input[i]); input_type[i] = 5; }else{ input_numseq[i] = 
count_sequences_fasta(input[i]); input_type[i] = 0; } k_printf("found %d sequences\n",input_numseq[i]); if(input_numseq[i] < 1){ free(input[i]); input[i] = 0; }else{ numseq += input_numseq[i]; } }else{ k_printf("found no sequences.\n"); if(!param->outfile && i){ param->outfile = param->infile[i]; k_printf("-> output file, in "); //try to set format.... if(!param->format){ if (byg_start("msf",param->outfile) != -1){ param->format = "msf"; }else if (byg_start("clustal",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("aln",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("macsim",param->outfile) != -1){ param->format = "macsim"; }else{ param->format = "fasta"; } if(param->reformat){ k_printf("unaligned fasta format\n"); }else if(param->format){ k_printf("%s format\n",param->format); }else{ k_printf("fasta format\n"); } } } k_printf("\n"); } } if(numseq < 2){ k_printf("%s\n", usage); if(!numseq){ k_printf("\nWARNING: No sequences found.\n\n"); }else{ k_printf("\nWARNING: Only one sequence found.\n\n"); } for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_param(param); exit(0); } if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ if( free_read < 2){ k_printf("\nWARNING: You are trying to perform a profile - profile alignment but ony one input file was detected.\n\n"); param->alignment_type = "default"; } } if (param->feature_type && !feature){ for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_param(param); throwKalignException(k_printf("\nWARNING: You are trying to perform a feature alignment but the input format(s) do not contain feature information.\n")); } get_kalign_context()->numprofiles = (numseq << 1) - 1; aln = aln_alloc(aln); //numseq = 0; if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ j = 0; for (i = 0; i < num_input;i++){ if(input[i]){ switch(input_type[i]){ case 0: 
aln = read_alignment(aln,input[i]); break; case 1: aln = read_alignment_macsim_xml(aln,input[i]); break; case 2: aln = read_alignment_uniprot_xml(aln,input[i]); break; case 3: aln = read_alignment_from_swissprot(aln, input[i]); break; case 4: aln = read_alignment_clustal(aln,input[i]); break; case 5: aln = read_alignment_stockholm(aln,input[i]); break; default: aln = read_alignment(aln,input[i]); break; } input[i] = 0; //create partial profile.... aln->nsip[numseq+j] = input_numseq[i]; aln->sip[numseq+j] = malloc(sizeof(int)*aln->nsip[numseq+j]); //k_printf("%d %d\n",numseq+j,aln->sl[numseq+j]); j++; } } num_input = j; c = 0; for (i = 0;i < num_input;i++){ // for ( j = 0; j < aln->nsip[numseq+i];j++){ aln->sip[numseq+i][j] = c; c++; // k_printf("%d ",aln->sip[numseq+i][j]); } aln->sl[numseq+i] = aln->sl[aln->sip[numseq+i][0]]; // k_printf("PROFILE:%d contains: %d long:%d\n",i+numseq,aln->nsip[numseq+i],aln->sl[numseq+i]); // k_printf("\n"); } //sanity check -are all input for (i = 0;i < num_input;i++){ for ( j = 0; j < aln->nsip[numseq+i]-1;j++){ a = aln->sip[numseq+i][j]; a = aln->sl[a]; for (c = j+1; j < aln->nsip[numseq+i];j++){ b = aln->sip[numseq+i][c]; b = aln->sl[b]; if(a != b){ for (i = 0; i < num_input;i++){ free(input[i]); } free(input_numseq); free(input_type); free(input); free_aln(aln); free_param(param); throwKalignException(k_printf("Unaligned sequences in input %s.\n",param->infile[i])); } } } } //exit(0); /*for (i = 0; i < numseq;i++){ k_printf("len%d:%d\n",i,aln->sl[i]); for ( j =0 ; j < aln->sl[i];j++){ //if(aln->s[i][j]> 23 || aln->s[i][j] < 0){ // aln->s[i][j] = -1; //} k_printf("%d ",aln->s[i][j]); } // k_printf("\n"); } exit(0);*/ }else{ for (i = 0; i < num_input;i++){ if(input[i]){ switch(input_type[i]){ case 0: aln = read_sequences(aln,input[i]); break; case 1: aln = read_sequences_macsim_xml(aln,input[i]); break; case 2: aln = read_sequences_uniprot_xml(aln,input[i]); break; case 3: aln = read_sequences_from_swissprot(aln, input[i]); 
break; case 4: aln = read_sequences_clustal(aln,input[i]); break; case 5: aln = read_sequences_stockholm(aln,input[i]); break; default: aln = read_sequences(aln,input[i]); break; } /*if (byg_start("<macsim>",input[i]) != -1){ aln = read_sequences_macsim_xml(aln,input[i]); }else if (byg_start("<uniprot",input[i]) != -1){ aln = read_sequences_uniprot_xml(aln,input[i]); }else if(byg_start("This SWISS-PROT entry is copyright.",input[i]) != -1){ aln = read_sequences_from_swissprot(aln, input[i]); }else if (byg_start("This Swiss-Prot entry is copyright.",input[i]) != -1){ aln = read_sequences_from_swissprot(aln, input[i]); }else if (byg_start("CLUSTAL W",input[i]) != -1){ aln = read_sequences_clustal(aln,input[i]); }else if (byg_start("PileUp",input[i]) != -1){ aln = read_sequences_clustal(aln,input[i]); }else if (byg_start("MSF:",input[i]) != -1){ aln = read_sequences_clustal(aln,input[i]); }else if (byg_start("STOCKHOLM",input[i]) != -1){ aln = read_sequences_stockholm(aln,input[i]); }else{ aln = read_sequences(aln,input[i]); }*/ input[i] = 0; } } } if(numseq < 2){ free_param(param); throwKalignException(k_printf("\nNo sequences could be read.\n")); } if(!param->format && param->outfile){ if (byg_start("msf",param->outfile) != -1){ param->format = "msf"; }else if (byg_start("clustal",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("aln",param->outfile) != -1){ param->format = "clustal"; }else if (byg_start("macsim",param->outfile) != -1){ param->format = "macsim"; } k_printf("Output file: %s, in %s format.\n",param->outfile,param->format); } free(input); free(input_type); free(input_numseq); return aln; }
void KalignAdapter::alignUnsafe(const MultipleSequenceAlignment& ma, MultipleSequenceAlignment& res, TaskStateInfo& ti) { ti.progress = 0; int* tree = 0; quint32 a, b, c; struct alignment* aln = 0; struct parameters* param = 0; struct aln_tree_node* tree2 = 0; param = (parameters*)malloc(sizeof(struct parameters)); param = interface(param,0,0); kalign_context *ctx = get_kalign_context(); unsigned int &numseq = ctx->numseq; unsigned int &numprofiles = ctx->numprofiles; if (ma->getNumRows() < 2){ if (!numseq){ k_printf("No sequences found.\n\n"); } else { k_printf("Only one sequence found.\n\n"); } free_param(param); throw KalignException("Can't align less then 2 sequences"); } if(ctx->gpo != -1) { param->gpo = ctx->gpo; } if(ctx->gpe != -1) { param->gpe = ctx->gpe; } if(ctx->tgpe != -1) { param->tgpe = ctx->tgpe; } if(ctx->secret != -1) { param->secret = ctx->secret; } /************************************************************************/ /* Convert MA to aln */ /************************************************************************/ k_printf("Prepare data"); numseq = ma->getNumRows(); numprofiles = (numseq << 1) - 1; aln = aln_alloc(aln); for(quint32 i = 0 ; i < numseq; i++) { const MultipleSequenceAlignmentRow row= ma->getMsaRow(i); aln->sl[i] = row->getUngappedLength(); aln->lsn[i] = row->getName().length(); } for (quint32 i = 0; i < numseq;i++) { try { aln->s[i] = (int*) malloc(sizeof(int)*(aln->sl[i]+1)); checkAllocatedMemory(aln->s[i]); aln->seq[i] = (char*)malloc(sizeof(char)*(aln->sl[i]+1)); checkAllocatedMemory(aln->seq[i]); aln->sn[i] = (char*)malloc(sizeof(char)*(aln->lsn[i]+1)); checkAllocatedMemory(aln->sn[i]); } catch (...) 
{ cleanupMemory(NULL, numseq, NULL, aln, param); throw; } } int aacode[26] = {0,1,2,3,4,5,6,7,8,-1,9,10,11,12,23,13,14,15,16,17,17,18,19,20,21,22}; for(quint32 i = 0; i < numseq; i++) { const MultipleSequenceAlignmentRow row= ma->getMsaRow(i); qstrncpy(aln->sn[i], row->getName().toLatin1(), row->getName().length() + 1); //+1 to include '\0' QString gapless = QString(row->getCore()).remove('-'); qstrncpy(aln->seq[i], gapless.toLatin1(), gapless.length() + 1); //+1 to include '\0' for (quint32 j = 0; j < aln->sl[i]; j++) { if (isalpha((int)aln->seq[i][j])){ aln->s[i][j] = aacode[toupper(aln->seq[i][j])-65]; } else { aln->s[i][j] = -1; } } aln->s[i][aln->sl[i]] = 0; aln->seq[i][aln->sl[i]] = 0; aln->sn[i][aln->lsn[i]] = 0; } /*for(int i=0;i<numseq;i++) { for(int j=0;j<aln->sl[i];j++) printf("%d ", aln->s[i][j]); }*/ //aln_dump(aln); //aln = detect_and_read_sequences(aln,param); if(param->ntree > (int)numseq){ param->ntree = (int)numseq; } //DETECT DNA if(param->dna == -1){ for (quint32 i = 0; i < numseq;i++){ param->dna = byg_detect(aln->s[i],aln->sl[i]); if(param->dna){ break; } } } //param->dna = 0; //k_printf("DNA:%d\n",param->dna); //exit(0); if(param->dna == 1){ //brief sanity check... for (quint32 i = 0; i < numseq;i++){ if(aln->sl[i] < 6){ //k_printf("Dna/Rna alignments are only supported for sequences longer than 6."); free(param); free_aln(aln); throw KalignException("Dna/Rna alignments are only supported for sequences longer than 6."); } } aln = make_dna(aln); } //int j; //fast distance calculation; float** submatrix = 0; submatrix = read_matrix(submatrix,param); // sets gap penalties as well..... 
//if(byg_start(param->alignment_type,"profPROFprofilePROFILE") != -1){ // profile_alignment_main(aln,param,submatrix); //} float** dm = 0; if(param->ntree > 1){ //if(byg_start(param->distance,"pairclustalPAIRCLUSTAL") != -1){ // if(byg_start(param->tree,"njNJ") != -1){ // dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,1); // }else{ // dm = protein_pairwise_alignment_distance(aln,dm,param,submatrix,0); // } //}else if(byg_start("wu",param->alignment_type) != -1){ // dm = protein_wu_distance2(aln,dm,param); // // param->feature_type = "wumanber"; if(param->dna == 1){ // if(byg_start(param->tree,"njNJ") != -1){ // dm = dna_distance(aln,dm,param,1); // }else{ dm = dna_distance(aln,dm,param,0); // } }else{ //if(byg_start(param->tree,"njNJ") != -1){ // dm = protein_wu_distance(aln,dm,param,1); //}else{ try { dm = protein_wu_distance(aln,dm,param,0); } catch (const KalignException &) { cleanupMemory(submatrix, numseq, dm, aln, param); throw; } //} } if(check_task_canceled(ctx)) { cleanupMemory(submatrix, numseq, dm, aln, param); throwCancellingException(); } /*int j; for (int i = 0; i< numseq;i++){ for (j = 0; j< numseq;j++){ k_printf("%f ",dm[i][j]); } k_printf("\n"); }*/ //if(byg_start(param->tree,"njNJ") != -1){ // tree2 = real_nj(dm,param->ntree); //}else{ tree2 = real_upgma(dm,param->ntree); //} //if(param->print_tree){ // print_tree(tree2,aln,param->print_tree); //} } tree = (int*) malloc(sizeof(int)*(numseq*3+1)); for (quint32 i = 1; i < (numseq*3)+1;i++){ tree[i] = 0; } tree[0] = 1; if(param->ntree < 2){ tree[0] = 0; tree[1] = 1; c = numseq; tree[2] = c; a = 2; for (quint32 i = 3; i < (numseq-1)*3;i+=3){ tree[i] = c; tree[i+1] = a; c++; tree[i+2] = c; a++; } }else if(param->ntree > 2){ ntreeify(tree2,param->ntree); }else{ tree = readtree(tree2,tree); for (quint32 i = 0; i < (numseq*3);i++){ tree[i] = tree[i+1]; } free(tree2->links); free(tree2->internal_lables); free(tree2); } //get matrices... 
//struct feature_matrix* fm = 0; //struct ntree_data* ntree_data = 0; int** map = 0; //if(param->ntree > 2){ // ntree_data = (struct ntree_data*)malloc(sizeof(struct ntree_data)); // ntree_data->realtree = tree2; // ntree_data->aln = aln; // ntree_data->profile = 0; // ntree_data->map = 0; // ntree_data->ntree = param->ntree; // ntree_data->submatrix = submatrix; // ntree_data->tree = tree; // ntree_data = ntree_alignment(ntree_data); // map = ntree_data->map; // tree = ntree_data->tree; // for (int i = 0; i < (numseq*3);i++){ // tree[i] = tree[i+1]; // } // free(ntree_data); //}else if (param->feature_type){ // fm = get_feature_matrix(fm,aln,param); // if(!fm){ // for (int i = 32;i--;){ // free(submatrix[i]); // } // free(submatrix); // free_param(param); // free(map); // free(tree); // throw KalignException("getting feature matrix error"); // } // map = feature_hirschberg_alignment(aln,tree,submatrix,map,fm); // //exit(0); // //map = feature_alignment(aln,tree,submatrix, map,fm); //}else if (byg_start("pairwise",param->alignment_type) != -1){ // if(param->dna == 1){ // map = dna_alignment_against_a(aln,tree,submatrix, map,param->gap_inc); // }else{ // map = hirschberg_alignment_against_a(aln,tree,submatrix, map,param->smooth_window,param->gap_inc); // } // //map = default_alignment(aln,tree,submatrix, map); //}else if (byg_start("fast",param->alignment_type) != -1){ // map = default_alignment(aln,tree,submatrix, map); if(param->dna == 1){ map = dna_alignment(aln,tree,submatrix, map,param->gap_inc); // /*}else if (byg_start("test",param->alignment_type) != -1){ // map = test_alignment(aln,tree,submatrix, map,param->internal_gap_weight,param->smooth_window,param->gap_inc); // }else if (param->aa){ // map = aa_alignment(aln,tree,submatrix, map,param->aa); // }else if (param->alter_gaps){ // map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight); // }else if (byg_start("altergaps",param->alignment_type) != -1){ // 
map = alter_gaps_alignment(aln,tree,submatrix,map,param->alter_gaps,param->alter_range,param->alter_weight); // }else if(byg_start("simple",param->alignment_type) != -1){ // map = simple_hirschberg_alignment(aln,tree,submatrix, map);*/ //}else if(byg_start("advanced",param->alignment_type) != -1){ // map = advanced_hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc,param->internal_gap_weight); }else{ map = hirschberg_alignment(aln,tree,submatrix, map,param->smooth_window,param->gap_inc); } if (map == NULL) { throw KalignException("Failed to build alignment."); } if(check_task_canceled(ctx)) { free_param(param); free_aln(aln); free(map); free(tree); throwCancellingException(); } //clear up sequence array to be reused as gap array.... int *p = 0; for (quint32 i = 0; i < numseq;i++){ p = aln->s[i]; for (a = 0; a < aln->sl[i];a++){ p[a] = 0; } } //clear up for (quint32 i = 0; i < (numseq-1)*3;i +=3){ a = tree[i]; b = tree[i+1]; aln = make_seq(aln,a,b,map[tree[i+2]]); } //for (int i = 0; i < numseq;i++){ // k_printf("%s %d\n",aln->sn[i],aln->nsip[i]); //} for (quint32 i = 0; i < numseq;i++){ aln->nsip[i] = 0; } aln = sort_sequences(aln,tree,param->sort); //for (int i = 0; i < numseq;i++){ // k_printf("%d %d %d\n",i,aln->nsip[i],aln->sip[i][0]); //} /************************************************************************/ /* Convert aln to MA */ /************************************************************************/ res->setAlphabet(ma->getAlphabet()); for (quint32 i = 0; i < numseq;i++){ int f = aln->nsip[i]; QString seq; for(quint32 j=0;j<aln->sl[f];j++) { seq += QString(aln->s[f][j],'-') + aln->seq[f][j]; } seq += QString(aln->s[f][aln->sl[f]],'-'); res->addRow(QString(aln->sn[f]), seq.toLatin1()); } //output(aln,param); /* if(!param->format){ fasta_output(aln,param->outfile); }else{ if (byg_start("msf",param->format) != -1){ msf_output(aln,param->outfile); }else if (byg_start("clustal",param->format) != -1){ 
clustal_output(aln,param->outfile); }else if (byg_start("macsim",param->format) != -1){ macsim_output(aln,param->outfile,param->infile[0]); } } */ free_param(param); free_aln(aln); free(map); free(tree); //KalignContext* ctx = getKalignContext(); }