Example #1
0
void MSA::SetClustalWWeights(const Tree &tree)
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uLeafCount = tree.GetLeafCount();

	WEIGHT *Weights = new WEIGHT[uSeqCount];

	CalcClustalWWeights(tree, Weights);

	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		const WEIGHT w = Weights[n];
		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
		const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
		const unsigned uSeqIndex = GetSeqIndex(uId);
#if	DEBUG
		if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
			Quit("MSA::SetClustalWWeights: names don't match");
#endif
		SetSeqWeight(uSeqIndex, w);
		}
	NormalizeWeights((WEIGHT) 1.0);

	delete[] Weights;
	}
Example #2
0
// Write the alignment to File in FASTA format: for every sequence a
// ">name" header line followed by the row's characters, broken into lines
// of at most FASTA_BLOCK characters.
//
// The alignment must have at least one column (asserted).
void MSA::ToFASTAFile(TextFile &File) const
	{
	const unsigned uWidth = GetColCount();
	assert(uWidth > 0);
	// Number of output lines needed per sequence (ceiling division).
	const unsigned uChunks = (GetColCount() - 1)/FASTA_BLOCK + 1;
	const unsigned uRows = GetSeqCount();

	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		File.PutString(">");
		File.PutString(GetSeqName(uRow));
		File.PutString("\n");

		// uCol runs continuously across the chunks of this row.
		unsigned uCol = 0;
		for (unsigned uChunk = 0; uChunk < uChunks; ++uChunk)
			{
			// Columns still to be written, capped at one full line.
			unsigned uThisLine = uWidth - uChunk*FASTA_BLOCK;
			if (uThisLine > FASTA_BLOCK)
				uThisLine = FASTA_BLOCK;

			for (unsigned k = 0; k < uThisLine; ++k, ++uCol)
				{
				char c = GetChar(uRow, uCol);
				File.PutChar(c);
				}
			File.PutChar('\n');
			}
		}
	}
Example #3
0
/*
 * Add a sequence to a seqed widget.
 *
 * The Tcl command named by seqed_win is looked up and its clientData is
 * used as the tkSeqed record. The sequence text is handed to
 * seqed_add_sequence() together with the name, structure and id of
 * sequence number seq_num, and the editor is then registered with
 * seqed_reg().
 *
 * Returns the value returned by seqed_reg(), or -1 if seqed_win does not
 * name an existing Tcl command.
 */
int add_seq_seqed(Tcl_Interp *interp,
                  char *sequence,
                  char *seqed_win,
                  int seq_num,
                  int pos,
                  int container_id,
                  char *c_win,
                  int element_id)
{

    Tcl_CmdInfo info;
    tkSeqed *se;
    char *seq_name;
    int sequence_type;
    int seqed_id;

    /*
     * Tcl_GetCommandInfo() returns 0 and leaves 'info' untouched when the
     * command does not exist; using info.clientData in that case would
     * read an uninitialised pointer.
     */
    if (0 == Tcl_GetCommandInfo(interp, seqed_win, &info))
	return -1;
    se = (tkSeqed*)info.clientData;

    seq_name = GetSeqName(seq_num);
    sequence_type = GetSeqStructure(seq_num);

    seqed_add_sequence(se, strlen(sequence), sequence, seq_name,
                       sequence_type, GetSeqId(seq_num), 0, 0);
    seqed_id = seqed_reg(interp, seqed_win, seq_num, se, pos, container_id,
                         c_win, element_id);
    return seqed_id;
}
Example #4
0
/*
 * Create a scrambled copy of sequence seq_num (dna or protein) and add it
 * to the sequence manager.
 *
 * The copy is shuffled by scramble_seq() seeded from the current time and
 * registered under "<parental name>_x<N>", where N is a counter that is
 * incremented after each fully successful call. If seq_num is a
 * sub-sequence (its name differs from the parental name), a matching
 * sub-sequence "<child name>_x<N>" with the same start/end is added to the
 * new sequence as well.
 *
 * Returns 0 on success, -1 on allocation or registration failure.
 */
int ScrambleSeq(Tcl_Interp *interp,
		int seq_num)
{
    char *seq1 = GetSeqSequence(seq_num);
    int length = GetSeqLength(seq_num);
    int seq_id = GetSeqId(seq_num);
    char *seq2;
    char *name;
    char *parental_name, *child_name;
    int new_seq_num, start, end;
    static int num = 0;


    if (NULL == (seq2 = (char *)xmalloc((length+1) * sizeof(char))))
	return -1;

    memcpy(seq2, seq1, length);
    scramble_seq(seq2, length, (int) time(NULL));
    seq2[length] = '\0';

    parental_name = GetParentalSeqName(seq_num);
    child_name = GetSeqName(seq_num);

    /* +13: "_x" (2) + up to 10 digits of a non-negative int + NUL */
    if (NULL == (name = (char *)xmalloc((strlen(parental_name)+13) * 
					sizeof(char)))) {
	xfree(seq2);		/* not yet handed to the sequence manager */
	return -1;
    }
    sprintf(name, "%s_x%d", parental_name, num);
    new_seq_num = AddSequence(interp, -1, GetSeqLibrary(seq_num), 
			      name, seq2, GetSeqStructure(seq_num), 
			      GetSeqType(seq_num), NULL , " ");
    /*
     * The name buffer is released by this function after AddSequence()
     * whether or not the call succeeded, so it no longer leaks on failure.
     * NOTE(review): seq2 is presumably owned by AddSequence() from here on
     * (it is never freed on the success path) - confirm its behaviour on
     * failure before adding a free for it.
     */
    xfree(name);
    if (-1 == new_seq_num)
	return -1;

    if (strcmp(parental_name, child_name) != 0) {
	/* sub-sequence */
	/* 
	 * need to get seq num from seq_id instead of using seq_num in case
	 * AddSequence has deleted duplicate names
	 */
	start = GetSubSeqStart(GetSeqNum(seq_id));
	end = GetSubSeqEnd(GetSeqNum(seq_id));

	if (NULL == (name = (char *)xmalloc((strlen(child_name)+13) * 
					    sizeof(char))))
	    return -1;

	sprintf(name, "%s_x%d", child_name, num);
	/* name is passed on to AddSubSequence() and not freed here */
	if (-1 == (AddSubSequence(interp, GetSeqId(new_seq_num), start, end, name)))
	    return -1;
    }
    num++;
    return 0;
}
Example #5
0
/*
 * Set the active range of a sequence.
 *
 * Builds the name "<sequence name>_s<N>" (N is a per-process counter
 * starting at 1) and registers the range start..end of seq_id under that
 * name with AddSubSequence().
 *
 * Returns -1 if the name cannot be allocated, otherwise the value returned
 * by AddSubSequence().
 */
int SetRange(Tcl_Interp *interp,
	     int seq_id,
	     int start,
	     int end)
{
    static int count = 1;
    int seq_num;
    size_t name_size;
    char *name;

    seq_num = GetSeqNum(seq_id);

    /* room for the base name plus the "_s<N>" suffix and terminator */
    name_size = strlen(GetSeqName(seq_num)) + 20;
    name = (char *)xmalloc(name_size * sizeof(char));
    if (name == NULL)
	return -1;

    sprintf(name, "%s_s%d", GetSeqName(seq_num), count);
    count++;

    /* the name buffer is handed to AddSubSequence() and not freed here */
    return AddSubSequence(interp, seq_id, start, end, name);
}
Example #6
0
/*
 * Copy a range of a sequence and add the copy to the sequence manager.
 *
 * 'start' is used as a 1-based offset into the sequence (the copy begins
 * at seq1[start-1]) and end - start + 1 characters are copied. The new
 * sequence is registered as "<parental name>_n<N>", N being a per-process
 * counter starting at 1.
 *
 * Returns 0 on success, -1 on allocation or registration failure.
 */
int CopyRange(Tcl_Interp *interp,
	      int seq_id,
	      int start,
	      int end)
{
    int seq_num = GetSeqNum(seq_id);
    char *seq1 = GetSeqSequence(seq_num);
    char *seq2;
    char *name;
    int length = end - start + 1;
    int new_seq_num;
    char *parental_name, *child_name;
    static int count = 1;


    if (NULL == (seq2 = (char *)xmalloc((length+2) * sizeof(char))))
	return -1;
     
    strncpy(seq2, &seq1[start-1], length);
    seq2[length] = '\0';

    parental_name = GetParentalSeqName(seq_num);
    child_name = GetSeqName(seq_num);	/* only used by the disabled code below */

    if (NULL == (name = (char *)xmalloc((strlen(parental_name)+20) * 
					sizeof(char)))) {
	xfree(seq2);		/* not yet handed to the sequence manager */
	return -1;
    }
    sprintf(name, "%s_n%d", parental_name, count++);
    new_seq_num = AddSequence(interp, -1, GetSeqLibrary(seq_num), 
			      name, seq2, GetSeqStructure(seq_num), 
			      GetSeqType(seq_num), NULL, " ");
    /*
     * Release the name on both outcomes; previously it leaked when
     * AddSequence() failed.
     * NOTE(review): seq2 is presumably owned by AddSequence() from here on
     * (it is never freed on the success path) - confirm.
     */
    xfree(name);
    if (-1 == new_seq_num)
	return -1;

    /* don't think I need to deal with subsequences in copying a sequence. */
#if 0
    /* NOTE(review): if re-enabled, +3 is too small for the "_n%d" suffix */
    if (strcmp(parental_name, child_name) != 0) {
	/* sub-sequence */
	/* 
	 * need to get seq num from seq_id instead of using seq_num in case
	 * AddSequence has deleted duplicate names
	 */
	start = GetSubSeqStart(GetSeqNum(seq_id));
	end = GetSubSeqEnd(GetSeqNum(seq_id));

	if (NULL == (name = (char *)xmalloc((strlen(child_name)+3) * 
					    sizeof(char))))
	    return -1;

	sprintf(name, "%s_n%d", child_name, count++);
	if (-1 == (AddSubSequence(interp, GetSeqId(new_seq_num), 
				  start, end, name)))
	    return -1;
    }
#endif
    return 0;
}
Example #7
0
unsigned SeqVect::GetSeqIdFromName(const char *Name) const
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned i = 0; i < uSeqCount; ++i)
		{
		if (!strcmp(Name, GetSeqName(i)))
			return GetSeqId(i);
		}
	Quit("SeqVect::GetSeqIdFromName(%s): not found", Name);
	return 0;
	}
Example #8
0
/*
 * Complement a sequence and add the result to the sequence manager.
 *
 * A copy of sequence seq_num is passed through complement_seq() and
 * registered as "<parental name>_c". If seq_num is a sub-sequence (its
 * name differs from the parental name) a sub-sequence "<child name>_c" is
 * added to the new sequence, with the range mirrored about the sequence
 * length (length - end + 1 .. length - start + 1).
 *
 * Returns 0 on success, -1 on allocation or registration failure.
 */
int ComplementSeq(Tcl_Interp *interp,
		  int seq_num)
{
    char *seq1 = GetSeqSequence(seq_num);
    char *seq2;
    char *name;
    int length = GetSeqLength(seq_num);
    int seq_id = GetSeqId(seq_num);
    int new_seq_num, start, end;
    char *parental_name, *child_name;

    if (NULL == (seq2 = (char *)xmalloc((length+1) * sizeof(char))))
	return -1;

    memcpy(seq2, seq1, length);
    (void) complement_seq(seq2, length);
    seq2[length] = '\0';

    parental_name = GetParentalSeqName(seq_num);
    child_name = GetSeqName(seq_num);

    /* +3: "_c" plus the terminating NUL */
    if (NULL == (name = (char *)xmalloc((strlen(parental_name)+3) * 
					sizeof(char)))) {
	xfree(seq2);		/* not yet handed to the sequence manager */
	return -1;
    }
    sprintf(name, "%s_c", parental_name);
    new_seq_num = AddSequence(interp, -1, GetSeqLibrary(seq_num), 
			      name, seq2, GetSeqStructure(seq_num), 
			      GetSeqType(seq_num), NULL, " ");
    /*
     * Release the name on both outcomes; previously it leaked when
     * AddSequence() failed.
     * NOTE(review): seq2 is presumably owned by AddSequence() from here on
     * (it is never freed on the success path) - confirm.
     */
    xfree(name);
    if (-1 == new_seq_num)
	return -1;

    if (strcmp(parental_name, child_name) != 0) {
	/* sub-sequence */
	/* 
	 * need to get seq num from seq_id instead of using seq_num in case
	 * AddSequence has deleted duplicate names
	 */
	start = GetSubSeqStart(GetSeqNum(seq_id));
	end = GetSubSeqEnd(GetSeqNum(seq_id));

	if (NULL == (name = (char *)xmalloc((strlen(child_name)+3) * 
					    sizeof(char))))
	    return -1;

	sprintf(name, "%s_c", child_name);
	/* name is passed on to AddSubSequence() and not freed here */
	if (-1 == (AddSubSequence(interp, GetSeqId(new_seq_num), 
				  length - end + 1, 
				  length - start + 1, name)))
	    return -1;
    }
    return 0;
}
Example #9
0
void MSA::ListWeights() const
	{
	const unsigned uSeqCount = GetSeqCount();
	Log("Weights:\n");
	WEIGHT wTotal = 0;
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		wTotal += GetSeqWeight(n);
		Log("%6.3f %s\n", GetSeqWeight(n), GetSeqName(n));
		}
	Log("Total weights = %6.3f, should be 1.0\n", wTotal);
	}
Example #10
0
// Write the alignment to File in interleaved PHYLIP layout.
//
// The first line holds "<sequence count> <column count>". Columns are then
// written in blocks: the first block carries a 10-character, left-justified
// name field (truncated and passed through FixName) followed by up to
// BLOCKSIZE - 10 columns; later blocks carry up to BLOCKSIZE columns.
// Within a line a space separates groups of 10 columns, letters are
// upper-cased, and blocks are separated by an empty line.
//
// Nothing beyond the header line is written when there are no columns.
void MSA::ToPhyInterleavedFile(TextFile &File) const
	{
	const unsigned SeqCount = GetSeqCount();
	const unsigned ColCount = GetColCount();

	File.PutFormat("%d %d\n", SeqCount, ColCount);

	if (0 == ColCount)
		return;

	unsigned ColBlockStart = 0;
	for (;;)
		{
		const unsigned MaxCols = (ColBlockStart == 0) ? (BLOCKSIZE - 10) : BLOCKSIZE;

		// The end of the block is computed up front instead of being derived
		// from the per-sequence loop, so the outer loop always advances --
		// even when the alignment has no sequences.
		const unsigned ColBlockEnd = (ColCount - ColBlockStart < MaxCols) ?
		  ColCount : ColBlockStart + MaxCols;

		for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
			{
			if (0 == ColBlockStart)
				{
				// Name column, first block only.
				char Name[11];
				const char *ptrName = GetSeqName(Seq);
				size_t n = strlen(ptrName);
				if (n > 10)
					n = 10;
				memcpy(Name, ptrName, n);
				Name[n] = 0;
				FixName(Name);
				File.PutFormat("%-10.10s", Name);
				}

			for (unsigned Col = ColBlockStart; Col < ColBlockEnd; ++Col)
				{
				const unsigned ColsThisBlock = Col - ColBlockStart;
				// Space before every group of 10; in later blocks the very
				// first group starts at the line start without a space.
				if (ColsThisBlock%10 == 0 && (0 == ColBlockStart || ColsThisBlock > 0))
					File.PutChar(' ');
				char c = GetChar(Seq, Col);
				// <cctype> functions require a value representable as
				// unsigned char; a negative plain char is undefined.
				if (isalpha((unsigned char) c))
					c = (char) toupper((unsigned char) c);
				File.PutChar(c);
				}
			File.PutChar('\n');
			}
		if (ColBlockEnd == ColCount)
			break;
		File.PutChar('\n');
		ColBlockStart = ColBlockEnd;
		}
	}
Example #11
0
/*
 * add new sequence to sequence manager, but keep as dna but with an extension
 * of _rf123 which signifies to the comparison functions that the sequence
 * is to be translated into its 3 reading frames, each of which will be used
 * in the comparison routine
 *
 * The new entry is a copy of sequence seq_num registered as
 * "<parental name>_rf123" (added as LINEAR, PROTEIN). If seq_num is a
 * sub-sequence (its name differs from the parental name) a sub-sequence
 * "<child name>_rf123" with the same start/end is added to the new entry.
 *
 * Returns -1 on allocation or AddSequence() failure; otherwise the new
 * sequence number, or the value of AddSubSequence() in the sub-sequence
 * case.
 */
int TranslateTogether(Tcl_Interp *interp,
		      int seq_num)
{
    char *name;
    char *dna_seq;
    char *prot_seq;
    int seq_id = GetSeqId(seq_num);
    int new_seq_num;
    char *parental_name, *child_name;
    int start, end;

#ifdef DEBUG
    printf("START translate together \n");
#endif
    dna_seq = GetSeqSequence(seq_num);
    parental_name = GetParentalSeqName(seq_num);
    child_name = GetSeqName(seq_num);

    /*
     * Allocate the name before duplicating the sequence so that either
     * failure can be unwound without leaking the other buffer.
     * +7: "_rf123" plus the terminating NUL.
     */
    if (NULL == (name = (char *)xmalloc((strlen(parental_name)+7) * 
					sizeof(char))))
	return -1;
    if (NULL == (prot_seq = strdup(dna_seq))) {
	xfree(name);
	return -1;
    }

    sprintf(name, "%s_rf123", parental_name);
    new_seq_num = AddSequence(interp, -1, GetSeqLibrary(seq_num), 
			      name, prot_seq, LINEAR, PROTEIN, NULL, " ");
    /*
     * Release the name on both outcomes; previously it leaked when
     * AddSequence() failed.
     * NOTE(review): prot_seq is presumably owned by AddSequence() from here
     * on (it is never freed on the success path) - confirm.
     */
    xfree(name);
    if (-1 == new_seq_num)
	return -1;

    if (strcmp(parental_name, child_name) != 0) {
	/* sub-sequence */
	/* 
	 * need to get seq num from seq_id instead of using seq_num in case
	 * AddSequence has deleted duplicate names
	 */
	start = GetSubSeqStart(GetSeqNum(seq_id));
	end = GetSubSeqEnd(GetSeqNum(seq_id));

	if (NULL == (name = (char *)xmalloc((strlen(child_name)+7) * 
					    sizeof(char))))
	    return -1;

	sprintf(name, "%s_rf123", child_name);
	/* name is passed on to AddSubSequence() and not freed here */
	new_seq_num = AddSubSequence(interp, GetSeqId(new_seq_num), start, end, 
				     name);
    }
    return new_seq_num;
}
/*
 * Read an EMBOSS graph result file and store it as a result on a sequence.
 *
 * seq_id    - id of the sequence the result belongs to
 * start,end - range the result covers; end == -1 means "sequence length"
 * filename  - file passed to read_emboss_data_file()
 * graph_obj - passed through to read_emboss_data_file() and
 *             store_emboss_graph()
 * id        - receives the value returned by store_emboss_graph()
 *
 * Returns 0 on success, -1 if allocation, reading or storing fails.
 */
int init_emboss_graph_create(Tcl_Interp *interp, 
			     int seq_id, 
			     int start, 
			     int end,
			     char *filename,
			     Tcl_Obj **graph_obj,
			     int *id)
{
    int seq_num;
    in_emboss *input;
    Tcl_DString input_params;
    text_emboss *text_data;

    e_graph *data = NULL;

    seq_num = GetSeqNum(seq_id);

    /* if the end has not been defined, set it to be the sequence length */
    if (end == -1) {
	end = GetSeqLength(seq_num);
    }

    if (NULL == (input = (in_emboss *)xmalloc (sizeof(in_emboss))))
	return -1;

    read_emboss_data_file(filename, &data, graph_obj, &text_data);
    if (!data) {
	verror(ERR_FATAL,"emboss", "error in reading results\n");
	xfree(input);		/* nothing has taken ownership of it yet */
	return -1;
    }

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);
    vTcl_DStringAppend(&input_params, "sequence %s: from %d to %d\n",
		       GetSeqName(seq_num), start, end);

    vfuncparams("%s", Tcl_DStringValue(&input_params));
    input->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);

    /*
     * NOTE(review): on failure it is not visible here whether
     * store_emboss_graph() has kept or released input/data, so they are
     * left alone as before - confirm.
     */
    if (-1 == (*id = store_emboss_graph(seq_num, start, end, data, input, 
					text_data, graph_obj))) {
	verror(ERR_FATAL,"emboss", "error in saving results\n");
	return -1;
    }
    xfree(data);
    return 0;
}
Example #13
0
/*
 * Delete every registered sequence whose name equals 'name', so that a new
 * sequence of that name can be added. A warning is issued for each
 * sequence removed.
 */
void RemoveDuplicateSeq(Tcl_Interp *interp,
			char *name)
{
    int remaining = NumSequences();
    int idx = 0;

    /*
     * After a deletion the same index is examined again and the count is
     * reduced by one; the index only advances past non-matching entries.
     */
    while (idx < remaining) {
	if (strcmp(name, GetSeqName(idx)) != 0) {
	    idx++;
	    continue;
	}
	verror(ERR_WARN, "RemoveDuplicateSeq", 
	       "%s already exists in. Removing previous sequence and adding new sequence\n", name); 
	DeleteSequence(interp, idx);
	remaining--;
    }
}
/*
 * Text output for a string-search result.
 *
 * 'obj' is treated as a seq_result. For every point in the first array of
 * the result's data, the position and score are printed with vmessage()
 * and the stretch of the sequence starting at that position (as long as
 * the search string) is listed against the search string with
 * iubc_list_alignment().
 */
void
nip_string_search_text_func(void *obj)
{
    seq_result *result = (seq_result *) obj;
    in_string_search *input = result->input;
    stick *data = result->data;
    int n_hits = data->ap_array[0].n_pts;
    int seq_num;
    char *seq_name;
    char *sequence;
    int pattern_len;
    char *window;
    int hit;

    seq_num = GetSeqNum(result->seq_id[0]);
    seq_name = GetSeqName(seq_num);
    sequence = GetSeqSequence(seq_num);
    pattern_len = strlen(input->string);

    /* zero-filled, so the window stays NUL terminated after strncpy */
    window = (char *)xcalloc((pattern_len + 1), sizeof(char));
    if (window == NULL)
	return;

    for (hit = 0; hit < n_hits; hit++) {
	int pos = data->ap_array[0].p_array[hit].pos;
	double score = data->ap_array[0].p_array[hit].score;

	vmessage("Position %d score %f", pos, score);

	/* pos is used as a 1-based offset into the sequence */
	strncpy(window, &sequence[pos-1], pattern_len);
	iubc_list_alignment(input->string, window, "string", seq_name, 1, 
			    pos, "");
    }
    xfree(window);
}
Example #15
0
int init_sip_similar_spans_create(Tcl_Interp *interp, 
				  int seq_id_h,
				  int seq_id_v, 
				  int start_h,
				  int end_h, 
				  int start_v,
				  int end_v, 
				  int win_len,
				  int min_match, 
				  int *id)
{
    in_comp_spans *input = NULL;
    int *seq1_match = NULL;
    int *seq2_match = NULL;
    int *match_score = NULL;
    int n_matches;
    char *seq1, *seq2;
    int seq1_len, seq2_len;
    int same_seq;
    int max_matches = get_max_matches();
    int seq1_num, seq2_num;
    int seq1_type, seq2_type;
    int sub1_len, sub2_len;
    Tcl_DString input_params;
   
    vfuncheader("find similar spans");
    
    if (NULL == (seq1_match = (int *)xmalloc(max_matches * sizeof(int))))
	goto error;
    if (NULL == (seq2_match = (int *)xmalloc(max_matches * sizeof(int))))
	goto error;
    if (NULL == (match_score = (int *)xmalloc(max_matches * sizeof(int))))
	goto error;
    if (NULL == (input = (in_comp_spans *)xmalloc(sizeof(in_comp_spans))))
	goto error;
    
    /* get first and second sequence saved using extract_sequence */
    seq1_num = GetSeqNum(seq_id_h);
    seq2_num = GetSeqNum(seq_id_v);
    
    if (seq1_num == -1) {
	verror(ERR_WARN, "find similar spans", "horizontal sequence undefined");
	goto error;
    } else if (seq2_num == -1) {
	verror(ERR_WARN, "find similar spans", "vertical sequence undefined");
	goto error;
    }

    seq1 = GetSeqSequence(seq1_num);
    seq2 = GetSeqSequence(seq2_num);
    seq1_len = GetSeqLength(seq1_num);
    seq2_len = GetSeqLength(seq2_num);
    seq1_type = GetSeqType(seq1_num);
    seq2_type = GetSeqType(seq2_num);

    if (end_h == -1)
	end_h = seq1_len;

    if (end_v == -1)
	end_v = seq2_len;

    if (seq1_type != seq2_type) {
	verror(ERR_WARN, "find similar spans", "sequences must both be either DNA or protein");
	return TCL_OK;
    } else if (seq1_type == PROTEIN) {
	set_char_set(PROTEIN);
        set_score_matrix(get_matrix_file(PROTEIN));
    } else if (seq1_type == DNA) {
	set_char_set(DNA);
        set_score_matrix(get_matrix_file(DNA));
    }

    /* 
     * first check if seq lengths are equal, if not the seqs cannot be the
     * same
     */

    /*
     * Should check length of sub sequences only. These lengths are not
     * stored, so have to calculate them here. Not storing them in
     * seq1_len and seq2_len as I'm unsure whether subsequent functions
     * expect the length of the whole sequence. Anyway, the compare_spans
     * function recalculates the lengths of the sub sequences before doing
     * the comparison.
     */

    sub1_len = end_h - start_h + 1;
    sub2_len = end_v - start_v + 1;

    if (sub1_len == sub2_len) {
	if (strncmp(seq1 + start_h - 1, seq2 + start_v - 1, sub1_len) == 0) {
	    same_seq = 1;
	} else {
	    same_seq = 0;
	}
    } else {
	same_seq = 0;
    }
    if (!get_remove_dup() && same_seq)
	same_seq = 0;

    Compare_Spans(seq1, seq2, seq1_len, seq2_len, start_h, end_h, 
		  start_v, end_v, max_matches, same_seq, 
		  win_len, min_match, 1, 0,
		  &seq1_match, &seq2_match, &match_score, &n_matches);

    /* n_matches == -1 if malloc problem or -2 if too many matches */
    if (n_matches == -2) {
	verror(ERR_WARN, "find similar spans", "too many matches");
	goto error;
    } else if (n_matches == -1) {
	goto error;
    } else if (n_matches == 0) {
	verror(ERR_WARN, "Find similar spans", "no matches found\n"); 
	if (seq1_match)
	    xfree (seq1_match);
	if (seq2_match)
	    xfree (seq2_match);
	if (match_score)
	    xfree(match_score);
	if (input)
	    xfree(input);
	return -1;
    }

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);
    vTcl_DStringAppend(&input_params, "horizontal %s: %s \nvertical %s: %s\n"
	    "window length %d min match %d number of matches %d", 
	    GetSeqLibraryName(seq1_num), 
	    GetSeqName(seq1_num), 
	    GetSeqLibraryName(seq2_num), 
	    GetSeqName(seq2_num), 
	    win_len, min_match, n_matches);
    vfuncparams("%s", Tcl_DStringValue(&input_params));
    input->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);

    if (-1 == (*id = store_sip_similar_spans(seq1_num, seq2_num, win_len,
					     min_match, start_h, end_h, 
					     start_v, end_v,
					     seq1_match, seq2_match, 
					     match_score, n_matches,
					     input))) {
	goto error;
    }
    
    if (seq1_match)
	xfree (seq1_match);
    if (seq2_match)
	xfree (seq2_match);
    if (match_score)
	xfree(match_score);
    return 0;
    
 error:
    verror(ERR_WARN, "find similar spans", "failure in find similar spans");
    if (seq1_match)
	xfree (seq1_match);
    if (seq2_match)
	xfree (seq2_match);
    if (match_score)
	xfree(match_score);
    if (input)
      xfree(input);
    return -1;
}
Example #16
0
/*
 * Find start codons in a sequence and store the matches of each of the
 * three frames as a separate result.
 *
 * seq_id     - id of the sequence to search
 * start, end - range to search; end == -1 means "sequence length"
 * strand_sym - strand selector passed to nip_start_codons()
 * graph_obj  - array of three entries, one per frame, passed to
 *              store_stop_codons()
 * id         - array of three entries; id[k] receives the result id of
 *              frame k+1, or -1 when that frame has no matches
 *
 * Returns 0 on success, -1 on allocation failure or when a result cannot
 * be stored.
 */
int init_nip_start_codons_create(int seq_id,
				 int start,
				 int end,
				 int strand_sym,
				 Tcl_Obj **graph_obj,
				 int *id)
{
    in_s_codon *input1;
    in_s_codon *input2;
    in_s_codon *input3;
    char *seq;
    int seq_len;
    int sequence_type;
    int seq_num;
    s_codon_res *start_codon;
    int type = START_CODON;
    Tcl_DString input_params;
    char strand[8];

    vfuncheader("plot start codons");

    /*
     * Allocate everything first and check once, so that a failure part
     * way through releases the blocks already obtained instead of leaking
     * them.
     */
    input1 = (in_s_codon *)xmalloc(sizeof(in_s_codon));
    input2 = (in_s_codon *)xmalloc(sizeof(in_s_codon));
    input3 = (in_s_codon *)xmalloc(sizeof(in_s_codon));
    start_codon = (s_codon_res *)xmalloc(sizeof(s_codon_res));
    if (NULL == input1 || NULL == input2 || NULL == input3 ||
	NULL == start_codon) {
	if (input1)
	    xfree(input1);
	if (input2)
	    xfree(input2);
	if (input3)
	    xfree(input3);
	if (start_codon)
	    xfree(start_codon);
	return -1;
    }

    seq_num = GetSeqNum(seq_id);
    seq = GetSeqSequence(seq_num);
    seq_len = GetSeqLength(seq_num);
    sequence_type = GetSeqStructure(seq_num);

    /* if the end has not been defined, set it to be the sequence length */
    if (end == -1) {
	end = seq_len;
    }
    nip_start_codons(seq, sequence_type, start, end, strand_sym, start_codon);

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);

    /* each label is 7 characters or fewer, so it fits strand[8] */
    if (strand_sym & TOP_S) {
        strcpy(strand, "forward");
    } else if (strand_sym & BOTTOM_S) {
        strcpy(strand, "reverse");
    } else {
        strcpy(strand, "both");
    }
    vTcl_DStringAppend(&input_params, "sequence %s: from %d to %d\n"
		       "strand %s\n",
		       GetSeqName(seq_num), start, end, strand);

    /* every frame gets its own copy of the parameter text */
    vfuncparams("%s", Tcl_DStringValue(&input_params));
    input1->params = strdup(Tcl_DStringValue(&input_params)); 
    input2->params = strdup(Tcl_DStringValue(&input_params)); 
    input3->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);

    /*
     * NOTE(review): the "error in saving matches" returns below still
     * leave the remaining inputs and start_codon allocated; what
     * store_stop_codons() keeps on failure is not visible from here.
     */
    if (start_codon->n_match1 > 0) {
	if (-1 == (id[0] = store_stop_codons(seq_num, input1, start, end,
					     start_codon->match1, 
					     start_codon->n_match1,
					     1, type, &graph_obj[0]))){
	    verror(ERR_FATAL,"nip start codons", "error in saving matches\n");
	    return -1;
	}
    } else {
	/* nothing stored for this frame: drop its input and match array */
	id[0] = -1;
	free(input1->params);
	xfree(input1);
	xfree(start_codon->match1);
	verror(ERR_WARN, "nip start codons", "no matches found for frame 1");
    }
    if (start_codon->n_match2 > 0) {
	if (-1 == (id[1] = store_stop_codons(seq_num, input2, start, end,
					     start_codon->match2, 
					     start_codon->n_match2,
					     2, type, &graph_obj[1]))){
	    verror(ERR_FATAL,"nip start codons", "error in saving matches\n");
	    return -1;
	}
    } else {
	id[1] = -1;
	free(input2->params);
	xfree(input2);
	xfree(start_codon->match2);
	verror(ERR_WARN, "nip start codons", "no matches found for frame 2");
    }
    if (start_codon->n_match3 > 0) {
	if (-1 == (id[2] = store_stop_codons(seq_num, input3, start, end,
					     start_codon->match3, 
					     start_codon->n_match3,
					     3, type, &graph_obj[2]))){
	    verror(ERR_FATAL,"nip start codons", "error in saving matches\n");
	    return -1;
	}
    } else {
	id[2] = -1;
	free(input3->params);
	xfree(input3);
	xfree(start_codon->match3);
	verror(ERR_WARN, "nip start codons", "no matches found for frame 3");
    }
    xfree(start_codon);
    return 0;
}
Example #17
0
/*
 * Run a local (sim) alignment between two sequence ranges, list each
 * alignment found and store the result.
 *
 * seq_id_h, seq_id_v - ids of the horizontal and vertical sequences
 * start_h .. end_v   - ranges to align (each must be at least 1 long)
 * num_align          - number of alignments wanted; replaced by NUM_SCORES
 *                      when score_align is not -1
 * score_align        - -1, or a score threshold passed to sim_align()
 * match, transition, transversion, start_gap, cont_gap
 *                    - scoring parameters passed to sim_align()
 * id                 - receives the value returned by store_sim2()
 *
 * Returns 0 on success, -1 on any failure (including no alignments
 * found). All memory obtained by this function is released on the failure
 * path.
 */
int init_sip_local_align_create(Tcl_Interp *interp, 
				int seq_id_h,
				int seq_id_v, 
				int start_h,
				int end_h, 
				int start_v,
				int end_v, 
				int num_align,
				float score_align,
				float match,
				float transition,
				float transversion,
				float start_gap,
				float cont_gap,
				int *id)
{
    char *seq1, *seq2;
    int seq1_len, seq2_len;
    int r_len1, r_len2;
    char *r_seq1 = NULL, *r_seq2 = NULL;
    char *name1, *name2;
    int i;
    int seq1_type, seq2_type;
    int seq1_num;
    int seq2_num;
    in_sim *input = NULL;
    align_int **res = NULL;
    long *start1 = NULL, *start2 = NULL, *end1 = NULL, *end2 = NULL;
    int cnt = 0;
    int max_align = 0;
    Tcl_DString input_params;
    int have_params = 0;	/* set while input_params needs freeing */
    int num_elements;
    d_plot *data = NULL;

#define NUM_SCORES 100
    
    vfuncheader("local alignment");

    if (-1 == (seq1_num = GetSeqNum(seq_id_h))) {
	verror(ERR_WARN, "local alignment", 
	       "horizontal sequence undefined");
	goto error;
    }
     
    if (-1 == (seq2_num = GetSeqNum(seq_id_v))) {
	verror(ERR_WARN, "local alignment", 
	       "vertical sequence undefined");
	goto error;
    }

    /* only align dna or protein */
    seq1_type = GetSeqType(seq1_num);
    seq2_type = GetSeqType(seq2_num);

    if (seq1_type != seq2_type) {
	verror(ERR_FATAL, "sim alignment", "sequences must both be either DNA or protein");
	goto error;
    }

    seq1 = GetSeqSequence(seq1_num);
    if ((seq1_len = end_h - start_h + 1) < 1) {
	verror(ERR_WARN, "align sequences", "negative length");
	goto error;
    }
    seq2 = GetSeqSequence(seq2_num);
    if ((seq2_len = end_v - start_v + 1) < 1) {
	verror(ERR_WARN, "align sequences", "negative length");
	goto error;
    }

    if (NULL == (input = (in_sim *)xmalloc(sizeof(in_sim))))
	goto error;

    Tcl_DStringInit(&input_params);
    have_params = 1;
    vTcl_DStringAppend(&input_params, "horizontal %s: %s from %d to %d\n"
	    "vertical %s: %s from %d to %d\n", 
	    GetSeqLibraryName(seq1_num), 
	    GetSeqName(seq1_num), 
	    start_h, end_h, 
	    GetSeqLibraryName(seq2_num), 
	    GetSeqName(seq2_num), 
	    start_v, end_v);

    if (score_align == -1) {
	vTcl_DStringAppend(&input_params, "number of alignments %d \n",
			   num_align);
    } else {
	vTcl_DStringAppend(&input_params, "alignments above score %g\n",
			   score_align);
    }
     
    if (GetSeqType(seq1_num) == DNA) {
	vTcl_DStringAppend(&input_params, "score for match %g\n"
		 "score for transition %g\n"
		 "score for transversion %g\n",
		 match, transition, transversion);
    }
    vTcl_DStringAppend(&input_params, "penalty for starting gap %g\n"
		       "penalty for each residue in gap %g\n",
		       start_gap, cont_gap);

    /*
     * The parameter text is reported now but only copied into
     * input->params just before the result is stored, so that the error
     * path has nothing but the Tcl_DString to release.
     */
    vfuncparams("%s", Tcl_DStringValue(&input_params));
     
    if (NULL == (r_seq1 = (char *)xcalloc(((seq1_len+seq2_len)*2+1),
					  sizeof(char)))) {
	goto error;
    }
    if (NULL == (r_seq2 = (char *)xcalloc(((seq1_len+seq2_len)*2+1),
					  sizeof(char)))) {
	goto error;
    }
    
    if (score_align != -1) {
	num_align = NUM_SCORES;
    }
    /* sim_align() may change num_align; max_align keeps the allocated count */
    max_align = num_align;

    if (NULL == (start1 = (long *)xmalloc(max_align * sizeof(long)))) {
	goto error;
    }
    if (NULL == (start2 = (long *)xmalloc(max_align * sizeof(long)))) {
	goto error;
    }
    if (NULL == (end1 = (long *)xmalloc(max_align * sizeof(long)))) {
	goto error;
    }
    if (NULL == (end2 = (long *)xmalloc(max_align * sizeof(long)))) {
	goto error;
    }
    if (NULL == (res = (align_int **)xmalloc(max_align *sizeof(align_int*)))) {
	goto error;
    }

    /* clear the slots first so a partial allocation can be unwound */
    for (i = 0; i < max_align; i++) {
	res[i] = NULL;
    }
    for (i = 0; i < max_align; i++) {
	if (NULL == (res[i] = (align_int *)xcalloc((seq1_len+seq2_len+1),
						   sizeof(align_int)))) {
	    goto error;
	}
    }
    
    /* 
     * if finding all alignments above a certain score, the return value of
     * num_align is the number of alignments found
     */
    sim_align(&seq1[start_h-1], &seq2[start_v-1], seq1_len,
	      seq2_len, seq1_type, &num_align, score_align,
	      match, transition, transversion, start_gap, cont_gap, res, 
	      start1, start2, end1, end2);

    if (num_align <= 0) {
	verror(ERR_WARN, "local alignment", "no matches found\n");
	goto error;
    }
    name1 = GetSeqBaseName(seq1_num);
    name2 = GetSeqBaseName(seq2_num);

    num_elements = (seq1_len + seq2_len + 1) * num_align;
    if (NULL == (data = (d_plot *)xmalloc(sizeof(d_plot))))
	goto error;
    
    if (NULL == (data->p_array = (pt_score *)xmalloc(sizeof(pt_score) * 
						     num_elements)))
	goto error;
    
    for (i = 0; i < num_align; i++) {
	store_sim1(&seq1[start_h+start1[i]-2], 
		  &seq2[start_v+start2[i]-2], seq1_len, 
		  seq2_len, end1[i]-start1[i]+1, end2[i]-start2[i]+1,
		  res[i], start_h+start1[i]-1, start_v+start2[i]-1,
		  data->p_array, &cnt);

	cexpand(&seq1[start_h+start1[i]-2], 
		&seq2[start_v+start2[i]-2], end1[i]-start1[i]+1,
		end2[i]-start2[i]+1, r_seq1, r_seq2, &r_len1, &r_len2, 
		ALIGN_J_SSH | ALIGN_J_PADS, res[i]);
	
	spin_list_alignment(r_seq1, r_seq2, name1, name2, start_h+start1[i]-1, 
		       start_v+start2[i]-1, "", seq1_type);
	
    }

    /* no failure exits remain: finish 'input' and hand it over with 'data' */
    input->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);
    have_params = 0;

    *id = store_sim2(seq1_num, seq2_num, start_h, end_h, start_v, end_v, 
		     input, data, cnt);

    xfree(r_seq1);
    xfree(r_seq2);
    xfree(start1);
    xfree(start2);
    xfree(end1);
    xfree(end2);
    for (i = 0; i < max_align; i++) {
	xfree(res[i]);
    }
    xfree(res);
    
    return 0;

 error:
    /* release whatever had been obtained before the failure */
    if (have_params)
	Tcl_DStringFree(&input_params);
    if (data) {
	if (data->p_array)
	    xfree(data->p_array);
	xfree(data);
    }
    if (res) {
	for (i = 0; i < max_align; i++) {
	    if (res[i])
		xfree(res[i]);
	}
	xfree(res);
    }
    if (end2)
	xfree(end2);
    if (end1)
	xfree(end1);
    if (start2)
	xfree(start2);
    if (start1)
	xfree(start1);
    if (r_seq2)
	xfree(r_seq2);
    if (r_seq1)
	xfree(r_seq1);
    if (input)
	xfree(input);	/* input->params has not been assigned on this path */
    return -1;
}
Example #18
0
// Write one CSV row per run of consecutive scaffolds sharing the same
// (PE1 chrom, PE2 chrom) pair, preceded by a header row.
//
// Scaffolds are read in order from m_pScaffolds (m_NumScaffolds entries).
// For each run the row reports the chrom names, the number of alignments,
// the sense/sense and sense/antisense counts, whether both ends are
// aligned, whether both ends are on the same chrom, and -- for pairs on
// different chroms -- whether LocateMateScaffold() finds scaffolds for the
// reversed pair together with their sense counts.
//
// pszOutFile is used in the diagnostic message only; rows are written to
// m_hOutFile, which is closed and reset to -1 before returning.
// Always returns 0.
int
ReportCorelationships(char *pszOutFile)
{
char *pszPE1Chrom;
char *pszPE2Chrom;
char *pszSeqID;
int PrevPE1ChromID;
int PrevPE2ChromID;
int NumPEAligned;
int NumSEAligned;
int NumSenseSense;
int NumSenseAnti;
int	RevNumSenseSense;
int	RevNumSenseAnti;
int NumUnpaired;
int NumPaired;
tsPEScaffold *pPEScaffold;
bool bRevMate;
tsPEScaffold *pMateScaffold;
int Idx;
int BuffIdx;
char szBuff[16000];

gDiagnostics.DiagOut(eDLInfo,gszProcName,"Identifying and writing corelations to file: '%s'",pszOutFile);

// Defined starting values, so nothing below can read an indeterminate value
pszPE1Chrom = (char *)"N/A";
pszPE2Chrom = (char *)"N/A";
bRevMate = false;
PrevPE1ChromID = -1;
PrevPE2ChromID = -1;
NumPEAligned = 0;
NumSEAligned = 0;
NumSenseSense = 0;
NumSenseAnti = 0;
RevNumSenseSense = 0;
RevNumSenseAnti = 0;
NumUnpaired = 0;
NumPaired = 0;
BuffIdx = sprintf(&szBuff[0],"\"PE1\",\"PE2\",\"NumAligned\",\"NumSenseSense\",\"NumSenseAnti\",\"Paired\",\"Self\",\"RevMate\",\"RevNumSenseSense\",\"RevNumSenseAnti\"\n");
pPEScaffold = m_pScaffolds;
for(Idx = 0; Idx < m_NumScaffolds; Idx++,pPEScaffold++)
	{
	if(pPEScaffold->PE1ChromID != PrevPE1ChromID || pPEScaffold->PE2ChromID != PrevPE2ChromID) // starting a different scaffold?
		{
		if(PrevPE1ChromID > 0 || PrevPE2ChromID > 0)
			{
			// write out any prev scaffold info here
			// flush first if fewer than 500 bytes of the buffer remain
			if((size_t)BuffIdx > (sizeof(szBuff)-500))
				{
				CUtility::SafeWrite(m_hOutFile,szBuff,BuffIdx);
				BuffIdx = 0;
				}
			if(PrevPE1ChromID > 0 && PrevPE2ChromID > 0)
				BuffIdx += sprintf(&szBuff[BuffIdx],"\"%s\",\"%s\",%d,%d,%d,\"Y\",\"%s\",\"%s\",%d,%d\n",
							pszPE1Chrom,pszPE2Chrom,NumPEAligned,NumSenseSense,NumSenseAnti,PrevPE1ChromID==PrevPE2ChromID ? "Y" : "N",bRevMate ? "Y" : "N",RevNumSenseSense,RevNumSenseAnti);
			else
				{
				if(PrevPE1ChromID > 0)
					BuffIdx += sprintf(&szBuff[BuffIdx],"\"%s\",\"%s\",%d,%d,%d,\"N\",\"N\",\"N\",0,0\n",pszPE1Chrom,"N/A",NumSEAligned,0,0);
				else
					BuffIdx += sprintf(&szBuff[BuffIdx],"\"%s\",\"%s\",%d,%d,%d,\"N\",\"N\",\"N\",0,0\n","N/A",pszPE2Chrom,NumSEAligned,0,0);
				}
			}

		NumPEAligned = 0;					// number aligning as paired ends
		NumSEAligned = 0;					// number aligning as single ended only
		NumSenseSense = 0;					// number aligning sense to sense
		NumSenseAnti = 0;					// number aligning sense to antisense (or antisense to sense)
		if(pPEScaffold->PE1ChromID > 0 && pPEScaffold->PE2ChromID > 0)
			{
			NumPEAligned = 1;
			if(pPEScaffold->PE1Sense == pPEScaffold->PE2Sense)
				NumSenseSense = 1;
			else
				NumSenseAnti = 1;
			NumPaired += 1;
			}
		else
			{
			NumSEAligned = 1;
			NumUnpaired += 1;
			}
		// NOTE(review): the sequence name is looked up but never used below;
		// the call is kept in case GetSeqName() has side effects -- confirm
		pszSeqID = GetSeqName(pPEScaffold->PE12SeqID);
		if(pPEScaffold->PE1ChromID)
			pszPE1Chrom = GetChromName(pPEScaffold->PE1ChromID);
		else
			pszPE1Chrom = (char *)"N/A";

		if(pPEScaffold->PE2ChromID)
			pszPE2Chrom = GetChromName(pPEScaffold->PE2ChromID);
		else
			pszPE2Chrom = (char *)"N/A";

		PrevPE1ChromID = pPEScaffold->PE1ChromID;
		PrevPE2ChromID = pPEScaffold->PE2ChromID;
	
		// if not to self then check if the PE2 chrom has any PE2 linking back to this PE1 and count linking sense/antisense
		bRevMate = false;
		RevNumSenseSense = 0;
		RevNumSenseAnti = 0;
		if(PrevPE1ChromID != PrevPE2ChromID && (PrevPE1ChromID > 0 && PrevPE2ChromID > 0))
			{
			pMateScaffold = LocateMateScaffold(PrevPE2ChromID,PrevPE1ChromID);
			if(pMateScaffold != NULL)
				{
				do {
					if(pMateScaffold->PE1Sense == pMateScaffold->PE2Sense)
						RevNumSenseSense += 1;
					else
						RevNumSenseAnti += 1;
					// stop before stepping past the scaffold whose id equals the scaffold count
					if(pMateScaffold->PEScafoldID == m_NumScaffolds)
						break;
					pMateScaffold += 1;
					}
				while(pMateScaffold->PE1ChromID == PrevPE2ChromID && pMateScaffold->PE2ChromID == PrevPE1ChromID);

				bRevMate = true;
				}
			}
		continue;
		}

	// same pair of chromosomes so accumulate counts
	if(pPEScaffold->PE1ChromID > 0 && pPEScaffold->PE2ChromID > 0)
		{
		NumPEAligned += 1;
		if(pPEScaffold->PE1Sense == pPEScaffold->PE2Sense)
			NumSenseSense += 1;
		else
			NumSenseAnti += 1;
		NumPaired += 1;
		}
	else
		{
		NumSEAligned += 1;
		NumUnpaired += 1;
		}
	}

if(BuffIdx > 0 || NumPEAligned > 0 || NumSEAligned > 0)
	{
	// The row for the last run is only formatted when at least one scaffold
	// was processed; with an empty scaffold list there is no run, and the
	// chrom name pointers were previously read without ever being set.
	if(m_NumScaffolds > 0)
		{
		if(PrevPE1ChromID > 0 && PrevPE2ChromID > 0)
			BuffIdx += sprintf(&szBuff[BuffIdx],"\"%s\",\"%s\",%d,%d,%d,\"Y\",\"%s\",\"%s\",%d,%d\n",
							pszPE1Chrom,pszPE2Chrom,NumPEAligned,NumSenseSense,NumSenseAnti,PrevPE1ChromID==PrevPE2ChromID ? "Y" : "N",bRevMate ? "Y" : "N",RevNumSenseSense,RevNumSenseAnti);
		else
			{
			if(PrevPE1ChromID > 0)
				BuffIdx += sprintf(&szBuff[BuffIdx],"\"%s\",\"%s\",%d,%d,%d,\"N\",\"N\",\"N\",0,0\n",pszPE1Chrom,"N/A",NumSEAligned,0,0);
			else
				BuffIdx += sprintf(&szBuff[BuffIdx],"\"%s\",\"%s\",%d,%d,%d,\"N\",\"N\",\"N\",0,0\n","N/A",pszPE2Chrom,NumSEAligned,0,0);
			}
		}

	CUtility::SafeWrite(m_hOutFile,szBuff,BuffIdx);
	}

close(m_hOutFile);
m_hOutFile = -1;

gDiagnostics.DiagOut(eDLInfo,gszProcName,"Completed writing corelations (%d paired, %d orphaned) to file",NumPaired,NumUnpaired);
return(0);
}
Example #19
0
// Length of the part of a sequence label that is written to an .aln file:
// everything before the first blank, or the whole label if it has no blank.
static int AlnLabelLength(const char *ptrName)
	{
	const char *ptrBlank = strchr(ptrName, ' ');
	if (0 != ptrBlank)
		return (int) (ptrBlank - ptrName);
	return (int) strlen(ptrName);
	}

// Write this alignment to File in CLUSTAL W (.aln) layout.
//
// A header line is written first (the CLUSTAL W banner when g_bClwStrict is
// set, otherwise a MUSCLE banner followed by an empty line). The columns are
// then written in blocks of uCharsPerLine; each block holds one line per
// sequence (label padded/truncated to a common width, six spaces, residues
// upper-cased) followed by a consensus line built from GetAlnConsensusChar.
// Labels are cut at the first blank and the common width is clamped to
// [MIN_NAME, MAX_NAME].
void MSA::ToAlnFile(TextFile &File) const
	{
	if (getMuscleContext()->params.g_bClwStrict)
		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
	else
		{
		File.PutString("MUSCLE ("
		  MUSCLE_MAJOR_VERSION "." MUSCLE_MINOR_VERSION ")"
		  " multiple sequence alignment\n");
		File.PutString("\n");
		}

	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	// With no columns there are no blocks to write. Without this guard the
	// unsigned expression (uColCount - 1) wraps around and the block loop
	// would index far outside the alignment.
	if (0 == uColCount)
		return;

	// Common label width: longest label, clamped to [MIN_NAME, MAX_NAME].
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const int iLength = AlnLabelLength(GetSeqName(uSeqIndex));
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
		}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

	const unsigned uLineCount = (uColCount - 1)/uCharsPerLine + 1;
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
		{
		File.PutString("\n");
		const unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;

		char Name[MAX_NAME+1];
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			const char *ptrName = GetSeqName(uSeqIndex);
			int iLength = AlnLabelLength(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;

			// Blank-fill, overlay the label, then cut at the common width.
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, iLength);
			Name[iLongestNameLength] = 0;

			File.PutFormat("%s      ", Name);
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
				{
				const char c = GetChar(uSeqIndex, uColIndex);
				// toupper is only defined for unsigned char values (or EOF).
				File.PutChar((char) toupper((unsigned char) c));
				}
			File.PutString("\n");
			}

		// Consensus line: blank label column, then one symbol per column.
		memset(Name, ' ', MAX_NAME);
		Name[iLongestNameLength] = 0;
		File.PutFormat("%s      ", Name);
		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
		  ++uColIndex)
			{
			const char c = GetAlnConsensusChar(*this, uColIndex);
			File.PutChar(c);
			}
		File.PutString("\n");
		}
	}
/*
 * Run a splice search over a sequence and store one result per call to
 * StoreSpliceSearch (three calls, numbered 1..3).
 *
 * seq_id    id of the sequence to search
 * start     first base of the range
 * end       last base of the range; -1 means "up to the sequence length"
 * donor     donor weight matrix name (passed through to splice_search)
 * acceptor  acceptor weight matrix name (passed through to splice_search)
 * id        out: id[0..2] receive the values returned by StoreSpliceSearch
 *
 * Returns 0 on success, -1 on allocation failure, search failure, no matches
 * or a failure to store a result.
 */
int init_splice_search_create(int seq_id,
			      int start,
			      int end,
			      char *donor,
			      char *acceptor,
			      int *id) /* out */
{
    in_splice *input1;
    in_splice *input2;
    in_splice *input3;
    char *seq;
    int seq_len;
    int seq_num;
    SpliceResults *splice_result;
    Tcl_DString input_params;
    int irs;

    vfuncheader("splice search");
    set_char_set(DNA);

    /* Release whatever was already obtained if a later allocation fails */
    if (NULL == (input1 = (in_splice *)xmalloc (sizeof(in_splice))))
	return -1;

    if (NULL == (input2 = (in_splice *)xmalloc (sizeof(in_splice)))) {
	xfree(input1);
	return -1;
    }

    if (NULL == (input3 = (in_splice *)xmalloc (sizeof(in_splice)))) {
	xfree(input1);
	xfree(input2);
	return -1;
    }

    if (NULL == (splice_result = (SpliceResults *)xmalloc 
		 (sizeof(SpliceResults)))) {
	xfree(input1);
	xfree(input2);
	xfree(input3);
	return -1;
    }

    seq_num = GetSeqNum(seq_id);
    seq = GetSeqSequence(seq_num);
    seq_len = GetSeqLength(seq_num);

    /* if the end has not been defined, set it to be the sequence length */
    if (end == -1) {
	end = seq_len;
    }

    irs = splice_search(seq, seq_len, start, end, donor, acceptor, 
			splice_result);
    
    if (irs == -1) {
      xfree(splice_result);
      xfree(input1);
      xfree(input2);
      xfree(input3);
      verror(ERR_WARN, "splice search",
	     "error in splice search (maybe none found)");
      return -1;
    }

    if (splice_result->ied_f1->number_of_res == 0 &&
	splice_result->ied_f2->number_of_res == 0 &&
	splice_result->ied_f3->number_of_res == 0 &&
	splice_result->eia_f1->number_of_res == 0 &&
	splice_result->eia_f2->number_of_res == 0 &&
	splice_result->eia_f3->number_of_res == 0) {
	verror(ERR_WARN, "splice search", "no matches found");
	xfree(splice_result);
	xfree(input1);
	xfree(input2);
	xfree(input3);
	return -1;
    }

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);
    vTcl_DStringAppend(&input_params, "sequence %s: from %d to %d\n"
		       "donor weight matrix %s\nacceptor weight matrix %s\n",
		       GetSeqName(seq_num), start, end, donor, acceptor);

    vfuncparams("%s", Tcl_DStringValue(&input_params));

    /*
     * Each input gets its copy of the parameter text immediately before it
     * is handed to StoreSpliceSearch. An input that has not been handed over
     * yet therefore owns nothing but its own struct and can simply be
     * xfree'd if an earlier store fails.
     * NOTE(review): what happens to an input already passed to a failing
     * StoreSpliceSearch is not visible here, so it is left untouched.
     */
    input1->params = strdup(Tcl_DStringValue(&input_params)); 
    if (-1 == (id[0] = StoreSpliceSearch(seq_num, splice_result->ied_f1,
					 splice_result->eia_f1, input1, 
					 start, end, 1))){
	verror(ERR_FATAL,"nip splice search", "error in saving matches\n");
	Tcl_DStringFree(&input_params);
	xfree(input2);
	xfree(input3);
	xfree(splice_result);
	return -1;
    }

    input2->params = strdup(Tcl_DStringValue(&input_params)); 
    if (-1 == (id[1] = StoreSpliceSearch(seq_num, splice_result->ied_f2, 
					 splice_result->eia_f2, input2, 
					 start, end, 2))){
	verror(ERR_FATAL,"nip splice search", "error in saving matches\n");
	Tcl_DStringFree(&input_params);
	xfree(input3);
	xfree(splice_result);
	return -1;
    }

    input3->params = strdup(Tcl_DStringValue(&input_params)); 
    if (-1 == (id[2] = StoreSpliceSearch(seq_num, splice_result->ied_f3,
					 splice_result->eia_f3, input3, 
					 start, end, 3))){
	verror(ERR_FATAL,"nip splice search", "error in saving matches\n");
	Tcl_DStringFree(&input_params);
	xfree(splice_result);
	return -1;
    }

    Tcl_DStringFree(&input_params);

    /* only the container is released; its members were passed to the stores */
    xfree(splice_result);
    return 0;
}
/*
 * Callback for a string search result: dispatches on jdata->job.
 *
 * seq_num  sequence number, forwarded to nip_string_search_shutdown
 * obj      the seq_result this callback belongs to
 * jdata    request block; the answer (name text, operation list, info
 *          pointer, ...) is written back into it
 */
void nip_string_search_callback(int seq_num, void *obj, seq_reg_data *jdata)
{
    seq_result *res = (seq_result *) obj;
    in_string_search *search_in = res->input;
    out_raster *raster = res->output;
    stick *sticks = res->data;
    int res_id = res->id;
    char tcl_cmd[1024];

    switch (jdata->job) {
    case SEQ_QUERY_NAME:
	sprintf(jdata->name.line, "string search");
	break;

    case SEQ_KEY_NAME:
	sprintf(jdata->name.line, "string #%d", res_id);
	break;

    case SEQ_GET_BRIEF:
	{
	    int brief_seq = GetSeqNum(res->seq_id[0]);

	    sprintf(jdata->name.line, "string: seq=%s",
		    GetSeqName(brief_seq));
	    break;
	}

    case SEQ_GET_OPS:
	/* the operation list is a sequence of NUL separated entries */
	if (!raster->hidden) {
	    jdata->get_ops.ops = "Information\0List results\0Configure\0"
	       "Hide\0PLACEHOLDER\0SEPARATOR\0Remove\0";
	} else {
	    jdata->get_ops.ops = "Information\0List results\0"
		"PLACEHOLDER\0PLACEHOLDER\0Reveal\0SEPARATOR\0Remove\0";
	}
	break;

    case SEQ_INVOKE_OP:
	/* op is the index of the entry chosen from the list above */
	switch (jdata->invoke_op.op) {
	case 0: /* information */
	    vfuncheader("input parameters");
	    vmessage("%s\n", search_in->params);
	    break;

	case 1: /* results */
	    Tcl_Eval(raster->interp, "SetBusy");
	    vfuncheader("results");
	    res->txt_func(res);
	    Tcl_Eval(raster->interp, "ClearBusy");
	    break;

	case 2: /* configure */
	    sprintf(tcl_cmd, "RasterConfig %d", res_id);
	    if (Tcl_Eval(raster->interp, tcl_cmd) != TCL_OK) {
		puts(Tcl_GetStringResult(raster->interp));
	    }
	    break;

	case 3: /* hide all */
	case 4: /* reveal all */
	    raster->hidden = (jdata->invoke_op.op == 3) ? 1 : 0;
	    ReplotAllCurrentZoom(raster->interp, raster->raster_win);
	    break;

	case 5: /* remove */
	    nip_string_search_shutdown(raster->interp, res,
				       raster->raster_win, seq_num);
	    break;
	}
	break;

    case SEQ_PLOT:
	res->pr_func(res, (seq_reg_plot *)jdata);
	break;

    case SEQ_RESULT_INFO:
	switch (jdata->info.op) {
	case OUTPUT:
	    jdata->info.result = (void *)raster;
	    break;

	case INPUT:
	    jdata->info.result = (void *)search_in;
	    break;

	case DIMENSIONS:
	    jdata->info.result = (void *)&sticks->ap_array[0].dim;
	    break;

	case INDEX:
	    /* the id itself travels in the pointer-sized slot */
	    jdata->info.result = (void *)res_id;
	    break;

	case RESULT:
	    jdata->info.result = (void *)res;
	    break;

	case WIN_NAME:
	    jdata->info.result = (void *)raster->raster_win;
	    break;

	case WIN_SIZE:
	    {
		/* static: the caller receives a pointer to this storage */
		static d_point plot_size;

		plot_size.x = get_default_int(raster->interp, nip_defs,
					      w("RASTER.PLOT_WIDTH"));
		plot_size.y = get_default_double(raster->interp, nip_defs,
					w("NIP.STRING_SEARCH.PLOT_HEIGHT"));

		jdata->info.result = (void *)&plot_size;
		break;
	    }
	}
	break;

    case SEQ_HIDE:
	raster->hidden = 1;
	break;

    case SEQ_REVEAL:
	raster->hidden = 0;
	break;

    case SEQ_QUIT:
    case SEQ_DELETE:
	nip_string_search_shutdown(raster->interp, res, raster->raster_win,
				   seq_num);
	break;
    }
}
/*
 * Search a range of a sequence for (inexact) occurrences of a string and
 * store the matches via store_string_search.
 *
 * strand_sym    "-" complements the search string in place before searching;
 *               "+" is reported as "forward", anything else as "reverse"
 * match         minimum percent match; converted to a minimum match count
 * string        the string to look for (modified in place when
 *               strand_sym is "-")
 * use_iub_code  non-zero is reported as "iub", zero as "literal"; the flag
 *               is passed on to iubc_inexact_match
 * start, end    range to search; end == -1 means the sequence length
 * seq_id        id of the sequence to search
 * id            out: value returned by store_string_search
 *
 * Returns 0 on success, -1 on allocation failure, when no matches are found
 * or when the matches cannot be stored.
 */
int init_nip_string_search_create(char *strand_sym,
				  float match,
				  char *string,
				  int use_iub_code,
				  int start,
				  int end,
				  int seq_id,
				  int *id)
{
    in_string_search *input;
    char *seq;
    int seq_len;
    int seq_num;
    int string_length;
    int max_matches, min_match;
    int n_matches;
    int *pos;
    int *score;
    Tcl_DString input_params;
    const char *strand;
    const char *code;

    vfuncheader("string search");

    if (NULL == (input = (in_string_search *)xmalloc
		 (sizeof(in_string_search))))
	return -1;

    seq_num = GetSeqNum(seq_id);
    seq = GetSeqSequence(seq_num);
    seq_len = GetSeqLength(seq_num);

    /* if the end has not been defined, set it to be the sequence length */
    if (end == -1) {
	end = seq_len;
    }

    /* from here on seq_len is the length of the searched range */
    seq_len = end - start + 1;

    max_matches = seq_len;
    string_length = strlen(string);

    /* free what has already been obtained if a later allocation fails */
    if (NULL == (pos = (int *)xmalloc((max_matches + 1) * sizeof(int)))) {
	xfree(input);
	return -1;
    }
    
    if (NULL == (score = (int *)xmalloc((max_matches + 1) * sizeof(int)))) {
	xfree(pos);
	xfree(input);
	return -1;
    }

    /* convert percentage mis-matches into min matches */
    min_match = (int) (ceil(string_length * match / 100)); 

    /* complement */
    if (strcmp(strand_sym, "-") == 0) {
	complement_seq(string, string_length);
    }
    
#ifdef DEBUG
    printf("min_match %d match %f string %d\n", min_match, match, string_length);
#endif

    n_matches = iubc_inexact_match(&seq[start-1], seq_len, string, 
				   string_length, min_match, use_iub_code, 
				   pos, score, max_matches);

    if (n_matches <= 0) {
	vmessage("String search: no matches found\n");
	xfree(input);
	xfree(pos);
	xfree(score);
	return -1;
    }

    input->string = strdup(string);

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);
    strand = (strcmp(strand_sym, "+") == 0) ? "forward" : "reverse";
    code = use_iub_code ? "iub" : "literal";
    vTcl_DStringAppend(&input_params, "sequence %s: from %d to %d\n"
		       "strand %s\nuse %s code\nminimum percent match %f\nstring %s\n",
		       GetSeqName(seq_num), start, end,
		       strand, code, match, string);

    vfuncparams("%s", Tcl_DStringValue(&input_params));
    input->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);

    if (-1 == (*id = store_string_search(seq_num, input, start, end, pos, 
					 score, n_matches, string_length))){
	verror(ERR_FATAL,"string search", "error in saving matches\n");
	/*
	 * pos and score are released by this function after a successful
	 * store, so they are released here as well.
	 * NOTE(review): what a failed store does with "input" is not visible
	 * here, so it is left untouched.
	 */
	xfree(pos);
	xfree(score);
	return -1;
    }

    xfree(pos);
    xfree(score);
    return 0;
}
/*
 * Read an EMBOSS result file and store it as a dot plot between two
 * sequences via store_emboss_dot.
 *
 * interp              not used by this function
 * seq_id_h, seq_id_v  ids of the horizontal and vertical sequences
 * start_*, end_*      ranges; an end of -1 means the sequence length
 * filename            file handed to read_emboss_data_file
 * graph_obj           passed to read_emboss_data_file and store_emboss_dot
 * id                  out: value returned by store_emboss_dot
 *
 * Returns 0 on success, -1 if the file yields no data, on allocation
 * failure or if the result cannot be stored.
 */
int init_emboss_dot_create(Tcl_Interp *interp, 
			   int seq_id_h, 
			   int start_h, 
			   int end_h,
			   int seq_id_v, 
			   int start_v, 
			   int end_v,
			   char *filename,
			   Tcl_Obj **graph_obj,
			   int *id)
{
    int seq_num_h, seq_len_h, seq_num_v, seq_len_v;
    in_emboss *input;
    Tcl_DString input_params;
    e_graph *data = NULL;
    text_emboss *text_data;

    seq_num_h = GetSeqNum(seq_id_h);
    seq_num_v = GetSeqNum(seq_id_v);

    seq_len_h = GetSeqLength(seq_num_h);
    seq_len_v = GetSeqLength(seq_num_v);

    /* if the end has not been defined, set it to be the sequence length */
    if (end_h == -1)
	end_h = seq_len_h;

    if (end_v == -1)
	end_v = seq_len_v;

    read_emboss_data_file(filename, &data, graph_obj, &text_data);
    if (!data) {
	verror(ERR_FATAL,"emboss", "error in reading results\n");
	return -1;
    }

    /*
     * The three per-match scratch arrays previously allocated here were
     * never read or written, and leaked on every error return, so they
     * have been removed.
     */
    if (NULL == (input = (in_emboss *)xmalloc (sizeof(in_emboss)))) {
	/* NOTE(review): text_data ownership is not visible here */
	xfree(data);
	return -1;
    }

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);
    vTcl_DStringAppend(&input_params, "sequence %s: from %d to %d\nsequence %s: from %d to %d\n",
		       GetSeqName(seq_num_h), start_h, end_h,
		       GetSeqName(seq_num_v), start_v, end_v);

    vfuncparams("%s", Tcl_DStringValue(&input_params));
    input->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);

    if (-1 == (*id = store_emboss_dot(seq_num_h, start_h, end_h,
				      seq_num_v, start_v, end_v, 
				      data, input, text_data, graph_obj))) {
	verror(ERR_FATAL,"emboss", "error in saving results\n");
	/*
	 * data is released by this function after a successful store, so it
	 * is released here as well.
	 * NOTE(review): what a failed store does with "input" and
	 * "text_data" is not visible here, so they are left untouched.
	 */
	xfree(data);
	return -1;
    }

    xfree(data);
    return 0;
}
/*
 * Read "<int> <char> <int>" records from an EMBOSS output file for a range
 * of a sequence. Storing the result is still disabled (see the TODO block).
 *
 * interp      not used by this function
 * seq_id      id of the sequence the data belongs to
 * start, end  range; end == -1 means the sequence length
 * filename    file to read; if its first character is 'P' the rest of that
 *             line and the next two lines are skipped
 * id          out: only written inside the TODO block
 *
 * Returns 0 on success, -1 on allocation failure or if the file cannot be
 * opened.
 */
int init_emboss_stick_create(Tcl_Interp *interp, 
			     int seq_id, 
			     int start, 
			     int end,
			     char *filename,
			     int *id)
{
    FILE *fp;
    int i;
    int *pos = NULL;
    int *res = NULL;
    int seq_num, seq_len;
    in_emboss *input;
    Tcl_DString input_params;
    char dummy1[100];
    char dummy2;

    seq_num = GetSeqNum(seq_id);
    seq_len = GetSeqLength(seq_num);

    /* if the end has not been defined, set it to be the sequence length */
    if (end == -1) {
	end = seq_len;
    }
    seq_len = end - start + 1;

    /* both arrays hold seq_len+1 ints; free earlier ones on failure */
    if (NULL == (pos = (int *)xmalloc((seq_len+1) * sizeof(int))))
	return -1;
    if (NULL == (res = (int *)xmalloc((seq_len+1) * sizeof(int)))) {
	xfree(pos);
	return -1;
    }
    if (NULL == (input = (in_emboss *)xmalloc (sizeof(in_emboss)))) {
	xfree(pos);
	xfree(res);
	return -1;
    }

    if (NULL == (fp = fopen(filename, "r"))) {
	printf("unable to open file\n");
	xfree(pos);
	xfree(res);
	xfree(input);
	return -1;
    }

    if (fgetc(fp) == 'P') {
	printf("first char\n");
	fgets(dummy1, 100, fp);
	fgets(dummy1, 100, fp);
	fgets(dummy1, 100, fp);
    } else {
	rewind(fp);
    }

    /*
     * Stop at the first record that does not convert completely: a test
     * against EOF alone never terminates on a malformed line, because
     * fscanf then returns a short count without consuming the input.
     * Also stop once the arrays (seq_len+1 entries) are full.
     */
    i = 0;
    while (i <= seq_len &&
	   fscanf(fp, "%d %c %d\n", &pos[i], &dummy2, &res[i]) == 3) { 
#ifdef DEBUG
	printf("pos %d result %d\n", pos[i], res[i]);
#endif
	i++;
    }
    fclose (fp);

    printf("num points %d\n", i);

    /* create inputs parameters */
    Tcl_DStringInit(&input_params);
    vTcl_DStringAppend(&input_params, "sequence %s: from %d to %d\n",
		       GetSeqName(seq_num), start, end);

    vfuncparams("%s", Tcl_DStringValue(&input_params));
    input->params = strdup(Tcl_DStringValue(&input_params)); 
    Tcl_DStringFree(&input_params);

    /*
     * NOTE(review): while the TODO block is compiled out nothing takes
     * "input", so it (and its params copy) is never released. The disabled
     * call also refers to "score", which is not declared in this function.
     */
#ifdef TODO
    if (-1 == (*id = store_emboss_graph(seq_num, start, end, pos, score, 
					i, input))) {
	verror(ERR_FATAL,"emboss", "error in saving results\n");
	xfree(pos);
	xfree(res);
	return -1;
    }
#endif
    xfree(pos);
    xfree(res);

    return 0;
}
Example #25
0
/*
 * Callback for a stop/start codon result: dispatches on jdata->job.
 *
 * seq_num  sequence number, forwarded to nip_stop_codons_shutdown
 * obj      the seq_result this callback belongs to
 * jdata    request block; answers are written back into it
 *
 * The plot data and the Tcl interpreter are only available when the result
 * has an element (s_result->e). Requests that need them are ignored when
 * they are missing, rather than dereferencing uninitialised pointers.
 */
void nip_stop_codons_callback(int seq_num, void *obj, seq_reg_data *jdata)
{
    seq_result *s_result = (seq_result *) obj;
    in_s_codon *input = s_result->input;
    int result_id = s_result->id;
    char cmd[1024];
    element *e = s_result->e;
    plot_data *result = NULL;
    Tcl_Interp *interp = NULL;

    if (e) {
	result = find_plot_data(e, result_id);
	interp = e->c->interp;
    }

    switch(jdata->job) {
    case SEQ_QUERY_NAME:
	if (s_result->type == SEQ_TYPE_STOPCODON) {
	    sprintf(jdata->name.line, "Plot stop codons");
	} else {
	    sprintf(jdata->name.line, "Plot start codons");
	}
	break;    

    case SEQ_KEY_NAME:
	if (s_result->type == SEQ_TYPE_STOPCODON) {
	    sprintf(jdata->name.line, "stop f%d #%d", s_result->frame,
		    result_id);
	} else {
	    sprintf(jdata->name.line, "start f%d #%d", s_result->frame,
		    result_id);
	}
	break;
	
    case SEQ_GET_BRIEF:
	if (s_result->type == SEQ_TYPE_STOPCODON) {
	    sprintf(jdata->name.line, "stop codons: seq=%s frame=%d", 
		    GetSeqName(GetSeqNum(s_result->seq_id[HORIZONTAL])), 
		    s_result->frame);
	} else {
	    sprintf(jdata->name.line, "start codons: seq=%s frame=%d", 
		    GetSeqName(GetSeqNum(s_result->seq_id[HORIZONTAL])), 
		    s_result->frame);
	}
	break;

    case SEQ_GET_OPS:
	if (!result)
	    break;
	/* the operation list is a sequence of NUL separated entries */
	if (result->hidden) {
	    jdata->get_ops.ops = "Information\0List results\0"
		"PLACEHOLDER\0PLACEHOLDER\0Reveal\0SEPARATOR\0Remove\0";
	} else {
	    jdata->get_ops.ops = "Information\0List results\0Configure\0"
	       "Hide\0PLACEHOLDER\0SEPARATOR\0Remove\0";
	}
	break;
    case SEQ_INVOKE_OP:
	switch (jdata->invoke_op.op) {
	case 0: /* information */
	    vfuncheader("input parameters");
	    vmessage("%s\n", input->params); 
	    break;
	case 1: /* results */
	    if (!interp)
		break;
	    Tcl_Eval(interp, "SetBusy");
	    vfuncheader("results");
	    s_result->txt_func(s_result);
	    Tcl_Eval(interp, "ClearBusy");
	    break;
	case 2: /* configure */
	    if (!e || !result)
		break;
	    sprintf(cmd, "result_config %d %d %s %d %s", 
		    result_id, result->line_width, result->colour, e->id, 
		    e->win);
	    if (TCL_OK != Tcl_Eval(interp, cmd)){
		/* use the accessor rather than reading interp->result */
		puts(Tcl_GetStringResult(interp));
	    }
	    break;
	case 3: /* hide all */
	    if (!e || !result)
		break;
	    result->hidden = 1;
	    Tcl_VarEval(e->c->interp, "result_list_update ", e->c->win, NULL);
	    e->replot_func(e);
	    break;
	case 4: /* reveal all */
	    if (!e || !result)
		break;
	    result->hidden = 0;
	    Tcl_VarEval(e->c->interp, "result_list_update ", e->c->win, NULL);
	    e->replot_func(e);
	    break;
	case 5: /* remove */ 
	    {
		if (!e)
		    break;
		nip_stop_codons_shutdown(interp, s_result, e, seq_num);
		remove_result_from_element(e, result_id);
		break;
	    }
	}
	break;
    case SEQ_PLOT:
	{
	    Tcl_Obj *graph_obj = (Tcl_Obj *) s_result->data;
	    Graph *graph;
	    
	    if (!e)
		break;

	    /* HACK - but the only way I have discovered to making stop 
	       codons work. Need to set the dim.y1 to be the height of the
	       current canvas (for instance moving codons from gene pref plot
	       to own plot
	    */
	    graph = Tcl_GetGraphFromObj(graph_obj);
	    graph->dim.y1 = canvas_height(interp, e->win);
	    s_result->pr_func(s_result, (seq_reg_plot *)jdata);
	    break;
	}
    case SEQ_RESULT_INFO:
	switch (jdata->info.op) {
	case RESULT:
	    jdata->info.result = (void *)s_result;
	    break;
	case WIN_NAME:
	    {
		char *r_win;

		if (!e)
		    break;
		r_win = e->win;
		jdata->info.result = (void *)r_win;
		break;
	    }
	case WIN_SIZE:
	    {
		/* static: the caller receives a pointer to this storage */
		static d_point pt;

		if (!interp)
		    break;
		pt.x = get_default_int(interp, tk_utils_defs,
					w("ELEMENT.PLOT_WIDTH"));
		pt.y = get_default_double(interp, tk_utils_defs,
					   w("ELEMENT.SINGLE.PLOT_HEIGHT"));

		jdata->info.result = (void *)&pt;
		break;
	    }
	}
	break;
    case SEQ_QUIT:
    case SEQ_DELETE: 
	{
	    /* NOTE(review): interp and e may both be NULL here; whether the
	       shutdown routine copes with that is not visible from here */
	    nip_stop_codons_shutdown(interp, s_result, e, seq_num);
	    break;
	}
    }
}
Example #26
0
/*
 * Build the name of a translated sequence: "base" with its first "_rf123"
 * removed (if present), followed by "_rf<rf+1>_<num>".
 * Returns xmalloc'd storage, or NULL if the allocation fails.
 */
static char *translated_seq_name(const char *base, int rf, int num)
{
    const char *tag = strstr(base, "_rf123");
    char *name;

    /* "_rf" + int + "_" + int + NUL needs at most 27 bytes beyond base */
    if (NULL == (name = (char *)xmalloc((strlen(base) + 28) * sizeof(char))))
	return NULL;

    if (tag) {
	/* text before the tag, then text after it (the tag is 6 chars) */
	sprintf(name, "%.*s%s_rf%d_%d", (int)(tag - base), base, tag + 6,
		rf+1, num);
    } else {
	sprintf(name, "%s_rf%d_%d", base, rf+1, num);
    }
    return name;
}

/*
 * Translate a DNA range into protein in one reading frame and add the result
 * as a new LINEAR PROTEIN sequence. If the source is a sub-sequence (its
 * name differs from its parental name), a matching sub-sequence of the new
 * protein is added as well.
 *
 * interp      passed to AddSequence / AddSubSequence
 * seq_num     number of the DNA sequence to translate
 * rf          reading frame offset; used as rf+1 in the generated names
 * start, end  range to translate
 *
 * Returns the number of the new sequence (as returned by AddSequence or
 * AddSubSequence), or -1 on allocation failure or if AddSequence fails.
 */
int TranslateSeq(Tcl_Interp *interp,
		 int seq_num,
		 int rf,
		 int start,
		 int end)
{
    int i;
    char *name;
    char *dna_seq;
    char *prot_seq;
    int cnt = 0;
    int seq_id = GetSeqId(seq_num);
    int new_seq_num;
    char *parental_name, *child_name;
    int length = end - start + 1;
    static int num = 0;	/* counter used to make generated names distinct */

#ifdef DEBUG
    printf("START translate seq %d to %d\n", start, end);
#endif
    dna_seq = GetSeqSequence(seq_num);
    if (NULL == (prot_seq = (char *)xmalloc(((length/3)+3) * sizeof(char))))
	return -1;

    for (i = rf+start-1; i < end-2; i+=3) {
	prot_seq[cnt++] = codon_to_acid1(&dna_seq[i]);
    }
    prot_seq[cnt] = '\0';
#ifdef DEBUG
    printf("%s\n", prot_seq);
#endif
    /* 
     * special case: remove _rf123 from name before adding _rfx to end 
     */
    parental_name = GetParentalSeqName(seq_num);
    child_name = GetSeqName(seq_num);

    if (NULL == (name = translated_seq_name(parental_name, rf, num))) {
	xfree(prot_seq);
	return -1;
    }

    /* proteins can only be LINEAR ! */
    if (-1 == (new_seq_num = AddSequence(interp, -1, GetSeqLibrary(seq_num), 
					 name, prot_seq, LINEAR, PROTEIN, NULL, " "))) {
	/*
	 * name is released after a successful AddSequence as well.
	 * NOTE(review): prot_seq is never released on the success path, so
	 * presumably AddSequence keeps it - confirm before freeing it here.
	 */
	xfree(name);
	return -1;
    }
    xfree(name);

    if (strcmp(parental_name, child_name) != 0) {
	/* sub-sequence */
	/* 
	 * need to get seq num from seq_id instead of using seq_num incase
	 * AddSequence has deleted duplicate names
	 */
	start = ceil((GetSubSeqStart(GetSeqNum(seq_id))-1)/3.0 + 1);
	end = (GetSubSeqEnd(GetSeqNum(seq_id)) - rf) / 3;

	if (NULL == (name = translated_seq_name(child_name, rf, num)))
	    return -1;

	/*
	 * NOTE(review): name is deliberately not released here, as before;
	 * whether AddSubSequence keeps the pointer is not visible from here.
	 */
	new_seq_num = AddSubSequence(interp, GetSeqId(new_seq_num), start, end, 
				     name);
    }
    num++;
    return new_seq_num;
}