Esempio n. 1
0
/*
 * Count the probes that would be emitted for sequence `gen`.
 *
 * Every window of parms->oligo_len positions (1-based start positions) is
 * handed to CheckProbe(); the start positions of accepted windows are
 * collected.  Depending on parms->nb_probes and parms->side_bias either all
 * of them or an evenly spaced subset is walked.  The actual output call
 * (PrintProbe) is disabled, so the function only counts.
 *
 *   gen    sequence to scan
 *   name   only referenced by the disabled PrintProbe calls
 *   parms  reads oligo_len, nb_probes (0 = no limit) and side_bias
 *          (0 = 5' side, 1 = 3' side, anything else = uniform)
 *
 * Returns the number of probes walked; 0 when the sequence is shorter than
 * one oligo or when the position table cannot be allocated.
 */
static int GenerateProbes(sequence_tp *gen,char *name,oligoDesignParms *parms)
{
	sequence_tp *sel;
	int nbpos,*is_ok,nb_ok=0,nb_ok3p=0,nb_ok5p=0,i,is,pcnt=0,mm;
	double pos,step;

	nbpos = sequence_length(gen)-parms->oligo_len+1;
	/* No complete window fits: nothing to count (also avoids a negative
	   allocation size). */
	if (nbpos < 1)
		return 0;
	is_ok = malloc((nbpos+1)*sizeof(int));
	if (!is_ok)
		return 0;

	for (i = 1; i <= nbpos; i++)
	{
		/* Extract the window once and test that copy. */
		sel = sequence_select_part(gen,i,i+(parms->oligo_len-1));
		/* NOTE(review): sel is not released here; whether CheckProbe()
		   takes ownership of its argument is not visible - confirm. */
		if (CheckProbe(sel,parms))
		{
			is_ok[nb_ok] = i;
			nb_ok++;
			if (i <= nbpos/2) nb_ok5p++; else nb_ok3p++;
		}
	}

	if ((parms->nb_probes == 0) || (nb_ok <= parms->nb_probes)  || ((parms->side_bias == 0) && (nb_ok5p <= parms->nb_probes)) || ((parms->side_bias == 1) && (nb_ok3p <= parms->nb_probes)) )
	{
		/* No subsetting needed: walk the accepted positions directly. */
		if (parms->nb_probes == 0)
			mm = nb_ok;
		else
			mm = parms->nb_probes;
		/* 3' bias walks the table backwards from its last entry. */
		if (parms->side_bias==1)
		{
			i = nb_ok-1;
			is = -1;
		}
		else
		{
			i = 0;
			is = 1;
		}
		/* NOTE(review): with side_bias==1 and nb_ok > nb_probes the start
		   index already fails (i<mm), so nothing is counted - confirm
		   whether that is intended. */
		while ((i<mm) && (i >= 0) && (i <nb_ok))
		{
/*			PrintProbe(sequence_select_part(gen,is_ok[i],is_ok[i]+(parms->oligo_len-1)),name,is_ok[i]); */
			i += is; pcnt++;
		}
	}
	else  /* More than maximum probes possible generate subset */
	{
		if (parms->side_bias == 0)  /* 5' probes */
		{
			step = (double)nb_ok5p/parms->nb_probes;
			pos = 0;
		}
		else if (parms->side_bias == 1)  /* 3' probes */
		{
			step = -(double)nb_ok3p/parms->nb_probes;
			pos = nb_ok-0.1;
		}
		else  /* uniformly distributed probes; also the fallback for any
		         other side_bias so step/pos are never left unset */
		{
			step = (double)nb_ok/parms->nb_probes;
			pos = 0;
		}
		while ((pcnt < parms->nb_probes) && ((int)pos >= 0) && ((int)pos < nb_ok))
		{
			/* start position of the probe selected at this step */
			i = is_ok[(int)pos];
/*			PrintProbe(sequence_select_part(gen,i,i+(parms->oligo_len-1)),name,i); */
			pos += step;
			pcnt++;
		}

	}
	free(is_ok);
	return pcnt;
}
Esempio n. 2
0
/*
 * Cut a sequence into tiles of fixed length.
 *
 *   Rsequence  character vector of length 1 holding the sequence
 *   Rlen       integer of length 1, tile length (>= 1)
 *   Rstep      integer of length 1, distance between tile starts (>= 1)
 *   Rcircular  integer of length 1; non-zero makes tiles that start near
 *              the end wrap around to the beginning of the sequence
 *
 * Returns a character vector with one tile per element.  Raises an R error
 * on invalid arguments, when the sequence is shorter than one tile, or when
 * the work buffer cannot be allocated.
 */
SEXP seqlib_gen_tile(SEXP Rsequence,SEXP Rlen,SEXP Rstep,SEXP Rcircular)
{
	sequence_tp *ms,*oligo;
	int cnt,pos,len,olen,step,circular,i=0;
	SEXP res;
	char *seqstring;
	if(!isString(Rsequence) || length(Rsequence) != 1)
	   error("gen_tile: sequence is not a single string");
	if (!isInteger(Rlen) || length(Rlen) != 1)
	   error("gen_tile: len value must be single int");
	if (!isInteger(Rstep) || length(Rstep) != 1)
	   error("gen_tile: step value must be single int");
	if (!isInteger(Rcircular) || length(Rcircular) != 1)
	   error("gen_tile: circular value must be single int");
	olen = INTEGER(Rlen)[0];
	step = INTEGER(Rstep)[0];
	circular = INTEGER(Rcircular)[0];
	if (olen < 1)
	   error("gen_tile: length value must be 1 or higher");
	if (step < 1)
	   error("gen_tile: step value must be 1 or higher");

	ms = sequence_from_string(CHAR(STRING_ELT(Rsequence,0)));
	len = sequence_length(ms);
	if (len < olen)
	{
	    sequence_free(ms);
	    error("gen_tile: tile len must be smaller than sequence length");
	}
	/* Number of tile start positions 1, 1+step, ... that are used. */
	if (circular)
		cnt = 1+ (len-1)/step;
	else
		cnt = 1+(len-olen)/step;
	pos = 1;
	/* mkChar() reads a NUL-terminated string, so a tile of olen characters
	   needs olen+1 bytes. */
	seqstring= (char*)malloc(sizeof(char)*(olen+1));
	if (!seqstring)
	{
	    sequence_free(ms);
	    error("gen_tile: out of memory");
	}
	PROTECT(res = allocVector(STRSXP,cnt));
	/* Tiles that fit completely inside the sequence. */
	while (pos+olen-1 <= len)
	{
		oligo = sequence_select_part(ms,pos,pos+olen-1);
		SET_STRING_ELT(res,i,mkChar(sequence_chars(oligo,seqstring)));
		sequence_free(oligo);
		pos = pos+step;
		i++;
	}
	if (circular)
	{
		/* Remaining start positions: take the tail of the sequence and
		   complete the tile with characters from its beginning. */
		while (pos <= len)
		{
			oligo = sequence_select_part(ms,pos,len);
			/* NOTE(review): the temporary passed to sequence_append() is
			   not released here; whether sequence_append() takes
			   ownership is not visible - confirm. */
			sequence_append(oligo,sequence_select_part(ms,1,olen-(len-pos)-1));
			SET_STRING_ELT(res,i,mkChar(sequence_chars(oligo,seqstring)));
			sequence_free(oligo);
			pos = pos+step;
			i++;
		}
	}
	free(seqstring);
	sequence_free(ms);
	UNPROTECT(1);
	return res;
}
Esempio n. 3
0
/*
 * Read all sequences of a FASTA file.
 *
 *   filename  character vector of length 1, path of the file to read
 *
 * The file is read twice: a first pass counts the records and finds the
 * longest sequence, a second pass converts each record to a string.
 *
 * Returns a character vector of sequences whose names attribute holds the
 * comment read with each record.  Raises an R error when the argument is
 * not a single string, the file cannot be opened, it yields no sequence,
 * or the work buffer cannot be allocated.
 */
SEXP seqlib_read_fasta(SEXP filename)
{
	FILE *df;
	int i,cnt=0,sl,maxlen=0;
	sequence_tp *ds;
	SEXP res,names;
	char comment[SEQUENCE_MAX_COMMENT_LEN], *seqstring;
	if(!isString(filename) || length(filename) != 1)
	   error("filename is not a single string");

	df = fopen(CHAR(STRING_ELT(filename, 0)),"r");
	if (!df)
	{
	   	error("can not open file");
	}
	sequence_catch_warning(warning_stub);
	/* First pass: count records and remember the longest sequence. */
	while ((ds=sequence_read_fasta(df,comment,SEQ_TYPE_4BIT)))
	{
		sl = sequence_length(ds);
		if (sl > maxlen)
			maxlen = sl;
		sequence_free(ds);
		cnt++;
	}
	if (!cnt)
	{
		/* error() does not return, so the file must be closed first. */
		fclose(df);
	   	error("can not open file");
	}

	rewind(df);
	seqstring= (char*)malloc(sizeof(char)*(maxlen+1));
	if (!seqstring)
	{
		fclose(df);
		error("read_fasta: out of memory");
	}
	PROTECT(res = allocVector(STRSXP,cnt));
	/* Names are CHARSXP elements, which may only be stored in a STRSXP. */
	PROTECT(names = allocVector(STRSXP, cnt));
	/* Second pass: convert every record. */
	for (i=0; i< cnt; i++)
	{
		ds=sequence_read_fasta(df,comment,SEQ_TYPE_4BIT);
		if (!ds)
			break;	/* fewer records than in the first pass */
		SET_STRING_ELT(res,i,mkChar(sequence_chars(ds,seqstring)));
		SET_STRING_ELT(names,i,mkChar(comment));
		sequence_free(ds);
	}
	free(seqstring);
	fclose(df);
	setAttrib(res, R_NamesSymbol, names);
	UNPROTECT(2);
	return res;
}
Esempio n. 4
0
/*
 * Reverse-complement every element of a character vector.
 *
 *   sequences  character vector of sequences
 *
 * Returns a character vector of the same length with each element replaced
 * by the string form of sequence_reverse_complement() of the input.
 * Raises an R error when the argument is not a character vector or when a
 * work buffer cannot be allocated.
 */
SEXP seqlib_rev_comp(SEXP sequences)
{
	int vlen,i;
	sequence_tp *ms, *rms;
	char *seqstring;
	SEXP res;
	if(!isString(sequences))
	   error("sequence must have character type");
	vlen = length(sequences);
	PROTECT(res = allocVector(STRSXP,vlen));
	for (i=0; i< vlen; i++)
	{
		ms = sequence_from_string(CHAR(STRING_ELT(sequences,i)));
		rms = sequence_reverse_complement(ms);
		seqstring= (char*)malloc(sizeof(char)*(2+sequence_length(rms)));
		if (!seqstring)
		{
			sequence_free(rms);
			sequence_free(ms);
			error("rev_comp: out of memory");
		}
		SET_STRING_ELT(res,i,mkChar(sequence_chars(rms,seqstring)));
		free(seqstring);
		/* Sequence objects are released through the library's own
		   destructor, as everywhere else in this file. */
		sequence_free(rms);
		sequence_free(ms);
	}
	UNPROTECT(1);
	return res;
}
Esempio n. 5
0
/**
 * Finish the graph output and close the file(s) used for it.
 *
 * What is written depends on graph_format:
 *   0  writes the closing "}" and closes graph_file
 *   1  closes nodes_file and edges_file
 *   2  writes the closing "</graph>" tag and closes graph_file
 *   3  closes the temporary node and edge files, copies their lines into
 *      graph_file as the "nodes" and "edges" arrays followed by a
 *      "metaInfo" object, removes the temporary files and closes
 *      graph_file
 * Any other value does nothing.
 *
 * For format 3 the process exits with status 1 when one of the temporary
 * files cannot be reopened.
 */
void GraphOutput::close()
{
    switch (graph_format){

        case 0:
            fprintf(graph_file,"}\n");
            fclose(graph_file);
        break;

        case 1:
            fclose(nodes_file);
            fclose(edges_file);
        break;

        case 2:
            fprintf(graph_file,"</graph>\n");
            fclose(graph_file);
        break;

        case 3:
        {
            // We need to store all nodes and then all edges in the final .json file
            fclose(nodes_file);
            fclose(edges_file);
            ifstream nodes(json_nodes_file_name.c_str(), ios::in);
            ifstream edges(json_edges_file_name.c_str(), ios::in);

            // Report the two files that were actually opened; the format has
            // exactly as many %s as there are arguments.
            if(!edges || !nodes){fprintf(stderr,"Cannot open file %s or %s, exit\n", json_edges_file_name.c_str(), json_nodes_file_name.c_str()); exit(1);}

            string line;
            long count_nodes=0;
            long count_edges=0;
            long cumul =0;

            fprintf(graph_file,"{\n \"Starter\":[\n{");
            fprintf(graph_file,"\n \"nodes\": [\n");
            // for each node: the first one is printed without a leading comma,
            // and every line read is counted, including the first
            while(getline(nodes,line)){
                cumul += sequence_length(line); //save length of sequences
                fprintf(graph_file, count_nodes ? ",\n%s" : "%s", line.c_str());
                count_nodes++;
            }
            fprintf(graph_file,"\n],\n");
            fprintf(graph_file,"\"edges\": [\n");
            // for each edge: same comma and counting rule as for the nodes
            while(getline(edges,line)) {
                fprintf(graph_file, count_edges ? ",\n%s" : "%s", line.c_str());
                count_edges++;
            }
            fprintf(graph_file,"\n],\n");
            // The key spelling "cumulativeSequencesLenght" is part of the
            // emitted format and is kept unchanged.
            fprintf(graph_file,"\"metaInfo\":{\"cumulativeSequencesLenght\": %ld, \"totalNodes\": %ld,\"totalEdges\": %ld  }\n}", cumul, count_nodes, count_edges);

            // end of graph file, then close and delete the temporary files
            fprintf(graph_file,"\n]\n}\n");
            nodes.close(); remove(json_nodes_file_name.c_str());
            edges.close(); remove(json_edges_file_name.c_str());
            fclose(graph_file);
        }
        break;

        default:
        break;
    }
}
Esempio n. 6
0
 // Move the state to n: element 0 receives n, element 1 receives
 // sequence_length(n).
 void next(size_t n) {
     auto &current = std::get<0>(state);
     auto &length = std::get<1>(state);
     current = n;
     length = sequence_length(n);
 }
Esempio n. 7
0
 // Advance the state by one: element 0 is incremented, element 1 is
 // recomputed as sequence_length() of the new element 0.
 void next() {
     auto &current = std::get<0>(state);
     current += 1;
     std::get<1>(state) = sequence_length(current);
 }
Esempio n. 8
0
/* Print one line per element of s to stdout: the element's index and the
   int that sequence_find() points to for that index. */
void sequence_dump(sequence s){
  int idx = 0;
  while (idx < sequence_length(s)) {
    int *elem = (int*)sequence_find(s, idx);
    printf("sequence element %d: %d\n", idx, *elem);
    ++idx;
  }
}
Esempio n. 9
0
SEXP sequence_clean(SEXP seqs,SEXP seqno,SEXP qstart,SEXP qstop,SEXP min_len)
{
	sequence_tp  **results,*ms;
	int  i,nb_cuts,nb_seqs,cidx,nb_res,qs,qe,cp,ml,maxlen = 0,seq_name_len,pcnt,*sorttab;
	char *seqstring,**rnames;
	const char *seq_name;
	SEXP res,names;
#if DEBUG
	fprintf(stderr,"Cleaning\n");
#endif
	if(!isString(seqs))
	   error("sequence must have character type");
	if (!isInteger(seqno))
	   error("seqno value must be integer");
	if (!isInteger(qstart))
	   error("start-values must be integer");
	if (!isInteger(qstop))
	   error("start-values must be integer");
	if (!isInteger(min_len) || (length(min_len) != 1))
	   error("start-values must be a single integer");
	nb_cuts = length(seqno);
	if ((length(qstop) != nb_cuts) || (length(qstart) != nb_cuts))
	   error("non-equal number of sequence references and start/stop-positions.");
	nb_seqs = length(seqs); 
	results = (sequence_tp**)malloc(sizeof(sequence_tp*)*nb_seqs*3);
	rnames  = (char**)malloc(sizeof(char*)*nb_seqs*3);
	nb_res = 0;
	cidx = 0;
	ml = INTEGER(min_len)[0];
	for (i=0; i<nb_cuts; i++)
		INTEGER(seqno)[i]--;
#if DEBUG
	fprintf(stderr,"Sorting \n");	
#endif
	sorttab = (int*)malloc(sizeof(int)*nb_cuts);
	for (i=0; i<nb_cuts; i++) sorttab[i] = i;
	
	sort_cut_list(seqno,qstart,qstop,sorttab);
#if DEBUG
	fprintf(stderr,"Done sorting \n");	
	
#endif
	for (i=0; i<nb_seqs; i++)
	{
		pcnt=1;
		seq_name = CHAR(STRING_ELT(getAttrib(seqs,R_NamesSymbol), i));
		seq_name_len = strlen(seq_name);
		ms = sequence_from_string(CHAR(STRING_ELT(seqs,i)));
#if DEBUG
		fprintf(stderr,"Doing sequence %d/%d: (1-%d)\n",i,nb_seqs,sequence_length(ms));
#endif
		if ((cidx < nb_cuts) && (INTEGER(seqno)[cidx] == i))
		{
			cp = 1;
			while ((INTEGER(seqno)[cidx] == i) && (cidx < nb_cuts))
			{
				qs = INTEGER(qstart)[sorttab[cidx]];
				qe = INTEGER(qstop)[sorttab[cidx]];
				if ((qs -cp) > ml) 
				{
					results[nb_res] = sequence_select_part(ms,cp,qs-1);
					rnames[nb_res] = (char*)malloc(sizeof(char)*(seq_name_len+5));
					sprintf(rnames[nb_res],"%s_p%d",seq_name,pcnt++);
#if DEBUG
					fprintf(stderr,"Add result %d : %d (%d-%d)\n",nb_res,i,cp,qs-1);
#endif
					nb_res++;
				}
				if ((qe+1) > cp) cp = qe+1;
				cidx++;
			}
			if ((sequence_length(ms)-cp) >= ml)
			{
				results[nb_res] = sequence_select_part(ms,cp,sequence_length(ms));
				rnames[nb_res] = (char*)malloc(sizeof(char)*(seq_name_len+5));
				sprintf(rnames[nb_res],"%s_p%d",seq_name,pcnt);
#if DEBUG
				fprintf(stderr,"Add tail %d : %d (%d-%d)\n",nb_res,i,cp,sequence_length(ms));
#endif
				nb_res++;
			}
			sequence_free(ms);
		}
		else
		{
			results[nb_res] = ms;
#if DEBUG
			fprintf(stderr,"Not cutting sequence %d\n",i);
#endif
			rnames[nb_res] = (char*)malloc(sizeof(char)*(seq_name_len+5));
			sprintf(rnames[nb_res],"%s",seq_name);
			nb_res++;
		}
	}
	for (i=0; i<nb_res; i++)
		if (sequence_length(results[i]) > maxlen) { maxlen = sequence_length(results[i]);}	
	maxlen+=10;
	PROTECT(res = allocVector(STRSXP,nb_res));
	PROTECT(names = allocVector(VECSXP, nb_res));
	seqstring= (char*)malloc(sizeof(char)*(maxlen));
	for (i=0; i< nb_res; i++)
	{
#if DEBUG
		fprintf(stderr,"setting sequence %d \n",i);
#endif
		SET_STRING_ELT(res,i,mkChar(sequence_chars(results[i],seqstring)));
		SET_VECTOR_ELT(names,i,mkChar(rnames[i]));
	}
	free(seqstring);
	free(sorttab);
	for (i=0; i<nb_res; i++)
	{
		free(rnames[i]);
		sequence_free(results[i]);
	}
	free(results);
	free(rnames);
	setAttrib(res, R_NamesSymbol, names);
	UNPROTECT(2);
#if DEBUG
	fprintf(stderr,"Done cleaning\n");
#endif
	return res;
}