/*
 * magic_trunc_Sequence
 *
 * Truncates a DNA sequence between two indices, choosing the strand
 * from the order of the indices.
 *
 *   seq    sequence to truncate; must satisfy is_dna_Sequence()
 *   start  first index, must be >= 0
 *   end    second index, must be >= 0
 *
 * When start < end the result is simply trunc_Sequence(seq,start,end).
 * Otherwise (including start == end) the region end..start is truncated
 * and the reverse complement of that piece is returned.
 *
 * Returns NULL (after a warn()) for non-DNA input, negative indices or
 * a failed truncation.
 */
Sequence * magic_trunc_Sequence(Sequence * seq,int start,int end)
{
  Sequence * forward_piece;
  Sequence * flipped;

  if( is_dna_Sequence(seq) == FALSE ) {
    warn("Cannot magic truncate on a non DNA sequence... type is %s",Sequence_type_to_string(seq->type));
    return NULL;
  }

  if( !(start >= 0 && end >= 0) ) {
    warn("Attempting a magic truncation on indices which are less than zero [%d:%d]. Clearly impossible",start,end);
    return NULL;
  }

  /* indices in ascending order: forward strand, nothing more to do */
  if( start < end ) {
    return trunc_Sequence(seq,start,end);
  }

  /* indices not ascending: cut the region the other way round, then flip it */
  forward_piece = trunc_Sequence(seq,end,start);
  if( forward_piece == NULL ) {
    warn("Unable to truncate sequence");
    return NULL;
  }

  flipped = reverse_complement_Sequence(forward_piece);

  /* the intermediate forward-strand piece is no longer needed */
  free_Sequence(forward_piece);

  return flipped;
}
/*
 * new_cDNADB_from_single_seq
 *
 * Builds a cDNADB around one cDNA. ComplexSequences are made for
 * seq->baseseq and for its reverse complement, using the default cDNA
 * evaluation set, and handed to new_cDNADB_from_forrev_cseq().
 *
 *   seq  cDNA whose baseseq is used; dereferenced without a NULL check
 *
 * Returns whatever new_cDNADB_from_forrev_cseq() returns.
 */
cDNADB * new_cDNADB_from_single_seq(cDNA * seq)
{
  ComplexSequenceEvalSet * evalset;
  ComplexSequence * forward_cs;
  ComplexSequence * reverse_cs;
  Sequence * revcomp;

  evalset = default_cDNA_ComplexSequenceEvalSet();

  /* forward strand */
  forward_cs = new_ComplexSequence(seq->baseseq,evalset);

  /* reverse strand: the plain Sequence is only a stepping stone */
  revcomp    = reverse_complement_Sequence(seq->baseseq);
  reverse_cs = new_ComplexSequence(revcomp,evalset);
  free_Sequence(revcomp);

  /* the evaluation set is only needed while building the two ComplexSequences */
  free_ComplexSequenceEvalSet(evalset);

  return new_cDNADB_from_forrev_cseq(forward_cs,reverse_cs);
}
/*
 * main
 *
 * Filter: reads fasta sequences from stdin one at a time, marks each as
 * DNA, and writes its reverse complement to stdout in fasta format.
 *
 * Returns 0 when every sequence was reverse complemented, 1 if at least
 * one sequence could not be (that sequence is reported on stderr and
 * skipped).
 */
int main(int argc,char ** argv)
{
  Sequence * in;
  Sequence * rev;
  int status = 0;

  /* explicit comparison: the assignment in the condition is intentional */
  while( (in = read_fasta_Sequence(stdin)) != NULL ) {
    /* the input is treated as DNA regardless of what the reader guessed */
    in->type = SEQUENCE_DNA;

    rev = reverse_complement_Sequence(in);
    if( rev == NULL ) {
      /* do not hand a NULL sequence to the writer */
      fprintf(stderr,"Unable to reverse complement a sequence; skipping it\n");
      status = 1;
      free_Sequence(in);
      continue;
    }

    write_fasta_Sequence(rev,stdout);

    free_Sequence(in);
    free_Sequence(rev);
  }

  return status;
}
/*
 * get_cDNA_from_cDNADB
 *
 * Fetches the cDNA described by a DataEntry from a cDNA database.
 *
 *   cdnadb  database to read from
 *   de      entry to fetch; de->is_reversed selects the strand
 *
 * For a single-sequence database the stored forward or reverse
 * sequence is hard-linked and wrapped. Otherwise the sequence is pulled
 * from the underlying SequenceDB, forced to DNA, and reverse
 * complemented when the entry is flagged as reversed.
 *
 * Returns NULL (after a warn()) when either argument is NULL or the
 * sequence cannot be retrieved.
 */
cDNA * get_cDNA_from_cDNADB(cDNADB * cdnadb,DataEntry * de)
{
  Sequence * fetched;
  Sequence * flipped;

  if( cdnadb == NULL ) {
    warn("Cannot get entry from a null database");
    return NULL;
  }

  if( de == NULL ) {
    warn("Cannot get entry with a null dataentry");
    return NULL;
  }

  /* single sequence database: both strands are already held in memory */
  if( cdnadb->is_single_seq == TRUE ) {
    return cDNA_from_Sequence(hard_link_Sequence(de->is_reversed == TRUE
                                                 ? cdnadb->rev->seq
                                                 : cdnadb->forw->seq));
  }

  /* ordinary database: the Sequence has to come out of the seqdb */
  fetched = get_Sequence_from_SequenceDB(cdnadb->sdb,de);
  if( fetched == NULL ) {
    warn("Cannot get entry for %s from cDNA db",de->name);
    return NULL;
  }

  /* complain about non-DNA input, but force it to DNA either way */
  if( fetched->type != SEQUENCE_DNA ) {
    warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",de->name);
  }
  force_to_dna_Sequence(fetched,1.0,NULL);

  /* reversed entries are returned on the opposite strand */
  if( de->is_reversed == TRUE ) {
    flipped = reverse_complement_Sequence(fetched);
    free_Sequence(fetched);
    fetched = flipped;
  }

  return cDNA_from_Sequence(fetched);
}
/*
 * get_Genomic_from_GenomicDB
 *
 * Fetches the Genomic object described by a DataEntry from a genomic
 * database.
 *
 *   gendb  database to read from
 *   de     entry to fetch; de->is_reversed selects the strand
 *
 * For a single-sequence database the stored forward (single) or reverse
 * (revsingle) Genomic is hard-linked and returned. Otherwise the
 * sequence is pulled from the underlying SequenceDB, forced to DNA,
 * reverse complemented when the entry is flagged as reversed, and
 * converted with Genomic_from_Sequence_Nheuristic() using
 * gendb->length_of_N.
 *
 * Returns NULL (after a warn()) when either argument is NULL, when the
 * sequence cannot be retrieved, or when it cannot be reverse
 * complemented.
 */
Genomic * get_Genomic_from_GenomicDB(GenomicDB * gendb,DataEntry * de)
{
  Sequence * seq;
  Sequence * temp;

  if( gendb == NULL || de == NULL ) {
    warn("Cannot get a genomic sequence from NULL objects. Ugh!");
    return NULL;
  }

  /* single sequence database: both strands are already held in memory */
  if( gendb->is_single_seq ) {
    if( de->is_reversed == TRUE ) {
      return hard_link_Genomic(gendb->revsingle);
    }
    return hard_link_Genomic(gendb->single);
  }

  /* we need to get out the Sequence from seqdb */
  seq = get_Sequence_from_SequenceDB(gendb->sdb,de);
  if( seq == NULL ) {
    warn("Cannot get entry for %s from Genomic db",de->name);
    /* bail out here: the code below dereferences seq */
    return NULL;
  }

  /* check dna status. We assume someone knows what he is doing when he makes a genomic db! */
  if( seq->type != SEQUENCE_DNA ) {
    warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",de->name);
  }
  force_to_dna_Sequence(seq,1.0,NULL);

  if( de->is_reversed == TRUE ) {
    temp = reverse_complement_Sequence(seq);
    free_Sequence(seq);
    if( temp == NULL ) {
      warn("Unable to reverse complement %s from Genomic db",de->name);
      return NULL;
    }
    seq = temp;
  }

  return Genomic_from_Sequence_Nheuristic(seq,gendb->length_of_N);
}
/*
 * reload_cDNADB
 *
 * Database iterator step: releases the previously returned
 * ComplexSequence and produces the next one.
 *
 *   last           ComplexSequence returned by the previous call (freed
 *                  here), or NULL
 *   cdnadb         database being iterated
 *   return_status  out: DB_RETURN_OK, DB_RETURN_END or DB_RETURN_ERROR
 *                  (in forward-only mode, whatever reload_SequenceDB set)
 *
 * Three modes:
 *   - forward_only: each sequence from the SequenceDB is returned once.
 *   - is_single_seq: if done_forward is set, the stored reverse strand is
 *     returned and the flag cleared; otherwise the database is at its end.
 *   - standard: each sequence is returned forward first, and on the
 *     following call as its reverse complement (tracked through
 *     cdnadb->current and cdnadb->done_forward).
 *
 * Sequences that cannot be mapped to DNA are skipped when
 * error_handling is CDNADB_READ_THROUGH, and are an error otherwise.
 *
 * The plain Sequence objects used to build a ComplexSequence are
 * released here once the ComplexSequence exists, matching the reverse
 * strand handling, which already freed its temporary before returning
 * the ComplexSequence.
 *
 * Returns the new ComplexSequence, or NULL with *return_status set.
 */
ComplexSequence * reload_cDNADB(ComplexSequence * last,cDNADB * cdnadb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq,*temp;

  /** free Complex Sequence **/
  if( last != NULL ) {
    free_ComplexSequence(last);
  }

  if( cdnadb->forward_only == TRUE ) {
    temp = reload_SequenceDB(NULL,cdnadb->sdb,return_status);
    if( *return_status != DB_RETURN_OK ) {
      return NULL;
    }
    cs = new_ComplexSequence(temp,cdnadb->cses);
    /* nothing else keeps temp, so drop our reference (was leaked before) */
    if( temp != NULL ) {
      free_Sequence(temp);
    }
    return cs;
  }

  if( cdnadb->is_single_seq == TRUE ) {
    if( cdnadb->done_forward == TRUE ) {
      *return_status = DB_RETURN_OK;
      cdnadb->done_forward = FALSE;
      return hard_link_ComplexSequence(cdnadb->rev);
    }
    *return_status = DB_RETURN_END;
    return NULL;
  }

  /** standard database **/

  if( cdnadb->done_forward == TRUE ) {
    /* forward strand already handed out: now serve the reverse strand */
    if( cdnadb->current == NULL ) {
      warn("A bad internal cDNA db error - unable to find current sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    temp = reverse_complement_Sequence(cdnadb->current);
    if( temp == NULL ) {
      warn("A bad internal cDNA db error - unable to reverse complements current");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    cs = new_ComplexSequence(temp,cdnadb->cses);
    /* temp is released on success and on failure alike (failure used to leak it) */
    free_Sequence(temp);

    if( cs == NULL ) {
      warn("A bad internal cDNA db error - unable to make complex sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    cdnadb->done_forward = FALSE;
    return cs;
  }

  /* otherwise we have to get a new sequence */
  seq = reload_SequenceDB(NULL,cdnadb->sdb,return_status);
  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  uppercase_Sequence(seq);

  if( force_to_dna_Sequence(seq,cdnadb->error_tol,NULL) == FALSE ) {
    if( cdnadb->error_handling == CDNADB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a cDNA sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      /* skip this entry and move on to the next one */
      return reload_cDNADB(NULL,cdnadb,return_status);
    }
    warn("Unable to map %s sequence to a cDNA sequence. Failing",seq->name);
    free_Sequence(seq); /* was leaked on this path */
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  cs = new_ComplexSequence(seq,cdnadb->cses);
  if( cs == NULL ) {
    if( cdnadb->error_handling == CDNADB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a cDNA sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_cDNADB(NULL,cdnadb,return_status);
    }
    warn("Unable to map %s sequence to a cDNA sequence. Failing",seq->name);
    free_Sequence(seq); /* was leaked on this path */
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  /* remember the forward sequence so the next call can reverse complement it */
  cdnadb->current = free_Sequence(cdnadb->current);
  cdnadb->current = seq;
  cdnadb->done_forward = TRUE;

  return cs;
}
/*
 * DnaProfileSet_from_leaf_node
 *
 * Combines a single sequence (leaf) with an existing set of profiles
 * (node) into a new profile set.
 *
 *   one   leaf sequence; both it and its reverse complement are tried
 *   two   profile set to match against
 *   dpep  engine parameters (transition values, random model, score
 *         cutoff min_seq_prof, pseudo count)
 *
 * Naive profiles are built from the leaf on both strands, each is
 * matched against every profile in two, and the matches are sorted by
 * score. Matches are then merged into new profiles in sorted order
 * until the first one scoring below dpep->min_seq_prof.
 *
 * Progress messages are written to stderr.
 *
 * NOTE(review): the match pair set and the SeqAlign made for each
 * accepted match are not released in this function. Confirm who owns
 * them; DnaProfileSet_from_leaf_leaf frees its SeqAlign after building
 * a profile from it.
 */
DnaProfileSet * DnaProfileSet_from_leaf_node(Sequence * one,DnaProfileSet * two,DnaProfileEnginePara * dpep)
{
  DnaProfileSet * out;
  DnaProfile * fwd_profile;
  DnaProfile * rev_profile;
  DnaProfileMatchPairSet * matches;
  Sequence * one_rev;
  SeqAlign * merged;
  DnaProfile * merged_profile;
  int idx;

  matches = DnaProfileMatchPairSet_alloc_std();
  out     = DnaProfileSet_alloc_std();

  one_rev = reverse_complement_Sequence(one);

  /* single-sequence profiles for each strand of the leaf */
  fwd_profile = naive_DnaProfile_from_Sequence(one,dpep->seq_id,dpep->m2i,dpep->m2d,dpep->i2i,dpep->d2d);
  rev_profile = naive_DnaProfile_from_Sequence(one_rev,dpep->seq_id,dpep->m2i,dpep->m2d,dpep->i2i,dpep->d2d);

  fold_RandomModel_DnaProfile(fwd_profile,dpep->rm);
  fold_RandomModel_DnaProfile(rev_profile,dpep->rm);

  /* every node profile is matched against both strands of the leaf */
  for(idx = 0; idx < two->len; idx++) {
    fprintf(stderr,"Processing %d\n",idx);
    add_DnaProfileMatchPairSet(matches,DnaProfileMatchPair_from_DnaProfile(fwd_profile,two->dnap[idx],dpep));
    add_DnaProfileMatchPairSet(matches,DnaProfileMatchPair_from_DnaProfile(rev_profile,two->dnap[idx],dpep));
  }

  fprintf(stderr,"Sorting....\n");

  sort_DnaProfileMatchPairSet_by_score(matches);

  for(idx = 0; idx < matches->len; idx++) {
    /* check this profile has not already been used */
    /* not done yet */

    /* the set is sorted, so stop at the first match below the cutoff */
    if( matches->pair[idx]->score < dpep->min_seq_prof ) {
      fprintf(stderr,"Warning... rejecting match due to score %d vs %d\n",matches->pair[idx]->score,dpep->min_seq_prof);
      break;
    }

    merged = merged_SeqAlign(matches->pair[idx]->query,
                             matches->pair[idx]->target,
                             matches->pair[idx]->alb);

    merged_profile = naive_DnaProfile_from_SeqAlign(merged,dpep->pseudo,dpep->m2i,dpep->m2d,dpep->i2i,dpep->d2d);

    /* need to log-odds dnap here */
    fold_RandomModel_DnaProfile(merged_profile,dpep->rm);

    add_DnaProfileSet(out,merged_profile);
  }

  fprintf(stderr,"Freeing DNA profiles...\n");
  free_DnaProfile(fwd_profile);
  free_DnaProfile(rev_profile);

  fprintf(stderr,"Freeing sequences\n");
  free_Sequence(one_rev);

  return out;
}
/*
 * DnaProfileSet_from_leaf_leaf
 *
 * Builds a profile set from two plain sequences: local cis hits between
 * them are found, and every matched block inside each hit alignment is
 * turned into one two-sequence DnaProfile.
 *
 *   one   first sequence
 *   two   second sequence (its reverse complement is also searched)
 *   dpep  engine parameters (hit set parameters, scores, random model,
 *         DP implementation)
 *
 * The DNA match matrix in use is printed to stderr.
 *
 * Returns the new DnaProfileSet (possibly with no profiles), or NULL
 * after a warn() if the scratch buffers cannot be allocated.
 *
 * NOTE(review): two_rev is created here and never released in this
 * function. Confirm whether the hit set takes ownership of it.
 */
DnaProfileSet * DnaProfileSet_from_leaf_leaf(Sequence * one,Sequence * two,DnaProfileEnginePara * dpep)
{
  DnaProfileSet * out;
  DnaMatrix * dm;
  DnaProbMatrix * dmp;
  PairwiseShortDna * psd;
  LocalCisHitSet * set;
  Sequence * two_rev;
  DnaProfile * dp;
  SeqAlign * sa;
  Sequence * temp1;
  Sequence * temp2;
  char * temp_seq1;
  char * temp_seq2;
  size_t buf_len;
  int unmatched;
  int seq1_i = 0;
  int seq2_i = 0;
  AlnColumn * alc;
  int i;

  two_rev = reverse_complement_Sequence(two);

  dmp = DnaProbMatrix_from_match(0.65,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  dm = DnaMatrix_from_DnaProbMatrix(dmp);

  show_DnaMatrix(dm,stderr);

  psd = query_to_reverse_target(one,two,dm,0,one->len,0,two->len);

  set = make_LocalCisHitSet(one,two,two_rev,psd->forward,psd->reverse,dpep->setpara,dpep->lchs,NULL,NULL,NULL,NULL,0,dpep->dpri);

  /*
   * Scratch buffers for one matched block. Every stored column takes a
   * residue from at least one of the sequences (the other side may be a
   * gap), so a block can be longer than either sequence alone. Size for
   * the sum of both lengths plus the terminating NUL.
   * NOTE(review): assumes alignment positions only move forward within
   * a hit - confirm.
   */
  buf_len = (size_t) one->len + (size_t) two->len + 1;
  temp_seq1 = calloc(buf_len,sizeof(char));
  temp_seq2 = calloc(buf_len,sizeof(char));

  if( temp_seq1 == NULL || temp_seq2 == NULL ) {
    warn("Unable to allocate alignment buffers for leaf-leaf profile construction");
    free(temp_seq1);
    free(temp_seq2);
    free_PairwiseShortDna(psd);
    free_LocalCisHitSet(set);
    free_DnaMatrix(dm);
    free_DnaProbMatrix(dmp);
    return NULL;
  }

  out = DnaProfileSet_alloc_std();

  for(i=0; i<set->len; i++) {
    unmatched = 1;
    sa = NULL;

    /*
     * Main loop over DBA style alignment. We need to make one
     * DnaProfile per matching block, which are separated by unmatched
     * blocks. Could potentially be no blocks.
     *
     * Extra annoyance provided by the "wrong" convention being used in
     * DBA alignments, meaning that "inserts" label the "sequence" containing
     * strand, not the non-sequence containing strand. Stupid, but dbadisplay
     * uses this convention, so if we changed, would have to fix lots of existing
     * code. Not ideal.
     *
     */

    for(alc=set->lch[i]->alb->start; alc != NULL; alc=alc->next) {

      /* hitting an unmatched block */
      if( unmatched == 0 && (strcmp(alc->alu[0]->text_label,"UM") == 0 ||
                             strcmp(alc->alu[0]->text_label,"UI") == 0 ||
                             strcmp(alc->alu[0]->text_label,"END") == 0) ) {

        /* if we have an alignment, put it away now */
        if( sa != NULL ) {
          temp_seq1[seq1_i] = '\0';
          temp_seq2[seq2_i] = '\0';

          temp1 = Sequence_from_static_memory(one->name,temp_seq1);
          temp2 = Sequence_from_static_memory(two->name,temp_seq2);

          add_SeqAlign(sa,temp1);
          add_SeqAlign(sa,temp2);

          dp = naive_DnaProfile_from_SeqAlign(sa,0.15,0.1,0.1,0.8,0.8);
          fold_RandomModel_DnaProfile(dp,dpep->rm);

          add_DnaProfileSet(out,dp);

          free_SeqAlign(sa); /* hard linked inside DP */
          sa = NULL;
        }

        /*
         * Back to the unmatched state, so that the next MM/MI column
         * opens a fresh block. Without this only the first matched
         * block of each hit was ever stored.
         */
        unmatched = 1;
        continue;

      } else if( unmatched == 1 && (strstartcmp(alc->alu[0]->text_label,"MM") == 0 ||
                                    strstartcmp(alc->alu[0]->text_label,"MI") == 0 ) ) {
        /* a matched block starts here */
        unmatched = 0;
        sa = SeqAlign_alloc_len(2);
        seq1_i = 0;
        seq2_i = 0;
      }

      /* only if we are in a matched block */
      if( unmatched == 0 ) {
        /* Bloody twisted DBA convention - Niclas has alot to answer for.
           Evil stuff -- MI is on the wrong strand! */
        if( strstartcmp(alc->alu[0]->text_label,"MI") == 0 ) {
          /* means 0 has sequence, other has gap */
          temp_seq1[seq1_i++] = one->seq[alc->alu[0]->end];
          temp_seq2[seq2_i++] = '-';
        } else if ( strstartcmp(alc->alu[1]->text_label,"MI") == 0 ) {
          temp_seq1[seq1_i++] = '-';
          temp_seq2[seq2_i++] = two->seq[alc->alu[1]->end];
        } else if ( strstartcmp(alc->alu[0]->text_label,"MM") == 0 &&
                    strstartcmp(alc->alu[1]->text_label,"MM") == 0 ) {
          temp_seq1[seq1_i++] = one->seq[alc->alu[0]->end];
          temp_seq2[seq2_i++] = two->seq[alc->alu[1]->end];
        } else {
          warn("Impossible label pair reached in matched block local cis hit stuff, %s,%s",alc->alu[0]->text_label,alc->alu[1]->text_label);
        }
      }
    }

    /*
     * An alignment that ends while still inside a matched block leaves
     * an empty SeqAlign behind; release it rather than leak it.
     */
    if( sa != NULL ) {
      free_SeqAlign(sa);
      sa = NULL;
    }
  }

  free(temp_seq1);
  free(temp_seq2);

  free_PairwiseShortDna(psd);
  free_LocalCisHitSet(set);
  free_DnaMatrix(dm);
  free_DnaProbMatrix(dmp);

  return out;
}
int main(int argc,char ** argv) { int type = ALIGN_NORMAL; DPRunImpl * dpri = NULL; HitList * hl; HitListOutputImpl * hloi; Sequence * query; Sequence * target; Sequence * target_rev; PairwiseShortDna * two; LocalCisHitSet * set; LocalCisHitSet * greedy_set; LocalCisHitScore * lchs; LocalCisHitSetPara * setpara; MotifMatrixPara * mmp; MotifMatrixScore * mms; TransFactorMatchSet * tfms_query = NULL; TransFactorMatchSet * tfms_target = NULL; TransFactorMatchSet * tfms_target_rev = NULL; int qstart = -1; int qend = -1; int tstart = -1; int tend = -1; int i; char * temp; DnaMatrix * dm; DnaProbMatrix * dmp; TransFactorBuildPara * tfbp; TransFactorMatchPara * tfmp; TransFactorSet * tfs; char * motif_library = NULL; int use_laurence = FALSE; int use_ben = FALSE; dmp = DnaProbMatrix_from_match(0.75,NMaskType_BANNED); assert(dmp); flat_null_DnaProbMatrix(dmp); dm = DnaMatrix_from_DnaProbMatrix(dmp); dpri = new_DPRunImpl_from_argv(&argc,argv); hloi = new_HitListOutputImpl_from_argv(&argc,argv); setpara = new_LocalCisHitSetPara_from_argv(&argc,argv); mmp = new_MotifMatrixPara_from_argv(&argc,argv); tfbp = new_TransFactorBuildPara_from_argv(&argc,argv); tfmp = new_TransFactorMatchPara_from_argv(&argc,argv); strip_out_integer_argument(&argc,argv,"s",&qstart); strip_out_integer_argument(&argc,argv,"t",&qend); strip_out_integer_argument(&argc,argv,"u",&tstart); strip_out_integer_argument(&argc,argv,"v",&tend); temp = strip_out_assigned_argument(&argc,argv,"motiflib"); if( temp != NULL ) { motif_library = stringalloc(temp); } use_laurence = strip_out_boolean_argument(&argc,argv,"lr"); use_ben = strip_out_boolean_argument(&argc,argv,"ben"); temp = strip_out_assigned_argument(&argc,argv,"align"); if( temp != NULL ) { if( strcmp(temp,"motif") == 0 ) { type = ALIGN_MOTIF; } else if ( strcmp(temp,"normal") == 0 ) { type = ALIGN_NORMAL; } else { fatal("cannot recognise string %s as align type",temp); } } strip_out_standard_options(&argc,argv,show_help,show_version); if( argc != 3 ) { 
show_help(stdout); exit(12); } lchs = standard_LocalCisHitScore(NMaskType_VARIABLE); query = read_fasta_file_Sequence(argv[1]); target = read_fasta_file_Sequence(argv[2]); for(i=0;i<query->len;i++) { query->seq[i] = toupper(query->seq[i]); } assert(query != NULL); assert(target != NULL); target_rev = reverse_complement_Sequence(target); mms = MotifMatrixScore_from_MotifMatrixPara(mmp); if( type == ALIGN_MOTIF ) { if( motif_library == NULL ) { fatal("Wanted to align with motif but not motif library. Must use -motiflib"); } if( use_laurence == TRUE ) { tfs = read_laurence_TransFactorSet_file(motif_library); } else if( use_ben == TRUE ) { tfs = read_ben_IUPAC_TransFactorSet_file(motif_library); } else { tfs = read_TransFactorSet_file(motif_library); } build_TransFactorSet(tfs,tfbp); tfms_query = calculate_TransFactorMatchSet(query,tfs,tfmp); sort_by_start_TransFactorMatchSet(tfms_query); tfms_target = calculate_TransFactorMatchSet(target,tfs,tfmp); sort_by_start_TransFactorMatchSet(tfms_target); tfms_target_rev = calculate_TransFactorMatchSet(target_rev,tfs,tfmp); sort_by_start_TransFactorMatchSet(tfms_target); fprintf(stdout,"Motif Set: %d in query and %d in target\n",tfms_query->len,tfms_target->len); } if( qstart == -1 ) { qstart = 0; } if( qend == -1 ) { qend = query->len; } if( tstart == -1 ) { tstart = 0; } if( tend == -1 ) { tend = target->len; } two = query_to_reverse_target(query,target,dm,qstart,qend,tstart,tend); set = make_LocalCisHitSet(query,target,target_rev,two->forward,two->reverse,setpara,lchs,tfms_query,tfms_target,tfms_target_rev,mms,type == ALIGN_MOTIF ? 1 : 0,dpri); greedy_set = greedy_weed_LocalCisHitSet(set,setpara); hl = HitList_from_LocalCisHitSet(greedy_set); show_HitList_HitListOutputImpl(hloi,hl,stdout); return 0; }