/*
 * Truncate a DNA sequence between two coordinates, letting the order of
 * the coordinates decide which strand is produced.
 *
 *   seq   - sequence to cut; must satisfy is_dna_Sequence()
 *   start - first coordinate, must be >= 0
 *   end   - second coordinate, must be >= 0
 *
 * When start < end the result is simply trunc_Sequence(seq,start,end).
 * Otherwise the region [end,start] is cut out and the reverse complement
 * of that piece is returned.
 *
 * Returns NULL (after a warning) for non-DNA input, negative coordinates,
 * or when the intermediate truncation fails.
 */
Sequence * magic_trunc_Sequence(Sequence * seq,int start,int end)
{
  Sequence * piece;
  Sequence * flipped;

  if( is_dna_Sequence(seq) == FALSE) {
    warn("Cannot magic truncate on a non DNA sequence... type is %s",Sequence_type_to_string(seq->type));
    return NULL;
  }

  if( start < 0 || end < 0 ) {
    warn("Attempting a magic truncation on indices which are less than zero [%d:%d]. Clearly impossible",start,end);
    return NULL;
  }

  /* ascending coordinates: plain truncation, no strand flip needed */
  if( start < end ) {
    return trunc_Sequence(seq,start,end);
  }

  /* descending (or equal) coordinates: cut with the bounds swapped ... */
  piece = trunc_Sequence(seq,end,start);
  if( piece == NULL ) {
    warn("Unable to truncate sequence");
    return NULL;
  }

  /* ... then flip the strand and drop the intermediate piece */
  flipped = reverse_complement_Sequence(piece);
  free_Sequence(piece);

  return flipped;
}
Beispiel #2
0
/*
 * Build a cDNADB around one cDNA sequence.
 *
 * A ComplexSequence is made for seq->baseseq and another for its reverse
 * complement, both using the default cDNA evaluation set; the pair is then
 * handed to new_cDNADB_from_forrev_cseq(), whose result is returned.
 *
 * The temporary reverse-complement Sequence and the evaluation set are
 * released here before the database is constructed.
 */
cDNADB * new_cDNADB_from_single_seq(cDNA * seq)
{
  ComplexSequenceEvalSet * evalset = default_cDNA_ComplexSequenceEvalSet();
  ComplexSequence * forward_cs     = new_ComplexSequence(seq->baseseq,evalset);
  Sequence * revcomp               = reverse_complement_Sequence(seq->baseseq);
  ComplexSequence * reverse_cs     = new_ComplexSequence(revcomp,evalset);

  /* the helpers were only needed to build the two complex sequences */
  free_Sequence(revcomp);
  free_ComplexSequenceEvalSet(evalset);

  return new_cDNADB_from_forrev_cseq(forward_cs,reverse_cs);
}
Beispiel #3
0
int main(int argc,char ** argv)
{
  Sequence * in;
  Sequence * rev;


  while( in = read_fasta_Sequence(stdin) ) {
    in->type = SEQUENCE_DNA;
    rev = reverse_complement_Sequence(in);
    write_fasta_Sequence(rev,stdout);
    free_Sequence(in);
    free_Sequence(rev);
  }


}
Beispiel #4
0
/*
 * Fetch the cDNA described by a DataEntry from a cDNA database.
 *
 *   cdnadb - database to read from (NULL -> warning, returns NULL)
 *   de     - entry to retrieve      (NULL -> warning, returns NULL)
 *
 * For a single-sequence database the stored forward or reverse sequence is
 * hard-linked and wrapped, chosen by de->is_reversed. Otherwise the
 * sequence is pulled from the underlying SequenceDB, forced to DNA (with a
 * warning if it was not typed as DNA) and reverse complemented when the
 * entry is flagged as reversed.
 *
 * Returns the result of cDNA_from_Sequence(), or NULL if the sequence
 * could not be retrieved.
 */
cDNA * get_cDNA_from_cDNADB(cDNADB * cdnadb,DataEntry * de)
{
  Sequence * fetched;
  Sequence * flipped;

  if( cdnadb == NULL ) {
    warn("Cannot get entry from a null database");
    return NULL;
  }

  if( de == NULL ) {
    warn("Cannot get entry with a null dataentry");
    return NULL;
  }

  /* single-sequence database: both strands are already held in memory */
  if( cdnadb->is_single_seq == TRUE ) {
    return cDNA_from_Sequence(hard_link_Sequence(
             de->is_reversed == TRUE ? cdnadb->rev->seq : cdnadb->forw->seq));
  }

  /* general case: pull the raw Sequence out of the sequence database */
  fetched = get_Sequence_from_SequenceDB(cdnadb->sdb,de);
  if( fetched == NULL ) {
    warn("Cannot get entry for %s from cDNA db",de->name);
    return NULL;
  }

  if( fetched->type != SEQUENCE_DNA) {
    warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",de->name);
  }

  /* forced unconditionally, even when the type already says DNA */
  force_to_dna_Sequence(fetched,1.0,NULL);

  if( de->is_reversed == TRUE ) {
    flipped = reverse_complement_Sequence(fetched);
    free_Sequence(fetched);
    fetched = flipped;
  }

  return cDNA_from_Sequence(fetched);
}
Beispiel #5
0
/*
 * Fetch the Genomic sequence described by a DataEntry from a genomic
 * database.
 *
 *   gendb - database to read from
 *   de    - entry to retrieve
 *
 * If either argument is NULL a warning is issued and NULL is returned.
 * For a single-sequence database the stored forward or reverse Genomic
 * object is hard-linked and returned, chosen by de->is_reversed.
 * Otherwise the sequence is read from the underlying SequenceDB, forced to
 * DNA, reverse complemented when the entry is flagged as reversed, and
 * converted with Genomic_from_Sequence_Nheuristic() using
 * gendb->length_of_N.
 *
 * Returns NULL if the sequence cannot be retrieved.
 */
Genomic * get_Genomic_from_GenomicDB(GenomicDB * gendb,DataEntry * de)
{
  Sequence * seq;
  Sequence * temp;

  if( gendb == NULL || de == NULL ) {
    warn("Cannot get a genomic sequence from NULL objects. Ugh!");
    return NULL;
  }


  if( gendb->is_single_seq) {
    if( de->is_reversed == TRUE ) 
      return hard_link_Genomic(gendb->revsingle);
    else
      return hard_link_Genomic(gendb->single);
  }

  /* we need to get out the Sequence from seqdb */
  seq = get_Sequence_from_SequenceDB(gendb->sdb,de);


  if( seq == NULL ) {
    warn("Cannot get entry for %s from Genomic db",de->name);
    /* bail out here: everything below dereferences seq */
    return NULL;
  }

  /* check dna status. We assume someone knows what he is doing when he makes a genomic db! */
  if( seq->type != SEQUENCE_DNA) {
    warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",de->name);
  }

  force_to_dna_Sequence(seq,1.0,NULL);

  if( de->is_reversed == TRUE ) {
    temp = reverse_complement_Sequence(seq);
    free_Sequence(seq);
    seq = temp;
  }

  return Genomic_from_Sequence_Nheuristic(seq,gendb->length_of_N);
}
Beispiel #6
0
/*
 * Advance a cDNA database to its next ComplexSequence.
 *
 *   last          - ComplexSequence returned by the previous call; it is
 *                   freed here if non-NULL
 *   cdnadb        - database being iterated
 *   return_status - out parameter, set to one of the DB_RETURN_* codes
 *
 * Behaviour by database mode:
 *   - forward_only: returns the next sequence from the SequenceDB wrapped
 *     as a ComplexSequence, with no reverse strand.
 *   - is_single_seq: hands out the stored reverse strand once (if the
 *     forward strand has been done), then reports DB_RETURN_END.
 *   - otherwise: alternates between a freshly loaded forward sequence and
 *     the reverse complement of cdnadb->current.
 *
 * Sequences that cannot be mapped to DNA are skipped when
 * cdnadb->error_handling is CDNADB_READ_THROUGH, and are an error
 * otherwise.
 *
 * Returns the new ComplexSequence, or NULL at end of database / on error
 * (see *return_status).
 *
 * Ownership note: elsewhere in this file a Sequence is freed right after
 * new_ComplexSequence() has been built from it, so this function releases
 * its own reference the same way on every path.
 */
ComplexSequence * reload_cDNADB(ComplexSequence * last,cDNADB * cdnadb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq,*temp;
  

  /** free Complex Sequence **/

  if ( last != NULL ) {
    free_ComplexSequence(last);
  }

  if( cdnadb->forward_only == TRUE) {
    temp = reload_SequenceDB(NULL,cdnadb->sdb,return_status);
    if ( *return_status  != DB_RETURN_OK ) {
      if( temp != NULL ) {
        free_Sequence(temp);
      }
      return NULL;
    }
    cs = new_ComplexSequence(temp,cdnadb->cses);
    /* drop our reference; previously this Sequence was leaked */
    if( temp != NULL ) {
      free_Sequence(temp);
    }
    return cs;
  }

  if( cdnadb->is_single_seq == TRUE ) {
    if( cdnadb->done_forward == TRUE ) {
      *return_status = DB_RETURN_OK;
      cdnadb->done_forward = FALSE;
      return hard_link_ComplexSequence(cdnadb->rev);
    } else {
      *return_status = DB_RETURN_END;
      return NULL;
    }
  }

  
  /** standard database **/


  if( cdnadb->done_forward == TRUE ) {
    /* forward strand already served: now serve its reverse complement */
    if( cdnadb->current == NULL ) {
      warn("A bad internal cDNA db error - unable to find current sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    temp = reverse_complement_Sequence(cdnadb->current);


    if( temp == NULL ) {
      warn("A bad internal cDNA db error - unable to reverse complements current");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    cs = new_ComplexSequence(temp,cdnadb->cses);

    /* temp is no longer needed whether or not the wrap succeeded */
    free_Sequence(temp);

    if( cs == NULL ) {
      warn("A bad internal cDNA db error - unable to make complex sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    cdnadb->done_forward = FALSE;
    return cs;
  }


  /* otherwise we have to get a new sequence */

  seq = reload_SequenceDB(NULL,cdnadb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  uppercase_Sequence(seq);

  if( force_to_dna_Sequence(seq,cdnadb->error_tol,NULL) == FALSE ) {
    if( cdnadb->error_handling == CDNADB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a cDNA sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_cDNADB(NULL,cdnadb,return_status);
    } else {
      /* warn before freeing: the message reads seq->name */
      warn("Unable to map %s sequence to a cDNA sequence. Failing",seq->name);
      free_Sequence(seq);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }


  cs = new_ComplexSequence(seq,cdnadb->cses);
  if( cs == NULL ) {
    if( cdnadb->error_handling == CDNADB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a cDNA sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_cDNADB(NULL,cdnadb,return_status);
    } else {
      warn("Unable to map %s sequence to a cDNA sequence. Failing",seq->name);
      free_Sequence(seq);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }

  /* remember the forward sequence so the next call can reverse it */
  cdnadb->current = free_Sequence(cdnadb->current);
  cdnadb->current = seq;
  cdnadb->done_forward= TRUE;

  return cs;
}
Beispiel #7
0
    /*
     * Build a DnaProfileSet by matching a single sequence (a leaf) against
     * every profile of an existing set (a node).
     *
     *   one  - the leaf sequence
     *   two  - the existing profile set
     *   dpep - engine parameters (transition values, random model,
     *          min_seq_prof score cut-off, pseudo count)
     *
     * Naive profiles are made from `one` and from its reverse complement,
     * each is matched against every profile in `two`, and the match pairs
     * are sorted by score. Pairs are then consumed in sorted order until
     * one scores below dpep->min_seq_prof; each accepted pair is merged
     * into a SeqAlign and turned into a new profile added to the result.
     *
     * Progress messages are written to stderr.
     *
     * Returns the newly allocated DnaProfileSet.
     */
    DnaProfileSet * DnaProfileSet_from_leaf_node(Sequence * one,DnaProfileSet * two,DnaProfileEnginePara * dpep)
    {
        DnaProfileSet * out;
        DnaProfile * dnap;
        DnaProfile * dnapr;
        DnaProfileMatchPairSet * dpmps;
        Sequence * rev;
        SeqAlign * sa;
        DnaProfile * new_dnap;
        int i;


        dpmps = DnaProfileMatchPairSet_alloc_std();

        out = DnaProfileSet_alloc_std();

        rev = reverse_complement_Sequence(one);

        dnap = naive_DnaProfile_from_Sequence(one,dpep->seq_id,dpep->m2i,dpep->m2d,dpep->i2i,dpep->d2d);
        dnapr = naive_DnaProfile_from_Sequence(rev,dpep->seq_id,dpep->m2i,dpep->m2d,dpep->i2i,dpep->d2d);

        fold_RandomModel_DnaProfile(dnap,dpep->rm);
        fold_RandomModel_DnaProfile(dnapr,dpep->rm);


        /* score both strands of the leaf against every existing profile */
        for(i=0; i<two->len; i++) {
            fprintf(stderr,"Processing %d\n",i);
            add_DnaProfileMatchPairSet(dpmps,DnaProfileMatchPair_from_DnaProfile(dnap,two->dnap[i],dpep));
            add_DnaProfileMatchPairSet(dpmps,DnaProfileMatchPair_from_DnaProfile(dnapr,two->dnap[i],dpep));
        }

        fprintf(stderr,"Sorting....\n");

        sort_DnaProfileMatchPairSet_by_score(dpmps);

        for(i=0; i<dpmps->len; i++) {
            /* check this profile has not already been used */
            /* not done yet */

            /* the set is sorted, so the first failing score ends the scan */
            if( dpmps->pair[i]->score < dpep->min_seq_prof ) {
                fprintf(stderr,"Warning... rejecting match due to score %d vs %d\n",dpmps->pair[i]->score,dpep->min_seq_prof);
                break;
            }

            sa = merged_SeqAlign(dpmps->pair[i]->query,dpmps->pair[i]->target,dpmps->pair[i]->alb);


            new_dnap = naive_DnaProfile_from_SeqAlign(sa,dpep->pseudo,dpep->m2i,dpep->m2d,dpep->i2i,dpep->d2d);

            /*
             * Release our reference to the alignment. The sibling
             * DnaProfileSet_from_leaf_leaf does the same after this
             * constructor, noting the alignment is hard linked inside
             * the profile; previously it was leaked here.
             */
            free_SeqAlign(sa);

            /* need to log-odds dnap here */

            fold_RandomModel_DnaProfile(new_dnap,dpep->rm);


            add_DnaProfileSet(out,new_dnap);
        }


        fprintf(stderr,"Freeing DNA profiles...\n");

        free_DnaProfile(dnap);
        free_DnaProfile(dnapr);

        fprintf(stderr,"Freeing sequences\n");

        free_Sequence(rev);

        /* NOTE(review): dpmps is never released in this function - confirm
           whether the match pair set should be freed here. */

        return out;
    }
Beispiel #8
0
    /*
     * Build a DnaProfileSet from the local cis hits between two sequences.
     *
     *   one  - first sequence
     *   two  - second sequence
     *   dpep - engine parameters (hit set parameters, scores, random model,
     *          DP implementation)
     *
     * A DNA matrix is built from a 0.65 match probability, local cis hits
     * are computed between `one` and `two` (both strands of `two`), and
     * every matched block of every hit alignment is converted into a
     * two-row SeqAlign and from there into a DnaProfile.
     *
     * The DNA matrix is dumped to stderr as a side effect.
     *
     * Returns the newly allocated DnaProfileSet (possibly empty), or NULL
     * if the scratch buffers cannot be allocated.
     */
    DnaProfileSet * DnaProfileSet_from_leaf_leaf(Sequence * one,Sequence * two,DnaProfileEnginePara * dpep)
    {
        DnaProfileSet * out = NULL;
        DnaMatrix * dm;
        DnaProbMatrix * dmp;
        PairwiseShortDna * psd;
        LocalCisHitSet * set;
        Sequence * two_rev;
        DnaProfile * dp;
        SeqAlign * sa;

        Sequence * temp1;
        Sequence * temp2;

        char * temp_seq1;
        char * temp_seq2;
        size_t aln_cap;

        int unmatched;
        int seq1_i = 0;
        int seq2_i = 0;

        AlnColumn * alc;
        int i;

        two_rev = reverse_complement_Sequence(two);


        dmp = DnaProbMatrix_from_match(0.65,NMaskType_BANNED);
        assert(dmp);
        flat_null_DnaProbMatrix(dmp);

        dm = DnaMatrix_from_DnaProbMatrix(dmp);

        show_DnaMatrix(dm,stderr);

        psd = query_to_reverse_target(one,two,dm,0,one->len,0,two->len);


        set = make_LocalCisHitSet(one,two,two_rev,psd->forward,psd->reverse,dpep->setpara,dpep->lchs,NULL,NULL,NULL,NULL,0,dpep->dpri);

        /*
         * A gapped block can be longer than either input: every column
         * consumes a residue from at least one sequence, so the bound is
         * len(one) + len(two), plus one byte for the terminator.
         */
        aln_cap = (size_t) one->len + (size_t) two->len + 1;
        temp_seq1 = calloc(aln_cap,sizeof(char));
        temp_seq2 = calloc(aln_cap,sizeof(char));

        if( temp_seq1 == NULL || temp_seq2 == NULL ) {
            warn("Unable to allocate alignment buffers for leaf/leaf profile set");
            goto cleanup;
        }

        out = DnaProfileSet_alloc_std();

        for(i=0; i<set->len; i++) {
            unmatched = 1;
            sa = NULL;

            /*
             * Main loop over DBA style alignment. We need to make one
             * DnaProfile per matching block, which are separated by unmatched
             * blocks. Could potentially be no blocks.
             *
             * Extra annoyance provided by the "wrong" convention being used in
             * DBA alignments, meaning that "inserts" label the "sequence" containing
             * strand, not the non-sequence containing strand. Stupid, but dbadisplay
             * uses this convention, so if we changed, would have to fix lots of existing
             * code. Not ideal.
             *
             */


            for(alc=set->lch[i]->alb->start; alc != NULL; alc=alc->next) {

                /* hitting an unmatched block */
                if( unmatched == 0 && (strcmp(alc->alu[0]->text_label,"UM") == 0 ||
                                       strcmp(alc->alu[0]->text_label,"UI") == 0 || strcmp(alc->alu[0]->text_label,"END") == 0) ) {
                    /* if we have an alignment, put it away now */
                    if( sa != NULL ) {
                        temp_seq1[seq1_i] = '\0';
                        temp_seq2[seq2_i] = '\0';

                        temp1 = Sequence_from_static_memory(one->name,temp_seq1);
                        temp2 = Sequence_from_static_memory(two->name,temp_seq2);

                        add_SeqAlign(sa,temp1);
                        add_SeqAlign(sa,temp2);

                        dp = naive_DnaProfile_from_SeqAlign(sa,0.15,0.1,0.1,0.8,0.8);
                        fold_RandomModel_DnaProfile(dp,dpep->rm);

                        add_DnaProfileSet(out,dp);
                        free_SeqAlign(sa); /* hard linked inside DP */
                        sa = NULL;
                    }

                    /*
                     * Back in unmatched territory. Without this reset the
                     * next matched block would never open a new SeqAlign
                     * and every block after the first would be dropped.
                     */
                    unmatched = 1;

                    continue;
                } else if( unmatched == 1 && (strstartcmp(alc->alu[0]->text_label,"MM") == 0 ||
                                              strstartcmp(alc->alu[0]->text_label,"MI") == 0 ) ) {
                    /* entering a matched block: start a fresh alignment */
                    unmatched = 0;

                    sa = SeqAlign_alloc_len(2);
                    seq1_i = 0;
                    seq2_i = 0;
                }

                /* only if we are in a matched block */
                if( unmatched == 0 ) {
                    /* Bloody twisted DBA convention - Niclas has alot to answer for.
                       Evil stuff -- MI is on the wrong strand! */
                    if( strstartcmp(alc->alu[0]->text_label,"MI") == 0 ) {
                        /* means 0 has sequence, other has gap */
                        temp_seq1[seq1_i++] = one->seq[alc->alu[0]->end];
                        temp_seq2[seq2_i++] = '-';
                    } else if ( strstartcmp(alc->alu[1]->text_label,"MI") == 0 ) {
                        temp_seq1[seq1_i++] = '-';
                        temp_seq2[seq2_i++] = two->seq[alc->alu[1]->end];
                    } else if ( strstartcmp(alc->alu[0]->text_label,"MM") == 0 &&
                                strstartcmp(alc->alu[1]->text_label,"MM") == 0 ) {
                        temp_seq1[seq1_i++] = one->seq[alc->alu[0]->end];
                        temp_seq2[seq2_i++] = two->seq[alc->alu[1]->end];
                    } else {
                        warn("Impossible label pair reached in matched block local cis hit stuff, %s,%s",alc->alu[0]->text_label,alc->alu[1]->text_label);
                    }
                }

            }

            /* alignment ran out inside a matched block: do not leak the
               half-built SeqAlign */
            if( sa != NULL ) {
                free_SeqAlign(sa);
                sa = NULL;
            }
        }


    cleanup:
        free(temp_seq1);
        free(temp_seq2);
        free_PairwiseShortDna(psd);
        free_LocalCisHitSet(set);
        free_DnaMatrix(dm);
        free_DnaProbMatrix(dmp);

        /* the reverse strand was only needed while the hit set was alive */
        if( two_rev != NULL ) {
            free_Sequence(two_rev);
        }

        return out;
    }
/*
 * Command line driver: compute local cis hits between a query and a target
 * FASTA file and print the resulting hit list to stdout.
 *
 * Options stripped from argv here:
 *   -s / -t        query start / end   (default: whole query)
 *   -u / -v        target start / end  (default: whole target)
 *   -motiflib      motif library file (required for -align motif)
 *   -lr / -ben     read the motif library in the "laurence" / "ben IUPAC"
 *                  format instead of the default one
 *   -align         "motif" or "normal" (default normal)
 * plus whatever the DPRunImpl, HitListOutputImpl, LocalCisHitSetPara,
 * MotifMatrixPara, TransFactorBuildPara and TransFactorMatchPara argv
 * parsers and the standard options consume.
 *
 * After option stripping exactly two positional arguments must remain:
 * the query file and the target file; otherwise help is shown and the
 * program exits with status 12.
 *
 * Returns 0 on success.
 */
int main(int argc,char ** argv)
{
  int type = ALIGN_NORMAL;
  DPRunImpl * dpri = NULL;
  HitList * hl;
  HitListOutputImpl * hloi;

  Sequence * query;
  Sequence * target;
  Sequence * target_rev;
  PairwiseShortDna * two;


  LocalCisHitSet * set;
  LocalCisHitSet * greedy_set;

  LocalCisHitScore * lchs;
  LocalCisHitSetPara * setpara;

  MotifMatrixPara  * mmp;
  MotifMatrixScore * mms;

  TransFactorMatchSet * tfms_query = NULL;
  TransFactorMatchSet * tfms_target = NULL;
  TransFactorMatchSet * tfms_target_rev = NULL;

  int qstart = -1;
  int qend   = -1;
  
  int tstart = -1;
  int tend   = -1;
  int i;

  char * temp;

  DnaMatrix * dm;
  DnaProbMatrix * dmp;
  
  TransFactorBuildPara * tfbp;
  TransFactorMatchPara * tfmp;

  TransFactorSet * tfs;

  char * motif_library = NULL;
  int use_laurence     = FALSE;
  int use_ben          = FALSE;

  dmp = DnaProbMatrix_from_match(0.75,NMaskType_BANNED);  
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);  

  dm = DnaMatrix_from_DnaProbMatrix(dmp);
  
  dpri      = new_DPRunImpl_from_argv(&argc,argv);
  hloi      = new_HitListOutputImpl_from_argv(&argc,argv);
  setpara   = new_LocalCisHitSetPara_from_argv(&argc,argv);
  mmp       = new_MotifMatrixPara_from_argv(&argc,argv);
  tfbp      = new_TransFactorBuildPara_from_argv(&argc,argv);
  tfmp      = new_TransFactorMatchPara_from_argv(&argc,argv);

  strip_out_integer_argument(&argc,argv,"s",&qstart);
  strip_out_integer_argument(&argc,argv,"t",&qend);
  strip_out_integer_argument(&argc,argv,"u",&tstart);
  strip_out_integer_argument(&argc,argv,"v",&tend);

  temp = strip_out_assigned_argument(&argc,argv,"motiflib");
  if( temp != NULL ) {
    motif_library = stringalloc(temp);
  }

  use_laurence = strip_out_boolean_argument(&argc,argv,"lr");
  use_ben      = strip_out_boolean_argument(&argc,argv,"ben");


  temp = strip_out_assigned_argument(&argc,argv,"align");
  if( temp != NULL ) {
    if( strcmp(temp,"motif") == 0 ) {
      type = ALIGN_MOTIF;
    } else if ( strcmp(temp,"normal") == 0 ) {
      type = ALIGN_NORMAL;
    } else {
      fatal("cannot recognise string %s as align type",temp);
    }
  }

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

    

  lchs = standard_LocalCisHitScore(NMaskType_VARIABLE);

  query = read_fasta_file_Sequence(argv[1]);
  target = read_fasta_file_Sequence(argv[2]);

  /* validate before first use: the uppercase loop below reads query->len */
  assert(query != NULL);
  assert(target != NULL);

  for(i=0;i<query->len;i++) {
    /* <ctype.h> functions need an unsigned char value, not a plain char */
    query->seq[i] = (char) toupper((unsigned char) query->seq[i]);
  }

  target_rev = reverse_complement_Sequence(target);

  mms = MotifMatrixScore_from_MotifMatrixPara(mmp);

  if( type == ALIGN_MOTIF ) {
    if( motif_library == NULL ) {
      fatal("Wanted to align with motif but not motif library. Must use -motiflib");
    }


    if( use_laurence == TRUE ) {
      tfs = read_laurence_TransFactorSet_file(motif_library);
    } else if( use_ben == TRUE ) {
      tfs = read_ben_IUPAC_TransFactorSet_file(motif_library);
    } else {
      tfs = read_TransFactorSet_file(motif_library);
    }


    build_TransFactorSet(tfs,tfbp);

    tfms_query = calculate_TransFactorMatchSet(query,tfs,tfmp);
    sort_by_start_TransFactorMatchSet(tfms_query);

    tfms_target = calculate_TransFactorMatchSet(target,tfs,tfmp);
    sort_by_start_TransFactorMatchSet(tfms_target);

    tfms_target_rev = calculate_TransFactorMatchSet(target_rev,tfs,tfmp);
    /* sort the reverse-strand set; this used to re-sort tfms_target */
    sort_by_start_TransFactorMatchSet(tfms_target_rev);

    fprintf(stdout,"Motif Set: %d in query and %d in target\n",tfms_query->len,tfms_target->len);
  }


  /* -1 means "not given on the command line": default to the full range */
  if( qstart == -1 ) {
    qstart = 0;
  }
  if( qend == -1 ) {
    qend = query->len;
  }
  if( tstart == -1 ) {
    tstart = 0;
  }
  if( tend == -1 ) {
    tend = target->len;
  }

  
  two = query_to_reverse_target(query,target,dm,qstart,qend,tstart,tend);

  set = make_LocalCisHitSet(query,target,target_rev,two->forward,two->reverse,setpara,lchs,tfms_query,tfms_target,tfms_target_rev,mms,type == ALIGN_MOTIF ? 1 : 0,dpri);

  greedy_set = greedy_weed_LocalCisHitSet(set,setpara);


  hl = HitList_from_LocalCisHitSet(greedy_set);

  show_HitList_HitListOutputImpl(hloi,hl,stdout);

  return 0;
}