void ramen_load_motifs() { BOOLEAN_T read_file = FALSE; MREAD_T *mread; ARRAYLST_T* read_motifs; int num_motifs_before_rc; int i; int j; memset(&motifs, 0, sizeof(ramen_motifs_t)); read_motifs = arraylst_create(); for (i = 0; i < args.number_motif_files; i++) { mread = mread_create(args.motif_filenames[i], OPEN_MFILE); if (args.bg_format == FILE_BG) { mread_set_bg_source(mread, args.bg_filename); } else { mread_set_background(mread, motifs.bg_freqs); } mread_set_pseudocount(mread, args.pseudocount); mread_load(mread, read_motifs); if (!(motifs.bg_freqs)) motifs.bg_freqs = mread_get_background(mread); mread_destroy(mread); } // reverse complement the originals adding to the original read in list num_motifs_before_rc = arraylst_size(read_motifs); add_reverse_complements(read_motifs); motifs.num = arraylst_size(read_motifs); //Allocate array for the motifs motif_list_to_array(read_motifs, &(motifs.motifs), &(motifs.num)); //free the list of motifs free_motifs(read_motifs); // check reverse complements. assert(motifs.num / 2 == num_motifs_before_rc); // reset motif count to before rev comp motifs.num = num_motifs_before_rc; //Now, we need to convert the motifs into odds matrices if we're doing that kind of scoring for (i=0;i<2*motifs.num;i++) { convert_to_odds_matrix(motif_at(motifs.motifs, i), motifs.bg_freqs); } }
MaxResults suboptSampler(Model B, PoSition **Pos, SimTime *S) { int **nNumMotifs, **startPos, /* startPos[MotifType][motifnum] */ i, j, seed_run = 0, t, k; int maxnum; /* maximum number of motif in one type motif */ register Mlist M; MaxResults maxData, locMax; RPType RP; IPtype IP; /* for quick access of input data */ PoSition *Pos_t; /* Pos_t=Pos[t] */ int n; /* BT 2/7/97 */ int nPos; int sum; double dCurrProb; #ifdef _MPI_ MPI_Status status; double dTempInfo[4]; #endif IP=B->IP; RP = B->RP; init_maxdata(&maxData); init_maxdata(&locMax); BeginTime(S); #ifdef _MPI_ if( ! IP->is_defined[cl_hm] ) { /* get the go ahead to start */ Gibbs_MPI_Recv( B, &dTempInfo, 4, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status ); if( status.MPI_TAG == G_MPI_DONE ) return maxData; if( IP->is_defined[cl_X] && B->AN->bExchange ) { B->AN->currTemp = dTempInfo[0]; B->AN->dMinTemp = B->AN->currTemp; B->AN->dMaxTemp = B->AN->currTemp; PrintTempOut(IP->Datafiles->mpiTemp_fpt, "currTemp = %f maxTemp = %f minTemp = %f\n", B->AN->currTemp, B->AN->dMaxTemp, B->AN->dMinTemp ); } } #endif /* save the inital number of motifs */ for(t = 0; t < IP->nNumMotifTypes; t++) /* BT 2/7/97 */ { RP->nPriorMotifSites[t][FORWARD] = IP->nNumMotifs[t][FORWARD]; RP->nPriorMotifSites[t][REVERSE] = IP->nNumMotifs[t][REVERSE]; } /* copy the original counts into current counts */ copy_counts(B); nNumMotifs = copy_motif_num(IP); zero_motifs(IP); /* set motif count to zero */ /* If not running Wilcox test initialize random number gerator. */ /* Wilcox already did this */ if( ! IP->is_defined[cl_l] ) /* BT 7/23/97 */ sRandom(B, IP->lSeedVal); /* calculate the probability of NULL model */ /* IP->dnull_map = CalcMapProb(B, IP->is_defined[cl_R]); */ IP->dnull_map = CalcNullMap(B); /* 2/16/2001 */ reset_motif_num(nNumMotifs,IP); /* mark all possible motif sites */ if( ! IP->inCentroidAlign ) set_indicator_vector(Pos, B->Seq, B); maxnum = findMaxNumMotif(IP); if( IP->is_defined[cl_E] || RP->nUseSpacingProb || IP->is_defined[cl_T] ) /* BT 11/20/2000 */ { CountAlignments( B, Pos ); SaveAlignmentCounts( B ); } /* allocate space for start positions of motifs */ NEWP(startPos,IP->nNumMotifTypes, int); for(i = 0; i <IP->nNumMotifTypes ; i++) NEW(startPos[i],maxnum, int); /* Loop through for a number of different seeds */ while(seed_run++ < IP->nSeeds) { if( IP->is_defined[cl_X] ) { /* if( ! B->AN->bExchange ) B->AN->currTemp = B->AN->dMaxTemp; */ if( ! IP->is_defined[cl_Z] ) printf( "Current temperature: %7.2f\n", B->AN->currTemp ); } IP->nSeedRun = seed_run; if(!IP->is_defined[cl_F]) initMask(B); if( IP->is_defined[cl_X] ) init_maxdata( &(B->AN->results[seed_run]) ); /* initialize the Pos */ for(t=0;t<IP->nNumMotifTypes;t++) { Pos_t=Pos[t]; for(i = 0; i < IP->nSeqLen; i++) { Pos_t[i].nInMotif=FALSE; Pos_t[i].nMotifStartPos=FALSE; } } for( t= 0; t < IP->nNumMotifTypes; t++ ) /* BT 5/23/97 */ { IP->nMotifLen[t] = IP->nInputMotifLen[t]; /* restore original lengths */ SetPossibleSites( B, t ); /* restore original site count */ } if( IP->is_defined[cl_d] && ! IP->inCentroidAlign ) set_indicator_vector(Pos, B->Seq, B); if( ! IP->site_samp ) /* BT 7/16/97 */ set_posterior_prob(IP, B->C); /* Set initial posterior prob */ /* random select a set of motifs */ if( ! IP->is_defined[cl_V] ) /* BT 04/16/03 */ { if( B->InitPos == NULL || seed_run > 1 ) { if( IP->is_defined[cl_A] ) { set_posterior_prob(IP, B->C); for( t = 0; t < IP->nNumMotifTypes; t++ ) { IP->nNumMotifs[t][FORWARD] = 0; IP->nNumMotifs[t][REVERSE] = 0; } } set_random_sequences(B, Pos, startPos); /* set alignment &*/ } else { SetInitSequences( B, Pos, startPos ); /* BT 8/5/98 */ set_posterior_prob(IP, B->First); if( RP->bUsePosMatrix ) InitializePosMatrix( B ); else if( RP->bUseTrans ) InitializeTransMatrix( B ); } } else { if( B->InitPos == NULL ) { for( t = 0; t < IP->nNumMotifTypes; t++ ) { IP->nNumMotifs[t][FORWARD] = 0; IP->nNumMotifs[t][REVERSE] = 0; } } else { SetInitSequences( B, Pos, startPos ); set_counts( B ); /* BT 10/23/03 */ set_posterior_prob(IP, B->First); } set_posterior_prob(IP, B->C); } reset_counts(B, startPos, Pos); /* its counts */ if( B->InitPos != NULL && seed_run == 1 ) { RP->dInitProb = CalcMapProb( B, IP->is_defined[cl_R] ); if( ! IP->inCentroidAlign ) { fprintf( IP->Datafiles->out_fpt, "seed = %d Initial MAP = %.5f sites = %d \n", IP->nSeedRun, RP->dInitProb, TotalNumMotifs( B )); for( t = 0; t < IP->nNumMotifTypes; t++ ) { fprintf(IP->Datafiles->out_fpt, "Motif %d Map = %.5f Frag = %.5f\n", t, CalcMotifMap(B, t, IP->is_defined[cl_R]), CalcMotifFragMap(B, t, IP->is_defined[cl_R]) ); } } #ifdef _MPI_ PrintTempOut( IP->Datafiles->mpiTemp_fpt, "rank = %d process = %d seed = %d Initial MAP = %.5f sites = %d\n", IP->nRank, IP->nMPIProcesses, IP->nSeedRun, RP->dInitProb, TotalNumMotifs( B ) ); for( t = 0; t < IP->nNumMotifTypes; t++ ) { PrintTempOut( IP->Datafiles->mpiTemp_fpt, "%d Motif = %.5f Frag = %.5f\n", t, CalcMotifMap(B, t, IP->is_defined[cl_R]), CalcMotifFragMap(B, t, IP->is_defined[cl_R]) ); } PrintTempOut( IP->Datafiles->mpiTemp_fpt, "Bkgnd = %.5f Beta = %.5f Null = %.5f\n", CalcBkgndMap(B, IP->is_defined[cl_R]), CalcBetaMap( B, IP->is_defined[cl_R]), IP->dnull_map ); #else fprintf( stdout, "seed = %d Initial MAP = %.5f sites = %d\n", IP->nSeedRun, B->RP->dInitProb, TotalNumMotifs( B ) ); for( t = 0; t < IP->nNumMotifTypes; t++ ) { fprintf( stdout, "%d Motif = %.5f Frag = %.5f\n", t, CalcMotifMap(B, t, IP->is_defined[cl_R]), CalcMotifFragMap(B, t, IP->is_defined[cl_R]) ); } fprintf( stdout, "Bkgnd = %.5f Beta = %.5f Seq = %.5f Null = %.5f\n", CalcBkgndMap(B, IP->is_defined[cl_R]), CalcBetaMap( B, IP->is_defined[cl_R]), CalcSitePerSeqMap( B ), IP->dnull_map ); #endif for( t = 0; t < IP->nNumMotifTypes; t++ ) { fprintf( stdout, "-----------------------------------------------------------\n" ); fprintf( stdout, " MOTIF %c\n\n", (char)(97 + t)); DumpMotifPositions( t, B, Pos, stdout ); fprintf( stdout, "%d sites\n", NUMMOTIFS( IP->nNumMotifs[t]) ); } fflush( stdout ); } /* create a motif element list */ M = set_motif_info(IP, startPos, B->Seq); #ifdef _DEBUG_ /* ==================================================================== */ for( t = 0; t < IP->nNumMotifTypes; t++ ) { fprintf( stdout, "-----------------------------------------------------------\n" ); fprintf( stdout, " MOTIF %c\n\n", (char)(97 + t)); DumpMotifPositions( t, B, Pos, stdout ); fprintf( stdout, "%d sites\n", NUMMOTIFS( IP->nNumMotifs[t]) ); DumpCounts( B, t, stdout ); fflush( stdout ); } /* ==================================================================== */ #endif if( ! IP->is_defined[cl_Z] ) { put_prior(B); fprintf(stdout, "\r** %d **\n", seed_run); fflush( stdout ); /* BT 9/19/97 */ } if(seed_run > 1) { RestoreAlignmentCounts( B ); if(IP->site_samp) locMax = site_sampler(B, Pos, M); else if( IP->is_defined[cl_bayes] ) locMax = bayes_sampler(B, Pos, M, seed_run); else if( IP->is_defined[cl_E] ) locMax = rsite_sampler(B, Pos, M, seed_run); else locMax = motif_sampler(B,Pos,M); if( IP->is_defined[cl_X] ) CopyMaxResults( &(B->AN->results[seed_run]), &locMax, B ); dCurrProb = locMax.dProbability; if(locMax.dProbability > maxData.dProbability) { /* Check to see if current run is max */ free_maxdata(&maxData, IP); maxData = locMax; maxData.nSuboptSeed = seed_run; if( (! IP->is_defined[cl_Z]) && IP->is_defined[cl_opt] ) print_maxData(IP->nNumMotifTypes, maxData); } else free_maxdata(&locMax, IP); } else { /* Set Maximum first time through */ if(IP->site_samp) maxData = site_sampler(B, Pos, M); else if( IP->is_defined[cl_bayes] ) maxData = bayes_sampler(B, Pos, M, seed_run); else if( IP->is_defined[cl_E] ) maxData = rsite_sampler(B, Pos, M, seed_run); else maxData = motif_sampler(B,Pos,M); dCurrProb = maxData.dProbability; maxData.nSuboptSeed = seed_run; if( IP->is_defined[cl_X] ) CopyMaxResults( &(B->AN->results[seed_run]), &maxData, B ); if( (! IP->is_defined[cl_Z]) && IP->is_defined[cl_opt] ) print_maxData(IP->nNumMotifTypes, maxData); } reset_motif_num(nNumMotifs, IP); copy_counts(B); /* Counts w/o motifs */ free_motifs(B, M); #ifdef _MPI_ if( IP->is_defined[cl_hm] ) { if( seed_run == IP->nSeeds ) SendSuboptMsg( B, G_MPI_SUBOPT_DONE, seed_run ); else SendSuboptMsg( B, G_MPI_SEED_DONE, seed_run ); } else { dTempInfo[1] = dCurrProb; dTempInfo[3] = 0; if( IP->is_defined[cl_opt] ) { for(t = 0; t < IP->nNumMotifTypes; t++) { dTempInfo[3] += (double) maxData.nNumMotifs[t]; } } Gibbs_MPI_Send( B, dTempInfo, 4, MPI_DOUBLE, 0, G_MPI_DATA, MPI_COMM_WORLD ); } Gibbs_MPI_Recv( B, dTempInfo, 4, MPI_DOUBLE, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status ); if( status.MPI_TAG == G_MPI_DONE ) { PrintTempOut( IP->Datafiles->mpiTemp_fpt, "Received MPI_DONE signal\n" ); break; } else if( status.MPI_TAG == G_MPI_FINISH ) { PrintTempOut( IP->Datafiles->mpiTemp_fpt, "Received subopt end signal\n" ); break; } #endif }/* end of while loop, at this time we have MaxData */ FREEP(startPos,IP->nNumMotifTypes ); if(!IP->is_defined[cl_F] && maxData.F) { for(t = 0; t < IP->nNumMotifTypes; t++) /* BT 2/7/97 */ { for(n = 0; n < B->F->nMaxLen[t]; n++) B->F->nColMask[t][n] = maxData.F->nColMask[t][n]; B->F->FragWidth[t] = maxData.F->FragWidth[t]; for( n = 0; n < IP->nMotifLen[t]; n++ ) B->F->fragPos[t][n] = maxData.F->fragPos[t][n]; } } for(t = 0; t < IP->nNumMotifTypes; t++) { /* Add in motifs */ if( maxData.nMotifLen ) IP->nMotifLen[t] = maxData.nMotifLen[t]; /* Reset Pos in case width changed */ if( ! IP->inCentroidAlign ) set_indicator_vector(Pos, B->Seq, B); /* BT 9/12/97 */ if( maxData.nNumMotifs ) { for(i = 0; i < maxData.nNumMotifs[t]; i++) /* from the maximum */ adjust_counts(B, ADD, maxData.nMotifLoc[i][t], /* alignment */ t, maxData.RevComp[i][t]); } } if( maxData.nNumMotifs ) { setMotifNum(IP, maxData); /* BT 5/30/97 */ } if( IP->is_defined[cl_u] ) print_info(B, maxData, TRUE, SUBOPT); /* BT 3/19/97 */ if( IP->is_defined[cl_d] && IP->is_defined[cl_q] ) GetWidthCounts( B ); FREEP(nNumMotifs, IP->nNumMotifTypes); free(nNumMotifs); if( IP->is_defined[cl_Q] ) /* BT 3/27/98 */ { fprintf( IP->Datafiles->occur_fpt, "seq pos pos2 motif count\n" ); for( i = 0; i < IP->nNumSequences; i++ ) { for( j = 0; j < SequenceLength( B, i ); j++ ) { nPos = SequenceStartPos( B, i ) + j; for( t = 0; t < IP->nNumMotifTypes; t++ ) { for( sum = 0, k = 1; k <= IP->nSeeds; k++ ) { sum += IP->nAlignCnts[k][t][i][j]; } fprintf( IP->Datafiles->occur_fpt, "%5d %5d %5d %5d %5d\n", i, j, nPos, t, sum ); } } fflush( IP->Datafiles->occur_fpt ); } } if( ! IP->is_defined[cl_Z] && ! IP->is_defined[cl_nopt] ) printf( "Max subopt MAP found on seed %d\n", maxData.nSuboptSeed ); return maxData; }
void generate_ceq_logos(char *meme_path, char *output_dir) { int i, dir_len, prefix_len, path_len; ARRAY_T *background; BOOLEAN_T has_reverse_strand; char *path, *alphabet; double logo_height, logo_width; ARRAYLST_T *motifs; MOTIF_T *motif; motifs = arraylst_create(); logo_height = LOGOHEIGHT; //make the path dir_len = strlen(output_dir); prefix_len = strlen(LOGO_PREFIX); path_len = dir_len + 1 + prefix_len + MAX_MOTIF_ID_LENGTH + 1; path = malloc(sizeof(char)*path_len); strncpy(path, output_dir, path_len); if (path[dir_len-1] != '/') { path[dir_len] = '/'; path[++dir_len] = '\0'; } strncpy(path+dir_len, LOGO_PREFIX, path_len - dir_len); // Read all motifs into an array. read_meme_file2(meme_path, NULL, // bg file name DEFAULT_PSEUDOCOUNTS, REQUIRE_PSPM, motifs, NULL,//motif occurrences &has_reverse_strand, &background); // global alphabet is set by read_meme_file alphabet = get_alphabet(FALSE); if (create_output_directory(output_dir, TRUE, (verbosity >= NORMAL_VERBOSE))) { // Failed to create output directory. exit(1); } for(i = 0; i < arraylst_size(motifs); i++) { motif = (MOTIF_T*)arraylst_get(i, motifs); logo_width = get_motif_length(motif); if (logo_width > MAXLOGOWIDTH) logo_width = MAXLOGOWIDTH; copy_and_sanatise_name(path+(dir_len+prefix_len), get_motif_id(motif), path_len - (dir_len + prefix_len)); CL_create2( motif, // motif "", // no title NULL, // no second motif "", // no x-axis label FALSE, // no error bars FALSE, // ssc logo_height, // logo height (cm) logo_width, // logo width (cm) alphabet, // alphabet 0, // no offset to second motif path, // output file path "MEME (no SSC)" // program name ); } free_motifs(motifs); free_array(background); // not used free(path); }