// Write out (possibly multiple) webm cluster(s) from frames split on video key frames.
//
// last:
//   current flush is triggered by EOS instead of a second outstanding video key frame.
void WebmFrameSinkThread::flushFrames(List<const sp<WebmFrame> >& frames, bool last) {
    if (frames.empty()) {
        return;
    }

    uint64_t clusterTimecodeL;
    List<sp<WebmElement> > children;
    initCluster(frames, clusterTimecodeL, children);

    uint64_t cueTime = clusterTimecodeL;
    off_t fpos = ::lseek(mFd, 0, SEEK_CUR);
    size_t n = frames.size();
    if (!last) {
        // If we are not flushing the last sequence of outstanding frames, flushFrames
        // must have been called right after we have pushed a second outstanding video key
        // frame (the last frame), which belongs to the next cluster; also hold back on
        // flushing the second to last frame before we check its type. A audio frame
        // should precede the aforementioned video key frame in the next sequence, a video
        // frame should be the last frame in the current (to-be-flushed) sequence.
        CHECK_GE(n, 2);
        n -= 2;
    }

    for (size_t i = 0; i < n; i++) {
        const sp<WebmFrame> f = *(frames.begin());
        if (f->mType == kVideoType && f->mKey) {
            cueTime = f->mAbsTimecode;
        }

        if (f->mAbsTimecode - clusterTimecodeL > INT16_MAX) {
            writeCluster(children);
            initCluster(frames, clusterTimecodeL, children);
        }

        frames.erase(frames.begin());
        children.push_back(f->SimpleBlock(clusterTimecodeL));
    }

    // equivalent to last==false
    if (!frames.empty()) {
        // decide whether to write out the second to last frame.
        const sp<WebmFrame> secondLastFrame = *(frames.begin());
        if (secondLastFrame->mType == kVideoType) {
            frames.erase(frames.begin());
            children.push_back(secondLastFrame->SimpleBlock(clusterTimecodeL));
        }
    }

    writeCluster(children);
    sp<WebmElement> cuePoint = WebmElement::CuePointEntry(cueTime, 1, fpos - mSegmentDataStart);
    mCues.push_back(cuePoint);
}
Ejemplo n.º 2
0
/**
 *	Writes the contents of the buffer to the specified cluster. The buffer must 
 *	be less than or equal to the size of a cluster.
 *
 *	@param fatfs
 *	@param uint 			the cluster to write 
 *	@param unsigned char 	the buffer to write into
 *	@param uint 			the size of the buffer
 *
 *	@return bool 			true on success, false on failure
 */
bool writeClusterSafe( fatfs* fs, uint cluster, unsigned char* buffer, uint size ){
	int startLocation = SEEK_CUR;
	fseek( fs->fp, fs->clusterSize * cluster, SEEK_SET );
	bool result = writeCluster( fs, buffer, size );
	fseek( fs->fp, startLocation, SEEK_SET );
	return result;
}
Ejemplo n.º 3
0
static void writeCluster(
	std::ostream &out, int depth,
	const ClusterArray < std::vector<edge> > &edgeMap,
	const ClusterGraph &C, const ClusterGraphAttributes *CA, const cluster &c,
	int &clusterId)
{
	if(C.rootCluster() == c) {
		writeHeader(out, depth++, CA);
	} else {
		GraphIO::indent(out, depth++) << "subgraph cluster" << clusterId
		                              << " {\n";
	}
	clusterId++;

	bool whitespace; // True if a whitespace should printed (readability).

	whitespace = false;
	if(CA) {
		writeAttributes(out, depth, *CA, c);
		whitespace = true;
	}

	if(whitespace) {
		out << "\n";
	}

	// Recursively export all subclusters.
	whitespace = false;
	for(ListConstIterator<cluster> cit = c->cBegin(); cit.valid(); ++cit) {
		writeCluster(out, depth, edgeMap, C, CA, *cit, clusterId);
		whitespace = true;
	}

	if(whitespace) {
		out << "\n";
	}

	// Then, print all nodes whithout an adjacent edge.
	whitespace = false;
	for(ListConstIterator<node> nit = c->nBegin(); nit.valid(); ++nit) {
		whitespace |= writeNode(out, depth, CA, *nit);
	}

	if(whitespace) {
		out << "\n";
	}

	// Finally, we print all edges for this cluster (ugly version for now).
	const std::vector<edge> &edges = edgeMap[c];
	whitespace = false;
	for(size_t i = 0; i < edges.size(); i++) {
		whitespace |= writeEdge(out, depth, CA, edges[i]);
	}

	GraphIO::indent(out, --depth) << "}\n";
}
Ejemplo n.º 4
0
static void writeCluster(
	std::ostream &out, int depth,
	const ClusterGraph &C, const ClusterGraphAttributes *CA, cluster c)
{
	if(C.rootCluster() != c) {
		GraphIO::indent(out, depth) << "<node "
		                            << "id=\"cluster" << c->index() << "\""
		                            << ">\n";
	} else {
		const std::string dir =
			(CA && !CA->directed()) ? "undirected" : "directed";
		GraphIO::indent(out, depth) << "<graph "
		                            << "mode=\"static\""
		                            << "defaultedgetype=\"" << dir << "\""
		                            << ">\n";

		if(CA) {
			defineAttributes(out, depth + 1, *CA);
		}
	}

	GraphIO::indent(out, depth + 1) << "<nodes>\n";

	for(ListConstIterator<cluster> cit = c->cBegin(); cit.valid(); ++cit) {
		writeCluster(out, depth + 2, C, CA, *cit);
	}

	for(ListConstIterator<node> nit = c->nBegin(); nit.valid(); ++nit) {
		writeNode(out, depth + 2, CA, *nit);
	}

	GraphIO::indent(out, depth + 1) << "</nodes>\n";

	if(C.rootCluster() != c) {
		GraphIO::indent(out, depth) << "</node>\n";
	} else {
		writeEdges(out, C.constGraph(), CA);
		GraphIO::indent(out, depth) << "</graph>\n";
	}
}
Ejemplo n.º 5
0
void edwClusterMethylBed(char *inputName, char *outputName)
/* edwClusterMethylBed - cluster CpG regions from an input bed file and perform some analysis on them. */
{

// open the input and output files
struct lineFile *input = lineFileOpen(inputName, TRUE);
FILE *out = fopen(outputName, "w");

// keep 2 sets of everything, one for plus one for minus. Here are pointers to the previous element in the list
struct bedNamedScore *plusPrev = NULL;
struct bedNamedScore *minusPrev = NULL;

// set up lists for the clusters as we build them up
struct bedNamedScore *plusClusters = NULL;
struct bedNamedScore *minusClusters = NULL;

int plusClusterSize = 0;
int minusClusterSize = 0;

// this could be done better, but it seems to work okay for now. It will crash on sufficiently large clusters
int hist[16384];
int i;
for (i = 0; i < 16384; i++)
    hist[i] = 0;

// loop over bed file
for (;;)
    {
    struct bedNamedScore *record = bedNamedScoreLoadNext(input);
    
    // at the end, we print out the last cluster
    if (record == NULL)
        {
        if (plusClusterSize > 0)
            {
            hist[plusClusterSize]++;
            writeCluster(plusClusters, out);
            }
        if (minusClusterSize > 0)
            {
            hist[minusClusterSize]++;
            writeCluster(minusClusters, out);
            }
    
        break;
        }
    
    // handling each strand separately seemed easier but this could be refactored
    if (record->strand == '+')
        {
        // if we're out of range (one way or another) then we print out this cluster and start anew
        if (plusPrev != NULL && (strcmp(record->chrom, plusPrev->chrom) != 0 || record->chromStart - plusPrev->chromStart > clJoinSize))
            {
            hist[plusClusterSize]++;
            writeCluster(plusClusters, out);
            slFreeList(&plusClusters);
            plusClusterSize = 0;
            }
        slAddHead(&plusClusters, record);
        plusClusterSize++;
        plusPrev = record;
        }
    else
        {
        if (minusPrev != NULL && (strcmp(record->chrom, minusPrev->chrom) != 0 || record->chromStart - minusPrev->chromStart > clJoinSize))
            {
            hist[minusClusterSize]++;
            writeCluster(minusClusters, out);
            slFreeList(&minusClusters);
            minusClusterSize = 0;
            }
        slAddHead(&minusClusters, record);
        minusClusterSize++;
        minusPrev = record;
        }
    }

// close input and output
lineFileClose(&input);
fclose(out);

// print out the histogram if option is enabled
if (clHist)
    {
    for (i = 0; i < 16384; i++)
        {
        if (hist[i] > 0)
            printf("%5d%10d\n", i, hist[i]);
        }
    }

}
Ejemplo n.º 6
0
static bool writeCluster(
	std::ostream &out, int depth,
	const ClusterArray < std::vector<edge> > &edgeMap,
	const ClusterGraph &C, const ClusterGraphAttributes *CA, const cluster &c,
	int &clusterId)
{
	std::ios_base::fmtflags currentFlags = out.flags();
	out.flags(currentFlags | std::ios::fixed);
	bool result = out.good();

	if(result) {
		if (C.rootCluster() == c) {
			writeHeader(out, depth++, CA);
		} else {
			GraphIO::indent(out, depth++) << "subgraph cluster" << clusterId << " {\n";
		}
		clusterId++;

		bool whitespace; // True if a whitespace should printed (readability).

		whitespace = false;
		if (CA) {
			writeAttributes(out, depth, *CA, c);
			whitespace = true;
		}

		if (whitespace) {
			out << "\n";
		}

		// Recursively export all subclusters.
		whitespace = false;
		for (cluster child : c->children) {
			writeCluster(out, depth, edgeMap, C, CA, child, clusterId);
			whitespace = true;
		}

		if (whitespace) {
			out << "\n";
		}

		// Then, print all nodes whithout an adjacent edge.
		whitespace = false;
		for (node v : c->nodes) {
			whitespace |= writeNode(out, depth, CA, v);
		}

		if (whitespace) {
			out << "\n";
		}

		// Finally, we print all edges for this cluster (ugly version for now).
		const std::vector<edge> &edges = edgeMap[c];
		whitespace = false;
		for (auto &e : edges) {
			whitespace |= writeEdge(out, depth, CA, e);
		}

		GraphIO::indent(out, --depth) << "}\n";
	}

	out.flags(currentFlags);

	return result;
}
Ejemplo n.º 7
0
void printCluster(clusters_t clusters, int c, int num_dimensions) {
    writeCluster(stdout,clusters,c,num_dimensions);
}
Ejemplo n.º 8
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv) {
    int num_clusters;
    
    // For profiling 
    clock_t seed_start, seed_end, seed_total = 0;
    clock_t regroup_start, regroup_end, regroup_total = 0;
    int regroup_iterations = 0;
    clock_t params_start, params_end, params_total = 0;
    int params_iterations = 0;
    clock_t constants_start, constants_end, constants_total = 0;
    int constants_iterations = 0;
    clock_t total_timer = clock();
    double total_time = 0;
    clock_t io_timer;
    double io_time = 0;
    clock_t cpu_timer;
    double cpu_time = 0;

    io_timer = clock();
    // Validate the command-line arguments, parse # of clusters, etc 
    if(validateArguments(argc,argv,&num_clusters)) {
        return 1; //Bard args
    }
    
    int num_dimensions;
    int num_events;
    
    // Read FCS data   
    PRINT("Parsing input file...");
    // This stores the data in a 1-D array with consecutive values being the dimensions from a single event
    // (num_events by num_dimensions matrix)
    float* fcs_data_by_event = readData(argv[2],&num_dimensions,&num_events);    

    if(!fcs_data_by_event) {
        printf("Error parsing input file. This could be due to an empty file ");
        printf("or an inconsistent number of dimensions. Aborting.\n");
        return 1;
    }
    
    // Transpose the event data (allows coalesced access pattern in E-step kernel)
    // This has consecutive values being from the same dimension of the data 
    // (num_dimensions by num_events matrix)
    float* fcs_data_by_dimension  = (float*) malloc(sizeof(float)*num_events*num_dimensions);
    
    for(int e=0; e<num_events; e++) {
        for(int d=0; d<num_dimensions; d++) {
            fcs_data_by_dimension[d*num_events+e] = fcs_data_by_event[e*num_dimensions+d];
        }
    }    

    io_time += (double)(clock() - io_timer);
   
    PRINT("Number of events: %d\n",num_events);
    PRINT("Number of dimensions: %d\n",num_dimensions);
    PRINT("Number of target clusters: %d\n\n",num_clusters);
   
    cpu_timer = clock();
    
    // Setup the cluster data structures on host
    clusters_t clusters;
    clusters.N = (float*) malloc(sizeof(float)*num_clusters);
    clusters.pi = (float*) malloc(sizeof(float)*num_clusters);
    clusters.constant = (float*) malloc(sizeof(float)*num_clusters);
    clusters.avgvar = (float*) malloc(sizeof(float)*num_clusters);
    clusters.means = (float*) malloc(sizeof(float)*num_dimensions*num_clusters);
    clusters.R = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions*num_clusters);
    clusters.Rinv = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions*num_clusters);
    clusters.memberships = (float*) malloc(sizeof(float)*num_events*num_clusters);
    if(!clusters.means || !clusters.R || !clusters.Rinv || !clusters.memberships) { 
        printf("ERROR: Could not allocate memory for clusters.\n"); 
        return 1; 
    }
    DEBUG("Finished allocating memory on host for clusters.\n");
    
    float rissanen;
    
    //////////////// Initialization done, starting kernels //////////////// 
    DEBUG("Invoking seed_clusters kernel.\n");
    fflush(stdout);

    // seed_clusters sets initial pi values, 
    // finds the means / covariances and copies it to all the clusters
    // TODO: Does it make any sense to use multiple blocks for this?
    seed_start = clock();
    seed_clusters(fcs_data_by_event, &clusters, num_dimensions, num_clusters, num_events);
   
    DEBUG("Invoking constants kernel.\n");
    // Computes the R matrix inverses, and the gaussian constant
    //constants_kernel<<<num_clusters, num_threads>>>(d_clusters,num_clusters,num_dimensions);
    constants(&clusters,num_clusters,num_dimensions);
    constants_iterations++;
    seed_end = clock();
    seed_total = seed_end - seed_start;
    
    // Calculate an epsilon value
    //int ndata_points = num_events*num_dimensions;
    float epsilon = (1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)*log((float)num_events*num_dimensions)*0.01;
    float likelihood, old_likelihood;
    int iters;
    
    epsilon = 1e-6;
    PRINT("Gaussian.cu: epsilon = %f\n",epsilon);

    /*************** EM ALGORITHM *****************************/
    
    // do initial regrouping
    // Regrouping means calculate a cluster membership probability
    // for each event and each cluster. Each event is independent,
    // so the events are distributed to different blocks 
    // (and hence different multiprocessors)
    DEBUG("Invoking regroup (E-step) kernel with %d blocks.\n",NUM_BLOCKS);
    regroup_start = clock();
    estep1(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
    estep2(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
    //estep2b(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
    regroup_end = clock();
    regroup_total += regroup_end - regroup_start;
    regroup_iterations++;
    DEBUG("Regroup Kernel Iteration Time: %f\n\n",((double)(regroup_end-regroup_start))/CLOCKS_PER_SEC);

    DEBUG("Likelihood: %e\n",likelihood);

    float change = epsilon*2;
    
    PRINT("Performing EM algorithm on %d clusters.\n",num_clusters);
    iters = 0;
    // This is the iterative loop for the EM algorithm.
    // It re-estimates parameters, re-computes constants, and then regroups the events
    // These steps keep repeating until the change in likelihood is less than some epsilon        
    while(iters < MIN_ITERS || (fabs(change) > epsilon && iters < MAX_ITERS)) {
        old_likelihood = likelihood;
        
        DEBUG("Invoking reestimate_parameters (M-step) kernel.\n");
        params_start = clock();
        // This kernel computes a new N, pi isn't updated until compute_constants though
        mstep_n(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events);
        mstep_mean(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events);
        mstep_covar(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events);
        params_end = clock();
        params_total += params_end - params_start;
        params_iterations++;
        DEBUG("Model M-Step Iteration Time: %f\n\n",((double)(params_end-params_start))/CLOCKS_PER_SEC);
        //return 0; // RETURN FOR FASTER PROFILING
        
        DEBUG("Invoking constants kernel.\n");
        // Inverts the R matrices, computes the constant, normalizes cluster probabilities
        constants_start = clock();
        constants(&clusters,num_clusters,num_dimensions);
        constants_end = clock();
        constants_total += constants_end - constants_start;
        constants_iterations++;
        DEBUG("Constants Kernel Iteration Time: %f\n\n",((double)(constants_end-constants_start))/CLOCKS_PER_SEC);

        DEBUG("Invoking regroup (E-step) kernel with %d blocks.\n",NUM_BLOCKS);
        regroup_start = clock();
        // Compute new cluster membership probabilities for all the events
        estep1(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
        estep2(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
        //estep2b(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
        regroup_end = clock();
        regroup_total += regroup_end - regroup_start;
        regroup_iterations++;
        DEBUG("E-step Iteration Time: %f\n\n",((double)(regroup_end-regroup_start))/CLOCKS_PER_SEC);
    
        change = likelihood - old_likelihood;
        DEBUG("likelihood = %f\n",likelihood);
        DEBUG("Change in likelihood: %f\n",change);

        iters++;

    }
    
    // Calculate Rissanen Score
    rissanen = -likelihood + 0.5*(num_clusters*(1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)-1)*logf((float)num_events*num_dimensions);
    PRINT("\nFinal rissanen Score was: %f, with %d clusters.\n",rissanen,num_clusters);
    
    char* result_suffix = ".results";
    char* summary_suffix = ".summary";
    int filenamesize1 = strlen(argv[3]) + strlen(result_suffix) + 1;
    int filenamesize2 = strlen(argv[3]) + strlen(summary_suffix) + 1;
    char* result_filename = (char*) malloc(filenamesize1);
    char* summary_filename = (char*) malloc(filenamesize2);
    strcpy(result_filename,argv[3]);
    strcpy(summary_filename,argv[3]);
    strcat(result_filename,result_suffix);
    strcat(summary_filename,summary_suffix);
    
    PRINT("Summary filename: %s\n",summary_filename);
    PRINT("Results filename: %s\n",result_filename);
    cpu_time += (double)(clock() - cpu_timer);
    
    io_timer = clock();
    // Open up the output file for cluster summary
    FILE* outf = fopen(summary_filename,"w");
    if(!outf) {
        printf("ERROR: Unable to open file '%s' for writing.\n",argv[3]);
    }

    // Print the clusters with the lowest rissanen score to the console and output file
    for(int c=0; c<num_clusters; c++) {
        //if(saved_clusters.N[c] == 0.0) {
        //    continue;
        //}
        if(ENABLE_PRINT) {
            // Output the final cluster stats to the console
            PRINT("Cluster #%d\n",c);
            printCluster(clusters,c,num_dimensions);
            PRINT("\n\n");
        }

        if(ENABLE_OUTPUT) {
            // Output the final cluster stats to the output file        
            fprintf(outf,"Cluster #%d\n",c);
            writeCluster(outf,clusters,c,num_dimensions);
            fprintf(outf,"\n\n");
        }
    }
    
    // Print profiling information
    printf("Program Component\tTotal\tIters\tTime Per Iteration\n");
    printf("        Seed Kernel:\t%7.4f\t%d\t%7.4f\n",seed_total/(double)CLOCKS_PER_SEC,1, (double) seed_total / (double) CLOCKS_PER_SEC);
    printf("      E-step Kernel:\t%7.4f\t%d\t%7.4f\n",regroup_total/(double)CLOCKS_PER_SEC,regroup_iterations, (double) regroup_total / (double) CLOCKS_PER_SEC / (double) regroup_iterations);
    printf("      M-step Kernel:\t%7.4f\t%d\t%7.4f\n",params_total/(double)CLOCKS_PER_SEC,params_iterations, (double) params_total / (double) CLOCKS_PER_SEC / (double) params_iterations);
    printf("   Constants Kernel:\t%7.4f\t%d\t%7.4f\n",constants_total/(double)CLOCKS_PER_SEC,constants_iterations, (double) constants_total / (double) CLOCKS_PER_SEC / (double) constants_iterations);    
   
    // Write profiling info to summary file
    fprintf(outf,"Program Component\tTotal\tIters\tTime Per Iteration\n");
    fprintf(outf,"        Seed Kernel:\t%7.4f\t%d\t%7.4f\n",seed_total/(double)CLOCKS_PER_SEC,1, (double) seed_total / (double) CLOCKS_PER_SEC);
    fprintf(outf,"      E-step Kernel:\t%7.4f\t%d\t%7.4f\n",regroup_total/(double)CLOCKS_PER_SEC,regroup_iterations, (double) regroup_total / (double) CLOCKS_PER_SEC / (double) regroup_iterations);
    fprintf(outf,"      M-step Kernel:\t%7.4f\t%d\t%7.4f\n",params_total/(double)CLOCKS_PER_SEC,params_iterations, (double) params_total / (double) CLOCKS_PER_SEC / (double) params_iterations);
    fprintf(outf,"   Constants Kernel:\t%7.4f\t%d\t%7.4f\n",constants_total/(double)CLOCKS_PER_SEC,constants_iterations, (double) constants_total / (double) CLOCKS_PER_SEC / (double) constants_iterations);    
    fclose(outf);
    
    
    // Open another output file for the event level clustering results
    FILE* fresults = fopen(result_filename,"w");
   
    if(ENABLE_OUTPUT) { 
        for(int i=0; i<num_events; i++) {
            for(int d=0; d<num_dimensions-1; d++) {
                fprintf(fresults,"%f,",fcs_data_by_event[i*num_dimensions+d]);
            }
            fprintf(fresults,"%f",fcs_data_by_event[i*num_dimensions+num_dimensions-1]);
            fprintf(fresults,"\t");
            for(int c=0; c<num_clusters-1; c++) {
                fprintf(fresults,"%f,",clusters.memberships[c*num_events+i]);
            }
            fprintf(fresults,"%f",clusters.memberships[(num_clusters-1)*num_events+i]);
            fprintf(fresults,"\n");
        }
    }
    fclose(fresults); 
    io_time += (double)(clock() - io_timer);
    printf("\n");
    printf( "I/O time: %f (ms)\n", 1000.0*io_time/CLOCKS_PER_SEC);
    printf( "CPU processing time: %f (ms)\n", 1000.0*cpu_time/CLOCKS_PER_SEC);
    total_time += (double)(clock() - total_timer);
    printf( "Total time: %f (ms)\n", 1000.0*total_time/CLOCKS_PER_SEC);
 
    // cleanup host memory
    free(fcs_data_by_event);
    free(fcs_data_by_dimension);
    free(clusters.N);
    free(clusters.pi);
    free(clusters.constant);
    free(clusters.avgvar);
    free(clusters.means);
    free(clusters.R);
    free(clusters.Rinv);
    free(clusters.memberships);

    return 0;
}