// Write out (possibly multiple) webm cluster(s) from frames split on video key frames. // // last: // current flush is triggered by EOS instead of a second outstanding video key frame. void WebmFrameSinkThread::flushFrames(List<const sp<WebmFrame> >& frames, bool last) { if (frames.empty()) { return; } uint64_t clusterTimecodeL; List<sp<WebmElement> > children; initCluster(frames, clusterTimecodeL, children); uint64_t cueTime = clusterTimecodeL; off_t fpos = ::lseek(mFd, 0, SEEK_CUR); size_t n = frames.size(); if (!last) { // If we are not flushing the last sequence of outstanding frames, flushFrames // must have been called right after we have pushed a second outstanding video key // frame (the last frame), which belongs to the next cluster; also hold back on // flushing the second to last frame before we check its type. A audio frame // should precede the aforementioned video key frame in the next sequence, a video // frame should be the last frame in the current (to-be-flushed) sequence. CHECK_GE(n, 2); n -= 2; } for (size_t i = 0; i < n; i++) { const sp<WebmFrame> f = *(frames.begin()); if (f->mType == kVideoType && f->mKey) { cueTime = f->mAbsTimecode; } if (f->mAbsTimecode - clusterTimecodeL > INT16_MAX) { writeCluster(children); initCluster(frames, clusterTimecodeL, children); } frames.erase(frames.begin()); children.push_back(f->SimpleBlock(clusterTimecodeL)); } // equivalent to last==false if (!frames.empty()) { // decide whether to write out the second to last frame. const sp<WebmFrame> secondLastFrame = *(frames.begin()); if (secondLastFrame->mType == kVideoType) { frames.erase(frames.begin()); children.push_back(secondLastFrame->SimpleBlock(clusterTimecodeL)); } } writeCluster(children); sp<WebmElement> cuePoint = WebmElement::CuePointEntry(cueTime, 1, fpos - mSegmentDataStart); mCues.push_back(cuePoint); }
/**
 * Writes the contents of the buffer to the specified cluster and then puts the
 * stream back at the position it had before the call. The buffer must be less
 * than or equal to the size of a cluster.
 *
 * @param fs      the file system whose backing stream (fs->fp) is written to
 * @param cluster the cluster to write
 * @param buffer  the data to write to the cluster
 * @param size    the size of the buffer
 *
 * @return bool true on success, false if the write failed or the stream could
 *         not be positioned (before the write) or repositioned (after it)
 */
bool writeClusterSafe( fatfs* fs, uint cluster, unsigned char* buffer, uint size ){
    /* Remember the real current offset. The previous code stored the SEEK_CUR
     * constant here, so the "restore" below jumped to a fixed, unrelated offset. */
    long startLocation = ftell( fs->fp );
    if( startLocation < 0 ){
        return false;
    }

    /* Never write if the seek to the target cluster failed: the data would land
     * at whatever position the stream happens to be at. */
    if( fseek( fs->fp, fs->clusterSize * cluster, SEEK_SET ) != 0 ){
        return false;
    }

    bool result = writeCluster( fs, buffer, size );

    /* Restore the caller's position; report failure if that is not possible. */
    if( fseek( fs->fp, startLocation, SEEK_SET ) != 0 ){
        return false;
    }

    return result;
}
static void writeCluster( std::ostream &out, int depth, const ClusterArray < std::vector<edge> > &edgeMap, const ClusterGraph &C, const ClusterGraphAttributes *CA, const cluster &c, int &clusterId) { if(C.rootCluster() == c) { writeHeader(out, depth++, CA); } else { GraphIO::indent(out, depth++) << "subgraph cluster" << clusterId << " {\n"; } clusterId++; bool whitespace; // True if a whitespace should printed (readability). whitespace = false; if(CA) { writeAttributes(out, depth, *CA, c); whitespace = true; } if(whitespace) { out << "\n"; } // Recursively export all subclusters. whitespace = false; for(ListConstIterator<cluster> cit = c->cBegin(); cit.valid(); ++cit) { writeCluster(out, depth, edgeMap, C, CA, *cit, clusterId); whitespace = true; } if(whitespace) { out << "\n"; } // Then, print all nodes whithout an adjacent edge. whitespace = false; for(ListConstIterator<node> nit = c->nBegin(); nit.valid(); ++nit) { whitespace |= writeNode(out, depth, CA, *nit); } if(whitespace) { out << "\n"; } // Finally, we print all edges for this cluster (ugly version for now). const std::vector<edge> &edges = edgeMap[c]; whitespace = false; for(size_t i = 0; i < edges.size(); i++) { whitespace |= writeEdge(out, depth, CA, edges[i]); } GraphIO::indent(out, --depth) << "}\n"; }
static void writeCluster( std::ostream &out, int depth, const ClusterGraph &C, const ClusterGraphAttributes *CA, cluster c) { if(C.rootCluster() != c) { GraphIO::indent(out, depth) << "<node " << "id=\"cluster" << c->index() << "\"" << ">\n"; } else { const std::string dir = (CA && !CA->directed()) ? "undirected" : "directed"; GraphIO::indent(out, depth) << "<graph " << "mode=\"static\"" << "defaultedgetype=\"" << dir << "\"" << ">\n"; if(CA) { defineAttributes(out, depth + 1, *CA); } } GraphIO::indent(out, depth + 1) << "<nodes>\n"; for(ListConstIterator<cluster> cit = c->cBegin(); cit.valid(); ++cit) { writeCluster(out, depth + 2, C, CA, *cit); } for(ListConstIterator<node> nit = c->nBegin(); nit.valid(); ++nit) { writeNode(out, depth + 2, CA, *nit); } GraphIO::indent(out, depth + 1) << "</nodes>\n"; if(C.rootCluster() != c) { GraphIO::indent(out, depth) << "</node>\n"; } else { writeEdges(out, C.constGraph(), CA); GraphIO::indent(out, depth) << "</graph>\n"; } }
static void flushMethylCluster(struct bedNamedScore **pClusters, int *pClusterSize,
                               int *hist, int histSize, FILE *out)
/* Count the finished cluster in the size histogram, write it to out, free the
 * list and reset the size counter.  Sizes that do not fit in the histogram are
 * counted in its last bin instead of indexing past the end of the array. */
{
    int bin = *pClusterSize;
    if (bin >= histSize)
        bin = histSize - 1;
    hist[bin]++;
    writeCluster(*pClusters, out);
    slFreeList(pClusters);
    *pClusterSize = 0;
}

void edwClusterMethylBed(char *inputName, char *outputName)
/* edwClusterMethylBed - cluster CpG regions from an input bed file and perform some
 * analysis on them.  Records are read one at a time and clustered separately per
 * strand: a record starts a new cluster when its chromosome differs from the
 * previous record on that strand or its chromStart is more than clJoinSize beyond
 * the previous record's chromStart.  Every finished cluster is written to
 * outputName; if clHist is set, a histogram of cluster sizes is printed to stdout.
 * In that histogram the last bin (16383) also holds all larger clusters. */
{
    enum { histSize = 16384 };

    // open the input and output files
    struct lineFile *input = lineFileOpen(inputName, TRUE);
    FILE *out = fopen(outputName, "w");
    if (out == NULL)
        {
        // without an output file every later write would dereference NULL
        perror(outputName);
        lineFileClose(&input);
        return;
        }

    // one set of state per strand: the previous record seen on that strand ...
    struct bedNamedScore *plusPrev = NULL;
    struct bedNamedScore *minusPrev = NULL;

    // ... and the cluster currently being built up, with its size
    struct bedNamedScore *plusClusters = NULL;
    struct bedNamedScore *minusClusters = NULL;
    int plusClusterSize = 0;
    int minusClusterSize = 0;

    // histogram of cluster sizes, indexed by size
    int hist[histSize];
    int i;
    for (i = 0; i < histSize; i++)
        hist[i] = 0;

    // loop over bed file
    for (;;)
        {
        struct bedNamedScore *record = bedNamedScoreLoadNext(input);

        // at the end of the input, write out whatever clusters are still open
        if (record == NULL)
            {
            if (plusClusterSize > 0)
                flushMethylCluster(&plusClusters, &plusClusterSize, hist, histSize, out);
            if (minusClusterSize > 0)
                flushMethylCluster(&minusClusters, &minusClusterSize, hist, histSize, out);
            break;
            }

        // anything that is not '+' is handled as the minus strand
        if (record->strand == '+')
            {
            // out of range (different chromosome or too far away): close this cluster
            if (plusPrev != NULL &&
                (strcmp(record->chrom, plusPrev->chrom) != 0 ||
                 record->chromStart - plusPrev->chromStart > clJoinSize))
                flushMethylCluster(&plusClusters, &plusClusterSize, hist, histSize, out);
            slAddHead(&plusClusters, record);
            plusClusterSize++;
            plusPrev = record;
            }
        else
            {
            if (minusPrev != NULL &&
                (strcmp(record->chrom, minusPrev->chrom) != 0 ||
                 record->chromStart - minusPrev->chromStart > clJoinSize))
                flushMethylCluster(&minusClusters, &minusClusterSize, hist, histSize, out);
            slAddHead(&minusClusters, record);
            minusClusterSize++;
            minusPrev = record;
            }
        }

    // close input and output
    lineFileClose(&input);
    fclose(out);

    // print out the histogram if option is enabled
    if (clHist)
        {
        for (i = 0; i < histSize; i++)
            {
            if (hist[i] > 0)
                printf("%5d%10d\n", i, hist[i]);
            }
        }
}
static bool writeCluster( std::ostream &out, int depth, const ClusterArray < std::vector<edge> > &edgeMap, const ClusterGraph &C, const ClusterGraphAttributes *CA, const cluster &c, int &clusterId) { std::ios_base::fmtflags currentFlags = out.flags(); out.flags(currentFlags | std::ios::fixed); bool result = out.good(); if(result) { if (C.rootCluster() == c) { writeHeader(out, depth++, CA); } else { GraphIO::indent(out, depth++) << "subgraph cluster" << clusterId << " {\n"; } clusterId++; bool whitespace; // True if a whitespace should printed (readability). whitespace = false; if (CA) { writeAttributes(out, depth, *CA, c); whitespace = true; } if (whitespace) { out << "\n"; } // Recursively export all subclusters. whitespace = false; for (cluster child : c->children) { writeCluster(out, depth, edgeMap, C, CA, child, clusterId); whitespace = true; } if (whitespace) { out << "\n"; } // Then, print all nodes whithout an adjacent edge. whitespace = false; for (node v : c->nodes) { whitespace |= writeNode(out, depth, CA, v); } if (whitespace) { out << "\n"; } // Finally, we print all edges for this cluster (ugly version for now). const std::vector<edge> &edges = edgeMap[c]; whitespace = false; for (auto &e : edges) { whitespace |= writeEdge(out, depth, CA, e); } GraphIO::indent(out, --depth) << "}\n"; } out.flags(currentFlags); return result; }
// Console counterpart of writeCluster: forwards cluster c of `clusters`
// (with num_dimensions dimensions) to writeCluster with stdout as the target.
void printCluster(clusters_t clusters, int c, int num_dimensions)
{
    FILE* console = stdout;
    writeCluster(console, clusters, c, num_dimensions);
}
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { int num_clusters; // For profiling clock_t seed_start, seed_end, seed_total = 0; clock_t regroup_start, regroup_end, regroup_total = 0; int regroup_iterations = 0; clock_t params_start, params_end, params_total = 0; int params_iterations = 0; clock_t constants_start, constants_end, constants_total = 0; int constants_iterations = 0; clock_t total_timer = clock(); double total_time = 0; clock_t io_timer; double io_time = 0; clock_t cpu_timer; double cpu_time = 0; io_timer = clock(); // Validate the command-line arguments, parse # of clusters, etc if(validateArguments(argc,argv,&num_clusters)) { return 1; //Bard args } int num_dimensions; int num_events; // Read FCS data PRINT("Parsing input file..."); // This stores the data in a 1-D array with consecutive values being the dimensions from a single event // (num_events by num_dimensions matrix) float* fcs_data_by_event = readData(argv[2],&num_dimensions,&num_events); if(!fcs_data_by_event) { printf("Error parsing input file. This could be due to an empty file "); printf("or an inconsistent number of dimensions. 
Aborting.\n"); return 1; } // Transpose the event data (allows coalesced access pattern in E-step kernel) // This has consecutive values being from the same dimension of the data // (num_dimensions by num_events matrix) float* fcs_data_by_dimension = (float*) malloc(sizeof(float)*num_events*num_dimensions); for(int e=0; e<num_events; e++) { for(int d=0; d<num_dimensions; d++) { fcs_data_by_dimension[d*num_events+e] = fcs_data_by_event[e*num_dimensions+d]; } } io_time += (double)(clock() - io_timer); PRINT("Number of events: %d\n",num_events); PRINT("Number of dimensions: %d\n",num_dimensions); PRINT("Number of target clusters: %d\n\n",num_clusters); cpu_timer = clock(); // Setup the cluster data structures on host clusters_t clusters; clusters.N = (float*) malloc(sizeof(float)*num_clusters); clusters.pi = (float*) malloc(sizeof(float)*num_clusters); clusters.constant = (float*) malloc(sizeof(float)*num_clusters); clusters.avgvar = (float*) malloc(sizeof(float)*num_clusters); clusters.means = (float*) malloc(sizeof(float)*num_dimensions*num_clusters); clusters.R = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions*num_clusters); clusters.Rinv = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions*num_clusters); clusters.memberships = (float*) malloc(sizeof(float)*num_events*num_clusters); if(!clusters.means || !clusters.R || !clusters.Rinv || !clusters.memberships) { printf("ERROR: Could not allocate memory for clusters.\n"); return 1; } DEBUG("Finished allocating memory on host for clusters.\n"); float rissanen; //////////////// Initialization done, starting kernels //////////////// DEBUG("Invoking seed_clusters kernel.\n"); fflush(stdout); // seed_clusters sets initial pi values, // finds the means / covariances and copies it to all the clusters // TODO: Does it make any sense to use multiple blocks for this? 
seed_start = clock(); seed_clusters(fcs_data_by_event, &clusters, num_dimensions, num_clusters, num_events); DEBUG("Invoking constants kernel.\n"); // Computes the R matrix inverses, and the gaussian constant //constants_kernel<<<num_clusters, num_threads>>>(d_clusters,num_clusters,num_dimensions); constants(&clusters,num_clusters,num_dimensions); constants_iterations++; seed_end = clock(); seed_total = seed_end - seed_start; // Calculate an epsilon value //int ndata_points = num_events*num_dimensions; float epsilon = (1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)*log((float)num_events*num_dimensions)*0.01; float likelihood, old_likelihood; int iters; epsilon = 1e-6; PRINT("Gaussian.cu: epsilon = %f\n",epsilon); /*************** EM ALGORITHM *****************************/ // do initial regrouping // Regrouping means calculate a cluster membership probability // for each event and each cluster. Each event is independent, // so the events are distributed to different blocks // (and hence different multiprocessors) DEBUG("Invoking regroup (E-step) kernel with %d blocks.\n",NUM_BLOCKS); regroup_start = clock(); estep1(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood); estep2(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood); //estep2b(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood); regroup_end = clock(); regroup_total += regroup_end - regroup_start; regroup_iterations++; DEBUG("Regroup Kernel Iteration Time: %f\n\n",((double)(regroup_end-regroup_start))/CLOCKS_PER_SEC); DEBUG("Likelihood: %e\n",likelihood); float change = epsilon*2; PRINT("Performing EM algorithm on %d clusters.\n",num_clusters); iters = 0; // This is the iterative loop for the EM algorithm. 
// It re-estimates parameters, re-computes constants, and then regroups the events // These steps keep repeating until the change in likelihood is less than some epsilon while(iters < MIN_ITERS || (fabs(change) > epsilon && iters < MAX_ITERS)) { old_likelihood = likelihood; DEBUG("Invoking reestimate_parameters (M-step) kernel.\n"); params_start = clock(); // This kernel computes a new N, pi isn't updated until compute_constants though mstep_n(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events); mstep_mean(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events); mstep_covar(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events); params_end = clock(); params_total += params_end - params_start; params_iterations++; DEBUG("Model M-Step Iteration Time: %f\n\n",((double)(params_end-params_start))/CLOCKS_PER_SEC); //return 0; // RETURN FOR FASTER PROFILING DEBUG("Invoking constants kernel.\n"); // Inverts the R matrices, computes the constant, normalizes cluster probabilities constants_start = clock(); constants(&clusters,num_clusters,num_dimensions); constants_end = clock(); constants_total += constants_end - constants_start; constants_iterations++; DEBUG("Constants Kernel Iteration Time: %f\n\n",((double)(constants_end-constants_start))/CLOCKS_PER_SEC); DEBUG("Invoking regroup (E-step) kernel with %d blocks.\n",NUM_BLOCKS); regroup_start = clock(); // Compute new cluster membership probabilities for all the events estep1(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood); estep2(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood); //estep2b(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood); regroup_end = clock(); regroup_total += regroup_end - regroup_start; regroup_iterations++; DEBUG("E-step Iteration Time: %f\n\n",((double)(regroup_end-regroup_start))/CLOCKS_PER_SEC); change = likelihood - old_likelihood; 
DEBUG("likelihood = %f\n",likelihood); DEBUG("Change in likelihood: %f\n",change); iters++; } // Calculate Rissanen Score rissanen = -likelihood + 0.5*(num_clusters*(1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)-1)*logf((float)num_events*num_dimensions); PRINT("\nFinal rissanen Score was: %f, with %d clusters.\n",rissanen,num_clusters); char* result_suffix = ".results"; char* summary_suffix = ".summary"; int filenamesize1 = strlen(argv[3]) + strlen(result_suffix) + 1; int filenamesize2 = strlen(argv[3]) + strlen(summary_suffix) + 1; char* result_filename = (char*) malloc(filenamesize1); char* summary_filename = (char*) malloc(filenamesize2); strcpy(result_filename,argv[3]); strcpy(summary_filename,argv[3]); strcat(result_filename,result_suffix); strcat(summary_filename,summary_suffix); PRINT("Summary filename: %s\n",summary_filename); PRINT("Results filename: %s\n",result_filename); cpu_time += (double)(clock() - cpu_timer); io_timer = clock(); // Open up the output file for cluster summary FILE* outf = fopen(summary_filename,"w"); if(!outf) { printf("ERROR: Unable to open file '%s' for writing.\n",argv[3]); } // Print the clusters with the lowest rissanen score to the console and output file for(int c=0; c<num_clusters; c++) { //if(saved_clusters.N[c] == 0.0) { // continue; //} if(ENABLE_PRINT) { // Output the final cluster stats to the console PRINT("Cluster #%d\n",c); printCluster(clusters,c,num_dimensions); PRINT("\n\n"); } if(ENABLE_OUTPUT) { // Output the final cluster stats to the output file fprintf(outf,"Cluster #%d\n",c); writeCluster(outf,clusters,c,num_dimensions); fprintf(outf,"\n\n"); } } // Print profiling information printf("Program Component\tTotal\tIters\tTime Per Iteration\n"); printf(" Seed Kernel:\t%7.4f\t%d\t%7.4f\n",seed_total/(double)CLOCKS_PER_SEC,1, (double) seed_total / (double) CLOCKS_PER_SEC); printf(" E-step Kernel:\t%7.4f\t%d\t%7.4f\n",regroup_total/(double)CLOCKS_PER_SEC,regroup_iterations, (double) regroup_total / (double) 
CLOCKS_PER_SEC / (double) regroup_iterations); printf(" M-step Kernel:\t%7.4f\t%d\t%7.4f\n",params_total/(double)CLOCKS_PER_SEC,params_iterations, (double) params_total / (double) CLOCKS_PER_SEC / (double) params_iterations); printf(" Constants Kernel:\t%7.4f\t%d\t%7.4f\n",constants_total/(double)CLOCKS_PER_SEC,constants_iterations, (double) constants_total / (double) CLOCKS_PER_SEC / (double) constants_iterations); // Write profiling info to summary file fprintf(outf,"Program Component\tTotal\tIters\tTime Per Iteration\n"); fprintf(outf," Seed Kernel:\t%7.4f\t%d\t%7.4f\n",seed_total/(double)CLOCKS_PER_SEC,1, (double) seed_total / (double) CLOCKS_PER_SEC); fprintf(outf," E-step Kernel:\t%7.4f\t%d\t%7.4f\n",regroup_total/(double)CLOCKS_PER_SEC,regroup_iterations, (double) regroup_total / (double) CLOCKS_PER_SEC / (double) regroup_iterations); fprintf(outf," M-step Kernel:\t%7.4f\t%d\t%7.4f\n",params_total/(double)CLOCKS_PER_SEC,params_iterations, (double) params_total / (double) CLOCKS_PER_SEC / (double) params_iterations); fprintf(outf," Constants Kernel:\t%7.4f\t%d\t%7.4f\n",constants_total/(double)CLOCKS_PER_SEC,constants_iterations, (double) constants_total / (double) CLOCKS_PER_SEC / (double) constants_iterations); fclose(outf); // Open another output file for the event level clustering results FILE* fresults = fopen(result_filename,"w"); if(ENABLE_OUTPUT) { for(int i=0; i<num_events; i++) { for(int d=0; d<num_dimensions-1; d++) { fprintf(fresults,"%f,",fcs_data_by_event[i*num_dimensions+d]); } fprintf(fresults,"%f",fcs_data_by_event[i*num_dimensions+num_dimensions-1]); fprintf(fresults,"\t"); for(int c=0; c<num_clusters-1; c++) { fprintf(fresults,"%f,",clusters.memberships[c*num_events+i]); } fprintf(fresults,"%f",clusters.memberships[(num_clusters-1)*num_events+i]); fprintf(fresults,"\n"); } } fclose(fresults); io_time += (double)(clock() - io_timer); printf("\n"); printf( "I/O time: %f (ms)\n", 1000.0*io_time/CLOCKS_PER_SEC); printf( "CPU processing time: %f 
(ms)\n", 1000.0*cpu_time/CLOCKS_PER_SEC); total_time += (double)(clock() - total_timer); printf( "Total time: %f (ms)\n", 1000.0*total_time/CLOCKS_PER_SEC); // cleanup host memory free(fcs_data_by_event); free(fcs_data_by_dimension); free(clusters.N); free(clusters.pi); free(clusters.constant); free(clusters.avgvar); free(clusters.means); free(clusters.R); free(clusters.Rinv); free(clusters.memberships); return 0; }