예제 #1
0
// Start here
int main(int argc,char *argv[])
{
	struct arguments arguments;
	arguments.inputSize = 0;
	arguments.nChannels = 0;
	arguments.beforeSpike = -1;
	arguments.afterSpike = -1;
	arguments.peakPosition = -1;
	arguments.spikeLength = -1;
	arguments.nComponents = 0; // number of principal components
	arguments.isCenteredData = false;
	arguments.offset = 0;
	
	arguments.isInputFileProvided = false;
	arguments.isOutputFileProvided = false;
	arguments.isInputSizeProvided = false;
	arguments.isNChannelsProvided = false;
	arguments.isBeforeSpikeProvided = false;
	arguments.isAfterSpikeProvided = false;
	arguments.isPeakPositionProvided = false;
	arguments.isSpikeLengthProvided = false;
	arguments.isNComponentsProvided = false;
	arguments.isExtraFeaturesProvided = false;
	arguments.isOffsetProvided = false;
#ifdef NBITS
	arguments.nBits = int(RECORD_BYTE_SIZE*8);
	arguments.isNBitsProvided = false;
#endif
	
	parseArgs(argc,argv,arguments); // Parse command-line
	
	short *rawData; // data, means/channel
	short **peakVal; // peak values for extra features output
	// means, sum and variance-cov matrix for each dimension in each channel
	double **mean,**sum;
	// data for each channel/ spike dimension/ spike & reduced data (PCA)
	gsl_matrix **datSpkChanCenter, ** datSpkChan,**reducedData;
	gsl_matrix **varcov;// variance-cov matrix for each dimension
	FILE *inputFile = NULL,*outputFile = NULL;
	unsigned long int nSpikes = -1,nRecordsRead = 0;
	int data2use = arguments.spikeLength; // number of record to use in the waveform
	int recShift = 0; // shift for first record to consider in the waveform
	if(arguments.isBeforeSpikeProvided)
	{
		data2use = (arguments.beforeSpike+1+arguments.afterSpike);
		recShift = arguments.peakPosition-arguments.beforeSpike;
	}
	gsl_vector *eigenValues = gsl_vector_alloc(data2use);
	gsl_matrix *eigenVectors = gsl_matrix_alloc(data2use,data2use);
	gsl_eigen_symmv_workspace * w = gsl_eigen_symmv_alloc(data2use);
	gsl_matrix_view reducedEigenVectors; // for storing final data
	
	// open input
	if ( arguments.isInputFileProvided )
	{
		inputFile = fopen(arguments.inputFileName,"rb");
		if ( inputFile == NULL )
		{
			cerr << "error: cannot open '" << arguments.inputFileName << "'." << endl;
			exit(1);
		} // if
		fseeko(inputFile,0,SEEK_END); // put the position indicator at the end of the stream
		arguments.inputSize = ftello(inputFile); // value of the position indicator of the stream (here ~file size)
	} // if
	
	// Check Input Size
	if ( !checkInputs(arguments) ) // check arguments value
		exit(1);
	
	if ( verbose )
	{
		cout << endl;
		cout << "Input File            = ";
		if ( arguments.isInputFileProvided ) cout << arguments.inputFileName << endl;
		else cout << "-" << endl;
		cout << "Output File           = " << arguments.outputFileName << endl;
		cout << "Input Size            = ";
		if ( arguments.isInputSizeProvided ) cout << arguments.inputSize << endl;
		else cout << "N/A" << endl;
		
#ifdef NBITS
		cout << "Resolution            = ";
		if ( arguments.isNBitsProvided ) cout << arguments.nBits << " bits" << endl;
		else cout << "default" << endl;
#endif
		cout << endl;
	} // if verbose
	
	// number of records (for all and one channels)
	nRecords = arguments.inputSize/RECORD_BYTE_SIZE;
	nRecordsPerChannel = nRecords/arguments.nChannels;
	nSpikes = nRecordsPerChannel/arguments.spikeLength; // nb spikes
	
	if ( nRecords < 1 || nRecordsPerChannel < 1 )
	{
		cerr << "error: not enough records (size " << arguments.inputSize << ")" << endl;
		exit(1);
	}
	if ( nSpikes < 1 )
	{
		cerr << "error: incorrect spike number (" << nSpikes << "), check number of channels ("
				<< arguments.nChannels << ") and input size (" << arguments.inputSize << ")." << endl;
		exit(1);
	}
	
	if ( verbose )
	{
		cout << "Waveform length                 = " << arguments.spikeLength << endl;
		cout << "Number of principal components  = " << arguments.nComponents << endl;
		if(arguments.isBeforeSpikeProvided)
		{
			cout << "Number of samples before peak   = " << arguments.beforeSpike << endl;
			cout << "Number of samples after peak    = " << arguments.afterSpike << endl;
			cout << "Peak position in waveform       = " << arguments.peakPosition << endl;
		}
		cout << "Projection with centered data   = " << arguments.isCenteredData << endl;
		///cout << "Record size basis               = " << (RECORD_BYTE_SIZE*8) << " bits" << endl;
		cout << "Number of input records         = " << nRecords << endl;
		cout << "Number of records per channel   = " << nRecordsPerChannel << endl;
		cout << "Number of spikes                = " << nSpikes <<endl;
		cout << endl;
	}

	ProgressBar *progress = new ProgressBar("","PCA",(arguments.nChannels+4));
	
	// Init arrays
	rawData = new short[nRecords]; // Buffer for all data (all channels)
	mean = new double* [arguments.nChannels]; // means init
	sum = new double* [arguments.nChannels]; // sums init
	if(arguments.isExtraFeaturesProvided)
		peakVal = new short* [arguments.nChannels]; // peak values init&
	if(!arguments.isCenteredData)
		datSpkChan = new gsl_matrix* [arguments.nChannels];
	datSpkChanCenter = new gsl_matrix* [arguments.nChannels];
	reducedData = new gsl_matrix* [arguments.nChannels];
	varcov = new gsl_matrix* [arguments.nChannels];
	
	progress->start(); // Start progress bar
	///progress->message("Extracting data");
	// Get data
	if ( arguments.isInputFileProvided )
	{
		rewind(inputFile); // put the position indicator at the beginning of the stream
		nRecordsRead = fread(rawData,sizeof(short),nRecords,inputFile);
		fclose(inputFile);
	}
	else
	{
		nRecordsRead = fread(rawData,sizeof(short),nRecords,stdin);
	} // else
	
	if ( nRecordsRead != nRecords )
	{
		cerr << "error: insufficient number of records in the file (" << nRecordsRead 
		<< ", expecting " << nRecords << ")" << endl;
		exit(1);
	}
	progress->advance(); // Complete data importation
	
	// Fill arrays with rec values & compute means
	///progress->message("Preparing data for PCA");
	for ( int i = 0 ; i < arguments.nChannels ; ++i )
	{
		mean[i] = new double[data2use];
		sum[i] = new double[data2use];
		datSpkChanCenter[i] = gsl_matrix_alloc(data2use,nSpikes);
		if(!arguments.isCenteredData)
			datSpkChan[i] = gsl_matrix_alloc(data2use,nSpikes);
		if(arguments.isExtraFeaturesProvided)
			peakVal[i] = new short[nSpikes];
		
		varcov[i] = gsl_matrix_alloc(data2use,data2use);
		reducedData[i] = gsl_matrix_alloc(arguments.nComponents,nSpikes);
		for ( int j = 0 ; j < data2use ; ++j )
		{
			mean[i][j] = -1;
			sum[i][j] = 0;
			for ( unsigned int k = 0 ; k < nSpikes ; ++k )
			{
				double v = rawData[(arguments.nChannels*arguments.spikeLength)*k+((j+recShift)*arguments.nChannels)+i];
				gsl_matrix_set(datSpkChanCenter[i],j,k,v);
				if(!arguments.isCenteredData)
					gsl_matrix_set(datSpkChan[i],j,k,v);
				sum[i][j] += v;
				if(arguments.isExtraFeaturesProvided && (j+recShift)==arguments.peakPosition)
					peakVal[i][k] = v; // if this is the peak, store it !
			} // for k
			mean[i][j] = sum[i][j]/nSpikes; // mean computation
			for ( unsigned int k = 0 ; k < nSpikes ; ++k )
			{
				gsl_matrix_set (datSpkChanCenter[i],j,k,(gsl_matrix_get(datSpkChanCenter[i],j,k)-mean[i][j]));
			} // for k
		} // for j
	} // for i
	delete[] rawData; // free memory
	progress->advance(); // Complete data initialization
	
	if ( verbose )
	{
		cout << endl << endl;
		cout << "Total number of channels = " << arguments.nChannels << endl;
		for ( int c = 0 ; c < arguments.nChannels ; ++c )
		{
			cout << "Means for channel #" << c << " = ";
			for ( int d = 0 ; d < data2use ; ++d )
			{
				cout << mean[c][d] << ", ";
			} // for d
			cout << endl;
			cout << endl;
		} // for c
		cout << endl;
	} // verbose
	
	// PCA statement
	///progress->message("Computing PCA");
	for ( int i = 0 ; i < arguments.nChannels ; ++i )
	{
		// compute variance-covariance matrix (Data * DataTrans)
		gsl_blas_dgemm(CblasNoTrans,CblasTrans,(1.0/(nSpikes-1)),datSpkChanCenter[i],datSpkChanCenter[i],0.0,varcov[i]);
		
		// solve eigen system to get eigen values and vectors
		gsl_eigen_symmv(varcov[i],eigenValues,eigenVectors,w);
		gsl_eigen_symmv_sort(eigenValues,eigenVectors,GSL_EIGEN_SORT_VAL_DESC); // descending sort for eigen vectors and values
		
		// keep only eigen vectors for the given firt principal components
		reducedEigenVectors = gsl_matrix_submatrix(eigenVectors,0,0,data2use,arguments.nComponents);
		
		// compute new coordinates for data : Trans(evec_reduce) x Trans(data)
		if(!arguments.isCenteredData)
			gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,&reducedEigenVectors.matrix,datSpkChan[i],0.0,reducedData[i]);
		else
			gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,&reducedEigenVectors.matrix,datSpkChanCenter[i],0.0,reducedData[i]);
		
		progress->advance(); // Complete PCA for channel i
		
	} // for i
	
	///progress->message("Saving");
	// Write ouput file (FET format)
	outputFile = fopen(arguments.outputFileName,"w");
	if( !arguments.isExtraFeaturesProvided )
		fprintf(outputFile,"%i\n",(arguments.nChannels*arguments.nComponents));
	else
		fprintf(outputFile,"%i\n",(arguments.nChannels*arguments.nComponents + arguments.nChannels));
	for ( unsigned int k = 0 ; k < nSpikes ; ++k )
	{
		for ( int i = 0 ; i < arguments.nChannels ; ++i )
		{
			for ( int j = 0 ; j < arguments.nComponents ; ++j )
			{
				double *d = gsl_matrix_ptr(reducedData[i],j,k);
				if ( !fprintf(outputFile,"%i ",int(*d)) )
					cerr << "warning: missing data (channel " << i << ", dimension " << j << ", spike " 
					<< k << ")." << endl;
			} // for j
		} // for i
		if( arguments.isExtraFeaturesProvided ) // write peak value after all data
			for ( int i = 0; i < arguments.nChannels ; ++i )
				if ( !fprintf(outputFile,"%i ",int(peakVal[i][k])) )
						cerr << "warning: missing peak (channel " << i << ", spike " << k << ")." << endl;
			
		fprintf(outputFile,"\n");
	} // for k
	fclose(outputFile);
	progress->advance(); // Complete saving results
	
	// Free Memory
	///progress->message("Free Memory");
	for ( int i = 0 ; i < arguments.nChannels ; ++i )
	{
		delete[] mean[i];
		delete[] sum[i];
		if(arguments.isExtraFeaturesProvided)
			delete[] peakVal[i];
		if(!arguments.isCenteredData)
			gsl_matrix_free(datSpkChan[i]);
		gsl_matrix_free(datSpkChanCenter[i]);
		gsl_matrix_free(varcov[i]);
		gsl_matrix_free(reducedData[i]);
	}
	
	if(arguments.isExtraFeaturesProvided)
		delete[] peakVal;
	if(!arguments.isCenteredData)
		delete[] datSpkChan;
	delete[] datSpkChanCenter;
	delete[] mean;
	delete[] sum;
	delete[] varcov;
	delete[] reducedData;
	
	gsl_eigen_symmv_free(w);
	gsl_vector_free(eigenValues);
	gsl_matrix_free(eigenVectors);
	
	progress->advance(); // Complete free memory
	delete progress;
	
	if ( verbose ) cout << endl;
		
	return 0;
} // main