示例#1
0
文件: loadfile.c 项目: Matafight/orca
/*
 * smo_Loadfile - load a whitespace-separated numeric text file into `pairs`.
 *
 * Every line is parsed with strtod() as `dim` feature values followed by one
 * target value; `dim` is derived from the number of values on the first line
 * (number of values minus one).
 *
 * Parameters:
 *   pairs         - list to fill; it is cleared first.  Must not be NULL.
 *   inputfilename - path of the data file.  Must not be NULL.
 *   inputdim      - 0 for training data.  A positive value is the feature
 *                   count the file has to match (test data).  When the file
 *                   holds exactly `inputdim` values per line it is treated
 *                   as a test file without targets, and the targets are read
 *                   from the file whose name has the first "test" replaced
 *                   by "targets", if that file can be opened.
 *
 * Besides filling the list the function
 *   - stores a copy of the file name in pairs->filename,
 *   - accumulates running mean / deviation of targets and of each feature,
 *   - reads optional per-feature type flags from the file whose name is the
 *     part before "train" (or "test"), then "feature", then the text from
 *     the last '.' of the input name,
 *   - rescales the features when pairs->normalized_input is TRUE,
 *   - for training data records the label set in pairs->labels / labelnum.
 *
 * Returns TRUE on success, FALSE when the file cannot be opened, the first
 * line is unusable, the dimensionality is inconsistent, memory runs out or
 * fewer than MINNUM samples were read.  A defective data line, a label
 * outside 1..classes and a few allocation failures terminate the process
 * with exit(1), as before.
 *
 * NOTE(review): whether Clear_Data_List() itself releases x_mean / x_devi is
 * not visible here; the pointers are reset to NULL after every free so that
 * either answer is safe.
 */
BOOL smo_Loadfile ( Data_List * pairs, char * inputfilename, int inputdim ) 
{ 
	FILE * smo_stream = NULL ;
	FILE * smo_target = NULL ;
	FILE * fid = NULL ;
	char * pstr = NULL ;
	const char * ext = NULL ;
	char buf[LENGTH] ;
	char * temp ;
	int dim = -2 ;
	unsigned long index = 1 ;
	unsigned int result, sz ;
	int var = 0, chg = 0 ;
	double * point = NULL ;
	unsigned int y ;
	int i = 0, j = 0 ;
	double mean = 0 ;
	double ymax = LONG_MIN ;
	double ymin = LONG_MAX ;
	double * xmean = NULL;
	Data_Node * node = NULL ;
	int t0=0, tr=0 ;
	Data_List label ;

	if ( NULL == pairs || NULL == inputfilename )
		return FALSE ;

	Clear_Data_List( pairs ) ;
	Create_Data_List( &label ) ;

	/* the file is only read, so do not demand write permission */
	if( (smo_stream = fopen( inputfilename, "r" )) == NULL )
	{
		return FALSE ;
	}

	/* save file name */
	var = (int)strlen( inputfilename ) ;
	if (NULL != pairs->filename) 
		free(pairs->filename) ;
	pairs->filename = (char*)malloc((var+1)*sizeof(char)) ;
	if (NULL == pairs->filename)
	{
		printf("fail to malloc for pairs->filename.\n") ;
		exit(1) ;	/* failure must not report a success status */
	}
	memcpy( pairs->filename, inputfilename, (size_t)var ) ;
	pairs->filename[var]='\0' ;

	/* check the input dimension on the first line */
	if ( NULL == fgets( buf, LENGTH, smo_stream ))
	{
		printf( "fgets error in reading the first line.\n" );
		fclose( smo_stream );
		return FALSE ;
	}

	var = (int)strlen( buf ) ;

	if (var >= LENGTH-1) 
	{
		printf( "the line is too long in the file %s.\n", inputfilename );
		fclose( smo_stream );
		return FALSE ;
	}

	if (0 < var)
	{
		/* Consume one number per pass; the pass that converts nothing
		   (chg == 0) ends the loop.  dim starts at -2 so that it ends up as
		   "values on the line minus one". */
		do 
		{
			dim = dim + 1 ;
			strtod( buf, &temp ) ;
			/* source and destination overlap: strcpy would be undefined */
			memmove( buf, temp, strlen(temp) + 1 ) ;
			chg = var - (int)strlen(buf) ;
			var = var - chg ;
		}
		while ( 0 != chg ) ;
	}
	else
	{ 
		fclose( smo_stream );
		printf("the first line in the file is empty.\n") ;
		return FALSE ;
	}

	if ( 0 > dim || (0 == dim && 0 == inputdim) ) 
	{
		fclose( smo_stream );

#ifdef SMO_DEBUG
		printf( "input dimension is less than one.\n") ;
#endif
		return FALSE ;
	}

	if (inputdim > 0)
	{
		if (inputdim == dim + 1 ) /* test file without target */
		{
			/* build "<prefix>targets<suffix>" from "<prefix>test<suffix>";
			   skipped when the name would not fit into buf */
			pstr = strstr( inputfilename , "test" ) ;
			if (NULL != pstr
				&& strlen(inputfilename) + strlen("targets") - strlen("test") < (size_t)LENGTH)
			{
				result = (unsigned int)(pstr - inputfilename) ;
				memcpy( buf, inputfilename, result ) ;
				buf[result] = '\0' ;
				strcat( buf, "targets" ) ;
				strcat( buf, pstr+4 ) ;
				smo_target = fopen( buf, "r" ) ;
			}
			dim = inputdim ;
			pairs->dimen = dim ;
		}
		else if ( inputdim != dim )
		{
			printf("Dimensionality in testdata is inconsistent with traindata.\n") ;
			fclose( smo_stream ) ;	/* was leaked on this path */
			return FALSE ;
		}
		else
			pairs->dimen = dim ;
	}
	else
		pairs->dimen = dim ;

	/* initialize the x_mean and x_devi in Data_List pairs; all three
	   pointers are assigned before testing so that none is left stale */
	pairs->x_mean = (double *) malloc( dim*sizeof(double) ) ;
	pairs->x_devi = (double *) malloc( dim*sizeof(double) ) ;
	xmean = (double *) malloc( dim*sizeof(double) ) ;
	if ( NULL == pairs->x_mean || NULL == pairs->x_devi || NULL == xmean )
	{
		free( pairs->x_mean ) ;
		pairs->x_mean = NULL ;
		free( pairs->x_devi ) ;
		pairs->x_devi = NULL ;
		free( xmean ) ;
		if (NULL != smo_target)
			fclose( smo_target ) ;
		fclose( smo_stream );
		return FALSE ;
	}
	for ( j = 0; j < dim; j ++ )
	{
		pairs->x_mean[j] = 0 ;
		pairs->x_devi[j] = 0 ;
		xmean[j] = 0 ;
	}

	/* begin to initialize data_list for digital input only */
	printf("\nLoading %s ...  \n", inputfilename) ;
	pairs->datatype = CLASSIFICATION ; 

	rewind( smo_stream ) ;
	fgets( buf, LENGTH, smo_stream ) ;
	do
	{

#ifdef SMO_DEBUG 
		printf("%lu\n", index) ;
		printf("%s\n\n\n", buf) ;
#endif
		/* ownership of point passes to the node created below */
		point = (double *) malloc( (dim+1) * sizeof(double) ) ;
		if ( NULL == point )
		{
			printf("not enough memory.\n") ;
			if (NULL != smo_target)
				fclose( smo_target ) ;
			fclose( smo_stream );
			free( pairs->x_mean ) ;
			pairs->x_mean = NULL ;
			free( pairs->x_devi ) ;
			pairs->x_devi = NULL ;
			free( xmean ) ;
			/* release the label list, as the success path does */
			Clear_Label_Data_List( &label ) ;
			Clear_Data_List( pairs ) ;
			return FALSE ;
		}
		var = (int)strlen( buf ) ;	
		i = 0 ;
		chg = dim ;

		/* read dim features, shifting the unread tail to the front of buf */
		while ( chg>0 && i<dim)
		{
			point[i] = strtod( buf, &temp ) ;
			i++ ;
			memmove( buf, temp, strlen(temp) + 1 ) ;
			chg = var - (int)strlen(buf) ;
			var = var - chg ;
		}
		point[dim]=0 ;
		if (i==dim && chg>0 && var>0)
			y = (unsigned int)strtod( buf, &temp ) ;
		else
		{
			free(point) ;
			y = 0 ;
			printf("Warning: the input file %s contains a blank or defective line.\n",inputfilename) ;
			exit(1) ;
		}
		/* load y as target from other file when dim+1 */
		if (NULL != smo_target)
		{
			if ( NULL != fgets( buf, LENGTH, smo_target ) )
			{
				var = (int)strlen( buf ) ;
				y = (int)strtod( buf, &temp ) ;
				memmove( buf, temp, strlen(temp) + 1 ) ;
				chg = var - (int)strlen(buf) ;
				if (0==chg)
					printf("Warning: the target file contains a blank line.\n") ;
			}
			else
				printf("Warning: the target file is shorter than the input file.\n") ;
		}

		if (chg>0) 
		{	
			if ( TRUE == Add_Data_List( pairs, Create_Data_Node(index, point, y) ) )
			{
				/* update running statistics; pairs->count already includes
				   the sample that was just added */
				pairs->mean = (mean * (((double)(pairs->count)) - 1) + y )/ ((double)(pairs->count))  ;
				pairs->deviation = pairs->deviation + (y-mean)*(y-mean) * ((double)(pairs->count)-1)/((double)(pairs->count));			
				mean = pairs->mean ;	
				for ( j=0; j<dim; j++ )
				{
					pairs->x_mean[j] = (xmean[j] * (((double)(pairs->count)) - 1) + point[j] )/ ((double)(pairs->count))  ;
					pairs->x_devi[j] = pairs->x_devi[j] + (point[j]-xmean[j])*(point[j]-xmean[j]) * ((double)(pairs->count)-1)/((double)(pairs->count));			
					xmean[j] = pairs->x_mean[j] ;
				}
				if (y>ymax)
				{ ymax = y ; pairs->i_ymax = index ;}
				if (y<ymin)
				{ ymin = y ; pairs->i_ymin = index ;}

				/* record the label to determine the data type */
				Add_Label_Data_List( &label, Create_Data_Node(index, point, y) ) ;
				index ++ ;
			}
			else
			{
#ifdef SMO_DEBUG 
				printf("%lu\n", index) ;
				printf("duplicate data \n") ;
#endif
			}
		}
	}
	while( !feof( smo_stream ) && NULL != fgets( buf, LENGTH, smo_stream ) ) ;

	if (label.count>=2||inputdim>0)
		pairs->datatype = ORDINAL ;
	else
		printf("Warning : not a ordinal regression.\n") ;

	if (pairs->count < MINNUM || (pairs->datatype == UNKNOWN && inputdim == 0 ) ) 
	{
		printf("too few input pairs\n") ;
		Clear_Label_Data_List( &label ) ;
		Clear_Data_List( pairs ) ;
		if (NULL != pairs->x_mean) 
		{
			free(pairs->x_mean) ;
			pairs->x_mean = NULL ;
		}
		if (NULL != pairs->x_devi) 
		{
			free(pairs->x_devi) ;
			pairs->x_devi = NULL ;
		}
		free( xmean ) ;
		if (NULL != smo_target)
			fclose( smo_target ) ;
		fclose( smo_stream );
		return FALSE ;
	}

	/* load the optional file with per-feature type flags */
	pairs->featuretype = (int *) malloc(pairs->dimen*sizeof(int)) ;
	if (NULL != pairs->featuretype)
	{
		/* default 0 */
		for (sz=0;sz<pairs->dimen;sz++)
			pairs->featuretype[sz] = 0 ;

		if (0==inputdim)
			pstr = strstr( inputfilename, "train") ;
		else
			pstr = strstr( inputfilename, "test") ;
		if (NULL != pstr)
		{
			sz = (unsigned int)(pstr - inputfilename) ;
			/* a name without '.' simply contributes no suffix */
			ext = strrchr( inputfilename, '.') ;
			if (NULL == ext)
				ext = "" ;
			fid = NULL ;
			/* skip the feature file if its name would not fit into buf */
			if ((size_t)sz + strlen("feature") + strlen(ext) < (size_t)LENGTH)
			{
				memcpy( buf, inputfilename, sz ) ;
				buf[sz]='\0' ;
				strcat( buf, "feature" ) ;			
				strcat( buf, ext ) ;
				fid = fopen(buf,"r") ;
			}
			if (NULL != fid)
			{
				printf("Loading the specifications of feature type in %s ...",buf) ;
				sz = 0 ;
				while (!feof(fid) && NULL!=fgets(buf,LENGTH,fid) )
				{
					i=(int)strlen(buf) ;
					if (i>1)
					{
						if (sz>=pairs->dimen)
						{
							/* surplus lines keep overwriting the last slot */
							printf("Warning : feature type file is too long.\n") ;
							sz = pairs->dimen-1 ;
						}
						pairs->featuretype[sz] = atoi(buf) ;
						sz += 1 ;
					}
					else
						printf("Warning : blank line in feature type file.\n") ;
				}
				if (sz!=pairs->dimen)
				{
					/* too few entries: fall back to the default 0 */
					for (sz=0;sz<pairs->dimen;sz++)
						pairs->featuretype[sz] = 0 ;
					printf(" RESET as default.\n") ;
				}
				else
					printf(" done.\n") ;
				fclose(fid) ;
			}
		}
	}

	pairs->deviation = sqrt( pairs->deviation / ((double)(pairs->count - 1.0)) ) ;
	for ( j=0; j<dim; j++ )
		pairs->x_devi[j] = sqrt( pairs->x_devi[j] / ((double)(pairs->count - 1.0)) ) ;	

	/* training data: targets are used as they are */
	if ( UNKNOWN != pairs->datatype && 0 == inputdim )
	{
		pairs->deviation = 1.0 ;
		pairs->mean = 0 ;
		pairs->normalized_output = FALSE ;
	}

	/* features with a non-zero type flag are left unscaled; without a
	   featuretype array every feature counts as type 0 */
	if (NULL != pairs->featuretype)
	{
		for ( j=0; j<dim; j++ )
		{
			if (pairs->featuretype[j] != 0)
			{
				pairs->x_devi[j] = 1 ;
				pairs->x_mean[j] = 0 ;
			}
		}
	}

	if (inputdim>0) /* do not normalize data for TESTING */
	{
		pairs->normalized_output = FALSE ;
		pairs->normalized_input = FALSE ; 
	}

	/* normalize the inputs if needed */
	node = pairs->front ;
	while ( node != NULL )
	{
		if ( TRUE == pairs->normalized_input )
		{
			for ( j=0; j<dim; j++ )
			{				
				if (pairs->x_devi[j]>0)
					node->point[j] = (node->point[j]-pairs->x_mean[j])/(pairs->x_devi[j]) ;
				else
					node->point[j] = 0 ;
			}
		}
		node = node->next ; 
	}
	printf("Total %lu samples with %u dimensions for ", pairs->count, pairs->dimen) ;	

	if	(inputdim > 0)
		printf("TESTING.\r\n") ;
	else 
	{
		if( CLASSIFICATION == pairs->datatype )
			printf("CLASSIFICATION.\r\n") ;
		else if ( ORDINAL == pairs->datatype )
		{
			printf("ORDINAL %lu REGRESSION.\r\n",label.count) ;
			pairs->classes = label.count ;
			if (NULL != pairs->labels)
				free( pairs->labels ) ;
			i=0;
			pairs->labels = (unsigned int*)malloc(pairs->classes*sizeof(unsigned int)) ;
			pairs->labelnum = (unsigned int*)malloc(pairs->classes*sizeof(unsigned int)) ;
			if (NULL != pairs->labels&&NULL != pairs->labelnum)
			{
				node = label.front ;
				printf("ordinal varibles : ") ;
				while (NULL!=node)
				{
					/* labels index the arrays directly, so they have to be
					   exactly 1..classes */
					if (node->target<1 || node->target>pairs->classes)
					{
						printf("Error : targets should be from 1 to %d.\n",(int)pairs->classes) ;
						exit(1) ;
					}
					pairs->labels[node->target-1] = node->target ;
					if (node->target-1==0)
						t0 = node->target ;
					if (node->target==(int)pairs->classes)
						tr = node->target ;
					pairs->labelnum[node->target-1] = node->fold ;
					i += node->fold ;
					printf("%d(%d)  ", node->target, node->fold) ;
					node = node->next ;
				}
				printf("\n") ;
				/* the per-label counts must add up to the sample count and
				   both the first and the last label must be present */
				if (i!=(int)pairs->count||t0!=1||tr!=(int)pairs->classes)
				{
					printf("Error in data list.\n") ;
					exit(1) ;
				}
			}
			else
			{
				printf("fail to malloc for pairs->labels.\n") ;			
				exit(1) ;
			}
		}
		else 
			printf("UNKNOWN.\r\n") ;
	}
	if (1 == pairs->normalized_input)
		printf("Inputs are normalized.\r\n") ;

	if (1 == pairs->normalized_output && pairs->deviation > 0)
		printf("Outputs are normalized.\r\n") ;
	if ( inputdim > 0 && pairs->deviation <= 0 )
		printf("Tragets are not at hand.\r\n") ;

	Clear_Label_Data_List (&label) ;
	if (NULL != smo_target)
		fclose( smo_target ) ;
	fclose( smo_stream );
	free( xmean ) ;
	return TRUE ;
}
示例#2
0
def_Settings * Create_def_Settings ( char * filename ) 
{

	def_Settings * settings = NULL ;
	char * pstr = NULL ;
	unsigned int result = 0 ;
	char buf[LENGTH] = "" ;

	if (NULL == filename)
	{
		printf("\r\nFATAL ERROR : the input pointer is NULL.\r\n") ;
		return NULL ;
	}
	if (NULL == (settings = (def_Settings *)(malloc(sizeof(def_Settings)))))
	{
		printf("\r\nFATAL ERROR : fail to malloc def_settings.\r\n") ;
		return NULL ;
	}

	// set all elements as default values
	INPUTFILE = NULL ;
	TESTFILE = NULL ;
	VC = DEF_VC ;
	TOL = DEF_TOL ;
	KAPPA = DEF_KAPPA ;
	EPS = DEF_EPS ;
	P = DEF_P ;
	METHOD = DEF_METHOD ;
	KERNEL = DEF_KERNEL ;
	SMO_DISPLAY = DEF_DISPLAY ;
	KFOLD = DEF_KFOLD ;
	INDEX = 1 ;
	
	settings->cache_size = DEF_CACHE ;
	settings->lnC_step = DEF_COARSESTEP ;
	settings->lnK_step = DEF_COARSESTEP ;
	settings->lnC_start = DEF_INFERLNC ;
	settings->lnK_start = DEF_INFERLNK ;
	settings->lnC_end = DEF_SUPERLNC ;
	settings->lnK_end = DEF_SUPERLNK ;
	
	settings->def_lnC_step = DEF_COARSESTEP ;
	settings->def_lnK_step = DEF_COARSESTEP ;
	settings->def_lnC_start = DEF_INFERLNC ;
	settings->def_lnK_start = DEF_INFERLNK ;
	settings->def_lnC_end = DEF_SUPERLNC ;
	settings->def_lnK_end = DEF_SUPERLNK ;

	settings->zoomin = DEF_ZOOMIN ;
	settings->loops = DEF_LOOP ;	
	settings->ardon = DEF_ARDON ;
	settings->repeat = DEF_REPEAT ;
	settings->seeds = 0 ;
	settings->smo_balance = DEF_BALANCE ;
	settings->best_rate = 0 ;
	settings->time = 0 ;

	settings->normalized_input = DEF_NORMALIZEINPUT ;
	settings->normalized_output = DEF_NORMALIZETARGET ;
	settings->trainmethod = DEF_TRAINING ;
	
	// save data file name in INPUTFILE
	if ( 0!=strlen(filename) && '-'!=filename[0] ) 
	{
		if ( NULL != INPUTFILE )
		{
			free( (void*) INPUTFILE ) ;
			INPUTFILE = NULL ;						
		}		
		if ( NULL == ( INPUTFILE = strdup(filename) ) )
		{
			// clear the structure before exit
			free (settings) ;
			printf("\r\nFATAL ERROR : fail to save the name of input file.\r\n") ;
			return NULL ;
		}
	}
	else
	{
		free (settings) ;
		return NULL ;
	}

	// if there is "train" in the file name of training data, such as "*train*.*",
	// test data set should be named as "*test*.*".
	// if we fail to find "train" in the training data file,
	// we just use the train data as test data.

	// create testing file name 
	pstr = strstr( INPUTFILE, "train" ) ;
	if (NULL == pstr)
		TESTFILE = strdup(filename) ;
	else
	{
		result = abs( INPUTFILE - pstr ) ;
		strncpy (buf, INPUTFILE, result ) ;
		buf[result] = '\0' ;
		strcat(buf, "test") ;
		strcat (buf, pstr+5) ;
		TESTFILE = strdup(buf) ;
	}
	
	Create_Data_List( &(settings->pairs) ) ;
	Create_Data_List( &(settings->testdata) ) ;
	Create_Data_List( &(settings->training) ) ;
	Create_Data_List( &(settings->validation) ) ;

	return settings ; 
}
示例#3
0
文件: loadfile.c 项目: Matafight/orca
/*
 * smo_LoadMatrix - fill `pairs` from an in-memory matrix instead of a file.
 *
 * Parameters:
 *   pairs         - list to fill; it is cleared first.  Must not be NULL.
 *   inputfilename - only used to derive the name of the optional feature
 *                   type file (the part before "train"/"test", then
 *                   "feature", then the text from the last '.').  Must not
 *                   be NULL.
 *   inputdim      - 0 for training data, positive for test data.
 *   nFil          - number of rows in matrix.
 *   nCol          - number of columns; the first nCol-1 are features and the
 *                   last one is the target.
 *   matrix        - nFil row pointers of nCol doubles each.
 *
 * Running mean / deviation of the targets and of every feature are
 * accumulated, the features are rescaled when pairs->normalized_input is
 * TRUE, and for training data the label set is stored in pairs->labels /
 * labelnum.  pairs->filename is released and left NULL.
 *
 * Returns TRUE on success, FALSE on invalid arguments, when memory runs out
 * or when fewer than MINNUM samples were added.  A label outside 1..classes
 * terminates the process with exit(1), as before.
 *
 * No file stream is opened for the data here, so none is closed; earlier
 * versions passed an uninitialised FILE pointer to fclose() on every exit.
 *
 * NOTE(review): whether Clear_Data_List() itself releases x_mean / x_devi is
 * not visible here; the pointers are reset to NULL after every free so that
 * either answer is safe.
 */
BOOL smo_LoadMatrix ( Data_List * pairs, char * inputfilename, int inputdim, int nFil, int nCol, double ** matrix) 
{ 
	int fila = 0, columna = 0 ;
	FILE * fid = NULL ;
	char * pstr = NULL ;
	const char * ext = NULL ;
	char buf[LENGTH] ;
	int dim = -2 ;
	unsigned long index = 1 ;
	unsigned int sz ;
	double * point = NULL ;
	unsigned int y ;
	int i = 0, j = 0 ;
	double mean = 0 ;
	double ymax = LONG_MIN ;
	double ymin = LONG_MAX ;
	double * xmean = NULL;
	Data_Node * node = NULL ;
	int t0=0, tr=0 ;
	Data_List label ;

	/* a matrix without even a target column cannot be loaded */
	if ( NULL == pairs || NULL == inputfilename || NULL == matrix || nCol < 1 )
		return FALSE ;

	Clear_Data_List( pairs ) ;
	Create_Data_List( &label ) ;

	dim = nCol-1;

	if (NULL != pairs->filename) 
	{
		free(pairs->filename) ;
		pairs->filename = NULL ;	/* do not leave a dangling pointer */
	}

	pairs->dimen = dim ;

	/* initialize the x_mean and x_devi in Data_List pairs; all three
	   pointers are assigned before testing so that none is left stale */
	pairs->x_mean = (double *) malloc( dim*sizeof(double) ) ;
	pairs->x_devi = (double *) malloc( dim*sizeof(double) ) ;
	xmean = (double *) malloc( dim*sizeof(double) ) ;
	if ( NULL == pairs->x_mean || NULL == pairs->x_devi || NULL == xmean )
	{
		free( pairs->x_mean ) ;
		pairs->x_mean = NULL ;
		free( pairs->x_devi ) ;
		pairs->x_devi = NULL ;
		free( xmean ) ;
		return FALSE ;
	}
	for ( j = 0; j < dim; j ++ )
	{
		pairs->x_mean[j] = 0 ;
		pairs->x_devi[j] = 0 ;
		xmean[j] = 0 ;
	}

	pairs->datatype = CLASSIFICATION ; 

	/* one sample per matrix row; nothing is read when nFil <= 0 */
	for ( fila = 0; fila < nFil; fila ++ )
	{

#ifdef SMO_DEBUG 
		printf("%lu\n", index) ;
#endif

		/* ownership of point passes to the node created below */
		point = (double *) malloc( (dim+1) * sizeof(double) ) ;
		if ( NULL == point )
		{
			printf("not enough memory.\n") ;
			free( pairs->x_mean ) ;
			pairs->x_mean = NULL ;
			free( pairs->x_devi ) ;
			pairs->x_devi = NULL ;
			free( xmean ) ;
			/* release the label list, as the success path does */
			Clear_Label_Data_List( &label ) ;
			Clear_Data_List( pairs ) ;
			return FALSE ;
		}

		for ( columna = 0; columna < dim; columna ++ )
			point[columna] = matrix[fila][columna] ;
		point[dim]=0 ;

		/* the last column holds the target */
		y = (unsigned int) matrix[fila][dim] ;

		if ( TRUE == Add_Data_List( pairs, Create_Data_Node(index, point, y) ) )
		{
			/* update running statistics; pairs->count already includes the
			   sample that was just added */
			pairs->mean = (mean * (((double)(pairs->count)) - 1) + y )/ ((double)(pairs->count))  ;
			pairs->deviation = pairs->deviation + (y-mean)*(y-mean) * ((double)(pairs->count)-1)/((double)(pairs->count));			
			mean = pairs->mean ;	
			for ( j=0; j<dim; j++ )
			{
				pairs->x_mean[j] = (xmean[j] * (((double)(pairs->count)) - 1) + point[j] )/ ((double)(pairs->count))  ;
				pairs->x_devi[j] = pairs->x_devi[j] + (point[j]-xmean[j])*(point[j]-xmean[j]) * ((double)(pairs->count)-1)/((double)(pairs->count));			
				xmean[j] = pairs->x_mean[j] ;
			}
			if (y>ymax)
			{ ymax = y ; pairs->i_ymax = index ;}
			if (y<ymin)
			{ ymin = y ; pairs->i_ymin = index ;}

			/* record the label to determine the data type */
			Add_Label_Data_List( &label, Create_Data_Node(index, point, y) ) ;
			index ++ ;
		}
		else
		{
#ifdef SMO_DEBUG 
			printf("%lu\n", index) ;
			printf("duplicate data \n") ;
#endif
		}
	}

	if (label.count>=2||inputdim>0)
		pairs->datatype = ORDINAL ;
	else
		printf("Warning : not a ordinal regression.\n") ;

	if (pairs->count < MINNUM || (pairs->datatype == UNKNOWN && inputdim == 0 ) ) 
	{
		printf("too few input pairs\n") ;
		Clear_Label_Data_List( &label ) ;
		Clear_Data_List( pairs ) ;
		if (NULL != pairs->x_mean) 
		{
			free(pairs->x_mean) ;
			pairs->x_mean = NULL ;
		}
		if (NULL != pairs->x_devi) 
		{
			free(pairs->x_devi) ;
			pairs->x_devi = NULL ;
		}
		free( xmean ) ;
		return FALSE ;
	}

	/* load the optional file with per-feature type flags */
	pairs->featuretype = (int *) malloc(pairs->dimen*sizeof(int)) ;
	if (NULL != pairs->featuretype)
	{
		/* default 0 */
		for (sz=0;sz<pairs->dimen;sz++)
			pairs->featuretype[sz] = 0 ;

		if (0==inputdim)
			pstr = strstr( inputfilename, "train") ;	
		else
			pstr = strstr( inputfilename, "test") ;	
		if (NULL != pstr)
		{
			sz = (unsigned int)(pstr - inputfilename) ;
			/* a name without '.' simply contributes no suffix */
			ext = strrchr( inputfilename, '.') ;	
			if (NULL == ext)
				ext = "" ;
			fid = NULL ;
			/* skip the feature file if its name would not fit into buf */
			if ((size_t)sz + strlen("feature") + strlen(ext) < (size_t)LENGTH)
			{
				memcpy( buf, inputfilename, sz ) ;
				buf[sz]='\0' ;
				strcat( buf, "feature" ) ;			
				strcat( buf, ext ) ;
				/* the file is only read, so do not demand write permission */
				fid = fopen(buf,"r") ;
			}
			if (NULL != fid)
			{
				printf("Loading the specifications of feature type in %s ...",buf) ;
				sz = 0 ;
				while (!feof(fid) && NULL!=fgets(buf,LENGTH,fid) )
				{
					i=(int)strlen(buf) ;
					if (i>1)
					{
						if (sz>=pairs->dimen)
						{
							/* surplus lines keep overwriting the last slot */
							printf("Warning : feature type file is too long.\n") ;
							sz = pairs->dimen-1 ;
						}
						pairs->featuretype[sz] = atoi(buf) ;
						sz += 1 ;
					}
					else
						printf("Warning : blank line in feature type file.\n") ;
				}
				if (sz!=pairs->dimen)
				{
					/* too few entries: fall back to the default 0 */
					for (sz=0;sz<pairs->dimen;sz++)
						pairs->featuretype[sz] = 0 ;
					printf(" RESET as default.\n") ;
				}
				else
					printf(" done.\n") ;
				fclose(fid) ;
			}
		}
	}

	pairs->deviation = sqrt( pairs->deviation / ((double)(pairs->count - 1.0)) ) ;
	for ( j=0; j<dim; j++ )
		pairs->x_devi[j] = sqrt( pairs->x_devi[j] / ((double)(pairs->count - 1.0)) ) ;	

	/* training data: targets are used as they are */
	if ( UNKNOWN != pairs->datatype && 0 == inputdim )
	{
		pairs->deviation = 1.0 ;
		pairs->mean = 0 ;
		pairs->normalized_output = FALSE ;
	}

	/* features with a non-zero type flag are left unscaled; without a
	   featuretype array every feature counts as type 0 */
	if (NULL != pairs->featuretype)
	{
		for ( j=0; j<dim; j++ )
		{
			if (pairs->featuretype[j] != 0)
			{
				pairs->x_devi[j] = 1 ;
				pairs->x_mean[j] = 0 ;
			}
		}
	}

	if (inputdim>0) /* do not normalize data for TESTING */
	{
		pairs->normalized_output = FALSE ;
		pairs->normalized_input = FALSE ; 
	}

	/* normalize the inputs if needed */
	node = pairs->front ;
	while ( node != NULL )
	{
		if ( TRUE == pairs->normalized_input )
		{
			for ( j=0; j<dim; j++ )
			{				
				if (pairs->x_devi[j]>0)
					node->point[j] = (node->point[j]-pairs->x_mean[j])/(pairs->x_devi[j]) ;
				else
					node->point[j] = 0 ;
			}
		}
		node = node->next ; 
	}

	if	(inputdim <= 0)	
	{
		if( CLASSIFICATION == pairs->datatype )
			printf("CLASSIFICATION.\r\n") ;
		else if ( ORDINAL == pairs->datatype )
		{
			pairs->classes = label.count ;
			if (NULL != pairs->labels)
				free( pairs->labels ) ;
			i=0;
			pairs->labels = (unsigned int*)malloc(pairs->classes*sizeof(unsigned int)) ;
			pairs->labelnum = (unsigned int*)malloc(pairs->classes*sizeof(unsigned int)) ;
			if (NULL != pairs->labels&&NULL != pairs->labelnum)
			{
				node = label.front ;
				while (NULL!=node)
				{
					/* labels index the arrays directly, so they have to be
					   exactly 1..classes */
					if (node->target<1 || node->target>pairs->classes)
					{
						printf("Error : targets should be from 1 to %d.\n",(int)pairs->classes) ;
						exit(1) ;
					}
					pairs->labels[node->target-1] = node->target ;
					if (node->target-1==0)
						t0 = node->target ;
					if (node->target==(int)pairs->classes)
						tr = node->target ;
					pairs->labelnum[node->target-1] = node->fold ;
					i += node->fold ;
					node = node->next ;
				}
				/* the per-label counts must add up to the sample count and
				   both the first and the last label must be present */
				if (i!=(int)pairs->count||t0!=1||tr!=(int)pairs->classes)
				{
					printf("Error in data list.\n") ;
					exit(1) ;
				}
			}
			else
			{
				printf("fail to malloc for pairs->labels.\n") ;			
				exit(1) ;
			}
		}
		else 
			printf("UNKNOWN.\r\n") ;
	}
	if (1 == pairs->normalized_input)
		printf("Inputs are normalized.\r\n") ;

	if (1 == pairs->normalized_output && pairs->deviation > 0)
		printf("Outputs are normalized.\r\n") ;
	if ( inputdim > 0 && pairs->deviation <= 0 )
		printf("Tragets are not at hand.\r\n") ;

	Clear_Label_Data_List (&label) ;
	free( xmean ) ;
	return TRUE ;
}
示例#4
0
def_Settings * Create_def_Settings_Matlab ( void )
{

	/*/printf("%s\n",filename);*/

	char filename[30];

	strcpy(filename,"mytask_train.0");

	/*printf("%s\n",filename);*/

	def_Settings * settings = NULL ;
	char * pstr = NULL ;
	unsigned int result = 0 ;
	char buf[LENGTH] = "" ;

	if (NULL == filename)
	{
		printf("\r\nFATAL ERROR : the input pointer is NULL.\r\n") ;
		return NULL ;
	}
	if (NULL == (settings = (def_Settings *)(malloc(sizeof(def_Settings)))))
	{
		printf("\r\nFATAL ERROR : fail to malloc def_settings.\r\n") ;
		return NULL ;
	}

	/* set all elements as default values*/
	INPUTFILE = NULL ;
	TESTFILE = NULL ;
	VC = DEF_VC ;
	TOL = DEF_TOL ;
	KAPPA = DEF_KAPPA ;
	EPS = DEF_EPS ;
	P = DEF_P ;
	METHOD = DEF_METHOD ;
	KERNEL = DEF_KERNEL ;
	SMO_DISPLAY = DEF_DISPLAY ;
	KFOLD = DEF_KFOLD ;
	INDEX = 1 ;
	
	settings->cache_size = DEF_CACHE ;
	settings->lnC_step = DEF_COARSESTEP ;
	settings->lnK_step = DEF_COARSESTEP ;
	settings->lnC_start = DEF_INFERLNC ;
	settings->lnK_start = DEF_INFERLNK ;
	settings->lnC_end = DEF_SUPERLNC ;
	settings->lnK_end = DEF_SUPERLNK ;
	
	settings->def_lnC_step = DEF_COARSESTEP ;
	settings->def_lnK_step = DEF_COARSESTEP ;
	settings->def_lnC_start = DEF_INFERLNC ;
	settings->def_lnK_start = DEF_INFERLNK ;
	settings->def_lnC_end = DEF_SUPERLNC ;
	settings->def_lnK_end = DEF_SUPERLNK ;

	settings->zoomin = DEF_ZOOMIN ;
	settings->loops = DEF_LOOP ;	
	settings->ardon = DEF_ARDON ;
	settings->repeat = DEF_REPEAT ;
	settings->seeds = 0 ;
	settings->smo_balance = DEF_BALANCE ;
	settings->best_rate = 0 ;
	settings->time = 0 ;

	settings->normalized_input = DEF_NORMALIZEINPUT ;
	settings->normalized_output = DEF_NORMALIZETARGET ;
	settings->trainmethod = DEF_TRAINING ;
	
	/* save data file name in INPUTFILE*/
	if ( 0!=strlen(filename) && '-'!=filename[0] ) 
	{
		if ( NULL != INPUTFILE )
		{
			free( (void*) INPUTFILE ) ;
			INPUTFILE = NULL ;						
		}		
		if ( NULL == ( INPUTFILE = strdup(filename) ) )
		{

			free (settings) ;
			printf("\r\nFATAL ERROR : fail to save the name of input file.\r\n") ;
			return NULL ;
		}
	}
	else
	{
		free (settings) ;
		return NULL ;
	}


	pstr = strstr( INPUTFILE, "train" ) ;
	if (NULL == pstr)
		TESTFILE = strdup(filename) ;
	else
	{
		result = abs( INPUTFILE - pstr ) ;
		strncpy (buf, INPUTFILE, result ) ;
		buf[result] = '\0' ;
		strcat(buf, "test") ;
		strcat (buf, pstr+5) ;
		TESTFILE = strdup(buf) ;
	}
	
	Create_Data_List( &(settings->pairs) ) ;
	Create_Data_List( &(settings->testdata) ) ;
	Create_Data_List( &(settings->training) ) ;
	Create_Data_List( &(settings->validation) ) ;

	return settings ; 
}