/*
  Entry point for GUI prompting from SQLDriverConnect().

  Parses the incoming connection string (if any) into a DataSource, shows
  the parameter dialog, and when the dialog returns 1 serializes the data
  source back into outstr as a ';'-delimited key/value string.

  @param hWnd        window handle passed through to the dialog
  @param instr       input connection string; may be NULL or empty
  @param completion  driver completion flag (not consulted here)
  @param outstr      buffer receiving the resulting connection string;
                     may be NULL
  @param outmax      capacity of outstr, passed to ds_to_kvpair() as the
                     maximum (taken to be in characters, matching the
                     BufferLength argument of SQLDriverConnect)
  @param outlen      if non-NULL, receives the serialized length; on
                     truncation it receives ds_to_kvpair_len(), so the
                     caller can detect outmax < *outlen

  @return TRUE when the dialog returned 1, FALSE otherwise (including
          when the input string could not be parsed).
*/
BOOL Driver_Prompt(HWND hWnd, SQLWCHAR *instr, SQLUSMALLINT completion,
                   SQLWCHAR *outstr, SQLSMALLINT outmax, SQLSMALLINT *outlen)
{
  DataSource *ds= ds_new();
  BOOL rc= FALSE;

  /* nothing can be parsed or shown without a data source object */
  if (!ds)
    return FALSE;

  /*
     parse the attr string, dsn lookup will have already been
     done in the driver
  */
  if (instr && *instr)
  {
    if (ds_from_kvpair(ds, instr, (SQLWCHAR)';'))
      goto exit;
  }

  /* Show the dialog and handle result */
  if (ShowOdbcParamsDialog(ds, hWnd, TRUE) == 1)
  {
    /* serialize to outstr */
    int len= ds_to_kvpair(ds, outstr, outmax, (SQLWCHAR)';');

    if (len == -1)
    {
      /* truncated, up to caller to see outmax < *outlen */
      if (outlen)
        *outlen= ds_to_kvpair_len(ds);

      /*
        Terminate inside the buffer: the last valid element is
        outmax - 1, so writing outstr[outmax] would overrun it.
        Skip entirely if outstr is NULL or has no room at all.
      */
      if (outstr && outmax > 0)
        outstr[outmax - 1]= 0;
    }
    else if (outlen)
    {
      *outlen= len;
    }

    rc= TRUE;
  }

exit:
  ds_delete(ds);
  return rc;
}
/*
  Add, edit, or remove a Data Source Name (DSN). This function is
  called by "Data Source Administrator" on Windows, or similar
  application on Unix.

  @param hWnd           parent window handle; on Windows a NULL handle
                        means the DSN is saved without showing the dialog
  @param nRequest       ODBC_ADD_DSN, ODBC_CONFIG_DSN or ODBC_REMOVE_DSN
  @param pszDriver      driver name; required for ODBC_ADD_DSN
  @param pszAttributes  key/value attribute string; ';'-delimited, or
                        (Windows only) null-delimited when it has no ';'

  @return TRUE on success, FALSE on failure. Any other nRequest value is
          rejected with FALSE.
*/
BOOL INSTAPI ConfigDSNW(HWND hWnd, WORD nRequest, LPCWSTR pszDriver,
                        LPCWSTR pszAttributes)
{
  DataSource *ds= ds_new();
  BOOL rc= TRUE;
  Driver *driver= NULL;
  SQLWCHAR *origdsn= NULL;

  if (!ds)
    return FALSE;

  if (pszAttributes && *pszAttributes)
  {
    SQLWCHAR delim= ';';

#ifdef _WIN32
    /*
      if there's no ;, then it's most likely null-delimited

      NOTE: the double null-terminated strings are not working
      with UnixODBC-GUI-Qt (posted a bug)
    */
    if (!sqlwcharchr(pszAttributes, delim))
      delim= 0;
#endif

    if (ds_from_kvpair(ds, pszAttributes, delim))
    {
      SQLPostInstallerError(ODBC_ERROR_INVALID_KEYWORD_VALUE,
                            W_INVALID_ATTR_STR);
      rc= FALSE;
      goto exitConfigDSN;
    }

    if (ds_lookup(ds) && nRequest != ODBC_ADD_DSN)
    {
      /* ds_lookup() will already set SQLInstallerError */
      rc= FALSE;
      goto exitConfigDSN;
    }

    /* remember the name we started with so a rename can be detected */
    origdsn= sqlwchardup(ds->name, SQL_NTS);
  }

  switch (nRequest)
  {
  case ODBC_ADD_DSN:
    /* the driver name is copied and looked up below, so it must exist */
    if (!pszDriver)
    {
      rc= FALSE;
      break;
    }

    driver= driver_new();
    if (!driver)
    {
      rc= FALSE;
      break;
    }

    /*
      NOTE(review): the capacity of driver->name is not visible here;
      this copy is bounded only by the length of pszDriver - confirm
      the destination is large enough for any driver name.
    */
    memcpy(driver->name, pszDriver,
           (sqlwcharlen(pszDriver) + 1) * sizeof(SQLWCHAR));

    if (driver_lookup(driver))
    {
      rc= FALSE;
      break;
    }

    if (hWnd)
    {
      /*
        hWnd means we will at least try to prompt, at which point
        the driver lib will be replaced by the name
      */
      ds_set_strattr(&ds->driver, driver->lib);
    }
    else
    {
      /*
        no hWnd is a likely a call from an app w/no
        prompting so we put the driver name immediately
      */
      ds_set_strattr(&ds->driver, driver->name);
    }
    /* fallthrough - adding a DSN continues with the configure/save step */

  case ODBC_CONFIG_DSN:
#ifdef _WIN32
    /*
      for windows, if hWnd is NULL, we try to add the dsn
      with what information was given
    */
    if (!hWnd || ShowOdbcParamsDialog(ds, hWnd, FALSE) == 1)
#else
    if (ShowOdbcParamsDialog(ds, hWnd, FALSE) == 1)
#endif
    {
      /* save datasource */
      if (ds_add(ds))
      {
        /* keep the old DSN: removing it now would lose the only copy */
        rc= FALSE;
      }
      else if (origdsn &&
               memcmp(origdsn, ds->name,
                      (sqlwcharlen(origdsn) + 1) * sizeof(SQLWCHAR)))
      {
        /* saved under a new name, so remove the old dsn */
        SQLRemoveDSNFromIni(origdsn);
      }
    }
    break;

  case ODBC_REMOVE_DSN:
    if (SQLRemoveDSNFromIni(ds->name) != TRUE)
      rc= FALSE;
    break;

  default:
    /* unknown request type: nothing was done, so do not report success */
    rc= FALSE;
    break;
  }

exitConfigDSN:
  x_free(origdsn);
  ds_delete(ds);
  if (driver)
    driver_delete(driver);
  return rc;
}
// Index of a row whose label is the majority label of `data`.
// Uses a single-pass majority vote: when one label covers more than half of
// the rows that label is found; otherwise the result is just some row.
// Expects data->rowcount >= 1.
static int dt_majority_row(data_set *data) {
    int candidate = 0;
    int balance = 0;

    for(int i = 0; i < data->rowcount; i++) {
        if(balance == 0) {
            candidate = i;
            balance = 1;
        } else if(data->y_data[i] == data->y_data[candidate]) {
            balance++;
        } else {
            balance--;
        }
    }

    return candidate;
}

// Recursively grow the tree below `node` from `train_data`.
//
// The node becomes a leaf when dt_should_split() says there is nothing left
// to separate. Otherwise the best column is chosen with
// dt_pick_best_column(), the rows are partitioned around that column's mean
// (< mean goes left, >= mean goes right) and each non-empty partition is
// handled recursively.
//
// If the chosen column cannot separate the rows (one partition would receive
// every row), recursing would repeat the same call on the same data forever,
// so the node is turned into a leaf predicting the majority label instead.
//
// node:       node to fill in
// train_data: rows that reached this node
// depth:      depth of `node`; only passed along to the recursive calls
// criterion:  split criterion forwarded to dt_pick_best_column()
//
// Returns 1 for a leaf (or for an empty training set), otherwise the sum of
// the values returned for the two children.
int dt_split_on_node(dt_node *node, data_set *train_data, int depth, split_criterion criterion) {
    // check for an empty set first: the leaf branch below reads y_data[0]
    if(train_data->rowcount < 1) {
        // this is generally a bad place to be
        // should never happen
        fprintf(stderr, "No rows left in training set!\n");
        return 1;
    }

    if(!dt_should_split(train_data)) {
        // all y values are the same, so make a leaf!
        node->is_leaf = 1;
        node->prediction_value = train_data->y_data[0];
        return 1;
    }

    // pick the best column based on info gain
    unsigned int col = dt_pick_best_column(train_data, criterion);

    // split on the mean of the column
    node->split_value = ds_col_mean(train_data, col);
    node->split_col = col;

    // make a new data set for all of the rows less than the mean
    data_set *lesser_data = ds_new(train_data->colcount, 1);
    for(int i = 0; i < train_data->rowcount; i++) {
        float val = train_data->x_data[i][col];
        if(val < node->split_value) {
            ds_add_item(lesser_data, train_data->x_data[i], train_data->y_data[i]);
        }
    }

    if(lesser_data->rowcount == train_data->rowcount) {
        // every row went left: no progress is possible, stop here
        ds_free(lesser_data);
        node->left = NULL;
        node->right = NULL;
        node->is_leaf = 1;
        node->prediction_value = train_data->y_data[dt_majority_row(train_data)];
        return 1;
    }

    int c1 = 0;
    if(lesser_data->rowcount > 0) {
        // recurse on the rows below the mean
        dt_node *left_node = dt_new_node();
        left_node->is_lesser = 1;
        node->left = left_node;
        c1 = dt_split_on_node(left_node, lesser_data, depth+1, criterion);
    } else {
        node->left = NULL;
    }
    // release the left partition before building the right one
    ds_free(lesser_data);

    // make a data set for values >= mean
    data_set *greater_data = ds_new(train_data->colcount, 1);
    for(int i = 0; i < train_data->rowcount; i++) {
        float val = train_data->x_data[i][col];
        if(val >= node->split_value) {
            ds_add_item(greater_data, train_data->x_data[i], train_data->y_data[i]);
        }
    }

    if(greater_data->rowcount == train_data->rowcount) {
        // every row went right (so the left side was empty and node->left is
        // already NULL): no progress is possible, stop here
        ds_free(greater_data);
        node->right = NULL;
        node->is_leaf = 1;
        node->prediction_value = train_data->y_data[dt_majority_row(train_data)];
        return 1;
    }

    int c2 = 0;
    if(greater_data->rowcount > 0) {
        // recurse on the rows at or above the mean
        dt_node *right_node = dt_new_node();
        node->right = right_node;
        right_node->is_lesser = 0;
        c2 = dt_split_on_node(right_node, greater_data, depth+1, criterion);
    } else {
        node->right = NULL;
    }
    ds_free(greater_data);

    // combined count reported by the two subtrees
    return c1+c2;
}
// pick the best column to split on, based on the information gain metric int dt_pick_best_column(data_set *data, split_criterion criterion) { float *gains = malloc(data->colcount * sizeof(float)); for(int col = 0; col < data->colcount; col++) { // divide up the data based on the mean of the chosen column float mean = ds_col_mean(data, col); data_set *lesser = ds_new(data->colcount, 1); data_set *greater = ds_new(data->colcount, 1); for(int row = 0; row < data->rowcount; row++) { if(data->x_data[row][col] < mean) { ds_add_item(lesser, data->x_data[row], data->y_data[row]); } else { ds_add_item(greater, data->x_data[row], data->y_data[row]); } } float main_splitscore; float lesser_splitscore; float greater_splitscore; if(criterion == CR_ENTROPY) { // entropy estimation for the whole data set and the two splits main_splitscore = ds_entropy(data); lesser_splitscore = ds_entropy(lesser); greater_splitscore = ds_entropy(greater); } else if(criterion == CR_GINI) { main_splitscore = ds_gini(data); lesser_splitscore = ds_gini(lesser); greater_splitscore = ds_gini(greater); } else { fprintf(stderr, "Unknown criterion %d!\n", criterion); return 0; } // ratios for split data sets float lesser_frac = ((float)lesser->rowcount) / data->rowcount; float greater_frac = ((float)greater->rowcount) / data->rowcount; // this is either information gain if the splitscore is entropy // or it is the total population diversity score if using gini float gain; if(criterion == CR_ENTROPY) { gain = main_splitscore - ((lesser_frac * lesser_splitscore) + (greater_frac * greater_splitscore)); } else if(criterion == CR_GINI) { gain = (lesser_frac * lesser_splitscore) + (greater_frac * greater_splitscore); } else { fprintf(stderr, "Unknown criterion %d!\n", criterion); return 0; } gains[col] = gain; ds_free(lesser); ds_free(greater); } // pick the best gain float best = gains[0]; int bestcol = 0; for(int i = 0; i < data->colcount; i++) { if(gains[i] > best) { best = gains[i]; bestcol = i; } } 
free(gains); return bestcol; }