/*
   Entry point for GUI prompting from SQLDriverConnect().

   Parses the incoming attribute string into a fresh DataSource, shows the
   parameters dialog and, if the dialog reports success (returns 1),
   serializes the data source into outstr as a ';'-delimited string.

   @param hWnd        window handle handed to ShowOdbcParamsDialog()
   @param instr       ';'-delimited input attributes; may be NULL or empty,
                      in which case the dialog starts from an empty
                      data source
   @param completion  not used by this function
   @param outstr      output buffer for the serialized attributes; may be
                      NULL
   @param outmax      limit passed to ds_to_kvpair() for outstr; treated
                      here as the capacity of outstr in characters
                      (NOTE(review): confirm against the caller)
   @param outlen      if not NULL, receives the value returned by
                      ds_to_kvpair(), or ds_to_kvpair_len() when
                      ds_to_kvpair() reports -1 (truncation)

   @return TRUE if the dialog returned 1, FALSE if the data source could
           not be allocated, instr could not be parsed, or the dialog
           returned anything else.
*/
BOOL Driver_Prompt(HWND hWnd, SQLWCHAR *instr, SQLUSMALLINT completion,
                   SQLWCHAR *outstr, SQLSMALLINT outmax, SQLSMALLINT *outlen)
{
  DataSource *ds= ds_new();
  BOOL rc= FALSE;

  /* nothing to clean up yet, so leave directly */
  if (!ds)
    return FALSE;

  /*
     parse the attr string, dsn lookup will have already been
     done in the driver
  */
  if (instr && *instr)
  {
    if (ds_from_kvpair(ds, instr, (SQLWCHAR)';'))
      goto exit;
  }

  /* Show the dialog and handle result */
  if (ShowOdbcParamsDialog(ds, hWnd, TRUE) == 1)
  {
    int len;
    /* serialize to outstr */
    if ((len= ds_to_kvpair(ds, outstr, outmax, (SQLWCHAR)';')) == -1)
    {
      /* truncated, up to caller to see outmax < *outlen */
      if (outlen)
      {
        *outlen= ds_to_kvpair_len(ds);
      }

      /*
         Terminate the truncated string inside the buffer. The last
         valid index of a buffer holding outmax characters is
         outmax - 1; also skip NULL or zero-sized buffers.
      */
      if (outstr && outmax > 0)
      {
        outstr[outmax - 1]= 0;
      }
    }
    else if (outlen)
    {
      *outlen= len;
    }
    rc= TRUE;
  }

exit:
  ds_delete(ds);
  return rc;
}
/*
   Add, edit, or remove a Data Source Name (DSN). This function is
   called by "Data Source Administrator" on Windows, or similar
   application on Unix.

   @param hWnd           parent window for the dialog; on Windows a NULL
                         handle means "save without prompting"
   @param nRequest       ODBC_ADD_DSN, ODBC_CONFIG_DSN or ODBC_REMOVE_DSN;
                         any other value is ignored
   @param pszDriver      driver name, required for ODBC_ADD_DSN
   @param pszAttributes  attribute string (';'-delimited, or on Windows
                         null-delimited when it contains no ';'); may be
                         NULL or empty

   @return FALSE if allocation fails, the attributes cannot be parsed, a
           required lookup fails, or saving/removing the DSN fails.
           TRUE otherwise, including when the dialog does not return 1.
*/
BOOL INSTAPI ConfigDSNW(HWND hWnd, WORD nRequest, LPCWSTR pszDriver,
                        LPCWSTR pszAttributes)
{
  DataSource *ds= ds_new();
  BOOL rc= TRUE;
  Driver *driver= NULL;
  SQLWCHAR *origdsn= NULL;

  /* nothing to clean up yet, so leave directly */
  if (!ds)
    return FALSE;

  if (pszAttributes && *pszAttributes)
  {
    SQLWCHAR delim= ';';

#ifdef _WIN32
    /* 
      if there's no ;, then it's most likely null-delimited

     NOTE: the double null-terminated strings are not working
     *      with UnixODBC-GUI-Qt (posted a bug ) 
    */
    if (!sqlwcharchr(pszAttributes, delim))
      delim= 0;
#endif

    if (ds_from_kvpair(ds, pszAttributes, delim))
    {
      SQLPostInstallerError(ODBC_ERROR_INVALID_KEYWORD_VALUE,
                            W_INVALID_ATTR_STR);
      rc= FALSE;
      goto exitConfigDSN;
    }
    /* a failed lookup is only acceptable when adding a new DSN */
    if (ds_lookup(ds) && nRequest != ODBC_ADD_DSN)
    {
      /* ds_lookup() will already set SQLInstallerError */
      rc= FALSE;
      goto exitConfigDSN;
    }
    /* remember the incoming name so a rename can be detected later */
    origdsn= sqlwchardup(ds->name, SQL_NTS);
  }

  switch (nRequest)
  {
  case ODBC_ADD_DSN:
    /* the driver name is needed to look the driver up */
    if (!pszDriver)
    {
      rc= FALSE;
      break;
    }
    driver= driver_new();
    if (!driver)
    {
      rc= FALSE;
      break;
    }
    memcpy(driver->name, pszDriver,
           (sqlwcharlen(pszDriver) + 1) * sizeof(SQLWCHAR));
    if (driver_lookup(driver))
    {
      rc= FALSE;
      break;
    }
    if (hWnd)
    {
      /*
        hWnd means we will at least try to prompt, at which point
        the driver lib will be replaced by the name
      */
      ds_set_strattr(&ds->driver, driver->lib);
    }
    else
    {
      /*
        no hWnd is a likely a call from an app w/no prompting so
        we put the driver name immediately
      */
      ds_set_strattr(&ds->driver, driver->name);
    }
    /* fall through: adding continues with the same prompt/save steps */
  case ODBC_CONFIG_DSN:
#ifdef _WIN32
    /*
      for windows, if hWnd is NULL, we try to add the dsn
      with what information was given
    */
    if (!hWnd || ShowOdbcParamsDialog(ds, hWnd, FALSE) == 1)
#else
    if (ShowOdbcParamsDialog(ds, hWnd, FALSE) == 1)
#endif
    {
      /* save datasource */
      if (ds_add(ds))
      {
        /*
          keep the old dsn when saving failed, otherwise a failed
          rename would destroy the only stored copy
        */
        rc= FALSE;
      }
      /* if the name is changed, remove the old dsn */
      else if (origdsn && memcmp(origdsn, ds->name,
                                 (sqlwcharlen(origdsn) + 1) *
                                 sizeof(SQLWCHAR)))
      {
        SQLRemoveDSNFromIni(origdsn);
      }
    }
    break;
  case ODBC_REMOVE_DSN:
    if (SQLRemoveDSNFromIni(ds->name) != TRUE)
      rc= FALSE;
    break;
  default:
    /* unknown request: nothing to do, rc is left unchanged */
    break;
  }

exitConfigDSN:
  x_free(origdsn);
  ds_delete(ds);
  if (driver)
    driver_delete(driver);
  return rc;
}
/*
 * Recursively grow the tree below `node` from `train_data`.
 *
 * The node becomes a leaf when dt_should_split() says there is nothing to
 * split, or when the chosen split would put every row on the same side.
 * Otherwise the rows are partitioned on the mean of the column chosen by
 * dt_pick_best_column(): rows below the mean go to node->left, rows at or
 * above it go to node->right, and each non-empty side is grown recursively.
 *
 * node        node to fill in (split_col/split_value and children, or
 *             is_leaf/prediction_value)
 * train_data  rows that reached this node; not modified or freed here
 * depth       depth of `node`; only forwarded (as depth+1) to the
 *             recursive calls
 * criterion   forwarded to dt_pick_best_column()
 *
 * Returns 1 for a leaf (and for the empty-set error case), otherwise the
 * sum of the values returned for the two children.
 */
int dt_split_on_node(dt_node *node, data_set *train_data, int depth, split_criterion criterion) {
    // checked first so the leaf branch below never reads y_data[0]
    // from an empty set
    if(train_data->rowcount < 1) {
        // this is generally a bad place to be
        // should never happen
        fprintf(stderr, "No rows left in training set!\n");
        return 1;
    }

    if(!dt_should_split(train_data)) {
        // all y values are the same, so make a leaf!
        node->is_leaf = 1;
        node->prediction_value = train_data->y_data[0];
        return 1;
    }

    // pick the best column based in info gain
    unsigned int col = dt_pick_best_column(train_data, criterion);

    // split on the mean of the column
    node->split_value = ds_col_mean(train_data, col);
    node->split_col = col;

    // count how many rows land on each side before building anything
    int lesser_rows = 0;
    int greater_rows = 0;
    for(int i = 0; i < train_data->rowcount; i++) {
        float val = train_data->x_data[i][col];
        if(val < node->split_value) {
            lesser_rows++;
        }
        else if(val >= node->split_value) {
            greater_rows++;
        }
    }

    if(lesser_rows == train_data->rowcount || greater_rows == train_data->rowcount) {
        // The split does not separate anything (e.g. the column is
        // constant). Recursing would hand the identical data set to the
        // child forever, so stop here and predict the most frequent label
        // (first one wins on ties).
        int best_row = 0;
        int best_count = 0;
        for(int i = 0; i < train_data->rowcount; i++) {
            int count = 0;
            for(int j = 0; j < train_data->rowcount; j++) {
                if(train_data->y_data[j] == train_data->y_data[i]) {
                    count++;
                }
            }
            if(count > best_count) {
                best_count = count;
                best_row = i;
            }
        }
        node->is_leaf = 1;
        node->prediction_value = train_data->y_data[best_row];
        return 1;
    }

    // make a new data set for all of the rows less than the mean
    data_set *lesser_data = ds_new(train_data->colcount, 1);

    // add all rows < mean
    for(int i = 0; i < train_data->rowcount; i++) {
        float val = train_data->x_data[i][col];
        if(val < node->split_value) {
            ds_add_item(lesser_data, train_data->x_data[i], train_data->y_data[i]);
        }
    }

    int c1 = 0;
    if(lesser_data->rowcount > 0) {
        // recurse on the rows below the mean
        dt_node *left_node = dt_new_node();
        left_node->is_lesser = 1;
        node->left = left_node;
        c1 = dt_split_on_node(left_node, lesser_data, depth+1, criterion);
    }
    else {
        node->left = NULL;
    }
    // the subset is only needed while growing the left subtree
    ds_free(lesser_data);

    // make a data set for values >= mean
    data_set *greater_data = ds_new(train_data->colcount, 1);

    for(int i = 0; i < train_data->rowcount; i++) {
        float val = train_data->x_data[i][col];
        if(val >= node->split_value) {
            ds_add_item(greater_data, train_data->x_data[i], train_data->y_data[i]);
        }
    }

    int c2 = 0;
    if(greater_data->rowcount > 0) {
        // recurse on the new data set
        dt_node *right_node = dt_new_node();
        node->right = right_node;
        right_node->is_lesser = 0;
        c2 = dt_split_on_node(right_node, greater_data, depth+1, criterion);
    }
    else {
        node->right = NULL;
    }
    ds_free(greater_data);

    // combine the counts reported by the two subtrees
    return c1+c2;
}
// Pick the column to split on.
//
// Every column is scored by splitting the rows on that column's mean
// (rows below the mean vs. the rest) and weighting each part's score by
// its share of the rows:
//   CR_ENTROPY: score = entropy(data) - weighted entropy of the two parts
//   CR_GINI:    score = weighted ds_gini() of the two parts
// The column with the highest score wins; the lowest index wins on ties.
//
// Returns the chosen column index. Returns 0 (after printing to stderr)
// for an unknown criterion or when the score buffer cannot be allocated,
// and 0 when the data set has no columns.
int dt_pick_best_column(data_set *data, split_criterion criterion) {
    // validate up front so no error path has to release per-column state
    if(criterion != CR_ENTROPY && criterion != CR_GINI) {
        fprintf(stderr, "Unknown criterion %d!\n", (int)criterion);
        return 0;
    }

    // nothing to score; also avoids reading gains[0] of an empty buffer
    if(data->colcount < 1) {
        return 0;
    }

    float *gains = malloc(data->colcount * sizeof *gains);
    if(gains == NULL) {
        fprintf(stderr, "Out of memory while scoring columns!\n");
        return 0;
    }

    // the score of the unsplit set does not depend on the column and is
    // only needed for the entropy criterion
    float main_splitscore = 0.0f;
    if(criterion == CR_ENTROPY) {
        main_splitscore = ds_entropy(data);
    }

    for(int col = 0; col < data->colcount; col++) {
        // divide up the data based on the mean of the chosen column
        float mean = ds_col_mean(data, col);

        data_set *lesser = ds_new(data->colcount, 1);
        data_set *greater = ds_new(data->colcount, 1);

        for(int row = 0; row < data->rowcount; row++) {
            if(data->x_data[row][col] < mean) {
                ds_add_item(lesser, data->x_data[row], data->y_data[row]);
            }
            else {
                ds_add_item(greater, data->x_data[row], data->y_data[row]);
            }
        }

        float lesser_splitscore;
        float greater_splitscore;
        if(criterion == CR_ENTROPY) {
            lesser_splitscore = ds_entropy(lesser);
            greater_splitscore = ds_entropy(greater);
        }
        else {
            lesser_splitscore = ds_gini(lesser);
            greater_splitscore = ds_gini(greater);
        }

        // ratios for split data sets
        float lesser_frac = ((float)lesser->rowcount) / data->rowcount;
        float greater_frac = ((float)greater->rowcount) / data->rowcount;

        float weighted = (lesser_frac * lesser_splitscore) +
            (greater_frac * greater_splitscore);

        // this is either information gain if the splitscore is entropy
        // or it is the total population diversity score if using gini
        if(criterion == CR_ENTROPY) {
            gains[col] = main_splitscore - weighted;
        }
        else {
            gains[col] = weighted;
        }

        ds_free(lesser);
        ds_free(greater);
    }

    // pick the best gain
    float best = gains[0];
    int bestcol = 0;
    for(int i = 1; i < data->colcount; i++) {
        if(gains[i] > best) {
            best = gains[i];
            bestcol = i;
        }
    }

    free(gains);
    return bestcol;
}