Example #1
0
/*
* read_tasks_from_file : reads the table information from a text file.
* Parameters:
* - file_name : the file containing the table definitions
* - count_only : indicates if the file contains information to count the records
*                from the source DB or to actually transmit the data
* - tasks : output parameter that will contain a task for each table definition loaded
*           from the file
* - trigger_schemas : output parameter; the target schema of every table definition
*                     loaded in the full (7 field) format is inserted here
* - resume : indicates if the file contains information to resume copying data from
*            last PK
* - max_count : limit copied rows count to max_count
*
* Returns : true if every non empty line was loaded as a task, false if the file could
*           not be opened or a line does not have the expected number of fields (loading
*           stops at the first invalid line; tasks added before it are kept).
*
* Remarks : Each table is defined in a single line with the next format for count_only = true and resume = false
*           <src_schema>\t<src_table>\n
*
*           and in the next format for a count_only = false or resume = true
*           <src_schema>\t<src_table>\t<tgt_schema>\t<tgt_table>\t<source_pk_columns>\t<target_pk_columns>\t<select_expression>
*
*           A "-" in any of the pk columns fields means "no columns". Empty lines are
*           ignored and a trailing carriage return (files with CRLF line endings) is
*           removed before the line is parsed.
*/
bool read_tasks_from_file(const std::string file_name, bool count_only, TaskQueue& tasks, std::set<std::string> &trigger_schemas,
                          bool resume, long long int max_count)
{
  // The two field layout is only used for a plain row count, any other case uses
  // the full seven field layout.
  const bool short_format = count_only && !resume;
  const unsigned int field_count = short_format ? 2 : 7;

  printf("Loading table information from file %s\n", file_name.c_str());

  std::ifstream ifs(file_name.c_str(), std::ifstream::in);
  if (!ifs.is_open())
  {
    // Without this check an unreadable file would be reported as a successful
    // load of zero tasks.
    fprintf(stderr, "Unable to open table definitions file %s\n", file_name.c_str());
    return false;
  }

  std::string line;
  while (std::getline(ifs, line))
  {
    // Drops the CR left by CRLF line endings so it does not end up inside the
    // last field of the definition.
    if (!line.empty() && line[line.length() - 1] == '\r')
      line.erase(line.length() - 1);

    if (line.empty())
      continue;

    log_info("--table %s\n", line.data());

    std::vector<std::string> fields = base::split(line, "\t", field_count);
    if (fields.size() != field_count)
      return false;

    TableParam param;
    param.source_schema = fields[0];
    param.source_table = fields[1];

    if (!short_format)
    {
      param.target_schema = fields[2];
      param.target_table = fields[3];
      if (fields[4] != "-")
        param.source_pk_columns = base::split(fields[4], ",", -1);
      if (fields[5] != "-")
        param.target_pk_columns = base::split(fields[5], ",", -1);
      param.select_expression = fields[6];

      trigger_schemas.insert(param.target_schema);
    }

    param.copy_spec.resume = resume;
    param.copy_spec.max_count = max_count;
    param.copy_spec.type = CopyAll;
    tasks.add_task(param);
  }

  // The stream is closed by its destructor.
  return true;
}
Example #2
0
/*
* main : entry point of the table copy command line tool.
*
* Steps:
* 1. Parses the command line options (connection data, logging, table specifications,
*    trigger handling, threading and batching options).
* 2. Configures the logger (from --log-level, else from the WB_LOG_LEVEL environment
*    variable, else "info").
* 3. Loads the additional table definitions from --table-file, if given.
* 4. Validates and parses the source/target connection strings and optionally reads
*    the passwords from stdin.
* 5. Executes exactly one of these operations:
*    - count only : counts the rows of every queued table on the source
*    - standalone trigger operation : backup_triggers or restore_triggers on the target
*    - copy : creates thread_count CopyDataTask workers that consume the task queue,
*             calling backup_triggers before and restore_triggers after the copy
*             unless --dont-disable-triggers was given
*
* Returns 0 after printing "FINISHED". Any invalid option, invalid connection data or
* exception terminates the process through exit(1); --help, --version and an empty
* table list terminate it through exit(0).
*
* Remarks : the table options (--table, --table-range, --table-row-count, --table-where)
*           read the values of count_only, resume and max_count as they are at the moment
*           the option is parsed, so --count-only, --resume and --max-count only affect
*           the table options that come after them in the command line.
*/
int main(int argc, char **argv)
{
  std::string app_name = base::basename(argv[0]);

  base::threading_init();

  TaskQueue tables;

  std::string source_password;
  std::string source_connstring;
  bool source_use_cleartext_plugin = false;
  bool source_is_utf8 = false;
  std::string source_charset;
  SourceType source_type = ST_MYSQL;

  std::string target_connstring;
  std::string target_password;
  bool target_use_cleartext_plugin = false;
  std::string log_level;
  std::string log_file;

  bool passwords_from_stdin = false;
  bool count_only = false;
  bool check_types_only = false;
  bool truncate_target = false;
  bool show_progress = false;
  bool abort_on_oversized_blobs = false;
  bool disable_triggers = false;
  bool reenable_triggers = false;
  bool disable_triggers_on_copy = true;
  bool resume = false;
  int thread_count = 1;
  long long bulk_insert_batch = 100;
  long long max_count = 0;

  std::string table_file;

  std::set<std::string> trigger_schemas;
  std::string source_rdbms_type = "unknown";

  bool log_level_set = false;
  int i = 1;
  while (i < argc)
  {
    char *argval = NULL;

    if (check_arg_with_value(argv, i, "--log-level", argval, true))
      log_level = argval;
    else if (check_arg_with_value(argv, i, "--log-file", argval, true))
      log_file = argval;
    else if (check_arg_with_value(argv, i, "--odbc-source", argval, true))
    {
      source_type = ST_ODBC;
      source_connstring = base::trim(argval, "\"");
    }
    else if (check_arg_with_value(argv, i, "--mysql-source", argval, true))
    {
      source_type = ST_MYSQL;
      source_connstring = base::trim(argval, "\"");
    }
    else if (check_arg_with_value(argv, i, "--pythondbapi-source", argval, true))
    {
      source_type = ST_PYTHON;
      source_connstring = base::trim(argval, "\"");
    }
    else if (check_arg_with_value(argv, i, "--source-password", argval, true))
      source_password = argval;
    else if (check_arg_with_value(argv, i, "--target-password", argval, true))
      target_password = argval;
    else if (strcmp(argv[i], "--force-utf8-for-source") == 0)
      source_is_utf8 = true;
    else if (check_arg_with_value(argv, i, "--source-charset", argval, true))
      source_charset = argval;
    else if (strcmp(argv[i], "--progress") == 0)
      show_progress = true;
    else if (strcmp(argv[i], "--truncate-target") == 0)
      truncate_target = true;
    else if (strcmp(argv[i], "--count-only") == 0)
    {
      // Count only will be allowed only if one of the trigger
      // operations has not been indicated first
      if ( !disable_triggers && !reenable_triggers)
        count_only = true;
    }
    else if (strcmp(argv[i], "--check-types-only") == 0)
      check_types_only = true;
    else if (strcmp(argv[i], "--passwords-from-stdin") == 0)
      passwords_from_stdin = true;
    else if (strcmp(argv[i], "--abort-on-oversized-blobs") == 0)
      abort_on_oversized_blobs = true;
    else if (strcmp(argv[i], "--dont-disable-triggers") == 0)
      disable_triggers_on_copy = false;
    else if (strcmp(argv[i], "--resume") == 0)
      resume = true;
    else if (check_arg_with_value(argv, i, "--disable-triggers-on", argval, true))
    {
      // disabling/enabling triggers are standalone operations and mutually exclusive
      // so here it ensures a request for trigger enabling was not found first
      if (!reenable_triggers && !count_only)
      {
        disable_triggers = true;
        trigger_schemas.insert(argval);
      }
    }
    else if (check_arg_with_value(argv, i, "--reenable-triggers-on", argval, true))
    {
      // disabling/enabling triggers are standalone operations and mutually exclusive
      // so here it ensures a request for trigger disabling was not found first
      if (!disable_triggers && !count_only)
      {
        reenable_triggers = true;
        trigger_schemas.insert(argval);
      }
    }
    else if (check_arg_with_value(argv, i, "--thread-count", argval, true))
    {
      thread_count = base::atoi<int>(argval, 0);
      if (thread_count < 1)
        thread_count = 1;
    }
    else if (check_arg_with_value(argv, i, "--bulk-insert-batch-size", argval, true))
    {
      bulk_insert_batch = base::atoi<int>(argval, 0);
      if (bulk_insert_batch < 1)
        bulk_insert_batch = 100;
    }
    else if (strcmp(argv[i], "--version") == 0)
    {
      const char *type = APP_EDITION_NAME;
      if (strcmp(APP_EDITION_NAME, "Community") == 0)
        type = "CE";

      printf("%s %s (%s) %i.%i.%i %s build %i\n"
             , base::basename(argv[0]).c_str()
             , type, APP_LICENSE_TYPE
             , APP_MAJOR_NUMBER
             , APP_MINOR_NUMBER
             , APP_RELEASE_NUMBER
             , APP_RELEASE_TYPE
             , APP_BUILD_NUMBER
            );
      exit(0);
    }
    else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
    {
      show_help();
      exit(0);
    }
    else if (check_arg_with_value(argv, i, "--target", argval, true))
    {
      target_connstring = base::trim(argval, "\"");
    }
    else if (check_arg_with_value(argv, i, "--table-file", argval, true))
      table_file = argval;
    else if (strcmp(argv[i], "--table") == 0)
    {
      TableParam param;

      // The short form (schema + table) is only used for a plain count. The number
      // of required values is checked with the same condition that decides how many
      // are consumed below, otherwise argv could be read past its end.
      const bool short_spec = count_only && !resume;
      const int required_values = short_spec ? 2 : 7;

      if (i + required_values >= argc)
      {
        fprintf(stderr, "%s: Missing value for table copy specification\n", argv[0]);
        exit(1);
      }

      param.source_schema = argv[++i];
      param.source_table = argv[++i];
      if (!short_spec)
      {
        param.target_schema = argv[++i];
        param.target_table = argv[++i];
        // A "-" means no primary key columns
        if(std::strcmp(argv[++i], "-") != 0)
          param.source_pk_columns = base::split(argv[i], ",", -1);
        if(std::strcmp(argv[++i], "-") != 0)
          param.target_pk_columns = base::split(argv[i], ",", -1);
        param.select_expression = argv[++i];

        trigger_schemas.insert(param.target_schema);
      }

      param.copy_spec.resume = resume;
      param.copy_spec.max_count = max_count;

      param.copy_spec.type = CopyAll;

      tables.add_task(param);
    }
    else if (strcmp(argv[i], "--table-range") == 0)
    {
      TableParam param;

      // Table specification (2 or 7 values) + range key + range start + range end
      const bool short_spec = count_only && !resume;
      const int required_values = short_spec ? 5 : 10;

      if (i + required_values >= argc)
      {
        fprintf(stderr, "%s: Missing value for table copy specification\n", argv[0]);
        exit(1);
      }
      param.source_schema = argv[++i];
      param.source_table = argv[++i];
      if (!short_spec)
      {
        param.target_schema = argv[++i];
        param.target_table = argv[++i];
        if(std::strcmp(argv[++i], "-") != 0)
          param.source_pk_columns = base::split(argv[i], ",", -1);
        if(std::strcmp(argv[++i], "-") != 0)
          param.target_pk_columns = base::split(argv[i], ",", -1);
        param.select_expression = argv[++i];

        trigger_schemas.insert(param.target_schema);
      }
      param.copy_spec.range_key = argv[++i];
      param.copy_spec.range_start = base::atoi<long long>(argv[++i], 0ll);
      param.copy_spec.range_end = base::atoi<long long>(argv[++i], 0ll);
      param.copy_spec.type = CopyRange;

      tables.add_task(param);
    }
    else if (strcmp(argv[i], "--table-row-count") == 0)
    {
      TableParam param;

      // Table specification (2 or 7 values) + row count
      const bool short_spec = count_only && !resume;
      const int required_values = short_spec ? 3 : 8;

      if (i + required_values >= argc)
      {
        fprintf(stderr, "%s: Missing value for table copy specification\n", argv[0]);
        exit(1);
      }
      param.source_schema = argv[++i];
      param.source_table = argv[++i];
      if (!short_spec)
      {
        param.target_schema = argv[++i];
        param.target_table = argv[++i];
        if(std::strcmp(argv[++i], "-") != 0)
          param.source_pk_columns = base::split(argv[i], ",", -1);
        if(std::strcmp(argv[++i], "-") != 0)
          param.target_pk_columns = base::split(argv[i], ",", -1);
        param.select_expression = argv[++i];
        // NOTE(review): unlike the other table options the target schema is not
        // added to trigger_schemas here -- confirm whether this is intentional.
      }
      param.copy_spec.row_count = base::atoi<long long>(argv[++i], 0ll);
      param.copy_spec.resume = resume;
      param.copy_spec.type = CopyCount;

      tables.add_task(param);
    }
    else if (check_arg_with_value(argv, i, "--source-rdbms-type", argval, false))
      source_rdbms_type = argval;
    else if (strcmp(argv[i], "--table-where") == 0)
    {
      TableParam param;

      // Short form: schema + table + select expression + where expression
      // Full form: table specification (7 values) + where expression
      const bool short_spec = count_only && !resume;
      const int required_values = short_spec ? 4 : 8;

      if (i + required_values >= argc)
      {
        fprintf(stderr, "%s: Missing value for table copy specification\n", argv[0]);
        exit(1);
      }
      param.source_schema = argv[++i];
      param.source_table = argv[++i];
      if (!short_spec)
      {
        param.target_schema = argv[++i];
        param.target_table = argv[++i];
        if(std::strcmp(argv[++i], "-") != 0)
          param.source_pk_columns = base::split(argv[i], ",", -1);
        if(std::strcmp(argv[++i], "-") != 0)
          param.target_pk_columns = base::split(argv[i], ",", -1);
        param.select_expression = argv[++i];
        param.copy_spec.where_expression = argv[++i];

        trigger_schemas.insert(param.target_schema);
      }
      else
      {
        param.select_expression = argv[++i];
        param.copy_spec.where_expression = argv[++i];
      }
      param.copy_spec.type = CopyWhere;

      tables.add_task(param);
    }
    else if (check_arg_with_value(argv, i, "--max-count", argval, true))
    {
      // max_count is a long long, parsing it as int would truncate big values
      max_count = base::atoi<long long>(argval, 0ll);
    }
    else if (strcmp(argv[i], "--source-use-cleartext") == 0)
      source_use_cleartext_plugin = true;
    else if (strcmp(argv[i], "--target-use-cleartext") == 0)
      target_use_cleartext_plugin = true;
    else
    {
      fprintf(stderr, "%s: Invalid option %s\n", argv[0], argv[i]);
      exit(1);
    }

    i++;
  }

  // Creates the logger using the target file given in --log-file (may be empty)
  base::Logger logger(true, log_file);

  if (!log_level.empty())
  {
      if (!set_log_level(log_level))
      {
        fprintf(stderr, "%s: invalid argument '%s' for option %s\n", argv[0], log_level.data(), "--log-level");
        exit(1);
      }
      else
        log_level_set = true;
  }

  // Set the log level from environment var WB_LOG_LEVEL if specified or set a default log level.
  if (!log_level_set)
  {
    const char* log_setting = getenv("WB_LOG_LEVEL");
    if (log_setting == NULL)
      log_setting = "info";
    else
      log_level_set = true;

    std::string level = base::tolower(log_setting);
    base::Logger::active_level(level);
  }

  // If needed, reads the tasks from the table definition file
  if (!table_file.empty())
  {
    if (!read_tasks_from_file(table_file, count_only, tables, trigger_schemas, resume, max_count))
    {
      fprintf(stderr, "Invalid table definitions format in file: %s\n", table_file.data());
      exit(1);
    }
  }

  // Not having the source connection data is an error unless
  // the standalone operations to disable or reenable triggers
  // are called
  if (source_connstring.empty() && !reenable_triggers && ! disable_triggers)
  {
    fprintf(stderr, "Missing source DB server\n");
    exit(1);
  }

  // The target is not needed only for a plain (non resuming) count
  if (target_connstring.empty() && !(count_only && !resume))
  {
    fprintf(stderr, "Missing target DB server\n");
    exit(1);
  }

  // Table definitions will be required only if the standalone operations to
  // Reenable or disable triggers are not called
  if (tables.empty() && !reenable_triggers && ! disable_triggers)
  {
    log_warning("Missing table list specification\n");
    exit(0);
  }

  std::string source_host;
  std::string source_user;
  int source_port = -1;
  std::string source_socket;

  // Source connection is parsed only when NOT executing the
  // Standalone operations on triggers
  if (source_type == ST_MYSQL && !reenable_triggers && ! disable_triggers)
  {
    if (!parse_mysql_connstring(source_connstring, source_user, source_password,
                                source_host, source_port, source_socket))
    {
      // Reports the source connection string (the target one was printed here before)
      fprintf(stderr, "Invalid MySQL connection string %s for source database. Must be in format user[:pass]@host:port or user[:pass]@::socket\n", source_connstring.c_str());
      exit(1);
    }
  }

  std::string target_host;
  std::string target_user;
  int target_port = -1;
  std::string target_socket;
  if (!(count_only && !resume) && !parse_mysql_connstring(target_connstring, target_user, target_password,
                                             target_host, target_port, target_socket))
  {
    fprintf(stderr, "Invalid MySQL connection string %s for target database. Must be in format user[:pass]@host:port or user[:pass]@::socket\n", target_connstring.c_str());
    exit(1);
  }

  if (passwords_from_stdin)
  {
    char password[200];
    if (!fgets(password, sizeof(password), stdin))
    {
      log_error("Error reading passwords from stdin\n");
      exit(1);
    }

    if ((count_only && !resume)|| reenable_triggers || disable_triggers)
    {
      // Only one connection is used by these operations, so a single password is
      // expected: the source one when counting, the target one for the trigger operations
      char *ptr = strtok(password, "\t\r\n");
      if (ptr)
      {
        if (count_only)
          source_password = ptr;
        else
          target_password = ptr;
      }
    }
    else
    {
      // Expected input is <source_password>\t<target_password>, when there is no
      // tab the whole line is taken as the source password
      char *ptr = strtok(password, "\r\n");
      if (ptr)
      {
        ptr = strchr(password, '\t');
        if (ptr)
        {
          source_password = std::string(password, ptr-password);
          target_password = ptr+1;
        }
        else
          source_password = password;
      }
    }
  }

  static SQLHENV odbc_env;

  PyThreadState *state = NULL;
  if (source_type == ST_PYTHON)
  {
    Py_Initialize();
    PyEval_InitThreads();
    state = PyEval_SaveThread();
  }
  try
  {
    if (count_only)
    {
      boost::scoped_ptr<CopyDataSource> psource;

      if (source_type == ST_ODBC)
      {
        SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &odbc_env);
        SQLSetEnvAttr(odbc_env, SQL_ATTR_ODBC_VERSION, (void *) SQL_OV_ODBC3, 0);

        psource.reset(new ODBCCopyDataSource(odbc_env, source_connstring, source_password, source_is_utf8, source_rdbms_type));
      }
      else if (source_type == ST_MYSQL)
        psource.reset(new MySQLCopyDataSource(source_host, source_port, source_user, source_password, source_socket, source_use_cleartext_plugin));
      else
        psource.reset(new PythonCopyDataSource(source_connstring, source_password));

      // The target connection is created only if some task needs to resume
      boost::scoped_ptr<MySQLCopyDataTarget> ptarget;
      TableParam task;
      while(tables.get_task(task))
      {
        std::vector<std::string> last_pkeys;
        if (task.copy_spec.resume)
        {
          if(!ptarget.get())
            ptarget.reset(new MySQLCopyDataTarget(target_host, target_port, target_user, target_password, target_socket, target_use_cleartext_plugin, app_name, source_charset, source_rdbms_type));
          last_pkeys = ptarget->get_last_pkeys(task.target_pk_columns, task.target_schema, task.target_table);
        }
        count_rows(psource, task.source_schema, task.source_table, task.source_pk_columns, task.copy_spec, last_pkeys);
      }
    }
    else if (reenable_triggers || disable_triggers)
    {
      boost::scoped_ptr<MySQLCopyDataTarget> ptarget;
      ptarget.reset(new MySQLCopyDataTarget(target_host, target_port, target_user, target_password, target_socket, target_use_cleartext_plugin, app_name, source_charset, source_rdbms_type));

      if (disable_triggers)
        ptarget->backup_triggers(trigger_schemas);
      else
        ptarget->restore_triggers(trigger_schemas);
    }
    else
    {
      std::vector<CopyDataTask*> threads;

      boost::scoped_ptr<MySQLCopyDataTarget> ptarget_conn;
      MySQLCopyDataTarget *ptarget = NULL;
      CopyDataSource *psource = NULL;

      if (disable_triggers_on_copy)
      {
        ptarget_conn.reset(new MySQLCopyDataTarget(target_host, target_port, target_user, target_password, target_socket, target_use_cleartext_plugin, app_name, source_charset, source_rdbms_type));
        ptarget_conn->backup_triggers(trigger_schemas);
      }

      // Each worker gets its own source and target connections
      for (int index = 0; index < thread_count; index++)
      {
        if (source_type == ST_ODBC)
        {
          SQLAllocHandle(SQL_HANDLE_ENV, SQL_NULL_HANDLE, &odbc_env);
          SQLSetEnvAttr(odbc_env, SQL_ATTR_ODBC_VERSION, (void *) SQL_OV_ODBC3, 0);

          psource = new ODBCCopyDataSource(odbc_env, source_connstring, source_password, source_is_utf8, source_rdbms_type);
        }
        else if (source_type == ST_MYSQL)
          psource = new MySQLCopyDataSource(source_host, source_port, source_user, source_password, source_socket, source_use_cleartext_plugin);
        else
          psource = new PythonCopyDataSource(source_connstring, source_password);

        ptarget = new MySQLCopyDataTarget(target_host, target_port, target_user, target_password, target_socket, target_use_cleartext_plugin, app_name, source_charset, source_rdbms_type);

        psource->set_max_blob_chunk_size(ptarget->get_max_allowed_packet());
        psource->set_max_parameter_size((unsigned long)ptarget->get_max_long_data_size());
        psource->set_abort_on_oversized_blobs(abort_on_oversized_blobs);
        ptarget->set_truncate(truncate_target);
        // When a row limit is given it replaces the configured batch size
        if (max_count > 0)
          bulk_insert_batch = max_count;
        ptarget->set_bulk_insert_batch_size((int)bulk_insert_batch);

        if (check_types_only)
        {
          //XXXX
          // No task is created, so both connections have to be released here
          // (the target one was leaked before)
          delete psource;
          delete ptarget;
        }
        else
        {
          threads.push_back(new CopyDataTask(base::strfmt("Task %d", index + 1), psource, ptarget, &tables, show_progress));
        }
      }

      // Waits for all the threads to complete
      for (size_t index = 0; index < threads.size(); index++)
        threads[index]->wait();

      // Finally destroys the threads and connections
      for (size_t index = 0; index < threads.size(); index++)
        delete threads[index];

      // Finally restores the triggers
      if (disable_triggers_on_copy)
        ptarget_conn->restore_triggers(trigger_schemas);
    }
  }
  catch (const std::exception &e)
  {
    log_error("Exception: %s\n", e.what());
    if (source_type == ST_PYTHON)
    {
      PyEval_RestoreThread(state);
      Py_Finalize();
    }
    exit(1);
  }

  if (source_type == ST_PYTHON)
  {
    PyEval_RestoreThread(state);
    Py_Finalize();
  }

  printf("FINISHED\n");
  fflush(stdout);

  return 0;
}