Пример #1
0
int main(int argc, char* argv[])
{
  // setlocale(LC_ALL, "");

  if (argc < 2) {
    printf("No connection string specified.\n");
    print_help();
    return -1;
  }
  if (argc < 3) {
    printf("No database specified.\n");
    print_help();
    return -1;
  }
  if (argc < 4) {
    printf("No filepath specified.\n");
    print_help();
    return -1;
  }  
  if (argc < 5) {
    printf("No table format file specified.\n");
    print_help();
    return -1;
  }
  if (argc >= 6) {
    tNoOfParallelTrans = atoi(argv[5]);
  }
  if (argc >= 7) {
    sleepTimeMilli = atoi(argv[6]);
  }

  strcpy(connstring, argv[1]);
  strcpy(database, argv[2]);
  strcpy(filepath, argv[3]);
  strcpy(tablefilepath, argv[4]);

  ifstream fin(tablefilepath);
  // first line is table name.
  string tmpTableName;
  getline(fin, tmpTableName);
  strcpy(tablename, tmpTableName.c_str());

  // find a "nokey" in table name
  istringstream lineReader(tmpTableName);
  string firstpart;
  if( getline(lineReader, firstpart, ' ') )
  {
    // printf("first part: %s", firstpart.c_str());
    string secondpart;
    if( getline(lineReader, secondpart) ) {
      // "firstpart secondpart"
      if (secondpart != NOKEY_IDENTIFIER) {
        cerr << "ERROR: could not recognize identifier: " << secondpart << endl;
        cerr << "Do you mean: '" << NOKEY_IDENTIFIER << "'?"<< endl;
        exit(-1);
      } else {
        noKey = true;
      }
      // printf("Attr: %s %s\n", key.c_str(), value.c_str());
    }
    strcpy(tablename, firstpart.c_str());
  } 


  // Initialize transaction array
  for(int i = 0 ; i < MAXTRANS ; i++) 
  {
    transaction[i].used = 0;
    transaction[i].conn = 0;
  }

  // each line is a column
  for (string row; getline(fin, row); ) {
    istringstream lineReader(row);
    string key;
    if( getline(lineReader, key, ' ') )
    {
      string value;
      if( getline(lineReader, value) ) {
        // "key value"
        fieldName.push_back(key);
        fieldType.push_back(value);
        // printf("Attr: %s %s\n", key.c_str(), value.c_str());
      }
    }
  }
  fin.close();

  Ndb_cluster_connection *conn = connect_to_cluster(connstring);

  Ndb* ndb = new Ndb(conn, database);
  if (ndb->init(1024) == -1)
  {
     // pass
  }
  ndb->waitUntilReady(10000);
  printf("Connected: database [%s], connstr [%s], #parallelTrans=[%d]. Load table [%s] from file [%s]...\n", database, connstring, tNoOfParallelTrans, tablename, filepath);

  // do_insert(*ndb);

  const NdbDictionary::Dictionary* myDict= ndb->getDictionary();
  // printf("table name: %s\n", tablename);
  const NdbDictionary::Table *myTable= myDict->getTable(tablename);

  if (myTable == NULL) 
    APIERROR(myDict->getNdbError());

  // Load the data
  bool dataleft = false;

  typedef vector<vector<string> > Rows;

  // Rows rows;
  ifstream input(filepath);
  char const row_delim = '\n';
  char const field_delim = '\t';
  int rowCounter = 0;
  for (string row; getline(input, row, row_delim); ) {

    // Find a slot in the transaction array
    async_callback_t * cb;
    // int retries = 0;
    int current = -1;

    int cursor = transTail + 1;
    if (cursor >= MAXTRANS) {
      cursor = 0;
    }

    for(int retries = 0; retries < MAX_TRANSALLOC_RETRY; retries++) {
      // for(int cursor=0; cursor < MAXTRANS; cursor++) 
      while(true)
      {
        if(transaction[cursor].used == 0)
        {          
          current = cursor;
          cb = new async_callback_t;
          /**
           * Set data used by the callback
           */
          cb->ndb = ndb;  //handle to Ndb object so that we can close transaction
                            // in the callback (alt. make myNdb global).

          cb->transaction = current; //This is the number (id)  of this transaction
          transaction[current].used = 1 ; //Mark the transaction as used
          transTail = current; // optimizing scan

          break;
        }
        else { // used
          cursor += 1; 
          if (cursor >= MAXTRANS) {
            cursor = 0;
          }

        }
      }
      if(current == -1) {
        cerr << "WARNING: Number of transactions in parallel exceeds the maximum. retrying..." << endl;
        usleep(1000);
        continue;
      } else {
        break;
      }
    }

    transaction[current].conn = ndb->startTransaction();

    istringstream ss(row);
    // NdbOperation *myOperation= myTransaction->getNdbOperation(myTable);
    NdbOperation *myOperation= transaction[current].conn->getNdbOperation(myTable);
    myOperation->insertTuple();

    // If no primary key is assigned, have to set the tupleId explicitly
    if (noKey) {
      unsigned long long tupleId = 0;
      // int
      // Ndb::getAutoIncrementValue(const char* aTableName,
      //                      Uint64 & autoValue, Uint32 cacheSize,
      //                      Uint64 step, Uint64 start)
      if (ndb->getAutoIncrementValue(myTable, tupleId, TUPLEID_FETCH_SIZE, 1, 1) != 0) {
        cerr << "Error occurs while getting tupleID to insert.\n";
        exit(-1);
      }

      myOperation->equal("$PK", tupleId);
      //if (tupleId % 10000 == 0) {
      //  cerr <<"DEBUG: set tupleID to " << tupleId << endl;
      //}
    }
    // Iterate for each field
    int i = 0;
    for (string field; getline(ss, field, field_delim); i++) {

      // For NULL value, do not set value for this field
      if (strcmp(field.c_str(), "\\N") == 0) {
        continue;
      }

      if (strcmp(fieldType[i].c_str(), "int") == 0) {
        // using a int64 to prevent problems..
        long long value = atoll(field.c_str());
        myOperation->setValue(fieldName[i].c_str(), value);
      }

      if (strcmp(fieldType[i].c_str(), "real") == 0) {
        double value = atof(field.c_str());
        myOperation->setValue(fieldName[i].c_str(), value);    
      }
      
      if (strcmp(fieldType[i].c_str(), "varchar") == 0) {
        char buffer[65535] = {};
        make_ndb_varchar(buffer, field.c_str());
        myOperation->setValue(fieldName[i].c_str(), buffer);
      }
      
      if (strcmp(fieldType[i].c_str(), "char") == 0) {
        char buffer[65535] = {};
        make_ndb_char(buffer, field.c_str());
        myOperation->setValue(fieldName[i].c_str(), buffer);
        // myOperation->setValue(fieldName[i].c_str(), field.c_str());
      }

      if (strcmp(fieldType[i].c_str(), "boolean") == 0) {
        int value = atoi(field.c_str());
        myOperation->setValue(fieldName[i].c_str(), value);
      }

      if (strcmp(fieldType[i].c_str(), "text") == 0) {
        NdbBlob *myBlobHandle = myOperation->getBlobHandle(fieldName[i].c_str());
        if (myBlobHandle == NULL) {
          cerr << "Hint: in the TSV file any TEXT/BLOB attribute must come after the primary key column.\n";
          APIERROR(myOperation->getNdbError());
        }
        myBlobHandle->setValue(field.c_str(), field.length());
        // myBlobHandle->setNull();
      }

    }

    transaction[current].conn->executeAsynchPrepare( NdbTransaction::Commit, 
                                         &callback, 
                                         cb
                                         );
    nPreparedTransactions++;
    rowCounter++;
    dataleft = true;
    /**
     * When we have prepared parallelism number of transactions ->
     * send the transaction to ndb. 
     * Next time we will deal with the transactions are in the 
     * callback. There we will see which ones that were successful
     * and which ones to retry.
     */
    if (nPreparedTransactions >= tNoOfParallelTrans)
    {
      // send-poll all transactions
      // close transaction is done in callback
      ndb->sendPollNdb(3000, tNoOfParallelTrans );
      nPreparedTransactions=0;
      dataleft = false;
      
      usleep(sleepTimeMilli);

    }

    // The SYNC way that can set multiple operations in one commit:
    // if (myTransaction->execute( NdbTransaction::NoCommit ) == -1)
    //   APIERROR(myTransaction->getNdbError());

    // if (rowCounter % TRANACTION_SIZE == 0) {
    //   // commit
    //   if (myTransaction->execute( NdbTransaction::Commit ) == -1)
    //     APIERROR(myTransaction->getNdbError());
    //   ndb->closeTransaction(myTransaction);
    //   myTransaction = ndb->startTransaction();
    //   dataleft = false;
    // }

  }

  if (dataleft) {
    ndb->sendPollNdb(3000, nPreparedTransactions );
    nPreparedTransactions=0;
      
    // SYNC way
    // if (myTransaction->execute( NdbTransaction::Commit ) == -1)
    //   APIERROR(myTransaction->getNdbError());
    // ndb->closeTransaction(myTransaction);    
  }

  ndb->waitUntilReady(10000);

  delete ndb;
  disconnect_from_cluster(conn);

  return EXIT_SUCCESS;
}