int main(int argc, char* argv[]) { // setlocale(LC_ALL, ""); if (argc < 2) { printf("No connection string specified.\n"); print_help(); return -1; } if (argc < 3) { printf("No database specified.\n"); print_help(); return -1; } if (argc < 4) { printf("No filepath specified.\n"); print_help(); return -1; } if (argc < 5) { printf("No table format file specified.\n"); print_help(); return -1; } if (argc >= 6) { tNoOfParallelTrans = atoi(argv[5]); } if (argc >= 7) { sleepTimeMilli = atoi(argv[6]); } strcpy(connstring, argv[1]); strcpy(database, argv[2]); strcpy(filepath, argv[3]); strcpy(tablefilepath, argv[4]); ifstream fin(tablefilepath); // first line is table name. string tmpTableName; getline(fin, tmpTableName); strcpy(tablename, tmpTableName.c_str()); // find a "nokey" in table name istringstream lineReader(tmpTableName); string firstpart; if( getline(lineReader, firstpart, ' ') ) { // printf("first part: %s", firstpart.c_str()); string secondpart; if( getline(lineReader, secondpart) ) { // "firstpart secondpart" if (secondpart != NOKEY_IDENTIFIER) { cerr << "ERROR: could not recognize identifier: " << secondpart << endl; cerr << "Do you mean: '" << NOKEY_IDENTIFIER << "'?"<< endl; exit(-1); } else { noKey = true; } // printf("Attr: %s %s\n", key.c_str(), value.c_str()); } strcpy(tablename, firstpart.c_str()); } // Initialize transaction array for(int i = 0 ; i < MAXTRANS ; i++) { transaction[i].used = 0; transaction[i].conn = 0; } // each line is a column for (string row; getline(fin, row); ) { istringstream lineReader(row); string key; if( getline(lineReader, key, ' ') ) { string value; if( getline(lineReader, value) ) { // "key value" fieldName.push_back(key); fieldType.push_back(value); // printf("Attr: %s %s\n", key.c_str(), value.c_str()); } } } fin.close(); Ndb_cluster_connection *conn = connect_to_cluster(connstring); Ndb* ndb = new Ndb(conn, database); if (ndb->init(1024) == -1) { // pass } ndb->waitUntilReady(10000); printf("Connected: database [%s], connstr [%s], #parallelTrans=[%d]. Load table [%s] from file [%s]...\n", database, connstring, tNoOfParallelTrans, tablename, filepath); // do_insert(*ndb); const NdbDictionary::Dictionary* myDict= ndb->getDictionary(); // printf("table name: %s\n", tablename); const NdbDictionary::Table *myTable= myDict->getTable(tablename); if (myTable == NULL) APIERROR(myDict->getNdbError()); // Load the data bool dataleft = false; typedef vector<vector<string> > Rows; // Rows rows; ifstream input(filepath); char const row_delim = '\n'; char const field_delim = '\t'; int rowCounter = 0; for (string row; getline(input, row, row_delim); ) { // Find a slot in the transaction array async_callback_t * cb; // int retries = 0; int current = -1; int cursor = transTail + 1; if (cursor >= MAXTRANS) { cursor = 0; } for(int retries = 0; retries < MAX_TRANSALLOC_RETRY; retries++) { // for(int cursor=0; cursor < MAXTRANS; cursor++) while(true) { if(transaction[cursor].used == 0) { current = cursor; cb = new async_callback_t; /** * Set data used by the callback */ cb->ndb = ndb; //handle to Ndb object so that we can close transaction // in the callback (alt. make myNdb global). cb->transaction = current; //This is the number (id) of this transaction transaction[current].used = 1 ; //Mark the transaction as used transTail = current; // optimizing scan break; } else { // used cursor += 1; if (cursor >= MAXTRANS) { cursor = 0; } } } if(current == -1) { cerr << "WARNING: Number of transactions in parallel exceeds the maximum. retrying..." << endl; usleep(1000); continue; } else { break; } } transaction[current].conn = ndb->startTransaction(); istringstream ss(row); // NdbOperation *myOperation= myTransaction->getNdbOperation(myTable); NdbOperation *myOperation= transaction[current].conn->getNdbOperation(myTable); myOperation->insertTuple(); // If no primary key is assigned, have to set the tupleId explicitly if (noKey) { unsigned long long tupleId = 0; // int // Ndb::getAutoIncrementValue(const char* aTableName, // Uint64 & autoValue, Uint32 cacheSize, // Uint64 step, Uint64 start) if (ndb->getAutoIncrementValue(myTable, tupleId, TUPLEID_FETCH_SIZE, 1, 1) != 0) { cerr << "Error occurs while getting tupleID to insert.\n"; exit(-1); } myOperation->equal("$PK", tupleId); //if (tupleId % 10000 == 0) { // cerr <<"DEBUG: set tupleID to " << tupleId << endl; //} } // Iterate for each field int i = 0; for (string field; getline(ss, field, field_delim); i++) { // For NULL value, do not set value for this field if (strcmp(field.c_str(), "\\N") == 0) { continue; } if (strcmp(fieldType[i].c_str(), "int") == 0) { // using a int64 to prevent problems.. long long value = atoll(field.c_str()); myOperation->setValue(fieldName[i].c_str(), value); } if (strcmp(fieldType[i].c_str(), "real") == 0) { double value = atof(field.c_str()); myOperation->setValue(fieldName[i].c_str(), value); } if (strcmp(fieldType[i].c_str(), "varchar") == 0) { char buffer[65535] = {}; make_ndb_varchar(buffer, field.c_str()); myOperation->setValue(fieldName[i].c_str(), buffer); } if (strcmp(fieldType[i].c_str(), "char") == 0) { char buffer[65535] = {}; make_ndb_char(buffer, field.c_str()); myOperation->setValue(fieldName[i].c_str(), buffer); // myOperation->setValue(fieldName[i].c_str(), field.c_str()); } if (strcmp(fieldType[i].c_str(), "boolean") == 0) { int value = atoi(field.c_str()); myOperation->setValue(fieldName[i].c_str(), value); } if (strcmp(fieldType[i].c_str(), "text") == 0) { NdbBlob *myBlobHandle = myOperation->getBlobHandle(fieldName[i].c_str()); if (myBlobHandle == NULL) { cerr << "Hint: in the TSV file any TEXT/BLOB attribute must come after the primary key column.\n"; APIERROR(myOperation->getNdbError()); } myBlobHandle->setValue(field.c_str(), field.length()); // myBlobHandle->setNull(); } } transaction[current].conn->executeAsynchPrepare( NdbTransaction::Commit, &callback, cb ); nPreparedTransactions++; rowCounter++; dataleft = true; /** * When we have prepared parallelism number of transactions -> * send the transaction to ndb. * Next time we will deal with the transactions are in the * callback. There we will see which ones that were successful * and which ones to retry. */ if (nPreparedTransactions >= tNoOfParallelTrans) { // send-poll all transactions // close transaction is done in callback ndb->sendPollNdb(3000, tNoOfParallelTrans ); nPreparedTransactions=0; dataleft = false; usleep(sleepTimeMilli); } // The SYNC way that can set multiple operations in one commit: // if (myTransaction->execute( NdbTransaction::NoCommit ) == -1) // APIERROR(myTransaction->getNdbError()); // if (rowCounter % TRANACTION_SIZE == 0) { // // commit // if (myTransaction->execute( NdbTransaction::Commit ) == -1) // APIERROR(myTransaction->getNdbError()); // ndb->closeTransaction(myTransaction); // myTransaction = ndb->startTransaction(); // dataleft = false; // } } if (dataleft) { ndb->sendPollNdb(3000, nPreparedTransactions ); nPreparedTransactions=0; // SYNC way // if (myTransaction->execute( NdbTransaction::Commit ) == -1) // APIERROR(myTransaction->getNdbError()); // ndb->closeTransaction(myTransaction); } ndb->waitUntilReady(10000); delete ndb; disconnect_from_cluster(conn); return EXIT_SUCCESS; }