Esempio n. 1
0
void hash_index_object_t::test<6>()
{
    // Iterator test on a sparse map: every inserted UUID gets a distinct
    // first byte (0..255), then the whole map is walked with an iterator and
    // each visited entry is validated against the ids recorded at insert time.
    LLUUIDHashMap<UUIDTableEntry, 2> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    LLUUIDHashMapIter<UUIDTableEntry, 2> hashIter(&hashTable);
    const int numElementsToCheck = 256;
    std::vector<LLUUID> idList(numElementsToCheck);

    for (int slot = 0; slot < numElementsToCheck; ++slot)
    {
        LLUUID uuid;
        uuid.generate();
        // LLUUIDHashMap uses mData[0] to pick the bucket, so forcing it to
        // run through 0..255 spreads the entries out into a sparse map.
        uuid.mData[0] = slot;
        UUIDTableEntry entry(uuid, slot);
        hashTable.set(uuid, entry);
        idList[slot] = uuid;
    }

    int visited = 0;
    for (hashIter.first(); !hashIter.done(); )
    {
        ++visited;
        UUIDTableEntry current = *hashIter;
        LLUUID currentId = current.getID();
        // Advance before validating, exactly as the check below expects a
        // copy of the entry rather than the iterator position.
        hashIter.next();

        // The range check must come first: the stored value is used as an
        // index into idList.
        const bool valueInRange = current.getValue() < (size_t)numElementsToCheck;
        ensure("Iteration failed for sparse map", valueInRange && idList[current.getValue()] == currentId);
    }

    ensure("iteration count failed", visited == numElementsToCheck);
}
Esempio n. 2
0
void testHashTable()
{
    HashTable hashTable(10000);

    for (int i = 0; i != NUM_ITER; ++i)
    {
        guts::String x = names[rand() % INT_RANGE]; 
        
        if (hashTable.retrieve(x) == 0)
        {
            //std::cout << x << ' ';
            hashTable.insert(x);
        }
    }

    //std::cout << std::endl;

    while (!hashTable.empty())
    {
        guts::String x = names[rand() % INT_RANGE];
        if (hashTable.retrieve(x) != 0)
        {
            hashTable.remove(x);
            //std::cout << x << ' ';
        }
    }
}
Esempio n. 3
0
void hash_index_object_t::test<3>()
{
    // Overwrite test: store ten entries, store a second entry under each of
    // the same ids, then confirm that get() returns the second value.
    LLUUIDHashMap<UUIDTableEntry, 5> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    const int numElementsToCheck = 10;
    std::vector<LLUUID> idList(numElementsToCheck);

    // First pass: value == index.
    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID freshId;
        freshId.generate();
        UUIDTableEntry initial(freshId, n);
        hashTable.set(freshId, initial);
        idList[n] = freshId;
    }

    // Second pass: same ids, value == index + numElementsToCheck.
    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID key = idList[n];
        UUIDTableEntry replacement(key, n + numElementsToCheck);
        hashTable.set(key, replacement);
    }

    // Verification: each id must now map to the replacement value.
    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID key = idList[n];
        UUIDTableEntry stored = hashTable.get(key);
        const size_t expectedValue = (size_t)(n + numElementsToCheck);
        ensure("set/get did not work", stored.getID() == key && stored.getValue() == expectedValue);
    }
}
Esempio n. 4
0
void hash_index_object_t::test<7>()
{
    // Partial-iteration test: fill a sparse map, then iterate only until one
    // previously remembered id is reached. No assertion is made; the part
    // that would remove the element afterwards is disabled (see below).
    LLUUIDHashMap<UUIDTableEntry, 2> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    LLUUIDHashMapIter<UUIDTableEntry, 2> hashIter(&hashTable);
    const int numElementsToCheck = 256;
    std::vector<LLUUID> idList(numElementsToCheck);

    LLUUID uuidtoSearch;
    for (int slot = 0; slot < numElementsToCheck; ++slot)
    {
        LLUUID uuid;
        uuid.generate();
        // LLUUIDHashMap uses mData[0] to pick the bucket; forcing it to run
        // through 0..255 creates a sparse map.
        uuid.mData[0] = slot;
        UUIDTableEntry entry(uuid, slot);
        hashTable.set(uuid, entry);
        idList[slot] = uuid;

        // Remember the id inserted at index 5 as the search target.
        if (slot == 5)
        {
            uuidtoSearch = uuid;
        }
    }

    int stepsTaken = 0;
    for (hashIter.first(); !hashIter.done(); hashIter.next())
    {
        ++stepsTaken;
        UUIDTableEntry current = *hashIter;
        LLUUID currentId = current.getID();
        if (uuidtoSearch == currentId)
        {
            // Stop on the target without advancing the iterator.
            break;
        }
    }

    // The current iterator implementation does not allow any remove
    // operations until ALL elements have been iterated over. This seems to be
    // an unnecessary restriction. The iterator should have a method to
    // reset() its state so that further operations (including remove) can be
    // performed on the hash map without having to iterate through all the
    // remaining nodes.

//		 hashIter.reset();
//		 hashTable.remove(uuidtoSearch);
//		 ensure("remove after iteration reset failed", hashTable.check(uuidtoSearch) == FALSE);
}
Esempio n. 5
0
/**
 * Smoke test for the cuckoo hash table.
 *
 * Steps, in order:
 *   1. load the hash-function values and initialise the table storage,
 *   2. launch testCuckooInit over the table,
 *   3. launch a single testCuckooInsert,
 *   4. launch testCuckooInsertTooMany for up to hf.size + 1 keys, stopping as
 *      soon as the host-side rebuild flag is set,
 *   5. print the rebuild flag and assert that it was raised.
 *
 * Relies on file-level objects that are not declared in this function
 * (hash_a, hash_b, rebuild, HT_SIZE and the kernels).
 *
 * @return 0 on completion.
 */
int main() 
{
  hemi::Array<unsigned long long> hashTable(HT_SIZE, false);
  // create an obj for the hash functions, pointing at
  // the correct location.
  // Apparently, you have to load data into the host before calling the generic ptr() functions.
  // This probably has something to do with the lazy update.
  printf("Initializing data...\n");
  load_hashvals();


  unsigned gridDim = 1, blockDim = 1;
  int testInsert = 5;
  int key = 23423;
  initCuckooArray(hashTable.writeOnlyHostPtr(), HT_SIZE);
  CuckooTable hf = CuckooTable(hash_a.readOnlyPtr(), hash_b.readOnlyPtr(),hashTable.ptr(), HT_SIZE, rebuild.ptr(),rebuild.ptr(hemi::host)); 
  printf("Data initialized.\n");
  HEMI_KERNEL_LAUNCH(testCuckooInit, gridDim, blockDim, 0, 0, hashTable.readOnlyPtr(), HT_SIZE);
    #ifdef HEMI_CUDA_COMPILER
      cudaDeviceSynchronize();
    #endif
  printf("Testing insert.\n");
  HEMI_KERNEL_LAUNCH(testCuckooInsert, gridDim, blockDim, 0, 0, key, testInsert, hf);
    #ifdef HEMI_CUDA_COMPILER
      cudaDeviceSynchronize();
    #endif


  printf("Testing too many inserts (rebuild flag).\n");
  for (int i = 0; i < hf.size+1; i++) {
    HEMI_KERNEL_LAUNCH(testCuckooInsertTooMany, gridDim, blockDim, 0, 0, i * 73, i, hf);
    
    #ifdef HEMI_CUDA_COMPILER
      cudaDeviceSynchronize();
    #endif
    if (rebuild.readOnlyPtr(hemi::host)[0] == true)
    {
      // we have run into a problem 
      printf("Detected insert failure for %dth element for table of size %d! Need to generate new functions and try again.\n", i, hf.size);
      // Leave the loop directly. Overwriting the loop index with HT_SIZE+1
      // only terminated the loop when hf.size was not larger than HT_SIZE+1.
      break;
    }
  }
  #ifdef HEMI_CUDA_COMPILER
    cudaDeviceSynchronize();
  #endif

  printf("Rebuild flag: %s\n", (rebuild.readOnlyPtr(hemi::host)[0] == true ? "True" : "False"));
  // NOTE(review): this check disappears when NDEBUG is defined.
  assert(rebuild.readOnlyPtr(hemi::host)[0] == true);
  return 0;
}
Esempio n. 6
0
/**
 * Counts how often each word occurs in the file named by `filename` and
 * returns the words whose count reaches a threshold.
 *
 * @param threshold minimum frequency (inclusive) a word needs to be returned.
 * @return (word, frequency) pairs in the order produced by
 *         SCHashTable::vectorize().
 */
vector< pair<string, int> > WordFreq::getWords( int threshold ) {
    vector< pair<string, int> > ret;

    // note: see char_counter.cpp if you're having trouble.
    TextFile infile( filename );
    StringHash hasher;

    // Initial table size: one more than the number of "non-space followed by
    // space" transitions in `filename`. Note that this looks at the file NAME,
    // not at the file contents.
    // The scan starts at index 1 because the test reads the previous
    // character; starting at 0 read filename[-1] for names with a leading
    // space.
    const int length = filename.length();
    int words = 1;
    for (int pos = 1; pos < length; pos++)
        if (filename[pos] == ' ' && filename[pos-1] != ' ') words++;

    SCHashTable<string, int> hashTable( words, hasher );

    // NOTE(review): if getNextWord() can return an empty string at end of
    // file, that empty word is counted as well -- confirm against TextFile.
    while( infile.good() ) {
        string word = infile.getNextWord();

        if( hashTable.keyExists( word ) ) {
            // Replace the stored count with count + 1.
            int freq = hashTable.find( word );
            hashTable.remove( word );
            hashTable.insert( word, freq + 1 );
        } else {
            hashTable.insert( word, 1 );
        }
    }

    // Keep only the entries that meet the threshold.
    vector< pair<string, int> > vect = hashTable.vectorize();
    for( unsigned int i = 0; i < vect.size(); i++ ) {
        if( vect[i].second >= threshold )
            ret.push_back( vect[i] );
    }

    return ret;
}
Esempio n. 7
0
void hash_index_object_t::test<4>()
{
    // removeAll test: after inserting ten entries and clearing the map,
    // getLength() must report zero.
    LLUUIDHashMap<UUIDTableEntry, 5> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    const int numElementsToCheck = 10;
    std::vector<LLUUID> idList(numElementsToCheck);

    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID freshId;
        freshId.generate();
        UUIDTableEntry entry(freshId, n);
        hashTable.set(freshId, entry);
        idList[n] = freshId;
    }

    hashTable.removeAll();

    const bool mapIsEmpty = (hashTable.getLength() == 0);
    ensure("removeAll failed", mapIsEmpty);
}
Esempio n. 8
0
void hash_index_object_t::test<2>()
{
    // remove/getLength test: insert five entries, remove all but the last
    // one, then check the length and the presence of every id.
    LLUUIDHashMap<UUIDTableEntry, 2> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    const int numElementsToCheck = 5;
    std::vector<LLUUID> idList(numElementsToCheck*10);
    const int survivorIndex = numElementsToCheck - 1;

    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID freshId;
        freshId.generate();
        UUIDTableEntry entry(freshId, n);
        hashTable.set(freshId, entry);
        idList[n] = freshId;
    }

    ensure("getLength failed", hashTable.getLength() == numElementsToCheck);

    // Remove every element except the one at survivorIndex.
    for (int n = 0; n < survivorIndex; ++n)
    {
        LLUUID doomed = idList[n];
        hashTable.remove(doomed);
    }

    // Exactly one element should be left now.
    ensure("getLength failed", hashTable.getLength() == 1);

    // The removed ids must no longer be found ...
    for (int n = 0; n < survivorIndex; ++n)
    {
        LLUUID removedId = idList[n];
        ensure("remove did not work", !hashTable.check(removedId));
    }

    // ... while the survivor must still carry its original id and value.
    LLUUID survivorId = idList[survivorIndex];
    UUIDTableEntry survivor = hashTable.get(survivorId);
    ensure("remove did not work", survivor.getID() == survivorId && survivor.getValue() == (size_t)survivorIndex);
}
Esempio n. 9
0
void hash_index_object_t::test<5>()
{
    // Sparse-map set/get/remove test: 256 entries with first UUID byte
    // 0..255; every odd-indexed entry is removed and presence is re-checked.
    LLUUIDHashMap<UUIDTableEntry, 2> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    const int numElementsToCheck = 256;
    std::vector<LLUUID> idList(numElementsToCheck);

    for (int slot = 0; slot < numElementsToCheck; ++slot)
    {
        LLUUID uuid;
        uuid.generate();
        // LLUUIDHashMap uses mData[0] to pick the bucket; force it to run
        // through 0..255.
        uuid.mData[0] = slot;
        UUIDTableEntry entry(uuid, slot);
        hashTable.set(uuid, entry);
        idList[slot] = uuid;
    }

    // Every entry must be retrievable with the value it was stored with.
    for (int slot = 0; slot < numElementsToCheck; ++slot)
    {
        LLUUID key = idList[slot];
        UUIDTableEntry stored = hashTable.get(key);
        ensure("set/get did not work for sparse map", stored.getID() == key && stored.getValue() == (size_t)slot);
    }

    // Remove the entries at odd indices.
    for (int slot = 1; slot < numElementsToCheck; slot += 2)
    {
        LLUUID key = idList[slot];
        hashTable.remove(key);
    }

    // Even indices must still be present, odd indices must be gone.
    for (int slot = 0; slot < numElementsToCheck; ++slot)
    {
        LLUUID key = idList[slot];
        const bool shouldBePresent = (slot % 2 == 0);
        const bool isPresent = hashTable.check(key) ? true : false;
        ensure("remove or check did not work for sparse map", isPresent == shouldBePresent);
    }
}
Esempio n. 10
0
void hash_index_object_t::test<1>()
{
    // Bulk set/get/remove test with 32*256*32 randomly generated ids; every
    // odd-indexed entry is removed and presence is re-checked for all ids.
    LLUUIDHashMap<UUIDTableEntry, 32> hashTable(UUIDTableEntry::uuidEq, UUIDTableEntry());
    const int numElementsToCheck = 32*256*32;
    std::vector<LLUUID> idList(numElementsToCheck);

    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID freshId;
        freshId.generate();
        UUIDTableEntry entry(freshId, n);
        hashTable.set(freshId, entry);
        idList[n] = freshId;
    }

    // Every entry must be retrievable with the value it was stored with.
    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID key = idList[n];
        UUIDTableEntry stored = hashTable.get(key);
        ensure("set/get did not work", stored.getID() == key && stored.getValue() == (size_t)n);
    }

    // Remove the entries at odd indices.
    for (int n = 1; n < numElementsToCheck; n += 2)
    {
        LLUUID key = idList[n];
        hashTable.remove(key);
    }

    // Even indices must still be present, odd indices must be gone.
    for (int n = 0; n < numElementsToCheck; ++n)
    {
        LLUUID key = idList[n];
        const bool shouldBePresent = (n % 2 == 0);
        const bool isPresent = hashTable.check(key) ? true : false;
        ensure("remove or check did not work", isPresent == shouldBePresent);
    }
}
    //! @name Constructor/Destructor
    //@{
    /*!
      @brief Set up an AMGX solver for the given Tpetra matrix.

      The constructor performs, in order:
        - AMGX initialization and creation of the AMGX configuration, either
          from the file named by the "json file" parameter or from a
          "name=value, " string built from every entry of the "amgx:params"
          sublist;
        - creation of the AMGX resources, solver, matrix and vector handles;
        - for more than one process: construction of the AMGX communication
          maps and of the local reordering (muelu2amgx_) in which
          non-exported rows come first, exported rows next and imported
          columns last, grouped by neighbor;
        - conversion of the row offsets to int and upload of the (possibly
          reordered) CRS arrays;
        - AMGX_solver_setup and creation of the vector transfer timers.

      @param inA          matrix to hand to AMGX.
      @param paramListIn  parameter list; its "amgx:params" sublist is read.

      Raises Exceptions::RuntimeError (through TEUCHOS_TEST_FOR_EXCEPTION) when
      the communicator is not a Teuchos::MpiComm, when the matrix graph has no
      importer (multi-process case), or when the sorted lists of ranks sent to
      and received from differ.
    */
    AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC,LO,GO,NO> > &inA, Teuchos::ParameterList &paramListIn) {
      RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
      int numProcs = comm->getSize();
      int myRank   = comm->getRank();

      RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize");
      amgxTimer->start();
      // Initialize
      AMGX_SAFE_CALL(AMGX_initialize());
      AMGX_SAFE_CALL(AMGX_initialize_plugins());

      /*system*/
      //AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
      AMGX_SAFE_CALL(AMGX_install_signal_handler());
      Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true);
      if (configs.isParameter("json file")) {
        AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, configs.get<std::string>("json file").c_str()));
      } else {
        // Serialize every parameter as "name=value, "
        std::ostringstream oss;
        oss << "";
        ParameterList::ConstIterator itr;
        for (itr = configs.begin(); itr != configs.end(); ++itr) {
          const std::string&    name  = configs.name(itr);
          const ParameterEntry& entry = configs.entry(itr);
          oss << name << "=" << filterValueToString(entry) << ", ";
        }
        oss << "\0";
        std::string configString = oss.str();
        if (configString == "") {
          //print msg that using defaults
          //GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n";
        }
        AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
      }

      // TODO: we probably need to add "exception_handling=1" to the parameter list
      // to switch on internal error handling (with no need for AMGX_SAFE_CALL)

#define NEW_COMM
#ifdef NEW_COMM
      // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring
      // FIXME: fix for serial comm
      RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());
      TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm");

      RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
      MPI_Comm mpiComm = *rawMpiComm;
#endif

      // Construct AMGX resources
      if (numProcs == 1) {
        AMGX_resources_create_simple(&Resources_, Config_);

      } else {
        // One GPU per rank, chosen round-robin over the visible devices
        int numGPUDevices;
        cudaGetDeviceCount(&numGPUDevices);
        int device[] = {(comm->getRank() % numGPUDevices)};

        AMGX_config_add_parameters(&Config_, "communicator=MPI");
#ifdef NEW_COMM
        AMGX_resources_create(&Resources_, Config_, &mpiComm, 1/* number of GPU devices utilized by this rank */, device);
#else
        AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1/* number of GPU devices utilized by this rank */, device);
#endif
      }

      AMGX_Mode mode = AMGX_mode_dDDI;
      AMGX_solver_create(&Solver_, Resources_, mode,  Config_);
      AMGX_matrix_create(&A_,      Resources_, mode);
      AMGX_vector_create(&X_,      Resources_, mode);
      AMGX_vector_create(&Y_,      Resources_, mode);

      amgxTimer->stop();
      amgxTimer->incrementNumCalls();

      // Inverse of muelu2amgx_; only filled in the multi-process case
      std::vector<int> amgx2muelu;

      // Construct AMGX communication pattern
      if (numProcs > 1) {
        RCP<const Tpetra::Import<LO,GO> > importer = inA->getCrsGraph()->getImporter();

        TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object.");

        Tpetra::Distributor distributor = importer->getDistributor();

        Array<int> sendRanks = distributor.getImagesTo();
        Array<int> recvRanks = distributor.getImagesFrom();

        std::sort(sendRanks.begin(), sendRanks.end());
        std::sort(recvRanks.begin(), recvRanks.end());

        // The sorted send and receive rank lists must be identical.
        bool match = true;
        if (sendRanks.size() != recvRanks.size()) {
          match = false;
        } else {
          for (int i = 0; i < sendRanks.size(); i++) {
            // Braces are required here: without them the break ran
            // unconditionally and only the first pair was ever compared.
            if (recvRanks[i] != sendRanks[i]) {
              match = false;
              break;
            }
          }
        }
        TEUCHOS_TEST_FOR_EXCEPTION(!match, MueLu::Exceptions::RuntimeError, "AMGX requires that the processors that we send to and receive from are the same. "
                                   "This is not the case: we send to {" << sendRanks << "} and receive from {" << recvRanks << "}");

        int        num_neighbors = sendRanks.size();  // does not include the calling process
        const int* neighbors     = &sendRanks[0];

        // Later on, we'll have to organize the send and recv data by PIDs,
        // i.e, a vector V of vectors, where V[i] is PID i's vector of data.
        // Hence we need to be able to quickly look up  an array index
        // associated with each PID.
        Tpetra::Details::HashTable<int,int> hashTable(3*num_neighbors);
        for (int i = 0; i < num_neighbors; i++)
          hashTable.add(neighbors[i], i);

        // Get some information out
        ArrayView<const int> exportLIDs = importer->getExportLIDs();
        ArrayView<const int> exportPIDs = importer->getExportPIDs();
        Array<int> importPIDs;
        Tpetra::Import_Util::getPids(*importer, importPIDs, true/* make local -1 */);

        // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation
        RCP<const Map> rowMap = inA->getRowMap();
        RCP<const Map> colMap = inA->getColMap();

        int N = rowMap->getNodeNumElements(), Nc = colMap->getNodeNumElements();
        muelu2amgx_.resize(Nc, -1);

        // Mark exported LIDs with -2 and count the distinct ones
        int numUniqExports = 0;
        for (int i = 0; i < exportLIDs.size(); i++)
          if (muelu2amgx_[exportLIDs[i]] == -1) {
            numUniqExports++;
            muelu2amgx_[exportLIDs[i]] = -2;
          }

        int localOffset = 0, exportOffset = N - numUniqExports;
        // Go through exported LIDs and put them at the end of LIDs
        for (int i = 0; i < exportLIDs.size(); i++)
          if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique
            muelu2amgx_[exportLIDs[i]] = exportOffset++;
        // Go through all non-export LIDs, and put them at the beginning of LIDs
        for (int i = 0; i < N; i++)
          if (muelu2amgx_[i] == -1)
            muelu2amgx_[i] = localOffset++;
        // Go through the tail (imported LIDs), and order those by neighbors
        int importOffset = N;
        for (int k = 0; k < num_neighbors; k++)
          for (int i = 0; i < importPIDs.size(); i++)
            if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
              muelu2amgx_[i] = importOffset++;

        amgx2muelu.resize(muelu2amgx_.size());
        for (int i = 0; i < muelu2amgx_.size(); i++)
          amgx2muelu[muelu2amgx_[i]] = i;

        // Construct send arrays
        std::vector<std::vector<int> > sendDatas (num_neighbors);
        std::vector<int>               send_sizes(num_neighbors, 0);
        for (int i = 0; i < exportPIDs.size(); i++) {
          int index = hashTable.get(exportPIDs[i]);
          sendDatas [index].push_back(muelu2amgx_[exportLIDs[i]]);
          send_sizes[index]++;
        }
        // FIXME: sendDatas must be sorted (based on GIDs)

        std::vector<const int*> send_maps(num_neighbors);
        for (int i = 0; i < num_neighbors; i++)
          send_maps[i] = &(sendDatas[i][0]);

        // Debugging
        printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector");

        // Construct recv arrays
        std::vector<std::vector<int> > recvDatas (num_neighbors);
        std::vector<int>               recv_sizes(num_neighbors, 0);
        for (int i = 0; i < importPIDs.size(); i++)
          if (importPIDs[i] != -1) {
            int index = hashTable.get(importPIDs[i]);
            recvDatas [index].push_back(muelu2amgx_[i]);
            recv_sizes[index]++;
        }
        // FIXME: recvDatas must be sorted (based on GIDs)

        std::vector<const int*> recv_maps(num_neighbors);
        for (int i = 0; i < num_neighbors; i++)
          recv_maps[i] = &(recvDatas[i][0]);

        // Debugging
        printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector");

        AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));

        AMGX_vector_bind(X_, A_);
        AMGX_vector_bind(Y_, A_);
      }

      RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix");
      matrixTransformTimer->start();

      ArrayRCP<const size_t> ia_s;
      ArrayRCP<const int>    ja;
      ArrayRCP<const double> a;
      inA->getAllValues(ia_s, ja, a);

      // Row offsets are handed to AMGX as int, so convert them from size_t
      ArrayRCP<int> ia(ia_s.size());
      for (int i = 0; i < ia.size(); i++)
        ia[i] = Teuchos::as<int>(ia_s[i]);

      N_      = inA->getNodeNumRows();
      int nnz = inA->getNodeNumEntries();

      matrixTransformTimer->stop();
      matrixTransformTimer->incrementNumCalls();


      // Upload matrix
      // TODO Do we need to pin memory here through AMGX_pin_memory?
      RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix  CPU->GPU");
      matrixTimer->start();
      if (numProcs == 1) {
        AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);

      } else {
        // Transform the matrix
        std::vector<int>    ia_new(ia.size());
        std::vector<int>    ja_new(ja.size());
        std::vector<double> a_new (a.size());

        ia_new[0] = 0;
        for (int i = 0; i < N_; i++) {
          int oldRow = amgx2muelu[i];

          ia_new[i+1] = ia_new[i] + (ia[oldRow+1] - ia[oldRow]);

          // Copy row oldRow into new row i, renumbering the column indices
          for (int j = ia[oldRow]; j < ia[oldRow+1]; j++) {
            int offset = j - ia[oldRow];
            ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
            a_new [ia_new[i] + offset] = a[j];
          }
          // Do bubble sort on two arrays
          // NOTE: There are multiple possible optimizations here (even of bubble sort)
          bool swapped;
          do {
            swapped = false;

            for (int j = ia_new[i]; j < ia_new[i+1]-1; j++)
              if (ja_new[j] > ja_new[j+1]) {
                std::swap(ja_new[j], ja_new[j+1]);
                std::swap(a_new [j], a_new [j+1]);
                swapped = true;
              }
          } while (swapped == true);
        }

        AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
      }
      matrixTimer->stop();
      matrixTimer->incrementNumCalls();

      domainMap_ = inA->getDomainMap();
      rangeMap_  = inA->getRangeMap();

      RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: real setup");
      realSetupTimer->start();
      AMGX_solver_setup(Solver_, A_);
      realSetupTimer->stop();
      realSetupTimer->incrementNumCalls();

      vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU");
      vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector  GPU->CPU");
    }
Esempio n. 12
0
// Builds the expression numbering shared by the local analyses.
//
// c : compilation being analysed (also the source of the TR_Memory object).
// t : enables trace output of every newly numbered expression.
//
// The constructor works in two passes over the treetops of the method:
//   1. Numbering pass (inside the "_numNodes < 0" block): counts NULLCHK
//      treetops, allocates _nullCheckNodesAsArray and _checkSymbolReferences,
//      calls countSupportedNodes() on every tree and gives stores to symbols
//      that are not autos/parms, and check nodes, a local index. The index is
//      either a fresh one (_numNodes++) or the one returned by
//      hasOldExpressionOnRhs() for a previously seen expression.
//   2. Collection pass: allocates _supportedNodesAsArray and
//      _checkExpressions sized by _numNodes, calls collectSupportedNodes() on
//      every tree and records the first node seen for each local index.
//
// All allocations made here use stack memory (allocateStackMemory /
// trStackMemory()); see the comment below on who controls their lifetime.
TR_LocalAnalysisInfo::TR_LocalAnalysisInfo(TR::Compilation *c, bool t)
   : _compilation(c), _trace(t), _trMemory(c->trMemory())
   {
   // -1 marks "nodes not numbered yet"; it is tested further down.
   _numNodes = -1;

#if 0  // somehow stops PRE from happening
   // We are going to increment visit count for every tree so can reach max
   // for big methods quickly. Perhaps can improve containsCall() in the future.
   comp()->resetVisitCounts(0);
#endif
   if (comp()->getVisitCount() > HIGH_VISIT_COUNT)
      {
      _compilation->resetVisitCounts(1);
      dumpOptDetails(comp(), "\nResetting visit counts for this method before LocalAnalysisInfo\n");
      }

   TR::CFG *cfg = comp()->getFlowGraph();
   _numBlocks = cfg->getNextNodeNumber();
   TR_ASSERT(_numBlocks > 0, "Local analysis, node numbers not assigned");

   // Allocate information on the stack. It is the responsibility of the user
   // of this class to determine the life of the information by using jitStackMark
   // and jitStackRelease.
   //
   //_blocksInfo = (TR::Block **) trMemory()->allocateStackMemory(_numBlocks*sizeof(TR::Block *));
   //memset(_blocksInfo, 0, _numBlocks*sizeof(TR::Block *));

   TR::TreeTop *currentTree = comp()->getStartTree();

   // Only do this if not done before; typically this would be done in the
   // first call to this method through LocalTransparency and would NOT
   // need to be re-done by LocalAnticipatability.
   //
   // Within this constructor _numNodes was set to -1 above, so this branch
   // is always taken here.
   if (_numNodes < 0)
      {
      _optimizer = comp()->getOptimizer();

      // Pick the number of hash buckets from the node count of the method.
      int32_t numBuckets;
      int32_t numNodes = comp()->getNodeCount();
      if (numNodes < 10)
         numBuckets = 1;
      else if (numNodes < 100)
         numBuckets = 7;
      else if (numNodes < 500)
         numBuckets = 31;
      else if (numNodes < 3000)
         numBuckets = 127;
      else if (numNodes < 6000)
         numBuckets = 511;
      else
         numBuckets = 1023;

      // Allocate hash table for matching expressions
      //
      // NOTE(review): hashTable is a local of this if-block, so _hashTable
      // stops pointing at a live object once the block ends. Verify that
      // nothing dereferences _hashTable after the numbering pass (e.g. from
      // collectSupportedNodes() or after the constructor returns).
      HashTable hashTable(numBuckets, comp());
      _hashTable = &hashTable;

      // Null checks are handled differently as the criterion for
      // commoning a null check is different than that used for
      // other nodes; for a null check, the null check reference is
      // important (and not the actual indirect access itself)
      //
      // First walk: count the NULLCHK treetops to size the array below.
      _numNullChecks = 0;
      while (currentTree)
         {
         if (currentTree->getNode()->getOpCodeValue() == TR::NULLCHK)
         //////if (currentTree->getNode()->getOpCode().isNullCheck())
            _numNullChecks++;

         currentTree = currentTree->getNextTreeTop();
         }

      if (_numNullChecks == 0)
         _nullCheckNodesAsArray = NULL;
      else
         {
         _nullCheckNodesAsArray = (TR::Node**)trMemory()->allocateStackMemory(_numNullChecks*sizeof(TR::Node*));
         memset(_nullCheckNodesAsArray, 0, _numNullChecks*sizeof(TR::Node*));
         }

      currentTree = comp()->getStartTree();
      int32_t symRefCount = comp()->getSymRefCount();
      _checkSymbolReferences = new (trStackMemory()) TR_BitVector(symRefCount, trMemory(), stackAlloc);

      // Local indices handed out below start at 1. The null check counter is
      // reset after having been used to size the array above; presumably the
      // helpers called below count it up again -- TODO confirm.
      _numNodes = 1;
      _numNullChecks = 0;

      // This loop counts all the nodes that are going to take part in PRE.
      // This is a computation intensive loop as we check if the node that
      // is syntactically equivalent to a given node has been seen before
      // and if so we use the local index of the original node (that
      // is syntactically equivalent to the given node). Could be improved
      // in complexity with value numbering at some stage.
      //
      _visitCount = comp()->incVisitCount();
      while (currentTree)
         {
         TR::Node *firstNodeInTree = currentTree->getNode();
         TR::ILOpCode *opCode = &firstNodeInTree->getOpCode();

         // A treetop (or anchor, when anchors are in use) wrapping a store:
         // give the wrapper index -1 and continue with the store itself.
         if (((firstNodeInTree->getOpCodeValue() == TR::treetop) ||
              (comp()->useAnchors() && firstNodeInTree->getOpCode().isAnchor())) &&
             (firstNodeInTree->getFirstChild()->getOpCode().isStore()))
            {
            firstNodeInTree->setLocalIndex(-1);
            if (comp()->useAnchors() && firstNodeInTree->getOpCode().isAnchor())
               firstNodeInTree->getSecondChild()->setLocalIndex(-1);

            firstNodeInTree = firstNodeInTree->getFirstChild();
            opCode = &firstNodeInTree->getOpCode();
            }

         // This call finds nodes with opcodes that are supported by PRE
         // in this subtree; this accounts for all opcodes other than stores/checks
         // which are handled later on below
         //
         bool firstNodeInTreeHasCallsInStoreLhs = false;
         countSupportedNodes(firstNodeInTree, NULL, firstNodeInTreeHasCallsInStoreLhs);

         if ((opCode->isStore() && !firstNodeInTree->getSymbolReference()->getSymbol()->isAutoOrParm()) ||
             opCode->isCheck())
            {
            int32_t oldExpressionOnRhs = hasOldExpressionOnRhs(firstNodeInTree);

            //
            // Return value 0 denotes that the node contains some sub-expression
            // that cannot participate in PRE; e.g. a call or a new
            //
            // Return value -1 denotes that the node can participate in PRE
            // but did not match with any existing expression seen so far
            //
            // Any other return value (should be positive always) denotes that
            // the node can participate in PRE and has been matched with a seen
            // expression having local index == return value
            //
            if (oldExpressionOnRhs == -1)
               {
               if (trace())
                  {
                  traceMsg(comp(), "\nExpression #%d is : \n", _numNodes);
                  comp()->getDebug()->print(comp()->getOutFile(), firstNodeInTree, 6, true);
                  }

               firstNodeInTree->setLocalIndex(_numNodes++);
               }
            else
               firstNodeInTree->setLocalIndex(oldExpressionOnRhs);

            // A check whose first child is a store to a non-auto/parm symbol:
            // number that store separately in the same way.
            if (opCode->isCheck() &&
                (firstNodeInTree->getFirstChild()->getOpCode().isStore() &&
                 !firstNodeInTree->getFirstChild()->getSymbolReference()->getSymbol()->isAutoOrParm()))
               {
               int oldExpressionOnRhs = hasOldExpressionOnRhs(firstNodeInTree->getFirstChild());

               if (oldExpressionOnRhs == -1)
                  {
                  if (trace())
                     {
                     traceMsg(comp(), "\nExpression #%d is : \n", _numNodes);
                     comp()->getDebug()->print(comp()->getOutFile(), firstNodeInTree->getFirstChild(), 6, true);
                     }

                  firstNodeInTree->getFirstChild()->setLocalIndex(_numNodes++);
                  }
               else
                  firstNodeInTree->getFirstChild()->setLocalIndex(oldExpressionOnRhs);
               }
            }
         else
            firstNodeInTree->setLocalIndex(-1);

         currentTree = currentTree->getNextTreeTop();
         }
      }

   // One slot / one bit per local index handed out above.
   _supportedNodesAsArray = (TR::Node**)trMemory()->allocateStackMemory(_numNodes*sizeof(TR::Node*));
   memset(_supportedNodesAsArray, 0, _numNodes*sizeof(TR::Node*));
   _checkExpressions = new (trStackMemory()) TR_BitVector(_numNodes, trMemory(), stackAlloc);

   //_checkExpressions.init(_numNodes, trMemory(), stackAlloc);

   // This loop goes through the trees and collects the nodes
   // that would take part in PRE. Each node has its local index set to
   // the bit position that it occupies in the bit vector analyses.
   //
   currentTree = comp()->getStartTree();
   _visitCount = comp()->incVisitCount();
   while (currentTree)
      {
      TR::Node *firstNodeInTree = currentTree->getNode();
      TR::ILOpCode *opCode = &firstNodeInTree->getOpCode();

      // Same unwrapping of treetop/anchor-over-store as in the numbering pass.
      if (((firstNodeInTree->getOpCodeValue() == TR::treetop) ||
           (comp()->useAnchors() && firstNodeInTree->getOpCode().isAnchor())) &&
          (firstNodeInTree->getFirstChild()->getOpCode().isStore()))
         {
         firstNodeInTree = firstNodeInTree->getFirstChild();
         opCode = &firstNodeInTree->getOpCode();
         }

      collectSupportedNodes(firstNodeInTree, NULL);

      if ((opCode->isStore() && !firstNodeInTree->getSymbolReference()->getSymbol()->isAutoOrParm()) ||
          opCode->isCheck())
         {
        // Checks are additionally recorded by symbol reference number and by
        // local index.
        if (opCode->isCheck())
            {
            _checkSymbolReferences->set(firstNodeInTree->getSymbolReference()->getReferenceNumber());
            _checkExpressions->set(firstNodeInTree->getLocalIndex());
            }

         // Keep only the first node encountered for each local index.
         if (!_supportedNodesAsArray[firstNodeInTree->getLocalIndex()])
            _supportedNodesAsArray[firstNodeInTree->getLocalIndex()] = firstNodeInTree;

         if (opCode->isCheck() &&
             firstNodeInTree->getFirstChild()->getOpCode().isStore() &&
             !firstNodeInTree->getFirstChild()->getSymbolReference()->getSymbol()->isAutoOrParm() &&
             !_supportedNodesAsArray[firstNodeInTree->getFirstChild()->getLocalIndex()])
            _supportedNodesAsArray[firstNodeInTree->getFirstChild()->getLocalIndex()] = firstNodeInTree->getFirstChild();
         }

      currentTree = currentTree->getNextTreeTop();
      }

   //initialize(toBlock(cfg->getStart()));
   }