// Runs the multi-threaded plan search using the heuristic selected by
// h_choice.
//
// The initial state's heuristic value is computed first with a temporary
// heuristic instance; if it equals std::numeric_limits<int>::max() the task
// is reported as unsolvable and false is returned without starting any
// threads. Otherwise K OpenMP threads are launched: thread 0 runs
// parallel_search() and every other thread builds its own heuristic instance
// and runs compute_heuristics(). The time accumulated in heuristic_timer is
// summed across threads by the OpenMP reduction and reported as an average
// over the K-1 worker threads.
//
// Parameters:
//   h_choice - selector forwarded to Utilities::get_heuristic().
// Returns:
//   The value of the shared "solved" flag after the parallel region ends.
bool ParallelStacksID::find_plan(const int& h_choice){

	// Flags handed to every thread by pointer so all threads observe the
	// same two booleans.
	bool s = false;
	bool k = false;
	bool* solved = &s;
	bool* killed = &k;

	busy.resize(K, false);

	double heuristic_timer = 0;

	// Need to create an instance of the heuristic to process the initial state
	heuristic = Utilities::get_heuristic(h_choice, goals, operators, tg);
	// Set up initial bound
	const int init_h = heuristic->calc_h(std::make_shared<LiteState>(initial_state));

	// The temporary instance is only needed for the initial estimate. Release
	// it before branching so the "unsolvable" early return does not leak it,
	// and clear the pointer so it is not left dangling.
	delete heuristic;
	heuristic = nullptr;

	// Test beginning heuristic to ensure it is a solvable task.
	if (init_h == std::numeric_limits<int>::max()) {
		std::cout << "Initial estimate assumes unsolvable." << std::endl;
		return false;
	}

	initial_state.set_h(init_h);

#pragma omp parallel shared(solved, killed) num_threads(K) reduction(+ : heuristic_timer)
	{
		if (omp_get_thread_num() == 0) {
			// Thread 0 drives the search itself.
			parallel_search(solved, killed);
		}
		else {
			// Each worker thread gets a private copy of the heuristic function for its use
			Heuristic* thread_heuristic = Utilities::get_heuristic(h_choice, goals, operators, tg);
			compute_heuristics(thread_heuristic, solved, killed, heuristic_timer);
			delete thread_heuristic;
		}
	}

	// With a single thread there are no heuristic workers; avoid dividing by
	// zero and report 0 instead of NaN/inf.
	const double average_heuristic_time = (K > 1) ? (heuristic_timer / (K - 1)) : 0.0;
	std::cout << "Average time each thread spent performing heuristic computation: " << average_heuristic_time << " seconds" << std::endl;

	Utilities::clear(open_list);
	Utilities::clear(process_list);

	return *solved;
}
// Build the shared-domain map between a source mesh and a set of target
// points.
//
// The routine:
//   1. builds process indexers for the source and target communicators,
//   2. checks that source and target dimensions match d_dimension,
//   3. wraps the source mesh in an entity set and wraps each target point in
//      a Point entity,
//   4. runs a ParallelSearch to find which source geometry contains each
//      target point,
//   5. keeps one source geometry per target point on this process and stores
//      the geometry id (d_source_geometry) and the coordinates returned by
//      rangeParametricCoordinatesInDomain (d_target_coords),
//   6. builds the source/target Tpetra maps and the source-to-target importer,
//   7. if d_store_missed_points is set, records the local indices of the
//      target points the search reported as missed (d_missed_points).
//
// source_mesh_manager  - source mesh manager; a null RCP is treated as "no
//                        source on this process".
// target_coord_manager - target coordinate field manager; a null RCP is
//                        treated as "no targets on this process".
// tolerance            - NOTE(review): this parameter is not referenced
//                        anywhere in the body below; the search is configured
//                        with the literal 1.0e-9 instead. Confirm whether
//                        that is intentional.
void SharedDomainMap<Mesh,CoordinateField>::setup( 
    const RCP_MeshManager& source_mesh_manager, 
    const RCP_CoordFieldManager& target_coord_manager,
    double tolerance )
{
    // Create existence values for the managers.
    bool source_exists = true;
    if ( source_mesh_manager.is_null() ) source_exists = false;
    bool target_exists = true;
    if ( target_coord_manager.is_null() ) target_exists = false;

    // Create local to global process indexers for the managers. When a
    // manager does not exist the corresponding communicator RCP is left
    // default-constructed.
    RCP_Comm source_comm;
    if ( source_exists )
    {
	source_comm = source_mesh_manager->comm();
    }
    RCP_Comm target_comm;
    if ( target_exists )
    {
	target_comm = target_coord_manager->comm();
    }
    d_source_indexer = CommIndexer( d_comm, source_comm );
    d_target_indexer = CommIndexer( d_comm, target_comm );

    // Check the source and target dimensions for consistency.
    if ( source_exists )
    {
	DTK_REQUIRE( source_mesh_manager->dim() == d_dimension );
    }

    if ( target_exists )
    {
	DTK_REQUIRE( CFT::dim( *target_coord_manager->field() ) 
			  == d_dimension );
    }

    // Build the domain space and map from the source information.
    // -----------------------------------------------------------

    // Create an entity set from the local source mesh.
    // NOTE(review): this is constructed unconditionally, i.e. also when
    // source_mesh_manager is null; presumably ClassicMesh tolerates a null
    // manager - confirm.
    Teuchos::RCP<DataTransferKit::ClassicMesh<Mesh> > classic_mesh =
	Teuchos::rcp( new DataTransferKit::ClassicMesh<Mesh>(source_mesh_manager) );
    ClassicMeshEntitySet<Mesh> source_entity_set( classic_mesh );

    // Create a local map.
    ClassicMeshElementLocalMap<Mesh> source_local_map(classic_mesh);
    
    // Build the target space and map from the target information.
    // -----------------------------------------------------------

    // Compute a unique global ordinal for each point in the coordinate field.
    // This is called whether or not the target manager exists.
    Teuchos::Array<GlobalOrdinal> target_ordinals;
    computePointOrdinals( target_coord_manager, target_ordinals );

    // Create an entity set from the local target points.
    BasicEntitySet target_entity_set( d_comm, d_dimension );
    if ( target_exists )
    {
	Teuchos::ArrayRCP<const typename CFT::value_type> coords_view =
	    FieldTools<CoordinateField>::view( *target_coord_manager->field() );
	Teuchos::Array<double> target_coords( d_dimension );
	int local_num_targets = target_ordinals.size();
	for ( int i = 0; i < local_num_targets; ++i )
	{
	    // The coordinate view is read blocked by dimension: component d
	    // of point i is at index d*local_num_targets + i.
	    for ( int d = 0; d < d_dimension; ++d )
	    {
		target_coords[d] = coords_view[d*local_num_targets + i];
	    }
	    // Each point entity is tagged with its global ordinal and the
	    // rank of this process.
	    target_entity_set.addEntity(
		DataTransferKit::Point( target_ordinals[i],
					d_comm->getRank(),
					target_coords )
		);
	}
    }

    // Create a local map.
    DataTransferKit::BasicGeometryLocalMap target_local_map;
    
    // Find the location of the target points in the source mesh.
    // --------------------------------------------------------------
    
    // Create parameters for the mapping. The point inclusion tolerance is the
    // fixed literal below, not the `tolerance` argument.
    Teuchos::ParameterList search_list;
    search_list.set<bool>("Track Missed Range Entities",d_store_missed_points);
    search_list.set<double>("Point Inclusion Tolerance", 1.0e-9 );
    
    // Do the parallel search. Source entities are iterated at dimension
    // d_dimension and target entities at dimension 0. The local maps are
    // stack objects, so they are passed as non-owning RCPs.
    EntityIterator source_iterator = source_entity_set.entityIterator( d_dimension );
    EntityIterator target_iterator = target_entity_set.entityIterator( 0 );
    ParallelSearch parallel_search( d_comm, 
				    d_dimension, 
				    source_iterator, 
				    Teuchos::rcpFromRef(source_local_map), 
				    search_list );
    parallel_search.search( target_iterator, 
			    Teuchos::rcpFromRef(target_local_map), 
			    search_list );
    
    // Build the mapping.
    // -----------------------

    // Get the source-target pairings as (source geometry id, target id).
    EntityIterator source_begin = source_iterator.begin();
    EntityIterator source_end = source_iterator.end();
    Teuchos::Array<EntityId> found_targets;
    Teuchos::Array<std::pair<EntityId,EntityId> > src_tgt_pairs;
    for ( auto src_geom = source_begin; src_geom != source_end; ++src_geom )
    {
	// Get the target points found in this source geometry.
	parallel_search.getRangeEntitiesFromDomain(
	    src_geom->id(), found_targets );

	// If we found any points, add them to the mapping.
	for ( auto found_tgt : found_targets )
	{
	    src_tgt_pairs.push_back(
		std::make_pair(src_geom->id(),found_tgt) );
	}
    }

    // Filter the source-target pairings so we only find a target point in one
    // geometry on this process. This handles the local uniqueness
    // problem. The tpetra import will handle the global uniqueness problem.
    //
    // Pairs are ordered by target id only and std::sort is not stable, so
    // when a target was found in several source geometries the one that
    // survives std::unique (the first of each run) is not specified here.
    auto sort_func = [] (std::pair<EntityId,EntityId> a,
			 std::pair<EntityId,EntityId> b )
		     { return a.second < b.second; };
    std::sort( src_tgt_pairs.begin(), src_tgt_pairs.end(), sort_func );
    auto unique_func = [] (std::pair<EntityId,EntityId> a,
			   std::pair<EntityId,EntityId> b )
		       { return a.second == b.second; };
    auto unique_it = std::unique( src_tgt_pairs.begin(),
				  src_tgt_pairs.end(),
				  unique_func );

    // Extract the mapping data. Only the first num_tgt entries of
    // src_tgt_pairs (the range kept by std::unique) are read; the array is
    // not shrunk.
    int num_tgt = std::distance( src_tgt_pairs.begin(), unique_it );
    Teuchos::Array<GlobalOrdinal> source_ordinals( num_tgt );
    d_source_geometry.resize( num_tgt );
    d_target_coords.resize( num_tgt * d_dimension );
    Teuchos::ArrayView<const double> tgt_coords;
    for ( int i = 0; i < num_tgt; ++i )
    {
	// Get the source geom id.
	d_source_geometry[i] = src_tgt_pairs[i].first;

	// Get the target point id. It becomes the global ordinal used for
	// the source-decomposition map.
	source_ordinals[i] = src_tgt_pairs[i].second;

	// Get the coordinates of the target point as reported by the search
	// for this (source geometry, target) pair.
	parallel_search.rangeParametricCoordinatesInDomain(
	    src_tgt_pairs[i].first,
	    src_tgt_pairs[i].second,
	    tgt_coords );

	// Stored blocked by dimension: component d of entry i is at index
	// d*num_tgt + i.
	for ( int d = 0; d < d_dimension; ++d )
	{
	    d_target_coords[ d*num_tgt + i ] = tgt_coords[d];
	}
    }

    // Create the data map in the source decomposition.
    d_source_map = Tpetra::createNonContigMap<int,GlobalOrdinal>(
	source_ordinals(), d_comm );
    
    // Create the data map in the target decomposition.
    d_target_map = Tpetra::createNonContigMap<int,GlobalOrdinal>(
	target_ordinals(), d_comm );

    // Build the source-to-target importer.
    d_source_to_target_importer = 
      Teuchos::rcp( new Tpetra::Import<int,GlobalOrdinal>(
          d_source_map, d_target_map ) );
    
    // Extract the missed points.
    if ( d_store_missed_points )
    {
	// Map each target global ordinal to its local index so the missed
	// entity ids can be translated to local indices.
	std::unordered_map<GlobalOrdinal,int> target_g2l;
	int local_num_targets = target_ordinals.size();
	for ( int t = 0; t < local_num_targets; ++t )
	{
	    target_g2l.emplace( target_ordinals[t], t );
	}

	Teuchos::ArrayView<const EntityId> missed =
	    parallel_search.getMissedRangeEntityIds();

	int num_missed = missed.size();
	d_missed_points.resize( num_missed );
	for ( int i = 0; i < num_missed; ++i )
	{
	    // Every missed id is expected to be one of this process's
	    // target ordinals; the find() result below is dereferenced
	    // without a separate end() test.
	    DTK_CHECK( target_g2l.count(missed[i]) );
	    d_missed_points[i] =
		target_g2l.find( missed[i] )->second;
	}
    }
}
/*
 * Benchmark driver for the batched binary search variants.
 *
 * Usage: binsearch [random-seed]
 *
 * For each array length, one million search targets and a sorted array are
 * created, reference indexes are computed, and then each batch size is timed
 * for parallel_search, portable_parallel_search and
 * portable_parallel_search2. Cycles per search are printed for each variant.
 * Only the parallel_search results are passed to verify_indexes.
 *
 * Returns 0 on success, 1 if a result buffer cannot be allocated.
 */
int main(int argc, char **argv) {
    // binsearch [random-seed]
    if (argc > 1) srand(atoi(argv[1]));

#ifdef POWER_OF_TWO
    size_t lengths[] = {
        256, 1024, 256 * 256, 256 * 1024,
        1024*1024, 16*1024*1024, 256*1024*1024
    };
#else
    size_t lengths[] = {
        100, 1000, 10*1000, 100*1000,
        1000*1000, 10*1000*1000, 100*1000*1000
    };
#endif

    size_t batches[] = { // batch sizes reduced to target count as needed
        1, 2, 4, 6, 8, 10, 12, 14, 16, 20,
        40, 60, 100, 200, 400, 1000, UINT32_MAX
    };

    for (size_t n = 0; n < COUNT(lengths); n++) {
        size_t length = lengths[n];
        size_t count = 1000*1000;
        size_t i;
        uint32_t *targets = create_search_targets(count, 2*length);
        printf("%zu Targets with Array Length", count);
        fflush(NULL);

        uint32_t *array = create_sorted_array(length, 2*length);
        printf(" %zu\n", length);

        size_t *reference_indexes = malloc(count * sizeof(*reference_indexes));
        size_t *indexes = malloc(count * sizeof(*indexes));
        if (reference_indexes == NULL || indexes == NULL) {
            fprintf(stderr, "out of memory allocating result buffers\n");
            free(indexes);
            free(reference_indexes);
            free(array);
            free(targets);
            return 1;
        }

#ifdef LINEAR_REFERENCE
        linear_search(array, length,
                      targets, reference_indexes, count);
#else
        parallel_search(array, length,
                        targets, reference_indexes, count);
#endif

        for (i = 0; i < COUNT(batches); i++) {
            size_t batch = batches[i];
            size_t c;
            float cycles_per_search;
            uint64_t cycles_start, cycles_final;

            memset(indexes, 0, count * sizeof(*indexes));
            if (batch > count) batch = count;
            printf("  Batch %3zu: ", batch);
            fflush(NULL);

            /*
             * In every timed loop the size of the final, possibly shorter,
             * call is computed into a per-call `chunk` rather than written
             * back to `batch`, so all three variants are measured with the
             * batch size printed above.
             */
            RDTSC_START(cycles_start);
            for (c = 0; c < count; c += batch) {
                // possible smaller batch for last iteration
                size_t chunk = (count - c < batch) ? count - c : batch;
                parallel_search(array, length, targets + c,
                                indexes + c, chunk);
            }
            RDTSC_FINAL(cycles_final);
            cycles_per_search =
                (cycles_final - cycles_start) / (float) count;
            printf("cmov?: %.2f cycles/search ", cycles_per_search);
            verify_indexes(reference_indexes, indexes, count,
                           array, targets, length);

            RDTSC_START(cycles_start);
            for (c = 0; c < count; c += batch) {
                // possible smaller batch for last iteration
                size_t chunk = (count - c < batch) ? count - c : batch;
                portable_parallel_search(array, length, targets + c,
                                indexes + c, chunk);
            }
            RDTSC_FINAL(cycles_final);
            cycles_per_search =
                (cycles_final - cycles_start) / (float) count;
            printf(", portable: %.2f cycles/search", cycles_per_search);

            RDTSC_START(cycles_start);
            for (c = 0; c < count; c += batch) {
                // possible smaller batch for last iteration
                size_t chunk = (count - c < batch) ? count - c : batch;
                portable_parallel_search2(array, length, targets + c,
                                indexes + c, chunk);
            }
            RDTSC_FINAL(cycles_final);
            cycles_per_search =
                (cycles_final - cycles_start) / (float) count;
            printf(", portable2: %.2f cycles/search", cycles_per_search);

            printf("\n");

        }
        free(array);
        free(targets);
        free(indexes);
        free(reference_indexes);
        printf("\n");
    }
    return 0;
}