/*
 * ======================================================
 * CUDA specialisation: appends all additional subroutines
 * called inside this user subroutine, then recurses into
 * each appended subroutine to append the routines it calls
 * in turn.
 *
 * Parameters:
 *  - moduleScope, parallelLoop, declarations: forwarded
 *    unchanged to the constructor of every newly created
 *    FortranCUDAUserSubroutine and to the recursive calls
 *  - CUDAconstants: used to patch references to constants
 *    in every appended subroutine
 *  - allCalledRoutines: in/out list of the routines already
 *    appended by previously processed user kernels. Routines
 *    in this object's 'calledRoutines' whose name matches
 *    (case-insensitively) an entry of this list are dropped;
 *    the remaining ones are added to the list
 *
 * Side effects: 'calledRoutines' is sorted and pruned, and
 * one new subroutine object per surviving routine is pushed
 * onto 'additionalSubroutines'
 * ======================================================
 */
void FortranCUDAUserSubroutine::appendAdditionalSubroutines ( SgScopeStatement * moduleScope,
  FortranParallelLoop * parallelLoop, FortranProgramDeclarationsAndDefinitions * declarations,
  FortranConstantDeclarations * CUDAconstants, std::vector < SgProcedureHeaderStatement * > * allCalledRoutines)
{
  using std::string;
  using std::vector;
  using boost::iequals;

  /*
   * ======================================================
   * First removes duplicates in calledRoutines itself
   * (duplicates are detected by pointer identity). The
   * algorithms are qualified with std:: so that the calls
   * do not depend on argument-dependent lookup
   * ======================================================
   */
  std::sort ( calledRoutines.begin (), calledRoutines.end () );
  calledRoutines.erase ( std::unique ( calledRoutines.begin (), calledRoutines.end () ), calledRoutines.end () );

  Debug::getInstance ()->debugMessage ("Before removing, the list of routine calls found in the user kernels is: ",
   Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

  vector < SgProcedureHeaderStatement * > :: iterator listingIt;
  for ( listingIt = calledRoutines.begin (); listingIt != calledRoutines.end (); ++listingIt )
  {
    Debug::getInstance ()->debugMessage ((*listingIt)->get_name ().getString (),
      Debug::FUNCTION_LEVEL, __FILE__, __LINE__);
  }

  /*
   * ======================================================
   * Then removes routines already appended by other user
   * kernels, using the list in allCalledRoutines
   * ======================================================
   */
  Debug::getInstance ()->debugMessage ("Removing global duplicates, the number of routines in the list is: '"
    + boost::lexical_cast<string> (calledRoutines.size ()) + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

  vector < SgProcedureHeaderStatement * > :: iterator routinesIt = calledRoutines.begin ();
  while ( routinesIt != calledRoutines.end () )
  {
    string const appendingSubroutine = (*routinesIt)->get_name ().getString ();

    Debug::getInstance ()->debugMessage ("Checking routine for deletion: '"
      + appendingSubroutine + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

    /*
     * ======================================================
     * Case-insensitive search by name among the routines
     * appended so far
     * ======================================================
     */
    bool alreadyAppended = false;
    vector < SgProcedureHeaderStatement * > :: iterator finder;
    for ( finder = allCalledRoutines->begin (); finder != allCalledRoutines->end (); ++finder )
    {
      Debug::getInstance ()->debugMessage ("Checking against: '"
        + (*finder)->get_name ().getString () + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

      if ( iequals ((*finder)->get_name ().getString (), appendingSubroutine) )
      {
        alreadyAppended = true;
        break;
      }
    }

    if ( alreadyAppended )
    {
      /*
       * ======================================================
       * Routine already appended by another user kernel:
       * delete it from the list of routines to be appended
       * for this user kernel. Erasing invalidates the
       * iterator, so we continue from the one returned by
       * erase, which designates the element following the
       * removed one
       * ======================================================
       */
      Debug::getInstance ()->debugMessage ("Deleting: '"
        + appendingSubroutine + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

      routinesIt = calledRoutines.erase (routinesIt);

      /*
       * ======================================================
       * Nothing left to append for this user kernel
       * ======================================================
       */
      if ( calledRoutines.empty () ) return;
    }
    else
    {
      /*
       * ======================================================
       * New routine: it must be added to the list of
       * routines called by all previous user kernels because
       * recursively called routines need to discard those
       * already appended by this routine
       * ======================================================
       */
      Debug::getInstance ()->debugMessage ("Not found, appending: '"
        + appendingSubroutine + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

      allCalledRoutines->push_back ( *routinesIt );

      ++routinesIt;
    }
  }

  /*
   * ======================================================
   * Creates one CUDA user subroutine for each routine that
   * survived the duplicate removal
   * ======================================================
   */
  vector < SgProcedureHeaderStatement * > :: iterator it;
  for ( it = calledRoutines.begin (); it != calledRoutines.end (); ++it )
  {
    string const calledSubroutineName = (*it)->get_name ().getString ();

    Debug::getInstance ()->debugMessage ("Appending new subroutine '"
        + calledSubroutineName + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

    FortranCUDAUserSubroutine * newRoutine = new FortranCUDAUserSubroutine ( moduleScope,
        parallelLoop, declarations, calledSubroutineName );

    newRoutine->createFormalParameterDeclarations ();
    newRoutine->createStatements ();

    additionalSubroutines.push_back (newRoutine);
  }

  /*
   * ======================================================
   * Patches the references to constants in every appended
   * subroutine and forces its output, before any recursion
   * takes place
   * ======================================================
   */
  vector < FortranUserSubroutine * > :: iterator itRecursive;
  for ( itRecursive = additionalSubroutines.begin (); itRecursive != additionalSubroutines.end (); ++itRecursive )
  {
    FortranCUDAUserSubroutine * cudaSubroutine = static_cast <FortranCUDAUserSubroutine *> (*itRecursive);

    CUDAconstants->patchReferencesToConstants (
      cudaSubroutine->getSubroutineHeaderStatement ());

    RoseHelper::forceOutputOfCodeToFile (
      cudaSubroutine->getSubroutineHeaderStatement ());
  }

  /*
   * ======================================================
   * Recursively appends the routines called by the
   * subroutines appended above; allCalledRoutines keeps
   * accumulating, so routines are not appended twice
   * ======================================================
   */
  for ( itRecursive = additionalSubroutines.begin (); itRecursive != additionalSubroutines.end (); ++itRecursive )
  {
    FortranCUDAUserSubroutine * cudaSubroutine = static_cast <FortranCUDAUserSubroutine *> (*itRecursive);

    cudaSubroutine->appendAdditionalSubroutines (moduleScope, parallelLoop, declarations, CUDAconstants, allCalledRoutines);
  }
}
void
FortranCUDASubroutinesGeneration::createSubroutines ()
{
  using std::string;
  using std::map;

  CUDAconstants->appendConstantInitialisationToModule ( moduleScope, declarations, /* isCuda = */ true );
  
  /*
   * ======================================================
   * This vector contains all subroutines called by user
   * kernels, to avoid their duplication in the output
   * file
   * ======================================================
   */
  vector < SgProcedureHeaderStatement * > allCalledRoutines;

  for (map <string, ParallelLoop *>::const_iterator it =
      declarations->firstParallelLoop (); it
      != declarations->lastParallelLoop (); ++it)
  {
    string const userSubroutineName = it->first;

    Debug::getInstance ()->debugMessage ("Analysing user subroutine '"
        + userSubroutineName + "' with subroutines already defined in previous kernels (number = "
        + boost::lexical_cast<string> (allCalledRoutines.size()) + ": ", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);

    vector < SgProcedureHeaderStatement * > :: iterator finder;
    for ( finder = allCalledRoutines.begin (); finder != allCalledRoutines.end (); finder++ )
    {
      Debug::getInstance ()->debugMessage ("Routine: '" + (*finder)->get_name ().getString () + "'", Debug::FUNCTION_LEVEL, __FILE__, __LINE__);
    }
        
        
    FortranParallelLoop * parallelLoop =
        static_cast <FortranParallelLoop *> (it->second);

    FortranCUDAUserSubroutine * userDeviceSubroutine =
        new FortranCUDAUserSubroutine (moduleScope, parallelLoop, declarations);

    userDeviceSubroutine->createStatements ();        
        
    CUDAconstants->patchReferencesToConstants (
        userDeviceSubroutine->getSubroutineHeaderStatement ());

    /*
     * ======================================================
     * We have to set each node in the AST representation of
     * this subroutine as compiler generated, otherwise chunks
     * of the user kernel are missing in the output
     * ======================================================
     */

    RoseHelper::forceOutputOfCodeToFile (
        userDeviceSubroutine->getSubroutineHeaderStatement ());
        

    /*
     * ======================================================
     * When the user subroutine has calls to other user
     * subroutines we need to add them to the generated file
     * This call also eliminates automatically the duplicates
     * of routines already contained in allCalledRoutines
     * ======================================================
     */
    userDeviceSubroutine->appendAdditionalSubroutines (moduleScope, parallelLoop, declarations, CUDAconstants, &allCalledRoutines);
        
//    vector < FortranUserSubroutine * > additionalSubroutines = userDeviceSubroutine->getAdditionalSubroutines ();
    
    /*
     * ======================================================
     * Appending new routines to the list of all routines
     * appended
     * ======================================================
     */    
    //allCalledRoutines.insert(allCalledRoutines.end(), additionalSubroutines.begin(), additionalSubroutines.end());
    
    FortranCUDAKernelSubroutine * kernelSubroutine;

    if (parallelLoop->isDirectLoop ())
    {
      kernelSubroutine
          = new FortranCUDAKernelSubroutineDirectLoop (
              moduleScope,
              userDeviceSubroutine,
              parallelLoop,
              reductionSubroutines,
              cardinalitiesDeclarations[userSubroutineName],
              dimensionsDeclarations[userSubroutineName],
              static_cast <FortranCUDAModuleDeclarations *> (moduleDeclarations[userSubroutineName]));

      hostSubroutines[userSubroutineName]
          = new FortranCUDAHostSubroutineDirectLoop (
              moduleScope,
              kernelSubroutine,
              parallelLoop,
              cardinalitiesDeclarations[userSubroutineName],
              dimensionsDeclarations[userSubroutineName],
              static_cast <FortranCUDAModuleDeclarations *> (moduleDeclarations[userSubroutineName]));
    }
    else
    {
      kernelSubroutine
          = new FortranCUDAKernelSubroutineIndirectLoop (
              moduleScope,
              userDeviceSubroutine,
              parallelLoop,
              reductionSubroutines,
              static_cast <FortranCUDAOpDatCardinalitiesDeclarationIndirectLoop *> (cardinalitiesDeclarations[userSubroutineName]),
              dimensionsDeclarations[userSubroutineName],
              static_cast <FortranCUDAModuleDeclarations *> (moduleDeclarations[userSubroutineName]));

      hostSubroutines[userSubroutineName]
          = new FortranCUDAHostSubroutineIndirectLoop (
              moduleScope,
              kernelSubroutine,
              parallelLoop,
              static_cast <FortranCUDAOpDatCardinalitiesDeclarationIndirectLoop *> (cardinalitiesDeclarations[userSubroutineName]),
              dimensionsDeclarations[userSubroutineName],
              static_cast <FortranCUDAModuleDeclarations *> (moduleDeclarations[userSubroutineName]));
    }
  }
}