Exemplo n.º 1
0
// Validate the cached HDFS stats for all partitions and optionally refresh
// them. Returns TRUE if the stats are (still) valid, FALSE on any failure.
// expirationJTimestamp: -1 = never expire, >0 = expire at that Julian time.
// refresh: when TRUE, skip the expiration shortcut and re-read from HDFS.
NABoolean HHDFSTableStats::validateAndRefresh(Int64 expirationJTimestamp, NABoolean refresh)
{
  NABoolean result = TRUE;
  // initial heap allocation size, so the delta can be charged to hiveStatsSize_
  Int32 initialSize = heap_->getAllocSize();

  diags_.reset();

  // check if the stats needs to be fetched within a specified time interval
  // when not requested to refresh
  if (! refresh && (expirationJTimestamp == -1 ||
      (expirationJTimestamp > 0 &&
       validationJTimestamp_ < expirationJTimestamp)))
    return result; // consider the stats still valid

  // if partitions get added or deleted, that gets
  // caught in the Hive metadata, so no need to check for
  // that here
  for (int p=0; p<totalNumPartitions_ && result && diags_.isSuccess(); p++)
    {
      HHDFSListPartitionStats *partStats = listPartitionStatsList_[p];
      NAString hdfsHost;
      Int32 hdfsPort;
      NAString partDir;

      result = splitLocation(partStats->getDirName(),
                             hdfsHost,
                             hdfsPort, 
                             partDir,
                             diags_,
                             hdfsPortOverride_);
      if (! result)
        break;

      if (! connectHDFS(hdfsHost, hdfsPort))
        {
          // Bug fix: the original early return skipped disconnectHDFS(),
          // leaking a connection that a previous iteration may have opened.
          // Close it here; validationJTimestamp_ is deliberately left
          // unchanged, matching the original early-return semantics.
          disconnectHDFS();
          return FALSE;
        }

      // remove this partition's old contribution from the aggregate,
      // refresh it, and add it back only if the refresh succeeded
      subtract(partStats);
      result = partStats->validateAndRefresh(fs_, diags_, refresh);
      if (result)
        add(partStats);
    }

  disconnectHDFS();
  validationJTimestamp_ = JULIANTIMESTAMP();
  // account for the heap used by stats. Heap released during
  // stats refresh will also be included
  hiveStatsSize_ += (heap_->getAllocSize() - initialSize);

  return result;
}
Exemplo n.º 2
0
// Create per-partition stats for one partition directory, append them to the
// partition list, and fold them into the table-level aggregate.
// Always reports success; populate() does not return a status here.
NABoolean HHDFSTableStats::processDirectory(const NAString &dir, Int32 numOfBuckets, NABoolean doEstimate, char recordTerminator, NABoolean isSequenceFile)
{
  HHDFSListPartitionStats *partStats = new(heap_) HHDFSListPartitionStats(heap_);
  partStats->populate(fs_, dir, numOfBuckets, doEstimate, recordTerminator, isSequenceFile);

  listPartitionStatsList_.insertAt(listPartitionStatsList_.entries(), partStats);
  totalNumPartitions_++;
  // aggregate stats
  add(partStats);

  // Fix: removed the unused local "result" — the function unconditionally
  // returned TRUE regardless of it.
  return TRUE;
}
Exemplo n.º 3
0
void HHDFSTableStats::processDirectory(const NAString &dir, Int32 numOfBuckets, 
                                       NABoolean doEstimate, char recordTerminator)
{
  HHDFSListPartitionStats *partStats = new(heap_)
    HHDFSListPartitionStats(heap_, this);
  partStats->populate(fs_, dir, numOfBuckets, diags_, doEstimate, recordTerminator);

  if (diags_.isSuccess())
    {
      listPartitionStatsList_.insertAt(listPartitionStatsList_.entries(), partStats);
      totalNumPartitions_++;
      // aggregate stats
      add(partStats);

      if (partStats->dirInfo()->mLastMod > modificationTS_)
        modificationTS_ = partStats->dirInfo()->mLastMod;
    }
}
Exemplo n.º 4
0
// Validate (and optionally refresh) the cached HDFS stats for every
// partition of the table. Returns TRUE if all partitions validated.
// expirationJTimestamp: -1 = stats never expire, >0 = expire at that
// Julian timestamp; 0 appears to force revalidation (falls through the
// guard below).
// NOTE(review): unlike the other variant of this method, the "refresh"
// parameter does not bypass the expiration shortcut here — with
// expirationJTimestamp == -1 this returns early even when refresh is
// requested. Presumably intentional for this revision; verify callers.
NABoolean HHDFSTableStats::validateAndRefresh(Int64 expirationJTimestamp, NABoolean refresh)
{
  NABoolean result = TRUE;
  // initial heap allocation size; the delta is charged to hiveStatsSize_
  Int32 initialSize = heap_->getAllocSize();

  // check only once within a specified time interval
  if (expirationJTimestamp == -1 ||
      (expirationJTimestamp > 0 &&
       validationJTimestamp_ < expirationJTimestamp))
    return result; // consider the stats still valid

  // if partitions get added or deleted, that gets
  // caught in the Hive metadata, so no need to check for
  // that here
  for (int p=0; p<totalNumPartitions_ && result; p++)
    {
      HHDFSListPartitionStats *partStats = listPartitionStatsList_[p];
      NAString hdfsHost;
      Int32 hdfsPort;
      NAString partDir;

      // NOTE(review): return value of splitLocation is ignored here; a
      // malformed directory name would proceed with whatever hdfsHost/
      // hdfsPort were left as — confirm splitLocation cannot fail here.
      splitLocation(partStats->getDirName(), hdfsHost, hdfsPort, partDir);
      // on connect failure, assert that a file system handle still exists
      // (CMPASSERT aborts compilation on failure); execution continues
      // with the existing fs_ handle
      if (! connectHDFS(hdfsHost, hdfsPort))
        CMPASSERT(fs_);

      // remove the partition's old contribution, revalidate/refresh it,
      // then add the (possibly updated) contribution back.
      // NOTE(review): add() runs even when validateAndRefresh failed,
      // unlike the sibling variant that re-adds only on success — the
      // aggregate may then include a partially refreshed partition.
      subtract(partStats);
      result = partStats->validateAndRefresh(fs_, refresh);
      add(partStats);
    }

  disconnectHDFS();
  validationJTimestamp_ = JULIANTIMESTAMP();
  // account for the heap used by stats. Heap released during
  // stats refresh will also be included
  hiveStatsSize_ += (heap_->getAllocSize() - initialSize);

  return result;
}