예제 #1
0
// Return the locale names that own the HDFS blocks overlapping
// [start_byte, end_byte] of 'file'.
//
// file          - an hdfs file handle (unwrapped via to_hdfs_file)
// start_byte    - first byte of the range of interest
// end_byte      - last byte of the range of interest
// loc_names_out - out: NULL-terminated array of locale-name strings
//                 (ownership passes to the caller)
// num_locs_out  - out: number of entries in *loc_names_out
// fs            - an hdfs filesystem handle (unwrapped via to_hdfs_fs)
//
// Returns 0 on success, or an EREMOTEIO error if libhdfs cannot
// report any hosts for the range.
qioerr hdfs_locales_for_range(void* file, off_t start_byte, off_t end_byte, const char*** loc_names_out, int* num_locs_out, void* fs) 
{
  int i = 0;
  int j = 0;
  char*** info = NULL;

  // Ask libhdfs for the hosts of every block in the byte range. The result
  // is a NULL-terminated array of NULL-terminated host-name arrays, one
  // inner array per block.
  info = hdfsGetHosts(to_hdfs_fs(fs)->hfs, to_hdfs_file(file)->pathnm, start_byte, end_byte);

  // unable to get hosts for this byte range
  if (!info || !info[0]) {
    *num_locs_out = 0;
    hdfsFreeHosts(info); // safe on NULL; frees a hosts-but-no-first-block result
    QIO_RETURN_CONSTANT_ERROR(EREMOTEIO, "Unable to get owners for byterange");
  }

  // Map each host name of the first block to its locale name, in place.
  while(info[0][i]) {
    info[0][i] = get_locale_name(info[0][i]);
    i++;
  }

  // BUG FIX: the loop above stops on the NULL terminator, so 'i' is already
  // the number of entries; reporting i - 1 dropped the last locale (and gave
  // -1 for an empty list).
  *num_locs_out = i;
  *loc_names_out = (const char**)info[0];

  // Free the other hosts that we don't need
  // NOTE(review): the outer 'info' pointer array itself is never freed here
  // (info[0] escapes to the caller, so hdfsFreeHosts(info) cannot be used);
  // presumably the caller reclaims it along with loc_names_out -- verify.
  for (i = 1; info[i]; i++) {
    for (j = 0; info[i][j]; j++)
      qio_free(info[i][j]);
    qio_free(info[i]);
  }

  return 0;
}
예제 #2
0
// Populate this HHDFSFileStats object from one hdfsFileInfo entry:
// copy basic file attributes, optionally sample the file contents to
// estimate record length, then record the host location of every block.
//
// fs               - connected HDFS handle, used for open/pread/getHosts
// fileInfo         - libhdfs metadata for the file being described
// samples          - in/out: incremented once per record terminator found
// doEstimation     - when TRUE, read a sample buffer to count records
// recordTerminator - byte that separates records in the file
// isSequenceFile   - caller-determined file format flag, stored verbatim
//
// Returns TRUE on success; FALSE if the block size is zero or if
// hdfsGetHosts fails for some block (both also trip a CMPASSERT).
NABoolean HHDFSFileStats::populate(hdfsFS fs, hdfsFileInfo *fileInfo, 
                                   Int32& samples,
                                   NABoolean doEstimation, char recordTerminator, 
                                   NABoolean isSequenceFile)
{
  NABoolean result = TRUE;

  // copy fields from fileInfo
  fileName_       = fileInfo->mName;
  replication_    = (Int32) fileInfo->mReplication;
  totalSize_      = (Int64) fileInfo->mSize;
  blockSize_      = (Int64) fileInfo->mBlockSize;
  modificationTS_ = fileInfo->mLastMod;
  numFiles_       = 1;
  isSequenceFile_ = isSequenceFile;

  // Sample at most 64 KB, one block, or a tenth of the file -- whichever
  // is smallest -- so sampling stays cheap relative to the file size.
  Int64 sampleBufferSize = MINOF(blockSize_, 65536);
  NABoolean sortHosts = (CmpCommon::getDefault(HIVE_SORT_HDFS_HOSTS) == DF_ON);

  sampleBufferSize = MINOF(sampleBufferSize,totalSize_/10);

  // Skip sampling entirely for tiny files (<= 1 KB effective buffer).
  if (doEstimation && sampleBufferSize > 100) {

     // 
     // Open the hdfs file to estimate record length. Read one block at
     // a time searching for <s> instances of record separators. Stop reading 
     // when either <s> instances have been found or a partial number of
     // instances have and we have exhausted all data content in the block.
     // We will keep reading if the current block does not contain 
     // any instance of the record separator.
     // 
     hdfsFile file = 
                 hdfsOpenFile(fs, fileInfo->mName, 
                              O_RDONLY, 
                              sampleBufferSize, // buffer size
                              0, // replication, take the default size 
                              fileInfo->mBlockSize // blocksize 
                              ); 
      
     if ( file != NULL ) {
        tOffset offset = 0;
        tSize bufLen = sampleBufferSize;
        char* buffer = new (heap_) char[bufLen+1];
        buffer[bufLen] = 0; // extra null at the end to protect strchr()
                            // to run over the buffer.
   
        NABoolean sampleDone = FALSE;
   
        Int32 totalSamples = 10; // stop after this many terminators in one buffer
        Int32 totalLen = 0;      // accumulated byte length of the sampled records
   
        while (!sampleDone) {
   
           tSize szRead = hdfsPread(fs, file, offset, buffer, bufLen);
      
           char* pos = NULL;
   
           if ( szRead > 0 ) {
                
             //if (isSequenceFile && offset==0 && memcmp(buffer, "SEQ6", 4) == 0)
             //   isSequenceFile_ = TRUE;
   
             char* start = buffer;
   
             // Scan the buffer for up to totalSamples record terminators.
             for (Int32 i=0; i<totalSamples; i++ ) {
   
                if ( (pos=strchr(start, recordTerminator)) ) {
   
                  // NOTE(review): 'offset' (file position of this buffer) is
                  // added to every sample's length, not just the first; it is
                  // 0 while we are still in the first buffer, which is the
                  // common case since we stop after any buffer that yields
                  // samples -- confirm the accounting for later buffers.
                  totalLen += pos - start + 1 + offset;
                  samples++;
   
                  start = pos+1;
   
                  // Terminator was the last byte of the buffer; nothing left
                  // to scan, so stop sampling altogether.
                  if ( start > buffer + bufLen ) {
                     sampleDone = TRUE;
                     break;
                  }
                } else 
                  break; // no (more) terminators in this buffer
             }
   
             // Any buffer that produced samples is enough; otherwise advance
             // to the next buffer-sized chunk and keep looking.
             if ( samples > 0 )
               break;
             else
               offset += bufLen;
     
           } else  
              break; // fail to read any bytes. Bail out. 
       }
   
       NADELETEBASIC(buffer, heap_);
   
       // Fold the sampled totals into the running statistics.
       if ( samples > 0 ) {
         sampledBytes_ += totalLen;
         sampledRows_  += samples;
       }
   
       hdfsCloseFile(fs, file);
     } else {
       // can not do hdfs open on the file. Assume the file is empty.
     }
  }

  if (blockSize_)
    {
      // Ceiling division: count a trailing partial block as a full one.
      numBlocks_ = totalSize_ / blockSize_;
      if (totalSize_ % blockSize_ > 0)
        numBlocks_++; // partial block at the end
    }
  else
    {
      // A zero block size is unexpected; assert in debug builds and fail
      // the populate in release builds.
      CMPASSERT(blockSize_);
      // TBD:DIAGS
      result = FALSE;
    }

  if ( totalSize_ > 0 )
    {

      // One HostId per (replica, block) pair, laid out replica-major:
      // blockHosts_[r*numBlocks_ + b] is replica r of block b.
      blockHosts_ = new(heap_) HostId[replication_*numBlocks_];

      // walk through blocks and record their locations
      tOffset o = 0;
      Int64 blockNum;
      for (blockNum=0; blockNum < numBlocks_ && result; blockNum++)
        {
          // Ask libhdfs for the hosts holding the block that starts at 'o'.
          char*** blockHostNames = hdfsGetHosts(fs,
                                                fileInfo->mName, 
                                                o,
                                                fileInfo->mBlockSize);

          o += blockSize_;

          if (blockHostNames == NULL)
            {
              CMPASSERT(blockHostNames);
              // TBD:DIAGS
              result = FALSE;
            }
          else
            {
              // Only the first entry (the single block we asked about)
              // is consulted; map each replica's host name to a HostId.
              char **h = *blockHostNames;
              HostId hostId;

              for (Int32 r=0; r<replication_; r++)
                {
                  if (h[r])
                    hostId = HHDFSMasterHostList::getHostNum(h[r]);
                  else
                    hostId = HHDFSMasterHostList::InvalidHostId;
                  blockHosts_[r*numBlocks_+blockNum] = hostId;
                }
              // NOTE(review): the whole host array is re-sorted on every
              // block iteration; presumably idempotent but O(numBlocks_^2)
              // overall -- confirm it isn't meant to run once after the loop.
              if (sortHosts)
                sortHostArray(blockHosts_,
                              (Int32) numBlocks_,
                              replication_,
                              getFileName());
            }
          hdfsFreeHosts(blockHostNames);
        }
    }
  return result;
}