示例#1
0
// Parses the "partitionKeys:[ ... ]," section of the table description
// text and builds one hive_pkey_desc per "FieldSchema(name:" entry found
// in it.
//
//   md      passed through to findAToken()/extractValueStr()
//   tblID   not used by this function
//   tblStr  text being parsed
//   pos     in/out parse position; the search for "partitionKeys:" starts
//           here and the position is advanced as entries are consumed
//
// Returns the first descriptor of the list (entries are chained through
// next_ in the order in which they appear and numbered from 0), or NULL
// if the section or one of its delimiters cannot be found or if the
// section holds no entry.
//
// NOTE(review): findAToken() is not visible here; the code relies on it
// leaving the position on the first character of the token it found -
// confirm against its definition.
struct hive_pkey_desc* populatePartitionKey(HiveMetaData *md, Int32 tblID,  
                                            NAText* tblStr, size_t& pos)
{
  static const char fieldTok[] = "FieldSchema(name:";

  hive_pkey_desc* result = NULL;
  hive_pkey_desc* last = NULL;

  if (!findAToken(md, tblStr, pos, "partitionKeys:",
                  "populatePartitionKeys::partitionKeys:###"))
    return NULL;

  // foundE marks the "]," that closes the key list; entries beyond it
  // belong to other sections.
  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "],",
                  "populatePartitionKeys::partitionKeys:],###"))
    return NULL;

  Int32 colIdx = 0;
  while (pos < foundE)
    {
      std::size_t nameBegin = tblStr->find(fieldTok, pos);
      if ((nameBegin == std::string::npos) || (nameBegin > foundE))
        return NULL; // no part Key

      // the name runs from just behind the token up to the next ','
      nameBegin += strlen(fieldTok);
      pos = nameBegin;
      if (!findAToken(md, tblStr, pos, ",",
                      "populatePartitionKeys::name:,###"))
        return NULL;

      NAText nameStr = tblStr->substr(nameBegin, pos - nameBegin);

      NAText typeStr;
      if (!extractValueStr(md, tblStr, pos, "type:", ", comment",
                           typeStr, "populatePartitionKeys::type:###"))
        return NULL;

      // pos is on the ',' of ", comment"; step over it and move on to the
      // next ',', i.e. the one following the comment field.
      pos++;
      if (!findAToken(md, tblStr, pos, ",",
                      "populatePartitionKeys::comment:,###"))
        return NULL;

      hive_pkey_desc* newPkey = new (CmpCommon::contextHeap())
        struct hive_pkey_desc(nameStr.c_str(),
                              typeStr.c_str(),
                              colIdx);

      // append to the tail so that list order matches the text order
      if (result == NULL)
        result = newPkey;
      else
        last->next_ = newPkey;
      last = newPkey;

      colIdx++;
    } // end of while

  return result;
}
// Reads the field and record terminators from the "serdeInfo:" section
// of the table description text.
//
//   md                passed through to findAToken()
//   serdeID           not used by this function
//   fieldTerminator   out; '\001' unless "field.delim" occurs in the section
//   recordTerminator  out; '\n' unless "line.delim=" occurs in the section
//   tblStr            text being parsed
//   pos               in/out parse position; on success it is left on the
//                     "})," that closes the section
//
// Returns TRUE on success and FALSE if "serdeInfo:" or the closing "}),"
// cannot be found. Both terminators are reset to their defaults before
// anything is searched for, so they are defined on every return path.
NABoolean populateSerDeParams(HiveMetaData *md, Int32 serdeID, 
                              char& fieldTerminator, char& recordTerminator,
                              NAText* tblStr, size_t& pos)
{

  fieldTerminator  = '\001';  // this the Hive default ^A or ascii code 1
  recordTerminator = '\n';    // this is the Hive default

  if (!findAToken(md, tblStr, pos, "serdeInfo:",
                  "populateSerDeParams::serdeInfo:###"))
    return FALSE;

  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "}),",
                  "populateSerDeParams::serDeInfo:)},###"))
    return FALSE;

  const char * fieldStr = "field.delim";
  const char * lineStr = "line.delim";

  // The delimiter is the single character behind "<key>="; the +1 skips
  // the '='. A key with nothing behind it before the end of the section
  // keeps the default instead of reading outside the section.
  std::size_t foundB = tblStr->find(fieldStr, pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      const std::size_t valueAt = foundB + strlen(fieldStr) + 1;
      if (valueAt < foundE)
        fieldTerminator = tblStr->at(valueAt);
    }

  foundB = tblStr->find("line.delim=", pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      const std::size_t valueAt = foundB + strlen(lineStr) + 1;
      if (valueAt < foundE)
        recordTerminator = tblStr->at(valueAt);
    }

  pos = foundE;

  return TRUE;
}
示例#3
0
// Parses the "sortCols:[ ... ]," section of the table description text
// and builds one hive_skey_desc per "Order(col:" entry found in it.
//
//   md      passed through to findAToken()/extractValueStr()
//   sdID    not used by this function
//   tblStr  text being parsed
//   pos     in/out parse position; the search for "sortCols:" starts here
//           and the position is advanced as entries are consumed
//
// Returns the first descriptor of the list (entries are chained through
// next_ in the order in which they appear and numbered from 0). Returns
// NULL if the section holds no entry, or if the section or one of the
// delimiters inside an entry cannot be found.
struct hive_skey_desc* populateSortCols(HiveMetaData *md, Int32 sdID,  
                                        NAText* tblStr, size_t& pos)
{
  static const char orderTok[] = "Order(col:";

  hive_skey_desc* result = NULL;
  hive_skey_desc* last = NULL;

  if (!findAToken(md, tblStr, pos, "sortCols:",
                  "populateSortCols::sortCols:###"))
    return NULL;

  // foundE marks the "]," that closes the list
  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "],",
                  "populateSortCols::sortCols:],###"))
    return NULL;

  Int32 colIdx = 0;
  while (pos < foundE)
    {
      std::size_t nameBegin = tblStr->find(orderTok, pos);
      if ((nameBegin == std::string::npos) || (nameBegin > foundE))
        break; // no (more) sort columns; keep what was collected so far

      nameBegin += strlen(orderTok);
      pos = nameBegin;
      if (!findAToken(md, tblStr, pos, ",",
                      "populateSortCols::name:,###"))
        return NULL;
      NAText nameStr = tblStr->substr(nameBegin, pos - nameBegin);

      NAText orderStr;
      if (!extractValueStr(md, tblStr, pos, "order:", ",",
                           orderStr, "populateSortCols::order:###"))
        return NULL;

      // pos is now on the ',' that ended the order value. Nothing more is
      // skipped here: searching for a further ',' would land behind the
      // name of the following entry and that entry would be lost.
      // NOTE(review): assumes an entry is rendered as
      // "Order(col:<name>, order:<n>)" with no field after the order -
      // confirm against the producer of the table text.

      // orderStr still carries the characters up to that ',' (e.g. the
      // closing ')'); atoi() stops at the first non-digit.
      hive_skey_desc* newSkey = new (CmpCommon::contextHeap())
        struct hive_skey_desc(nameStr.c_str(),
                              colIdx,
                              atoi(orderStr.c_str()));

      // append to the tail so that list order matches the text order
      if (result == NULL)
        result = newSkey;
      else
        last->next_ = newSkey;
      last = newSkey;

      colIdx++;
    } // end of while

  return result;
}
示例#4
0
// Parses the "cols:[ ... )]," section of the table description text and
// builds one hive_column_desc per column entry.
//
//   md      passed through to findAToken()/extractValueStr()
//   cdID    not used by this function
//   tblStr  text being parsed
//   pos     in/out parse position; the search for "cols:" starts here and
//           the position is advanced as columns are consumed
//
// Returns the first descriptor of the list (columns are chained through
// next_ in the order in which they appear and numbered from 0), or NULL
// as soon as any of the expected tokens cannot be found.
struct hive_column_desc* populateColumns(HiveMetaData *md, Int32 cdID,  
                                         NAText* tblStr, size_t& pos)
{
  if (!findAToken(md, tblStr, pos, "cols:", 
                  "populateColumns::cols:###"))
    return NULL;

  // listEnd marks the ")]," that closes the column list
  std::size_t listEnd = pos;
  if (!findAToken(md, tblStr, listEnd, ")],", 
                  "populateColumns::cols:],###"))
    return NULL;

  struct hive_column_desc* head = NULL;
  struct hive_column_desc* tail = NULL;

  for (Int32 ordinal = 0; pos < listEnd; ordinal++)
    {
      NAText colName;
      if (!extractValueStr(md, tblStr, pos, "FieldSchema(name:", ",", 
                           colName, "populateColumns::FieldSchema(name:###"))
        return NULL;

      NAText colType;
      if (!extractValueStr(md, tblStr, pos, "type:", ", comment", 
                           colType, "populateColumns::type:###"))
        return NULL;

      // step over the ',' of ", comment" and move on to the ',' that
      // follows the comment field
      ++pos;
      if (!findAToken(md, tblStr, pos, ",", 
                      "populateColumns::comment:,###"))
        return NULL;

      struct hive_column_desc* col = new (CmpCommon::contextHeap())
        struct hive_column_desc(0, 
                                colName.c_str(),
                                colType.c_str(),
                                ordinal);

      // append to the tail so that list order matches the text order
      if (tail != NULL)
        tail->next_ = col;
      else
        head = col;
      tail = col;
    }

  return head;
}
示例#5
0
// Extracts the text enclosed by beginTok and endTok and appends it to
// valueStr.
//
//   md          passed through to findAToken()
//   tblStr      text being parsed
//   pos         in/out; the search for beginTok starts here. On success
//               it is left on the first character of endTok. If only
//               beginTok is found it is left wherever the first
//               findAToken() call put it.
//   beginTok    token that precedes the value
//   endTok      token that follows the value
//   valueStr    out; the value is appended, existing content is kept
//   errStr      label handed to findAToken() for both searches
//   raiseError  forwarded to the beginTok search only; the endTok search
//               always passes TRUE
//
// Returns TRUE if both tokens were found, FALSE otherwise.
NABoolean extractValueStr (HiveMetaData *md, NAText* tblStr, size_t& pos, 
                           const char* beginTok, const char* endTok,
                           NAText& valueStr, const char* errStr, 
                           NABoolean raiseError)
{
  if (!findAToken(md, tblStr, pos, beginTok, errStr, raiseError))
    return FALSE;

  // Look for the end token behind the begin token rather than from its
  // first character: a match inside beginTok would put the end in front
  // of the value start and make the length computed below wrap around.
  const size_t valueBegin = pos + strlen(beginTok);
  size_t valueEnd = valueBegin;
  if (!findAToken(md, tblStr, valueEnd, endTok, errStr, TRUE))
    return FALSE;

  valueStr.append(tblStr->substr(valueBegin, valueEnd - valueBegin));
  pos = valueEnd;
  return TRUE;
}
示例#6
0
// Parses the "bucketCols:[ ... ]," section of the table description text
// and builds one hive_bkey_desc per comma separated name in it.
//
//   md      passed through to findAToken()
//   sdID    not used by this function
//   tblStr  text being parsed
//   pos     in/out parse position; the search for "bucketCols:" starts
//           here. After a non-empty list it is left one character behind
//           the ']' that closes the list.
//
// Returns the first descriptor of the list (names are chained through
// next_ in the order in which they appear and numbered from 0), or NULL
// if the section cannot be found or the list is empty.
struct hive_bkey_desc* populateBucketingCols(HiveMetaData *md, Int32 sdID,  
                                             NAText* tblStr, size_t& pos)
{
  hive_bkey_desc* result = NULL;
  hive_bkey_desc* last = NULL;

  if (!findAToken(md, tblStr, pos, "bucketCols:",
                  "populateBucketingCols::bucketCols:###"))
    return NULL;

  // foundE marks the "]," that closes the list
  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "],",
                  "populateBucketingCols::bucketCols:],###"))
    return NULL;

  pos = pos + strlen("bucketCols:[");
  if (pos == foundE)
    return NULL; // empty bucket cols list. This check is for clarity
  // alone, the while condition alone is sufficient.

  Int32 colIdx = 0;
  while (pos < foundE)
    {
      std::size_t nameEnd = tblStr->find(',', pos);
      if ((nameEnd == std::string::npos) || (nameEnd > foundE))
        nameEnd = foundE; // we have only one bucketing col or
      // this is the last bucket col

      NAText nameStr = tblStr->substr(pos, nameEnd - pos);

      // Continue behind the separator. Blanks following the ',' are
      // skipped as well, otherwise every name after the first one would
      // start with the blank of a ", " separator.
      pos = nameEnd + 1;
      while ((pos < foundE) && (tblStr->at(pos) == ' '))
        pos++;

      hive_bkey_desc* newBkey = new (CmpCommon::contextHeap())
        struct hive_bkey_desc(nameStr.c_str(),
                              colIdx);

      // append to the tail so that list order matches the text order
      if (result == NULL)
        result = newBkey;
      else
        last->next_ = newBkey;
      last = newBkey;

      colIdx++;
    } // end of while

  return result;
}
示例#7
0
// Reads the field terminator, the record terminator and the null format
// from the "serdeInfo:" section of the table description text.
//
//   md                passed through to findAToken()
//   serdeID           not used by this function
//   fieldTerminator   out; '\001' unless "field.delim" occurs in the section
//   recordTerminator  out; '\n' unless "line.delim=" occurs in the section
//   nullFormatSpec    out; TRUE if "serialization.null.format=" occurs in
//                     the section. It is assigned only after both section
//                     delimiters have been found.
//   nullFormat        out; assigned only when nullFormatSpec is TRUE
//   tblStr            text being parsed
//   pos               in/out parse position; on success it is left on the
//                     "})," that closes the section
//
// Returns TRUE on success and FALSE if "serdeInfo:" or the closing "}),"
// cannot be found.
NABoolean populateSerDeParams(HiveMetaData *md, Int32 serdeID, 
                              char& fieldTerminator, char& recordTerminator,
                              NABoolean &nullFormatSpec, NAString &nullFormat,
                              NAText* tblStr, size_t& pos)
{

  fieldTerminator  = '\001';  // this the Hive default ^A or ascii code 1
  recordTerminator = '\n';    // this is the Hive default

  if (!findAToken(md, tblStr, pos, "serdeInfo:",
                  "populateSerDeParams::serdeInfo:###"))
    return FALSE;

  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "}),",
                  "populateSerDeParams::serDeInfo:)},###"))
    return FALSE;

  const char * nullStr = "serialization.null.format=";
  const char * fieldStr = "field.delim";
  const char * lineStr = "line.delim";

  nullFormatSpec = FALSE;
  std::size_t foundB = tblStr->find(nullStr, pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      nullFormatSpec = TRUE;
      const std::size_t foundNB = foundB + strlen(nullStr);
      // The value ends at the next ", " or, when it is the last entry of
      // the section, at the "})," that closes the section (foundE).
      std::size_t foundNE = tblStr->find(", ", foundNB);
      if ((foundNE == std::string::npos) || (foundNE > foundE))
        foundNE = foundE;
      nullFormat = NAString(tblStr->substr(foundNB, (foundNE-foundNB)));
    }

  // The delimiter is the single character behind "<key>="; the +1 skips
  // the '='. A key with nothing behind it before the end of the section
  // keeps the default instead of reading outside the section.
  foundB = tblStr->find(fieldStr, pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      const std::size_t valueAt = foundB + strlen(fieldStr) + 1;
      if (valueAt < foundE)
        fieldTerminator = tblStr->at(valueAt);
    }

  foundB = tblStr->find("line.delim=", pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      const std::size_t valueAt = foundB + strlen(lineStr) + 1;
      if (valueAt < foundE)
        recordTerminator = tblStr->at(valueAt);
    }

  pos = foundE;

  return TRUE;
}
示例#8
0
// Builds the storage descriptor for the table description text, starting
// at the "sd:StorageDescriptor(" token.
//
//   md        passed through to the helpers called below
//   mainSdID  not used by this function
//   tblID     not used by this function
//   tblStr    text being parsed
//   pos       in/out parse position; every step below searches from where
//             the previous one stopped, so the order of the steps matters
//
// Returns the new hive_sd_desc, or NULL if
//   - the descriptor token, the column list, one of the scalar
//     attributes or the serde parameters cannot be parsed, or
//   - a further "sd:StorageDescriptor(" follows the one just parsed.
// Missing bucketing or sort columns are not an error; the corresponding
// list pointer is simply NULL.
struct hive_sd_desc* populateSD(HiveMetaData *md, Int32 mainSdID, 
                                Int32 tblID,  NAText* tblStr, size_t& pos)
{
  if (!findAToken(md, tblStr, pos, "sd:StorageDescriptor(", 
                  "getTableDesc::sd:StorageDescriptor(###"))
    return NULL;

  struct hive_column_desc* columns = populateColumns(md, 0, tblStr, pos);
  if (columns == NULL)
    return NULL;

  // scalar attributes, in the order in which they are searched for
  NAText location;
  if (!extractValueStr(md, tblStr, pos, "location:", ",", 
                       location, "populateSD::location:###"))
    return NULL;

  NAText inputFormat;
  if (!extractValueStr(md, tblStr, pos, "inputFormat:", ",", 
                       inputFormat, "populateSD:inputFormat:###"))
    return NULL;

  NAText outputFormat;
  if (!extractValueStr(md, tblStr, pos, "outputFormat:", ",", 
                       outputFormat, "populateSD:outputFormat:###"))
    return NULL;

  NAText compressedText;
  if (!extractValueStr(md, tblStr, pos, "compressed:", ",", 
                       compressedText, "populateSD:compressed:###"))
    return NULL;
  // only the exact text "true" counts as compressed
  const NABoolean isCompressed = (compressedText == "true") ? TRUE : FALSE;

  NAText numBucketsText;
  if (!extractValueStr(md, tblStr, pos, "numBuckets:", ",", 
                       numBucketsText, "populateSD:numBuckets:###"))
    return NULL;
  const Int32 numBuckets = atoi(numBucketsText.c_str());

  // serde parameters; both terminators are assigned by the callee
  char fieldTerminator, recordTerminator;
  NABoolean nullFormatSpec = FALSE;
  NAString nullFormat;
  if (!populateSerDeParams(md, 0, fieldTerminator, recordTerminator, 
                           nullFormatSpec, nullFormat,
                           tblStr, pos))
    return NULL;

  // a NULL result of the next two calls is passed on as is
  struct hive_bkey_desc* bucketingCols = 
    populateBucketingCols(md, 0, tblStr, pos);
  struct hive_skey_desc* sortCols = 
    populateSortCols(md, 0, tblStr, pos);

  struct hive_sd_desc* newSD = new (CmpCommon::contextHeap()) 
    struct hive_sd_desc(0, //SdID
                        location.c_str(),
                        0, // creation time
                        numBuckets,
                        inputFormat.c_str(),
                        outputFormat.c_str(),
                        (nullFormatSpec ? nullFormat.data() : NULL),
                        hive_sd_desc::TABLE_SD, 
                        // TODO : no support for hive_sd_desc::PARTN_SD
                        columns, 
                        sortCols, 
                        bucketingCols,
                        fieldTerminator,
                        recordTerminator,
                        isCompressed
                        );

  // TODO : loop over SDs
  // For now a second storage descriptor in the text makes the call fail;
  // the probe passes FALSE as its last argument, unlike the other
  // searches in this function.
  if (findAToken(md, tblStr, pos, "sd:StorageDescriptor(", 
                 "getTableDesc::sd:StorageDescriptor(###)",FALSE))
    return NULL;

  return newSD;
}