struct hive_pkey_desc* populatePartitionKey(HiveMetaData *md, Int32 tblID, NAText* tblStr, size_t& pos) { hive_pkey_desc* result = NULL; hive_pkey_desc* last = NULL; std::size_t foundB ; if (!findAToken(md, tblStr, pos, "partitionKeys:", "populatePartitionKeys::partitionKeys:###")) return NULL; std::size_t foundE = pos ; if (!findAToken(md, tblStr, foundE, "],", "populatePartitionKeys::partitionKeys:],###")) return NULL; Int32 colIdx = 0; while (pos < foundE) { foundB = tblStr->find("FieldSchema(name:", pos); if ((foundB == std::string::npos)||(foundB > foundE)) { return NULL; // no part Key } foundB = foundB + strlen("FieldSchema(name:"); pos = foundB ; if (!findAToken(md, tblStr, pos, ",", "populatePartitionKeys::comment:,###")) return NULL; NAText nameStr = tblStr->substr(foundB, pos-foundB); NAText typeStr; if(!extractValueStr(md, tblStr, pos, "type:", ", comment", typeStr, "populatePartitionKeys::type:###")) return NULL; pos++; if (!findAToken(md, tblStr, pos, ",", "populateColumns::comment:,###")) return NULL; hive_pkey_desc* newPkey = new (CmpCommon::contextHeap()) struct hive_pkey_desc(nameStr.c_str(), typeStr.c_str(), colIdx); if ( result == NULL ) { last = result = newPkey; } else { last->next_ = newPkey; last = newPkey; } colIdx++; } // end of while return result; }
// Reads the field and record terminators from the "serdeInfo:" section of a
// Hive table description string.
//
//   md               - passed through to findAToken()
//   serdeID          - not referenced in the body
//   fieldTerminator  - out: character following "field.delim=" if that key is
//                      present in the section, otherwise '\001'
//   recordTerminator - out: character following "line.delim=" if that key is
//                      present in the section, otherwise '\n'
//   tblStr           - text being parsed
//   pos              - in/out cursor; on success it is left on the "}),"
//                      that closes the section
//
// Returns TRUE on success, FALSE when "serdeInfo:" or the closing "}),"
// cannot be found.
NABoolean populateSerDeParams(HiveMetaData *md, Int32 serdeID,
                              char& fieldTerminator, char& recordTerminator,
                              NAText* tblStr, size_t& pos)
{
  fieldTerminator  = '\001';  // this the Hive default ^A or ascii code 1
  recordTerminator = '\n';    // this is the Hive default

  // FALSE (not NULL) is returned on failure: the return type is a boolean,
  // not a pointer
  if (!findAToken(md, tblStr, pos, "serdeInfo:",
                  "populateSerDeParams::serdeInfo:###"))
    return FALSE;

  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "}),",
                  "populateSerDeParams::serDeInfo:)},###"))
    return FALSE;

  const char * fieldStr = "field.delim";
  const char * lineStr  = "line.delim";

  std::size_t foundB = tblStr->find(fieldStr, pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      // +1 skips the '=' between the key and its value
      std::size_t valuePos = foundB + strlen(fieldStr) + 1;
      // an empty value would otherwise pick up a character of the closing
      // "})," token; keep the default in that case
      if (valuePos < foundE)
        fieldTerminator = tblStr->at(valuePos);
    }

  foundB = tblStr->find("line.delim=", pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      std::size_t valuePos = foundB + strlen(lineStr) + 1;
      if (valuePos < foundE)
        recordTerminator = tblStr->at(valuePos);
    }

  pos = foundE;
  return TRUE;
}
struct hive_skey_desc* populateSortCols(HiveMetaData *md, Int32 sdID, NAText* tblStr, size_t& pos) { hive_skey_desc* result = NULL; hive_skey_desc* last = NULL; std::size_t foundB ; if (!findAToken(md, tblStr, pos, "sortCols:", "populateSortCols::sortCols:###")) return NULL; std::size_t foundE = pos ; if (!findAToken(md, tblStr, foundE, "],", "populateSortCols::sortCols:],###")) return NULL; Int32 colIdx = 0; while (pos < foundE) { foundB = tblStr->find("Order(col:", pos); if ((foundB == std::string::npos)||(foundB > foundE)) { return NULL; } foundB = foundB + strlen("Order(col:"); pos = foundB ; if (!findAToken(md, tblStr, pos, ",", "populateSortCols::name:,###")) return NULL; NAText nameStr = tblStr->substr(foundB, pos-foundB); NAText orderStr; if(!extractValueStr(md, tblStr, pos, "order:", ",", orderStr, "populateSortCols::order:###")) return NULL; pos++; if (!findAToken(md, tblStr, pos, ",", "populateSortColumns::comment:,###")) return NULL; hive_skey_desc* newSkey = new (CmpCommon::contextHeap()) struct hive_skey_desc(nameStr.c_str(), colIdx, atoi(orderStr.c_str())); if ( result == NULL ) { last = result = newSkey; } else { last->next_ = newSkey; last = newSkey; } colIdx++; } // end of while return result; }
struct hive_column_desc* populateColumns(HiveMetaData *md, Int32 cdID, NAText* tblStr, size_t& pos) { struct hive_column_desc* result = NULL; struct hive_column_desc* last = result; std::size_t foundB ; if (!findAToken(md, tblStr, pos, "cols:", "populateColumns::cols:###")) return NULL; std::size_t foundE = pos; if (!findAToken(md, tblStr, foundE, ")],", "populateColumns::cols:],###")) return NULL; Int32 colIdx = 0; while (pos < foundE) { NAText nameStr; if(!extractValueStr(md, tblStr, pos, "FieldSchema(name:", ",", nameStr, "populateColumns::FieldSchema(name:###")) return NULL; NAText typeStr; if(!extractValueStr(md, tblStr, pos, "type:", ", comment", typeStr, "populateColumns::type:###")) return NULL; pos++; if (!findAToken(md, tblStr, pos, ",", "populateColumns::comment:,###")) return NULL; struct hive_column_desc* newCol = new (CmpCommon::contextHeap()) struct hive_column_desc(0, nameStr.c_str(), typeStr.c_str(), colIdx); if ( result == NULL ) { last = result = newCol; } else { last->next_ = newCol; last = newCol; } colIdx++; } // end of while return result; }
// Extracts the text between beginTok and the next endTok.
//
//   md         - passed through to findAToken()
//   tblStr     - text being searched
//   pos        - in/out cursor: the search for beginTok starts here; on
//                success it is left on the start of endTok; if beginTok is
//                found but endTok is not, it is left on the start of beginTok
//   beginTok   - token that precedes the value
//   endTok     - token that follows the value
//   valueStr   - out: the value is appended to this string
//   errStr     - passed to findAToken() for both searches
//   raiseError - passed to findAToken() for the beginTok search only; the
//                endTok search always passes TRUE
//
// Returns TRUE when both tokens were found, FALSE otherwise.
NABoolean extractValueStr (HiveMetaData *md, NAText* tblStr, size_t& pos,
                           const char* beginTok, const char* endTok,
                           NAText& valueStr, const char* errStr,
                           NABoolean raiseError)
{
  if (!findAToken(md, tblStr, pos, beginTok, errStr, raiseError))
    return FALSE;

  // first character of the value
  size_t foundB = pos + strlen(beginTok);

  // Look for the end token only after the begin token. Starting at pos
  // would let an endTok that occurs inside beginTok match first, making
  // (end - foundB) wrap around.
  size_t foundE = foundB;
  if (!findAToken(md, tblStr, foundE, endTok, errStr, TRUE))
    return FALSE;

  pos = foundE;
  valueStr.append(tblStr->substr(foundB, foundE - foundB));
  return TRUE;
}
struct hive_bkey_desc* populateBucketingCols(HiveMetaData *md, Int32 sdID, NAText* tblStr, size_t& pos) { hive_bkey_desc* result = NULL; hive_bkey_desc* last = NULL; std::size_t foundB ; if (!findAToken(md, tblStr, pos, "bucketCols:", "populateBucketingCols::bucketCols:###")) return NULL; std::size_t foundE = pos ; if (!findAToken(md, tblStr, foundE, "],", "populateBucketingCols::bucketCols:],###")) return NULL; pos = pos + strlen("bucketCols:["); if (pos == foundE) return NULL ; // empty bucket cols list. This line is code is for // clarity alone, the while condition alone is sufficient. Int32 colIdx = 0; while (pos < foundE) { foundB = tblStr->find(",", pos); if ((foundB == std::string::npos)||(foundB > foundE)) { foundB = foundE; // we have only one bucketing col or // this is the last bucket col } NAText nameStr = tblStr->substr(pos, foundB-pos); pos = foundB + 1; hive_bkey_desc* newBkey = new (CmpCommon::contextHeap()) struct hive_bkey_desc(nameStr.c_str(), colIdx); if ( result == NULL ) { last = result = newBkey; } else { last->next_ = newBkey; last = newBkey; } colIdx++; } // end of while return result; }
// Reads the field/record terminators and the optional null format from the
// "serdeInfo:" section of a Hive table description string.
//
//   md               - passed through to findAToken()
//   serdeID          - not referenced in the body
//   fieldTerminator  - out: character following "field.delim=" if that key is
//                      present in the section, otherwise '\001'
//   recordTerminator - out: character following "line.delim=" if that key is
//                      present in the section, otherwise '\n'
//   nullFormatSpec   - out: TRUE iff "serialization.null.format=" occurs in
//                      the section
//   nullFormat       - out: value of that key; assigned only when
//                      nullFormatSpec is set to TRUE
//   tblStr           - text being parsed
//   pos              - in/out cursor; on success it is left on the "}),"
//                      that closes the section
//
// Returns TRUE on success, FALSE when "serdeInfo:" or the closing "}),"
// cannot be found.
NABoolean populateSerDeParams(HiveMetaData *md, Int32 serdeID,
                              char& fieldTerminator, char& recordTerminator,
                              NABoolean &nullFormatSpec, NAString &nullFormat,
                              NAText* tblStr, size_t& pos)
{
  fieldTerminator  = '\001';  // this the Hive default ^A or ascii code 1
  recordTerminator = '\n';    // this is the Hive default

  // FALSE (not NULL) is returned on failure: the return type is a boolean,
  // not a pointer
  if (!findAToken(md, tblStr, pos, "serdeInfo:",
                  "populateSerDeParams::serdeInfo:###"))
    return FALSE;

  std::size_t foundE = pos;
  if (!findAToken(md, tblStr, foundE, "}),",
                  "populateSerDeParams::serDeInfo:)},###"))
    return FALSE;

  const char * nullStr  = "serialization.null.format=";
  const char * fieldStr = "field.delim";
  const char * lineStr  = "line.delim";

  nullFormatSpec = FALSE;
  std::size_t foundB = tblStr->find(nullStr, pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      nullFormatSpec = TRUE;
      std::size_t foundNB = foundB + strlen(nullStr);
      std::size_t foundNE = tblStr->find(", ", foundNB);
      // When this is the last parameter of the section there is no ", "
      // before the closing "}),": the search then either fails or matches
      // somewhere behind the section. End the value at the section end in
      // both cases.
      if ((foundNE == std::string::npos) || (foundNE > foundE))
        foundNE = foundE;
      nullFormat = NAString(tblStr->substr(foundNB, (foundNE - foundNB)));
    }

  foundB = tblStr->find(fieldStr, pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      // +1 skips the '=' between the key and its value
      std::size_t valuePos = foundB + strlen(fieldStr) + 1;
      // an empty value would otherwise pick up a character of the closing
      // "})," token; keep the default in that case
      if (valuePos < foundE)
        fieldTerminator = tblStr->at(valuePos);
    }

  foundB = tblStr->find("line.delim=", pos);
  if ((foundB != std::string::npos) && (foundB < foundE))
    {
      std::size_t valuePos = foundB + strlen(lineStr) + 1;
      if (valuePos < foundE)
        recordTerminator = tblStr->at(valuePos);
    }

  pos = foundE;
  return TRUE;
}
struct hive_sd_desc* populateSD(HiveMetaData *md, Int32 mainSdID, Int32 tblID, NAText* tblStr, size_t& pos) { struct hive_sd_desc* result = NULL; struct hive_sd_desc* mainSD = NULL; struct hive_sd_desc* last = NULL; char fieldTerminator, recordTerminator; size_t foundB; if (!findAToken(md, tblStr, pos, "sd:StorageDescriptor(", "getTableDesc::sd:StorageDescriptor(###")) return NULL; struct hive_column_desc* newColumns = populateColumns(md, 0, tblStr, pos); if (!newColumns) return NULL; NAText locationStr; if(!extractValueStr(md, tblStr, pos, "location:", ",", locationStr, "populateSD::location:###")) return NULL; NAText inputStr; if(!extractValueStr(md, tblStr, pos, "inputFormat:", ",", inputStr, "populateSD:inputFormat:###")) return NULL; NAText outputStr; if(!extractValueStr(md, tblStr, pos, "outputFormat:", ",", outputStr, "populateSD:outputFormat:###")) return NULL; NAText compressedStr; NABoolean isCompressed = FALSE; if(!extractValueStr(md, tblStr, pos, "compressed:", ",", compressedStr, "populateSD:compressed:###")) return NULL; if (compressedStr == "true") isCompressed = TRUE; NAText numBucketsStr; if(!extractValueStr(md, tblStr, pos, "numBuckets:", ",", numBucketsStr, "populateSD:numBuckets:###")) return NULL; Int32 numBuckets = atoi(numBucketsStr.c_str()); NABoolean nullFormatSpec = FALSE; NAString nullFormat; NABoolean success = populateSerDeParams(md, 0, fieldTerminator, recordTerminator, nullFormatSpec, nullFormat, tblStr, pos); if (!success) return NULL; struct hive_bkey_desc* newBucketingCols = populateBucketingCols(md, 0, tblStr, pos); struct hive_skey_desc* newSortCols = populateSortCols(md, 0, tblStr, pos); struct hive_sd_desc* newSD = new (CmpCommon::contextHeap()) struct hive_sd_desc(0, //SdID locationStr.c_str(), 0, // creation time numBuckets, inputStr.c_str(), outputStr.c_str(), (nullFormatSpec ? 
nullFormat.data() : NULL), hive_sd_desc::TABLE_SD, // TODO : no support for hive_sd_desc::PARTN_SD newColumns, newSortCols, newBucketingCols, fieldTerminator, recordTerminator, isCompressed ); result = newSD; // TODO : loop over SDs if (findAToken(md, tblStr, pos, "sd:StorageDescriptor(", "getTableDesc::sd:StorageDescriptor(###)",FALSE)) return NULL; return result; }