/**
 * Reads the field at column_idx as an unsigned 64-bit integer.
 *
 * @param column_idx     Index of the column to read; must be less than getColumnCount().
 * @param buffer         Output: receives the converted value, or 0 when the field is NULL.
 * @param is_null_value  Output: set to 1 when the field text equals getNullFormat(), else 0.
 * @param err_buf        Buffer handed to RETURN_ON_ASSERT for the error message.
 * @param err_buf_len    Size of err_buf.
 *
 * @return HIVE_SUCCESS when a value was produced; HIVE_NO_MORE_DATA when the same
 *         column was already completely read by a previous call; HIVE_ERROR (via
 *         RETURN_ON_ASSERT) when an argument check fails.
 */
HiveReturn HiveRowSet::getFieldAsI64U(size_t column_idx, uint64_t* buffer, int* is_null_value,
                                      char* err_buf, size_t err_buf_len) {
  RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
                   "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
                   "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
                   HIVE_ERROR);
  RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
                   "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
                   "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);

  if (m_last_column_fetched != column_idx) {
    /* Switching columns: load the new field and restart the read state. */
    extractField(column_idx);
    m_bytes_read = 0;
    m_last_column_fetched = column_idx;
    m_is_completely_read = false;
  } else if (m_is_completely_read) {
    /* Same column as last time and nothing is left to hand out. */
    return HIVE_NO_MORE_DATA;
  }

  if (strcmp(getNullFormat(), m_field_buffer) != 0) {
    /* Ordinary value: convert the field text. */
    *is_null_value = 0;
    *buffer = ATOI64U(m_field_buffer);
  } else {
    /* Field text matches the null format spec. */
    *is_null_value = 1;
    *buffer = 0;
  }

  m_is_completely_read = true;
  return HIVE_SUCCESS;
}
/**
 * Copies the field at column_idx into buffer as a null-terminated C string.
 *
 * A field longer than the caller's buffer is delivered in pieces: each call on the
 * same column continues from the offset kept in m_bytes_read until the whole field
 * has been returned.
 *
 * @param column_idx      Index of the column to read; must be less than getColumnCount().
 * @param buffer          Output buffer for the (possibly partial) field text.
 * @param buffer_len      Size of buffer in bytes; must be non-zero.
 * @param data_byte_size  Optional output (may be NULL): number of bytes that were still
 *                        unread before this call, excluding the null terminator; 0 for NULL fields.
 * @param is_null_value   Output: set to 1 when the field text equals getNullFormat(), else 0.
 * @param err_buf         Buffer handed to RETURN_ON_ASSERT for the error message.
 * @param err_buf_len     Size of err_buf.
 *
 * @return HIVE_SUCCESS when the field has been fully delivered,
 *         HIVE_SUCCESS_WITH_MORE_DATA when unread bytes remain after this call,
 *         HIVE_NO_MORE_DATA when the same column was already completely read,
 *         HIVE_ERROR (via RETURN_ON_ASSERT) when an argument check fails.
 */
HiveReturn HiveRowSet::getFieldAsCString(size_t column_idx, char* buffer, size_t buffer_len,
                                         size_t* data_byte_size, int* is_null_value,
                                         char* err_buf, size_t err_buf_len) {
  RETURN_ON_ASSERT(buffer == NULL, __FUNCTION__,
                   "Column data output buffer cannot be NULL.", err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(is_null_value == NULL, __FUNCTION__,
                   "Column data is_null_value (output) cannot be NULL.", err_buf, err_buf_len,
                   HIVE_ERROR);
  RETURN_ON_ASSERT(getColumnCount() == 0, __FUNCTION__,
                   "Rowset contains zero columns.", err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(column_idx >= getColumnCount(), __FUNCTION__,
                   "Column index out of bounds.", err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(buffer_len == 0, __FUNCTION__,
                   "Output buffer cannot have a size of zero.", err_buf, err_buf_len, HIVE_ERROR);

  if (m_last_column_fetched != column_idx) {
    /* Switching columns: load the new field and restart the read state. */
    extractField(column_idx);
    m_bytes_read = 0;
    m_last_column_fetched = column_idx;
    m_is_completely_read = false;
  } else if (m_is_completely_read) {
    /* A previous call already delivered this field in full. */
    return HIVE_NO_MORE_DATA;
  }

  /* NULL field: report it, hand back an empty string, and finish. */
  if (strcmp(getNullFormat(), m_field_buffer) == 0) {
    *is_null_value = 1;
    if (data_byte_size != NULL) {
      *data_byte_size = 0;
    }
    buffer[0] = '\0';
    m_is_completely_read = true;
    return HIVE_SUCCESS;
  }

  *is_null_value = 0;

  const size_t field_len = getFieldLen(column_idx);
  /* The read offset can never be past the end of the field. */
  assert(field_len >= m_bytes_read);
  const size_t unread_len = field_len - m_bytes_read; /* excludes the null terminator */

  if (data_byte_size != NULL) {
    /* Report what was outstanding before this fetch. */
    *data_byte_size = unread_len;
  }

  const char* read_from = m_field_buffer + m_bytes_read;
  /* Offset + remaining bytes + terminator must lie inside m_field_buffer. */
  assert(m_bytes_read + unread_len + 1 <= sizeof(m_field_buffer));

  /* Copy as much as fits; the +1 leaves room for the null terminator. */
  const size_t copy_limit = min(buffer_len, unread_len + 1);
  const size_t copied_len = safe_strncpy(buffer, read_from, copy_limit);
  m_bytes_read += copied_len; /* copied_len does not count the terminator */

  if (m_bytes_read < field_len) {
    return HIVE_SUCCESS_WITH_MORE_DATA; /* truncated; caller should fetch again */
  }

  m_is_completely_read = true;
  return HIVE_SUCCESS;
}
static boolean bbWordMatchesName(char *line, int fieldIx, void *target)
/* Return true if the field of line selected by fieldIx (as located by
 * extractField) is exactly the same as target, which is just a string. */
{
char *wantedName = target;
char *fieldStart;
int fieldLen;
extractField(line, fieldIx, &fieldStart, &fieldLen);
/* The field is compared by length and bytes, so it need not be zero-terminated. */
boolean sameLength = (strlen(wantedName) == fieldLen);
return sameLength && memcmp(wantedName, fieldStart, fieldLen) == 0;
}
// function for processing MD strings // returning a line with mismatch and match annotation // for each base, for example: // MD:1A20 // result: =A==================== string processMD(string MDfield, stringList &deletions) { string MD, MDstring; MD = extractField(MDfield); numList MDnum; stringList MDLetter; regexSeparate(MD, MDnum, MDLetter); MDstring = correctMDstring(MDnum,MDLetter, deletions); return MDstring; }
static boolean bbWordIsInHash(char *line, int fieldIx, void *target)
/* Return true if the field of line selected by fieldIx (as located by
 * extractField) is found in target, which is a struct hash. */
{
struct hash *lookupHash = target;
char *fieldStart;
int fieldLen;
extractField(line, fieldIx, &fieldStart, &fieldLen);

/* Make a zero-terminated copy of the field to hand to hashLookup. */
char key[fieldLen+1];
memcpy(key, fieldStart, fieldLen);
key[fieldLen] = 0;

return hashLookup(lookupHash, key) != NULL;
}
// Clears the message and loads only its header from a raw message string.
//
// Fields are extracted one at a time. The first three must carry the tags
// listed in headerOrder, in that order; otherwise false is returned.
// Header fields are stored in m_header, and parsing stops at the first
// field that is not a header field.
//
// Returns true unless one of the first three fields is out of order.
bool Message::setStringHeader( const std::string& string )
{
  clear();

  int count = 0;
  for ( std::string::size_type pos = 0; pos < string.size(); )
  {
    FieldBase field = extractField( string, pos );

    if ( count < 3 )
    {
      const bool inExpectedSlot = ( headerOrder[ count ] == field.getTag() );
      ++count;
      if ( !inExpectedSlot )
        return false;
    }

    if ( !isHeaderField( field ) )
      break;

    m_header.setField( field, false );
  }

  return true;
}
void Message::setGroup( const std::string& msg, const FieldBase& field, const std::string& string, std::string::size_type& pos, FieldMap& map, const DataDictionary& dataDictionary ) { int group = field.getTag(); int delim; const DataDictionary* pDD = 0; if ( !dataDictionary.getGroup( msg, group, delim, pDD ) ) return ; std::unique_ptr<Group> pGroup; while ( pos < string.size() ) { std::string::size_type oldPos = pos; FieldBase field = extractField( string, pos, &dataDictionary, &dataDictionary, pGroup.get() ); // Start a new group because... if (// found delimiter (field.getTag() == delim) || // no delimiter, but field belongs to group OR field already processed (pDD->isField( field.getTag() ) && (pGroup.get() == 0 || pGroup->isSetField( field.getTag() )) )) { if ( pGroup.get() ) { map.addGroupPtr( group, pGroup.release(), false ); } pGroup.reset( new Group( field.getTag(), delim, pDD->getOrderedFields() ) ); } else if ( !pDD->isField( field.getTag() ) ) { if ( pGroup.get() ) { map.addGroupPtr( group, pGroup.release(), false ); } pos = oldPos; return ; } if ( !pGroup.get() ) return ; pGroup->setField( field, false ); setGroup( msg, field, string, pos, *pGroup, *pDD ); } }
void Message::setString( int direction, const std::string& string, const ValidationRules *validationRules, const DataDictionary* pSessionDataDictionary, const DataDictionary* pApplicationDataDictionary ) throw( Exception ) { clear(); std::string::size_type pos = 0; int count = 0; std::string msg; /* static int const headerOrder[] = { FIELD::BeginString, FIELD::BodyLength, FIELD::MsgType }; */ field_type type = header; while ( pos < string.size() ) { FieldBase field = extractField( string, pos, pSessionDataDictionary, pApplicationDataDictionary ); if ( count < 3 && headerOrder[ count++ ] != field.getTag() && !ValidationRules::shouldTolerateOutOfOrderTag(validationRules, OUTGOING_DIRECTION, safeMsgType(), field.getTag() ) ) { //throw InvalidMessage("Header fields out of order."); throw TagOutOfOrder( field.getTag() ); } if ( isHeaderField( field, pSessionDataDictionary ) ) { if ( type != header ) { if(m_tag == 0) m_tag = field.getTag(); m_validStructure = false; } if ( field.getTag() == FIELD::MsgType ) msg = field.getString(); m_header.setField( field, false ); if ( pSessionDataDictionary ) setGroup( "_header_", field, string, pos, getHeader(), *pSessionDataDictionary ); } else if ( isTrailerField( field, pSessionDataDictionary ) ) { type = trailer; m_trailer.setField( field, false ); if ( pSessionDataDictionary ) setGroup( "_trailer_", field, string, pos, getTrailer(), *pSessionDataDictionary ); } else { if ( type == trailer ) { if(m_tag == 0) m_tag = field.getTag(); m_validStructure = false; } type = body; setField( field, false ); if ( pApplicationDataDictionary ) { setGroup( msg, field, string, pos, *this, *pApplicationDataDictionary ); } } } validate( validationRules ); }
/**
 * Initializes the field buffer by calling extractField() for the column
 * recorded in m_last_column_fetched.
 */
void HiveRowSet::initFieldBuffer() {
  /* m_field_buffer should always correspond to the field indicated by m_last_column_fetched */
  extractField(m_last_column_fetched);
}
//main processing function // controlling flow for collecting data // from each alignment int processline(string line) { // variable definition stringList columns, deletions, insertions, mismatchList; string softclippedHead, softclippedTail; numList baseCounter, deletionBaseCounter, insertionBaseCounter, mismatchBaseCounter; numList headClippedBaseCounter, tailClippedBaseCounter; string chrom, id, sequence,quality; string deletionsString, insertionString, clippedString; string XGfield = "A", NMfield, MDfield; int numberOfMismatch, numberOfGapExtention; string cigarString, MDline, cigarLine; int i, seqlength, headClipped = 0, tailClipped = 0; double averageQualityScore, head5Qual, end5Qual; columns = split(line,'\t'); chrom = columns[2]; // only collect data from aligned reads if (chrom != "*") { //define columns id = columns[0]; cigarString = columns[5]; sequence = columns[9]; quality = columns[10]; seqlength = sequence.length(); baseCounter = getBaseCount(sequence); // collect base content averageQualityScore = averageQual(quality); // whole sequence quality head5Qual = averageQual(quality.substr(0,5)); // first 5 base quality end5Qual = averageQual(quality.substr(seqlength-5,5)); //end 5 base quality //define extra field // for XG, NM and MD findField(columns, XGfield, NMfield, MDfield); //get field item numberOfMismatch = atoi(extractField(NMfield).c_str()); if (XGfield.at(0) == 'X') { numberOfGapExtention = atoi(extractField(XGfield).c_str()); } else { numberOfGapExtention = 0; } // creating a line using MDfield MDline = processMD(MDfield, deletions); cigarLine = processCigar(cigarString, headClipped, tailClipped); mismatchList = insertionAndMismatch(cigarLine, MDline, sequence, insertions, softclippedHead, softclippedTail, id, numberOfMismatch); mismatchBaseCounter = getMismatchCount(mismatchList); int sumOfMismatch = std::accumulate(mismatchBaseCounter.begin(), mismatchBaseCounter.end(), 0); //count deletion and insertion deletionsString = 
concatString(deletions); insertionString = concatString(insertions); deletionBaseCounter = getBaseCount(deletionsString); insertionBaseCounter = getBaseCount(insertionString); headClippedBaseCounter = getBaseCount(softclippedHead); tailClippedBaseCounter = getBaseCount(softclippedTail); //assertions for verifying program assert (softclippedTail.length() == tailClipped); assert (softclippedHead.length() == headClipped); assert(accumulate(baseCounter.begin(),baseCounter.end(),0) == seqlength); //assert(sumOfMismatch + numberOfGapExtention == numberOfMismatch); // print out result cout << id << "\t"; //print out base counts printBase(baseCounter) ; cout << averageQualityScore << "\t" << head5Qual << "\t" << end5Qual << "\t"; cout << numberOfGapExtention << "\t" << numberOfMismatch - numberOfGapExtention << "\t"; //print mismatch // AtoC, AtoT, AtoG, CtoA, CtoT, CtoG, GtoA, GtoT, GtoC, TtoA,TtoC, TtoG printBase(mismatchBaseCounter); // print out deletion printBase(deletionBaseCounter) ; printBase(insertionBaseCounter) ; cout << headClipped << "\t" << tailClipped << "\t"; printBase(headClippedBaseCounter); printBase(tailClippedBaseCounter); cout << seqlength; cout << '\n'; } return 0; }