예제 #1
0
/**
 * Reads the field at column_idx and stores it in *buffer as an unsigned 64-bit integer.
 *
 * @param column_idx    Zero-based index of the column to read; must be below getColumnCount().
 * @param buffer        Output: receives the converted value, or 0 when the field is NULL.
 * @param is_null_value Output: set to 1 when the field text equals the null format, else 0.
 * @param err_buf       Error message buffer handed to RETURN_ON_ASSERT.
 * @param err_buf_len   Size of err_buf.
 *
 * @return HIVE_SUCCESS once the value has been stored, or HIVE_NO_MORE_DATA when the same
 *         column is requested again after it was already completely read. Argument checks
 *         go through RETURN_ON_ASSERT with HIVE_ERROR (presumably returned when the
 *         checked condition holds -- confirm against the macro definition).
 */
HiveReturn HiveRowSet::getFieldAsI64U(size_t column_idx, uint64_t* buffer, int* is_null_value,
                                      char* err_buf, size_t err_buf_len) {
  /* Argument and rowset sanity checks */
  RETURN_ON_ASSERT(buffer == NULL,
                   __FUNCTION__,
                   "Column data output buffer cannot be NULL.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(is_null_value == NULL,
                   __FUNCTION__,
                   "Column data is_null_value (output) cannot be NULL.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(getColumnCount() == 0,
                   __FUNCTION__,
                   "Rowset contains zero columns.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(column_idx >= getColumnCount(),
                   __FUNCTION__,
                   "Column index out of bounds.",
                   err_buf, err_buf_len, HIVE_ERROR);

  if (m_last_column_fetched != column_idx) {
    /* A different column than last time: load it and restart the read state */
    extractField(column_idx);
    m_bytes_read = 0;
    m_last_column_fetched = column_idx;
    m_is_completely_read = false;
  } else if (m_is_completely_read) {
    /* Same column as before and nothing is left to hand out */
    return HIVE_NO_MORE_DATA;
  }

  /* A field whose text matches the null format spec is reported as NULL */
  const bool field_is_null = (strcmp(getNullFormat(), m_field_buffer) == 0);
  if (field_is_null) {
    *is_null_value = 1;
    *buffer = 0;
  } else {
    *is_null_value = 0;
    *buffer = ATOI64U(m_field_buffer);
  }

  m_is_completely_read = true;
  return HIVE_SUCCESS;
}
예제 #2
0
/**
 * Copies the field at column_idx into buffer as a C string, possibly over several calls.
 *
 * When the caller's buffer is too small for the remaining data, the copy is truncated and
 * the read offset (m_bytes_read) is advanced, so a following call with the same column_idx
 * continues where this one stopped.
 *
 * @param column_idx     Zero-based index of the column to read; must be below getColumnCount().
 * @param buffer         Output buffer for the field text.
 * @param buffer_len     Size of buffer in bytes; must be non-zero.
 * @param data_byte_size Optional output (may be NULL): number of bytes that were still unread
 *                       before this call, not counting the null terminator; 0 for a NULL field.
 * @param is_null_value  Output: set to 1 when the field text equals the null format, else 0.
 * @param err_buf        Error message buffer handed to RETURN_ON_ASSERT.
 * @param err_buf_len    Size of err_buf.
 *
 * @return HIVE_SUCCESS when the field has been read to its end,
 *         HIVE_SUCCESS_WITH_MORE_DATA when unread bytes remain after this call,
 *         HIVE_NO_MORE_DATA when the same column was already completely read.
 *         Argument checks go through RETURN_ON_ASSERT with HIVE_ERROR (presumably returned
 *         when the checked condition holds -- confirm against the macro definition).
 */
HiveReturn HiveRowSet::getFieldAsCString(size_t column_idx, char* buffer, size_t buffer_len,
                                         size_t* data_byte_size, int* is_null_value, char* err_buf,
                                         size_t err_buf_len) {
  /* Argument and rowset sanity checks */
  RETURN_ON_ASSERT(buffer == NULL,
                   __FUNCTION__,
                   "Column data output buffer cannot be NULL.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(is_null_value == NULL,
                   __FUNCTION__,
                   "Column data is_null_value (output) cannot be NULL.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(getColumnCount() == 0,
                   __FUNCTION__,
                   "Rowset contains zero columns.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(column_idx >= getColumnCount(),
                   __FUNCTION__,
                   "Column index out of bounds.",
                   err_buf, err_buf_len, HIVE_ERROR);
  RETURN_ON_ASSERT(buffer_len == 0,
                   __FUNCTION__,
                   "Output buffer cannot have a size of zero.",
                   err_buf, err_buf_len, HIVE_ERROR);

  if (m_last_column_fetched != column_idx) {
    /* A different column than last time: load it and restart the read state */
    extractField(column_idx);
    m_bytes_read = 0;
    m_last_column_fetched = column_idx;
    m_is_completely_read = false;
  } else if (m_is_completely_read) {
    /* An earlier call already returned the whole field */
    return HIVE_NO_MORE_DATA;
  }

  /* NULL field: the text matches the null format spec, so hand back an empty string */
  if (strcmp(getNullFormat(), m_field_buffer) == 0) {
    *is_null_value = 1;
    if (data_byte_size != NULL) {
      *data_byte_size = 0;
    }
    buffer[0] = '\0';
    m_is_completely_read = true;
    return HIVE_SUCCESS;
  }

  /* Non-NULL field */
  *is_null_value = 0;
  size_t field_len = getFieldLen(column_idx);
  /* The read offset can never be past the end of the field */
  assert(field_len >= m_bytes_read);
  size_t unread_len = field_len - m_bytes_read; /* terminator not included */
  if (data_byte_size != NULL) {
    /* Report how much was still pending before this fetch */
    *data_byte_size = unread_len;
  }

  const char* read_cursor = m_field_buffer + m_bytes_read;
  /* Offset + pending bytes + terminator must fit inside the field buffer */
  assert(m_bytes_read + unread_len + 1 <= sizeof(m_field_buffer));

  /* Copy what fits; the limit includes one byte for the null terminator */
  size_t copied = safe_strncpy(buffer, read_cursor, min(buffer_len, unread_len + 1));
  /* The copied count excludes the terminator, so it is a pure data offset */
  m_bytes_read += copied;

  if (m_bytes_read < field_len) {
    return HIVE_SUCCESS_WITH_MORE_DATA; /* Truncated; caller should fetch again */
  }
  m_is_completely_read = true;
  return HIVE_SUCCESS;
}
예제 #3
0
파일: bigBed.c 프로젝트: cestmoi7/AGAPE
static boolean bbWordMatchesName(char *line, int fieldIx, void *target)
/* Return true if the field of line selected by fieldIx (as located by extractField)
 * has exactly the same bytes as target, which is just a string. */
{
char *wantedName = target;
char *fieldStart;
int fieldLen;
extractField(line, fieldIx, &fieldStart, &fieldLen);
/* The field is not zero-terminated here, so compare the length first and then the bytes. */
boolean sameLength = (strlen(wantedName) == fieldLen);
return sameLength && memcmp(wantedName, fieldStart, fieldLen) == 0;
}
예제 #4
0
// Turns an MD tag into a per-base annotation line in which matching
// bases and mismatching bases are marked individually.
// Example:
//   input : MD:1A20
//   output: =A====================
// MDfield   - the MD field text, passed through extractField first
// deletions - list handed on to correctMDstring
string processMD(string MDfield, stringList &deletions)
{
    numList runLengths;
    stringList letters;

    // split the extracted MD value into its numeric and letter parts
    string mdValue = extractField(MDfield);
    regexSeparate(mdValue, runLengths, letters);

    // build the annotation line from the separated parts
    return correctMDstring(runLengths, letters, deletions);
}
예제 #5
0
파일: bigBed.c 프로젝트: cestmoi7/AGAPE
static boolean bbWordIsInHash(char *line, int fieldIx, void *target)
/* Return true if the field of line selected by fieldIx (as located by extractField)
 * is found by hashLookup in target, which is a struct hash. */
{
struct hash *nameHash = target;
char *fieldStart;
int fieldLen;
extractField(line, fieldIx, &fieldStart, &fieldLen);

/* Make a zero-terminated copy of the field to use as the lookup key. */
char key[fieldLen+1];
memcpy(key, fieldStart, fieldLen);
key[fieldLen] = 0;

return hashLookup(nameHash, key) != NULL;
}
예제 #6
0
bool Message::setStringHeader( const std::string& string )
{
  clear();

  std::string::size_type pos = 0;
  int count = 0;

  while ( pos < string.size() )
  {
    FieldBase field = extractField( string, pos );
    if ( count < 3 && headerOrder[ count++ ] != field.getTag() )
      return false;

    if ( isHeaderField( field ) )
      m_header.setField( field, false );
    else break;
  }
  return true;
}
예제 #7
0
// Parses the repeating group introduced by `field` out of `string`, starting at
// `pos`, and adds each completed group entry to `map`.
//
// msg            - message identifier passed to dataDictionary.getGroup
// field          - the field whose tag identifies the group being parsed
// string         - the raw message text being parsed
// pos            - in/out parse position within `string`; on the "field does not
//                  belong to this group" exit it is rewound to the start of that field
// map            - field map that receives finished group entries via addGroupPtr
// dataDictionary - dictionary asked whether (msg, tag) is a group
//
// Returns immediately when dataDictionary.getGroup reports no group for this tag.
// Recurses on every field stored in a group entry so that nested groups are parsed.
void Message::setGroup( const std::string& msg, const FieldBase& field,
                        const std::string& string,
                        std::string::size_type& pos, FieldMap& map,
                        const DataDictionary& dataDictionary )
{
  int group = field.getTag();
  // delim and pDD are presumably filled in by getGroup (delim is not initialized
  // here and both are read below) -- confirm against DataDictionary::getGroup.
  int delim;
  const DataDictionary* pDD = 0;
  if ( !dataDictionary.getGroup( msg, group, delim, pDD ) ) return ;
  // Group entry currently being filled; empty until the first entry is started.
  std::unique_ptr<Group> pGroup;

  while ( pos < string.size() )
  {
    // Remember where this field starts so it can be "un-read" if it is not ours.
    std::string::size_type oldPos = pos;
    // Note: this local `field` shadows the `field` parameter for the rest of the loop body.
    FieldBase field = extractField( string, pos, &dataDictionary, &dataDictionary, pGroup.get() );
       
    // Start a new group because...
    if (// found delimiter
    (field.getTag() == delim) ||
    // no delimiter, but field belongs to group OR field already processed
    (pDD->isField( field.getTag() ) && (pGroup.get() == 0 || pGroup->isSetField( field.getTag() )) ))
    {
      // Hand the finished entry (if any) to the map before opening the next one.
      if ( pGroup.get() )
      {
        map.addGroupPtr( group, pGroup.release(), false );
      }
      pGroup.reset( new Group( field.getTag(), delim, pDD->getOrderedFields() ) );
    }
    else if ( !pDD->isField( field.getTag() ) )
    {
      // The field is not part of this group: flush the open entry, rewind so the
      // caller re-reads this field, and stop.
      if ( pGroup.get() )
      {
        map.addGroupPtr( group, pGroup.release(), false );
      }
      pos = oldPos;
      return ;
    }

    if ( !pGroup.get() ) return ;
    pGroup->setField( field, false );
    // Recurse in case the field just stored itself introduces a nested group.
    setGroup( msg, field, string, pos, *pGroup, *pDD );
  }
  // NOTE(review): when the loop ends because pos reached the end of `string`,
  // an entry still held by pGroup is not passed to map.addGroupPtr in this
  // function -- confirm that input always ends with a non-group field.
}
예제 #8
0
// Clears the message and rebuilds it from the raw message text in `string`.
//
// Fields are extracted one at a time and routed to the header, the trailer, or
// the body. After each field, setGroup is called with the matching dictionary
// (when one was supplied) so that repeating groups are parsed. Finally
// validate( validationRules ) is called.
//
// direction                  - NOTE(review): not referenced in the body; the
//                              out-of-order tolerance check below passes
//                              OUTGOING_DIRECTION instead -- confirm this is intended.
// string                     - raw message text to parse
// validationRules            - passed to shouldTolerateOutOfOrderTag and validate
// pSessionDataDictionary     - may be null; used for header/trailer detection and groups
// pApplicationDataDictionary - may be null; used for body groups
//
// Throws TagOutOfOrder when one of the first three fields does not carry the tag
// expected by headerOrder and the validation rules do not tolerate it.
void Message::setString( int direction, const std::string& string,
                         const ValidationRules *validationRules,
                         const DataDictionary* pSessionDataDictionary,
                         const DataDictionary* pApplicationDataDictionary )
throw( Exception )
{
  clear();

  std::string::size_type pos = 0;
  // Number of leading fields checked against headerOrder so far (at most 3).
  int count = 0;
  // Value of the MsgType field once seen; passed to setGroup for body groups.
  std::string msg;

  /*
  static int const headerOrder[] =
  {
    FIELD::BeginString,
    FIELD::BodyLength,
    FIELD::MsgType
  };
  */

  // Section the most recently processed field belonged to.
  field_type type = header;

  while ( pos < string.size() )
  {
    FieldBase field = extractField( string, pos, pSessionDataDictionary, pApplicationDataDictionary );
    // count++ is only evaluated while count < 3, so only the first three fields are checked.
    if ( count < 3 &&
         headerOrder[ count++ ] != field.getTag() && 
         !ValidationRules::shouldTolerateOutOfOrderTag(validationRules, OUTGOING_DIRECTION, safeMsgType(), field.getTag() ) 
 )
    {
      //throw InvalidMessage("Header fields out of order.");
      throw TagOutOfOrder( field.getTag() );
    }

    if ( isHeaderField( field, pSessionDataDictionary ) )
    {
      // A header field after body/trailer fields: mark the structure invalid and
      // remember the first offending tag.
      if ( type != header )
      {
        if(m_tag == 0) m_tag = field.getTag();
        m_validStructure = false;
      }

      if ( field.getTag() == FIELD::MsgType )
        msg = field.getString();

      m_header.setField( field, false );

      if ( pSessionDataDictionary )
        setGroup( "_header_", field, string, pos, getHeader(), *pSessionDataDictionary );
    }
    else if ( isTrailerField( field, pSessionDataDictionary ) )
    {
      type = trailer;
      m_trailer.setField( field, false );

      if ( pSessionDataDictionary )
        setGroup( "_trailer_", field, string, pos, getTrailer(), *pSessionDataDictionary );
    }
    else
    {
      // A body field after the trailer has started: mark the structure invalid and
      // remember the first offending tag.
      if ( type == trailer )
      {
        if(m_tag == 0) m_tag = field.getTag();
        m_validStructure = false;
      }

      type = body;
      setField( field, false );

      if ( pApplicationDataDictionary )
      {
        setGroup( msg, field, string, pos, *this, *pApplicationDataDictionary );
      }
    }
  }

  validate( validationRules );
}
예제 #9
0
/**
 * Re-extracts the field for the column recorded in m_last_column_fetched.
 * Presumably extractField fills m_field_buffer -- confirm against its definition.
 */
void HiveRowSet::initFieldBuffer() {
  /* m_field_buffer should always correspond to the field indicated by m_last_column_fetched*/
  extractField(m_last_column_fetched);
}
예제 #10
0
// Main processing function: controls the flow for collecting data from one
// alignment line and prints one tab-separated result row to stdout.
//
// line - one tab-separated alignment record. Columns 0 (id), 2 (reference
//        name), 5 (CIGAR), 9 (sequence) and 10 (quality) are read by index;
//        the XG, NM and MD fields are located with findField.
//
// Lines with fewer than 11 columns (for example header lines) and lines whose
// reference name is "*" (unaligned reads) are skipped without output.
//
// Always returns 0.
int processline(string line)
{
    // variable definition
    stringList columns, deletions, insertions, mismatchList;
    string softclippedHead, softclippedTail;
    numList baseCounter, deletionBaseCounter, insertionBaseCounter, mismatchBaseCounter;
    numList headClippedBaseCounter, tailClippedBaseCounter;
    string chrom, id, sequence, quality;
    string deletionsString, insertionString;
    string XGfield = "A", NMfield, MDfield;
    int numberOfMismatch, numberOfGapExtention;
    string cigarString, MDline, cigarLine;
    int seqlength, headClipped = 0, tailClipped = 0;
    double averageQualityScore, head5Qual, end5Qual;

    columns = split(line,'\t');
    // columns[10] is the highest index read below; indexing a shorter line
    // would be out of bounds, so skip it.
    if (columns.size() < 11)
    {
        return 0;
    }

    chrom = columns[2];
    // only collect data from aligned reads
    if (chrom != "*")
    {
        //define columns
        id = columns[0];
        cigarString = columns[5];
        sequence = columns[9];
        quality = columns[10];
        seqlength = sequence.length();

        baseCounter = getBaseCount(sequence); // collect base content
        averageQualityScore = averageQual(quality); // whole sequence quality
        head5Qual = averageQual(quality.substr(0,5)); // first 5 base quality
        // last 5 base quality; the start offset is clamped to 0 because
        // substr throws std::out_of_range when the offset exceeds the
        // string length (reads or quality strings shorter than 5)
        const string::size_type tailStart =
            quality.length() > 5 ? quality.length() - 5 : 0;
        end5Qual = averageQual(quality.substr(tailStart,5));

        //define extra field
        // for XG, NM and MD
        findField(columns, XGfield, NMfield, MDfield);

        //get field item
        numberOfMismatch = atoi(extractField(NMfield).c_str());
        // empty() guard: at(0) throws on an empty string
        if (!XGfield.empty() && XGfield.at(0) == 'X')
        {
            numberOfGapExtention = atoi(extractField(XGfield).c_str());
        }
        else
        {
            numberOfGapExtention = 0;
        }

        // creating a line using MDfield
        MDline = processMD(MDfield, deletions);

        cigarLine = processCigar(cigarString, headClipped, tailClipped);
        mismatchList = insertionAndMismatch(cigarLine, MDline, sequence, insertions, softclippedHead, softclippedTail, id, numberOfMismatch);
        mismatchBaseCounter = getMismatchCount(mismatchList);

        //count deletion and insertion
        deletionsString = concatString(deletions);
        insertionString = concatString(insertions);
        deletionBaseCounter = getBaseCount(deletionsString);
        insertionBaseCounter = getBaseCount(insertionString);
        headClippedBaseCounter = getBaseCount(softclippedHead);
        tailClippedBaseCounter = getBaseCount(softclippedTail);

        //assertions for verifying program
        assert (softclippedTail.length() == tailClipped);
        assert (softclippedHead.length() == headClipped);
        assert(accumulate(baseCounter.begin(),baseCounter.end(),0) == seqlength);
        // disabled check (kept for reference):
        // sum of mismatchBaseCounter + numberOfGapExtention == numberOfMismatch

        // print out result
        cout << id << "\t";
        //print out base counts
        printBase(baseCounter) ;
        cout << averageQualityScore << "\t" <<  head5Qual << "\t" << end5Qual << "\t";
        cout << numberOfGapExtention << "\t" << numberOfMismatch - numberOfGapExtention << "\t";
        //print mismatch
        // AtoC, AtoT, AtoG, CtoA, CtoT, CtoG, GtoA, GtoT, GtoC, TtoA,TtoC, TtoG
        printBase(mismatchBaseCounter);
        // print out deletion
        printBase(deletionBaseCounter) ;
        printBase(insertionBaseCounter) ;
        cout << headClipped << "\t" << tailClipped << "\t";
        printBase(headClippedBaseCounter);
        printBase(tailClippedBaseCounter);
        cout << seqlength;
        cout << '\n';
   }
   return 0;
}