Пример #1
0
// ********************************************************************************************
// Writes the DNA symbols of every record in the block as Huffman codes.
//
//   bit_stream    - output stream receiving the codes
//   lz_matcher    - forwarded to FindLzMatches() / StoreLzMatches() when try_lz is set
//   sym_code      - table translating a raw sequence byte into a symbol index
//   sym_huf_codes - Huffman code (code, len) for each symbol index
//   try_lz        - when true, LZ matches are searched and stored first, and each
//                   record's symbols are emitted starting at index
//                   lz_matches[i].length instead of index 0
//
// The partially filled word of the stream is flushed before returning.
void Block::StoreDNAHuf(BitStream &bit_stream, LzMatcher &lz_matcher, uchar *sym_code, HuffmanEncoder::Code *sym_huf_codes, bool try_lz)
{
	if (try_lz)
	{
		// Info about LZ matches
		FindLzMatches(lz_matcher);
		StoreLzMatches(bit_stream, lz_matcher);
	}

	for (uint32 rec = 0; rec < rec_count; ++rec)
	{
		const uint32 n_symbols = records[rec].sequence_len;
		const uchar *symbols = records[rec].sequence;

		// With LZ enabled, the first lz_matches[rec].length symbols are not emitted here.
		uint32 pos = 0;
		if (try_lz)
		{
			pos = lz_matches[rec].length;
		}

		for (; pos < n_symbols; ++pos)
		{
			const HuffmanEncoder::Code &huf = sym_huf_codes[sym_code[symbols[pos]]];
			bit_stream.PutBits(huf.code, huf.len);
		}
	}

	bit_stream.FlushPartialWordBuffer();
}
Пример #2
0
// ********************************************************************************************
// Serializes the LZ match information of all records in the block.
//
// Pass 1: one flag bit per record telling whether it has a match
// (lz_matches[i].length != 0); records without a match additionally get
// their lz_inserted flag. The stream is then flushed.
//
// Pass 2: for every record with a match, the referenced record number, the
// match length reduced by the matcher's minimum match length, and the offset
// inside the referenced record are written with fixed bit widths. Length and
// offset are omitted when their computed bit width is zero. The stream is
// flushed again before returning.
void Block::StoreLzMatches(BitStream &bit_stream, LzMatcher &lz_matcher)
{
	for (uint32 rec = 0; rec < rec_count; ++rec)
	{
		const bool has_match = lz_matches[rec].length != 0;
		bit_stream.PutBit(has_match);
		if (!has_match)
		{
			bit_stream.PutBit(records[rec].lz_inserted);
		}
	}
	bit_stream.FlushPartialWordBuffer();

	// DNA data
	const uint32 rec_no_bits = BitStream::BitLength(b_start_rec_no + rec_count - 1);
	const uint32 min_match_len = lz_matcher.GetMinMatchLen();

	// The offset field exists only when the longest sequence exceeds the minimum match length.
	uint32 offset_bits = 0;
	if ((int32)global_max_sequence_length - (int32)min_match_len > 0)
	{
		offset_bits = BitStream::BitLength(global_max_sequence_length - min_match_len);
	}

	for (uint32 rec = 0; rec < rec_count; ++rec)
	{
		if (!(lz_matches[rec].length > 0))
		{
			continue;
		}

		// Width of the length field: based on the record's own length, clamped to [0, 255].
		int32 length_range = (int32)records[rec].sequence_len - (int32)min_match_len;
		if (length_range > 255)
		{
			length_range = 255;
		}
		if (length_range < 0)
		{
			length_range = 0;
		}
		const uint32 length_bits = BitStream::BitLength(length_range);

		bit_stream.PutBits(lz_matches[rec].rec_no, rec_no_bits);

		if (length_bits > 0)
		{
			bit_stream.PutBits(lz_matches[rec].length - min_match_len, length_bits);
		}

		if (offset_bits)
		{
			bit_stream.PutBits(lz_matches[rec].rec_offset, offset_bits);
		}
	}
	bit_stream.FlushPartialWordBuffer();
}
Пример #3
0
// ********************************************************************************************
// Serializes `tree` into `bit_stream`.
//
// The tree is first dumped into a temporary byte buffer by
// tree.StoreTree(mem_buf, mem_size). The stream's partial word is flushed,
// then the buffer size is written as a word, followed by the buffer bytes.
// The buffer is released with delete[] afterwards.
//
// NOTE(review): the delete[] below implies that tree.StoreTree() hands over
// ownership of a new[]-allocated buffer through its out-parameters - confirm
// against its definition.
//
// The buffer pointer and size are ordinary locals. Keeping them `static`
// would leave a dangling pointer behind after the delete[] (visible to the
// next call) and would make concurrent calls share the same variables.
void HuffmanEncoder::StoreTree(BitStream& bit_stream, HuffmanEncoder& tree)
{
	uchar* mem_buf = NULL;
	uint32 mem_size = 0;

	tree.StoreTree(mem_buf, mem_size);
	my_assert(mem_size > 0);
	my_assert(mem_buf);

	bit_stream.FlushPartialWordBuffer();
	bit_stream.PutWord(mem_size);
	bit_stream.PutBytes(mem_buf, mem_size);

	delete [] mem_buf;
}
Пример #4
0
// ********************************************************************************************
// Writes the run-length encoded quality stream of the block.
//
//   bit_stream    - output stream
//   qua_code      - table translating a raw quality byte into a quality symbol
//   qua_huf_codes - Huffman code tables for quality symbols, selected by the
//                   previously written quality symbol (0 for the first entry)
//   run_huf_codes - Huffman code tables for run lengths, selected by the
//                   current quality symbol
//
// For each of the qua_stream_len entries the quality symbol is written first,
// followed by the matching value from run_stream. The partially filled word
// of the stream is flushed before returning.
void Block::StoreQualityRLE(BitStream &bit_stream, uchar *qua_code, HuffmanEncoder::Code **qua_huf_codes,
	HuffmanEncoder::Code **run_huf_codes, int32 /*n_qualities*/)
{
	// Quality data
	uchar prev = 0;

	for (uint32 i = 0; i < qua_stream_len; ++i)
	{
		const uchar qua = qua_code[qua_stream[i]];
		const uchar len = run_stream[i];

		const HuffmanEncoder::Code &qua_huf = qua_huf_codes[prev][qua];
		const HuffmanEncoder::Code &run_huf = run_huf_codes[qua][len];

		bit_stream.PutBits(qua_huf.code, qua_huf.len);
		bit_stream.PutBits(run_huf.code, run_huf.len);

		prev = qua;
	}

	bit_stream.FlushPartialWordBuffer();
}
Пример #5
0
// ********************************************************************************************
// Writes the quality values of every record, one Huffman code per position.
//
//   bit_stream    - output stream
//   qua_code      - table translating a raw quality byte into a quality symbol
//   qua_huf_codes - Huffman code tables; the table used for the value at
//                   position j is qua_huf_codes[j + 1]
//   use_truc_hash - when true, only the first rec_th_len values of each record
//                   are coded, preceded by a 1-bit flag telling whether
//                   rec_th_len differs from quality_len and, if it does, by
//                   the difference (quality_len - rec_th_len) written with
//                   BitLength(quality_len) bits
//
// The partially filled word of the stream is flushed before returning.
void Block::StoreQualityPlain(BitStream &bit_stream, uchar *qua_code, HuffmanEncoder::Code **qua_huf_codes, int32 /*n_qualities*/, bool use_truc_hash)
{
	// Quality data
	for (uint32 rec = 0; rec < rec_count; ++rec)
	{
		const uchar *qualities = records[rec].quality;
		const uint32 full_len = records[rec].quality_len;

		// Number of values that are actually Huffman-coded for this record.
		uint32 coded_len = full_len;

		if (use_truc_hash)
		{
			coded_len = records[rec].rec_th_len;

			const bool is_truncated = coded_len != full_len;
			bit_stream.PutBits(is_truncated, 1);
			if (is_truncated)
			{
				bit_stream.PutBits(full_len - coded_len, BitStream::BitLength(full_len));
			}
		}

		for (uint32 pos = 0; pos < coded_len; ++pos)
		{
			const int32 qua = qua_code[qualities[pos]];
			const HuffmanEncoder::Code &huf = qua_huf_codes[pos + 1][qua];
			bit_stream.PutBits(huf.code, huf.len);
		}
	}

	bit_stream.FlushPartialWordBuffer();
}
Пример #6
0
// ********************************************************************************************
// Encodes the title line of every record in the block, field by field.
//
//   bit_stream             - output stream
//   fields                 - per-field descriptions; block_desc[block_no] of each
//                            field is read, and for numeric fields its
//                            is_block_delta_constant flag is updated here
//   block_no               - index into each field's block_desc
//   is_num_fields_constant - when false, each record's no_of_fields is written
//                            explicitly before its fields
//
// Output layout:
//   1. One flag bit for every non-constant field:
//        - numeric, delta coded : is_block_delta_constant
//        - numeric, otherwise   : is_block_value_constant
//        - non-numeric          : is_block_constant
//   2. For every record, the fields of its title. Fields are delimited by the
//      field's separator character or by the end of the title.
//
// The partially filled word of the stream is flushed before returning.
void Block::StoreTitle(BitStream &bit_stream, std::vector<Field> &fields, int32 block_no, bool is_num_fields_constant)
{
	uint32 n_fields = (uint32) fields.size();
	uint32 n_fields_bits = BitStream::BitLength(n_fields);
	prev_value.resize(n_fields);

	// Per-field block flags
	for (uint32 i = 0; i < n_fields; ++i)
	{
		if (fields[i].is_constant)
			continue;

		Field::BlockDesc& block_desc = fields[i].block_desc[block_no];
		prev_value[i] = 0;

		if (fields[i].is_numeric)
		{
			// The delta counts as block-constant only if it also equals the field's min_delta.
			block_desc.is_block_delta_constant &= (int32)block_desc.block_delta_constant == fields[i].min_delta;
			if (fields[i].is_delta_coding)
			{
				bit_stream.PutBit(block_desc.is_block_delta_constant);
			}
			else
			{
				bit_stream.PutBit(block_desc.is_block_value_constant);
			}
		}
		else
		{
			bit_stream.PutBit(block_desc.is_block_constant);
		}
	}

	// Per-record field data
	for (uint32 i = 0; i < rec_count; ++i)
	{
		uint32 c_field = 0;		// index of the field currently being scanned
		uint32 start_pos = 0;	// position in the title where that field starts
		FastqRecord &rec = records[i];

		if (!is_num_fields_constant)
		{
			bit_stream.PutBits(rec.no_of_fields, n_fields_bits);
		}

		// k runs up to and including title_len so that the last field,
		// which has no trailing separator, is closed at the end of the title.
		for (uint32 k = 0; k <= rec.title_len; ++k)
		{
			Field &cur_field = fields[c_field];

			// The bound is tested first so that title[title_len] is never read.
			if (k < rec.title_len && rec.title[k] != cur_field.sep)
				continue;

			// Globally constant field: nothing is stored.
			if (cur_field.is_constant)
			{
				start_pos = k+1;
				c_field++;
				continue;
			}

			if (cur_field.is_numeric)
			{
				int32 value = utils::to_num(rec.title+start_pos, k-start_pos);
				if (i == 0)
				{
					// First record of the block: value relative to the field's minimum.
					bit_stream.PutBits(value-cur_field.min_value, cur_field.no_of_bits_per_value);
				}
				else if ((cur_field.is_delta_coding && !cur_field.block_desc[block_no].is_block_delta_constant) ||
					(!cur_field.is_delta_coding && !cur_field.block_desc[block_no].is_block_value_constant))
				{
					// Later records are stored only when the delta / value is not constant within the block.
					int32 to_store;
					if (cur_field.is_delta_coding)
					{
						to_store = value - prev_value[c_field] - cur_field.min_delta;
					}
					else
					{
						to_store = value - cur_field.min_value;
					}

					if (cur_field.Huffman_global)
					{
						const HuffmanEncoder::Code* codes = cur_field.Huffman_global->GetCodes();
						bit_stream.PutBits(codes[to_store].code, codes[to_store].len);
					}
					else
					{
						bit_stream.PutBits(to_store, cur_field.no_of_bits_per_num);
					}
				}

				prev_value[c_field] = value;
				start_pos = k+1;
				c_field++;
				continue;
			}

			// Non-numeric field that is constant within the block: written for the first record only.
			if (i > 0 && cur_field.block_desc[block_no].is_block_constant)
			{
				start_pos = k+1;
				c_field++;
				continue;
			}

			if (!cur_field.is_len_constant)
			{
				bit_stream.PutBits(k-start_pos - cur_field.min_len, cur_field.no_of_bits_per_len);
			}

			for (uint32 j = 0; j < k-start_pos; ++j)
			{
				// Positions below cur_field.len with Ham_mask set are not stored.
				if (j >= cur_field.len || !cur_field.Ham_mask[j])
				{
					uchar c = rec.title[start_pos+j];
					// Positions beyond MAX_FIELD_STAT_LEN share the code table at that index.
					const HuffmanEncoder::Code* codes = cur_field.Huffman_local[MIN(j, Superblock::MAX_FIELD_STAT_LEN)]->GetCodes();
					bit_stream.PutBits(codes[c].code, codes[c].len);
				}
			}

			start_pos = k+1;
			c_field++;
		}
	}
	bit_stream.FlushPartialWordBuffer();
}
Пример #7
0
// ********************************************************************************************
// Encodes the whole block into bit_stream.
//
// Data is emitted in this order:
//   1. reserved bytes filled with INVALID_BYTE (only when D_RESERVE_BYTES_PER_BLOCK is enabled),
//   2. one bit per record telling whether plus_len == 1 (skipped when FLAG_PLUS_ONLY is set),
//   3. the quality length of every record (only with FLAG_VARIABLE_LENGTH),
//   4. line-break data (only with FLAG_LINE_BREAKS): the field width as a 5-bit value and then,
//      for every record, its sequence breaks, a zero value, its quality breaks and another zero value,
//   5. titles (StoreTitle), qualities (StoreQualityRLE or StoreQualityPlain, chosen by
//      _quality_stats_mode) and DNA (StoreDNAPlain or StoreDNAHuf, chosen by FLAG_DNA_PLAIN),
//   6. the value of ComputeRecordsCrc32() (only when D_COMPUTE_RECORDS_CRC_PER_BLOCK is enabled).
//
// _global_max_sequence_length is stored in the member global_max_sequence_length first.
void Block::Process(BitStream &bit_stream, LzMatcher &lz_matcher, std::vector<Field> &fields, uint32 /*n_fields*/,
	uint32 fastq_flags, uchar *sym_code, HuffmanEncoder::Code *sym_huf_codes, 
	uchar *qua_code, HuffmanEncoder::Code **qua_huf_codes, uint32 /*max_run_len*/, 
	HuffmanEncoder::Code **run_huf_codes, uint32 n_qualities, uint32 _global_max_sequence_length, 
	uint32 max_quality_length, uint32 block_no, uint32 _quality_stats_mode)
{
	global_max_sequence_length = _global_max_sequence_length;

#if (D_RESERVE_BYTES_PER_BLOCK)
	{
		uchar reserved[Block::RESERVED_BYTES];
		std::fill(reserved, reserved + Block::RESERVED_BYTES, INVALID_BYTE);
		bit_stream.PutBytes(reserved, Block::RESERVED_BYTES);
	}
#endif

	// '+' line flags
	const bool plus_only = (fastq_flags & FLAG_PLUS_ONLY) != 0;
	if (!plus_only)
	{
		for (uint32 r = 0; r < rec_count; ++r)
		{
			bit_stream.PutBit(records[r].plus_len == 1);
		}
	}

	// Per-record quality lengths
	const uint32 quality_len_bits = BitStream::BitLength(max_quality_length);
	const bool variable_length = (fastq_flags & FLAG_VARIABLE_LENGTH) != 0;
	if (variable_length)
	{
		for (uint32 r = 0; r < rec_count; ++r)
		{
			bit_stream.PutBits(records[r].quality_len, quality_len_bits);
		}
	}

	// Line breaks
	const bool has_line_breaks = (fastq_flags & FLAG_LINE_BREAKS) != 0;
	if (has_line_breaks)
	{
		// Largest break value over all records determines the field width.
		uint32 longest_break = 0;
		for (uint32 r = 0; r < rec_count; ++r)
		{
			if (records[r].sequence_breaks)
			{
				for (uint32 b = 0; b < records[r].sequence_breaks->size(); ++b)
				{
					if ((int32) longest_break < (*records[r].sequence_breaks)[b])
					{
						longest_break = (*records[r].sequence_breaks)[b];
					}
				}
			}
			if (records[r].quality_breaks)
			{
				for (uint32 b = 0; b < records[r].quality_breaks->size(); ++b)
				{
					if ((int32) longest_break < (*records[r].quality_breaks)[b])
					{
						longest_break = (*records[r].quality_breaks)[b];
					}
				}
			}
		}

		const uint32 break_bits = BitStream::BitLength(longest_break);
		bit_stream.PutBits(break_bits, 5);

		for (uint32 r = 0; r < rec_count; ++r)
		{
			// Sequence breaks, followed by a zero value
			if (records[r].sequence_breaks)
			{
				for (uint32 b = 0; b < records[r].sequence_breaks->size(); ++b)
				{
					bit_stream.PutBits((*records[r].sequence_breaks)[b], break_bits);
				}
			}
			bit_stream.PutBits(0, break_bits);

			// Quality breaks, followed by a zero value
			if (records[r].quality_breaks)
			{
				for (uint32 b = 0; b < records[r].quality_breaks->size(); ++b)
				{
					bit_stream.PutBits((*records[r].quality_breaks)[b], break_bits);
				}
			}
			bit_stream.PutBits(0, break_bits);
		}
	}
	bit_stream.FlushPartialWordBuffer();

	// Titles
	const bool const_num_fields = (fastq_flags & FLAG_CONST_NUM_FIELDS) != 0;
	StoreTitle(bit_stream, fields, block_no, const_num_fields);

	// Qualities
	if (_quality_stats_mode != QUALITY_RLE)
	{
		const bool truncated_plain = _quality_stats_mode == QUALITY_PLAIN_TRUNC;
		StoreQualityPlain(bit_stream, qua_code, qua_huf_codes, n_qualities, truncated_plain);
	}
	else
	{
		StoreQualityRLE(bit_stream, qua_code, qua_huf_codes, run_huf_codes, n_qualities);
	}

	// DNA
	const bool use_lz = (fastq_flags & FLAG_TRY_LZ) != 0;
	if ((fastq_flags & FLAG_DNA_PLAIN) == 0)
	{
		StoreDNAHuf(bit_stream, lz_matcher, sym_code, sym_huf_codes, use_lz);
	}
	else
	{
		StoreDNAPlain(bit_stream, lz_matcher, sym_code, use_lz);
	}

#if (D_COMPUTE_RECORDS_CRC_PER_BLOCK)
	uint32 records_crc = ComputeRecordsCrc32();
	bit_stream.PutWord(records_crc);
#endif
}