예제 #1
0
void MapBlock::deSerialize(std::istream &is, u8 version, bool disk)
{
	if(!ser_ver_supported(version))
		throw VersionMismatchException("ERROR: MapBlock format not supported");
	
	TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())<<std::endl);

	m_day_night_differs_expired = false;

	if(version <= 21)
	{
		deSerialize_pre22(is, version, disk);
		return;
	}

	u8 flags = readU8(is);
	is_underground = (flags & 0x01) ? true : false;
	m_day_night_differs = (flags & 0x02) ? true : false;
	m_lighting_expired = (flags & 0x04) ? true : false;
	m_generated = (flags & 0x08) ? false : true;

	/*
		Bulk node data
	*/
	TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
			<<": Bulk node data"<<std::endl);
	u32 nodecount = MAP_BLOCKSIZE*MAP_BLOCKSIZE*MAP_BLOCKSIZE;
	u8 content_width = readU8(is);
	u8 params_width = readU8(is);
	if(content_width != 1 && content_width != 2)
		throw SerializationError("MapBlock::deSerialize(): invalid content_width");
	if(params_width != 2)
		throw SerializationError("MapBlock::deSerialize(): invalid params_width");
	MapNode::deSerializeBulk(is, version, data, nodecount,
			content_width, params_width, true);

	/*
		NodeMetadata
	*/
	TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
			<<": Node metadata"<<std::endl);
	// Ignore errors
	try{
		std::ostringstream oss(std::ios_base::binary);
		decompressZlib(is, oss);
		std::istringstream iss(oss.str(), std::ios_base::binary);
		if(version >= 23)
			m_node_metadata.deSerialize(iss, m_gamedef);
		else
			content_nodemeta_deserialize_legacy(iss,
					&m_node_metadata, &m_node_timers,
					m_gamedef);
	}
	catch(SerializationError &e)
	{
		errorstream<<"WARNING: MapBlock::deSerialize(): Ignoring an error"
				<<" while deserializing node metadata at ("
				<<PP(getPos())<<": "<<e.what()<<std::endl;
	}

	/*
		Data that is only on disk
	*/
	if(disk)
	{
		// Node timers
		if(version == 23){
			// Read unused zero
			readU8(is);
		}
		if(version == 24){
			TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
					<<": Node timers (ver==24)"<<std::endl);
			m_node_timers.deSerialize(is, version);
		}

		// Static objects
		TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
				<<": Static objects"<<std::endl);
		m_static_objects.deSerialize(is);
		
		// Timestamp
		TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
				<<": Timestamp"<<std::endl);
		setTimestamp(readU32(is));
		m_disk_timestamp = m_timestamp;
		
		// Dynamically re-set ids based on node names
		TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
				<<": NameIdMapping"<<std::endl);
		NameIdMapping nimap;
		nimap.deSerialize(is);
		correctBlockNodeIds(&nimap, data, m_gamedef);

		if(version >= 25){
			TRACESTREAM(<<"MapBlock::deSerialize "<<PP(getPos())
					<<": Node timers (ver>=25)"<<std::endl);
			m_node_timers.deSerialize(is, version);
		}
예제 #2
0
void *EmergeThread::run()
{
	DSTACK(FUNCTION_NAME);
	BEGIN_DEBUG_EXCEPTION_HANDLER

	v3s16 pos;

	m_map    = (ServerMap *)&(m_server->m_env->getMap());
	m_emerge = m_server->m_emerge;
	m_mapgen = m_emerge->m_mapgens[id];
	enable_mapgen_debug_info = m_emerge->enable_mapgen_debug_info;

	reg("EmergeThread" + itos(id), 5);

	while (!stopRequested()) {
	try {
		std::map<v3s16, MapBlock *> modified_blocks;
		BlockEmergeData bedata;
		BlockMakeData bmdata;
		EmergeAction action;
		MapBlock *block;

		if (!popBlockEmerge(&pos, &bedata)) {
			m_queue_event.wait();
			continue;
		}

		if (blockpos_over_limit(pos))
			continue;

		bool allow_gen = bedata.flags & BLOCK_EMERGE_ALLOW_GEN;
		EMERGE_DBG_OUT("pos=" PP(pos) " allow_gen=" << allow_gen);

		action = getBlockOrStartGen(pos, allow_gen, &block, &bmdata);
		if (action == EMERGE_GENERATED) {
			{
				ScopeProfiler sp(g_profiler,
					"EmergeThread: Mapgen::makeChunk", SPT_AVG);
				TimeTaker t("mapgen::make_block()");

				m_mapgen->makeChunk(&bmdata);

				if (enable_mapgen_debug_info == false)
					t.stop(true); // Hide output
			}

			block = finishGen(pos, &bmdata, &modified_blocks);
		}

		runCompletionCallbacks(pos, action, bedata.callbacks);

		if (block) {
			//modified_blocks[pos] = block;
		} else if (allow_gen)
			verbosestream<<"nothing generated at "<<pos<< " emerge action="<< action <<std::endl;

		if (modified_blocks.size() > 0)
			m_server->SetBlocksNotSent(/*modified_blocks*/);

		if (m_mapgen->heat_cache.size() > 1000) {
			m_mapgen->heat_cache.clear();
			m_mapgen->humidity_cache.clear();
		}
	} catch (VersionMismatchException &e) {
		std::ostringstream err;
		err << "World data version mismatch in MapBlock " << PP(pos) << std::endl
			<< "----" << std::endl
			<< "\"" << e.what() << "\"" << std::endl
			<< "See debug.txt." << std::endl
			<< "World probably saved by a newer version of " PROJECT_NAME_C "."
			<< std::endl;
		debug_stacks_print();
		m_server->setAsyncFatalError(err.str());
	} catch (SerializationError &e) {
		std::ostringstream err;
		err << "Invalid data in MapBlock " << PP(pos) << std::endl
			<< "----" << std::endl
			<< "\"" << e.what() << "\"" << std::endl
			<< "See debug.txt." << std::endl
			<< "You can ignore this using [ignore_world_load_errors = true]."
			<< std::endl;
		debug_stacks_print();
		m_server->setAsyncFatalError(err.str());
	} catch (std::exception &e) {
		errorstream << "emerge: exception at " << pos << " : " << e.what() << std::endl;
	}
	}

	END_DEBUG_EXCEPTION_HANDLER
	return NULL;
}
예제 #3
0
void set_vmode_clk(vmode_t mode)
{
    enc_clk_val_t *p_enc = &setting_enc_clk_val[0];
    int i = sizeof(setting_enc_clk_val) / sizeof(enc_clk_val_t);
    int j = 0;
    
    printk("mode is: %d\n", mode);
    for (j = 0; j < i; j++){
        if(mode == p_enc[j].mode)
            break;
    }
    set_viu_path(p_enc[j].viu_path, p_enc[j].viu_type);
    set_hpll_clk_out(p_enc[j].hpll_clk_out);
    set_hpll_hdmi_od(p_enc[j].hpll_hdmi_od);
    set_vid_pll_div(p_enc[j].vid_pll_div);
    set_clk_final_div(p_enc[j].clk_final_div);
    set_hdmi_tx_pixel_div(p_enc[j].hdmi_tx_pixel_div);
    set_encp_div(p_enc[j].encp_div);
    set_enci_div(p_enc[j].enci_div);
    set_enct_div(p_enc[j].enct_div);
    set_encl_div(p_enc[j].encl_div);
    set_vdac0_div(p_enc[j].vdac0_div);
    set_vdac1_div(p_enc[j].vdac1_div);

    // If VCO outputs 1488, then we will reset it to exact 1485
    // please note, don't forget to re-config CNTL3/4
    if(((READ_CBUS_REG(HHI_VID_PLL_CNTL) & 0x7fff) == 0x43e)||((READ_CBUS_REG(HHI_VID_PLL_CNTL) & 0x7fff) == 0x21ef)) {
        WRITE_CBUS_REG_BITS(HHI_VID_PLL_CNTL, 0x21ef, 0, 14);
        WRITE_CBUS_REG(HHI_VID_PLL_CNTL3, 0x4b525012);
        WRITE_CBUS_REG(HHI_VID_PLL_CNTL4, 0x42000101);
    }
    
// For debug only
#if 0
    printk("hdmi debug tag\n%s\n%s[%d]\n", __FILE__, __FUNCTION__, __LINE__);
#define P(a)  printk("%s 0x%04x: 0x%08x\n", #a, a, READ_CBUS_REG(a))
    P(HHI_VID_PLL_CNTL);
    P(HHI_VID_DIVIDER_CNTL);
    P(HHI_VID_CLK_CNTL);
    P(HHI_VID_CLK_DIV);
    P(HHI_HDMI_CLK_CNTL);
    P(HHI_VIID_CLK_DIV);
#define PP(a) printk("%s(%d): %d MHz\n", #a, a, clk_util_clk_msr(a))
    PP(CTS_PWM_A_CLK        );
    PP(CTS_PWM_B_CLK        );
    PP(CTS_PWM_C_CLK        );
    PP(CTS_PWM_D_CLK        );
    PP(CTS_ETH_RX_TX        );
    PP(CTS_PCM_MCLK         );
    PP(CTS_PCM_SCLK         );
    PP(CTS_VDIN_MEAS_CLK    );
    PP(CTS_VDAC_CLK1        );
    PP(CTS_HDMI_TX_PIXEL_CLK);
    PP(CTS_MALI_CLK         );
    PP(CTS_SDHC_CLK1        );
    PP(CTS_SDHC_CLK0        );
    PP(CTS_AUDAC_CLKPI      );
    PP(CTS_A9_CLK           );
    PP(CTS_DDR_CLK          );
    PP(CTS_VDAC_CLK0        );
    PP(CTS_SAR_ADC_CLK      );
    PP(CTS_ENCI_CLK         );
    PP(SC_CLK_INT           );
    PP(USB_CLK_12MHZ        );
    PP(LVDS_FIFO_CLK        );
    PP(HDMI_CH3_TMDSCLK     );
    PP(MOD_ETH_CLK50_I      );
    PP(MOD_AUDIN_AMCLK_I    );
    PP(CTS_BTCLK27          );
    PP(CTS_HDMI_SYS_CLK     );
    PP(CTS_LED_PLL_CLK      );
    PP(CTS_VGHL_PLL_CLK     );
    PP(CTS_FEC_CLK_2        );
    PP(CTS_FEC_CLK_1        );
    PP(CTS_FEC_CLK_0        );
    PP(CTS_AMCLK            );
    PP(VID2_PLL_CLK         );
    PP(CTS_ETH_RMII         );
    PP(CTS_ENCT_CLK         );
    PP(CTS_ENCL_CLK         );
    PP(CTS_ENCP_CLK         );
    PP(CLK81                );
    PP(VID_PLL_CLK          );
    PP(AUD_PLL_CLK          );
    PP(MISC_PLL_CLK         );
    PP(DDR_PLL_CLK          );
    PP(SYS_PLL_CLK          );
    PP(AM_RING_OSC_CLK_OUT1 );
    PP(AM_RING_OSC_CLK_OUT0 );
#endif
}
예제 #4
0
static bool migrate_database(const GameParams &game_params, const Settings &cmd_args)
{
	std::string migrate_to = cmd_args.get("migrate");
	Settings world_mt;
	std::string world_mt_path = game_params.world_path + DIR_DELIM + "world.mt";
	if (!world_mt.readConfigFile(world_mt_path.c_str())) {
		errorstream << "Cannot read world.mt!" << std::endl;
		return false;
	}
	if (!world_mt.exists("backend")) {
		errorstream << "Please specify your current backend in world.mt:"
			<< std::endl
			<< "	backend = {sqlite3|leveldb|redis|dummy}"
			<< std::endl;
		return false;
	}
	std::string backend = world_mt.get("backend");
	if (backend == migrate_to) {
		errorstream << "Cannot migrate: new backend is same"
			<< " as the old one" << std::endl;
		return false;
	}
	Database *old_db = ServerMap::createDatabase(backend, game_params.world_path, world_mt),
		*new_db = ServerMap::createDatabase(migrate_to, game_params.world_path, world_mt);

	u32 count = 0;
	time_t last_update_time = 0;
	bool &kill = *porting::signal_handler_killstatus();

	std::vector<v3s16> blocks;
	old_db->listAllLoadableBlocks(blocks);
	new_db->beginSave();
	for (std::vector<v3s16>::const_iterator it = blocks.begin(); it != blocks.end(); ++it) {
		if (kill) return false;

		const std::string &data = old_db->loadBlock(*it);
		if (!data.empty()) {
			new_db->saveBlock(*it, data);
		} else {
			errorstream << "Failed to load block " << PP(*it) << ", skipping it." << std::endl;
		}
		if (++count % 0xFF == 0 && time(NULL) - last_update_time >= 1) {
			std::cerr << " Migrated " << count << " blocks, "
				<< (100.0 * count / blocks.size()) << "% completed.\r";
			new_db->endSave();
			new_db->beginSave();
			last_update_time = time(NULL);
		}
	}
	std::cerr << std::endl;
	new_db->endSave();
	delete old_db;
	delete new_db;

	actionstream << "Successfully migrated " << count << " blocks" << std::endl;
	world_mt.set("backend", migrate_to);
	if (!world_mt.updateConfigFile(world_mt_path.c_str()))
		errorstream << "Failed to update world.mt!" << std::endl;
	else
		actionstream << "world.mt updated" << std::endl;

	return true;
}
예제 #5
0
파일: emerge.cpp 프로젝트: BpTsTG/minetest
void *EmergeThread::Thread() {
	ThreadStarted();
	log_register_thread("EmergeThread" + itos(id));
	DSTACK(__FUNCTION_NAME);
	BEGIN_DEBUG_EXCEPTION_HANDLER

	v3s16 last_tried_pos(-32768,-32768,-32768); // For error output
	v3s16 p;
	u8 flags;

	map    = (ServerMap *)&(m_server->m_env->getMap());
	emerge = m_server->m_emerge;
	mapgen = emerge->mapgen[id];
	enable_mapgen_debug_info = emerge->mapgen_debug_info;

	while (!StopRequested())
	try {
		if (!popBlockEmerge(&p, &flags)) {
			qevent.wait();
			continue;
		}

		last_tried_pos = p;
		if (blockpos_over_limit(p))
			continue;

		bool allow_generate = flags & BLOCK_EMERGE_ALLOWGEN;
		EMERGE_DBG_OUT("p=" PP(p) " allow_generate=" << allow_generate);

		/*
			Try to fetch block from memory or disk.
			If not found and asked to generate, initialize generator.
		*/
		BlockMakeData data;
		MapBlock *block = NULL;
		std::map<v3s16, MapBlock *> modified_blocks;

		if (getBlockOrStartGen(p, &block, &data, allow_generate) && mapgen) {
			{
				ScopeProfiler sp(g_profiler, "EmergeThread: Mapgen::makeChunk", SPT_AVG);
				TimeTaker t("mapgen::make_block()");

				mapgen->makeChunk(&data);

				if (enable_mapgen_debug_info == false)
					t.stop(true); // Hide output
			}

			{
				//envlock: usually 0ms, but can take either 30 or 400ms to acquire
				JMutexAutoLock envlock(m_server->m_env_mutex);
				ScopeProfiler sp(g_profiler, "EmergeThread: after "
						"Mapgen::makeChunk (envlock)", SPT_AVG);

				map->finishBlockMake(&data, modified_blocks);

				block = map->getBlockNoCreateNoEx(p);
				if (block) {
					/*
						Do some post-generate stuff
					*/
					v3s16 minp = data.blockpos_min * MAP_BLOCKSIZE;
					v3s16 maxp = data.blockpos_max * MAP_BLOCKSIZE +
								 v3s16(1,1,1) * (MAP_BLOCKSIZE - 1);

					// Ignore map edit events, they will not need to be sent
					// to anybody because the block hasn't been sent to anybody
					MapEditEventAreaIgnorer
						ign(&m_server->m_ignore_map_edit_events_area,
						VoxelArea(minp, maxp));
					try {  // takes about 90ms with -O1 on an e3-1230v2
						m_server->getScriptIface()->environment_OnGenerated(
								minp, maxp, emerge->getBlockSeed(minp));
					} catch(LuaError &e) {
						m_server->setAsyncFatalError(e.what());
					}

					EMERGE_DBG_OUT("ended up with: " << analyze_block(block));

					m_server->m_env->activateBlock(block, 0);
				}
			}
		}

		/*
			Set sent status of modified blocks on clients
		*/
		// Add the originally fetched block to the modified list
		if (block)
			modified_blocks[p] = block;

		if (modified_blocks.size() > 0) {
			m_server->SetBlocksNotSent(modified_blocks);
		}
	}
	catch (VersionMismatchException &e) {
		std::ostringstream err;
		err << "World data version mismatch in MapBlock "<<PP(last_tried_pos)<<std::endl;
		err << "----"<<std::endl;
		err << "\""<<e.what()<<"\""<<std::endl;
		err << "See debug.txt."<<std::endl;
		err << "World probably saved by a newer version of Minetest."<<std::endl;
		m_server->setAsyncFatalError(err.str());
	}
	catch (SerializationError &e) {
		std::ostringstream err;
		err << "Invalid data in MapBlock "<<PP(last_tried_pos)<<std::endl;
		err << "----"<<std::endl;
		err << "\""<<e.what()<<"\""<<std::endl;
		err << "See debug.txt."<<std::endl;
		err << "You can ignore this using [ignore_world_load_errors = true]."<<std::endl;
		m_server->setAsyncFatalError(err.str());
	}

	END_DEBUG_EXCEPTION_HANDLER(errorstream)
	log_deregister_thread();
	return NULL;
}
예제 #6
0
  static void
    mshabal256_compress(mshabal256_context *sc,
    const unsigned char *buf0, const unsigned char *buf1,
    const unsigned char *buf2, const unsigned char *buf3,
    const unsigned char *buf4, const unsigned char *buf5,
    const unsigned char *buf6, const unsigned char *buf7,
    size_t num)
  {
    union {
      u32 words[64 * MSHABAL256_FACTOR];
      __m256i data[16];
    } u;
    size_t j;
    __m256i A[12], B[16], C[16];
    __m256i one;

    for (j = 0; j < 12; j++)
      A[j] = _mm256_loadu_si256((__m256i *)sc->state + j);
    for (j = 0; j < 16; j++) {
      B[j] = _mm256_loadu_si256((__m256i *)sc->state + j + 12);
      C[j] = _mm256_loadu_si256((__m256i *)sc->state + j + 28);
    }
    one = _mm256_set1_epi32(C32(0xFFFFFFFF));

#define M(i)   _mm256_load_si256(u.data + (i))

    while (num-- > 0) {

      for (j = 0; j < 64 * MSHABAL256_FACTOR; j += 4 * MSHABAL256_FACTOR) {
        size_t o = j / MSHABAL256_FACTOR;
        u.words[j + 0] = *(u32 *)(buf0 + o);
        u.words[j + 1] = *(u32 *)(buf1 + o);
        u.words[j + 2] = *(u32 *)(buf2 + o);
        u.words[j + 3] = *(u32 *)(buf3 + o);
        u.words[j + 4] = *(u32 *)(buf4 + o);
        u.words[j + 5] = *(u32 *)(buf5 + o);
        u.words[j + 6] = *(u32 *)(buf6 + o);
        u.words[j + 7] = *(u32 *)(buf7 + o);
      }

      for (j = 0; j < 16; j++)
        B[j] = _mm256_add_epi32(B[j], M(j));

      A[0] = _mm256_xor_si256(A[0], _mm256_set1_epi32(sc->Wlow));
      A[1] = _mm256_xor_si256(A[1], _mm256_set1_epi32(sc->Whigh));

      for (j = 0; j < 16; j++)
        B[j] = _mm256_or_si256(_mm256_slli_epi32(B[j], 17),
        _mm256_srli_epi32(B[j], 15));

#define PP(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
    __m256i tt; \
    tt = _mm256_or_si256(_mm256_slli_epi32(xa1, 15), \
      _mm256_srli_epi32(xa1, 17)); \
    tt = _mm256_add_epi32(_mm256_slli_epi32(tt, 2), tt); \
    tt = _mm256_xor_si256(_mm256_xor_si256(xa0, tt), xc); \
    tt = _mm256_add_epi32(_mm256_slli_epi32(tt, 1), tt); \
    tt = _mm256_xor_si256(\
      _mm256_xor_si256(tt, xb1), \
      _mm256_xor_si256(_mm256_andnot_si256(xb3, xb2), xm)); \
    xa0 = tt; \
    tt = xb0; \
    tt = _mm256_or_si256(_mm256_slli_epi32(tt, 1), \
      _mm256_srli_epi32(tt, 31)); \
    xb0 = _mm256_xor_si256(tt, _mm256_xor_si256(xa0, one)); \
        } while (0)

      PP(A[0x0], A[0xB], B[0x0], B[0xD], B[0x9], B[0x6], C[0x8], M(0x0));
      PP(A[0x1], A[0x0], B[0x1], B[0xE], B[0xA], B[0x7], C[0x7], M(0x1));
      PP(A[0x2], A[0x1], B[0x2], B[0xF], B[0xB], B[0x8], C[0x6], M(0x2));
      PP(A[0x3], A[0x2], B[0x3], B[0x0], B[0xC], B[0x9], C[0x5], M(0x3));
      PP(A[0x4], A[0x3], B[0x4], B[0x1], B[0xD], B[0xA], C[0x4], M(0x4));
      PP(A[0x5], A[0x4], B[0x5], B[0x2], B[0xE], B[0xB], C[0x3], M(0x5));
      PP(A[0x6], A[0x5], B[0x6], B[0x3], B[0xF], B[0xC], C[0x2], M(0x6));
      PP(A[0x7], A[0x6], B[0x7], B[0x4], B[0x0], B[0xD], C[0x1], M(0x7));
      PP(A[0x8], A[0x7], B[0x8], B[0x5], B[0x1], B[0xE], C[0x0], M(0x8));
      PP(A[0x9], A[0x8], B[0x9], B[0x6], B[0x2], B[0xF], C[0xF], M(0x9));
      PP(A[0xA], A[0x9], B[0xA], B[0x7], B[0x3], B[0x0], C[0xE], M(0xA));
      PP(A[0xB], A[0xA], B[0xB], B[0x8], B[0x4], B[0x1], C[0xD], M(0xB));
      PP(A[0x0], A[0xB], B[0xC], B[0x9], B[0x5], B[0x2], C[0xC], M(0xC));
      PP(A[0x1], A[0x0], B[0xD], B[0xA], B[0x6], B[0x3], C[0xB], M(0xD));
      PP(A[0x2], A[0x1], B[0xE], B[0xB], B[0x7], B[0x4], C[0xA], M(0xE));
      PP(A[0x3], A[0x2], B[0xF], B[0xC], B[0x8], B[0x5], C[0x9], M(0xF));

      PP(A[0x4], A[0x3], B[0x0], B[0xD], B[0x9], B[0x6], C[0x8], M(0x0));
      PP(A[0x5], A[0x4], B[0x1], B[0xE], B[0xA], B[0x7], C[0x7], M(0x1));
      PP(A[0x6], A[0x5], B[0x2], B[0xF], B[0xB], B[0x8], C[0x6], M(0x2));
      PP(A[0x7], A[0x6], B[0x3], B[0x0], B[0xC], B[0x9], C[0x5], M(0x3));
      PP(A[0x8], A[0x7], B[0x4], B[0x1], B[0xD], B[0xA], C[0x4], M(0x4));
      PP(A[0x9], A[0x8], B[0x5], B[0x2], B[0xE], B[0xB], C[0x3], M(0x5));
      PP(A[0xA], A[0x9], B[0x6], B[0x3], B[0xF], B[0xC], C[0x2], M(0x6));
      PP(A[0xB], A[0xA], B[0x7], B[0x4], B[0x0], B[0xD], C[0x1], M(0x7));
      PP(A[0x0], A[0xB], B[0x8], B[0x5], B[0x1], B[0xE], C[0x0], M(0x8));
      PP(A[0x1], A[0x0], B[0x9], B[0x6], B[0x2], B[0xF], C[0xF], M(0x9));
      PP(A[0x2], A[0x1], B[0xA], B[0x7], B[0x3], B[0x0], C[0xE], M(0xA));
      PP(A[0x3], A[0x2], B[0xB], B[0x8], B[0x4], B[0x1], C[0xD], M(0xB));
      PP(A[0x4], A[0x3], B[0xC], B[0x9], B[0x5], B[0x2], C[0xC], M(0xC));
      PP(A[0x5], A[0x4], B[0xD], B[0xA], B[0x6], B[0x3], C[0xB], M(0xD));
      PP(A[0x6], A[0x5], B[0xE], B[0xB], B[0x7], B[0x4], C[0xA], M(0xE));
      PP(A[0x7], A[0x6], B[0xF], B[0xC], B[0x8], B[0x5], C[0x9], M(0xF));

      PP(A[0x8], A[0x7], B[0x0], B[0xD], B[0x9], B[0x6], C[0x8], M(0x0));
      PP(A[0x9], A[0x8], B[0x1], B[0xE], B[0xA], B[0x7], C[0x7], M(0x1));
      PP(A[0xA], A[0x9], B[0x2], B[0xF], B[0xB], B[0x8], C[0x6], M(0x2));
      PP(A[0xB], A[0xA], B[0x3], B[0x0], B[0xC], B[0x9], C[0x5], M(0x3));
      PP(A[0x0], A[0xB], B[0x4], B[0x1], B[0xD], B[0xA], C[0x4], M(0x4));
      PP(A[0x1], A[0x0], B[0x5], B[0x2], B[0xE], B[0xB], C[0x3], M(0x5));
      PP(A[0x2], A[0x1], B[0x6], B[0x3], B[0xF], B[0xC], C[0x2], M(0x6));
      PP(A[0x3], A[0x2], B[0x7], B[0x4], B[0x0], B[0xD], C[0x1], M(0x7));
      PP(A[0x4], A[0x3], B[0x8], B[0x5], B[0x1], B[0xE], C[0x0], M(0x8));
      PP(A[0x5], A[0x4], B[0x9], B[0x6], B[0x2], B[0xF], C[0xF], M(0x9));
      PP(A[0x6], A[0x5], B[0xA], B[0x7], B[0x3], B[0x0], C[0xE], M(0xA));
      PP(A[0x7], A[0x6], B[0xB], B[0x8], B[0x4], B[0x1], C[0xD], M(0xB));
      PP(A[0x8], A[0x7], B[0xC], B[0x9], B[0x5], B[0x2], C[0xC], M(0xC));
      PP(A[0x9], A[0x8], B[0xD], B[0xA], B[0x6], B[0x3], C[0xB], M(0xD));
      PP(A[0xA], A[0x9], B[0xE], B[0xB], B[0x7], B[0x4], C[0xA], M(0xE));
      PP(A[0xB], A[0xA], B[0xF], B[0xC], B[0x8], B[0x5], C[0x9], M(0xF));

      A[0xB] = _mm256_add_epi32(A[0xB], C[0x6]);
      A[0xA] = _mm256_add_epi32(A[0xA], C[0x5]);
      A[0x9] = _mm256_add_epi32(A[0x9], C[0x4]);
      A[0x8] = _mm256_add_epi32(A[0x8], C[0x3]);
      A[0x7] = _mm256_add_epi32(A[0x7], C[0x2]);
      A[0x6] = _mm256_add_epi32(A[0x6], C[0x1]);
      A[0x5] = _mm256_add_epi32(A[0x5], C[0x0]);
      A[0x4] = _mm256_add_epi32(A[0x4], C[0xF]);
      A[0x3] = _mm256_add_epi32(A[0x3], C[0xE]);
      A[0x2] = _mm256_add_epi32(A[0x2], C[0xD]);
      A[0x1] = _mm256_add_epi32(A[0x1], C[0xC]);
      A[0x0] = _mm256_add_epi32(A[0x0], C[0xB]);
      A[0xB] = _mm256_add_epi32(A[0xB], C[0xA]);
      A[0xA] = _mm256_add_epi32(A[0xA], C[0x9]);
      A[0x9] = _mm256_add_epi32(A[0x9], C[0x8]);
      A[0x8] = _mm256_add_epi32(A[0x8], C[0x7]);
      A[0x7] = _mm256_add_epi32(A[0x7], C[0x6]);
      A[0x6] = _mm256_add_epi32(A[0x6], C[0x5]);
      A[0x5] = _mm256_add_epi32(A[0x5], C[0x4]);
      A[0x4] = _mm256_add_epi32(A[0x4], C[0x3]);
      A[0x3] = _mm256_add_epi32(A[0x3], C[0x2]);
      A[0x2] = _mm256_add_epi32(A[0x2], C[0x1]);
      A[0x1] = _mm256_add_epi32(A[0x1], C[0x0]);
      A[0x0] = _mm256_add_epi32(A[0x0], C[0xF]);
      A[0xB] = _mm256_add_epi32(A[0xB], C[0xE]);
      A[0xA] = _mm256_add_epi32(A[0xA], C[0xD]);
      A[0x9] = _mm256_add_epi32(A[0x9], C[0xC]);
      A[0x8] = _mm256_add_epi32(A[0x8], C[0xB]);
      A[0x7] = _mm256_add_epi32(A[0x7], C[0xA]);
      A[0x6] = _mm256_add_epi32(A[0x6], C[0x9]);
      A[0x5] = _mm256_add_epi32(A[0x5], C[0x8]);
      A[0x4] = _mm256_add_epi32(A[0x4], C[0x7]);
      A[0x3] = _mm256_add_epi32(A[0x3], C[0x6]);
      A[0x2] = _mm256_add_epi32(A[0x2], C[0x5]);
      A[0x1] = _mm256_add_epi32(A[0x1], C[0x4]);
      A[0x0] = _mm256_add_epi32(A[0x0], C[0x3]);

#define SWAP_AND_SUB(xb, xc, xm)   do { \
    __m256i tmp; \
    tmp = xb; \
    xb = _mm256_sub_epi32(xc, xm); \
    xc = tmp; \
        } while (0)

      SWAP_AND_SUB(B[0x0], C[0x0], M(0x0));
      SWAP_AND_SUB(B[0x1], C[0x1], M(0x1));
      SWAP_AND_SUB(B[0x2], C[0x2], M(0x2));
      SWAP_AND_SUB(B[0x3], C[0x3], M(0x3));
      SWAP_AND_SUB(B[0x4], C[0x4], M(0x4));
      SWAP_AND_SUB(B[0x5], C[0x5], M(0x5));
      SWAP_AND_SUB(B[0x6], C[0x6], M(0x6));
      SWAP_AND_SUB(B[0x7], C[0x7], M(0x7));
      SWAP_AND_SUB(B[0x8], C[0x8], M(0x8));
      SWAP_AND_SUB(B[0x9], C[0x9], M(0x9));
      SWAP_AND_SUB(B[0xA], C[0xA], M(0xA));
      SWAP_AND_SUB(B[0xB], C[0xB], M(0xB));
      SWAP_AND_SUB(B[0xC], C[0xC], M(0xC));
      SWAP_AND_SUB(B[0xD], C[0xD], M(0xD));
      SWAP_AND_SUB(B[0xE], C[0xE], M(0xE));
      SWAP_AND_SUB(B[0xF], C[0xF], M(0xF));

      buf0 += 64;
      buf1 += 64;
      buf2 += 64;
      buf3 += 64;
      buf4 += 64;
      buf5 += 64;
      buf6 += 64;
      buf7 += 64;
      if (++sc->Wlow == 0)
        sc->Whigh++;

    }

    for (j = 0; j < 12; j++)
      _mm256_storeu_si256((__m256i *)sc->state + j, A[j]);
    for (j = 0; j < 16; j++) {
      _mm256_storeu_si256((__m256i *)sc->state + j + 12, B[j]);
      _mm256_storeu_si256((__m256i *)sc->state + j + 28, C[j]);
    }

#undef M
  }