Expr CodeGen_X86::mulhi_shr(Expr a, Expr b, int shr) { Type ty = a.type(); if (ty.is_vector() && ty.bits() == 16) { // We can use pmulhu for this op. Expr p; if (ty.is_uint()) { p = u16(u32(a) * u32(b) / 65536); } else { p = i16(i32(a) * i32(b) / 65536); } if (shr) { p = p >> shr; } return p; }
void ImageLibrary::addImage(char const* name, Image* image, bool big) { Locker lock_(lock); if (images.has(name)) return; String name16 = String::format("images\\%s.bin", name); File* file16 = mpq->openFile(name16, File::REWRITE); if (file16) { Image i16(16, 16); BLTInfo blt16(image); blt16.setDstSize(16, 16); i16.blt(blt16); i16.modBrightness(1.16f); i16.sharpen(0.08f); i16.writeBIN(file16); delete file16; loadImage(name16); } if (big) { String name32 = String::format("images\\big%s.bin", name); File* file32 = mpq->openFile(name32, File::REWRITE); if (file32) { Image i32(32, 32); BLTInfo blt32(image); blt32.setDstSize(32, 32); i32.blt(blt32); i32.modBrightness(1.16f); // i32.sharpen(0.08f); i32.writeBIN(file32); delete file32; loadImage(name32); } } }
// Returns the index in `imgList` of the 16x16 icon for the image file `name`.
//
// Results are cached in `images`, keyed by the file title of `name`. On a
// cache miss the file is loaded through an MPQLoader built from the
// application's loader with `map` added as an extra archive; if that load
// fails, a second attempt is made after String::setExtension(name, ".blp").
// The decoded image is scaled to 16x16, brightened and sharpened, converted to
// an HBITMAP and appended to `imgList`.
//
// Returns 0 when the image cannot be decoded (image.bits() is zero) or when
// the image list rejects the bitmap; neither failure is cached.
int MapData::getImageIndex(String name)
{
  String title = String::getFileTitle(name);
  if (images.has(title))
    return images.get(title);

  MPQLoader loader(*getApp()->getWarLoader());
  loader.addArchive(map);
  File* file = loader.load(name);
  if (file == NULL)
  {
    String::setExtension(name, ".blp");
    file = loader.load(name);
  }

  // Image is constructed even when `file` is still NULL; bits() is then
  // relied on to report that nothing was decoded.
  Image image(file);
  delete file;
  if (!image.bits())
    return 0;

  Image icon(16, 16);
  BLTInfo blt(&image);
  blt.setDstSize(16, 16);
  icon.blt(blt);
  icon.modBrightness(1.16f);
  icon.sharpen(0.08f);

  HBITMAP hBitmap = icon.createBitmap();
  int pos = ImageList_Add(imgList, hBitmap, NULL);
  DeleteObject(hBitmap);

  // ImageList_Add reports failure as -1. Do not cache or return that value as
  // if it were a valid image index; fall back to index 0 like the other
  // failure path.
  if (pos < 0)
    return 0;

  images.set(title, pos);
  return pos;
}
void CodeGen_X86::visit(const Cast *op) { if (!op->type.is_vector()) { // We only have peephole optimizations for vectors in here. CodeGen_Posix::visit(op); return; } vector<Expr> matches; struct Pattern { Target::Feature feature; bool wide_op; Type type; int min_lanes; string intrin; Expr pattern; }; static Pattern patterns[] = { {Target::AVX2, true, Int(8, 32), 0, "llvm.x86.avx2.padds.b", i8_sat(wild_i16x_ + wild_i16x_)}, {Target::FeatureEnd, true, Int(8, 16), 0, "llvm.x86.sse2.padds.b", i8_sat(wild_i16x_ + wild_i16x_)}, {Target::AVX2, true, Int(8, 32), 0, "llvm.x86.avx2.psubs.b", i8_sat(wild_i16x_ - wild_i16x_)}, {Target::FeatureEnd, true, Int(8, 16), 0, "llvm.x86.sse2.psubs.b", i8_sat(wild_i16x_ - wild_i16x_)}, #if LLVM_VERSION < 80 // Older LLVM versions support this as an intrinsic {Target::AVX2, true, UInt(8, 32), 0, "llvm.x86.avx2.paddus.b", u8_sat(wild_u16x_ + wild_u16x_)}, {Target::FeatureEnd, true, UInt(8, 16), 0, "llvm.x86.sse2.paddus.b", u8_sat(wild_u16x_ + wild_u16x_)}, {Target::AVX2, true, UInt(8, 32), 0, "llvm.x86.avx2.psubus.b", u8(max(wild_i16x_ - wild_i16x_, 0))}, {Target::FeatureEnd, true, UInt(8, 16), 0, "llvm.x86.sse2.psubus.b", u8(max(wild_i16x_ - wild_i16x_, 0))}, #else // LLVM 8.0+ require using helpers from x86.ll {Target::AVX2, true, UInt(8, 32), 0, "paddusbx32", u8_sat(wild_u16x_ + wild_u16x_)}, {Target::FeatureEnd, true, UInt(8, 16), 0, "paddusbx16", u8_sat(wild_u16x_ + wild_u16x_)}, {Target::AVX2, true, UInt(8, 32), 0, "psubusbx32", u8(max(wild_i16x_ - wild_i16x_, 0))}, {Target::FeatureEnd, true, UInt(8, 16), 0, "psubusbx16", u8(max(wild_i16x_ - wild_i16x_, 0))}, #endif {Target::AVX2, true, Int(16, 16), 0, "llvm.x86.avx2.padds.w", i16_sat(wild_i32x_ + wild_i32x_)}, {Target::FeatureEnd, true, Int(16, 8), 0, "llvm.x86.sse2.padds.w", i16_sat(wild_i32x_ + wild_i32x_)}, {Target::AVX2, true, Int(16, 16), 0, "llvm.x86.avx2.psubs.w", i16_sat(wild_i32x_ - wild_i32x_)}, {Target::FeatureEnd, true, Int(16, 8), 0, "llvm.x86.sse2.psubs.w", 
i16_sat(wild_i32x_ - wild_i32x_)}, #if LLVM_VERSION < 80 // Older LLVM versions support this as an intrinsic {Target::AVX2, true, UInt(16, 16), 0, "llvm.x86.avx2.paddus.w", u16_sat(wild_u32x_ + wild_u32x_)}, {Target::FeatureEnd, true, UInt(16, 8), 0, "llvm.x86.sse2.paddus.w", u16_sat(wild_u32x_ + wild_u32x_)}, {Target::AVX2, true, UInt(16, 16), 0, "llvm.x86.avx2.psubus.w", u16(max(wild_i32x_ - wild_i32x_, 0))}, {Target::FeatureEnd, true, UInt(16, 8), 0, "llvm.x86.sse2.psubus.w", u16(max(wild_i32x_ - wild_i32x_, 0))}, #else // LLVM 8.0+ require using helpers from x86.ll {Target::AVX2, true, UInt(16, 16), 0, "padduswx16", u16_sat(wild_u32x_ + wild_u32x_)}, {Target::FeatureEnd, true, UInt(16, 8), 0, "padduswx8", u16_sat(wild_u32x_ + wild_u32x_)}, {Target::AVX2, true, UInt(16, 16), 0, "psubuswx16", u16(max(wild_i32x_ - wild_i32x_, 0))}, {Target::FeatureEnd, true, UInt(16, 8), 0, "psubuswx8", u16(max(wild_i32x_ - wild_i32x_, 0))}, #endif // Only use the avx2 version if we have > 8 lanes {Target::AVX2, true, Int(16, 16), 9, "llvm.x86.avx2.pmulh.w", i16((wild_i32x_ * wild_i32x_) / 65536)}, {Target::AVX2, true, UInt(16, 16), 9, "llvm.x86.avx2.pmulhu.w", u16((wild_u32x_ * wild_u32x_) / 65536)}, {Target::FeatureEnd, true, Int(16, 8), 0, "llvm.x86.sse2.pmulh.w", i16((wild_i32x_ * wild_i32x_) / 65536)}, {Target::FeatureEnd, true, UInt(16, 8), 0, "llvm.x86.sse2.pmulhu.w", u16((wild_u32x_ * wild_u32x_) / 65536)}, // LLVM 6.0+ require using helpers from x86.ll {Target::AVX2, true, UInt(8, 32), 0, "pavgbx32", u8(((wild_u16x_ + wild_u16x_) + 1) / 2)}, {Target::FeatureEnd, true, UInt(8, 16), 0, "pavgbx16", u8(((wild_u16x_ + wild_u16x_) + 1) / 2)}, {Target::AVX2, true, UInt(16, 16), 0, "pavgwx16", u16(((wild_u32x_ + wild_u32x_) + 1) / 2)}, {Target::FeatureEnd, true, UInt(16, 8), 0, "pavgwx8", u16(((wild_u32x_ + wild_u32x_) + 1) / 2)}, {Target::AVX2, false, Int(16, 16), 0, "packssdwx16", i16_sat(wild_i32x_)}, {Target::FeatureEnd, false, Int(16, 8), 0, "packssdwx8", 
i16_sat(wild_i32x_)}, {Target::AVX2, false, Int(8, 32), 0, "packsswbx32", i8_sat(wild_i16x_)}, {Target::FeatureEnd, false, Int(8, 16), 0, "packsswbx16", i8_sat(wild_i16x_)}, {Target::AVX2, false, UInt(8, 32), 0, "packuswbx32", u8_sat(wild_i16x_)}, {Target::FeatureEnd, false, UInt(8, 16), 0, "packuswbx16", u8_sat(wild_i16x_)}, {Target::AVX2, false, UInt(16, 16), 0, "packusdwx16", u16_sat(wild_i32x_)}, {Target::SSE41, false, UInt(16, 8), 0, "packusdwx8", u16_sat(wild_i32x_)} }; for (size_t i = 0; i < sizeof(patterns)/sizeof(patterns[0]); i++) { const Pattern &pattern = patterns[i]; if (!target.has_feature(pattern.feature)) { continue; } if (op->type.lanes() < pattern.min_lanes) { continue; } if (expr_match(pattern.pattern, op, matches)) { bool match = true; if (pattern.wide_op) { // Try to narrow the matches to the target type. for (size_t i = 0; i < matches.size(); i++) { matches[i] = lossless_cast(op->type, matches[i]); if (!matches[i].defined()) match = false; } } if (match) { value = call_intrin(op->type, pattern.type.lanes(), pattern.intrin, matches); return; } } } // Workaround for https://llvm.org/bugs/show_bug.cgi?id=24512 // LLVM uses a numerically unstable method for vector // uint32->float conversion before AVX. if (op->value.type().element_of() == UInt(32) && op->type.is_float() && op->type.is_vector() && !target.has_feature(Target::AVX)) { Type signed_type = Int(32, op->type.lanes()); // Convert the top 31 bits to float using the signed version Expr top_bits = cast(signed_type, op->value / 2); top_bits = cast(op->type, top_bits); // Convert the bottom bit Expr bottom_bit = cast(signed_type, op->value % 2); bottom_bit = cast(op->type, bottom_bit); // Recombine as floats codegen(top_bits + top_bits + bottom_bit); return; } CodeGen_Posix::visit(op); }
void Colorization::setScaledValue(PointBuffer& data, double value, Dimension const& d, std::size_t pointIndex) const { float flt(0.0); boost::int8_t i8(0); boost::uint8_t u8(0); boost::int16_t i16(0); boost::uint16_t u16(0); boost::int32_t i32(0); boost::uint32_t u32(0); boost::int64_t i64(0); boost::uint64_t u64(0); boost::uint32_t size = d.getByteSize(); switch (d.getInterpretation()) { case dimension::Float: if (size == 4) { flt = static_cast<float>(value); data.setField<float>(d, pointIndex, flt); } if (size == 8) { data.setField<double>(d, pointIndex, value); } break; case dimension::SignedInteger: case dimension::SignedByte: if (size == 1) { i8 = d.removeScaling<boost::int8_t>(value); data.setField<boost::int8_t>(d, pointIndex, i8); } if (size == 2) { i16 = d.removeScaling<boost::int16_t>(value); data.setField<boost::int16_t>(d, pointIndex, i16); } if (size == 4) { i32 = d.removeScaling<boost::int32_t>(value); data.setField<boost::int32_t>(d, pointIndex, i32); } if (size == 8) { i64 = d.removeScaling<boost::int64_t>(value); data.setField<boost::int64_t>(d, pointIndex, i64); } break; case dimension::UnsignedInteger: case dimension::UnsignedByte: if (size == 1) { u8 = d.removeScaling<boost::uint8_t>(value); data.setField<boost::uint8_t>(d, pointIndex, u8); } if (size == 2) { u16 = d.removeScaling<boost::uint16_t>(value); data.setField<boost::uint16_t>(d, pointIndex, u16); } if (size == 4) { u32 = d.removeScaling<boost::uint32_t>(value); data.setField<boost::uint32_t>(d, pointIndex, u32); } if (size == 8) { u64 = d.removeScaling<boost::uint64_t>(value); data.setField<boost::uint64_t>(d, pointIndex, u64); } break; case dimension::Pointer: // stored as 64 bits, even on a 32-bit box case dimension::Undefined: throw pdal_error("Dimension data type unable to be reprojected"); } }
double Colorization::getScaledValue(PointBuffer& data, Dimension const& d, std::size_t pointIndex) const { double output(0.0); float flt(0.0); boost::int8_t i8(0); boost::uint8_t u8(0); boost::int16_t i16(0); boost::uint16_t u16(0); boost::int32_t i32(0); boost::uint32_t u32(0); boost::int64_t i64(0); boost::uint64_t u64(0); boost::uint32_t size = d.getByteSize(); switch (d.getInterpretation()) { case dimension::Float: if (size == 4) { flt = data.getField<float>(d, pointIndex); output = static_cast<double>(flt); } if (size == 8) { output = data.getField<double>(d, pointIndex); } break; case dimension::SignedInteger: case dimension::SignedByte: if (size == 1) { i8 = data.getField<boost::int8_t>(d, pointIndex); output = d.applyScaling<boost::int8_t>(i8); } if (size == 2) { i16 = data.getField<boost::int16_t>(d, pointIndex); output = d.applyScaling<boost::int16_t>(i16); } if (size == 4) { i32 = data.getField<boost::int32_t>(d, pointIndex); output = d.applyScaling<boost::int32_t>(i32); } if (size == 8) { i64 = data.getField<boost::int64_t>(d, pointIndex); output = d.applyScaling<boost::int64_t>(i64); } break; case dimension::UnsignedInteger: case dimension::UnsignedByte: if (size == 1) { u8 = data.getField<boost::uint8_t>(d, pointIndex); output = d.applyScaling<boost::uint8_t>(u8); } if (size == 2) { u16 = data.getField<boost::uint16_t>(d, pointIndex); output = d.applyScaling<boost::uint16_t>(u16); } if (size == 4) { u32 = data.getField<boost::uint32_t>(d, pointIndex); output = d.applyScaling<boost::uint32_t>(u32); } if (size == 8) { u64 = data.getField<boost::uint64_t>(d, pointIndex); output = d.applyScaling<boost::uint64_t>(u64); } break; case dimension::Pointer: // stored as 64 bits, even on a 32-bit box case dimension::Undefined: throw pdal_error("Dimension data type unable to be reprojected"); } return output; }
void flush_snd_frame() { tape_bit(); #ifdef MOD_GS flush_gs_frame(); #endif if (temp.sndblock) return; unsigned endframe = cpu.t - temp.cpu_t_at_frame_start; if (conf.sound.ay_scheme) { // sound chip present ay[0].end_frame(endframe); // if (conf.sound.ay_samples) mix_dig(ay[0]); if (conf.sound.ay_scheme > AY_SCHEME_SINGLE) { ay[1].end_frame(endframe); // if (conf.sound.ay_samples) mix_dig(ay[1]); if (conf.sound.ay_scheme == AY_SCHEME_PSEUDO) { unsigned char last = ay[0].get_r13_reloaded()? 13 : 12; for (unsigned char r = 0; r <= last; r++) ay[1].select(r), ay[1].write(0, ay[0].get_reg(r)); } } if (savesndtype == 2) { if (!vtxbuf) { vtxbuf = (unsigned char*)malloc(32768); vtxbufsize = 32768; vtxbuffilled = 0; } if (vtxbuffilled + 14 >= vtxbufsize) { vtxbufsize += 32768; vtxbuf = (unsigned char*)realloc(vtxbuf, vtxbufsize); } for (unsigned char r = 0; r < 14; r++) vtxbuf[vtxbuffilled+r] = ay[0].get_reg(r); if (!ay[0].get_r13_reloaded()) vtxbuf[vtxbuffilled+13] = 0xFF; vtxbuffilled += 14; } } Saa1099.end_frame(endframe); sound.end_frame(endframe); // if (comp.tape.play_pointer) // play tape pulses // comp.tape.sound.end_frame(endframe); //Alone Coder comp.tape_sound.end_frame(endframe); //Alone Coder // else comp.tape.sound.end_empty_frame(endframe); unsigned bufplay, n_samples; sndcounter.begin(); sndcounter.count(sound); // sndcounter.count(comp.tape.sound); //Alone Coder sndcounter.count(comp.tape_sound); //Alone Coder if (conf.sound.ay_scheme) { sndcounter.count(ay[0]); if (conf.sound.ay_scheme > AY_SCHEME_SINGLE) sndcounter.count(ay[1]); } sndcounter.count(Saa1099); #ifdef MOD_GS #ifdef MOD_GSZ80 if (conf.gs_type==1) sndcounter.count(z80gs::sound); #endif #ifdef MOD_GSBASS // if (conf.gs_type==2) { gs.mix_fx(); return; } #endif #endif // MOD_GS sndcounter.end(bufplay, n_samples); for (unsigned k = 0; k < n_samples; k++, bufplay++) { u32 v = sndbuf[bufplay & (SNDBUFSIZE-1)]; u32 Y; if(conf.RejectDC) // DC rejection filter { i16 x[2]; float y[2]; x[0] = i16(v & 
0xFFFF); x[1] = i16(v >> 16U); y[0] = 0.995f * (x[0] - x_1[0]) + 0.99f * y_1[0]; y[1] = 0.995f * (x[1] - x_1[1]) + 0.99f * y_1[1]; x_1[0] = x[0]; x_1[1] = x[1]; y_1[0] = y[0]; y_1[1] = y[1]; Y = ((i16(y[1]) & 0xFFFF)<<16) | (i16(y[0]) & 0xFFFF); } else {
void print_header( std::string const& model, std::string const& name, color::rgb<double> const& r ) { color_name<uint8_t> i8 ( r ); color_name<uint16_t> i16 ( r ); color_name<uint32_t> i32 ( r ); color_name<uint64_t> i64 ( r ); color_name<float> f ( r ); color_name<double> d ( r ); color_name<long double> ld ( r ); color::rgb<uint8_t> r32( r ); std::stringstream ss; ss << "#ifndef color_"<< model <<"_make_" << name << std::endl; ss << "#define color_"<< model <<"_make_" << name << std::endl; ss << std::endl; ss << "// ::color::make::" << name << "( c )" << std::endl; ss << std::endl; ss << " namespace color" << std::endl; ss << " {" << std::endl; ss << " namespace make" << std::endl; ss << " { //RGB equivalents: "; ss << "std::array<double,3>( { "<< d[0]<<", "<< d[1]<<", "<< d[2]<<" } )"; ss << " - "; ss << "rgb(" << std::setbase(10) <<(unsigned)r32[0] << "," << (unsigned)r32[1] << "," << (unsigned)r32[2] << ")" ; ss << " - "; ss << "#" << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)r32[0] << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)r32[1] << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)r32[2] ; ss << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << name << "( ::color::_internal::model< ::color::category::"<< model <<"_uint8 > & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array< std::uint8_t, " << i8.size() << " >( { " << "0x" << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)i8[0] << ", " << "0x" << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)i8[1] << ", " << "0x" << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)i8[2]; if( 4 == i8.size() ){ ss << ", 0x" << std::setbase(16) << std::setw(2) << std::setfill('0') << (unsigned)i8[3]; } ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << 
name << "( ::color::_internal::model< ::color::category::"<< model <<"_uint16 > & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array< std::uint16_t, " << i16.size() << " >( { " << "0x" << std::setbase(16) << std::setw(4) << std::setfill('0') << i16[0] << ", " << "0x" << std::setbase(16) << std::setw(4) << std::setfill('0') << i16[1] << ", " << "0x" << std::setbase(16) << std::setw(4) << std::setfill('0') << i16[2]; if( 4 == i16.size() ){ ss << ", 0x" << std::setbase(16) << std::setw(4) << std::setfill('0') << i16[3]; } ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << name << "( ::color::_internal::model< ::color::category::"<< model <<"_uint32 > & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array< std::uint32_t, " << f.size() << " >( { " << "0x" << std::setbase(16) << std::setw(8) << std::setfill('0') << i32[0] << ", " << "0x" << std::setbase(16) << std::setw(8) << std::setfill('0') << i32[1] << ", " << "0x" << std::setbase(16) << std::setw(8) << std::setfill('0') << i32[2]; if( 4 == i32.size() ){ ss << ", 0x" << std::setbase(16) << std::setw(8) << std::setfill('0') << i32[3]; } ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << name << "( ::color::_internal::model< ::color::category::"<< model <<"_uint64 > & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array< std::uint64_t, " << i64.size() << " >( { " << "0x" << std::setbase(16) << std::setw(16) << std::setfill('0') << i64[0] << "ull, " << "0x" << std::setbase(16) << std::setw(16) << std::setfill('0') << i64[1] << "ull, " << "0x" << std::setbase(16) << std::setw(16) << std::setfill('0') << i64[2] << "ull" ; if( 4 == i64.size() ){ ss << ", 0x" << std::setbase(16) << std::setw(16) << std::setfill('0') << i64[3] << 
"ull"; } ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << name << "( ::color::_internal::model< ::color::category::"<< model <<"_float > & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array<float," << f.size() << ">( { " << f[0] << ", " << f[1] << ", " << f[2]; if( 4 == f.size() ){ ss << ", " << f[3]; } ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << name << "( ::color::_internal::model< ::color::category::"<< model <<"_double> & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array<double," << d.size() << ">( { " << d[0] << ", " << d[1] << ", " << d[2]; if( 4 == d.size() ){ ss << ", " << d[3]; } ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " inline" << std::endl; ss << " void " << name << "( ::color::_internal::model< ::color::category::"<< model <<"_ldouble> & color_parameter )" << std::endl; ss << " {" << std::endl; ss << " color_parameter.container() = std::array<long double," << ld.size() << ">( { " << ld[0] << ", " << ld[1] << ", " << ld[2]; if( 4 == ld.size() ){ ss << ", " << ld[3];} ss << " } );" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << " }" << std::endl; ss << " }" << std::endl; ss << std::endl; ss << "#endif" << std::endl; //std::cout << ss.str(); //std::cout << "-------" << std:: endl; { std::ofstream ofs( ( "./gen-"+ model +"/"+name + ".hpp" ). c_str() ); //std::ofstream ofs( ( "../../../src/color/"+ model +"/make/"+name + ".hpp" ). c_str() ); ofs << ss.str(); } }