From 6f67371bb1b46579ae837d0e0c61ac1b291be743 Mon Sep 17 00:00:00 2001
From: LaG1924 <12997935+LaG1924@users.noreply.github.com>
Date: Sat, 13 Jan 2018 07:51:33 +0500
Subject: Directory renamed

---
 external/include/glm/detail/type_vec4_simd.inl | 481 +++++++++++++++++++++++++
 1 file changed, 481 insertions(+)
 create mode 100644 external/include/glm/detail/type_vec4_simd.inl

diff --git a/external/include/glm/detail/type_vec4_simd.inl b/external/include/glm/detail/type_vec4_simd.inl
new file mode 100644
index 0000000..90652fd
--- /dev/null
+++ b/external/include/glm/detail/type_vec4_simd.inl
@@ -0,0 +1,481 @@
+/// @ref core
+/// @file glm/detail/type_vec4_simd.inl
+
+#if GLM_ARCH & GLM_ARCH_SSE2_BIT
+
+namespace glm{
+namespace detail
+{
+#	if GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
+	template <precision P, int E0, int E1, int E2, int E3>
+	struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4>
+	{
+		GLM_FUNC_QUALIFIER tvec4<float, P> operator ()() const
+		{
+			__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
+
+			tvec4<float, P> Result(uninitialize);
+#			if GLM_ARCH & GLM_ARCH_AVX_BIT
+				Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
+#			else
+				Result.data = _mm_shuffle_ps(data, data, _MM_SHUFFLE(E3, E2, E1, E0));
+#			endif
+			return Result;
+		}
+	};
+
+	template <precision P, int E0, int E1, int E2, int E3>
+	struct _swizzle_base1<4, int32, P, glm::tvec4, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4>
+	{
+		GLM_FUNC_QUALIFIER tvec4<int32, P> operator ()() const
+		{
+			__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
+
+			tvec4<int32, P> Result(uninitialize);
+			Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
+			return Result;
+		}
+	};
+
+	template <precision P, int E0, int E1, int E2, int E3>
+	struct _swizzle_base1<4, uint32, P, glm::tvec4, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4>
+	{
+		GLM_FUNC_QUALIFIER tvec4<uint32, P> operator ()() const
+		{
+			__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
+
+			tvec4<uint32, P> Result(uninitialize);
+			Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
+			return Result;
+		}
+	};
+#	endif// GLM_SWIZZLE == GLM_SWIZZLE_ENABLED
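For readers unfamiliar with the trick the swizzle specializations above rely on, here is a standalone sketch (not part of GLM or of this commit; the main() scaffolding is illustrative only) of how _MM_SHUFFLE packs four lane indices into the single immediate consumed by one shufps instruction:

// Standalone sketch, not part of the patch. Compile with e.g. g++ -msse2.
#include <xmmintrin.h> // SSE: __m128, _mm_set_ps, _mm_shuffle_ps, _MM_SHUFFLE
#include <cstdio>

int main()
{
	// _mm_set_ps lists lanes from high to low, so this is (x,y,z,w) = (1,2,3,4).
	__m128 v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);

	// _MM_SHUFFLE(E3,E2,E1,E0) packs four 2-bit lane indices into one 8-bit
	// immediate; E0..E3 = 3,2,1,0 selects the lanes in reverse, i.e. the
	// .wzyx swizzle that the specializations above would emit.
	__m128 wzyx = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 1, 2, 3));

	float out[4];
	_mm_storeu_ps(out, wzyx);
	std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // prints: 4 3 2 1
	return 0;
}

The payoff is that any compile-time swizzle collapses to a single instruction instead of four scalar loads and stores.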
+
+	template <precision P>
+	struct compute_vec4_add<float, P, true>
+	{
+		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
+		{
+			tvec4<float, P> Result(uninitialize);
+			Result.data = _mm_add_ps(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	template <precision P>
+	struct compute_vec4_add<double, P, true>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			tvec4<double, P> Result(uninitialize);
+			Result.data = _mm256_add_pd(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <precision P>
+	struct compute_vec4_sub<float, P, true>
+	{
+		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
+		{
+			tvec4<float, P> Result(uninitialize);
+			Result.data = _mm_sub_ps(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	template <precision P>
+	struct compute_vec4_sub<double, P, true>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			tvec4<double, P> Result(uninitialize);
+			Result.data = _mm256_sub_pd(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <precision P>
+	struct compute_vec4_mul<float, P, true>
+	{
+		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
+		{
+			tvec4<float, P> Result(uninitialize);
+			Result.data = _mm_mul_ps(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	template <precision P>
+	struct compute_vec4_mul<double, P, true>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			tvec4<double, P> Result(uninitialize);
+			Result.data = _mm256_mul_pd(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <precision P>
+	struct compute_vec4_div<float, P, true>
+	{
+		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
+		{
+			tvec4<float, P> Result(uninitialize);
+			Result.data = _mm_div_ps(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	template <precision P>
+	struct compute_vec4_div<double, P, true>
+	{
+		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
+		{
+			tvec4<double, P> Result(uninitialize);
+			Result.data = _mm256_div_pd(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <>
+	struct compute_vec4_div<float, aligned_lowp, true>
+	{
+		static tvec4<float, aligned_lowp> call(tvec4<float, aligned_lowp> const & a, tvec4<float, aligned_lowp> const & b)
+		{
+			tvec4<float, aligned_lowp> Result(uninitialize);
+			Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
+			return Result;
+		}
+	};
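The aligned_lowp division specialization directly above trades accuracy for speed: _mm_rcp_ps returns only a ~12-bit reciprocal approximation, so a * rcp(b) is cheaper than a true divide but differs from a / b in the low bits. A standalone sketch (not part of GLM or of this commit) makes the difference visible:

// Standalone sketch, not part of the patch.
#include <xmmintrin.h>
#include <cstdio>

int main()
{
	__m128 a = _mm_set1_ps(1.0f);
	__m128 b = _mm_set1_ps(3.0f);

	__m128 exact = _mm_div_ps(a, b);             // IEEE division (default path)
	__m128 fast  = _mm_mul_ps(a, _mm_rcp_ps(b)); // lowp shortcut from the patch

	float e[4], f[4];
	_mm_storeu_ps(e, exact);
	_mm_storeu_ps(f, fast);
	std::printf("exact=%.9f fast=%.9f diff=%.3g\n", e[0], f[0], e[0] - f[0]);
	return 0;
}

That error budget is exactly what GLM's lowp precision qualifier signs up for, which is why the shortcut is confined to the aligned_lowp specialization.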
+
+	template <typename T, precision P>
+	struct compute_vec4_and<T, P, true, 32, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_and_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <typename T, precision P>
+	struct compute_vec4_and<T, P, true, 64, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm256_and_si256(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <typename T, precision P>
+	struct compute_vec4_or<T, P, true, 32, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_or_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <typename T, precision P>
+	struct compute_vec4_or<T, P, true, 64, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm256_or_si256(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <typename T, precision P>
+	struct compute_vec4_xor<T, P, true, 32, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_xor_si128(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <typename T, precision P>
+	struct compute_vec4_xor<T, P, true, 64, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm256_xor_si256(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <typename T, precision P>
+	struct compute_vec4_shift_left<T, P, true, 32, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_sll_epi32(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <typename T, precision P>
+	struct compute_vec4_shift_left<T, P, true, 64, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			// per-component variable shift; takes both operands as __m256i
+			Result.data = _mm256_sllv_epi64(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <typename T, precision P>
+	struct compute_vec4_shift_right<T, P, true, 32, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_srl_epi32(a.data, b.data);
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <typename T, precision P>
+	struct compute_vec4_shift_right<T, P, true, 64, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
+		{
+			tvec4<T, P> Result(uninitialize);
+			// per-component variable shift; takes both operands as __m256i
+			Result.data = _mm256_srlv_epi64(a.data, b.data);
+			return Result;
+		}
+	};
+#	endif
+
+	template <typename T, precision P>
+	struct compute_vec4_bitwise_not<T, P, true, 32, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const & v)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
+			return Result;
+		}
+	};
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <typename T, precision P>
+	struct compute_vec4_bitwise_not<T, P, true, 64, true>
+	{
+		static tvec4<T, P> call(tvec4<T, P> const & v)
+		{
+			tvec4<T, P> Result(uninitialize);
+			Result.data = _mm256_xor_si256(v.data, _mm256_set1_epi32(-1));
+			return Result;
+		}
+	};
+#	endif
+
+	template <precision P>
+	struct compute_vec4_equal<float, P, false, 32, true>
+	{
+		static bool call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
+		{
+			return _mm_movemask_ps(_mm_cmpeq_ps(v1.data, v2.data)) != 0;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_equal<int32, P, true, 32, true>
+	{
+		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
+		{
+			return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_nequal<float, P, false, 32, true>
+	{
+		static bool call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
+		{
+			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
+		}
+	};
+
+	template <precision P>
+	struct compute_vec4_nequal<int32, P, true, 32, true>
+	{
+		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
+		{
+			// true if any lane differs (no _mm_cmpneq_epi32 exists in SSE2)
+			return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0xFFFF;
+		}
+	};
+}//namespace detail
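The comparison specializations above reduce a per-lane SIMD compare to a scalar via movemask. One subtlety worth noting: a movemask(...) != 0 test is true as soon as any single lane passes the compare, while an all-lanes test must check for the full mask (0xF for _mm_movemask_ps, 0xFFFF for _mm_movemask_epi8). A standalone sketch (not part of GLM or of this commit) shows the idiom:

// Standalone sketch, not part of the patch.
#include <xmmintrin.h>
#include <cstdio>

int main()
{
	__m128 v1 = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
	__m128 v2 = _mm_set_ps(9.0f, 3.0f, 9.0f, 1.0f); // lanes 0 and 2 match

	// _mm_cmpeq_ps yields all-ones in each matching lane; _mm_movemask_ps
	// packs the sign bit of every lane into the low 4 bits of an int.
	int mask = _mm_movemask_ps(_mm_cmpeq_ps(v1, v2));

	std::printf("mask=0x%x any=%d all=%d\n", mask, mask != 0, mask == 0xF);
	// prints: mask=0x5 any=1 all=0
	return 0;
}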
+
+#	if !GLM_HAS_DEFAULTED_FUNCTIONS
+		template <>
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4()
+#			ifndef GLM_FORCE_NO_CTOR_INIT
+				: data(_mm_setzero_ps())
+#			endif
+		{}
+
+		template <>
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4()
+#			ifndef GLM_FORCE_NO_CTOR_INIT
+				: data(_mm_setzero_ps())
+#			endif
+		{}
+
+		template <>
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4()
+#			ifndef GLM_FORCE_NO_CTOR_INIT
+				: data(_mm_setzero_ps())
+#			endif
+		{}
+#	endif//!GLM_HAS_DEFAULTED_FUNCTIONS
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4(float s) :
+		data(_mm_set1_ps(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4(float s) :
+		data(_mm_set1_ps(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4(float s) :
+		data(_mm_set1_ps(s))
+	{}
+
+#	if GLM_ARCH & GLM_ARCH_AVX_BIT
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, aligned_lowp>::tvec4(double s) :
+		data(_mm256_set1_pd(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, aligned_mediump>::tvec4(double s) :
+		data(_mm256_set1_pd(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, aligned_highp>::tvec4(double s) :
+		data(_mm256_set1_pd(s))
+	{}
+#	endif
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_lowp>::tvec4(int32 s) :
+		data(_mm_set1_epi32(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_mediump>::tvec4(int32 s) :
+		data(_mm_set1_epi32(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_highp>::tvec4(int32 s) :
+		data(_mm_set1_epi32(s))
+	{}
+
+#	if GLM_ARCH & GLM_ARCH_AVX2_BIT
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, aligned_lowp>::tvec4(int64 s) :
+		data(_mm256_set1_epi64x(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, aligned_mediump>::tvec4(int64 s) :
+		data(_mm256_set1_epi64x(s))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, aligned_highp>::tvec4(int64 s) :
+		data(_mm256_set1_epi64x(s))
+	{}
+#	endif
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4(float a, float b, float c, float d) :
+		data(_mm_set_ps(d, c, b, a))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4(float a, float b, float c, float d) :
+		data(_mm_set_ps(d, c, b, a))
+	{}
+
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4(float a, float b, float c, float d) :
+		data(_mm_set_ps(d, c, b, a))
+	{}
+
+	template <>
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_lowp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+		data(_mm_set_epi32(d, c, b, a))
+	{}
+
+	template <>
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_mediump>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+		data(_mm_set_epi32(d, c, b, a))
+	{}
+
+	template <>
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_highp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+		data(_mm_set_epi32(d, c, b, a))
+	{}
+
+	template <>
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+		data(_mm_castsi128_ps(_mm_set_epi32(d, c, b, a)))
+	{}
+
+	template <>
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+		data(_mm_castsi128_ps(_mm_set_epi32(d, c, b, a)))
+	{}
+
+	template <>
+	template <>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+		data(_mm_castsi128_ps(_mm_set_epi32(d, c, b, a)))
+	{}
+}//namespace glm
+
+#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
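One detail of the vendored constructors above deserves a note: the int32-to-float ones use _mm_castsi128_ps, which reinterprets the integer bit patterns as floats, whereas _mm_cvtepi32_ps is the intrinsic that converts the values. A standalone sketch (not part of GLM or of this commit) shows the difference; the vendored code is left verbatim here:

// Standalone sketch, not part of the patch.
#include <emmintrin.h> // SSE2
#include <cstdio>

int main()
{
	__m128i i = _mm_set_epi32(4, 3, 2, 1); // lanes: 1, 2, 3, 4
	                                       // (_mm_set_epi32 lists lanes high to low,
	                                       //  hence the reversed d, c, b, a above)

	__m128 cast = _mm_castsi128_ps(i); // bit reinterpretation, no conversion
	__m128 conv = _mm_cvtepi32_ps(i);  // numeric int -> float conversion

	float c[4], v[4];
	_mm_storeu_ps(c, cast);
	_mm_storeu_ps(v, conv);
	std::printf("cast: %g %g %g %g\n", c[0], c[1], c[2], c[3]); // tiny denormals
	std::printf("conv: %g %g %g %g\n", v[0], v[1], v[2], v[3]); // 1 2 3 4
	return 0;
}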