950 lines
32 KiB
C++
950 lines
32 KiB
C++
// ======================
|
|
// renderer/util/util.h
|
|
// (c) 2010 RockstarNorth
|
|
// ======================
|
|
|
|
#ifndef _RENDERER_UTIL_UTIL_H_
|
|
#define _RENDERER_UTIL_UTIL_H_
|
|
|
|
#include "vectormath/classes.h"
|
|
#include "vector/matrix44.h"
|
|
#include "atl/string.h"
|
|
|
|
#include "fwmaths/vectorutil.h"
|
|
#include "math/vecrand.h" // i moved a bunch of shit into rage math ..
|
|
|
|
void _test_vectormath_stuff(int count);
|
|
|
|
namespace rage { typedef double f64; } // please put this in the forceincludes
|
|
|
|
// ================================================================================================
|
|
|
|
template <typename T, size_t N> __forceinline bool IsOneOf(const T& src, const T (&arr)[N])
|
|
{
|
|
for (size_t i = 0; i < N; i++)
|
|
{
|
|
if (src == arr[i])
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// ================================================================================================
|
|
|
|
#if 1 // TODO -- clean this up and move it somewhere
|
|
|
|
#define _MAT4_DST_PARAMS(m) Vector_4V_InOut m##0, Vector_4V_InOut m##1, Vector_4V_InOut m##2, Vector_4V_InOut m##3
|
|
#define _MAT3_DST_PARAMS(m) Vector_4V_InOut m##0, Vector_4V_InOut m##1, Vector_4V_InOut m##2
|
|
|
|
#define _MAT4_SRC_PARAMS(m) Vector_4V_In m##0, Vector_4V_In m##1, Vector_4V_In m##2, Vector_4V_In_After3Args m##3
|
|
#define _MAT3_SRC_PARAMS(m) Vector_4V_In m##0, Vector_4V_In m##1, Vector_4V_In m##2
|
|
|
|
#define _MAT4_SRC_PARAMS_AFTER1ARGS(m) Vector_4V_In m##0, Vector_4V_In m##1, Vector_4V_In_After3Args m##2, Vector_4V_In_After3Args m##3
|
|
#define _MAT3_SRC_PARAMS_AFTER1ARGS(m) Vector_4V_In m##0, Vector_4V_In m##1, Vector_4V_In_After3Args m##2
|
|
|
|
#define _MAT4_SRC_PARAMS_AFTER2ARGS(m) Vector_4V_In m##0, Vector_4V_In_After3Args m##1, Vector_4V_In_After3Args m##2, Vector_4V_In_After3Args m##3
|
|
#define _MAT3_SRC_PARAMS_AFTER2ARGS(m) Vector_4V_In m##0, Vector_4V_In_After3Args m##1, Vector_4V_In_After3Args m##2
|
|
|
|
#define _MAT4_SRC_PARAMS_AFTER3ARGS(m) Vector_4V_In_After3Args m##0, Vector_4V_In_After3Args m##1, Vector_4V_In_After3Args m##2, Vector_4V_In_After3Args m##3
|
|
#define _MAT3_SRC_PARAMS_AFTER3ARGS(m) Vector_4V_In_After3Args m##0, Vector_4V_In_After3Args m##1, Vector_4V_In_After3Args m##2
|
|
|
|
#define _MAT4_TO_PARAMS(m) m.GetCol0Intrin128(), m.GetCol1Intrin128(), m.GetCol2Intrin128(), m.GetCol3Intrin128()
|
|
#define _MAT3_TO_PARAMS(m) m.GetCol0Intrin128(), m.GetCol1Intrin128(), m.GetCol2Intrin128()
|
|
|
|
namespace rage {
|
|
namespace Vec {
|
|
|
|
__forceinline void _V4_Transpose_Mat44(_MAT4_DST_PARAMS(d), _MAT4_SRC_PARAMS(m));
|
|
__forceinline void _V4_Transpose_Mat34(_MAT3_DST_PARAMS(d), _MAT4_SRC_PARAMS(m));
|
|
__forceinline void _V4_Transpose_Mat33(_MAT3_DST_PARAMS(d), _MAT3_SRC_PARAMS(m));
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Mat44_Vec4(_MAT4_SRC_PARAMS(m), Vector_4V_In_After3Args a);
|
|
__forceinline Vector_4V_Out _V4_Multiply_Mat34_Vec4(_MAT4_SRC_PARAMS(m), Vector_4V_In_After3Args a);
|
|
__forceinline Vector_4V_Out _V4_Multiply_Mat33_Vec3(_MAT3_SRC_PARAMS(m), Vector_4V_In_After3Args a);
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Vec4_Mat44(Vector_4V_In a, _MAT4_SRC_PARAMS_AFTER1ARGS(m));
|
|
__forceinline Vector_4V_Out _V4_Multiply_Vec3_Mat33(Vector_4V_In a, _MAT3_SRC_PARAMS_AFTER1ARGS(m));
|
|
|
|
__forceinline Vector_4V_Out _V4_InvertTransposeAffine_Mat33(_MAT3_DST_PARAMS(d), _MAT3_SRC_PARAMS(m));
|
|
|
|
__forceinline Vector_4V_Out _V4_UnTransformAffine_Mat33_Vec3 (_MAT3_SRC_PARAMS(m), Vector_4V_In_After3Args a);
|
|
__forceinline Vector_4V_Out _V4_UnTransformAffine_Mat33_Vec3_fast(_MAT3_SRC_PARAMS(m), Vector_4V_In_After3Args a);
|
|
|
|
} // namespace Vec
|
|
|
|
enum
|
|
{
|
|
_X1 = Vec::X1,
|
|
_Y1 = Vec::Y1,
|
|
_Z1 = Vec::Z1,
|
|
_W1 = Vec::W1,
|
|
|
|
_X2 = Vec::X2,
|
|
_Y2 = Vec::Y2,
|
|
_Z2 = Vec::Z2,
|
|
_W2 = Vec::W2,
|
|
|
|
// _ZERO = ...,
|
|
// _ANY1 = ...,
|
|
// _ANY2 = ...,
|
|
// _DONTCARE = ...,
|
|
};
|
|
|
|
template <u32 permX, u32 permY, u32 permZ, u32 permW> __forceinline Vec4V_Out Permute(Vec4V_In a);
|
|
|
|
// ================================================================================================
|
|
|
|
__forceinline Vec4V_Out _TransformV(Mat44V_In m, Vec4V_In a) { return Vec4V( Vec::_V4_Multiply_Mat44_Vec4(_MAT4_TO_PARAMS(m), a.GetIntrin128())); }
|
|
__forceinline Vec3V_Out _TransformV(Mat34V_In m, Vec4V_In a) { return Vec3V( Vec::_V4_Multiply_Mat34_Vec4(_MAT4_TO_PARAMS(m), a.GetIntrin128())); }
|
|
__forceinline Vec3V_Out _TransformP(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::V4Add(Vec::_V4_Multiply_Mat33_Vec3(_MAT3_TO_PARAMS(m), a.GetIntrin128()), m.GetCol3Intrin128())); }
|
|
__forceinline Vec3V_Out _TransformV(Mat34V_In m, Vec3V_In a) { return Vec3V( Vec::_V4_Multiply_Mat33_Vec3(_MAT3_TO_PARAMS(m), a.GetIntrin128())); }
|
|
__forceinline Vec3V_Out _TransformV(Mat33V_In m, Vec3V_In a) { return Vec3V( Vec::_V4_Multiply_Mat33_Vec3(_MAT3_TO_PARAMS(m), a.GetIntrin128())); }
|
|
|
|
__forceinline Vec3V_Out _UnTransformOrthoP(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_Multiply_Vec3_Mat33(Vec::V4Subtract(a.GetIntrin128(), m.GetCol3Intrin128()), _MAT3_TO_PARAMS(m))); }
|
|
__forceinline Vec3V_Out _UnTransformOrthoV(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_Multiply_Vec3_Mat33( a.GetIntrin128(), _MAT3_TO_PARAMS(m))); }
|
|
__forceinline Vec3V_Out _UnTransformOrthoV(Mat33V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_Multiply_Vec3_Mat33( a.GetIntrin128(), _MAT3_TO_PARAMS(m))); }
|
|
|
|
__forceinline Vec3V_Out _UnTransformAffineP(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_UnTransformAffine_Mat33_Vec3(_MAT3_TO_PARAMS(m), Vec::V4Subtract(a.GetIntrin128(), m.GetCol3Intrin128()))); }
|
|
__forceinline Vec3V_Out _UnTransformAffineV(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_UnTransformAffine_Mat33_Vec3(_MAT3_TO_PARAMS(m), a.GetIntrin128())); }
|
|
__forceinline Vec3V_Out _UnTransformAffineV(Mat33V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_UnTransformAffine_Mat33_Vec3(_MAT3_TO_PARAMS(m), a.GetIntrin128())); }
|
|
|
|
__forceinline Vec3V_Out _UnTransformAffineP_fast(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_UnTransformAffine_Mat33_Vec3_fast(_MAT3_TO_PARAMS(m), Vec::V4Subtract(a.GetIntrin128(), m.GetCol3Intrin128()))); }
|
|
__forceinline Vec3V_Out _UnTransformAffineV_fast(Mat34V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_UnTransformAffine_Mat33_Vec3_fast(_MAT3_TO_PARAMS(m), a.GetIntrin128())); }
|
|
__forceinline Vec3V_Out _UnTransformAffineV_fast(Mat33V_In m, Vec3V_In a) { return Vec3V(Vec::_V4_UnTransformAffine_Mat33_Vec3_fast(_MAT3_TO_PARAMS(m), a.GetIntrin128())); }
|
|
|
|
// ================================================================================================
|
|
|
|
namespace Vec {
|
|
|
|
__forceinline void _V4_Transpose_Mat44(_MAT4_DST_PARAMS(d), _MAT4_SRC_PARAMS(m))
|
|
{
|
|
Vector_4V v0,v1,v2,v3;
|
|
v0 = V4MergeXY(m0, m2); // m0x,m2x,m0y,m2y
|
|
v1 = V4MergeXY(m1, m3); // m1x,m3x,m1y,m3y
|
|
v2 = V4MergeZW(m0, m2); // m0z,m2z,m0w,m2w
|
|
v3 = V4MergeZW(m1, m3); // m1z,m3z,m1w,m3w
|
|
d0 = V4MergeXY(v0, v1); // m0x,m1x,m2x,m3x
|
|
d1 = V4MergeZW(v0, v1); // m0y,m1y,m2y,m3y
|
|
d2 = V4MergeXY(v2, v3); // m0z,m1z,m2z,m3z
|
|
d3 = V4MergeZW(v2, v3); // m0w,m1w,m2w,m3w
|
|
}
|
|
|
|
__forceinline void _V4_Transpose_Mat34(_MAT3_DST_PARAMS(d), _MAT4_SRC_PARAMS(m))
|
|
{
|
|
Vector_4V v0,v1,v2,v3;
|
|
v0 = V4MergeXY(m0, m2); // m0x,m2x,m0y,m2y
|
|
v1 = V4MergeXY(m1, m3); // m1x,m3x,m1y,m3y
|
|
v2 = V4MergeZW(m0, m2); // m0z,m2z,m0w,m2w
|
|
v3 = V4MergeZW(m1, m3); // m1z,m3z,m1w,m3w
|
|
d0 = V4MergeXY(v0, v1); // m0x,m1x,m2x,m3x
|
|
d1 = V4MergeZW(v0, v1); // m0y,m1y,m2y,m3y
|
|
d2 = V4MergeXY(v2, v3); // m0z,m1z,m2z,m3z
|
|
}
|
|
|
|
__forceinline void _V4_Transpose_Mat33(_MAT3_DST_PARAMS(d), _MAT3_SRC_PARAMS(m))
|
|
{
|
|
#if !USE_ALTERNATE_3X3_TRANSPOSE // 5 instr. .. w-components will be slightly different
|
|
Vector_4V v0,v2;
|
|
v0 = V4MergeXY(m0, m2); // m0x,m2x,m0y,m2y
|
|
v2 = V4MergeZW(m0, m2); // m0z,m2z,m0w,m2w
|
|
d0 = V4MergeXY(v0, m1); // m0x,m1x,m2x,m1y
|
|
d1 = V4PermuteTwo<Z1,Y2,W1,X1>(v0, m1); // m0y,m1y,m2y,m0x
|
|
d2 = V4PermuteTwo<X1,Z2,Y1,X1>(v2, m1); // m0z,m1z,m2z,m0z
|
|
#else // 7 instr.
|
|
Vector_4V v0,v1,v2,v3,m3=m2;
|
|
v0 = V4MergeXY(m0, m2); // m0x,m2x,m0y,m2y
|
|
v1 = V4MergeXY(m1, m3); // m1x,m3x,m1y,m3y
|
|
v2 = V4MergeZW(m0, m2); // m0z,m2z,m0w,m2w
|
|
v3 = V4MergeZW(m1, m3); // m1z,m3z,m1w,m3w
|
|
d0 = V4MergeXY(v0, v1); // m0x,m1x,m2x,m3x
|
|
d1 = V4MergeZW(v0, v1); // m0y,m1y,m2y,m3y
|
|
d2 = V4MergeXY(v2, v3); // m0z,m1z,m2z,m3z
|
|
#endif
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Mat44_Vec4(_MAT4_SRC_PARAMS(m), Vector_4V_In_After3Args a)
|
|
{
|
|
#if __XENON // 15 instr. (faster?)
|
|
Vector_4V t0,t1,t2,t3; _V4_Transpose_Mat44(t0,t1,t2,t3, m0,m1,m2,m3);
|
|
t0 = V4DotV(t0, a);
|
|
t1 = V4DotV(t1, a);
|
|
t2 = V4DotV(t2, a);
|
|
t3 = V4DotV(t3, a);
|
|
t0 = V4MergeXY(t0, t2);
|
|
t1 = V4MergeXY(t1, t3);
|
|
t0 = V4MergeXY(t0, t1);
|
|
return t0;
|
|
#else // 9/8 instr.
|
|
Vector_4V t0,t1,t2,t3;
|
|
t0 = V4SplatX(a);
|
|
t1 = V4SplatY(a);
|
|
t2 = V4SplatZ(a);
|
|
t3 = V4SplatW(a);
|
|
#if 1
|
|
t0 = V4Scale(t0, m0);
|
|
t1 = V4Scale(t1, m1);
|
|
t0 = V4AddScaled(t0, t2, m2);
|
|
t1 = V4AddScaled(t1, t3, m3);
|
|
t0 = V4Add(t0, t1);
|
|
#else
|
|
t0 = V4Scale(t0, m0);
|
|
t0 = V4AddScaled(t0, t1, m1);
|
|
t0 = V4AddScaled(t0, t2, m2);
|
|
t1 = V4AddScaled(t0, t3, m3);
|
|
#endif
|
|
return t0;
|
|
#endif
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Mat34_Vec4(_MAT4_SRC_PARAMS(m), Vector_4V_In_After3Args a)
|
|
{
|
|
#if 0 && __XENON // 13 instr.
|
|
Vector_4V t0,t1,t2; _V4_Transpose_Mat34(t0,t1,t2, m0,m1,m2,m3);
|
|
t0 = V4DotV(t0, a);
|
|
t1 = V4DotV(t1, a);
|
|
t2 = V4DotV(t2, a);
|
|
t0 = V4MergeXY(t0, t2);
|
|
t0 = V4MergeXY(t0, t1);
|
|
return t0;
|
|
#else // 9/8 instr.
|
|
Vector_4V t0,t1,t2,t3;
|
|
t0 = V4SplatX(a);
|
|
t1 = V4SplatY(a);
|
|
t2 = V4SplatZ(a);
|
|
t3 = V4SplatW(a);
|
|
#if 1
|
|
t0 = V4Scale(t0, m0);
|
|
t1 = V4Scale(t1, m1);
|
|
t0 = V4AddScaled(t0, t2, m2);
|
|
t1 = V4AddScaled(t1, t3, m3);
|
|
t0 = V4Add(t0, t1);
|
|
#else
|
|
t0 = V4Scale(t0, m0);
|
|
t0 = V4AddScaled(t0, t1, m1);
|
|
t0 = V4AddScaled(t0, t2, m2);
|
|
t1 = V4AddScaled(t0, t3, m3);
|
|
#endif
|
|
return t0;
|
|
#endif
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Mat33_Vec3(_MAT3_SRC_PARAMS(m), Vector_4V_In_After3Args a)
|
|
{
|
|
#if __XENON // 10 instr.
|
|
Vector_4V t0,t1,t2; _V4_Transpose_Mat33(t0,t1,t2, m0,m1,m2);
|
|
t0 = V3DotV(t0, a);
|
|
t1 = V3DotV(t1, a);
|
|
t2 = V3DotV(t2, a);
|
|
t0 = V4MergeXY(t0, t2);
|
|
t0 = V4MergeXY(t0, t1);
|
|
return t0;
|
|
#else // 6 instr.
|
|
Vector_4V t0,t1,t2;
|
|
t0 = V4SplatX(a);
|
|
t1 = V4SplatY(a);
|
|
t2 = V4SplatZ(a);
|
|
t0 = V4Scale(t0, m0);
|
|
t0 = V4AddScaled(t0, t1, m1);
|
|
t0 = V4AddScaled(t0, t2, m2);
|
|
return t0;
|
|
#endif
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Vec4_Mat44(Vector_4V_In a, _MAT4_SRC_PARAMS_AFTER1ARGS(m))
|
|
{
|
|
#if __XENON // 7 instr.
|
|
Vector_4V t0,t1,t2,t3;
|
|
t0 = V4DotV(m0, a);
|
|
t1 = V4DotV(m1, a);
|
|
t2 = V4DotV(m2, a);
|
|
t3 = V4DotV(m3, a);
|
|
t0 = V4MergeXY(t0, t2);
|
|
t1 = V4MergeXY(t1, t3);
|
|
t0 = V4MergeXY(t0, t1);
|
|
return t0;
|
|
#else // 17/16 instr.
|
|
Vector_4V t0,t1,t2,t3; _V4_Transpose_Mat44(t0,t1,t2,t3, m0,m1,m2,m3);
|
|
Vector_4V a0,a1,a2,a3;
|
|
a0 = V4SplatX(a);
|
|
a1 = V4SplatY(a);
|
|
a2 = V4SplatZ(a);
|
|
a3 = V4SplatW(a);
|
|
#if 1
|
|
t0 = V4Scale(t0, a0);
|
|
t1 = V4Scale(t1, a1);
|
|
t0 = V4AddScaled(t0, t2, a2);
|
|
t1 = V4AddScaled(t1, t3, a3);
|
|
t0 = V4Add(t0, t1);
|
|
#else
|
|
t0 = V4Scale(t0, a0);
|
|
t0 = V4AddScaled(t0, t1, a1);
|
|
t0 = V4AddScaled(t0, t2, a2);
|
|
t1 = V4AddScaled(t0, t3, a3);
|
|
#endif
|
|
return t0;
|
|
#endif
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_Multiply_Vec3_Mat33(Vector_4V_In a, _MAT3_SRC_PARAMS_AFTER1ARGS(m))
|
|
{
|
|
#if __XENON // 5 instr.
|
|
Vector_4V t0,t1,t2;
|
|
t0 = V3DotV(m0, a);
|
|
t1 = V3DotV(m1, a);
|
|
t2 = V3DotV(m2, a);
|
|
t0 = V4MergeXY(t0, t2);
|
|
t0 = V4MergeXY(t0, t1);
|
|
return t0;
|
|
#else // 13 instr.
|
|
Vector_4V t0,t1,t2; _V4_Transpose_Mat33(t0,t1,t2, m0,m1,m2);
|
|
Vector_4V a0,a1,a2;
|
|
a0 = V4SplatX(a);
|
|
a1 = V4SplatY(a);
|
|
a2 = V4SplatZ(a);
|
|
t0 = V4Scale(t0, a0);
|
|
t0 = V4AddScaled(t0, t1, a1);
|
|
t0 = V4AddScaled(t0, t2, a2);
|
|
return t0;
|
|
#endif
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_InvertTransposeAffine_Mat33(_MAT3_DST_PARAMS(d), _MAT3_SRC_PARAMS(m))
|
|
{
|
|
d0 = V3Cross(m1, m2); // m1yzw*m2zxy - m2yzw*m1zxy
|
|
d1 = V3Cross(m2, m0); // m2yzw*m0zxy - m0yzw*m2zxy
|
|
d2 = V3Cross(m0, m1); // m0yzw*m1zxy - m1yzw*m0zxy
|
|
|
|
return V3DotV(m0, d0);
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_UnTransformAffine_Mat33_Vec3(_MAT3_SRC_PARAMS(m), Vector_4V_In_After3Args a)
|
|
{
|
|
Vector_4V c0,c1,c2,det = _V4_InvertTransposeAffine_Mat33(c0,c1,c2, m0,m1,m2);
|
|
|
|
const Vector_4V invdet = V4Invert(det); // {c0,c1,c2}*invdet is inverse transpose of {m0,m1,m2}
|
|
const Vector_4V av = V4Scale(a, invdet);
|
|
|
|
return _V4_Multiply_Vec3_Mat33(av, c0,c1,c2);
|
|
}
|
|
|
|
__forceinline Vector_4V_Out _V4_UnTransformAffine_Mat33_Vec3_fast(_MAT3_SRC_PARAMS(m), Vector_4V_In_After3Args a)
|
|
{
|
|
Vector_4V c0,c1,c2,det = _V4_InvertTransposeAffine_Mat33(c0,c1,c2, m0,m1,m2);
|
|
|
|
const Vector_4V invdet = V4InvertFast(det); // {c0,c1,c2}*invdet is inverse transpose of {m0,m1,m2}
|
|
const Vector_4V av = V4Scale(a, invdet);
|
|
|
|
return _V4_Multiply_Vec3_Mat33(av, c0,c1,c2);
|
|
}
|
|
|
|
} // namespace Vec
|
|
|
|
// ================================================================================================
|
|
|
|
template <u32 permX, u32 permY, u32 permZ, u32 permW> __forceinline Vec4V_Out Permute(Vec4V_In a)
|
|
{
|
|
return a.Get<permX,permY,permZ,permW>();
|
|
}
|
|
|
|
} // namespace rage
|
|
|
|
#undef _MAT4_DST_PARAMS
|
|
#undef _MAT3_DST_PARAMS
|
|
|
|
#undef _MAT4_SRC_PARAMS
|
|
#undef _MAT3_SRC_PARAMS
|
|
|
|
#undef _MAT4_SRC_PARAMS_AFTER1ARGS
|
|
#undef _MAT3_SRC_PARAMS_AFTER1ARGS
|
|
|
|
#undef _MAT4_SRC_PARAMS_AFTER2ARGS
|
|
#undef _MAT3_SRC_PARAMS_AFTER2ARGS
|
|
|
|
#undef _MAT4_SRC_PARAMS_AFTER3ARGS
|
|
#undef _MAT3_SRC_PARAMS_AFTER3ARGS
|
|
|
|
#undef _MAT4_TO_PARAMS
|
|
#undef _MAT3_TO_PARAMS
|
|
|
|
#endif
|
|
|
|
// ================================================================================================
|
|
|
|
// ================================================================================================
|
|
|
|
// ================================================================================================
|
|
// copy of projects/rng .. adapting to rage vectormath
|
|
// ===================================================
|
|
|
|
#if 1 // TODO -- clean this up and move it somewhere
|
|
|
|
void _test_V4Shift();
|
|
|
|
void _test_XorShift31SkipAhead(bool bTestPerformance = false);
|
|
void _test_XorShift32SkipAhead(bool bTestPerformance = false);
|
|
void _test_Pattern();
|
|
void _test_StrangeXorShiftPattern();
|
|
|
|
#endif
|
|
|
|
// ================================================================================================
|
|
|
|
// ================================================================================================
|
|
|
|
#if 1 // TODO -- clean this up and move it somewhere
|
|
|
|
namespace rage {
|
|
namespace Vec {
|
|
|
|
__forceinline Vector_4V_Out _V4DivideBy2(Vector_4V_In a)
|
|
{
|
|
// comment by Luke Hutchinson [luke.hutchinson@teambondi.com]
|
|
// 1. Input must be finite floats (infinity will not stay as infinity)
|
|
// 2. Very small floats eg 0x00c00000 will return a denormal, and an incorrect one at that!
|
|
// The problem is not as bad as it sounds though, assuming non-Java mode is enabled (the
|
|
// default for both PS3 and 360), denormal inputs to any further instructions will all get
|
|
// flushed to zero.
|
|
|
|
const Vector_4V z = V4IsEqualV (a, V4VConstant(V_ZERO));
|
|
const Vector_4V b = V4SubtractInt(a, V4VConstant(V_FLT_MIN)); // subtract 0x00800000 integer
|
|
const Vector_4V c = V4Andc (b, z);
|
|
|
|
return c;
|
|
}
|
|
|
|
__forceinline const Vector_4V _V4VConstant_V_FLT_MIN() // replacement for V4VConstant(V_FLT_MIN)
|
|
{
|
|
#if UNIQUE_VECTORIZED_TYPE && __XENON
|
|
return __vslw( __vspltisw(1), __vspltisw(-9) );
|
|
#elif UNIQUE_VECTORIZED_TYPE && __PS3
|
|
return (Vector_4V)vec_sl( vec_splat_u32(1), vec_splat_u32(-9) );
|
|
#elif UNIQUE_VECTORIZED_TYPE && __SPU
|
|
return (Vector_4V)spu_splats( (int)U32_FLT_MIN );
|
|
#else
|
|
return V4VConstant<U32_FLT_MIN,U32_FLT_MIN,U32_FLT_MIN,U32_FLT_MIN>();
|
|
#endif
|
|
}
|
|
|
|
#if __PPU // optimisation from LAN guys ..
|
|
|
|
__forceinline unsigned int _V3IsEqualIntAll (Vector_4V_In a, Vector_4V_In b);
|
|
__forceinline unsigned int _V3IsEqualAll (Vector_4V_In a, Vector_4V_In b);
|
|
__forceinline unsigned int _V3IsLessThanAll (Vector_4V_In a, Vector_4V_In b);
|
|
__forceinline unsigned int _V3IsLessThanOrEqualAll(Vector_4V_In a, Vector_4V_In b);
|
|
|
|
/*
|
|
__forceinline unsigned int _V3IsEqualIntAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V z = (Vector_4V)vec_splat_s8(0);
|
|
return vec_all_eq((Vector_4V_uint)vec_sld(z, a, 12), (Vector_4V_uint)vec_sld(z, b, 12));
|
|
}
|
|
|
|
__forceinline unsigned int _V3IsEqualAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V z = (Vector_4V)vec_splat_s8(0);
|
|
return vec_all_eq(vec_sld(z, a, 12), vec_sld(z, b, 12));
|
|
}
|
|
|
|
__forceinline unsigned int _V3IsLessThanAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V g = (Vector_4V)vec_splat_s8(1); // '0x01010101' is something > 0 in floating-point
|
|
const Vector_4V z = (Vector_4V)vec_splat_s8(0);
|
|
return vec_all_lt(vec_sld(z, a, 12), vec_sld(g, b, 12));
|
|
}
|
|
|
|
__forceinline unsigned int _V3IsLessThanOrEqualAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V z = (Vector_4V)vec_splat_s8(0);
|
|
return vec_all_le(vec_sld(z, a, 12), vec_sld(z, b, 12));
|
|
}
|
|
*/
|
|
|
|
__forceinline unsigned int _V3IsEqualIntAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V z = V4VConstant(V_ZERO);
|
|
return V4IsEqualIntAll(V4ShiftLeftBytesDouble<12>(z, a), V4ShiftLeftBytesDouble<12>(z, b));
|
|
}
|
|
|
|
__forceinline unsigned int _V3IsEqualAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V z = V4VConstant(V_ZERO);
|
|
return V4IsEqualAll(V4ShiftLeftBytesDouble<12>(z, a), V4ShiftLeftBytesDouble<12>(z, b));
|
|
}
|
|
|
|
__forceinline unsigned int _V3IsLessThanAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V g = (Vector_4V)vec_splat_s8(1); // '0x01010101' is something > 0 in floating-point
|
|
const Vector_4V z = (Vector_4V)vec_splat_s8(0);
|
|
return V4IsLessThanAll(V4ShiftLeftBytesDouble<12>(z, a), V4ShiftLeftBytesDouble<12>(g, b));
|
|
}
|
|
|
|
__forceinline unsigned int _V3IsLessThanOrEqualAll(Vector_4V_In a, Vector_4V_In b)
|
|
{
|
|
const Vector_4V z = V4VConstant(V_ZERO);
|
|
return V4IsLessThanOrEqualAll(V4ShiftLeftBytesDouble<12>(z, a), V4ShiftLeftBytesDouble<12>(z, b));
|
|
}
|
|
|
|
#else
|
|
|
|
#define _V3IsEqualIntAll V3IsEqualIntAll
|
|
#define _V3IsEqualAll V3IsEqualAll
|
|
#define _V3IsLessThanAll V3IsLessThanAll
|
|
#define _V3IsLessThanOrEqualAll V3IsLessThanOrEqualAll
|
|
|
|
#endif
|
|
|
|
} // namespace Vec
|
|
} // namespace rage
|
|
|
|
#endif
|
|
|
|
// ================================================================================================
|
|
|
|
// ================================================================================================
|
|
|
|
// To avoid LHS stalls when generating random vector data, one solution is to buffer the data.
|
|
// CBufferedRNG is a template class which maintains a buffer of count OutputType's worth of
|
|
// data, filling the buffer on demand. The template requires a RNG class with a few simple
|
|
// methods such as RNG::Reset(seed) and RNG::Generate(). Note that RNG::Generate() does not
|
|
// have to generate the same size type as CBufferedRNG::Generate() returns, in fact this
|
|
// template can be used to buffer random vector data from a scalar generator or random scalar
|
|
// data from a vector generator, as well as random float (scalar) data from an integer (scalar)
|
|
// generator.
|
|
|
|
// ================================================================================================
|
|
|
|
namespace CBufferedRNG_ {
|
|
|
|
class _FromU32_add1 { public: typedef u32 T; enum { required = 1 }; static __forceinline T ConvertSrc(T value)
|
|
{
|
|
// optionally shift right by 9 bits (or 8 bits if value is 31 bits) instead of masking
|
|
const u32 mask = 0x007ffffful;
|
|
const u32 one = 0x3f800000ul;
|
|
return (value & mask) | one; // binary pattern for float [1..2)
|
|
}};
|
|
class _FromU64_add1 { public: typedef u64 T; enum { required = 1 }; static __forceinline T ConvertSrc(T value)
|
|
{
|
|
const u64 mask = 0x007fffff007fffffull;
|
|
const u64 one = 0x3f8000003f800000ull;
|
|
return (value & mask) | one; // binary pattern for float [1..2)
|
|
}};
|
|
class _FromVEC { public: typedef Vec::Vector_4V T; enum { required = 1 }; static __forceinline T ConvertSrc(T value)
|
|
{
|
|
// optionally shift right by 9 bits (or 8 bits if value is 31 bits) instead of masking
|
|
const Vec::Vector_4V mask = Vec::V4VConstantSplat<0x007fffff>(); // mantissa mask
|
|
const Vec::Vector_4V one = Vec::V4VConstantSplat<0x3f800000>(); // 1.0f
|
|
return Vec::V4Subtract(Vec::V4Or(Vec::V4And(value, mask), one), one);
|
|
}};
|
|
class _ToF32 { public: typedef float T; enum { required = 0 }; static __forceinline T ConvertDst(T value)
|
|
{
|
|
return value;
|
|
}};
|
|
class _ToF32_sub1 { public: typedef float T; enum { required = 1 }; static __forceinline T ConvertDst(T value)
|
|
{
|
|
return value - 1.0f;
|
|
}};
|
|
class _ToVEC { public: typedef Vec::Vector_4V T; enum { required = 0 }; static __forceinline T ConvertDst(T value)
|
|
{
|
|
return value;
|
|
}};
|
|
class _ToVEC_sub1 { public: typedef Vec::Vector_4V T; enum { required = 1 }; static __forceinline T ConvertDst(T value)
|
|
{
|
|
return Vec::V4Subtract(value, Vec::V4VConstantSplat<0x3f800000>());
|
|
}};
|
|
|
|
template <typename DstType, typename SrcType> class CConverter
|
|
{
|
|
public:
|
|
static __forceinline DstType ConvertDst(DstType value) { return value; }
|
|
static __forceinline SrcType ConvertSrc(SrcType value) { return value; }
|
|
};
|
|
|
|
#if 1 // new code uses macro .. keep this
|
|
|
|
#define DEF_CONVERTER(dstconv,srcconv) \
|
|
template <> class CConverter<typename dstconv::T, typename srcconv::T> \
|
|
: public dstconv \
|
|
, public srcconv \
|
|
{ \
|
|
public: \
|
|
enum { REQUIRE_SRC_CONVERSION = srcconv::required }; \
|
|
enum { REQUIRE_DST_CONVERSION = dstconv::required }; \
|
|
} \
|
|
// end.
|
|
|
|
DEF_CONVERTER(_ToF32_sub1, _FromU32_add1);
|
|
DEF_CONVERTER(_ToF32_sub1, _FromU64_add1);
|
|
DEF_CONVERTER(_ToF32 , _FromVEC );
|
|
DEF_CONVERTER(_ToVEC_sub1, _FromU32_add1);
|
|
DEF_CONVERTER(_ToVEC_sub1, _FromU64_add1);
|
|
DEF_CONVERTER(_ToVEC , _FromVEC );
|
|
|
|
#undef DEF_CONVERTER
|
|
|
|
#else // old code without macro
|
|
|
|
template <> class CConverter<float ,u32 > : public _ToF32_sub1, public _FromU32_add1 { public: enum { REQUIRE_DST_CONVERSION = 1 }; };
|
|
template <> class CConverter<float ,u64 > : public _ToF32_sub1, public _FromU64_add1 { public: enum { REQUIRE_DST_CONVERSION = 1 }; };
|
|
template <> class CConverter<float ,Vec::Vector_4V> : public _ToF32 , public _FromVEC { public: enum { REQUIRE_DST_CONVERSION = 0 }; };
|
|
template <> class CConverter<Vec::Vector_4V,u32 > : public _ToVEC_sub1, public _FromU32_add1 { public: enum { REQUIRE_DST_CONVERSION = 1 }; };
|
|
template <> class CConverter<Vec::Vector_4V,u64 > : public _ToVEC_sub1, public _FromU64_add1 { public: enum { REQUIRE_DST_CONVERSION = 1 }; };
|
|
template <> class CConverter<Vec::Vector_4V,Vec::Vector_4V> : public _ToVEC , public _FromVEC { public: enum { REQUIRE_DST_CONVERSION = 0 }; };
|
|
|
|
#endif
|
|
|
|
} // namespace CBufferedRNG_
|
|
|
|
#define CBufferedRNG_template_decl template <typename OutputType, typename RNG>
|
|
#define CBufferedRNG_template_inst CBufferedRNG<OutputType,RNG>
|
|
|
|
CBufferedRNG_template_decl class CBufferedRNG : public RNG
|
|
{
|
|
private:
|
|
typedef typename RNG::SeedType SeedType;
|
|
typedef typename RNG::DataType DataType;
|
|
typedef CBufferedRNG_::CConverter<OutputType,DataType> ConverterClass;
|
|
|
|
public:
|
|
__forceinline CBufferedRNG(SeedType seed, int count);
|
|
__forceinline ~CBufferedRNG();
|
|
|
|
__forceinline void Reset(SeedType seed);
|
|
|
|
__forceinline OutputType Generate(); // grab one vector of random data, fill buffer if necessary
|
|
__forceinline const OutputType* Generate(int count); // returns pointer to count vectors of random data
|
|
__forceinline void Generate(OutputType* data, int count); // copy vector data to memory
|
|
|
|
private:
|
|
__forceinline void Fill(); // TODO -- make this non-inlined
|
|
|
|
OutputType* m_buffer;
|
|
int m_index;
|
|
int m_count;
|
|
};
|
|
|
|
CBufferedRNG_template_decl __forceinline CBufferedRNG_template_inst::CBufferedRNG(SeedType seed, int count) : RNG(seed)
|
|
{
|
|
FastAssert(count > 0);
|
|
FastAssert((count*sizeof(OutputType)) % sizeof(DataType) == 0);
|
|
|
|
m_buffer = rage_new OutputType[count];
|
|
//m_buffer = (OutputType*)_aligned_malloc(count*sizeof(OutputType), 16);
|
|
m_index = count;
|
|
m_count = count;
|
|
}
|
|
|
|
CBufferedRNG_template_decl __forceinline CBufferedRNG_template_inst::~CBufferedRNG()
|
|
{
|
|
delete[] m_buffer;
|
|
//_aligned_free(m_buffer);
|
|
}
|
|
|
|
CBufferedRNG_template_decl __forceinline void CBufferedRNG_template_inst::Reset(SeedType seed)
|
|
{
|
|
RNG::Reset(seed);
|
|
m_index = m_count;
|
|
}
|
|
|
|
CBufferedRNG_template_decl __forceinline OutputType CBufferedRNG_template_inst::Generate() // grab one vector of random data, fill buffer if necessary
|
|
{
|
|
if (m_index >= m_count)
|
|
{
|
|
Fill();
|
|
}
|
|
|
|
return ConverterClass::ConvertDst(m_buffer[m_index++]);
|
|
}
|
|
|
|
#if 0 // this doesn't compile. What is 'base'?
|
|
CBufferedRNG_template_decl __forceinline const OutputType* CBufferedRNG_template_inst::Generate(int count) // returns pointer to count vectors of random data
|
|
{
|
|
FastAssert(count <= m_count);
|
|
|
|
if (m_index + count > m_count) // note that this may skip vectors at the end of m_buffer ...
|
|
{
|
|
Fill();
|
|
}
|
|
|
|
if (ConverterClass::REQUIRE_DST_CONVERSION)
|
|
{
|
|
OutputType* dst0 = &m_buffer[m_index];
|
|
OutputType* dst1 = &m_buffer[m_index + count];
|
|
|
|
#if !__PS3 // optional loop unrolling (having issues with macro cleverness on new ps3 compiler)
|
|
#define DEF_UNROLLCODE(i,args) ARG(0,args)[i] = ARG(1,args)(ARG(0,args)[i])
|
|
#define DEF_UNROLL(n,T,ptr,end,code) \
|
|
for (const T* dst##n = &end[-(n)]; ptr <= dst##n; ptr += (n)) \
|
|
{ \
|
|
REP##n##_SEPARATOR_FOREACH_ARGS(UNROLLCODE,;,(ptr,code)); \
|
|
} \
|
|
// end.
|
|
|
|
DEF_UNROLL(8,OutputType,dst0,dst1,ConverterClass::ConvertDst);
|
|
DEF_UNROLL(4,OutputType,dst0,dst1,ConverterClass::ConvertDst);
|
|
DEF_UNROLL(2,OutputType,dst0,dst1,ConverterClass::ConvertDst);
|
|
|
|
#undef DEF_UNROLLCODE
|
|
#undef DEF_UNROLL
|
|
#endif
|
|
|
|
while (dst0 < dst1)
|
|
{
|
|
*(dst0++) = ConverterClass::ConvertDst(*dst0);
|
|
}
|
|
}
|
|
|
|
m_index += count;
|
|
|
|
return base;
|
|
}
|
|
#endif
|
|
|
|
// this function could probably be optimised a bit to reduce branching
|
|
CBufferedRNG_template_decl __forceinline void CBufferedRNG_template_inst::Generate(OutputType* data, int count) // copy vector data to memory
|
|
{
|
|
while (count > 0)
|
|
{
|
|
const int n = Min<int>(count, m_count - m_index);
|
|
|
|
for (int i = 0; i < n; i++) // unroll this?
|
|
{
|
|
*(data++) = ConverterClass::ConvertDst(m_buffer[m_index++]);
|
|
}
|
|
|
|
count -= n;
|
|
|
|
if (count > 0)
|
|
{
|
|
Fill();
|
|
}
|
|
}
|
|
}
|
|
|
|
CBufferedRNG_template_decl __forceinline void CBufferedRNG_template_inst::Fill()
|
|
{
|
|
DataType* dst0 = (DataType*)&m_buffer[0];
|
|
DataType* dst1 = (DataType*)&m_buffer[m_count];
|
|
|
|
// note that sizeof(DataType) may be larger or smaller than sizeof(OutputType)
|
|
// i.e. we want to fill the buffer starting at 'dst0' and ending before 'dst1', but
|
|
// there may be more or less than m_count elements
|
|
|
|
#if !__PS3 // optional loop unrolling
|
|
#define DEF_UNROLLCODE(i,args) ARG(0,args)[i] = ARG(1,args)
|
|
#define DEF_UNROLL(n,T,ptr,end,code) \
|
|
for (const T* dst##n = &end[-(n)]; ptr <= dst##n; ptr += (n)) \
|
|
{ \
|
|
REP##n##_SEPARATOR_FOREACH_ARGS(UNROLLCODE,;,(ptr,code)); \
|
|
} \
|
|
// end.
|
|
|
|
DEF_UNROLL(8,DataType,dst0,dst1,ConverterClass::ConvertSrc(RNG::Generate()));
|
|
DEF_UNROLL(4,DataType,dst0,dst1,ConverterClass::ConvertSrc(RNG::Generate()));
|
|
DEF_UNROLL(2,DataType,dst0,dst1,ConverterClass::ConvertSrc(RNG::Generate()));
|
|
|
|
#undef DEF_UNROLLCODE
|
|
#undef DEF_UNROLL
|
|
#endif
|
|
|
|
/*
|
|
simpler version which does not use ARG() macros .. however DEF_UNROLL cannot be shared
|
|
|
|
#if !__PS3 // optional loop unrolling
|
|
#define DEF_UNROLLCODE(i) dst0[i] = ConverterClass::ConvertSrc(RNG::Generate())
|
|
#define DEF_UNROLL(n,T,ptr,end) \
|
|
for (const DataType* dst##n = &dst1[-(n)]; dst0 <= dst##n; dst0 += (n)) \
|
|
{ \
|
|
REP##n##_SEPARATOR_FOREACH(UNROLLCODE,;); \
|
|
} \
|
|
// end.
|
|
|
|
DEF_UNROLL(8);
|
|
DEF_UNROLL(4);
|
|
DEF_UNROLL(2);
|
|
|
|
#undef DEF_UNROLLCODE
|
|
#undef DEF_UNROLL
|
|
#endif
|
|
*/
|
|
|
|
while (dst0 < dst1)
|
|
{
|
|
*(dst0++) = ConverterClass::ConvertSrc(RNG::Generate());
|
|
}
|
|
|
|
m_index = 0;
|
|
}
|
|
|
|
#undef CBufferedRNG_template_decl
|
|
#undef CBufferedRNG_template_inst
|
|
|
|
#if !__FINAL
|
|
|
|
namespace _test_CBufferedRNG
|
|
{
|
|
void test();
|
|
}
|
|
|
|
#endif // !_FINAL
|
|
|
|
// ================================================================================================
|
|
|
|
#ifndef SAFE_RELEASE
|
|
#define SAFE_RELEASE(x) if ((x) != NULL) { (x)->Release(); (x) = NULL; }
|
|
#endif
|
|
#ifndef SAFE_DELETE
|
|
#define SAFE_DELETE(ptr) { if (ptr != NULL) { delete ptr; ptr = NULL; } }
|
|
#endif
|
|
#ifndef SAFE_DELETE_ARRAY
|
|
#define SAFE_DELETE_ARRAY(ptr) { if (ptr != NULL) { delete[] ptr; ptr = NULL; } }
|
|
#endif
|
|
|
|
#define ADD_WIDGET( bk,type,var, ...) bk.Add##type(STRING(var) , &var, ##__VA_ARGS__)
|
|
#define ADD_WIDGET2(bk,type,var,ext,...) bk.Add##type(STRING(var) ext, &var, ##__VA_ARGS__)
|
|
|
|
// ================================================================================================
|
|
|
|
__forceinline Vector4 BuildPlane(const Vector3& p, const Vector3& n)
|
|
{
|
|
Vector4 plane = n;
|
|
plane.w = -n.Dot(p);
|
|
return plane;
|
|
}
|
|
|
|
__forceinline Vector4 BuildPlane(const Vector3& p0, const Vector3& p1, const Vector3& p2)
|
|
{
|
|
Vector3 n;
|
|
n.Cross(p1 - p0, p2 - p0);
|
|
n.Normalize();
|
|
return BuildPlane(p0, n);
|
|
}
|
|
|
|
__forceinline float PlaneDistanceTo(const Vector4& plane, const Vector3& p)
|
|
{
|
|
return plane.GetVector3().Dot(p) + plane.w;
|
|
}
|
|
|
|
__forceinline Vector3 PlaneProject(const Vector4& plane, const Vector3& p)
|
|
{
|
|
return p - plane.GetVector3()*PlaneDistanceTo(plane, p);
|
|
}
|
|
|
|
__forceinline Vector4 PlaneNormalise(const Vector4& plane)
|
|
{
|
|
return plane*(1.0f/plane.GetVector3().Mag());
|
|
}
|
|
|
|
namespace rage {
|
|
namespace Vec {
|
|
|
|
__forceinline Vector_4V_Out _V3FindMinAbsAxis(Vector_4V_In v) // nice vector-friendly implementation
|
|
{
|
|
const Vector_4V xyz = V4Abs (v);
|
|
const Vector_4V yzx = V4Permute<Y,Z,X,W> (xyz);
|
|
const Vector_4V zxy = V4Permute<Z,X,Y,W> (xyz);
|
|
const Vector_4V xyz_CmpLT_yzx = V4IsLessThanV (xyz, yzx);
|
|
const Vector_4V xyz_CmpLE_zxy = V4IsLessThanOrEqualV(xyz, zxy);
|
|
const Vector_4V a = V4And (xyz_CmpLT_yzx, xyz_CmpLE_zxy);
|
|
const Vector_4V ax = V4SplatX (a);
|
|
const Vector_4V ay = V4SplatY (a);
|
|
const Vector_4V az = V4SplatZ (a);
|
|
const Vector_4V axy = V4Or (ax, ay);
|
|
const Vector_4V axyz = V4Or (axy, az); // could use si_orx(a) on SPU
|
|
const Vector_4V mask = V4Andc (V4VConstant(V_MASKX), axyz); // 0xffffffff in x component iff a.xyz = 0,0,0
|
|
const Vector_4V b = V4Or (a, mask);
|
|
const Vector_4V result = V4And (b, V4VConstant(V_ONE));
|
|
|
|
return result;
|
|
}
|
|
|
|
} // namespace Vec
|
|
} // namespace rage
|
|
|
|
__forceinline Vec3V_Out FindMinAbsAxis(Vec3V_In v)
|
|
{
|
|
return Vec3V(Vec::_V3FindMinAbsAxis(v.GetIntrin128ConstRef()));
|
|
}
|
|
|
|
__forceinline Vector3 FindMinAbsAxis(const Vector3& v)
|
|
{
|
|
#if 1
|
|
|
|
return Vector3(Vec::_V3FindMinAbsAxis(v));
|
|
|
|
#else // reference implementation, arranged to show vector operations
|
|
|
|
const Vector3 xyz = Vector3(Abs<float>(v.x), Abs<float>(v.y), Abs<float>(v.z));
|
|
const Vector3 yzx = Vector3(xyz.y, xyz.z, xyz.x);
|
|
const Vector3 zxy = Vector3(xyz.z, xyz.x, xyz.y);
|
|
|
|
const Vector3 xyz_CmpLT_yzx = Vector3(xyz.x < yzx.x ? 1.0f : 0.0f, xyz.y < yzx.y ? 1.0f : 0.0f, xyz.z < yzx.z ? 1.0f : 0.0f);
|
|
const Vector3 xyz_CmpLE_zxy = Vector3(xyz.x <= zxy.x ? 1.0f : 0.0f, xyz.y <= zxy.y ? 1.0f : 0.0f, xyz.z <= zxy.z ? 1.0f : 0.0f);
|
|
|
|
Vector3 a = xyz_CmpLT_yzx*xyz_CmpLE_zxy; // vector-AND, mask with 1.0f
|
|
|
|
a.x += (a.x == 0 && a.y == 0 && a.z == 0) ? 1.0f : 0.0f;
|
|
|
|
return a;
|
|
|
|
#endif
|
|
}
|
|
|
|
#if 0
|
|
__forceinline Vector3 FindMinAbsAxis_REFERENCE(const Vector3& v)
|
|
{
|
|
const float x = Abs<float>(v.x);
|
|
const float y = Abs<float>(v.y);
|
|
const float z = Abs<float>(v.z);
|
|
|
|
if (x < y && x <= z) return Vector3(1,0,0);
|
|
if (y < z && y <= x) return Vector3(0,1,0);
|
|
if (z < x && z <= y) return Vector3(0,0,1);
|
|
|
|
return Vector3(1,0,0);
|
|
}
|
|
|
|
__forceinline void FindMinAbsAxis_TEST()
|
|
{
|
|
int numErrors = 0;
|
|
|
|
for (float z = -3.0f; z <= 3.0f; z += 1.0f)
|
|
{
|
|
for (float y = -3.0f; y <= 3.0f; y += 1.0f)
|
|
{
|
|
for (float x = -3.0f; x <= 3.0f; x += 1.0f)
|
|
{
|
|
const Vector3 a = FindMinAbsAxis (Vector3(x,y,z));
|
|
const Vector3 b = FindMinAbsAxis_REFERENCE(Vector3(x,y,z));
|
|
|
|
const float a_sum = a.x + a.y + a.z;
|
|
|
|
if (a_sum != 1.0f || a.x != b.x || a.y != b.y || a.z != b.z)
|
|
{
|
|
Displayf(
|
|
"v=(%d,%d,%d), FindMinAxis=(%d,%d,%d), REF=(%d,%d,%d)",
|
|
(int)x,
|
|
(int)y,
|
|
(int)z,
|
|
(int)a.x,
|
|
(int)a.y,
|
|
(int)a.z,
|
|
(int)b.x,
|
|
(int)b.y,
|
|
(int)b.z
|
|
);
|
|
|
|
numErrors++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Displayf("FindMinAbsAxis_TEST: %d errors", numErrors);
|
|
}
|
|
#endif
|
|
|
|
#endif // _RENDERER_UTIL_UTIL_H_
|