1
0
mirror of https://github.com/alliedmodders/hl2sdk.git synced 2025-07-19 01:58:14 +08:00

Update mathlib lib/a

This commit is contained in:
GAMMACASE
2025-07-11 20:05:36 +03:00
parent 658ff26c47
commit a4be8ce51f
4 changed files with 336 additions and 312 deletions

Binary file not shown.

Binary file not shown.

View File

@ -80,8 +80,6 @@ float VectorNormalize (Vector& vec)
return radius;
}
// TODO: Add fast C VectorNormalizeFast.
// Perhaps use approximate rsqrt trick, if the accuracy isn't too bad.
void FASTCALL _VectorNormalizeFast (Vector& vec)
@ -427,6 +425,33 @@ void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
out[2][column] = in.z;
}
void MatrixScaleBy ( const float flScale, matrix3x4_t &out )
{
out[0][0] *= flScale;
out[1][0] *= flScale;
out[2][0] *= flScale;
out[0][1] *= flScale;
out[1][1] *= flScale;
out[2][1] *= flScale;
out[0][2] *= flScale;
out[1][2] *= flScale;
out[2][2] *= flScale;
}
void MatrixScaleByZero ( matrix3x4_t &out )
{
out[0][0] = 0.0f;
out[1][0] = 0.0f;
out[2][0] = 0.0f;
out[0][1] = 0.0f;
out[1][1] = 0.0f;
out[2][1] = 0.0f;
out[0][2] = 0.0f;
out[1][2] = 0.0f;
out[2][2] = 0.0f;
}
int VectorCompare (const float *v1, const float *v2)
{
@ -566,53 +591,128 @@ void ConcatRotations (const float in1[3][3], const float in2[3][3], float out[3]
in1[2][2] * in2[2][2];
}
void ConcatTransforms_Aligned( const matrix3x4_t &m0, const matrix3x4_t &m1, matrix3x4_t &out )
{
Assert( (((size_t)&m0) % 16) == 0 );
Assert( (((size_t)&m1) % 16) == 0 );
Assert( (((size_t)&out) % 16) == 0 );
fltx4 lastMask = *(fltx4 *)(&g_SIMD_ComponentMask[3]);
fltx4 rowA0 = LoadAlignedSIMD( m0.m_flMatVal[0] );
fltx4 rowA1 = LoadAlignedSIMD( m0.m_flMatVal[1] );
fltx4 rowA2 = LoadAlignedSIMD( m0.m_flMatVal[2] );
fltx4 rowB0 = LoadAlignedSIMD( m1.m_flMatVal[0] );
fltx4 rowB1 = LoadAlignedSIMD( m1.m_flMatVal[1] );
fltx4 rowB2 = LoadAlignedSIMD( m1.m_flMatVal[2] );
// now we have the rows of m0 and the columns of m1
// first output row
fltx4 A0 = SplatXSIMD(rowA0);
fltx4 A1 = SplatYSIMD(rowA0);
fltx4 A2 = SplatZSIMD(rowA0);
fltx4 mul00 = MulSIMD( A0, rowB0 );
fltx4 mul01 = MulSIMD( A1, rowB1 );
fltx4 mul02 = MulSIMD( A2, rowB2 );
fltx4 out0 = AddSIMD( mul00, AddSIMD(mul01,mul02) );
// second output row
A0 = SplatXSIMD(rowA1);
A1 = SplatYSIMD(rowA1);
A2 = SplatZSIMD(rowA1);
fltx4 mul10 = MulSIMD( A0, rowB0 );
fltx4 mul11 = MulSIMD( A1, rowB1 );
fltx4 mul12 = MulSIMD( A2, rowB2 );
fltx4 out1 = AddSIMD( mul10, AddSIMD(mul11,mul12) );
// third output row
A0 = SplatXSIMD(rowA2);
A1 = SplatYSIMD(rowA2);
A2 = SplatZSIMD(rowA2);
fltx4 mul20 = MulSIMD( A0, rowB0 );
fltx4 mul21 = MulSIMD( A1, rowB1 );
fltx4 mul22 = MulSIMD( A2, rowB2 );
fltx4 out2 = AddSIMD( mul20, AddSIMD(mul21,mul22) );
// add in translation vector
A0 = AndSIMD(rowA0,lastMask);
A1 = AndSIMD(rowA1,lastMask);
A2 = AndSIMD(rowA2,lastMask);
out0 = AddSIMD(out0, A0);
out1 = AddSIMD(out1, A1);
out2 = AddSIMD(out2, A2);
StoreAlignedSIMD( out.m_flMatVal[0], out0 );
StoreAlignedSIMD( out.m_flMatVal[1], out1 );
StoreAlignedSIMD( out.m_flMatVal[2], out2 );
}
/*
================
R_ConcatTransforms
================
*/
void ConcatTransforms (const matrix3x4_t& in1, const matrix3x4_t& in2, matrix3x4_t& out)
{
Assert( s_bMathlibInitialized );
if ( &in1 == &out )
#if 0
// test for ones that'll be 2x faster
if ( (((size_t)&in1) % 16) == 0 && (((size_t)&in2) % 16) == 0 && (((size_t)&out) % 16) == 0 )
{
matrix3x4_t in1b;
MatrixCopy( in1, in1b );
ConcatTransforms( in1b, in2, out );
ConcatTransforms_Aligned( in1, in2, out );
return;
}
if ( &in2 == &out )
{
matrix3x4_t in2b;
MatrixCopy( in2, in2b );
ConcatTransforms( in1, in2b, out );
return;
}
out[0][0] = in1[0][0] * in2[0][0] + in1[0][1] * in2[1][0] +
in1[0][2] * in2[2][0];
out[0][1] = in1[0][0] * in2[0][1] + in1[0][1] * in2[1][1] +
in1[0][2] * in2[2][1];
out[0][2] = in1[0][0] * in2[0][2] + in1[0][1] * in2[1][2] +
in1[0][2] * in2[2][2];
out[0][3] = in1[0][0] * in2[0][3] + in1[0][1] * in2[1][3] +
in1[0][2] * in2[2][3] + in1[0][3];
out[1][0] = in1[1][0] * in2[0][0] + in1[1][1] * in2[1][0] +
in1[1][2] * in2[2][0];
out[1][1] = in1[1][0] * in2[0][1] + in1[1][1] * in2[1][1] +
in1[1][2] * in2[2][1];
out[1][2] = in1[1][0] * in2[0][2] + in1[1][1] * in2[1][2] +
in1[1][2] * in2[2][2];
out[1][3] = in1[1][0] * in2[0][3] + in1[1][1] * in2[1][3] +
in1[1][2] * in2[2][3] + in1[1][3];
out[2][0] = in1[2][0] * in2[0][0] + in1[2][1] * in2[1][0] +
in1[2][2] * in2[2][0];
out[2][1] = in1[2][0] * in2[0][1] + in1[2][1] * in2[1][1] +
in1[2][2] * in2[2][1];
out[2][2] = in1[2][0] * in2[0][2] + in1[2][1] * in2[1][2] +
in1[2][2] * in2[2][2];
out[2][3] = in1[2][0] * in2[0][3] + in1[2][1] * in2[1][3] +
in1[2][2] * in2[2][3] + in1[2][3];
#endif
fltx4 lastMask = *(fltx4 *)(&g_SIMD_ComponentMask[3]);
fltx4 rowA0 = LoadUnalignedSIMD( in1.m_flMatVal[0] );
fltx4 rowA1 = LoadUnalignedSIMD( in1.m_flMatVal[1] );
fltx4 rowA2 = LoadUnalignedSIMD( in1.m_flMatVal[2] );
fltx4 rowB0 = LoadUnalignedSIMD( in2.m_flMatVal[0] );
fltx4 rowB1 = LoadUnalignedSIMD( in2.m_flMatVal[1] );
fltx4 rowB2 = LoadUnalignedSIMD( in2.m_flMatVal[2] );
// now we have the rows of m0 and the columns of m1
// first output row
fltx4 A0 = SplatXSIMD(rowA0);
fltx4 A1 = SplatYSIMD(rowA0);
fltx4 A2 = SplatZSIMD(rowA0);
fltx4 mul00 = MulSIMD( A0, rowB0 );
fltx4 mul01 = MulSIMD( A1, rowB1 );
fltx4 mul02 = MulSIMD( A2, rowB2 );
fltx4 out0 = AddSIMD( mul00, AddSIMD(mul01,mul02) );
// second output row
A0 = SplatXSIMD(rowA1);
A1 = SplatYSIMD(rowA1);
A2 = SplatZSIMD(rowA1);
fltx4 mul10 = MulSIMD( A0, rowB0 );
fltx4 mul11 = MulSIMD( A1, rowB1 );
fltx4 mul12 = MulSIMD( A2, rowB2 );
fltx4 out1 = AddSIMD( mul10, AddSIMD(mul11,mul12) );
// third output row
A0 = SplatXSIMD(rowA2);
A1 = SplatYSIMD(rowA2);
A2 = SplatZSIMD(rowA2);
fltx4 mul20 = MulSIMD( A0, rowB0 );
fltx4 mul21 = MulSIMD( A1, rowB1 );
fltx4 mul22 = MulSIMD( A2, rowB2 );
fltx4 out2 = AddSIMD( mul20, AddSIMD(mul21,mul22) );
// add in translation vector
A0 = AndSIMD(rowA0,lastMask);
A1 = AndSIMD(rowA1,lastMask);
A2 = AndSIMD(rowA2,lastMask);
out0 = AddSIMD(out0, A0);
out1 = AddSIMD(out1, A1);
out2 = AddSIMD(out2, A2);
// write to output
StoreUnalignedSIMD( out.m_flMatVal[0], out0 );
StoreUnalignedSIMD( out.m_flMatVal[1], out1 );
StoreUnalignedSIMD( out.m_flMatVal[2], out2 );
}
@ -1359,7 +1459,9 @@ float Bias( float x, float biasAmt )
{
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
}
return pow( x, lastExponent );
float fRet = pow( x, lastExponent );
Assert ( !IS_NAN( fRet ) );
return fRet;
}
@ -1375,7 +1477,9 @@ float Gain( float x, float biasAmt )
float SmoothCurve( float x )
{
return (1 - cos( x * M_PI )) * 0.5f;
// Actual smooth curve. Visualization:
// http://www.wolframalpha.com/input/?i=plot%5B+0.5+*+%281+-+cos%5B2+*+pi+*+x%5D%29+for+x+%3D+%280%2C+1%29+%5D
return 0.5f * (1 - cos( 2.0f * M_PI * x ) );
}
@ -2408,9 +2512,7 @@ void Hermite_SplineBasis( float t, float basis[4] )
//-----------------------------------------------------------------------------
// BUG: the VectorSubtract()'s calls go away if the global optimizer is enabled
#ifdef _MSC_VER
#pragma optimize( "g", off )
#endif
void Hermite_Spline( const Vector &p0, const Vector &p1, const Vector &p2, float t, Vector& output )
{
@ -2420,9 +2522,7 @@ void Hermite_Spline( const Vector &p0, const Vector &p1, const Vector &p2, float
Hermite_Spline( p1, p2, e10, e21, t, output );
}
#ifdef _MSC_VER
#pragma optimize( "", on )
#endif
float Hermite_Spline( float p0, float p1, float p2, float t )
{
@ -3188,18 +3288,15 @@ bool CalcLineToLineIntersectionSegment(
return true;
}
#ifdef _MSC_VER
#pragma optimize( "", off )
#endif
#ifndef EXCEPTION_EXECUTE_HANDLER
#define EXCEPTION_EXECUTE_HANDLER 1
#endif
#ifdef _MSC_VER
#pragma optimize( "", on )
#endif
static bool s_b3DNowEnabled = false;
static bool s_bMMXEnabled = false;
static bool s_bSSEEnabled = false;
static bool s_bSSE2Enabled = false;
@ -3213,7 +3310,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
#if !defined( _X360 )
// Grab the processor information:
const CPUInformation& pi = GetCPUInformation();
const CPUInformation& pi = *GetCPUInformation();
// Select the default generic routines.
pfSqrt = _sqrtf;
@ -3235,38 +3332,54 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
s_bMMXEnabled = false;
}
// GAMMACASE: Since the sse.cpp doesn't have any x64 code rn
// we can't use the sse stuff here
#ifndef COMPILER_MSVC64
// SSE Generally performs better than 3DNow when present, so this is placed
// first to allow SSE to override these settings.
#if !defined( OSX ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(LINUX)
if ( bAllow3DNow && pi.m_b3DNow )
{
s_b3DNowEnabled = true;
// Select the 3DNow specific routines if available;
pfVectorNormalize = _3DNow_VectorNormalize;
pfVectorNormalizeFast = _3DNow_VectorNormalizeFast;
pfInvRSquared = _3DNow_InvRSquared;
pfSqrt = _3DNow_Sqrt;
pfRSqrt = _3DNow_RSqrt;
pfRSqrtFast = _3DNow_RSqrt;
}
else
#endif
{
s_b3DNowEnabled = false;
}
if ( bAllowSSE && pi.m_bSSE )
{
s_bSSEEnabled = true;
#ifndef PLATFORM_WINDOWS_PC64
// These are not yet available.
// Select the SSE specific routines if available
pfVectorNormalizeFast = _SSE_VectorNormalizeFast;
pfInvRSquared = _SSE_InvRSquared;
pfSqrt = _SSE_Sqrt;
pfRSqrt = _SSE_RSqrtAccurate;
pfRSqrtFast = _SSE_RSqrtFast;
#ifdef _WIN32
#endif
#ifdef PLATFORM_WINDOWS_PC32
pfFastSinCos = _SSE_SinCos;
pfFastCos = _SSE_cos;
#endif
}
else
{
s_bSSEEnabled = false;
}
#else
s_bSSEEnabled = false;
#endif
#ifndef COMPILER_MSVC64
if ( bAllowSSE2 && pi.m_bSSE2 )
{
s_bSSE2Enabled = true;
#ifdef _WIN32
#ifdef PLATFORM_WINDOWS_PC32
pfFastSinCos = _SSE2_SinCos;
pfFastCos = _SSE2_cos;
#endif
@ -3275,10 +3388,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
{
s_bSSE2Enabled = false;
}
#else
s_bSSE2Enabled = false;
#endif
#endif
#endif // !_X360
s_bMathlibInitialized = true;
@ -3286,6 +3396,12 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
BuildGammaTable( gamma, texGamma, brightness, overbright );
}
bool MathLib_3DNowEnabled( void )
{
Assert( s_bMathlibInitialized );
return s_b3DNowEnabled;
}
bool MathLib_MMXEnabled( void )
{
Assert( s_bMathlibInitialized );
@ -3304,6 +3420,20 @@ bool MathLib_SSE2Enabled( void )
return s_bSSE2Enabled;
}
float Approach( float target, float value, float speed )
{
float delta = target - value;
if ( delta > speed )
value += speed;
else if ( delta < -speed )
value -= speed;
else
value = target;
return value;
}
// BUGBUG: Why doesn't this call angle diff?!?!?
float ApproachAngle( float target, float value, float speed )
{
@ -3990,8 +4120,8 @@ void HSVtoRGB( const Vector &hsv, Vector &rgb )
hue = 0.0F;
}
hue /= 60.0F;
int i = static_cast<int>(hue); // integer part
float32 f = hue - i; // fractional part
int i = hue; // integer part
float32 f = hue - i; // fractional part
float32 p = hsv.z * (1.0F - hsv.y);
float32 q = hsv.z * (1.0F - hsv.y * f);
float32 t = hsv.z * (1.0F - hsv.y * (1.0F - f));

View File

@ -14,9 +14,7 @@
#include "tier0/dbg.h"
#include "mathlib/math_pfns.h"
#ifndef ALIGN8_POST
#define ALIGN8_POST
#endif
// plane_t structure
// !!! if this is changed, it must be changed in asm code too !!!
// FIXME: does the asm code even exist anymore?
@ -151,6 +149,9 @@ struct matrix3x4_t
inline void ConcatRotations( const matrix3x4_t &other );
inline void ConcatTransforms( const matrix3x4_t &other );
inline void ScaleBy( const float value );
inline void ScaleByZero();
inline void Multiply( const matrix3x4_t &other );
inline void Transpose();
@ -161,6 +162,9 @@ struct matrix3x4_t
inline float RowDotProduct( int row, const Vector &in ) const;
inline float ColumnDotProduct( MatrixAxisType_t column, const Vector &in ) const;
inline Vector GetTranslation() const;
inline void SetTranslation( const Vector &in );
inline Vector GetColumn( MatrixAxisType_t column ) const;
inline void SetColumn( const Vector &in, MatrixAxisType_t column );
@ -205,7 +209,11 @@ struct matrix3x4_t
}
}
matrix3x4_t operator *( const matrix3x4_t &other ) { Multiply( other ); return *this; }
matrix3x4_t &operator *=( const matrix3x4_t &other ) { Multiply( other ); return *this; }
matrix3x4_t &operator *=( float value ) { ScaleBy( value ); return *this; }
matrix3x4_t operator *( const matrix3x4_t &other ) const { matrix3x4_t temp( *this ); temp.Multiply( other ); return temp; }
matrix3x4_t operator *( float value ) const { matrix3x4_t temp( *this ); temp.ScaleBy( value ); return temp; }
float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
@ -378,7 +386,7 @@ void inline SinCos( float radians, float *sine, float *cosine )
{
#if defined( _X360 )
XMScalarSinCos( sine, cosine, radians );
#elif defined( COMPILER_MSVC32 )
#elif defined( PLATFORM_WINDOWS_PC32 )
_asm
{
fld DWORD PTR [radians]
@ -390,15 +398,15 @@ void inline SinCos( float radians, float *sine, float *cosine )
fstp DWORD PTR [edx]
fstp DWORD PTR [eax]
}
#elif defined( GNUC )
#elif defined( PLATFORM_WINDOWS_PC64 )
*sine = sin( radians );
*cosine = cos( radians );
#elif defined( POSIX )
double __cosr, __sinr;
__asm __volatile__ ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
__asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
*sine = __sinr;
*cosine = __cosr;
#else
*sine = sinf(radians);
*cosine = cosf(radians);
#endif
}
@ -505,6 +513,19 @@ bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float f
void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
{
MatrixGetColumn ( in, 3, out );
}
inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out )
{
MatrixSetColumn ( in, 3, out );
}
void MatrixScaleBy ( const float flScale, matrix3x4_t &out );
void MatrixScaleByZero ( matrix3x4_t &out );
//void DecomposeRotation( const matrix3x4_t &mat, float *out );
void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
@ -531,10 +552,6 @@ void QuaternionInvert( const Quaternion &p, Quaternion &q );
float QuaternionNormalize( Quaternion &q );
void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt );
void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt );
void QuaternionExp( const Quaternion &p, Quaternion &q );
void QuaternionLn( const Quaternion &p, Quaternion &q );
void QuaternionAverageExponential( Quaternion &q, int nCount, const Quaternion *pQuaternions, const float *pflWeights = NULL );
void QuaternionLookAt( const Vector &vecForward, const Vector &referenceUp, Quaternion &q );
void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix );
void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix );
void QuaternionAngles( const Quaternion &q, QAngle &angles );
@ -571,16 +588,16 @@ inline float anglemod(float a)
inline float RemapVal( float val, float A, float B, float C, float D)
{
if ( A == B )
return fsel( val - B , D , C );
return val >= B ? D : C;
return C + (D - C) * (val - A) / (B - A);
}
inline float RemapValClamped( float val, float A, float B, float C, float D)
{
if ( A == B )
return fsel( val - B , D , C );
return val >= B ? D : C;
float cVal = (val - A) / (B - A);
cVal = clamp<float>( cVal, 0.0f, 1.0f );
cVal = clamp( cVal, 0.0f, 1.0f );
return C + (D - C) * cVal;
}
@ -671,7 +688,7 @@ template<> FORCEINLINE QAngleByValue Lerp<QAngleByValue>( float flPercent, const
#endif // VECTOR_NO_SLOW_OPERATIONS
// Swap two of anything.
/// Same as swap(), but won't cause problems with std::swap
template <class T>
FORCEINLINE void V_swap( T& x, T& y )
{
@ -686,20 +703,15 @@ template <class T> FORCEINLINE T AVG(T a, T b)
}
// number of elements in an array of static size
#define NELEMS(x) ((sizeof(x))/sizeof(x[0]))
#define NELEMS(x) ARRAYSIZE(x)
// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector));
#define XYZ(v) (v).x,(v).y,(v).z
//
// Returns a clamped value in the range [min, max].
//
#define V_clamp(val, min, max) (((val) > (max)) ? (max) : (((val) < (min)) ? (min) : (val)))
inline float Sign( float x )
{
return fsel( x, 1.0f, -1.0f ); // x >= 0 ? 1.0f : -1.0f
//return (x <0.0f) ? -1.0f : 1.0f;
return (x <0.0f) ? -1.0f : 1.0f;
}
//
@ -731,16 +743,6 @@ inline int ClampArrayBounds( int n, unsigned maxindex )
}
// Turn a number "inside out".
// See Recording Animation in Binary Order for Progressive Temporal Refinement
// by Paul Heckbert from "Graphics Gems".
//
// If you want to iterate something from 0 to n, you can use this to iterate non-sequentially, in
// such a way that you will start with widely separated values and then refine the gaps between
// them, as you would for progressive refinement. This works with non-power of two ranges.
int InsideOut( int nTotal, int nCounter );
#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \
(((p)->type < 3)? \
( \
@ -801,9 +803,7 @@ inline void PositionMatrix( const Vector &position, matrix3x4_t &mat )
inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position )
{
position[0] = matrix[0][3];
position[1] = matrix[1][3];
position[2] = matrix[2][3];
MatrixGetColumn( matrix, 3, position );
}
inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
@ -942,18 +942,6 @@ inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, c
void ClearBounds (Vector& mins, Vector& maxs);
void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs);
//-----------------------------------------------------------------------------
// Ensures that the min and max bounds values are valid.
// (ClearBounds() sets min > max, which is clearly invalid.)
//-----------------------------------------------------------------------------
bool AreBoundsValid( const Vector &vMin, const Vector &vMax );
//-----------------------------------------------------------------------------
// Returns true if the provided point is in the AABB defined by vMin
// at the lower corner and vMax at the upper corner.
//-----------------------------------------------------------------------------
bool IsPointInBounds( const Vector &vPoint, const Vector &vMin, const Vector &vMax );
//
// COLORSPACE/GAMMA CONVERSION STUFF
//
@ -1162,7 +1150,9 @@ inline float SimpleSplineRemapValClamped( float val, float A, float B, float C,
FORCEINLINE int RoundFloatToInt(float f)
{
#if defined( _X360 )
#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) || defined(__x86_64__)
return _mm_cvtss_si32(_mm_load_ss(&f));
#elif defined( _X360 )
#ifdef Assert
Assert( IsFPUControlWordSet() );
#endif
@ -1173,67 +1163,18 @@ FORCEINLINE int RoundFloatToInt(float f)
};
flResult = __fctiw( f );
return pResult[1];
#else // !X360
int nResult;
#if defined( COMPILER_MSVC32 )
__asm
{
fld f
fistp nResult
}
#elif GNUC
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#else
nResult = static_cast<int>(f);
#endif
return nResult;
#error Unknown architecture
#endif
}
FORCEINLINE unsigned char RoundFloatToByte(float f)
{
#if defined( _X360 )
int nResult = RoundFloatToInt(f);
#ifdef Assert
Assert( IsFPUControlWordSet() );
#endif
union
{
double flResult;
int pIntResult[2];
unsigned char pResult[8];
};
flResult = __fctiw( f );
#ifdef Assert
Assert( pIntResult[1] >= 0 && pIntResult[1] <= 255 );
#endif
return pResult[7];
#else // !X360
int nResult;
#if defined( COMPILER_MSVC32 )
__asm
{
fld f
fistp nResult
}
#elif GNUC
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#else
nResult = static_cast<unsigned int> (f) & 0xff;
#endif
#ifdef Assert
Assert( nResult >= 0 && nResult <= 255 );
#endif
return nResult;
Assert( (nResult & ~0xFF) == 0 );
#endif
return (unsigned char) nResult;
}
FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
@ -1253,25 +1194,41 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
return pResult[1];
#else // !X360
#if defined( COMPILER_MSVC32 )
unsigned char nResult[8];
__asm
#if defined( PLATFORM_WINDOWS_PC64 )
uint nRet = ( uint ) f;
if ( nRet & 1 )
{
fld f
fistp qword ptr nResult
if ( ( f - floor( f ) >= 0.5 ) )
{
nRet++;
}
}
return *((unsigned long*)nResult);
#elif defined( COMPILER_GCC )
else
{
if ( ( f - floor( f ) > 0.5 ) )
{
nRet++;
}
}
return nRet;
#else // PLATFORM_WINDOWS_PC64
unsigned char nResult[8];
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
return *((unsigned long*)nResult);
#else
return static_cast<unsigned long>(f);
#endif
#endif
#if defined( _WIN32 )
__asm
{
fld f
fistp qword ptr nResult
}
#elif POSIX
__asm __volatile__ (
"fistpl %0;": "=m" (nResult): "t" (f) : "st"
);
#endif
return *((unsigned long*)nResult);
#endif // PLATFORM_WINDOWS_PC64
#endif // !X360
}
FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
@ -1291,76 +1248,54 @@ FORCEINLINE int Float2Int( float a )
flResult = __fctiwz( a );
return pResult[1];
#else // !X360
int RetVal;
#if defined( COMPILER_MSVC32 )
int CtrlwdHolder;
int CtrlwdSetter;
__asm
{
fld a // push 'a' onto the FP stack
fnstcw CtrlwdHolder // store FPU control word
movzx eax, CtrlwdHolder // move and zero extend word into eax
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
or eax, 0x00000C00 // set rounding mode bits to round towards zero
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
fldcw CtrlwdSetter // Entering plaid!
fistp RetVal // Store and converted (to int) result
fldcw CtrlwdHolder // Restore control word
}
#else
RetVal = static_cast<int>( a );
#endif
return RetVal;
// Rely on compiler to generate CVTTSS2SI on x86
return (int) a;
#endif
}
// Over 15x faster than: (int)floor(value)
inline int Floor2Int( float a )
{
int RetVal;
#if defined( _X360 )
RetVal = (int)floor( a );
#elif defined( COMPILER_MSVC32 )
int CtrlwdHolder;
int CtrlwdSetter;
__asm
{
fld a // push 'a' onto the FP stack
fnstcw CtrlwdHolder // store FPU control word
movzx eax, CtrlwdHolder // move and zero extend word into eax
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
or eax, 0x00000400 // set rounding mode bits to round down
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
fldcw CtrlwdSetter // Entering plaid!
fistp RetVal // Store floored and converted (to int) result
fldcw CtrlwdHolder // Restore control word
}
int RetVal;
#if defined( __i386__ )
// Convert to int and back, compare, subtract one if too big
__m128 a128 = _mm_set_ss(a);
RetVal = _mm_cvtss_si32(a128);
__m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
RetVal -= _mm_comigt_ss( rounded128, a128 );
#else
RetVal = static_cast<int>( floor(a) );
#endif
return RetVal;
}
//-----------------------------------------------------------------------------
// Fast color conversion from float to unsigned char
//-----------------------------------------------------------------------------
FORCEINLINE unsigned char FastFToC( float c )
FORCEINLINE unsigned int FastFToC( float c )
{
volatile float dc;
// ieee trick
dc = c * 255.0f + (float)(1 << 23);
// return the lsb
#if defined( _X360 )
return ((unsigned char*)&dc)[3];
#if defined( __i386__ )
// IEEE float bit manipulation works for values between [0, 1<<23)
union { float f; int i; } convert = { c*255.0f + (float)(1<<23) };
return convert.i & 255;
#else
return *(unsigned char*)&dc;
// consoles CPUs suffer from load-hit-store penalty
return Float2Int( c * 255.0f );
#endif
}
//-----------------------------------------------------------------------------
// Fast conversion from float to integer with magnitude less than 2**22
//-----------------------------------------------------------------------------
FORCEINLINE int FastFloatToSmallInt( float c )
{
#if defined( __i386__ )
// IEEE float bit manipulation works for values between [-1<<22, 1<<22)
union { float f; int i; } convert = { c + (float)(3<<22) };
return (convert.i & ((1<<23)-1)) - (1<<22);
#else
// consoles CPUs suffer from load-hit-store penalty
return Float2Int( c );
#endif
}
@ -1372,35 +1307,22 @@ FORCEINLINE unsigned char FastFToC( float c )
inline float ClampToMsec( float in )
{
int msec = Floor2Int( in * 1000.0f + 0.5f );
return msec / 1000.0f;
return 0.001f * msec;
}
// Over 15x faster than: (int)ceil(value)
inline int Ceil2Int( float a )
{
int RetVal;
#if defined( _X360 )
RetVal = (int)ceil( a );
#elif defined( COMPILER_MSVC32 )
int CtrlwdHolder;
int CtrlwdSetter;
__asm
{
fld a // push 'a' onto the FP stack
fnstcw CtrlwdHolder // store FPU control word
movzx eax, CtrlwdHolder // move and zero extend word into eax
and eax, 0xFFFFF3FF // set all bits except rounding bits to 1
or eax, 0x00000800 // set rounding mode bits to round down
mov CtrlwdSetter, eax // Prepare to set the rounding mode -- prepare to enter plaid!
fldcw CtrlwdSetter // Entering plaid!
fistp RetVal // Store floored and converted (to int) result
fldcw CtrlwdHolder // Restore control word
}
#if defined( __i386__ )
// Convert to int and back, compare, add one if too small
__m128 a128 = _mm_load_ss(&a);
RetVal = _mm_cvtss_si32(a128);
__m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
RetVal += _mm_comilt_ss( rounded128, a128 );
#else
RetVal = static_cast<int>( ceil(a) );
RetVal = static_cast<int>( ceil(a) );
#endif
return RetVal;
}
@ -1741,13 +1663,6 @@ void Parabolic_Spline_NormalizeX(
float t,
Vector& output );
// Evaluate the cubic Bernstein basis for the input parametric coordinate.
// Output is the coefficient for that basis polynomial.
float CubicBasis0( float t );
float CubicBasis1( float t );
float CubicBasis2( float t );
float CubicBasis3( float t );
// quintic interpolating polynomial from Perlin.
// 0->0, 1->1, smooth-in between with smooth tangents
FORCEINLINE float QuinticInterpolatingPolynomial(float t)
@ -1768,6 +1683,7 @@ void GetInterpolationData( float const *pKnotPositions,
float *pValueA,
float *pValueB,
float *pInterpolationValue);
float RangeCompressor( float flValue, float flMin, float flMax, float flBase );
// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs]
@ -1811,11 +1727,12 @@ float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA,
// Init the mathlib
void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2.0f, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true );
bool MathLib_3DNowEnabled( void );
bool MathLib_MMXEnabled( void );
bool MathLib_SSEEnabled( void );
bool MathLib_SSE2Enabled( void );
inline float Approach( float target, float value, float speed );
float Approach( float target, float value, float speed );
float ApproachAngle( float target, float value, float speed );
float AngleDiff( float destAngle, float srcAngle );
float AngleDistance( float next, float cur );
@ -1830,23 +1747,10 @@ bool AnglesAreEqual( float a, float b, float tolerance = 0.0f );
void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle );
void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out );
//-----------------------------------------------------------------------------
// Clips a line segment such that only the portion in the positive half-space
// of the plane remains. If the segment is entirely clipped, the vectors
// are set to vec3_invalid (all components are FLT_MAX).
//
// flBias is added to the dot product with the normal. A positive bias
// results in a more inclusive positive half-space, while a negative bias
// results in a more exclusive positive half-space.
//-----------------------------------------------------------------------------
void ClipLineSegmentToPlane( const Vector &vNormal, const Vector &vPlanePoint, Vector *p1, Vector *p2, float flBias = 0.0f );
void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept );
int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f );
int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f );
int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 );
float TetrahedronVolume( const Vector &p0, const Vector &p1, const Vector &p2, const Vector &p3 );
float TriangleArea( const Vector &p0, const Vector &p1, const Vector &p2 );
//-----------------------------------------------------------------------------
// Computes a reasonable tangent space for a triangle
@ -2006,12 +1910,7 @@ FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, floa
pNormal[0] = ( iX - 16384.0f ) / 16384.0f;
pNormal[1] = ( iY - 16384.0f ) / 16384.0f;
float mag = ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] );
if ( mag > 1.0f )
{
mag = 1.0f;
}
pNormal[2] = zSign*sqrtf( 1.0f - mag );
pNormal[2] = zSign*sqrtf( 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] ) );
if ( bIsTangent )
{
pNormal[3] = tSign;
@ -2277,33 +2176,6 @@ inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10)
AlmostEqual( a.z, b.z, maxUlps );
}
inline float Approach( float target, float value, float speed )
{
float delta = target - value;
#if defined(_X360) || defined( PS3 ) // use conditional move for speed on 360
return fsel( delta-speed, // delta >= speed ?
value + speed, // if delta == speed, then value + speed == value + delta == target
fsel( (-speed) - delta, // delta <= -speed
value - speed,
target )
); // delta < speed && delta > -speed
#else
if ( delta > speed )
value += speed;
else if ( delta < -speed )
value -= speed;
else
value = target;
return value;
#endif
}
inline void matrix3x4_t::Init( const Vector &xAxis, const Vector &yAxis, const Vector &zAxis, const Vector &vecOrigin )
{
MatrixInitialize( *this, vecOrigin, xAxis, yAxis, zAxis );
@ -2378,6 +2250,16 @@ inline void matrix3x4_t::ConcatTransforms( const matrix3x4_t &other )
::ConcatTransforms( *this, other, *this );
}
inline void matrix3x4_t::ScaleBy( const float value )
{
MatrixScaleBy( value, *this );
}
inline void matrix3x4_t::ScaleByZero()
{
MatrixScaleByZero( *this );
}
inline void matrix3x4_t::Multiply( const matrix3x4_t &other )
{
MatrixMultiply( *this, other, *this );
@ -2419,6 +2301,18 @@ inline float matrix3x4_t::ColumnDotProduct( MatrixAxisType_t column, const Vecto
return MatrixColumnDotProduct( *this, column, in );
}
inline Vector matrix3x4_t::GetTranslation() const
{
Vector out;
MatrixGetTranslation( *this, out );
return out;
}
inline void matrix3x4_t::SetTranslation( const Vector &in )
{
MatrixSetTranslation( in, *this );
}
inline Vector matrix3x4_t::GetColumn( MatrixAxisType_t column ) const
{
Vector out;