2008-09-15 01:00:17 -05:00
//========= Copyright <20> 1996-2005, Valve Corporation, All rights reserved. ============//
//
// Purpose: Math primitives.
//
// $Workfile: $
// $NoKeywords: $
//=============================================================================//
# if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
/// FIXME: As soon as all references to mathlib.c are gone, include it in here
# include <math.h>
# include <float.h> // Needed for FLT_EPSILON
# include "basetypes.h"
# include <memory.h>
# include "tier0/dbg.h"
# include "tier0/vprof.h"
//#define _VPROF_MATHLIB
2008-09-15 01:33:59 -05:00
# ifdef _MSC_VER
2008-09-15 01:00:17 -05:00
# pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
# pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
# include "mathlib.h"
# include "amd3dx.h"
# include "vector.h"
// memdbgon must be the last include file in a .cpp file!!!
# include "tier0/memdbgon.h"
static bool s_bMathlibInitialized = false ;
# ifdef PARANOID
// User must provide an implementation of Sys_Error()
void Sys_Error ( char * error , . . . ) ;
# endif
# ifdef _XBOX
# include <xmmintrin.h>
# endif
const Vector vec3_origin ( 0 , 0 , 0 ) ;
const QAngle vec3_angle ( 0 , 0 , 0 ) ;
const Vector vec3_invalid ( FLT_MAX , FLT_MAX , FLT_MAX ) ;
const int nanmask = 255 < < 23 ;
// Math routines which have alternate versions for MMX/SSE/3DNOW in MATHLIB.C
// void _VectorMA( const float *start, float scale, const float *direction, float *dest );
# ifdef PFN_VECTORMA
void __cdecl _SSE_VectorMA ( const float * start , float scale , const float * direction , float * dest ) ;
# endif
//-----------------------------------------------------------------------------
// Macros and constants required by some of the SSE assembly:
//-----------------------------------------------------------------------------
# ifdef _WIN32
# define _PS_EXTERN_CONST(Name, Val) \
const __declspec ( align ( 16 ) ) float _ps_ # # Name [ 4 ] = { Val , Val , Val , Val }
# define _PS_EXTERN_CONST_TYPE(Name, Type, Val) \
const __declspec ( align ( 16 ) ) Type _ps_ # # Name [ 4 ] = { Val , Val , Val , Val } ; \
# define _EPI32_CONST(Name, Val) \
static const __declspec ( align ( 16 ) ) __int32 _epi32_ # # Name [ 4 ] = { Val , Val , Val , Val }
# define _PS_CONST(Name, Val) \
static const __declspec ( align ( 16 ) ) float _ps_ # # Name [ 4 ] = { Val , Val , Val , Val }
# elif _LINUX
# define _PS_EXTERN_CONST(Name, Val) \
const __attribute__ ( ( aligned ( 16 ) ) ) float _ps_ # # Name [ 4 ] = { Val , Val , Val , Val }
# define _PS_EXTERN_CONST_TYPE(Name, Type, Val) \
const __attribute__ ( ( aligned ( 16 ) ) ) Type _ps_ # # Name [ 4 ] = { Val , Val , Val , Val } ; \
# define _EPI32_CONST(Name, Val) \
static const __attribute__ ( ( aligned ( 16 ) ) ) int32 _epi32_ # # Name [ 4 ] = { Val , Val , Val , Val }
# define _PS_CONST(Name, Val) \
static const __attribute__ ( ( aligned ( 16 ) ) ) float _ps_ # # Name [ 4 ] = { Val , Val , Val , Val }
# endif
_PS_EXTERN_CONST ( am_0 , 0.0f ) ;
_PS_EXTERN_CONST ( am_1 , 1.0f ) ;
_PS_EXTERN_CONST ( am_m1 , - 1.0f ) ;
_PS_EXTERN_CONST ( am_0p5 , 0.5f ) ;
_PS_EXTERN_CONST ( am_1p5 , 1.5f ) ;
_PS_EXTERN_CONST ( am_pi , ( float ) M_PI ) ;
_PS_EXTERN_CONST ( am_pi_o_2 , ( float ) ( M_PI / 2.0 ) ) ;
_PS_EXTERN_CONST ( am_2_o_pi , ( float ) ( 2.0 / M_PI ) ) ;
_PS_EXTERN_CONST ( am_pi_o_4 , ( float ) ( M_PI / 4.0 ) ) ;
_PS_EXTERN_CONST ( am_4_o_pi , ( float ) ( 4.0 / M_PI ) ) ;
_PS_EXTERN_CONST_TYPE ( am_sign_mask , int32 , 0x80000000 ) ;
_PS_EXTERN_CONST_TYPE ( am_inv_sign_mask , int32 , ~ 0x80000000 ) ;
_PS_EXTERN_CONST_TYPE ( am_min_norm_pos , int32 , 0x00800000 ) ;
_PS_EXTERN_CONST_TYPE ( am_mant_mask , int32 , 0x7f800000 ) ;
_PS_EXTERN_CONST_TYPE ( am_inv_mant_mask , int32 , ~ 0x7f800000 ) ;
_EPI32_CONST ( 1 , 1 ) ;
_EPI32_CONST ( 2 , 2 ) ;
_PS_CONST ( sincos_p0 , 0.15707963267948963959e1 f ) ;
_PS_CONST ( sincos_p1 , - 0.64596409750621907082e0 f ) ;
_PS_CONST ( sincos_p2 , 0.7969262624561800806e-1 f ) ;
_PS_CONST ( sincos_p3 , - 0.468175413106023168e-2 f ) ;
static const uint32 _sincos_masks [ ] = { ( uint32 ) 0x0 , ( uint32 ) ~ 0x0 } ;
static const uint32 _sincos_inv_masks [ ] = { ( uint32 ) ~ 0x0 , ( uint32 ) 0x0 } ;
//-----------------------------------------------------------------------------
// Standard C implementations of optimized routines:
//-----------------------------------------------------------------------------
float _sqrtf ( float _X )
{
Assert ( s_bMathlibInitialized ) ;
return sqrtf ( _X ) ;
}
float _rsqrtf ( float x )
{
Assert ( s_bMathlibInitialized ) ;
return 1.f / _sqrtf ( x ) ;
}
float FASTCALL _VectorNormalize ( Vector & vec )
{
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " _VectorNormalize " , " Mathlib " ) ;
# endif
Assert ( s_bMathlibInitialized ) ;
float radius = sqrtf ( vec . x * vec . x + vec . y * vec . y + vec . z * vec . z ) ;
// FLT_EPSILON is added to the radius to eliminate the possibility of divide by zero.
float iradius = 1.f / ( radius + FLT_EPSILON ) ;
vec . x * = iradius ;
vec . y * = iradius ;
vec . z * = iradius ;
return radius ;
}
// TODO: Add fast C VectorNormalizeFast.
// Perhaps use approximate rsqrt trick, if the accuracy isn't too bad.
void FASTCALL _VectorNormalizeFast ( Vector & vec )
{
Assert ( s_bMathlibInitialized ) ;
// FLT_EPSILON is added to the radius to eliminate the possibility of divide by zero.
float iradius = 1.f / ( sqrtf ( vec . x * vec . x + vec . y * vec . y + vec . z * vec . z ) + FLT_EPSILON ) ;
vec . x * = iradius ;
vec . y * = iradius ;
vec . z * = iradius ;
}
float _InvRSquared ( const float * v )
{
Assert ( s_bMathlibInitialized ) ;
float r2 = DotProduct ( v , v ) ;
return r2 < 1.f ? 1.f : 1 / r2 ;
}
// Math routines done in optimized assembly math package routines
//-----------------------------------------------------------------------------
// 3D Now Implementations of optimized routines:
//-----------------------------------------------------------------------------
float _3DNow_Sqrt ( float x )
{
Assert ( s_bMathlibInitialized ) ;
float root = 0.f ;
# ifdef _WIN32
_asm
{
femms
movd mm0 , x
PFRSQRT ( mm1 , mm0 )
punpckldq mm0 , mm0
PFMUL ( mm0 , mm1 )
movd root , mm0
femms
}
# elif _LINUX
__asm __volatile__ ( " femms " ) ;
__asm __volatile__
(
" pfrsqrt %y0, %y1 \n \t "
" punpckldq %y1, %y1 \n \t "
" pfmul %y1, %y0 \n \t "
: " =y " ( root ) , " =y " ( x )
: " 0 " ( x )
) ;
__asm __volatile__ ( " femms " ) ;
# else
# error
# endif
return root ;
}
// NJS FIXME: Need to test Recripricol squareroot performance and accuraccy
// on AMD's before using the specialized instruction.
float _3DNow_RSqrt ( float x )
{
Assert ( s_bMathlibInitialized ) ;
return 1.f / _3DNow_Sqrt ( x ) ;
}
float FASTCALL _3DNow_VectorNormalize ( Vector & vec )
{
Assert ( s_bMathlibInitialized ) ;
float * v = & vec [ 0 ] ;
float radius = 0.f ;
if ( v [ 0 ] | | v [ 1 ] | | v [ 2 ] )
{
# ifdef _WIN32
_asm
{
mov eax , v
femms
movq mm0 , QWORD PTR [ eax ]
movd mm1 , DWORD PTR [ eax + 8 ]
movq mm2 , mm0
movq mm3 , mm1
PFMUL ( mm0 , mm0 )
PFMUL ( mm1 , mm1 )
PFACC ( mm0 , mm0 )
PFADD ( mm1 , mm0 )
PFRSQRT ( mm0 , mm1 )
punpckldq mm1 , mm1
PFMUL ( mm1 , mm0 )
PFMUL ( mm2 , mm0 )
PFMUL ( mm3 , mm0 )
movq QWORD PTR [ eax ] , mm2
movd DWORD PTR [ eax + 8 ] , mm3
movd radius , mm1
femms
}
# elif _LINUX
long long a , c ;
int b , d ;
memcpy ( & a , & vec [ 0 ] , sizeof ( a ) ) ;
memcpy ( & b , & vec [ 2 ] , sizeof ( b ) ) ;
memcpy ( & c , & vec [ 0 ] , sizeof ( c ) ) ;
memcpy ( & d , & vec [ 2 ] , sizeof ( d ) ) ;
__asm __volatile__ ( " femms " ) ;
__asm __volatile__
(
" pfmul %y3, %y3 \n \t "
" pfmul %y0, %y0 \n \t "
" pfacc %y3, %y3 \n \t "
" pfadd %y3, %y0 \n \t "
" pfrsqrt %y0, %y3 \n \t "
" punpckldq %y0, %y0 \n \t "
" pfmul %y3, %y0 \n \t "
" pfmul %y3, %y2 \n \t "
" pfmul %y3, %y1 \n \t "
: " =y " ( radius ) , " =y " ( c ) , " =y " ( d )
: " y " ( a ) , " 0 " ( b ) , " 1 " ( c ) , " 2 " ( d )
) ;
memcpy ( & vec [ 0 ] , & c , sizeof ( c ) ) ;
memcpy ( & vec [ 2 ] , & d , sizeof ( d ) ) ;
__asm __volatile__ ( " femms " ) ;
# else
# error
# endif
}
return radius ;
}
void FASTCALL _3DNow_VectorNormalizeFast ( Vector & vec )
{
_3DNow_VectorNormalize ( vec ) ;
}
// JAY: This complains with the latest processor pack
2008-09-15 01:33:59 -05:00
# ifdef _MSC_VER
2008-09-15 01:00:17 -05:00
# pragma warning(disable: 4730)
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
float _3DNow_InvRSquared ( const float * v )
{
Assert ( s_bMathlibInitialized ) ;
float r2 = 1.f ;
# ifdef _WIN32
_asm { // AMD 3DNow only routine
mov eax , v
femms
movq mm0 , QWORD PTR [ eax ]
movd mm1 , DWORD PTR [ eax + 8 ]
movd mm2 , [ r2 ]
PFMUL ( mm0 , mm0 )
PFMUL ( mm1 , mm1 )
PFACC ( mm0 , mm0 )
PFADD ( mm1 , mm0 )
PFMAX ( mm1 , mm2 )
PFRCP ( mm0 , mm1 )
movd [ r2 ] , mm0
femms
}
# elif _LINUX
long long a , c ;
int b ;
memcpy ( & a , & v [ 0 ] , sizeof ( a ) ) ;
memcpy ( & b , & v [ 2 ] , sizeof ( b ) ) ;
memcpy ( & c , & v [ 0 ] , sizeof ( c ) ) ;
__asm __volatile__ ( " femms " ) ;
__asm __volatile__
(
" PFMUL %y2, %y2 \n \t "
" PFMUL %y3, %y3 \n \t "
" PFACC %y2, %y2 \n \t "
" PFADD %y2, %y3 \n \t "
" PFMAX %y3, %y4 \n \t "
" PFRCP %y3, %y2 \n \t "
" movq %y2, %y0 \n \t "
: " =y " ( r2 )
: " 0 " ( r2 ) , " y " ( a ) , " y " ( b ) , " y " ( c )
) ;
__asm __volatile__ ( " femms " ) ;
# else
# error
# endif
return r2 ;
}
//-----------------------------------------------------------------------------
// SSE implementations of optimized routines:
//-----------------------------------------------------------------------------
float _SSE_Sqrt ( float x )
{
Assert ( s_bMathlibInitialized ) ;
float root = 0.f ;
# ifdef _WIN32
_asm
{
sqrtss xmm0 , x
movss root , xmm0
}
# elif _LINUX
__asm__ __volatile__ (
" movss %1,%%xmm2 \n "
" sqrtss %%xmm2,%%xmm1 \n "
" movss %%xmm1,%0 "
: " =m " ( root )
: " m " ( x )
) ;
# endif
return root ;
}
// Single iteration NewtonRaphson reciprocal square root:
// 0.5 * rsqrtps * (3 - x * rsqrtps(x) * rsqrtps(x))
// Very low error, and fine to use in place of 1.f / sqrtf(x).
#if 0
float _SSE_RSqrtAccurate ( float x )
{
Assert ( s_bMathlibInitialized ) ;
float rroot ;
_asm
{
rsqrtss xmm0 , x
movss rroot , xmm0
}
return ( 0.5f * rroot ) * ( 3.f - ( x * rroot ) * rroot ) ;
}
# else
// Intel / Kipps SSE RSqrt. Significantly faster than above.
inline float _SSE_RSqrtAccurate ( float a )
{
float x ;
float half = 0.5f ;
float three = 3.f ;
# ifdef _WIN32
__asm
{
movss xmm3 , a ;
movss xmm1 , half ;
movss xmm2 , three ;
rsqrtss xmm0 , xmm3 ;
mulss xmm3 , xmm0 ;
mulss xmm1 , xmm0 ;
mulss xmm3 , xmm0 ;
subss xmm2 , xmm3 ;
mulss xmm1 , xmm2 ;
movss x , xmm1 ;
}
# elif _LINUX
__asm__ __volatile__ (
" movss %1, %%xmm3 \n \t "
" movss %2, %%xmm1 \n \t "
" movss %3, %%xmm2 \n \t "
" rsqrtss %%xmm3, %%xmm0 \n \t "
" mulss %%xmm0, %%xmm3 \n \t "
" mulss %%xmm0, %%xmm1 \n \t "
" mulss %%xmm0, %%xmm3 \n \t "
" subss %%xmm3, %%xmm2 \n \t "
" mulss %%xmm2, %%xmm1 \n \t "
" movss %%xmm1, %0 \n \t "
: " =m " ( x )
: " m " ( a ) , " m " ( half ) , " m " ( three )
) ;
# else
# error
# endif
return x ;
}
# endif
// Simple SSE rsqrt. Usually accurate to around 6 (relative) decimal places
// or so, so ok for closed transforms. (ie, computing lighting normals)
inline float _SSE_RSqrtFast ( float x )
{
Assert ( s_bMathlibInitialized ) ;
float rroot ;
# ifdef _WIN32
_asm
{
rsqrtss xmm0 , x
movss rroot , xmm0
}
# elif _LINUX
__asm__ __volatile__ (
" rsqrtss %1, %%xmm0 \n \t "
" movss %%xmm0, %0 \n \t "
: " =m " ( x )
: " m " ( rroot )
: " %xmm0 "
) ;
# else
# error
# endif
return rroot ;
}
float FASTCALL _SSE_VectorNormalize ( Vector & vec )
{
Assert ( s_bMathlibInitialized ) ;
// NOTE: This is necessary to prevent an memory overwrite...
// sice vec only has 3 floats, we can't "movaps" directly into it.
# ifdef _WIN32
__declspec ( align ( 16 ) ) float result [ 4 ] ;
# elif _LINUX
__attribute__ ( ( aligned ( 16 ) ) ) float result [ 4 ] ;
# endif
float * v = & vec [ 0 ] ;
2008-09-15 01:33:59 -05:00
# ifdef _WIN32
2008-09-15 01:00:17 -05:00
float * r = & result [ 0 ] ;
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
float radius = 0.f ;
// Blah, get rid of these comparisons ... in reality, if you have all 3 as zero, it shouldn't
// be much of a performance win, considering you will very likely miss 3 branch predicts in a row.
if ( v [ 0 ] | | v [ 1 ] | | v [ 2 ] )
{
# ifdef _WIN32
_asm
{
mov eax , v
mov edx , r
# ifdef ALIGNED_VECTOR
movaps xmm4 , [ eax ] // r4 = vx, vy, vz, X
movaps xmm1 , xmm4 // r1 = r4
# else
movups xmm4 , [ eax ] // r4 = vx, vy, vz, X
movaps xmm1 , xmm4 // r1 = r4
# endif
mulps xmm1 , xmm4 // r1 = vx * vx, vy * vy, vz * vz, X
movhlps xmm3 , xmm1 // r3 = vz * vz, X, X, X
movaps xmm2 , xmm1 // r2 = r1
shufps xmm2 , xmm2 , 1 // r2 = vy * vy, X, X, X
addss xmm1 , xmm2 // r1 = (vx * vx) + (vy * vy), X, X, X
addss xmm1 , xmm3 // r1 = (vx * vx) + (vy * vy) + (vz * vz), X, X, X
sqrtss xmm1 , xmm1 // r1 = sqrt((vx * vx) + (vy * vy) + (vz * vz)), X, X, X
movss radius , xmm1 // radius = sqrt((vx * vx) + (vy * vy) + (vz * vz))
rcpss xmm1 , xmm1 // r1 = 1/radius, X, X, X
shufps xmm1 , xmm1 , 0 // r1 = 1/radius, 1/radius, 1/radius, X
mulps xmm4 , xmm1 // r4 = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
movaps [ edx ] , xmm4 // v = vx * 1/radius, vy * 1/radius, vz * 1/radius, X
}
# elif _LINUX
__asm__ __volatile__ (
# ifdef ALIGNED_VECTOR
" movaps %2, %%xmm4 \n \t "
" movaps %%xmm4, %%xmm1 \n \t "
# else
" movups %2, %%xmm4 \n \t "
" movaps %%xmm4, %%xmm1 \n \t "
# endif
" mulps %%xmm4, %%xmm1 \n \t "
" movhlps %%xmm1, %%xmm3 \n \t "
" movaps %%xmm1, %%xmm2 \n \t "
" shufps $1, %%xmm2, %%xmm2 \n \t "
" addss %%xmm2, %%xmm1 \n \t "
" addss %%xmm3, %%xmm1 \n \t "
" sqrtss %%xmm1, %%xmm1 \n \t "
" movss %%xmm1, %0 \n \t "
" rcpss %%xmm1, %%xmm1 \n \t "
" shufps $0, %%xmm1, %%xmm1 \n \t "
" mulps %%xmm1, %%xmm4 \n \t "
" movaps %%xmm4, %1 \n \t "
: " =m " ( radius ) , " =m " ( result )
: " m " ( * v )
) ;
# else
# error
# endif
vec . x = result [ 0 ] ;
vec . y = result [ 1 ] ;
vec . z = result [ 2 ] ;
}
return radius ;
}
void FASTCALL _SSE_VectorNormalizeFast ( Vector & vec )
{
float ool = _SSE_RSqrtAccurate ( FLT_EPSILON + vec . x * vec . x + vec . y * vec . y + vec . z * vec . z ) ;
vec . x * = ool ;
vec . y * = ool ;
vec . z * = ool ;
}
float _SSE_InvRSquared ( const float * v )
{
float inv_r2 = 1.f ;
# ifdef _WIN32
_asm { // Intel SSE only routine
mov eax , v
movss xmm5 , inv_r2 // x5 = 1.0, 0, 0, 0
# ifdef ALIGNED_VECTOR
movaps xmm4 , [ eax ] // x4 = vx, vy, vz, X
# else
movups xmm4 , [ eax ] // x4 = vx, vy, vz, X
# endif
movaps xmm1 , xmm4 // x1 = x4
mulps xmm1 , xmm4 // x1 = vx * vx, vy * vy, vz * vz, X
movhlps xmm3 , xmm1 // x3 = vz * vz, X, X, X
movaps xmm2 , xmm1 // x2 = x1
shufps xmm2 , xmm2 , 1 // x2 = vy * vy, X, X, X
addss xmm1 , xmm2 // x1 = (vx * vx) + (vy * vy), X, X, X
addss xmm1 , xmm3 // x1 = (vx * vx) + (vy * vy) + (vz * vz), X, X, X
2011-04-28 01:29:22 -05:00
maxss xmm1 , xmm5 // x1 = MAX( 1.0, x1 )
rcpss xmm0 , xmm1 // x0 = 1 / MAX( 1.0, x1 )
2008-09-15 01:00:17 -05:00
movss inv_r2 , xmm0 // inv_r2 = x0
}
# elif _LINUX
__asm__ __volatile__ (
# ifdef ALIGNED_VECTOR
" movaps %1, %%xmm4 \n \t "
# else
" movups %1, %%xmm4 \n \t "
# endif
" movaps %%xmm4, %%xmm1 \n \t "
" mulps %%xmm4, %%xmm1 \n \t "
" movhlps %%xmm1, %%xmm3 \n \t "
" movaps %%xmm1, %%xmm2 \n \t "
" shufps $1, %%xmm2, %%xmm2 \n \t "
" addss %%xmm2, %%xmm1 \n \t "
" addss %%xmm3, %%xmm1 \n \t "
" maxss %%xmm5, %%xmm1 \n \t "
" rcpss %%xmm1, %%xmm0 \n \t "
" movss %%xmm0, %0 \n \t "
: " =m " ( inv_r2 )
2008-09-15 01:33:59 -05:00
: " m " ( * v ) , " m " ( inv_r2 )
2008-09-15 01:00:17 -05:00
) ;
# else
# error
# endif
return inv_r2 ;
}
# ifdef _WIN32
void _SSE_SinCos ( float x , float * s , float * c )
{
float t4 , t8 , t12 ;
__asm
{
movss xmm0 , x
movss t12 , xmm0
movss xmm1 , _ps_am_inv_sign_mask
mov eax , t12
mulss xmm0 , _ps_am_2_o_pi
andps xmm0 , xmm1
and eax , 0x80000000
cvttss2si edx , xmm0
mov ecx , edx
mov t12 , esi
mov esi , edx
add edx , 0x1
shl ecx , ( 31 - 1 )
shl edx , ( 31 - 1 )
movss xmm4 , _ps_am_1
cvtsi2ss xmm3 , esi
mov t8 , eax
and esi , 0x1
subss xmm0 , xmm3
movss xmm3 , _sincos_inv_masks [ esi * 4 ]
minss xmm0 , xmm4
subss xmm4 , xmm0
movss xmm6 , xmm4
andps xmm4 , xmm3
and ecx , 0x80000000
movss xmm2 , xmm3
andnps xmm3 , xmm0
and edx , 0x80000000
movss xmm7 , t8
andps xmm0 , xmm2
mov t8 , ecx
mov t4 , edx
orps xmm4 , xmm3
mov eax , s //mov eax, [esp + 4 + 16]
mov edx , c //mov edx, [esp + 4 + 16 + 4]
andnps xmm2 , xmm6
orps xmm0 , xmm2
movss xmm2 , t8
movss xmm1 , xmm0
movss xmm5 , xmm4
xorps xmm7 , xmm2
movss xmm3 , _ps_sincos_p3
mulss xmm0 , xmm0
mulss xmm4 , xmm4
movss xmm2 , xmm0
movss xmm6 , xmm4
orps xmm1 , xmm7
movss xmm7 , _ps_sincos_p2
mulss xmm0 , xmm3
mulss xmm4 , xmm3
movss xmm3 , _ps_sincos_p1
addss xmm0 , xmm7
addss xmm4 , xmm7
movss xmm7 , _ps_sincos_p0
mulss xmm0 , xmm2
mulss xmm4 , xmm6
addss xmm0 , xmm3
addss xmm4 , xmm3
movss xmm3 , t4
mulss xmm0 , xmm2
mulss xmm4 , xmm6
orps xmm5 , xmm3
mov esi , t12
addss xmm0 , xmm7
addss xmm4 , xmm7
mulss xmm0 , xmm1
mulss xmm4 , xmm5
// use full stores since caller might reload with full loads
movss [ eax ] , xmm0
movss [ edx ] , xmm4
}
}
float _SSE_cos ( float x )
{
float temp ;
__asm
{
movss xmm0 , x
movss xmm1 , _ps_am_inv_sign_mask
andps xmm0 , xmm1
addss xmm0 , _ps_am_pi_o_2
mulss xmm0 , _ps_am_2_o_pi
cvttss2si ecx , xmm0
movss xmm5 , _ps_am_1
mov edx , ecx
shl edx , ( 31 - 1 )
cvtsi2ss xmm1 , ecx
and edx , 0x80000000
and ecx , 0x1
subss xmm0 , xmm1
movss xmm6 , _sincos_masks [ ecx * 4 ]
minss xmm0 , xmm5
movss xmm1 , _ps_sincos_p3
subss xmm5 , xmm0
andps xmm5 , xmm6
movss xmm7 , _ps_sincos_p2
andnps xmm6 , xmm0
mov temp , edx
orps xmm5 , xmm6
movss xmm0 , xmm5
mulss xmm5 , xmm5
movss xmm4 , _ps_sincos_p1
movss xmm2 , xmm5
mulss xmm5 , xmm1
movss xmm1 , _ps_sincos_p0
addss xmm5 , xmm7
mulss xmm5 , xmm2
movss xmm3 , temp
addss xmm5 , xmm4
mulss xmm5 , xmm2
orps xmm0 , xmm3
addss xmm5 , xmm1
mulss xmm0 , xmm5
movss x , xmm0
}
return x ;
}
//-----------------------------------------------------------------------------
// SSE2 implementations of optimized routines:
//-----------------------------------------------------------------------------
# ifndef _XBOX
void _SSE2_SinCos ( float x , float * s , float * c ) // any x
{
__asm
{
movss xmm0 , x
movaps xmm7 , xmm0
movss xmm1 , _ps_am_inv_sign_mask
movss xmm2 , _ps_am_sign_mask
movss xmm3 , _ps_am_2_o_pi
andps xmm0 , xmm1
andps xmm7 , xmm2
mulss xmm0 , xmm3
pxor xmm3 , xmm3
movd xmm5 , _epi32_1
movss xmm4 , _ps_am_1
cvttps2dq xmm2 , xmm0
pand xmm5 , xmm2
movd xmm1 , _epi32_2
pcmpeqd xmm5 , xmm3
movd xmm3 , _epi32_1
cvtdq2ps xmm6 , xmm2
paddd xmm3 , xmm2
pand xmm2 , xmm1
pand xmm3 , xmm1
subss xmm0 , xmm6
pslld xmm2 , ( 31 - 1 )
minss xmm0 , xmm4
mov eax , s // mov eax, [esp + 4 + 16]
mov edx , c // mov edx, [esp + 4 + 16 + 4]
subss xmm4 , xmm0
pslld xmm3 , ( 31 - 1 )
movaps xmm6 , xmm4
xorps xmm2 , xmm7
movaps xmm7 , xmm5
andps xmm6 , xmm7
andnps xmm7 , xmm0
andps xmm0 , xmm5
andnps xmm5 , xmm4
movss xmm4 , _ps_sincos_p3
orps xmm6 , xmm7
orps xmm0 , xmm5
movss xmm5 , _ps_sincos_p2
movaps xmm1 , xmm0
movaps xmm7 , xmm6
mulss xmm0 , xmm0
mulss xmm6 , xmm6
orps xmm1 , xmm2
orps xmm7 , xmm3
movaps xmm2 , xmm0
movaps xmm3 , xmm6
mulss xmm0 , xmm4
mulss xmm6 , xmm4
movss xmm4 , _ps_sincos_p1
addss xmm0 , xmm5
addss xmm6 , xmm5
movss xmm5 , _ps_sincos_p0
mulss xmm0 , xmm2
mulss xmm6 , xmm3
addss xmm0 , xmm4
addss xmm6 , xmm4
mulss xmm0 , xmm2
mulss xmm6 , xmm3
addss xmm0 , xmm5
addss xmm6 , xmm5
mulss xmm0 , xmm1
mulss xmm6 , xmm7
// use full stores since caller might reload with full loads
movss [ eax ] , xmm0
movss [ edx ] , xmm6
}
}
# endif
# ifndef _XBOX
float _SSE2_cos ( float x )
{
__asm
{
movss xmm0 , x
movss xmm1 , _ps_am_inv_sign_mask
movss xmm2 , _ps_am_pi_o_2
movss xmm3 , _ps_am_2_o_pi
andps xmm0 , xmm1
addss xmm0 , xmm2
mulss xmm0 , xmm3
pxor xmm3 , xmm3
movd xmm5 , _epi32_1
movss xmm4 , _ps_am_1
cvttps2dq xmm2 , xmm0
pand xmm5 , xmm2
movd xmm1 , _epi32_2
pcmpeqd xmm5 , xmm3
cvtdq2ps xmm6 , xmm2
pand xmm2 , xmm1
pslld xmm2 , ( 31 - 1 )
subss xmm0 , xmm6
movss xmm3 , _ps_sincos_p3
minss xmm0 , xmm4
subss xmm4 , xmm0
andps xmm0 , xmm5
andnps xmm5 , xmm4
orps xmm0 , xmm5
movaps xmm1 , xmm0
movss xmm4 , _ps_sincos_p2
mulss xmm0 , xmm0
movss xmm5 , _ps_sincos_p1
orps xmm1 , xmm2
movaps xmm7 , xmm0
mulss xmm0 , xmm3
movss xmm6 , _ps_sincos_p0
addss xmm0 , xmm4
mulss xmm0 , xmm7
addss xmm0 , xmm5
mulss xmm0 , xmm7
addss xmm0 , xmm6
mulss xmm0 , xmm1
movss x , xmm0
}
return x ;
}
# endif
# elif _LINUX
# warning "_SSE_sincos,_SSE_cos,_SSE2_cos,_SSE_sincos NOT implemented!"
# else
# error
# endif
//-----------------------------------------------------------------------------
// Function pointers selecting the appropriate implementation from above:
//-----------------------------------------------------------------------------
float ( * pfSqrt ) ( float x ) = _sqrtf ;
float ( * pfRSqrt ) ( float x ) = _rsqrtf ;
float ( * pfRSqrtFast ) ( float x ) = _rsqrtf ;
float ( FASTCALL * pfVectorNormalize ) ( Vector & v ) = _VectorNormalize ;
void ( FASTCALL * pfVectorNormalizeFast ) ( Vector & v ) = _VectorNormalizeFast ;
float ( * pfInvRSquared ) ( const float * v ) = _InvRSquared ;
void ( * pfFastSinCos ) ( float x , float * s , float * c ) = SinCos ;
float ( * pfFastCos ) ( float x ) = cosf ;
float SinCosTable [ SIN_TABLE_SIZE ] ;
void InitSinCosTable ( )
{
for ( int i = 0 ; i < SIN_TABLE_SIZE ; i + + )
{
SinCosTable [ i ] = sin ( i * 2.0 * M_PI / SIN_TABLE_SIZE ) ;
}
}
# ifdef PARANOID
/*
= = = = = = = = = = = = = = = = = =
BOPS_Error
Split out like this for ASM to call .
= = = = = = = = = = = = = = = = = =
*/
void BOPS_Error ( void )
{
Sys_Error ( " BoxOnPlaneSide: Bad signbits " ) ;
}
# endif
# if !id386 && !_LINUX
/*
= = = = = = = = = = = = = = = = = =
BoxOnPlaneSide
Returns 1 , 2 , or 1 + 2
= = = = = = = = = = = = = = = = = =
*/
int BoxOnPlaneSide ( const float * emins , const float * emaxs , const cplane_t * p )
{
Assert ( s_bMathlibInitialized ) ;
float dist1 , dist2 ;
int sides ;
#if 0 // this is done by the BOX_ON_PLANE_SIDE macro before calling this
// function
// fast axial cases
if ( p - > type < 3 )
{
if ( p - > dist < = emins [ p - > type ] )
return 1 ;
if ( p - > dist > = emaxs [ p - > type ] )
return 2 ;
return 3 ;
}
# endif
// general case
switch ( p - > signbits )
{
case 0 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 1 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 2 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 3 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 4 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
case 5 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
case 6 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
case 7 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
default :
dist1 = dist2 = 0 ; // shut up compiler
# ifdef PARANOID
BOPS_Error ( ) ;
# endif
break ;
}
#if 0
int i ;
float corners [ 2 ] [ 3 ] ;
for ( i = 0 ; i < 3 ; i + + )
{
if ( plane - > normal [ i ] < 0 )
{
corners [ 0 ] [ i ] = emins [ i ] ;
corners [ 1 ] [ i ] = emaxs [ i ] ;
}
else
{
corners [ 1 ] [ i ] = emins [ i ] ;
corners [ 0 ] [ i ] = emaxs [ i ] ;
}
}
dist = DotProduct ( plane - > normal , corners [ 0 ] ) - plane - > dist ;
dist2 = DotProduct ( plane - > normal , corners [ 1 ] ) - plane - > dist ;
sides = 0 ;
if ( dist1 > = 0 )
sides = 1 ;
if ( dist2 < 0 )
sides | = 2 ;
# endif
sides = 0 ;
if ( dist1 > = p - > dist )
sides = 1 ;
if ( dist2 < p - > dist )
sides | = 2 ;
# ifdef PARANOID
if ( sides = = 0 )
Sys_Error ( " BoxOnPlaneSide: sides==0 " ) ;
# endif
return sides ;
}
# endif
//#endif
qboolean VectorsEqual ( const float * v1 , const float * v2 )
{
Assert ( s_bMathlibInitialized ) ;
return ( ( v1 [ 0 ] = = v2 [ 0 ] ) & &
( v1 [ 1 ] = = v2 [ 1 ] ) & &
( v1 [ 2 ] = = v2 [ 2 ] ) ) ;
}
int LineSphereIntersection (
const Vector & vSphereCenter ,
const float fSphereRadius ,
const Vector & vLinePt ,
const Vector & vLineDir ,
float * fIntersection1 ,
float * fIntersection2 )
{
Assert ( s_bMathlibInitialized ) ;
// Line = P + Vt
// Sphere = r (assume we've translated to origin)
// (P + Vt)^2 = r^2
// VVt^2 + 2PVt + (PP - r^2)
// Solve as quadratic: (-b +/- sqrt(b^2 - 4ac)) / 2a
// If (b^2 - 4ac) is < 0 there is no solution.
// If (b^2 - 4ac) is = 0 there is one solution (a case this function doesn't support).
// If (b^2 - 4ac) is > 0 there are two solutions.
Vector P ;
float a , b , c , sqr , insideSqr ;
// Translate sphere to origin.
P [ 0 ] = vLinePt [ 0 ] - vSphereCenter [ 0 ] ;
P [ 1 ] = vLinePt [ 1 ] - vSphereCenter [ 1 ] ;
P [ 2 ] = vLinePt [ 2 ] - vSphereCenter [ 2 ] ;
a = vLineDir . Dot ( vLineDir ) ;
b = 2.0f * P . Dot ( vLineDir ) ;
c = P . Dot ( P ) - ( fSphereRadius * fSphereRadius ) ;
insideSqr = b * b - 4 * a * c ;
if ( insideSqr < = 0.000001f )
return 0 ;
// Ok, two solutions.
sqr = ( float ) sqrt ( insideSqr ) ;
* fIntersection1 = ( - b + sqr ) / ( 2.0f * a ) ;
* fIntersection2 = ( - b - sqr ) / ( 2.0f * a ) ;
return 1 ;
}
//-----------------------------------------------------------------------------
// Purpose: Generates Euler angles given a left-handed orientation matrix. The
// columns of the matrix contain the forward, left, and up vectors.
// Input : matrix - Left-handed orientation matrix.
// angles[PITCH, YAW, ROLL]. Receives right-handed counterclockwise
// rotations in degrees around Y, Z, and X respectively.
//-----------------------------------------------------------------------------
void MatrixAngles ( const matrix3x4_t & matrix , RadianEuler & angles , Vector & position )
{
MatrixGetColumn ( matrix , 3 , position ) ;
MatrixAngles ( matrix , angles ) ;
}
void MatrixAngles ( const matrix3x4_t & matrix , Quaternion & q , Vector & pos )
{
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " MatrixQuaternion " , " Mathlib " ) ;
# endif
float trace ;
trace = matrix [ 0 ] [ 0 ] + matrix [ 1 ] [ 1 ] + matrix [ 2 ] [ 2 ] + 1.0f ;
if ( trace > 1.0f + FLT_EPSILON )
{
// VPROF_INCREMENT_COUNTER("MatrixQuaternion A",1);
q . x = ( matrix [ 2 ] [ 1 ] - matrix [ 1 ] [ 2 ] ) ;
q . y = ( matrix [ 0 ] [ 2 ] - matrix [ 2 ] [ 0 ] ) ;
q . z = ( matrix [ 1 ] [ 0 ] - matrix [ 0 ] [ 1 ] ) ;
q . w = trace ;
}
else if ( matrix [ 0 ] [ 0 ] > matrix [ 1 ] [ 1 ] & & matrix [ 0 ] [ 0 ] > matrix [ 2 ] [ 2 ] )
{
// VPROF_INCREMENT_COUNTER("MatrixQuaternion B",1);
trace = 1.0f + matrix [ 0 ] [ 0 ] - matrix [ 1 ] [ 1 ] - matrix [ 2 ] [ 2 ] ;
q . x = trace ;
q . y = ( matrix [ 1 ] [ 0 ] + matrix [ 0 ] [ 1 ] ) ;
q . z = ( matrix [ 0 ] [ 2 ] + matrix [ 2 ] [ 0 ] ) ;
q . w = ( matrix [ 2 ] [ 1 ] - matrix [ 1 ] [ 2 ] ) ;
}
else if ( matrix [ 1 ] [ 1 ] > matrix [ 2 ] [ 2 ] )
{
// VPROF_INCREMENT_COUNTER("MatrixQuaternion C",1);
trace = 1.0f + matrix [ 1 ] [ 1 ] - matrix [ 0 ] [ 0 ] - matrix [ 2 ] [ 2 ] ;
q . x = ( matrix [ 0 ] [ 1 ] + matrix [ 1 ] [ 0 ] ) ;
q . y = trace ;
q . z = ( matrix [ 2 ] [ 1 ] + matrix [ 1 ] [ 2 ] ) ;
q . w = ( matrix [ 0 ] [ 2 ] - matrix [ 2 ] [ 0 ] ) ;
}
else
{
// VPROF_INCREMENT_COUNTER("MatrixQuaternion D",1);
trace = 1.0f + matrix [ 2 ] [ 2 ] - matrix [ 0 ] [ 0 ] - matrix [ 1 ] [ 1 ] ;
q . x = ( matrix [ 0 ] [ 2 ] + matrix [ 2 ] [ 0 ] ) ;
q . y = ( matrix [ 2 ] [ 1 ] + matrix [ 1 ] [ 2 ] ) ;
q . z = trace ;
q . w = ( matrix [ 1 ] [ 0 ] - matrix [ 0 ] [ 1 ] ) ;
}
QuaternionNormalize ( q ) ;
#if 0
// check against the angle version
RadianEuler ang ;
MatrixAngles ( matrix , ang ) ;
Quaternion test ;
AngleQuaternion ( ang , test ) ;
float d = QuaternionDotProduct ( q , test ) ;
Assert ( fabs ( d ) > 0.99 & & fabs ( d ) < 1.01 ) ;
# endif
MatrixGetColumn ( matrix , 3 , pos ) ;
}
void MatrixAngles ( const matrix3x4_t & matrix , float * angles )
{
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " MatrixAngles " , " Mathlib " ) ;
# endif
Assert ( s_bMathlibInitialized ) ;
float forward [ 3 ] ;
float left [ 3 ] ;
float up [ 3 ] ;
//
// Extract the basis vectors from the matrix. Since we only need the Z
// component of the up vector, we don't get X and Y.
//
forward [ 0 ] = matrix [ 0 ] [ 0 ] ;
forward [ 1 ] = matrix [ 1 ] [ 0 ] ;
forward [ 2 ] = matrix [ 2 ] [ 0 ] ;
left [ 0 ] = matrix [ 0 ] [ 1 ] ;
left [ 1 ] = matrix [ 1 ] [ 1 ] ;
left [ 2 ] = matrix [ 2 ] [ 1 ] ;
up [ 2 ] = matrix [ 2 ] [ 2 ] ;
float xyDist = sqrtf ( forward [ 0 ] * forward [ 0 ] + forward [ 1 ] * forward [ 1 ] ) ;
// enough here to get angles?
if ( xyDist > 0.001f )
{
// (yaw) y = ATAN( forward.y, forward.x ); -- in our space, forward is the X axis
angles [ 1 ] = RAD2DEG ( atan2f ( forward [ 1 ] , forward [ 0 ] ) ) ;
// The engine does pitch inverted from this, but we always end up negating it in the DLL
// UNDONE: Fix the engine to make it consistent
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
angles [ 0 ] = RAD2DEG ( atan2f ( - forward [ 2 ] , xyDist ) ) ;
// (roll) z = ATAN( left.z, up.z );
angles [ 2 ] = RAD2DEG ( atan2f ( left [ 2 ] , up [ 2 ] ) ) ;
}
else // forward is mostly Z, gimbal lock-
{
// (yaw) y = ATAN( -left.x, left.y ); -- forward is mostly z, so use right for yaw
angles [ 1 ] = RAD2DEG ( atan2f ( - left [ 0 ] , left [ 1 ] ) ) ;
// The engine does pitch inverted from this, but we always end up negating it in the DLL
// UNDONE: Fix the engine to make it consistent
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
angles [ 0 ] = RAD2DEG ( atan2f ( - forward [ 2 ] , xyDist ) ) ;
// Assume no roll in this case as one degree of freedom has been lost (i.e. yaw == roll)
angles [ 2 ] = 0 ;
}
}
#if 0
/*
= = = = = = = = = = = = =
DecomposeRotation
= = = = = = = = = = = = =
*/
// What is this for and why is this different from MatrixAngle?
void DecomposeRotation ( const matrix3x4_t & mat , float * out )
{
Assert ( s_bMathlibInitialized ) ;
float cp ;
qboolean fixYaw = 0 ;
qboolean fixRoll = 0 ;
// Start off in radians
out [ PITCH ] = asin ( - mat [ 0 ] [ 2 ] ) ;
cp = cos ( out [ PITCH ] ) ;
if ( cp )
{
float cy = mat [ 0 ] [ 0 ] / cp ;
float cr = mat [ 2 ] [ 2 ] / cp ;
// fix xome rounding error
if ( cy > 1.0 ) cy = 1.0 ;
else if ( cy < - 1.0 ) cy = - 1.0 ;
if ( cr > 1.0 ) cr = 1.0 ;
else if ( cr < - 1.0 ) cr = - 1.0 ;
out [ YAW ] = acos ( cy ) ;
out [ ROLL ] = acos ( cr ) ;
}
else
{
out [ YAW ] = 0 ;
out [ ROLL ] = acos ( mat [ 1 ] [ 1 ] ) ;
}
// convert angles to degrees
out [ PITCH ] * = 180.0 / M_PI ;
out [ YAW ] * = 180.0 / M_PI ;
out [ ROLL ] * = 180.0 / M_PI ;
// correct for quadrants
fixYaw = mat [ 0 ] [ 1 ] / cp > 0 ;
fixRoll = ( mat [ 1 ] [ 2 ] / - cp < 0 ) ;
if ( fixYaw )
out [ YAW ] = 360 - out [ YAW ] ;
if ( fixRoll )
out [ ROLL ] = 360 - out [ ROLL ] ;
// normalize
#if 0
if ( out [ PITCH ] < 0 )
out [ PITCH ] + = 360 ;
if ( out [ YAW ] < 0 )
out [ YAW ] + = 360 ;
if ( out [ ROLL ] < 0 )
out [ ROLL ] + = 360 ;
# endif
}
# endif
void VectorTransform ( const float * in1 , const matrix3x4_t & in2 , float * out )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( in1 ! = out ) ;
out [ 0 ] = DotProduct ( in1 , in2 [ 0 ] ) + in2 [ 0 ] [ 3 ] ;
out [ 1 ] = DotProduct ( in1 , in2 [ 1 ] ) + in2 [ 1 ] [ 3 ] ;
out [ 2 ] = DotProduct ( in1 , in2 [ 2 ] ) + in2 [ 2 ] [ 3 ] ;
}
// SSE Version of VectorTransform
void VectorTransformSSE ( const float * in1 , const matrix3x4_t & in2 , float * out1 )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( in1 ! = out1 ) ;
# ifdef _WIN32
__asm
{
mov eax , in1 ;
mov ecx , in2 ;
mov edx , out1 ;
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
addss xmm0 , [ ecx + 12 ]
movss [ edx ] , xmm0 ;
add ecx , 16 ;
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
addss xmm0 , [ ecx + 12 ]
movss [ edx + 4 ] , xmm0 ;
add ecx , 16 ;
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
addss xmm0 , [ ecx + 12 ]
movss [ edx + 8 ] , xmm0 ;
}
# elif _LINUX
# warning "VectorTransformSSE C implementation only"
out1 [ 0 ] = DotProduct ( in1 , in2 [ 0 ] ) + in2 [ 0 ] [ 3 ] ;
out1 [ 1 ] = DotProduct ( in1 , in2 [ 1 ] ) + in2 [ 1 ] [ 3 ] ;
out1 [ 2 ] = DotProduct ( in1 , in2 [ 2 ] ) + in2 [ 2 ] [ 3 ] ;
# else
# error
# endif
}
void VectorITransform ( const float * in1 , const matrix3x4_t & in2 , float * out )
{
Assert ( s_bMathlibInitialized ) ;
float in1t [ 3 ] ;
in1t [ 0 ] = in1 [ 0 ] - in2 [ 0 ] [ 3 ] ;
in1t [ 1 ] = in1 [ 1 ] - in2 [ 1 ] [ 3 ] ;
in1t [ 2 ] = in1 [ 2 ] - in2 [ 2 ] [ 3 ] ;
out [ 0 ] = in1t [ 0 ] * in2 [ 0 ] [ 0 ] + in1t [ 1 ] * in2 [ 1 ] [ 0 ] + in1t [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 1 ] = in1t [ 0 ] * in2 [ 0 ] [ 1 ] + in1t [ 1 ] * in2 [ 1 ] [ 1 ] + in1t [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 2 ] = in1t [ 0 ] * in2 [ 0 ] [ 2 ] + in1t [ 1 ] * in2 [ 1 ] [ 2 ] + in1t [ 2 ] * in2 [ 2 ] [ 2 ] ;
}
void VectorRotate ( const float * in1 , const matrix3x4_t & in2 , float * out )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( in1 ! = out ) ;
out [ 0 ] = DotProduct ( in1 , in2 [ 0 ] ) ;
out [ 1 ] = DotProduct ( in1 , in2 [ 1 ] ) ;
out [ 2 ] = DotProduct ( in1 , in2 [ 2 ] ) ;
}
void VectorRotate ( const Vector & in1 , const QAngle & in2 , Vector & out )
{
matrix3x4_t matRotate ;
AngleMatrix ( in2 , matRotate ) ;
VectorRotate ( in1 , matRotate , out ) ;
}
void VectorRotateSSE ( const float * in1 , const matrix3x4_t & in2 , float * out1 )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( in1 ! = out1 ) ;
# ifdef _WIN32
__asm
{
mov eax , in1 ;
mov ecx , in2 ;
mov edx , out1 ;
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
movss [ edx ] , xmm0 ;
add ecx , 16 ;
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
movss [ edx + 4 ] , xmm0 ;
add ecx , 16 ;
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
movss [ edx + 8 ] , xmm0 ;
}
# elif _LINUX
# warning "VectorRotateSSE C implementation only"
out1 [ 0 ] = DotProduct ( in1 , in2 [ 0 ] ) ;
out1 [ 1 ] = DotProduct ( in1 , in2 [ 1 ] ) ;
out1 [ 2 ] = DotProduct ( in1 , in2 [ 2 ] ) ;
# else
# error
# endif
}
//-----------------------------------------------------------------------------
// NOTE: This actually works, but we need to be sure it actually optimizes anything
// I've actually really only added it to make sure it's in source control
//-----------------------------------------------------------------------------
void TransformAndRotate ( const Vector & srcPos , const Vector & srcNorm ,
const matrix3x4_t & mat , Vector & pos , Vector & norm )
{
2008-09-15 01:33:59 -05:00
# ifdef _WIN32
2008-09-15 01:00:17 -05:00
const float * pMat = & mat [ 0 ] [ 0 ] ;
const float * pNormal = & srcNorm . x ;
float * pNormalOut = & norm . x ;
2008-09-15 01:33:59 -05:00
# endif
const float * pPos = & srcPos . x ;
float * pPosOut = & pos . x ;
2008-09-15 01:00:17 -05:00
// Vector pt, nt;
// pt[0] = DotProduct(vert.m_vecPosition.Base(), mat[0]) + mat[0][3];
// pt[1] = DotProduct(vert.m_vecPosition.Base(), mat[1]) + mat[1][3];
// pt[2] = DotProduct(vert.m_vecPosition.Base(), mat[2]) + mat[2][3];
// nt[0] = DotProduct( vert.m_vecNormal.Base(), mat[0] );
// nt[1] = DotProduct( vert.m_vecNormal.Base(), mat[1] );
// nt[2] = DotProduct( vert.m_vecNormal.Base(), mat[2] );
# ifdef _WIN32
__asm
{
mov eax , DWORD PTR [ pMat ]
mov edx , DWORD PTR [ pPos ]
mov ecx , DWORD PTR [ pNormal ]
mov esi , DWORD PTR [ pNormalOut ]
mov edi , DWORD PTR [ pPosOut ]
fld DWORD PTR [ eax + 3 * 4 ] ; m03
fld DWORD PTR [ eax + 0 ] ; m00 | m03
fld DWORD PTR [ edx + 0 ] ; pos . x | m00 | m03
fld DWORD PTR [ ecx + 0 ] ; norm . x | pos . x | m00 | m03
fld DWORD PTR [ edx + 4 ] ; pos . y | norm . x | pos . x | m00 | m03
fld DWORD PTR [ eax + 1 * 4 ] ; m01 | pos . y | norm . x | pos . x | m00 | m03
fld DWORD PTR [ ecx + 4 ] ; norm . y | m01 | pos . y | norm . x | pos . x | m00 | m03
fxch st ( 5 ) ; m00 | m01 | pos . y | norm . x | pos . x | norm . y | m03
fmul st ( 4 ) , st ( 0 ) ; m00 | m01 | pos . y | norm . x | pos . x * m00 | norm . y | m03
fmulp st ( 3 ) , st ( 0 ) ; m01 | pos . y | norm . x * m00 | pos . x * m00 | norm . y | m03
fmul st ( 1 ) , st ( 0 ) ; m01 | pos . y * m01 | norm . x * m00 | pos . x * m00 | norm . y | m03
fmulp st ( 4 ) , st ( 0 ) ; pos . y * m01 | norm . x * m00 | pos . x * m00 | norm . y * m01 | m03
fld DWORD PTR [ eax + 2 * 4 ] ; m02 | pos . y * m01 | norm . x * m00 | pos . x * m00 | norm . y * m01 | m03
fld DWORD PTR [ edx + 8 ] ; pos . z | m02 | pos . y * m01 | norm . x * m00 | pos . x * m00 | norm . y * m01 | m03
fld DWORD PTR [ ecx + 8 ] ; norm . z | pos . z | m02 | pos . y * m01 | norm . x * m00 | pos . x * m00 | norm . y * m01 | m03
fxch st ( 5 ) ; pos . x * m00 | pos . z | m02 | pos . y * m01 | norm . x * m00 | norm . z | norm . y * m01 | m03
faddp st ( 3 ) , st ( 0 ) ; pos . z | m02 | pos . x * m00 + pos . y * m01 | norm . x * m00 | norm . z | norm . y * m01 | m03
fxch st ( 3 ) ; norm . x * m00 | m02 | pos . x * m00 + pos . y * m01 | pos . z | norm . z | norm . y * m01 | m03
faddp st ( 5 ) , st ( 0 ) ; m02 | pos . x * m00 + pos . y * m01 | pos . z | norm . z | norm . y * m01 + norm . x * m00 | m03
fmul st ( 2 ) , st ( 0 ) ; m02 | pos . x * m00 + pos . y * m01 | pos . z * m02 | norm . z | norm . y * m01 + norm . x * m00 | m03
fmulp st ( 3 ) , st ( 0 ) ; pos . x * m00 + pos . y * m01 | pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | m03
faddp st ( 4 ) , st ( 0 ) ; pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | pos . x * m00 + pos . y * m01 + m03
; NOTE : Need two more instructions before we can start using the . z stuff
fld DWORD PTR [ eax + 1 * 16 + 3 * 4 ] ; m13 | pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | pos . x * m00 + pos . y * m01 + m03
fld DWORD PTR [ eax + 1 * 16 + 0 ] ; m10 | m13 | pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | pos . x * m00 + pos . y * m01 + m03
fld DWORD PTR [ edx + 0 ] ; pos . x | m10 | m13 | pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | pos . x * m00 + pos . y * m01 + m03
fld DWORD PTR [ ecx + 0 ] ; norm . x | pos . x | m10 | m13 | pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | pos . x * m00 + pos . y * m01 + m03
fxch st ( 7 ) ; pos . x * m00 + pos . y * m01 + m03 | pos . x | m10 | m13 | pos . z * m02 | norm . z * m02 | norm . y * m01 + norm . x * m00 | norm . x
faddp st ( 4 ) , st ( 0 ) ; pos . x | m10 | m13 | pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | norm . z * m02 | norm . y * m01 + norm . x * m00 | norm . x
fxch st ( 5 ) ; norm . y * m01 + norm . x * m00 | m10 | m13 | pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | norm . z * m02 | pos . x | norm . x
faddp st ( 4 ) , st ( 0 ) ; m10 | m13 | pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | norm . x * m00 + norm . y * m01 + norm . z * m02 | pos . x | norm . x
fmul st ( 4 ) , st ( 0 ) ; m10 | m13 | pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | norm . x * m00 + norm . y * m01 + norm . z * m02 | pos . x * m10 | norm . x
fmulp st ( 5 ) , st ( 0 ) ; m13 | pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | norm . x * m00 + norm . y * m01 + norm . z * m02 | pos . x * m10 | norm . x * m10
fxch st ( 2 ) ; norm . x * m00 + norm . y * m01 + norm . z * m02 | pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | m13 | pos . x * m10 | norm . x * m10
fstp DWORD PTR [ esi ] ; pos . x * m00 + pos . y * m01 + pos . z * m02 + m03 | m13 | pos . x * m10 | norm . x * m10
fstp DWORD PTR [ edi ] ; m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ eax + 1 * 16 + 1 * 4 ] ; m11 | m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ edx + 4 ] ; pos . y | m11 | m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ ecx + 4 ] ; norm . y | pos . y | m11 | m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ eax + 1 * 16 + 2 * 4 ] ; m12 | norm . y | pos . y | m11 | m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ edx + 8 ] ; pos . z | m12 | norm . y | pos . y | m11 | m13 | pos . x * m10 | norm . x * m10
fxch st ( 4 ) ; m11 | m12 | norm . y | pos . y | pos . z | m13 | pos . x * m10 | norm . x * m10
fmul st ( 3 ) , st ( 0 ) ; m11 | m12 | norm . y | pos . y * m11 | pos . z | m13 | pos . x * m10 | norm . x * m10
fmulp st ( 2 ) , st ( 0 ) ; m12 | norm . y * m11 | pos . y * m11 | pos . z | m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ ecx + 8 ] ; norm . z | m12 | norm . y * m11 | pos . y * m11 | pos . z | m13 | pos . x * m10 | norm . x * m10
fxch st ( 1 ) ; m12 | norm . z | norm . y * m11 | pos . y * m11 | pos . z | m13 | pos . x * m10 | norm . x * m10
fmul st ( 4 ) , st ( 0 ) ; m12 | norm . z | norm . y * m11 | pos . y * m11 | pos . z * m12 | m13 | pos . x * m10 | norm . x * m10
fmulp st ( 1 ) , st ( 0 ) ; norm . z * m12 | norm . y * m11 | pos . y * m11 | pos . z * m12 | m13 | pos . x * m10 | norm . x * m10
fld DWORD PTR [ eax + 2 * 16 + 3 * 4 ] ; m23 | norm . z * m12 | norm . y * m11 | pos . y * m11 | pos . z * m12 | m13 | pos . x * m10 | norm . x * m10
fxch st ( 6 ) ; pos . x * m10 | norm . z * m12 | norm . y * m11 | pos . y * m11 | pos . z * m12 | m13 | m23 | norm . x * m10
faddp st ( 3 ) , st ( 0 ) ; norm . z * m12 | norm . y * m11 | pos . x * m10 + pos . y * m11 | pos . z * m12 | m13 | m23 | norm . x * m10
fxch st ( 4 ) ; m13 | norm . y * m11 | pos . x * m10 + pos . y * m11 | pos . z * m12 | norm . z * m12 | m23 | norm . x * m10
faddp st ( 3 ) , st ( 0 ) ; norm . y * m11 | pos . x * m10 + pos . y * m11 | pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10
faddp st ( 5 ) , st ( 0 ) ; pos . x * m10 + pos . y * m11 | pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10 + norm . y * m11
fld DWORD PTR [ eax + 2 * 16 + 0 ] ; m20 | pos . x * m10 + pos . y * m11 | pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10 + norm . y * m11
fld DWORD PTR [ edx + 0 ] ; pos . x | m20 | pos . x * m10 + pos . y * m11 | pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10 + norm . y * m11
fld DWORD PTR [ ecx + 0 ] ; norm . x | pos . x | m20 | pos . x * m10 + pos . y * m11 | pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10 + norm . y * m11
fxch st ( 3 ) ; pos . x * m10 + pos . y * m11 | pos . x | m20 | norm . x | pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10 + norm . y * m11
faddp st ( 4 ) , st ( 0 ) ; pos . x | m20 | norm . x | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | norm . z * m12 | m23 | norm . x * m10 + norm . y * m11
fxch st ( 4 ) ; norm . z * m12 | m20 | norm . x | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x | m23 | norm . x * m10 + norm . y * m11
faddp st ( 6 ) , st ( 0 ) ; m20 | norm . x | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fmul st ( 3 ) , st ( 0 ) ; m20 | norm . x | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fmulp st ( 1 ) , st ( 0 ) ; norm . x * m20 | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fld DWORD PTR [ eax + 2 * 16 + 1 * 4 ] ; m21 | norm . x * m20 | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fld DWORD PTR [ edx + 4 ] ; pos . y | m21 | norm . x * m20 | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fld DWORD PTR [ ecx + 4 ] ; norm . y | pos . y | m21 | norm . x * m20 | pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fxch st ( 4 ) ; pos . x * m10 + pos . y * m11 + pos . z * m12 + m13 | pos . y | m21 | norm . x * m20 | norm . y | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fstp DWORD PTR [ edi + 4 ] ; pos . y | m21 | norm . x * m20 | norm . y | pos . x * m20 | m23 | norm . x * m10 + norm . y * m11 + norm . z * m12
fxch st ( 6 ) ; norm . x * m10 + norm . y * m11 + norm . z * m12 | m21 | norm . x * m20 | norm . y | pos . x * m20 | m23 | pos . y
fstp DWORD PTR [ esi + 4 ] ; m21 | norm . x * m20 | norm . y | pos . x * m20 | m23 | pos . y
fmul st ( 5 ) , st ( 0 ) ; m21 | norm . x * m20 | norm . y | pos . x * m20 | m23 | pos . y * m21
fmulp st ( 2 ) , st ( 0 ) ; norm . x * m20 | norm . y * m21 | pos . x * m20 | m23 | pos . y * m21
fld DWORD PTR [ eax + 2 * 16 + 2 * 4 ] ; m22 | norm . x * m20 | norm . y * m21 | pos . x * m20 | m23 | pos . y * m21
fld DWORD PTR [ edx + 8 ] ; pos . z | m22 | norm . x * m20 | norm . y * m21 | pos . x * m20 | m23 | pos . y * m21
fld DWORD PTR [ ecx + 8 ] ; norm . z | pos . z | m22 | norm . x * m20 | norm . y * m21 | pos . x * m20 | m23 | pos . y * m21
fxch st ( 5 ) ; pos . x * m20 | pos . z | m22 | norm . x * m20 | norm . y * m21 | norm . z | m23 | pos . y * m21
faddp st ( 6 ) , st ( 0 ) ; pos . z | m22 | norm . x * m20 | norm . y * m21 | norm . z | pos . x * m20 + m23 | pos . y * m21
fxch st ( 2 ) ; norm . x * m20 | m22 | pos . z | norm . y * m21 | norm . z | pos . x * m20 + m23 | pos . y * m21
faddp st ( 3 ) , st ( 0 ) ; m22 | pos . z | norm . x * m20 + norm . y * m21 | norm . z | pos . x * m20 + m23 | pos . y * m21
fmul st ( 1 ) , st ( 0 ) ; m22 | pos . z * m22 | norm . x * m20 + norm . y * m21 | norm . z | pos . x * m20 + m23 | pos . y * m21
fmulp st ( 3 ) , st ( 0 ) ; pos . z * m22 | norm . x * m20 + norm . y * m21 | norm . z * m22 | pos . x * m20 + m23 | pos . y * m21
fxch st ( 4 ) ; pos . y * m21 | norm . x * m20 + norm . y * m21 | norm . z * m22 | pos . x * m20 + m23 | pos . z * m22
faddp st ( 3 ) , st ( 0 ) ; norm . x * m20 + norm . y * m21 | norm . z * m22 | pos . x * m20 + pos . y * m21 + m23 | pos . z * m22
; STALLS AHOY ! ! !
faddp st ( 1 ) , st ( 0 ) ; norm . x * m20 + norm . y * m21 + norm . z * m22 | pos . x * m20 + pos . y * m21 + m23 | pos . z * m22
fxch st ( 2 ) ; pos . z * m22 | pos . x * m20 + pos . y * m21 + m23 | norm . x * m20 + norm . y * m21 + norm . z * m22
faddp st ( 1 ) , st ( 0 ) ; pos . x * m20 + pos . y * m21 + pos . z * m22 + m23 | norm . x * m20 + norm . y * m21 + norm . z * m22
fxch st ( 1 ) ; norm . x * m20 + norm . y * m21 + norm . z * m22 | pos . x * m20 + pos . y * m21 + pos . z * m22 + m23
fstp DWORD PTR [ esi + 8 ] ; pos . x * m20 + pos . y * m21 + pos . z * m22 + m23
fstp DWORD PTR [ edi + 8 ]
}
# elif _LINUX
# warning "TransformAndRotate C implementation only"
VectorTransform ( pPos , mat , pPosOut ) ;
VectorRotate ( pPosOut , mat , pPosOut ) ;
norm = pos ;
VectorNormalize ( norm ) ;
# else
# error
# endif
}
// rotate by the inverse of the matrix
void VectorIRotate ( const float * in1 , const matrix3x4_t & in2 , float * out )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( in1 ! = out ) ;
out [ 0 ] = in1 [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 1 ] * in2 [ 1 ] [ 0 ] + in1 [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 1 ] = in1 [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 1 ] * in2 [ 1 ] [ 1 ] + in1 [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 2 ] = in1 [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 1 ] * in2 [ 1 ] [ 2 ] + in1 [ 2 ] * in2 [ 2 ] [ 2 ] ;
}
# ifndef VECTOR_NO_SLOW_OPERATIONS
QAngle TransformAnglesToLocalSpace ( const QAngle & angles , const matrix3x4_t & parentMatrix )
{
matrix3x4_t angToWorld , worldToParent , localMatrix ;
MatrixInvert ( parentMatrix , worldToParent ) ;
AngleMatrix ( angles , angToWorld ) ;
ConcatTransforms ( worldToParent , angToWorld , localMatrix ) ;
QAngle out ;
MatrixAngles ( localMatrix , out ) ;
return out ;
}
QAngle TransformAnglesToWorldSpace ( const QAngle & angles , const matrix3x4_t & parentMatrix )
{
matrix3x4_t angToParent , angToWorld ;
AngleMatrix ( angles , angToParent ) ;
ConcatTransforms ( parentMatrix , angToParent , angToWorld ) ;
QAngle out ;
MatrixAngles ( angToWorld , out ) ;
return out ;
}
# endif
void MatrixInitialize ( matrix3x4_t & mat , const Vector & vecOrigin , const Vector & vecXAxis , const Vector & vecYAxis , const Vector & vecZAxis )
{
MatrixSetColumn ( vecXAxis , 0 , mat ) ;
MatrixSetColumn ( vecYAxis , 1 , mat ) ;
MatrixSetColumn ( vecZAxis , 2 , mat ) ;
MatrixSetColumn ( vecOrigin , 3 , mat ) ;
}
void MatrixCopy ( const matrix3x4_t & in , matrix3x4_t & out )
{
Assert ( s_bMathlibInitialized ) ;
memcpy ( out . Base ( ) , in . Base ( ) , sizeof ( float ) * 3 * 4 ) ;
}
void MatrixInvert ( const matrix3x4_t & in , matrix3x4_t & out )
{
Assert ( s_bMathlibInitialized ) ;
if ( & in = = & out )
{
swap ( out [ 0 ] [ 1 ] , out [ 1 ] [ 0 ] ) ;
swap ( out [ 0 ] [ 2 ] , out [ 2 ] [ 0 ] ) ;
swap ( out [ 1 ] [ 2 ] , out [ 2 ] [ 1 ] ) ;
}
else
{
// I'm guessing this only works on a 3x4 orthonormal matrix
out [ 0 ] [ 0 ] = in [ 0 ] [ 0 ] ;
out [ 0 ] [ 1 ] = in [ 1 ] [ 0 ] ;
out [ 0 ] [ 2 ] = in [ 2 ] [ 0 ] ;
out [ 1 ] [ 0 ] = in [ 0 ] [ 1 ] ;
out [ 1 ] [ 1 ] = in [ 1 ] [ 1 ] ;
out [ 1 ] [ 2 ] = in [ 2 ] [ 1 ] ;
out [ 2 ] [ 0 ] = in [ 0 ] [ 2 ] ;
out [ 2 ] [ 1 ] = in [ 1 ] [ 2 ] ;
out [ 2 ] [ 2 ] = in [ 2 ] [ 2 ] ;
}
float tmp [ 3 ] ;
tmp [ 0 ] = in [ 0 ] [ 3 ] ;
tmp [ 1 ] = in [ 1 ] [ 3 ] ;
tmp [ 2 ] = in [ 2 ] [ 3 ] ;
out [ 0 ] [ 3 ] = - DotProduct ( tmp , out [ 0 ] ) ;
out [ 1 ] [ 3 ] = - DotProduct ( tmp , out [ 1 ] ) ;
out [ 2 ] [ 3 ] = - DotProduct ( tmp , out [ 2 ] ) ;
}
void MatrixGetColumn ( const matrix3x4_t & in , int column , Vector & out )
{
out . x = in [ 0 ] [ column ] ;
out . y = in [ 1 ] [ column ] ;
out . z = in [ 2 ] [ column ] ;
}
void MatrixSetColumn ( const Vector & in , int column , matrix3x4_t & out )
{
out [ 0 ] [ column ] = in . x ;
out [ 1 ] [ column ] = in . y ;
out [ 2 ] [ column ] = in . z ;
}
int VectorCompare ( const float * v1 , const float * v2 )
{
Assert ( s_bMathlibInitialized ) ;
int i ;
for ( i = 0 ; i < 3 ; i + + )
if ( v1 [ i ] ! = v2 [ i ] )
return 0 ;
return 1 ;
}
# if _WIN32
_declspec ( naked )
# ifndef PFN_VECTORMA
void VectorMA ( const float * start , float scale , const float * direction , float * dest )
# else
void _VectorMA ( const float * start , float scale , const float * direction , float * dest )
# endif
{
Assert ( s_bMathlibInitialized ) ;
_asm {
mov eax , DWORD PTR [ esp + 0x04 ] ; * start , s0 . . s2
mov ecx , DWORD PTR [ esp + 0x0c ] ; * direction , d0 . . d2
mov edx , DWORD PTR [ esp + 0x10 ] ; * dest
fld DWORD PTR [ esp + 0x08 ] ; sc
fld DWORD PTR [ eax ] ; s0 | sc
fld DWORD PTR [ ecx ] ; d0 | s0 | sc
fld DWORD PTR [ eax + 4 ] ; s1 | d0 | s0 | sc
fld DWORD PTR [ ecx + 4 ] ; d1 | s1 | d0 | s0 | sc
fld DWORD PTR [ eax + 8 ] ; s2 | d1 | s1 | d0 | s0 | sc
fxch st ( 4 ) ; s0 | d1 | s1 | d0 | s2 | sc
fld DWORD PTR [ ecx + 8 ] ; d2 | s0 | d1 | s1 | d0 | s2 | sc
fxch st ( 4 ) ; d0 | s0 | d1 | s1 | d2 | s2 | sc
fmul st , st ( 6 ) ; d0 * sc | s0 | d1 | s1 | d2 | s2 | sc
fxch st ( 2 ) ; d1 | s0 | d0 * sc | s1 | d2 | s2 | sc
fmul st , st ( 6 ) ; d1 * sc | s0 | d0 * sc | s1 | d2 | s2 | sc
fxch st ( 4 ) ; d2 | s0 | d0 * sc | s1 | d1 * sc | s2 | sc
fmulp st ( 6 ) , st ; s0 | d0 * sc | s1 | d1 * sc | s2 | d2 * sc
faddp st ( 1 ) , st ; s0 + d0 * sc | s1 | d1 * sc | s2 | d2 * sc
fstp DWORD PTR [ edx ] ; s1 | d1 * sc | s2 | d2 * sc
faddp st ( 1 ) , st ; s1 + d1 * sc | s2 | d2 * sc
fstp DWORD PTR [ edx + 4 ] ; s2 | d2 * sc
faddp st ( 1 ) , st ; s2 + d2 * sc
fstp DWORD PTR [ edx + 8 ] ; [ Empty stack ]
ret
}
}
# else
# ifndef PFN_VECTORMA
void VectorMA ( const float * start , float scale , const float * direction , float * dest )
# else
void _VectorMA ( const float * start , float scale , const float * direction , float * dest )
# endif
{
Assert ( s_bMathlibInitialized ) ;
dest [ 0 ] = start [ 0 ] + scale * direction [ 0 ] ;
dest [ 1 ] = start [ 1 ] + scale * direction [ 1 ] ;
dest [ 2 ] = start [ 2 ] + scale * direction [ 2 ] ;
}
# endif
# ifdef _WIN32
void
_declspec ( naked )
_SSE_VectorMA ( const float * start , float scale , const float * direction , float * dest )
{
// FIXME: This don't work!! It will overwrite memory in the write to dest
Assert ( 0 ) ;
Assert ( s_bMathlibInitialized ) ;
_asm { // Intel SSE only routine
mov eax , DWORD PTR [ esp + 0x04 ] ; * start , s0 . . s2
mov ecx , DWORD PTR [ esp + 0x0c ] ; * direction , d0 . . d2
mov edx , DWORD PTR [ esp + 0x10 ] ; * dest
movss xmm2 , [ esp + 0x08 ] ; x2 = scale , 0 , 0 , 0
# ifdef ALIGNED_VECTOR
movaps xmm3 , [ ecx ] ; x3 = dir0 , dir1 , dir2 , X
pshufd xmm2 , xmm2 , 0 ; x2 = scale , scale , scale , scale
movaps xmm1 , [ eax ] ; x1 = start1 , start2 , start3 , X
mulps xmm3 , xmm2 ; x3 * = x2
addps xmm3 , xmm1 ; x3 + = x1
movaps [ edx ] , xmm3 ; * dest = x3
# else
movups xmm3 , [ ecx ] ; x3 = dir0 , dir1 , dir2 , X
pshufd xmm2 , xmm2 , 0 ; x2 = scale , scale , scale , scale
movups xmm1 , [ eax ] ; x1 = start1 , start2 , start3 , X
mulps xmm3 , xmm2 ; x3 * = x2
addps xmm3 , xmm1 ; x3 + = x1
movups [ edx ] , xmm3 ; * dest = x3
# endif
}
}
# endif
# ifdef OLD_DOTPRODUCT
_declspec ( naked )
vec_t __cdecl DotProduct ( const vec_t * a , const vec_t * c )
{
Assert ( s_bMathlibInitialized ) ;
_asm {
mov eax , DWORD PTR [ esp + 4 ] ; * a
mov ecx , DWORD PTR [ esp + 8 ] ; * c
fld DWORD PTR [ eax ] ; a0
fmul DWORD PTR [ ecx ] ; a0 * c0
fld DWORD PTR [ eax + 4 ] ; a1 | a0 * c0
fmul DWORD PTR [ ecx + 4 ] ; a1 * c1 | a0 * c0
fld DWORD PTR [ eax + 8 ] ; a2 | a1 * c1 | a0 * c0
fmul DWORD PTR [ ecx + 8 ] ; a2 * c2 | a1 * c1 | a0 * c0
fxch st ( 2 ) ; a0 * c0 | a1 * c1 | a2 * c2
faddp st ( 1 ) , st ; a1 * c1 + a0 * c0 | a2 * c2
faddp st ( 1 ) , st ; a2 * c2 + a0 * c0 + a1 * c1
ret
}
}
# endif
// SSE DotProduct -- it's a smidgen faster than the asm DotProduct...
// Should be validated too! :)
// NJS: (Nov 1 2002) -NOT- faster. may time a couple cycles faster in a single function like
// this, but when inlined, and instruction scheduled, the C version is faster.
// Verified this via VTune
/*
vec_t DotProduct ( const vec_t * a , const vec_t * c )
{
vec_t temp ;
__asm
{
mov eax , a ;
mov ecx , c ;
mov edx , DWORD PTR [ temp ]
movss xmm0 , [ eax ] ;
mulss xmm0 , [ ecx ] ;
movss xmm1 , [ eax + 4 ] ;
mulss xmm1 , [ ecx + 4 ] ;
movss xmm2 , [ eax + 8 ] ;
mulss xmm2 , [ ecx + 8 ] ;
addss xmm0 , xmm1 ;
addss xmm0 , xmm2 ;
movss [ edx ] , xmm0 ;
fld DWORD PTR [ edx ] ;
ret
}
}
*/
void CrossProduct ( const float * v1 , const float * v2 , float * cross )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( v1 ! = cross ) ;
Assert ( v2 ! = cross ) ;
cross [ 0 ] = v1 [ 1 ] * v2 [ 2 ] - v1 [ 2 ] * v2 [ 1 ] ;
cross [ 1 ] = v1 [ 2 ] * v2 [ 0 ] - v1 [ 0 ] * v2 [ 2 ] ;
cross [ 2 ] = v1 [ 0 ] * v2 [ 1 ] - v1 [ 1 ] * v2 [ 0 ] ;
}
int Q_log2 ( int val )
{
int answer = 0 ;
while ( val > > = 1 )
answer + + ;
return answer ;
}
void VectorMatrix ( const Vector & forward , matrix3x4_t & matrix )
{
Assert ( s_bMathlibInitialized ) ;
Vector right , left , up , tmp ;
if ( forward [ 0 ] = = 0 & & forward [ 1 ] = = 0 )
{
// pitch 90 degrees up/down from identity
left [ 0 ] = 0 ;
left [ 1 ] = 1 ;
left [ 2 ] = 0 ;
up [ 0 ] = - forward [ 2 ] ; // NOTE: This should be 1.0f or -1.0f, Assert?
up [ 1 ] = 0 ;
up [ 2 ] = 0 ;
}
else
{
tmp [ 0 ] = 0 ; tmp [ 1 ] = 0 ; tmp [ 2 ] = 1.0 ;
CrossProduct ( tmp , forward , left ) ;
VectorNormalize ( left ) ;
CrossProduct ( forward , left , up ) ;
VectorNormalize ( up ) ;
}
MatrixSetColumn ( forward , 0 , matrix ) ;
MatrixSetColumn ( left , 1 , matrix ) ;
MatrixSetColumn ( up , 2 , matrix ) ;
}
void VectorVectors ( const Vector & forward , Vector & right , Vector & up )
{
Assert ( s_bMathlibInitialized ) ;
Vector tmp ;
if ( forward [ 0 ] = = 0 & & forward [ 1 ] = = 0 )
{
// pitch 90 degrees up/down from identity
right [ 0 ] = 0 ;
right [ 1 ] = - 1 ;
right [ 2 ] = 0 ;
up [ 0 ] = - forward [ 2 ] ;
up [ 1 ] = 0 ;
up [ 2 ] = 0 ;
}
else
{
tmp [ 0 ] = 0 ; tmp [ 1 ] = 0 ; tmp [ 2 ] = 1.0 ;
CrossProduct ( forward , tmp , right ) ;
VectorNormalize ( right ) ;
CrossProduct ( right , forward , up ) ;
VectorNormalize ( up ) ;
}
}
void VectorAngles ( const float * forward , float * angles )
{
Assert ( s_bMathlibInitialized ) ;
float tmp , yaw , pitch ;
if ( forward [ 1 ] = = 0 & & forward [ 0 ] = = 0 )
{
yaw = 0 ;
if ( forward [ 2 ] > 0 )
pitch = 270 ;
else
pitch = 90 ;
}
else
{
yaw = ( atan2 ( forward [ 1 ] , forward [ 0 ] ) * 180 / M_PI ) ;
if ( yaw < 0 )
yaw + = 360 ;
tmp = sqrt ( forward [ 0 ] * forward [ 0 ] + forward [ 1 ] * forward [ 1 ] ) ;
pitch = ( atan2 ( - forward [ 2 ] , tmp ) * 180 / M_PI ) ;
if ( pitch < 0 )
pitch + = 360 ;
}
angles [ 0 ] = pitch ;
angles [ 1 ] = yaw ;
angles [ 2 ] = 0 ;
}
/*
= = = = = = = = = = = = = = = =
R_ConcatRotations
= = = = = = = = = = = = = = = =
*/
void ConcatRotations ( const float in1 [ 3 ] [ 3 ] , const float in2 [ 3 ] [ 3 ] , float out [ 3 ] [ 3 ] )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( in1 ! = out ) ;
Assert ( in2 ! = out ) ;
out [ 0 ] [ 0 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 0 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 0 ] [ 1 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 1 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 0 ] [ 2 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 2 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 2 ] ;
out [ 1 ] [ 0 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 0 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 1 ] [ 1 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 1 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 1 ] [ 2 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 2 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 2 ] ;
out [ 2 ] [ 0 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 0 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 2 ] [ 1 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 1 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 2 ] [ 2 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 2 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 2 ] ;
}
/*
= = = = = = = = = = = = = = = =
R_ConcatTransforms
= = = = = = = = = = = = = = = =
*/
void ConcatTransforms ( const matrix3x4_t & in1 , const matrix3x4_t & in2 , matrix3x4_t & out )
{
Assert ( s_bMathlibInitialized ) ;
if ( & in1 = = & out )
{
matrix3x4_t in1b ;
MatrixCopy ( in1 , in1b ) ;
ConcatTransforms ( in1b , in2 , out ) ;
return ;
}
if ( & in2 = = & out )
{
matrix3x4_t in2b ;
MatrixCopy ( in2 , in2b ) ;
ConcatTransforms ( in1 , in2b , out ) ;
return ;
}
out [ 0 ] [ 0 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 0 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 0 ] [ 1 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 1 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 0 ] [ 2 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 2 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 2 ] ;
out [ 0 ] [ 3 ] = in1 [ 0 ] [ 0 ] * in2 [ 0 ] [ 3 ] + in1 [ 0 ] [ 1 ] * in2 [ 1 ] [ 3 ] +
in1 [ 0 ] [ 2 ] * in2 [ 2 ] [ 3 ] + in1 [ 0 ] [ 3 ] ;
out [ 1 ] [ 0 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 0 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 1 ] [ 1 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 1 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 1 ] [ 2 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 2 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 2 ] ;
out [ 1 ] [ 3 ] = in1 [ 1 ] [ 0 ] * in2 [ 0 ] [ 3 ] + in1 [ 1 ] [ 1 ] * in2 [ 1 ] [ 3 ] +
in1 [ 1 ] [ 2 ] * in2 [ 2 ] [ 3 ] + in1 [ 1 ] [ 3 ] ;
out [ 2 ] [ 0 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 0 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 0 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 0 ] ;
out [ 2 ] [ 1 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 1 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 1 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 1 ] ;
out [ 2 ] [ 2 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 2 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 2 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 2 ] ;
out [ 2 ] [ 3 ] = in1 [ 2 ] [ 0 ] * in2 [ 0 ] [ 3 ] + in1 [ 2 ] [ 1 ] * in2 [ 1 ] [ 3 ] +
in1 [ 2 ] [ 2 ] * in2 [ 2 ] [ 3 ] + in1 [ 2 ] [ 3 ] ;
}
/*
= = = = = = = = = = = = = = = = = = =
FloorDivMod
Returns mathematically correct ( floor - based ) quotient and remainder for
numer and denom , both of which should contain no fractional part . The
quotient must fit in 32 bits .
= = = = = = = = = = = = = = = = = = = =
*/
void FloorDivMod ( double numer , double denom , int * quotient ,
int * rem )
{
Assert ( s_bMathlibInitialized ) ;
int q , r ;
double x ;
# ifdef PARANOID
if ( denom < = 0.0 )
Sys_Error ( " FloorDivMod: bad denominator %d \n " , denom ) ;
// if ((floor(numer) != numer) || (floor(denom) != denom))
// Sys_Error ("FloorDivMod: non-integer numer or denom %f %f\n",
// numer, denom);
# endif
if ( numer > = 0.0 )
{
x = floor ( numer / denom ) ;
q = ( int ) x ;
r = Floor2Int ( numer - ( x * denom ) ) ;
}
else
{
//
// perform operations with positive values, and fix mod to make floor-based
//
x = floor ( - numer / denom ) ;
q = - ( int ) x ;
r = Floor2Int ( - numer - ( x * denom ) ) ;
if ( r ! = 0 )
{
q - - ;
r = ( int ) denom - r ;
}
}
* quotient = q ;
* rem = r ;
}
/*
= = = = = = = = = = = = = = = = = = =
GreatestCommonDivisor
= = = = = = = = = = = = = = = = = = = =
*/
int GreatestCommonDivisor ( int i1 , int i2 )
{
Assert ( s_bMathlibInitialized ) ;
if ( i1 > i2 )
{
if ( i2 = = 0 )
return ( i1 ) ;
return GreatestCommonDivisor ( i2 , i1 % i2 ) ;
}
else
{
if ( i1 = = 0 )
return ( i2 ) ;
return GreatestCommonDivisor ( i1 , i2 % i1 ) ;
}
}
bool IsDenormal ( const float & val )
{
const int x = * reinterpret_cast < const int * > ( & val ) ; // needs 32-bit int
const int abs_mantissa = x & 0x007FFFFF ;
const int biased_exponent = x & 0x7F800000 ;
return ( biased_exponent = = 0 & & abs_mantissa ! = 0 ) ;
}
// ************************************** SPECIAL NOTE *******************************************************
// IFF you find this code to be PROBLEMATIC, PLEASE "#if 0" this and send email to "tkback@valvesoftware.com
// Note what machine this happened on and BUILD a debug version of the binary and see if it catches the problem.
// ***********************************************************************************************************
#if 0
# ifdef __cplusplus
extern " C " {
# endif
// This code is an TEST OPTIMIZATION for float -> long. DISABLE this (use MSVCRT ver) if it causes problems.
//
// WARNING!!! This is an experimental replacement for the compiler intrinsic "_ftol()'
// This code is called whenever there is a float to int conversion in c code.
// Try this 80x87 specific FPU conversion trick! (This normally does a ROUND during FISTP by default)
// If positive, we subtract 0.5 before doing a round() which equals floor() for positive values
// If negative, we add 0.5 before doing a round() which equals ceil() for negative values.
// Together, this simulates "chop to zero" even when the FPU is in normal rounding mode.
// The following two values should be "cache line aligned" ie. align 16.
const unsigned long FPU_HALF = 0x3EFFFFFFL ;
const unsigned long FPU_MAGIC = 0x59C00000L ;
long int
__declspec ( naked )
__cdecl _ftol ( )
{
# ifdef _DEBUG
// Turn off "Debug Multitreaded C Runtime Lib use in release mode" to avoid this.(why debug?)
//#ifdef NDEBUG
//#error Both _DEBUG and NDEBUG set!
//#endif
Assert ( s_bMathlibInitialized ) ;
// Make sure that we are in 64-bit precision(53-bit mantissa) FPU mode. _ftol() needs 64bit.
_asm {
fnstcw word ptr [ esp - 4 ]
and word ptr [ esp - 4 ] , 0x0300
cmp word ptr [ esp - 4 ] , 0x0200 ; Precision bits . Must be 0x0200
je short good_precision
int 3 ; WRONG Precision set on FPU ! ! ! s / b > 32 bit Internally .
good_precision :
}
# endif
// Start the float to long conversion
_asm { ; farg
ftst
fnstsw ax
sahf
jbe short negative
fsub DWORD PTR [ FPU_HALF ] ; Subtract 0.5 from positive float , this changes round ( ) to floor ( ) !
fadd DWORD PTR [ FPU_MAGIC ] ; Adjust the mantissa to have the " int " in the low bits .
fstp QWORD PTR [ esp - 0 Ch ] ; Write out the floating point value in 64 bit double format
mov eax , DWORD PTR [ esp - 0 Ch ] ; Grab the low 32 bits of the mantissa which have our " int " .
ret
negative :
fadd DWORD PTR [ FPU_HALF ] ; Add 0.5 to Negative float , this changes round ( ) to floor ( ) !
fadd DWORD PTR [ FPU_MAGIC ] ; Adjust the mantissa to have the " int " in the low bits .
fstp QWORD PTR [ esp - 0 Ch ] ; Write out the floating point value in 64 bit double format
mov eax , DWORD PTR [ esp - 0 Ch ] ; Grab the low 32 bits of the mantissa which have our " int " .
ret
}
}
# ifdef __cplusplus
}
# endif
# endif
int SignbitsForPlane ( cplane_t * out )
{
Assert ( s_bMathlibInitialized ) ;
int bits , j ;
// for fast box on planeside test
bits = 0 ;
for ( j = 0 ; j < 3 ; j + + )
{
if ( out - > normal [ j ] < 0 )
bits | = 1 < < j ;
}
return bits ;
}
/*
= = = = = = = = = = = = = = = = = =
BoxOnPlaneSide
Returns 1 , 2 , or 1 + 2
= = = = = = = = = = = = = = = = = =
*/
# if 1 || !id386 || defined _LINUX
int __cdecl BoxOnPlaneSide ( const float * emins , const float * emaxs , const cplane_t * p )
{
Assert ( s_bMathlibInitialized ) ;
float dist1 , dist2 ;
int sides ;
// fast axial cases
if ( p - > type < 3 )
{
if ( p - > dist < = emins [ p - > type ] )
return 1 ;
if ( p - > dist > = emaxs [ p - > type ] )
return 2 ;
return 3 ;
}
// general case
switch ( p - > signbits )
{
case 0 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 1 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 2 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 3 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
break ;
case 4 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
case 5 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
case 6 :
dist1 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
case 7 :
dist1 = p - > normal [ 0 ] * emins [ 0 ] + p - > normal [ 1 ] * emins [ 1 ] + p - > normal [ 2 ] * emins [ 2 ] ;
dist2 = p - > normal [ 0 ] * emaxs [ 0 ] + p - > normal [ 1 ] * emaxs [ 1 ] + p - > normal [ 2 ] * emaxs [ 2 ] ;
break ;
default :
dist1 = dist2 = 0 ; // shut up compiler
Assert ( 0 ) ;
break ;
}
sides = 0 ;
if ( dist1 > = p - > dist )
sides = 1 ;
if ( dist2 < p - > dist )
sides | = 2 ;
Assert ( sides ! = 0 ) ;
return sides ;
}
# else
# pragma warning( disable: 4035 )
__declspec ( naked ) int __cdecl BoxOnPlaneSide ( const float * emins , const float * emaxs , const cplane_t * p )
{
Assert ( s_bMathlibInitialized ) ;
static int bops_initialized ;
static int Ljmptab [ 8 ] ;
__asm {
push ebx
cmp bops_initialized , 1
je initialized
mov bops_initialized , 1
mov Ljmptab [ 0 * 4 ] , offset Lcase0
mov Ljmptab [ 1 * 4 ] , offset Lcase1
mov Ljmptab [ 2 * 4 ] , offset Lcase2
mov Ljmptab [ 3 * 4 ] , offset Lcase3
mov Ljmptab [ 4 * 4 ] , offset Lcase4
mov Ljmptab [ 5 * 4 ] , offset Lcase5
mov Ljmptab [ 6 * 4 ] , offset Lcase6
mov Ljmptab [ 7 * 4 ] , offset Lcase7
initialized :
mov edx , ds : dword ptr [ 4 + 12 + esp ]
mov ecx , ds : dword ptr [ 4 + 4 + esp ]
xor eax , eax
mov ebx , ds : dword ptr [ 4 + 8 + esp ]
mov al , ds : byte ptr [ 17 + edx ]
cmp al , 8
jge Lerror
fld ds : dword ptr [ 0 + edx ]
fld st ( 0 )
jmp dword ptr [ Ljmptab + eax * 4 ]
Lcase0 :
fmul ds : dword ptr [ ebx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ebx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase1 :
fmul ds : dword ptr [ ecx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ebx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase2 :
fmul ds : dword ptr [ ebx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ecx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase3 :
fmul ds : dword ptr [ ecx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ecx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase4 :
fmul ds : dword ptr [ ebx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ebx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase5 :
fmul ds : dword ptr [ ecx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ebx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase6 :
fmul ds : dword ptr [ ebx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ecx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ecx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
jmp LSetSides
Lcase7 :
fmul ds : dword ptr [ ecx ]
fld ds : dword ptr [ 0 + 4 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 4 + ecx ]
fld ds : dword ptr [ 0 + 8 + edx ]
fxch st ( 2 )
fmul ds : dword ptr [ 4 + ebx ]
fxch st ( 2 )
fld st ( 0 )
fmul ds : dword ptr [ 8 + ecx ]
fxch st ( 5 )
faddp st ( 3 ) , st ( 0 )
fmul ds : dword ptr [ 8 + ebx ]
fxch st ( 1 )
faddp st ( 3 ) , st ( 0 )
fxch st ( 3 )
faddp st ( 2 ) , st ( 0 )
LSetSides :
faddp st ( 2 ) , st ( 0 )
fcomp ds : dword ptr [ 12 + edx ]
xor ecx , ecx
fnstsw ax
fcomp ds : dword ptr [ 12 + edx ]
and ah , 1
xor ah , 1
add cl , ah
fnstsw ax
and ah , 1
add ah , ah
add cl , ah
pop ebx
mov eax , ecx
ret
Lerror :
int 3
}
}
# pragma warning( default: 4035 )
# endif
//-----------------------------------------------------------------------------
// VectorMA routines
//-----------------------------------------------------------------------------
# if _WIN32
_declspec ( naked )
# if PFN_VECTORMA
void __cdecl _VectorMA ( const Vector & start , float scale , const Vector & direction , Vector & dest )
# else
void __cdecl VectorMA ( const Vector & start , float scale , const Vector & direction , Vector & dest )
# endif
{
Assert ( s_bMathlibInitialized ) ;
_asm {
mov eax , DWORD PTR [ esp + 0x04 ] ; * start , s0 . . s2
mov ecx , DWORD PTR [ esp + 0x0c ] ; * direction , d0 . . d2
mov edx , DWORD PTR [ esp + 0x10 ] ; * dest
fld DWORD PTR [ esp + 0x08 ] ; sc
fld DWORD PTR [ eax ] ; s0 | sc
fld DWORD PTR [ ecx ] ; d0 | s0 | sc
fld DWORD PTR [ eax + 4 ] ; s1 | d0 | s0 | sc
fld DWORD PTR [ ecx + 4 ] ; d1 | s1 | d0 | s0 | sc
fld DWORD PTR [ eax + 8 ] ; s2 | d1 | s1 | d0 | s0 | sc
fxch st ( 4 ) ; s0 | d1 | s1 | d0 | s2 | sc
fld DWORD PTR [ ecx + 8 ] ; d2 | s0 | d1 | s1 | d0 | s2 | sc
fxch st ( 4 ) ; d0 | s0 | d1 | s1 | d2 | s2 | sc
fmul st , st ( 6 ) ; d0 * sc | s0 | d1 | s1 | d2 | s2 | sc
fxch st ( 2 ) ; d1 | s0 | d0 * sc | s1 | d2 | s2 | sc
fmul st , st ( 6 ) ; d1 * sc | s0 | d0 * sc | s1 | d2 | s2 | sc
fxch st ( 4 ) ; d2 | s0 | d0 * sc | s1 | d1 * sc | s2 | sc
fmulp st ( 6 ) , st ; s0 | d0 * sc | s1 | d1 * sc | s2 | d2 * sc
faddp st ( 1 ) , st ; s0 + d0 * sc | s1 | d1 * sc | s2 | d2 * sc
fstp DWORD PTR [ edx ] ; s1 | d1 * sc | s2 | d2 * sc
faddp st ( 1 ) , st ; s1 + d1 * sc | s2 | d2 * sc
fstp DWORD PTR [ edx + 4 ] ; s2 | d2 * sc
faddp st ( 1 ) , st ; s2 + d2 * sc
fstp DWORD PTR [ edx + 8 ] ; [ Empty stack ]
ret
}
}
# else
# if PFN_VECTORMA
void __cdecl _VectorMA ( const Vector & start , float scale , const Vector & direction , Vector & dest )
# else
void __cdecl VectorMA ( const Vector & start , float scale , const Vector & direction , Vector & dest )
# endif
{
Assert ( s_bMathlibInitialized ) ;
dest [ 0 ] = start [ 0 ] + scale * direction [ 0 ] ;
dest [ 1 ] = start [ 1 ] + scale * direction [ 1 ] ;
dest [ 2 ] = start [ 2 ] + scale * direction [ 2 ] ;
}
# endif
# ifdef _WIN32
# if PFN_VECTORMA
void
_declspec ( naked )
__cdecl _SSE_VectorMA ( const Vector & start , float scale , const Vector & direction , Vector & dest )
{
// FIXME: This don't work!! It will overwrite memory in the write to dest
Assert ( 0 ) ;
Assert ( s_bMathlibInitialized ) ;
_asm
{
// Intel SSE only routine
mov eax , DWORD PTR [ esp + 0x04 ] ; * start , s0 . . s2
mov ecx , DWORD PTR [ esp + 0x0c ] ; * direction , d0 . . d2
mov edx , DWORD PTR [ esp + 0x10 ] ; * dest
movss xmm2 , [ esp + 0x08 ] ; x2 = scale , 0 , 0 , 0
# ifdef ALIGNED_VECTOR
movaps xmm3 , [ ecx ] ; x3 = dir0 , dir1 , dir2 , X
pshufd xmm2 , xmm2 , 0 ; x2 = scale , scale , scale , scale
movaps xmm1 , [ eax ] ; x1 = start1 , start2 , start3 , X
mulps xmm3 , xmm2 ; x3 * = x2
addps xmm3 , xmm1 ; x3 + = x1
movaps [ edx ] , xmm3 ; * dest = x3
# else
movups xmm3 , [ ecx ] ; x3 = dir0 , dir1 , dir2 , X
pshufd xmm2 , xmm2 , 0 ; x2 = scale , scale , scale , scale
movups xmm1 , [ eax ] ; x1 = start1 , start2 , start3 , X
mulps xmm3 , xmm2 ; x3 * = x2
addps xmm3 , xmm1 ; x3 + = x1
movups [ edx ] , xmm3 ; * dest = x3
# endif
}
}
float ( __cdecl * pfVectorMA ) ( Vector & v ) = _VectorMA ;
# endif
# endif
//-----------------------------------------------------------------------------
// Euler QAngle -> Basis Vectors
//-----------------------------------------------------------------------------
void AngleVectors ( const QAngle & angles , Vector * forward )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( forward ) ;
float sp , sy , cp , cy ;
SinCos ( DEG2RAD ( angles [ YAW ] ) , & sy , & cy ) ;
SinCos ( DEG2RAD ( angles [ PITCH ] ) , & sp , & cp ) ;
forward - > x = cp * cy ;
forward - > y = cp * sy ;
forward - > z = - sp ;
}
void AngleVectors ( const QAngle & angles , Vector * forward , Vector * right , Vector * up )
{
Assert ( s_bMathlibInitialized ) ;
float sr , sp , sy , cr , cp , cy ;
SinCos ( DEG2RAD ( angles [ YAW ] ) , & sy , & cy ) ;
SinCos ( DEG2RAD ( angles [ PITCH ] ) , & sp , & cp ) ;
SinCos ( DEG2RAD ( angles [ ROLL ] ) , & sr , & cr ) ;
if ( forward )
{
forward - > x = cp * cy ;
forward - > y = cp * sy ;
forward - > z = - sp ;
}
if ( right )
{
right - > x = ( - 1 * sr * sp * cy + - 1 * cr * - sy ) ;
right - > y = ( - 1 * sr * sp * sy + - 1 * cr * cy ) ;
right - > z = - 1 * sr * cp ;
}
if ( up )
{
up - > x = ( cr * sp * cy + - sr * - sy ) ;
up - > y = ( cr * sp * sy + - sr * cy ) ;
up - > z = cr * cp ;
}
}
//-----------------------------------------------------------------------------
// Euler QAngle -> Basis Vectors transposed
//-----------------------------------------------------------------------------
void AngleVectorsTranspose ( const QAngle & angles , Vector * forward , Vector * right , Vector * up )
{
Assert ( s_bMathlibInitialized ) ;
float sr , sp , sy , cr , cp , cy ;
SinCos ( DEG2RAD ( angles [ YAW ] ) , & sy , & cy ) ;
SinCos ( DEG2RAD ( angles [ PITCH ] ) , & sp , & cp ) ;
SinCos ( DEG2RAD ( angles [ ROLL ] ) , & sr , & cr ) ;
if ( forward )
{
forward - > x = cp * cy ;
forward - > y = ( sr * sp * cy + cr * - sy ) ;
forward - > z = ( cr * sp * cy + - sr * - sy ) ;
}
if ( right )
{
right - > x = cp * sy ;
right - > y = ( sr * sp * sy + cr * cy ) ;
right - > z = ( cr * sp * sy + - sr * cy ) ;
}
if ( up )
{
up - > x = - sp ;
up - > y = sr * cp ;
up - > z = cr * cp ;
}
}
//-----------------------------------------------------------------------------
// Forward direction vector -> Euler angles
//-----------------------------------------------------------------------------
void VectorAngles ( const Vector & forward , QAngle & angles )
{
Assert ( s_bMathlibInitialized ) ;
float tmp , yaw , pitch ;
if ( forward [ 1 ] = = 0 & & forward [ 0 ] = = 0 )
{
yaw = 0 ;
if ( forward [ 2 ] > 0 )
pitch = 270 ;
else
pitch = 90 ;
}
else
{
yaw = ( atan2 ( forward [ 1 ] , forward [ 0 ] ) * 180 / M_PI ) ;
if ( yaw < 0 )
yaw + = 360 ;
tmp = FastSqrt ( forward [ 0 ] * forward [ 0 ] + forward [ 1 ] * forward [ 1 ] ) ;
pitch = ( atan2 ( - forward [ 2 ] , tmp ) * 180 / M_PI ) ;
if ( pitch < 0 )
pitch + = 360 ;
}
angles [ 0 ] = pitch ;
angles [ 1 ] = yaw ;
angles [ 2 ] = 0 ;
}
//-----------------------------------------------------------------------------
// Forward direction vector with a reference up vector -> Euler angles
//-----------------------------------------------------------------------------
void VectorAngles ( const Vector & forward , const Vector & pseudoup , QAngle & angles )
{
Assert ( s_bMathlibInitialized ) ;
Vector left ;
CrossProduct ( forward , pseudoup , left ) ;
VectorNormalizeFast ( left ) ;
float xyDist = sqrtf ( forward [ 0 ] * forward [ 0 ] + forward [ 1 ] * forward [ 1 ] ) ;
// enough here to get angles?
if ( xyDist > 0.001f )
{
// (yaw) y = ATAN( forward.y, forward.x ); -- in our space, forward is the X axis
angles [ 1 ] = RAD2DEG ( atan2f ( forward [ 1 ] , forward [ 0 ] ) ) ;
// The engine does pitch inverted from this, but we always end up negating it in the DLL
// UNDONE: Fix the engine to make it consistent
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
angles [ 0 ] = RAD2DEG ( atan2f ( - forward [ 2 ] , xyDist ) ) ;
float up_z = ( left [ 0 ] * forward [ 1 ] ) - ( left [ 1 ] * forward [ 0 ] ) ;
// (roll) z = ATAN( left.z, up.z );
angles [ 2 ] = RAD2DEG ( atan2f ( left [ 2 ] , up_z ) ) ;
}
else // forward is mostly Z, gimbal lock-
{
// (yaw) y = ATAN( -left.x, left.y ); -- forward is mostly z, so use right for yaw
angles [ 1 ] = RAD2DEG ( atan2f ( left [ 0 ] , - left [ 1 ] ) ) ; //This was originally copied from the "void MatrixAngles( const matrix3x4_t& matrix, float *angles )" code, and it's 180 degrees off, negated the values and it all works now (Dave Kircher)
// The engine does pitch inverted from this, but we always end up negating it in the DLL
// UNDONE: Fix the engine to make it consistent
// (pitch) x = ATAN( -forward.z, sqrt(forward.x*forward.x+forward.y*forward.y) );
angles [ 0 ] = RAD2DEG ( atan2f ( - forward [ 2 ] , xyDist ) ) ;
// Assume no roll in this case as one degree of freedom has been lost (i.e. yaw == roll)
angles [ 2 ] = 0 ;
}
}
void SetIdentityMatrix ( matrix3x4_t & matrix )
{
memset ( matrix . Base ( ) , 0 , sizeof ( float ) * 3 * 4 ) ;
matrix [ 0 ] [ 0 ] = 1.0 ;
matrix [ 1 ] [ 1 ] = 1.0 ;
matrix [ 2 ] [ 2 ] = 1.0 ;
}
//-----------------------------------------------------------------------------
// Purpose: converts engine euler angles into a matrix
// Input : vec3_t angles - PITCH, YAW, ROLL
// Output : *matrix - left-handed column matrix
// the basis vectors for the rotations will be in the columns as follows:
// matrix[][0] is forward
// matrix[][1] is left
// matrix[][2] is up
//-----------------------------------------------------------------------------
void AngleMatrix ( RadianEuler const & angles , const Vector & position , matrix3x4_t & matrix )
{
AngleMatrix ( angles , matrix ) ;
MatrixSetColumn ( position , 3 , matrix ) ;
}
void AngleMatrix ( const RadianEuler & angles , matrix3x4_t & matrix )
{
QAngle quakeEuler ( RAD2DEG ( angles . y ) , RAD2DEG ( angles . z ) , RAD2DEG ( angles . x ) ) ;
AngleMatrix ( quakeEuler , matrix ) ;
}
void AngleMatrix ( const QAngle & angles , const Vector & position , matrix3x4_t & matrix )
{
AngleMatrix ( angles , matrix ) ;
MatrixSetColumn ( position , 3 , matrix ) ;
}
void AngleMatrix ( const QAngle & angles , matrix3x4_t & matrix )
{
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " AngleMatrix " , " Mathlib " ) ;
# endif
Assert ( s_bMathlibInitialized ) ;
float sr , sp , sy , cr , cp , cy ;
SinCos ( DEG2RAD ( angles [ YAW ] ) , & sy , & cy ) ;
SinCos ( DEG2RAD ( angles [ PITCH ] ) , & sp , & cp ) ;
SinCos ( DEG2RAD ( angles [ ROLL ] ) , & sr , & cr ) ;
// matrix = (YAW * PITCH) * ROLL
matrix [ 0 ] [ 0 ] = cp * cy ;
matrix [ 1 ] [ 0 ] = cp * sy ;
matrix [ 2 ] [ 0 ] = - sp ;
matrix [ 0 ] [ 1 ] = sr * sp * cy + cr * - sy ;
matrix [ 1 ] [ 1 ] = sr * sp * sy + cr * cy ;
matrix [ 2 ] [ 1 ] = sr * cp ;
matrix [ 0 ] [ 2 ] = ( cr * sp * cy + - sr * - sy ) ;
matrix [ 1 ] [ 2 ] = ( cr * sp * sy + - sr * cy ) ;
matrix [ 2 ] [ 2 ] = cr * cp ;
matrix [ 0 ] [ 3 ] = 0.f ;
matrix [ 1 ] [ 3 ] = 0.f ;
matrix [ 2 ] [ 3 ] = 0.f ;
}
void AngleIMatrix ( const RadianEuler & angles , matrix3x4_t & matrix )
{
QAngle quakeEuler ( RAD2DEG ( angles . y ) , RAD2DEG ( angles . z ) , RAD2DEG ( angles . x ) ) ;
AngleIMatrix ( quakeEuler , matrix ) ;
}
void AngleIMatrix ( const QAngle & angles , matrix3x4_t & matrix )
{
Assert ( s_bMathlibInitialized ) ;
float sr , sp , sy , cr , cp , cy ;
SinCos ( DEG2RAD ( angles [ YAW ] ) , & sy , & cy ) ;
SinCos ( DEG2RAD ( angles [ PITCH ] ) , & sp , & cp ) ;
SinCos ( DEG2RAD ( angles [ ROLL ] ) , & sr , & cr ) ;
// matrix = (YAW * PITCH) * ROLL
matrix [ 0 ] [ 0 ] = cp * cy ;
matrix [ 0 ] [ 1 ] = cp * sy ;
matrix [ 0 ] [ 2 ] = - sp ;
matrix [ 1 ] [ 0 ] = sr * sp * cy + cr * - sy ;
matrix [ 1 ] [ 1 ] = sr * sp * sy + cr * cy ;
matrix [ 1 ] [ 2 ] = sr * cp ;
matrix [ 2 ] [ 0 ] = ( cr * sp * cy + - sr * - sy ) ;
matrix [ 2 ] [ 1 ] = ( cr * sp * sy + - sr * cy ) ;
matrix [ 2 ] [ 2 ] = cr * cp ;
matrix [ 0 ] [ 3 ] = 0.f ;
matrix [ 1 ] [ 3 ] = 0.f ;
matrix [ 2 ] [ 3 ] = 0.f ;
}
void AngleIMatrix ( const QAngle & angles , const Vector & position , matrix3x4_t & mat )
{
AngleIMatrix ( angles , mat ) ;
Vector vecTranslation ;
VectorRotate ( position , mat , vecTranslation ) ;
vecTranslation * = - 1.0f ;
MatrixSetColumn ( vecTranslation , 3 , mat ) ;
}
//-----------------------------------------------------------------------------
// Intersects a sphere against an axis aligned box
//-----------------------------------------------------------------------------
bool SphereToAABBIntersection ( const Vector & sphCenter , float sphRadius ,
const Vector & boxMin , const Vector & boxMax )
{
Assert ( s_bMathlibInitialized ) ;
int i ;
float s ;
float dist = 0 ;
for ( i = 0 ; i < 3 ; i + + )
{
if ( sphCenter [ i ] < boxMin [ i ] )
{
s = sphCenter [ i ] - boxMin [ i ] ;
dist + = s * s ;
}
else if ( sphCenter [ i ] > boxMax [ i ] )
{
s = sphCenter [ i ] - boxMax [ i ] ;
dist + = s * s ;
}
}
return ( dist < = ( sphRadius * sphRadius ) ) ;
}
//-----------------------------------------------------------------------------
// Bounding box construction methods
//-----------------------------------------------------------------------------
void ClearBounds ( Vector & mins , Vector & maxs )
{
Assert ( s_bMathlibInitialized ) ;
mins [ 0 ] = mins [ 1 ] = mins [ 2 ] = 99999 ;
maxs [ 0 ] = maxs [ 1 ] = maxs [ 2 ] = - 99999 ;
}
void AddPointToBounds ( const Vector & v , Vector & mins , Vector & maxs )
{
Assert ( s_bMathlibInitialized ) ;
int i ;
vec_t val ;
for ( i = 0 ; i < 3 ; i + + )
{
val = v [ i ] ;
if ( val < mins [ i ] )
mins [ i ] = val ;
if ( val > maxs [ i ] )
maxs [ i ] = val ;
}
}
//-----------------------------------------------------------------------------
// Gamma conversion support
//-----------------------------------------------------------------------------
static byte texgammatable [ 256 ] ; // palette is sent through this to convert to screen gamma
static float texturetolinear [ 256 ] ; // texture (0..255) to linear (0..1)
static int lineartotexture [ 1024 ] ; // linear (0..1) to texture (0..255)
static int lineartoscreen [ 1024 ] ; // linear (0..1) to gamma corrected vertex light (0..255)
// build a lightmap texture to combine with surface texture, adjust for src*dst+dst*src, ramp reprogramming, etc
float lineartovertex [ 4096 ] ; // linear (0..4) to screen corrected vertex space (0..1?)
unsigned char lineartolightmap [ 4096 ] ; // linear (0..4) to screen corrected texture value (0..255)
static float g_Mathlib_GammaToLinear [ 256 ] ; // gamma (0..1) to linear (0..1)
static float g_Mathlib_LinearToGamma [ 256 ] ; // linear (0..1) to gamma (0..1)
float power2_n [ 256 ] ; // 2**(index - 128)
void BuildExponentTable ( )
{
for ( int i = 0 ; i < 256 ; i + + )
{
power2_n [ i ] = pow ( 2.0f , i - 128 ) / 255.0f ;
}
}
void BuildGammaTable ( float gamma , float texGamma , float brightness , int overbright )
{
int i , inf ;
float g1 , g3 ;
// Con_Printf("BuildGammaTable %.1f %.1f %.1f\n", g, v_lightgamma.GetFloat(), v_texgamma.GetFloat() );
float g = gamma ;
if ( g > 3.0 )
{
g = 3.0 ;
}
g = 1.0 / g ;
g1 = texGamma * g ;
if ( brightness < = 0.0 )
{
g3 = 0.125 ;
}
else if ( brightness > 1.0 )
{
g3 = 0.05 ;
}
else
{
g3 = 0.125 - ( brightness * brightness ) * 0.075 ;
}
for ( i = 0 ; i < 256 ; i + + )
{
2008-09-15 01:33:59 -05:00
inf = static_cast < int > ( 255 * pow ( i / 255.f , g1 ) ) ;
2008-09-15 01:00:17 -05:00
if ( inf < 0 )
inf = 0 ;
if ( inf > 255 )
inf = 255 ;
texgammatable [ i ] = inf ;
}
for ( i = 0 ; i < 1024 ; i + + )
{
float f ;
f = i / 1023.0 ;
// scale up
if ( brightness > 1.0 )
f = f * brightness ;
// shift up
if ( f < = g3 )
f = ( f / g3 ) * 0.125 ;
else
f = 0.125 + ( ( f - g3 ) / ( 1.0 - g3 ) ) * 0.875 ;
// convert linear space to desired gamma space
2008-09-15 01:33:59 -05:00
inf = static_cast < int > ( 255 * pow ( f , g ) ) ;
2008-09-15 01:00:17 -05:00
if ( inf < 0 )
inf = 0 ;
if ( inf > 255 )
inf = 255 ;
lineartoscreen [ i ] = inf ;
}
/*
for ( i = 0 ; i < 1024 ; i + + )
{
// convert from screen gamma space to linear space
lineargammatable [ i ] = 1023 * pow ( i / 1023.0 , v_gamma . GetFloat ( ) ) ;
// convert from linear gamma space to screen space
screengammatable [ i ] = 1023 * pow ( i / 1023.0 , 1.0 / v_gamma . GetFloat ( ) ) ;
}
*/
for ( i = 0 ; i < 256 ; i + + )
{
// convert from nonlinear texture space (0..255) to linear space (0..1)
texturetolinear [ i ] = pow ( i / 255.f , texGamma ) ;
// convert from linear space (0..1) to nonlinear (sRGB) space (0..1)
g_Mathlib_LinearToGamma [ i ] = pow ( i / 255.f , 1.0f / 2.2f ) ;
// convert from sRGB gamma space (0..1) to linear space (0..1)
g_Mathlib_GammaToLinear [ i ] = pow ( i / 255.f , 2.2f ) ;
}
for ( i = 0 ; i < 1024 ; i + + )
{
// convert from linear space (0..1) to nonlinear texture space (0..255)
2008-09-15 01:33:59 -05:00
lineartotexture [ i ] = static_cast < int > ( pow ( i / 1023.0 , 1.0 / texGamma ) * 255 ) ;
2008-09-15 01:00:17 -05:00
}
BuildExponentTable ( ) ;
#if 0
for ( i = 0 ; i < 256 ; i + + )
{
float f ;
// convert from nonlinear lightmap space (0..255) to linear space (0..4)
// f = (i / 255.0) * sqrt( 4 );
f = i * ( 2.0 / 255.0 ) ;
f = f * f ;
texlighttolinear [ i ] = f ;
}
# endif
{
float f ;
float overbrightFactor = 1.0f ;
// Can't do overbright without texcombine
// UNDONE: Add GAMMA ramp to rectify this
if ( overbright = = 2 )
{
overbrightFactor = 0.5 ;
}
else if ( overbright = = 4 )
{
overbrightFactor = 0.25 ;
}
for ( i = 0 ; i < 4096 ; i + + )
{
// convert from linear 0..4 (x1024) to screen corrected vertex space (0..1?)
f = pow ( i / 1024.0 , 1.0 / gamma ) ;
lineartovertex [ i ] = f * overbrightFactor ;
if ( lineartovertex [ i ] > 1 )
lineartovertex [ i ] = 1 ;
int nLightmap = RoundFloatToInt ( f * 255 * overbrightFactor ) ;
nLightmap = clamp ( nLightmap , 0 , 255 ) ;
lineartolightmap [ i ] = ( unsigned char ) nLightmap ;
}
}
}
float GammaToLinearFullRange ( float gamma )
{
return pow ( gamma , 2.2f ) ;
}
float LinearToGammaFullRange ( float linear )
{
return pow ( linear , 1.0f / 2.2f ) ;
}
float GammaToLinear ( float gamma )
{
Assert ( s_bMathlibInitialized ) ;
// return pow( gamma, 2.2f );
if ( gamma < 0.0f )
{
return 0.0f ;
}
Assert ( gamma < = 1.0f ) ;
if ( gamma > = 0.95f )
{
// Use GammaToLinearFullRange maybe if you trip this.
return 1.0f ;
}
int index = RoundFloatToInt ( gamma * 255.0f ) ;
Assert ( index > = 0 & & index < 256 ) ;
return g_Mathlib_GammaToLinear [ index ] ;
}
float LinearToGamma ( float linear )
{
Assert ( s_bMathlibInitialized ) ;
// return pow( linear, 1.0f / 2.2f );
if ( linear < 0.0f )
{
return 0.0f ;
}
if ( linear > 1.0f )
{
// Use LinearToGammaFullRange maybe if you trip this.
Assert ( 0 ) ;
return 1.0f ;
}
int index = RoundFloatToInt ( linear * 255.0f ) ;
Assert ( index > = 0 & & index < 256 ) ;
return g_Mathlib_LinearToGamma [ index ] ;
}
// convert texture to linear 0..1 value
float TextureToLinear ( int c )
{
Assert ( s_bMathlibInitialized ) ;
if ( c < 0 )
return 0 ;
if ( c > 255 )
return 1.0 ;
return texturetolinear [ c ] ;
}
// convert texture to linear 0..1 value
int LinearToTexture ( float f )
{
Assert ( s_bMathlibInitialized ) ;
int i ;
2008-09-15 01:33:59 -05:00
i = static_cast < int > ( f * 1023 ) ; // assume 0..1 range
2008-09-15 01:00:17 -05:00
if ( i < 0 )
i = 0 ;
if ( i > 1023 )
i = 1023 ;
return lineartotexture [ i ] ;
}
// converts 0..1 linear value to screen gamma (0..255)
int LinearToScreenGamma ( float f )
{
Assert ( s_bMathlibInitialized ) ;
int i ;
2008-09-15 01:33:59 -05:00
i = static_cast < int > ( f * 1023 ) ; // assume 0..1 range
2008-09-15 01:00:17 -05:00
if ( i < 0 )
i = 0 ;
if ( i > 1023 )
i = 1023 ;
return lineartoscreen [ i ] ;
}
// solve a x^2 + b x + c = 0
bool SolveQuadratic ( float a , float b , float c , float & root1 , float & root2 )
{
Assert ( s_bMathlibInitialized ) ;
if ( a = = 0 )
{
if ( b ! = 0 )
{
// no x^2 component, it's a linear system
root1 = root2 = - c / b ;
return true ;
}
if ( c = = 0 )
{
// all zero's
root1 = root2 = 0 ;
return true ;
}
return false ;
}
float tmp = b * b - 4.0f * a * c ;
if ( tmp < 0 )
{
// imaginary number, bah, no solution.
return false ;
}
tmp = sqrt ( tmp ) ;
root1 = ( - b + tmp ) / ( 2.0f * a ) ;
root2 = ( - b - tmp ) / ( 2.0f * a ) ;
return true ;
}
// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists
bool SolveInverseQuadratic ( float x1 , float y1 , float x2 , float y2 , float x3 , float y3 , float & a , float & b , float & c )
{
float det = ( x1 - x2 ) * ( x1 - x3 ) * ( x2 - x3 ) ;
// FIXME: check with some sort of epsilon
if ( det = = 0.0 )
return false ;
a = ( x3 * ( - y1 + y2 ) + x2 * ( y1 - y3 ) + x1 * ( - y2 + y3 ) ) / det ;
b = ( x3 * x3 * ( y1 - y2 ) + x1 * x1 * ( y2 - y3 ) + x2 * x2 * ( - y1 + y3 ) ) / det ;
c = ( x1 * x3 * ( - x1 + x3 ) * y2 + x2 * x2 * ( x3 * y1 - x1 * y3 ) + x2 * ( - ( x3 * x3 * y1 ) + x1 * x1 * y3 ) ) / det ;
return true ;
}
bool SolveInverseQuadraticMonotonic ( float x1 , float y1 , float x2 , float y2 , float x3 , float y3 ,
float & a , float & b , float & c )
{
// use SolveInverseQuadratic, but if the sigm of the derivative at the start point is the wrong
// sign, displace the mid point
// first, sort parameters
if ( x1 > x2 )
{
swap ( x1 , x2 ) ;
swap ( y1 , y2 ) ;
}
if ( x2 > x3 )
{
swap ( x2 , x3 ) ;
swap ( y2 , y3 ) ;
}
if ( x1 > x2 )
{
swap ( x1 , x2 ) ;
swap ( y1 , y2 ) ;
}
// this code is not fast. what it does is when the curve would be non-monotonic, slowly shifts
// the center point closer to the linear line between the endpoints. Should anyone need htis
// function to be actually fast, it would be fairly easy to change it to be so.
for ( float blend_to_linear_factor = 0.0 ; blend_to_linear_factor < = 1.0 ; blend_to_linear_factor + = 0.05 )
{
float tempy2 = ( 1 - blend_to_linear_factor ) * y2 + blend_to_linear_factor * FLerp ( y1 , y3 , x1 , x3 , x2 ) ;
if ( ! SolveInverseQuadratic ( x1 , y1 , x2 , tempy2 , x3 , y3 , a , b , c ) )
return false ;
float derivative = 2.0 * a + b ;
if ( ( y1 < y2 ) & & ( y2 < y3 ) ) // monotonically increasing
{
if ( derivative > = 0.0 )
return true ;
}
else
{
if ( ( y1 > y2 ) & & ( y2 > y3 ) ) // monotonically decreasing
{
if ( derivative < = 0.0 )
return true ;
}
else
return true ;
}
}
return true ;
}
// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists
bool SolveInverseReciprocalQuadratic ( float x1 , float y1 , float x2 , float y2 , float x3 , float y3 , float & a , float & b , float & c )
{
float det = ( x1 - x2 ) * ( x1 - x3 ) * ( x2 - x3 ) * y1 * y2 * y3 ;
// FIXME: check with some sort of epsilon
if ( det = = 0.0 )
return false ;
a = ( x1 * y1 * ( y2 - y3 ) + x3 * ( y1 - y2 ) * y3 + x2 * y2 * ( - y1 + y3 ) ) / det ;
b = ( x2 * x2 * y2 * ( y1 - y3 ) + x3 * x3 * ( - y1 + y2 ) * y3 + x1 * x1 * y1 * ( - y2 + y3 ) ) / det ;
c = ( x2 * ( x2 - x3 ) * x3 * y2 * y3 + x1 * x1 * y1 * ( x2 * y2 - x3 * y3 ) + x1 * ( - ( x2 * x2 * y1 * y2 ) + x3 * x3 * y1 * y3 ) ) / det ;
return true ;
}
// Rotate a vector around the Z axis (YAW)
void VectorYawRotate ( const Vector & in , float flYaw , Vector & out )
{
Assert ( s_bMathlibInitialized ) ;
if ( & in = = & out )
{
Vector tmp ;
tmp = in ;
VectorYawRotate ( tmp , flYaw , out ) ;
return ;
}
float sy , cy ;
SinCos ( DEG2RAD ( flYaw ) , & sy , & cy ) ;
out . x = in . x * cy - in . y * sy ;
out . y = in . x * sy + in . y * cy ;
out . z = in . z ;
}
float Bias ( float x , float biasAmt )
{
// WARNING: not thread safe
static float lastAmt = - 1 ;
static float lastExponent = 0 ;
if ( lastAmt ! = biasAmt )
{
lastExponent = log ( biasAmt ) * - 1.4427f ; // (-1.4427 = 1 / log(0.5))
}
return pow ( x , lastExponent ) ;
}
float Gain ( float x , float biasAmt )
{
// WARNING: not thread safe
if ( x < 0.5 )
return 0.5f * Bias ( 2 * x , 1 - biasAmt ) ;
else
return 1 - 0.5f * Bias ( 2 - 2 * x , 1 - biasAmt ) ;
}
float SmoothCurve ( float x )
{
return ( 1 - cos ( x * M_PI ) ) * 0.5f ;
}
inline float MovePeak ( float x , float flPeakPos )
{
// Todo: make this higher-order?
if ( x < flPeakPos )
return x * 0.5f / flPeakPos ;
else
return 0.5 + 0.5 * ( x - flPeakPos ) / ( 1 - flPeakPos ) ;
}
float SmoothCurve_Tweak ( float x , float flPeakPos , float flPeakSharpness )
{
float flMovedPeak = MovePeak ( x , flPeakPos ) ;
float flSharpened = Gain ( flMovedPeak , flPeakSharpness ) ;
return SmoothCurve ( flSharpened ) ;
}
//-----------------------------------------------------------------------------
// make sure quaternions are within 180 degrees of one another, if not, reverse q
//-----------------------------------------------------------------------------
void QuaternionAlign ( const Quaternion & p , const Quaternion & q , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
// FIXME: can this be done with a quat dot product?
int i ;
// decide if one of the quaternions is backwards
float a = 0 ;
float b = 0 ;
for ( i = 0 ; i < 4 ; i + + )
{
a + = ( p [ i ] - q [ i ] ) * ( p [ i ] - q [ i ] ) ;
b + = ( p [ i ] + q [ i ] ) * ( p [ i ] + q [ i ] ) ;
}
if ( a > b )
{
for ( i = 0 ; i < 4 ; i + + )
{
qt [ i ] = - q [ i ] ;
}
}
else if ( & qt ! = & q )
{
for ( i = 0 ; i < 4 ; i + + )
{
qt [ i ] = q [ i ] ;
}
}
}
//-----------------------------------------------------------------------------
// Do a piecewise addition of the quaternion elements. This actually makes little
// mathematical sense, but it's a cheap way to simulate a slerp.
//-----------------------------------------------------------------------------
void QuaternionBlend ( const Quaternion & p , const Quaternion & q , float t , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
// decide if one of the quaternions is backwards
Quaternion q2 ;
QuaternionAlign ( p , q , q2 ) ;
QuaternionBlendNoAlign ( p , q2 , t , qt ) ;
}
void QuaternionBlendNoAlign ( const Quaternion & p , const Quaternion & q , float t , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
float sclp , sclq ;
int i ;
// 0.0 returns p, 1.0 return q.
sclp = 1.0f - t ;
sclq = t ;
for ( i = 0 ; i < 4 ; i + + ) {
qt [ i ] = sclp * p [ i ] + sclq * q [ i ] ;
}
QuaternionNormalize ( qt ) ;
}
void QuaternionIdentityBlend ( const Quaternion & p , float t , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
float sclp ;
sclp = 1.0f - t ;
qt . x = p . x * sclp ;
qt . y = p . y * sclp ;
qt . z = p . z * sclp ;
if ( qt . w < 0.0 )
{
qt . w = p . w * sclp - t ;
}
else
{
qt . w = p . w * sclp + t ;
}
QuaternionNormalize ( qt ) ;
}
//-----------------------------------------------------------------------------
// Quaternion sphereical linear interpolation
//-----------------------------------------------------------------------------
void QuaternionSlerp ( const Quaternion & p , const Quaternion & q , float t , Quaternion & qt )
{
Quaternion q2 ;
// 0.0 returns p, 1.0 return q.
// decide if one of the quaternions is backwards
QuaternionAlign ( p , q , q2 ) ;
QuaternionSlerpNoAlign ( p , q2 , t , qt ) ;
}
void QuaternionSlerpNoAlign ( const Quaternion & p , const Quaternion & q , float t , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
float omega , cosom , sinom , sclp , sclq ;
int i ;
// 0.0 returns p, 1.0 return q.
cosom = p [ 0 ] * q [ 0 ] + p [ 1 ] * q [ 1 ] + p [ 2 ] * q [ 2 ] + p [ 3 ] * q [ 3 ] ;
if ( ( 1.0f + cosom ) > 0.000001f ) {
if ( ( 1.0f - cosom ) > 0.000001f ) {
omega = acos ( cosom ) ;
sinom = sin ( omega ) ;
sclp = sin ( ( 1.0f - t ) * omega ) / sinom ;
sclq = sin ( t * omega ) / sinom ;
}
else {
// TODO: add short circuit for cosom == 1.0f?
sclp = 1.0f - t ;
sclq = t ;
}
for ( i = 0 ; i < 4 ; i + + ) {
qt [ i ] = sclp * p [ i ] + sclq * q [ i ] ;
}
}
else {
Assert ( & qt ! = & q ) ;
qt [ 0 ] = - q [ 1 ] ;
qt [ 1 ] = q [ 0 ] ;
qt [ 2 ] = - q [ 3 ] ;
qt [ 3 ] = q [ 2 ] ;
sclp = sin ( ( 1.0f - t ) * ( 0.5f * M_PI ) ) ;
sclq = sin ( t * ( 0.5f * M_PI ) ) ;
for ( i = 0 ; i < 3 ; i + + ) {
qt [ i ] = sclp * p [ i ] + sclq * qt [ i ] ;
}
}
Assert ( qt . IsValid ( ) ) ;
}
//-----------------------------------------------------------------------------
// Purpose: Returns the angular delta between the two normalized quaternions in degrees.
//-----------------------------------------------------------------------------
float QuaternionAngleDiff ( const Quaternion & p , const Quaternion & q )
{
# if 1
// this code path is here for 2 reasons:
// 1 - acos maps 1-epsilon to values much larger than epsilon (vs asin, which maps epsilon to itself)
// this means that in floats, anything below ~0.05 degrees truncates to 0
// 2 - normalized quaternions are frequently slightly non-normalized due to float precision issues,
// and the epsilon off of normalized can be several percents of a degree
Quaternion qInv , diff ;
QuaternionConjugate ( q , qInv ) ;
QuaternionMult ( p , qInv , diff ) ;
float sinang = sqrt ( diff . x * diff . x + diff . y * diff . y + diff . z * diff . z ) ;
float angle = RAD2DEG ( 2 * asin ( sinang ) ) ;
return angle ;
# else
Quaternion q2 ;
QuaternionAlign ( p , q , q2 ) ;
Assert ( s_bMathlibInitialized ) ;
float cosom = p . x * q2 . x + p . y * q2 . y + p . z * q2 . z + p . w * q2 . w ;
if ( cosom > - 1.0f )
{
if ( cosom < 1.0f )
{
float omega = 2 * fabs ( acos ( cosom ) ) ;
return RAD2DEG ( omega ) ;
}
return 0.0f ;
}
return 180.0f ;
# endif
}
void QuaternionConjugate ( const Quaternion & p , Quaternion & q )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( q . IsValid ( ) ) ;
q . x = - p . x ;
q . y = - p . y ;
q . z = - p . z ;
q . w = p . w ;
}
void QuaternionInvert ( const Quaternion & p , Quaternion & q )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( q . IsValid ( ) ) ;
QuaternionConjugate ( p , q ) ;
float magnitudeSqr = QuaternionDotProduct ( p , p ) ;
Assert ( magnitudeSqr ) ;
if ( magnitudeSqr )
{
float inv = 1.0f / magnitudeSqr ;
q . x * = inv ;
q . y * = inv ;
q . z * = inv ;
q . w * = inv ;
}
}
//-----------------------------------------------------------------------------
// Make sure the quaternion is of unit length
//-----------------------------------------------------------------------------
float QuaternionNormalize ( Quaternion & q )
{
Assert ( s_bMathlibInitialized ) ;
float radius , iradius ;
Assert ( q . IsValid ( ) ) ;
radius = q [ 0 ] * q [ 0 ] + q [ 1 ] * q [ 1 ] + q [ 2 ] * q [ 2 ] + q [ 3 ] * q [ 3 ] ;
if ( radius ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
{
radius = sqrt ( radius ) ;
iradius = 1.0f / radius ;
q [ 3 ] * = iradius ;
q [ 2 ] * = iradius ;
q [ 1 ] * = iradius ;
q [ 0 ] * = iradius ;
}
return radius ;
}
void QuaternionScale ( const Quaternion & p , float t , Quaternion & q )
{
Assert ( s_bMathlibInitialized ) ;
#if 0
Quaternion p0 ;
Quaternion q ;
p0 . Init ( 0.0 , 0.0 , 0.0 , 1.0 ) ;
// slerp in "reverse order" so that p doesn't get realigned
QuaternionSlerp ( p , p0 , 1.0 - fabs ( t ) , q ) ;
if ( t < 0.0 )
{
q . w = - q . w ;
}
# else
float r ;
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
float sinom = sqrt ( DotProduct ( & p . x , & p . x ) ) ;
2011-04-28 01:29:22 -05:00
sinom = MIN ( sinom , 1.f ) ;
2008-09-15 01:00:17 -05:00
float sinsom = sin ( asin ( sinom ) * t ) ;
t = sinsom / ( sinom + FLT_EPSILON ) ;
VectorScale ( & p . x , t , & q . x ) ;
// rescale rotation
r = 1.0f - sinsom * sinsom ;
// Assert( r >= 0 );
if ( r < 0.0f )
r = 0.0f ;
r = sqrt ( r ) ;
// keep sign of rotation
if ( p . w < 0 )
q . w = - r ;
else
q . w = r ;
# endif
Assert ( q . IsValid ( ) ) ;
return ;
}
void QuaternionAdd ( const Quaternion & p , const Quaternion & q , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( p . IsValid ( ) ) ;
Assert ( q . IsValid ( ) ) ;
// decide if one of the quaternions is backwards
Quaternion q2 ;
QuaternionAlign ( p , q , q2 ) ;
// is this right???
qt [ 0 ] = p [ 0 ] + q2 [ 0 ] ;
qt [ 1 ] = p [ 1 ] + q2 [ 1 ] ;
qt [ 2 ] = p [ 2 ] + q2 [ 2 ] ;
qt [ 3 ] = p [ 3 ] + q2 [ 3 ] ;
return ;
}
float QuaternionDotProduct ( const Quaternion & p , const Quaternion & q )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( p . IsValid ( ) ) ;
Assert ( q . IsValid ( ) ) ;
return p . x * q . x + p . y * q . y + p . z * q . z + p . w * q . w ;
}
// qt = p * q
void QuaternionMult ( const Quaternion & p , const Quaternion & q , Quaternion & qt )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( p . IsValid ( ) ) ;
Assert ( q . IsValid ( ) ) ;
if ( & p = = & qt )
{
Quaternion p2 = p ;
QuaternionMult ( p2 , q , qt ) ;
return ;
}
// decide if one of the quaternions is backwards
Quaternion q2 ;
QuaternionAlign ( p , q , q2 ) ;
qt . x = p . x * q2 . w + p . y * q2 . z - p . z * q2 . y + p . w * q2 . x ;
qt . y = - p . x * q2 . z + p . y * q2 . w + p . z * q2 . x + p . w * q2 . y ;
qt . z = p . x * q2 . y - p . y * q2 . x + p . z * q2 . w + p . w * q2 . z ;
qt . w = - p . x * q2 . x - p . y * q2 . y - p . z * q2 . z + p . w * q2 . w ;
}
void QuaternionMatrix ( const Quaternion & q , const Vector & pos , matrix3x4_t & matrix )
{
Assert ( pos . IsValid ( ) ) ;
QuaternionMatrix ( q , matrix ) ;
matrix [ 0 ] [ 3 ] = pos . x ;
matrix [ 1 ] [ 3 ] = pos . y ;
matrix [ 2 ] [ 3 ] = pos . z ;
}
void QuaternionMatrix ( const Quaternion & q , matrix3x4_t & matrix )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( q . IsValid ( ) ) ;
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " QuaternionMatrix " , " Mathlib " ) ;
# endif
// Original code
// This should produce the same code as below with optimization, but looking at the assmebly,
// it doesn't. There are 7 extra multiplies in the release build of this, go figure.
# if 1
matrix [ 0 ] [ 0 ] = 1.0 - 2.0 * q . y * q . y - 2.0 * q . z * q . z ;
matrix [ 1 ] [ 0 ] = 2.0 * q . x * q . y + 2.0 * q . w * q . z ;
matrix [ 2 ] [ 0 ] = 2.0 * q . x * q . z - 2.0 * q . w * q . y ;
matrix [ 0 ] [ 1 ] = 2.0f * q . x * q . y - 2.0f * q . w * q . z ;
matrix [ 1 ] [ 1 ] = 1.0f - 2.0f * q . x * q . x - 2.0f * q . z * q . z ;
matrix [ 2 ] [ 1 ] = 2.0f * q . y * q . z + 2.0f * q . w * q . x ;
matrix [ 0 ] [ 2 ] = 2.0f * q . x * q . z + 2.0f * q . w * q . y ;
matrix [ 1 ] [ 2 ] = 2.0f * q . y * q . z - 2.0f * q . w * q . x ;
matrix [ 2 ] [ 2 ] = 1.0f - 2.0f * q . x * q . x - 2.0f * q . y * q . y ;
# else
float wx , wy , wz , xx , yy , yz , xy , xz , zz , x2 , y2 , z2 ;
// precalculate common multiplitcations
x2 = q . x + q . x ;
y2 = q . y + q . y ;
z2 = q . z + q . z ;
xx = q . x * x2 ;
xy = q . x * y2 ;
xz = q . x * z2 ;
yy = q . y * y2 ;
yz = q . y * z2 ;
zz = q . z * z2 ;
wx = q . w * x2 ;
wy = q . w * y2 ;
wz = q . w * z2 ;
matrix [ 0 ] [ 0 ] = 1.0 - ( yy + zz ) ;
matrix [ 0 ] [ 1 ] = xy - wz ;
matrix [ 0 ] [ 2 ] = xz + wy ;
matrix [ 1 ] [ 0 ] = xy + wz ;
matrix [ 1 ] [ 1 ] = 1.0 - ( xx + zz ) ;
matrix [ 1 ] [ 2 ] = yz - wx ;
matrix [ 2 ] [ 0 ] = xz - wy ;
matrix [ 2 ] [ 1 ] = yz + wx ;
matrix [ 2 ] [ 2 ] = 1.0 - ( xx + yy ) ;
# endif
}
//-----------------------------------------------------------------------------
// Purpose: Converts engine-format euler angles to a quaternion
// Input : angles - Right-handed Euler angles in degrees as follows:
// [0]: PITCH: Clockwise rotation around the Y axis.
// [1]: YAW: Counterclockwise rotation around the Z axis.
// [2]: ROLL: Counterclockwise rotation around the X axis.
// *outQuat - quaternion of form (i,j,k,real)
//-----------------------------------------------------------------------------
void AngleQuaternion ( const QAngle & angles , Quaternion & outQuat )
{
RadianEuler radians ;
radians . Init ( DEG2RAD ( angles . z ) , DEG2RAD ( angles . x ) , DEG2RAD ( angles . y ) ) ;
AngleQuaternion ( radians , outQuat ) ;
}
//-----------------------------------------------------------------------------
// Purpose: Converts a quaternion into engine angles
// Input : *quaternion - q3 + q0.i + q1.j + q2.k
// *outAngles - PITCH, YAW, ROLL
//-----------------------------------------------------------------------------
void QuaternionAngles ( const Quaternion & q , QAngle & angles )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( q . IsValid ( ) ) ;
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " QuaternionAngles " , " Mathlib " ) ;
# endif
# if 1
// FIXME: doing it this way calculates too much data, needs to do an optimized version...
matrix3x4_t matrix ;
QuaternionMatrix ( q , matrix ) ;
MatrixAngles ( matrix , angles ) ;
# else
float m11 , m12 , m13 , m23 , m33 ;
m11 = ( 2.0f * q . w * q . w ) + ( 2.0f * q . x * q . x ) - 1.0f ;
m12 = ( 2.0f * q . x * q . y ) + ( 2.0f * q . w * q . z ) ;
m13 = ( 2.0f * q . x * q . z ) - ( 2.0f * q . w * q . y ) ;
m23 = ( 2.0f * q . y * q . z ) + ( 2.0f * q . w * q . x ) ;
m33 = ( 2.0f * q . w * q . w ) + ( 2.0f * q . z * q . z ) - 1.0f ;
// FIXME: this code has a singularity near PITCH +-90
angles [ YAW ] = RAD2DEG ( atan2 ( m12 , m11 ) ) ;
angles [ PITCH ] = RAD2DEG ( asin ( - m13 ) ) ;
angles [ ROLL ] = RAD2DEG ( atan2 ( m23 , m33 ) ) ;
# endif
Assert ( angles . IsValid ( ) ) ;
}
//-----------------------------------------------------------------------------
// Purpose: Converts a quaternion to an axis / angle in degrees
// (exponential map)
//-----------------------------------------------------------------------------
void QuaternionAxisAngle ( const Quaternion & q , Vector & axis , float & angle )
{
angle = RAD2DEG ( 2 * acos ( q . w ) ) ;
if ( angle > 180 )
{
angle - = 360 ;
}
axis . x = q . x ;
axis . y = q . y ;
axis . z = q . z ;
VectorNormalize ( axis ) ;
}
//-----------------------------------------------------------------------------
// Purpose: Converts an exponential map (ang/axis) to a quaternion
//-----------------------------------------------------------------------------
void AxisAngleQuaternion ( const Vector & axis , float angle , Quaternion & q )
{
float sa , ca ;
SinCos ( DEG2RAD ( angle ) * 0.5f , & sa , & ca ) ;
q . x = axis . x * sa ;
q . y = axis . y * sa ;
q . z = axis . z * sa ;
q . w = ca ;
}
//-----------------------------------------------------------------------------
// Purpose: Converts radian-euler axis aligned angles to a quaternion
// Input : *pfAngles - Right-handed Euler angles in radians
// *outQuat - quaternion of form (i,j,k,real)
//-----------------------------------------------------------------------------
void AngleQuaternion ( RadianEuler const & angles , Quaternion & outQuat )
{
Assert ( s_bMathlibInitialized ) ;
// Assert( angles.IsValid() );
# ifdef _VPROF_MATHLIB
VPROF_BUDGET ( " AngleQuaternion " , " Mathlib " ) ;
# endif
float sr , sp , sy , cr , cp , cy ;
SinCos ( angles . z * 0.5f , & sy , & cy ) ;
SinCos ( angles . y * 0.5f , & sp , & cp ) ;
SinCos ( angles . x * 0.5f , & sr , & cr ) ;
// NJS: for some reason VC6 wasn't recognizing the common subexpressions:
float srXcp = sr * cp ,
crXsp = cr * sp ;
outQuat . x = srXcp * cy - crXsp * sy ; // X
outQuat . y = crXsp * cy + srXcp * sy ; // Y
float crXcp = cr * cp ,
srXsp = sr * sp ;
outQuat . z = crXcp * sy - srXsp * cy ; // Z
outQuat . w = crXcp * cy + srXsp * sy ; // W (real component)
// Assert( outQuat.IsValid() );
}
//-----------------------------------------------------------------------------
// Purpose: Converts a basis to a quaternion
//-----------------------------------------------------------------------------
void BasisToQuaternion ( const Vector & vecForward , const Vector & vecRight , const Vector & vecUp , Quaternion & q )
{
Assert ( fabs ( vecForward . LengthSqr ( ) - 1.0f ) < 1e-3 ) ;
Assert ( fabs ( vecRight . LengthSqr ( ) - 1.0f ) < 1e-3 ) ;
Assert ( fabs ( vecUp . LengthSqr ( ) - 1.0f ) < 1e-3 ) ;
Vector vecLeft ;
VectorMultiply ( vecRight , - 1.0f , vecLeft ) ;
// FIXME: Don't know why, but this doesn't match at all with other result
// so we can't use this super-fast way.
/*
// Find the trace of the matrix:
float flTrace = vecForward . x + vecLeft . y + vecUp . z + 1.0f ;
if ( flTrace > 1e-6 )
{
float flSqrtTrace = FastSqrt ( flTrace ) ;
float s = 0.5f / flSqrtTrace ;
q . x = ( vecUp . y - vecLeft . z ) * s ;
q . y = ( vecForward . z - vecUp . x ) * s ;
q . z = ( vecLeft . x - vecForward . y ) * s ;
q . w = 0.5f * flSqrtTrace ;
}
else
{
if ( ( vecForward . x > vecLeft . y ) & & ( vecForward . x > vecUp . z ) )
{
float flSqrtTrace = FastSqrt ( 1.0f + vecForward . x - vecLeft . y - vecUp . z ) ;
float s = 0.5f / flSqrtTrace ;
q . x = 0.5f * flSqrtTrace ;
q . y = ( vecForward . y + vecLeft . x ) * s ;
q . z = ( vecUp . x + vecForward . z ) * s ;
q . w = ( vecUp . y - vecLeft . z ) * s ;
}
else if ( vecLeft . y > vecUp . z )
{
float flSqrtTrace = FastSqrt ( 1.0f + vecLeft . y - vecForward . x - vecUp . z ) ;
float s = 0.5f / flSqrtTrace ;
q . x = ( vecForward . y + vecLeft . x ) * s ;
q . y = 0.5f * flSqrtTrace ;
q . z = ( vecUp . y + vecLeft . z ) * s ;
q . w = ( vecForward . z - vecUp . x ) * s ;
}
else
{
float flSqrtTrace = FastSqrt ( 1.0 + vecUp . z - vecForward . x - vecLeft . y ) ;
float s = 0.5f / flSqrtTrace ;
q . x = ( vecUp . x + vecForward . z ) * s ;
q . y = ( vecUp . y + vecLeft . z ) * s ;
q . z = 0.5f * flSqrtTrace ;
q . w = ( vecLeft . x - vecForward . y ) * s ;
}
}
QuaternionNormalize ( q ) ;
*/
// Version 2: Go through angles
matrix3x4_t mat ;
MatrixSetColumn ( vecForward , 0 , mat ) ;
MatrixSetColumn ( vecLeft , 1 , mat ) ;
MatrixSetColumn ( vecUp , 2 , mat ) ;
QAngle angles ;
MatrixAngles ( mat , angles ) ;
// Quaternion q2;
AngleQuaternion ( angles , q ) ;
// Assert( fabs(q.x - q2.x) < 1e-3 );
// Assert( fabs(q.y - q2.y) < 1e-3 );
// Assert( fabs(q.z - q2.z) < 1e-3 );
// Assert( fabs(q.w - q2.w) < 1e-3 );
}
//-----------------------------------------------------------------------------
// Purpose: Converts a quaternion into engine angles
// Input : *quaternion - q3 + q0.i + q1.j + q2.k
// *outAngles - PITCH, YAW, ROLL
//-----------------------------------------------------------------------------
void QuaternionAngles ( const Quaternion & q , RadianEuler & angles )
{
Assert ( s_bMathlibInitialized ) ;
Assert ( q . IsValid ( ) ) ;
// FIXME: doing it this way calculates too much data, needs to do an optimized version...
matrix3x4_t matrix ;
QuaternionMatrix ( q , matrix ) ;
MatrixAngles ( matrix , angles ) ;
Assert ( angles . IsValid ( ) ) ;
}
//-----------------------------------------------------------------------------
// Purpose: A helper function to normalize p2.x->p1.x and p3.x->p4.x to
// be the same length as p2.x->p3.x
// Input : &p2 -
// &p4 -
// p4n -
//-----------------------------------------------------------------------------
void Spline_Normalize (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
Vector & p1n ,
Vector & p4n )
{
float dt = p3 . x - p2 . x ;
p1n = p1 ;
p4n = p4 ;
if ( dt ! = 0.0 )
{
if ( p1 . x ! = p2 . x )
{
// Equivalent to p1n = p2 - (p2 - p1) * (dt / (p2.x - p1.x));
VectorLerp ( p2 , p1 , dt / ( p2 . x - p1 . x ) , p1n ) ;
}
if ( p4 . x ! = p3 . x )
{
// Equivalent to p4n = p3 + (p4 - p3) * (dt / (p4.x - p3.x));
VectorLerp ( p3 , p4 , dt / ( p4 . x - p3 . x ) , p4n ) ;
}
}
}
//-----------------------------------------------------------------------------
// Purpose:
// Input :
//-----------------------------------------------------------------------------
void Catmull_Rom_Spline (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float tSqr = t * t * 0.5f ;
float tSqrSqr = t * tSqr ;
t * = 0.5f ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & p3 ) ;
Assert ( & output ! = & p4 ) ;
output . Init ( ) ;
Vector a , b , c , d ;
// matrix row 1
VectorScale ( p1 , - tSqrSqr , a ) ; // 0.5 t^3 * [ (-1*p1) + ( 3*p2) + (-3*p3) + p4 ]
VectorScale ( p2 , tSqrSqr * 3 , b ) ;
VectorScale ( p3 , tSqrSqr * - 3 , c ) ;
VectorScale ( p4 , tSqrSqr , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 2
VectorScale ( p1 , tSqr * 2 , a ) ; // 0.5 t^2 * [ ( 2*p1) + (-5*p2) + ( 4*p3) - p4 ]
VectorScale ( p2 , tSqr * - 5 , b ) ;
VectorScale ( p3 , tSqr * 4 , c ) ;
VectorScale ( p4 , - tSqr , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 3
VectorScale ( p1 , - t , a ) ; // 0.5 t * [ (-1*p1) + p3 ]
VectorScale ( p3 , t , b ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
// matrix row 4
VectorAdd ( p2 , output , output ) ; // p2
}
void Catmull_Rom_Spline_Tangent (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float tOne = 3 * t * t * 0.5f ;
float tTwo = 2 * t * 0.5f ;
float tThree = 0.5 ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & p3 ) ;
Assert ( & output ! = & p4 ) ;
output . Init ( ) ;
Vector a , b , c , d ;
// matrix row 1
VectorScale ( p1 , - tOne , a ) ; // 0.5 t^3 * [ (-1*p1) + ( 3*p2) + (-3*p3) + p4 ]
VectorScale ( p2 , tOne * 3 , b ) ;
VectorScale ( p3 , tOne * - 3 , c ) ;
VectorScale ( p4 , tOne , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 2
VectorScale ( p1 , tTwo * 2 , a ) ; // 0.5 t^2 * [ ( 2*p1) + (-5*p2) + ( 4*p3) - p4 ]
VectorScale ( p2 , tTwo * - 5 , b ) ;
VectorScale ( p3 , tTwo * 4 , c ) ;
VectorScale ( p4 , - tTwo , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 3
VectorScale ( p1 , - tThree , a ) ; // 0.5 t * [ (-1*p1) + p3 ]
VectorScale ( p3 , tThree , b ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
}
void Catmull_Rom_Spline_Normalize (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
float dt = p3 . DistTo ( p2 ) ;
Vector p1n , p4n ;
VectorSubtract ( p1 , p2 , p1n ) ;
VectorSubtract ( p4 , p3 , p4n ) ;
VectorNormalize ( p1n ) ;
VectorNormalize ( p4n ) ;
VectorMA ( p2 , dt , p1n , p1n ) ;
VectorMA ( p3 , dt , p4n , p4n ) ;
Catmull_Rom_Spline ( p1n , p2 , p3 , p4n , t , output ) ;
}
void Catmull_Rom_Spline_NormalizeX (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Vector p1n , p4n ;
Spline_Normalize ( p1 , p2 , p3 , p4 , p1n , p4n ) ;
Catmull_Rom_Spline ( p1n , p2 , p3 , p4n , t , output ) ;
}
//-----------------------------------------------------------------------------
// Purpose: basic hermite spline. t = 0 returns p1, t = 1 returns p2,
// d1 and d2 are used to entry and exit slope of curve
// Input :
//-----------------------------------------------------------------------------
void Hermite_Spline (
const Vector & p1 ,
const Vector & p2 ,
const Vector & d1 ,
const Vector & d2 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float tSqr = t * t ;
float tCube = t * tSqr ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & d1 ) ;
Assert ( & output ! = & d2 ) ;
float b1 = 2.0f * tCube - 3.0f * tSqr + 1.0f ;
float b2 = 1.0f - b1 ; // -2*tCube+3*tSqr;
float b3 = tCube - 2 * tSqr + t ;
float b4 = tCube - tSqr ;
VectorScale ( p1 , b1 , output ) ;
VectorMA ( output , b2 , p2 , output ) ;
VectorMA ( output , b3 , d1 , output ) ;
VectorMA ( output , b4 , d2 , output ) ;
}
float Hermite_Spline (
float p1 ,
float p2 ,
float d1 ,
float d2 ,
float t )
{
Assert ( s_bMathlibInitialized ) ;
float output ;
float tSqr = t * t ;
float tCube = t * tSqr ;
float b1 = 2.0f * tCube - 3.0f * tSqr + 1.0f ;
float b2 = 1.0f - b1 ; // -2*tCube+3*tSqr;
float b3 = tCube - 2 * tSqr + t ;
float b4 = tCube - tSqr ;
output = p1 * b1 ;
output + = p2 * b2 ;
output + = d1 * b3 ;
output + = d2 * b4 ;
return output ;
}
void Hermite_SplineBasis ( float t , float basis [ 4 ] )
{
float tSqr = t * t ;
float tCube = t * tSqr ;
basis [ 0 ] = 2.0f * tCube - 3.0f * tSqr + 1.0f ;
basis [ 1 ] = 1.0f - basis [ 0 ] ; // -2*tCube+3*tSqr;
basis [ 2 ] = tCube - 2 * tSqr + t ;
basis [ 3 ] = tCube - tSqr ;
}
//-----------------------------------------------------------------------------
// Purpose: simple three data point hermite spline.
// t = 0 returns p1, t = 1 returns p2,
// slopes are generated from the p0->p1 and p1->p2 segments
// this is reasonable C1 method when there's no "p3" data yet.
// Input :
//-----------------------------------------------------------------------------
// BUG: the VectorSubtract()'s calls go away if the global optimizer is enabled
2008-09-15 01:33:59 -05:00
# ifdef _MSC_VER
2008-09-15 01:00:17 -05:00
# pragma optimize( "g", off )
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
void Hermite_Spline ( const Vector & p0 , const Vector & p1 , const Vector & p2 , float t , Vector & output )
{
Vector e10 , e21 ;
VectorSubtract ( p1 , p0 , e10 ) ;
VectorSubtract ( p2 , p1 , e21 ) ;
Hermite_Spline ( p1 , p2 , e10 , e21 , t , output ) ;
}
2008-09-15 01:33:59 -05:00
# ifdef _MSC_VER
2008-09-15 01:00:17 -05:00
# pragma optimize( "", on )
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
float Hermite_Spline ( float p0 , float p1 , float p2 , float t )
{
return Hermite_Spline ( p1 , p2 , p1 - p0 , p2 - p1 , t ) ;
}
void Hermite_Spline ( const Quaternion & q0 , const Quaternion & q1 , const Quaternion & q2 , float t , Quaternion & output )
{
// cheap, hacked version of quaternions
Quaternion q0a ;
Quaternion q1a ;
QuaternionAlign ( q2 , q0 , q0a ) ;
QuaternionAlign ( q2 , q1 , q1a ) ;
output . x = Hermite_Spline ( q0a . x , q1a . x , q2 . x , t ) ;
output . y = Hermite_Spline ( q0a . y , q1a . y , q2 . y , t ) ;
output . z = Hermite_Spline ( q0a . z , q1a . z , q2 . z , t ) ;
output . w = Hermite_Spline ( q0a . w , q1a . w , q2 . w , t ) ;
QuaternionNormalize ( output ) ;
}
// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves
//
// Tension: -1 = Round -> 1 = Tight
// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right)
// Continuity: -1 = Box corners -> 1 = Inverted corners
//
// If T=B=C=0 it's the same matrix as Catmull-Rom.
// If T=1 & B=C=0 it's the same as Cubic.
// If T=B=0 & C=-1 it's just linear interpolation
//
// See http://news.povray.org/povray.binaries.tutorials/attachment/%3CXns91B880592482seed7@povray.org%3E/Splines.bas.txt
// for example code and descriptions of various spline types...
//
void Kochanek_Bartels_Spline (
float tension ,
float bias ,
float continuity ,
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float ffa , ffb , ffc , ffd ;
ffa = ( 1.0f - tension ) * ( 1.0f + continuity ) * ( 1.0f + bias ) ;
ffb = ( 1.0f - tension ) * ( 1.0f - continuity ) * ( 1.0f - bias ) ;
ffc = ( 1.0f - tension ) * ( 1.0f - continuity ) * ( 1.0f + bias ) ;
ffd = ( 1.0f - tension ) * ( 1.0f + continuity ) * ( 1.0f - bias ) ;
float tSqr = t * t * 0.5f ;
float tSqrSqr = t * tSqr ;
t * = 0.5f ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & p3 ) ;
Assert ( & output ! = & p4 ) ;
output . Init ( ) ;
Vector a , b , c , d ;
// matrix row 1
VectorScale ( p1 , tSqrSqr * - ffa , a ) ;
VectorScale ( p2 , tSqrSqr * ( 4.0f + ffa - ffb - ffc ) , b ) ;
VectorScale ( p3 , tSqrSqr * ( - 4.0f + ffb + ffc - ffd ) , c ) ;
VectorScale ( p4 , tSqrSqr * ffd , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 2
VectorScale ( p1 , tSqr * 2 * ffa , a ) ;
VectorScale ( p2 , tSqr * ( - 6 - 2 * ffa + 2 * ffb + ffc ) , b ) ;
VectorScale ( p3 , tSqr * ( 6 - 2 * ffb - ffc + ffd ) , c ) ;
VectorScale ( p4 , tSqr * - ffd , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 3
VectorScale ( p1 , t * - ffa , a ) ;
VectorScale ( p2 , t * ( ffa - ffb ) , b ) ;
VectorScale ( p3 , t * ffb , c ) ;
// p4 unchanged
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
// matrix row 4
// p1, p3, p4 unchanged
// p2 is multiplied by 1 and added, so just added it directly
VectorAdd ( p2 , output , output ) ;
}
void Kochanek_Bartels_Spline_NormalizeX (
float tension ,
float bias ,
float continuity ,
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Vector p1n , p4n ;
Spline_Normalize ( p1 , p2 , p3 , p4 , p1n , p4n ) ;
Kochanek_Bartels_Spline ( tension , bias , continuity , p1n , p2 , p3 , p4n , t , output ) ;
}
void Cubic_Spline (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float tSqr = t * t ;
float tSqrSqr = t * tSqr ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & p3 ) ;
Assert ( & output ! = & p4 ) ;
output . Init ( ) ;
Vector a , b , c , d ;
// matrix row 1
VectorScale ( p2 , tSqrSqr * 2 , b ) ;
VectorScale ( p3 , tSqrSqr * - 2 , c ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
// matrix row 2
VectorScale ( p2 , tSqr * - 3 , b ) ;
VectorScale ( p3 , tSqr * 3 , c ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
// matrix row 3
// no influence
// p4 unchanged
// matrix row 4
// p1, p3, p4 unchanged
VectorAdd ( p2 , output , output ) ;
}
void Cubic_Spline_NormalizeX (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Vector p1n , p4n ;
Spline_Normalize ( p1 , p2 , p3 , p4 , p1n , p4n ) ;
Cubic_Spline ( p1n , p2 , p3 , p4n , t , output ) ;
}
void BSpline (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float oneOver6 = 1.0f / 6.0f ;
float tSqr = t * t * oneOver6 ;
float tSqrSqr = t * tSqr ;
t * = oneOver6 ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & p3 ) ;
Assert ( & output ! = & p4 ) ;
output . Init ( ) ;
Vector a , b , c , d ;
// matrix row 1
VectorScale ( p1 , - tSqrSqr , a ) ;
VectorScale ( p2 , tSqrSqr * 3.0f , b ) ;
VectorScale ( p3 , tSqrSqr * - 3.0f , c ) ;
VectorScale ( p4 , tSqrSqr , d ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
VectorAdd ( d , output , output ) ;
// matrix row 2
VectorScale ( p1 , tSqr * 3.0f , a ) ;
VectorScale ( p2 , tSqr * - 6.0f , b ) ;
VectorScale ( p3 , tSqr * 3.0f , c ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
// matrix row 3
VectorScale ( p1 , t * - 3.0f , a ) ;
VectorScale ( p3 , t * 3.0f , c ) ;
// p4 unchanged
VectorAdd ( a , output , output ) ;
VectorAdd ( c , output , output ) ;
// matrix row 4
// p1 and p3 scaled by 1.0f, so done below
VectorScale ( p1 , oneOver6 , a ) ;
VectorScale ( p2 , 4.0f * oneOver6 , b ) ;
VectorScale ( p3 , oneOver6 , c ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
}
void BSpline_NormalizeX (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Vector p1n , p4n ;
Spline_Normalize ( p1 , p2 , p3 , p4 , p1n , p4n ) ;
BSpline ( p1n , p2 , p3 , p4n , t , output ) ;
}
void Parabolic_Spline (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Assert ( s_bMathlibInitialized ) ;
float tSqr = t * t * 0.5f ;
t * = 0.5f ;
Assert ( & output ! = & p1 ) ;
Assert ( & output ! = & p2 ) ;
Assert ( & output ! = & p3 ) ;
Assert ( & output ! = & p4 ) ;
output . Init ( ) ;
Vector a , b , c , d ;
// matrix row 1
// no influence from t cubed
// matrix row 2
VectorScale ( p1 , tSqr , a ) ;
VectorScale ( p2 , tSqr * - 2.0f , b ) ;
VectorScale ( p3 , tSqr , c ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
VectorAdd ( c , output , output ) ;
// matrix row 3
VectorScale ( p1 , t * - 2.0f , a ) ;
VectorScale ( p2 , t * 2.0f , b ) ;
// p4 unchanged
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
// matrix row 4
VectorScale ( p1 , 0.5f , a ) ;
VectorScale ( p2 , 0.5f , b ) ;
VectorAdd ( a , output , output ) ;
VectorAdd ( b , output , output ) ;
}
void Parabolic_Spline_NormalizeX (
const Vector & p1 ,
const Vector & p2 ,
const Vector & p3 ,
const Vector & p4 ,
float t ,
Vector & output )
{
Vector p1n , p4n ;
Spline_Normalize ( p1 , p2 , p3 , p4 , p1n , p4n ) ;
Parabolic_Spline ( p1n , p2 , p3 , p4n , t , output ) ;
}
//-----------------------------------------------------------------------------
// Purpose: Compress the input values for a ranged result such that from 75% to 200% smoothly of the range maps
//-----------------------------------------------------------------------------
float RangeCompressor ( float flValue , float flMin , float flMax , float flBase )
{
flValue + = flBase ;
// convert to 0 to 1 value
float flMid = ( flValue - flMin ) / ( flMax - flMin ) ;
// convert to -1 to 1 value
float flTarget = flMid * 2 - 1 ;
if ( fabs ( flTarget ) > 0.75 )
{
float t = ( fabs ( flTarget ) - 0.75 ) / ( 1.25 ) ;
if ( t < 1.0 )
{
if ( flTarget > 0 )
{
flTarget = Hermite_Spline ( 0.75 , 1 , 0.75 , 0 , t ) ;
}
else
{
flTarget = - Hermite_Spline ( 0.75 , 1 , 0.75 , 0 , t ) ;
}
}
else
{
flTarget = ( flTarget > 0 ) ? 1.0f : - 1.0f ;
}
}
flMid = ( flTarget + 1 ) / 2.0 ;
flValue = flMin * ( 1 - flMid ) + flMax * flMid ;
flValue - = flBase ;
return flValue ;
}
//#pragma optimize( "", on )
//-----------------------------------------------------------------------------
// Transforms a AABB into another space; which will inherently grow the box.
//-----------------------------------------------------------------------------
void TransformAABB ( const matrix3x4_t & transform , const Vector & vecMinsIn , const Vector & vecMaxsIn , Vector & vecMinsOut , Vector & vecMaxsOut )
{
Vector localCenter ;
VectorAdd ( vecMinsIn , vecMaxsIn , localCenter ) ;
localCenter * = 0.5f ;
Vector localExtents ;
VectorSubtract ( vecMaxsIn , localCenter , localExtents ) ;
Vector worldCenter ;
VectorTransform ( localCenter , transform , worldCenter ) ;
Vector worldExtents ;
worldExtents . x = DotProductAbs ( localExtents , transform [ 0 ] ) ;
worldExtents . y = DotProductAbs ( localExtents , transform [ 1 ] ) ;
worldExtents . z = DotProductAbs ( localExtents , transform [ 2 ] ) ;
VectorSubtract ( worldCenter , worldExtents , vecMinsOut ) ;
VectorAdd ( worldCenter , worldExtents , vecMaxsOut ) ;
}
//-----------------------------------------------------------------------------
// Uses the inverse transform of in1
//-----------------------------------------------------------------------------
void ITransformAABB ( const matrix3x4_t & transform , const Vector & vecMinsIn , const Vector & vecMaxsIn , Vector & vecMinsOut , Vector & vecMaxsOut )
{
Vector worldCenter ;
VectorAdd ( vecMinsIn , vecMaxsIn , worldCenter ) ;
worldCenter * = 0.5f ;
Vector worldExtents ;
VectorSubtract ( vecMaxsIn , worldCenter , worldExtents ) ;
Vector localCenter ;
VectorITransform ( worldCenter , transform , localCenter ) ;
Vector localExtents ;
localExtents . x = FloatMakePositive ( worldExtents . x * transform [ 0 ] [ 0 ] ) +
FloatMakePositive ( worldExtents . y * transform [ 1 ] [ 0 ] ) +
FloatMakePositive ( worldExtents . z * transform [ 2 ] [ 0 ] ) ;
localExtents . y = FloatMakePositive ( worldExtents . x * transform [ 0 ] [ 1 ] ) +
FloatMakePositive ( worldExtents . y * transform [ 1 ] [ 1 ] ) +
FloatMakePositive ( worldExtents . z * transform [ 2 ] [ 1 ] ) ;
localExtents . z = FloatMakePositive ( worldExtents . x * transform [ 0 ] [ 2 ] ) +
FloatMakePositive ( worldExtents . y * transform [ 1 ] [ 2 ] ) +
FloatMakePositive ( worldExtents . z * transform [ 2 ] [ 2 ] ) ;
VectorSubtract ( localCenter , localExtents , vecMinsOut ) ;
VectorAdd ( localCenter , localExtents , vecMaxsOut ) ;
}
//-----------------------------------------------------------------------------
// Rotates a AABB into another space; which will inherently grow the box.
// (same as TransformAABB, but doesn't take the translation into account)
//-----------------------------------------------------------------------------
void RotateAABB ( const matrix3x4_t & transform , const Vector & vecMinsIn , const Vector & vecMaxsIn , Vector & vecMinsOut , Vector & vecMaxsOut )
{
Vector localCenter ;
VectorAdd ( vecMinsIn , vecMaxsIn , localCenter ) ;
localCenter * = 0.5f ;
Vector localExtents ;
VectorSubtract ( vecMaxsIn , localCenter , localExtents ) ;
Vector newCenter ;
VectorRotate ( localCenter , transform , newCenter ) ;
Vector newExtents ;
newExtents . x = DotProductAbs ( localExtents , transform [ 0 ] ) ;
newExtents . y = DotProductAbs ( localExtents , transform [ 1 ] ) ;
newExtents . z = DotProductAbs ( localExtents , transform [ 2 ] ) ;
VectorSubtract ( newCenter , newExtents , vecMinsOut ) ;
VectorAdd ( newCenter , newExtents , vecMaxsOut ) ;
}
//-----------------------------------------------------------------------------
// Uses the inverse transform of in1
//-----------------------------------------------------------------------------
void IRotateAABB ( const matrix3x4_t & transform , const Vector & vecMinsIn , const Vector & vecMaxsIn , Vector & vecMinsOut , Vector & vecMaxsOut )
{
Vector oldCenter ;
VectorAdd ( vecMinsIn , vecMaxsIn , oldCenter ) ;
oldCenter * = 0.5f ;
Vector oldExtents ;
VectorSubtract ( vecMaxsIn , oldCenter , oldExtents ) ;
Vector newCenter ;
VectorIRotate ( oldCenter , transform , newCenter ) ;
Vector newExtents ;
newExtents . x = FloatMakePositive ( oldExtents . x * transform [ 0 ] [ 0 ] ) +
FloatMakePositive ( oldExtents . y * transform [ 1 ] [ 0 ] ) +
FloatMakePositive ( oldExtents . z * transform [ 2 ] [ 0 ] ) ;
newExtents . y = FloatMakePositive ( oldExtents . x * transform [ 0 ] [ 1 ] ) +
FloatMakePositive ( oldExtents . y * transform [ 1 ] [ 1 ] ) +
FloatMakePositive ( oldExtents . z * transform [ 2 ] [ 1 ] ) ;
newExtents . z = FloatMakePositive ( oldExtents . x * transform [ 0 ] [ 2 ] ) +
FloatMakePositive ( oldExtents . y * transform [ 1 ] [ 2 ] ) +
FloatMakePositive ( oldExtents . z * transform [ 2 ] [ 2 ] ) ;
VectorSubtract ( newCenter , newExtents , vecMinsOut ) ;
VectorAdd ( newCenter , newExtents , vecMaxsOut ) ;
}
float CalcSqrDistanceToAABB ( const Vector & mins , const Vector & maxs , const Vector & point )
{
float flDelta ;
float flDistSqr = 0.0f ;
if ( point . x < mins . x )
{
flDelta = ( mins . x - point . x ) ;
flDistSqr + = flDelta * flDelta ;
}
else if ( point . x > maxs . x )
{
flDelta = ( point . x - maxs . x ) ;
flDistSqr + = flDelta * flDelta ;
}
if ( point . y < mins . y )
{
flDelta = ( mins . y - point . y ) ;
flDistSqr + = flDelta * flDelta ;
}
else if ( point . y > maxs . y )
{
flDelta = ( point . y - maxs . y ) ;
flDistSqr + = flDelta * flDelta ;
}
if ( point . z < mins . z )
{
flDelta = ( mins . z - point . z ) ;
flDistSqr + = flDelta * flDelta ;
}
else if ( point . z > maxs . z )
{
flDelta = ( point . z - maxs . z ) ;
flDistSqr + = flDelta * flDelta ;
}
return flDistSqr ;
}
void CalcClosestPointOnAABB ( const Vector & mins , const Vector & maxs , const Vector & point , Vector & closestOut )
{
for ( int i = 0 ; i < 3 ; i + + )
{
if ( point [ i ] < mins [ i ] )
{
closestOut [ i ] = mins [ i ] ;
}
else if ( point [ i ] > maxs [ i ] )
{
closestOut [ i ] = maxs [ i ] ;
}
else
{
closestOut [ i ] = point [ i ] ;
}
}
}
float CalcClosestPointToLineT ( const Vector & P , const Vector & vLineA , const Vector & vLineB , Vector & vDir )
{
Assert ( s_bMathlibInitialized ) ;
VectorSubtract ( vLineB , vLineA , vDir ) ;
// D dot [P - (A + D*t)] = 0
// t = ( DP - DA) / DD
float div = vDir . Dot ( vDir ) ;
if ( div < 0.00001f )
{
return 0 ;
}
else
{
return ( vDir . Dot ( P ) - vDir . Dot ( vLineA ) ) / div ;
}
}
void CalcClosestPointOnLine ( const Vector & P , const Vector & vLineA , const Vector & vLineB , Vector & vClosest , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector vDir ;
float t = CalcClosestPointToLineT ( P , vLineA , vLineB , vDir ) ;
if ( outT ) * outT = t ;
vClosest . MulAdd ( vLineA , vDir , t ) ;
}
float CalcDistanceToLine ( const Vector & P , const Vector & vLineA , const Vector & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector vClosest ;
CalcClosestPointOnLine ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistTo ( vClosest ) ;
}
float CalcDistanceSqrToLine ( const Vector & P , const Vector & vLineA , const Vector & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector vClosest ;
CalcClosestPointOnLine ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistToSqr ( vClosest ) ;
}
void CalcClosestPointOnLineSegment ( const Vector & P , const Vector & vLineA , const Vector & vLineB , Vector & vClosest , float * outT )
{
Vector vDir ;
float t = CalcClosestPointToLineT ( P , vLineA , vLineB , vDir ) ;
t = clamp ( t , 0 , 1 ) ;
if ( outT )
{
* outT = t ;
}
vClosest . MulAdd ( vLineA , vDir , t ) ;
}
float CalcDistanceToLineSegment ( const Vector & P , const Vector & vLineA , const Vector & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector vClosest ;
CalcClosestPointOnLineSegment ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistTo ( vClosest ) ;
}
float CalcDistanceSqrToLineSegment ( const Vector & P , const Vector & vLineA , const Vector & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector vClosest ;
CalcClosestPointOnLineSegment ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistToSqr ( vClosest ) ;
}
float CalcClosestPointToLineT2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , Vector2D & vDir )
{
Assert ( s_bMathlibInitialized ) ;
Vector2DSubtract ( vLineB , vLineA , vDir ) ;
// D dot [P - (A + D*t)] = 0
// t = (DP - DA) / DD
float div = vDir . Dot ( vDir ) ;
if ( div < 0.00001f )
{
return 0 ;
}
else
{
return ( vDir . Dot ( P ) - vDir . Dot ( vLineA ) ) / div ;
}
}
void CalcClosestPointOnLine2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , Vector2D & vClosest , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector2D vDir ;
float t = CalcClosestPointToLineT2D ( P , vLineA , vLineB , vDir ) ;
if ( outT ) * outT = t ;
vClosest . MulAdd ( vLineA , vDir , t ) ;
}
float CalcDistanceToLine2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector2D vClosest ;
CalcClosestPointOnLine2D ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistTo ( vClosest ) ;
}
float CalcDistanceSqrToLine2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector2D vClosest ;
CalcClosestPointOnLine2D ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistToSqr ( vClosest ) ;
}
void CalcClosestPointOnLineSegment2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , Vector2D & vClosest , float * outT )
{
Vector2D vDir ;
float t = CalcClosestPointToLineT2D ( P , vLineA , vLineB , vDir ) ;
t = clamp ( t , 0 , 1 ) ;
if ( outT )
{
* outT = t ;
}
vClosest . MulAdd ( vLineA , vDir , t ) ;
}
float CalcDistanceToLineSegment2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector2D vClosest ;
CalcClosestPointOnLineSegment2D ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistTo ( vClosest ) ;
}
float CalcDistanceSqrToLineSegment2D ( const Vector2D & P , const Vector2D & vLineA , const Vector2D & vLineB , float * outT )
{
Assert ( s_bMathlibInitialized ) ;
Vector2D vClosest ;
CalcClosestPointOnLineSegment2D ( P , vLineA , vLineB , vClosest , outT ) ;
return P . DistToSqr ( vClosest ) ;
}
// Do we have another epsilon we could use
# define LINE_EPS ( 0.000001f )
//-----------------------------------------------------------------------------
// Purpose: Given lines p1->p2 and p3->p4, computes a line segment (pa->pb) and returns the parameters 0->1 multipliers
// along each segment for the returned points
// Input : p1 -
// p2 -
// p3 -
// p4 -
// *s1 -
// *s2 -
// Output : Returns true on success, false on failure.
//-----------------------------------------------------------------------------
bool CalcLineToLineIntersectionSegment (
const Vector & p1 , const Vector & p2 , const Vector & p3 , const Vector & p4 , Vector * s1 , Vector * s2 ,
float * t1 , float * t2 )
{
Vector p13 , p43 , p21 ;
float d1343 , d4321 , d1321 , d4343 , d2121 ;
float numer , denom ;
p13 . x = p1 . x - p3 . x ;
p13 . y = p1 . y - p3 . y ;
p13 . z = p1 . z - p3 . z ;
p43 . x = p4 . x - p3 . x ;
p43 . y = p4 . y - p3 . y ;
p43 . z = p4 . z - p3 . z ;
if ( fabs ( p43 . x ) < LINE_EPS & & fabs ( p43 . y ) < LINE_EPS & & fabs ( p43 . z ) < LINE_EPS )
return false ;
p21 . x = p2 . x - p1 . x ;
p21 . y = p2 . y - p1 . y ;
p21 . z = p2 . z - p1 . z ;
if ( fabs ( p21 . x ) < LINE_EPS & & fabs ( p21 . y ) < LINE_EPS & & fabs ( p21 . z ) < LINE_EPS )
return false ;
d1343 = p13 . x * p43 . x + p13 . y * p43 . y + p13 . z * p43 . z ;
d4321 = p43 . x * p21 . x + p43 . y * p21 . y + p43 . z * p21 . z ;
d1321 = p13 . x * p21 . x + p13 . y * p21 . y + p13 . z * p21 . z ;
d4343 = p43 . x * p43 . x + p43 . y * p43 . y + p43 . z * p43 . z ;
d2121 = p21 . x * p21 . x + p21 . y * p21 . y + p21 . z * p21 . z ;
denom = d2121 * d4343 - d4321 * d4321 ;
if ( fabs ( denom ) < LINE_EPS )
return false ;
numer = d1343 * d4321 - d1321 * d4343 ;
* t1 = numer / denom ;
* t2 = ( d1343 + d4321 * ( * t1 ) ) / d4343 ;
s1 - > x = p1 . x + * t1 * p21 . x ;
s1 - > y = p1 . y + * t1 * p21 . y ;
s1 - > z = p1 . z + * t1 * p21 . z ;
s2 - > x = p3 . x + * t2 * p43 . x ;
s2 - > y = p3 . y + * t2 * p43 . y ;
s2 - > z = p3 . z + * t2 * p43 . z ;
return true ;
}
2008-09-15 01:33:59 -05:00
# ifdef _MSC_VER
2008-09-15 01:00:17 -05:00
# pragma optimize( "", off )
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
// stuff from windows.h
# ifndef _XBOX
typedef unsigned int DWORD ;
# endif
# ifndef EXCEPTION_EXECUTE_HANDLER
# define EXCEPTION_EXECUTE_HANDLER 1
# endif
2008-09-15 01:33:59 -05:00
# ifdef _MSC_VER
2008-09-15 01:00:17 -05:00
# pragma optimize( "", on )
2008-09-15 01:33:59 -05:00
# endif
2008-09-15 01:00:17 -05:00
static bool s_b3DNowEnabled = false ;
static bool s_bMMXEnabled = false ;
static bool s_bSSEEnabled = false ;
static bool s_bSSE2Enabled = false ;
void MathLib_Init ( float gamma , float texGamma , float brightness , int overbright , bool bAllow3DNow , bool bAllowSSE , bool bAllowSSE2 , bool bAllowMMX )
{
# ifdef _XBOX
if ( s_bMathlibInitialized )
return ;
# endif
// FIXME: Hook SSE into VectorAligned + Vector4DAligned
// Grab the processor information:
const CPUInformation & pi = GetCPUInformation ( ) ;
# ifndef _XBOX
// Select the default generic routines.
pfSqrt = _sqrtf ;
pfRSqrt = _rsqrtf ;
pfRSqrtFast = _rsqrtf ;
pfVectorNormalize = _VectorNormalize ;
pfVectorNormalizeFast = _VectorNormalizeFast ;
pfInvRSquared = _InvRSquared ;
pfFastSinCos = SinCos ;
pfFastCos = cosf ;
# endif
if ( bAllowMMX & & pi . m_bMMX )
{
// Select the MMX specific routines if available
// (MMX routines were used by SW span fillers - not currently used for HW)
s_bMMXEnabled = true ;
}
else
{
s_bMMXEnabled = false ;
}
# ifndef _XBOX
// SSE Generally performs better than 3DNow when present, so this is placed
// first to allow SSE to override these settings.
if ( bAllow3DNow & & pi . m_b3DNow )
{
s_b3DNowEnabled = true ;
// Select the 3DNow specific routines if available;
pfVectorNormalize = _3DNow_VectorNormalize ;
pfVectorNormalizeFast = _3DNow_VectorNormalizeFast ;
pfInvRSquared = _3DNow_InvRSquared ;
pfSqrt = _3DNow_Sqrt ;
pfRSqrt = _3DNow_RSqrt ;
pfRSqrtFast = _3DNow_RSqrt ;
}
else
{
s_b3DNowEnabled = false ;
}
# endif
if ( bAllowSSE & & pi . m_bSSE )
{
s_bSSEEnabled = true ;
# ifdef _XBOX
_MM_SET_FLUSH_ZERO_MODE ( _MM_FLUSH_ZERO_ON ) ;
# endif
// Select the SSE specific routines if available
pfVectorNormalize = _VectorNormalize ;
pfVectorNormalizeFast = _SSE_VectorNormalizeFast ;
pfInvRSquared = _SSE_InvRSquared ;
pfSqrt = _SSE_Sqrt ;
pfRSqrt = _SSE_RSqrtAccurate ;
pfRSqrtFast = _SSE_RSqrtFast ;
# ifdef _WIN32
pfFastSinCos = _SSE_SinCos ;
pfFastCos = _SSE_cos ;
# endif
}
else
{
# ifdef _XBOX
// sse expected
Assert ( 0 ) ;
# endif
s_bSSEEnabled = false ;
}
# ifndef _XBOX
if ( bAllowSSE2 & & pi . m_bSSE2 )
{
s_bSSE2Enabled = true ;
# ifdef _WIN32
pfFastSinCos = _SSE2_SinCos ;
pfFastCos = _SSE2_cos ;
# endif
} else
{
s_bSSE2Enabled = false ;
}
# endif
s_bMathlibInitialized = true ;
InitSinCosTable ( ) ;
BuildGammaTable ( gamma , texGamma , brightness , overbright ) ;
}
bool MathLib_3DNowEnabled ( void )
{
Assert ( s_bMathlibInitialized ) ;
return s_b3DNowEnabled ;
}
bool MathLib_MMXEnabled ( void )
{
Assert ( s_bMathlibInitialized ) ;
return s_bMMXEnabled ;
}
bool MathLib_SSEEnabled ( void )
{
Assert ( s_bMathlibInitialized ) ;
return s_bSSEEnabled ;
}
bool MathLib_SSE2Enabled ( void )
{
Assert ( s_bMathlibInitialized ) ;
return s_bSSE2Enabled ;
}
float Approach ( float target , float value , float speed )
{
float delta = target - value ;
if ( delta > speed )
value + = speed ;
else if ( delta < - speed )
value - = speed ;
else
value = target ;
return value ;
}
// BUGBUG: Why doesn't this call angle diff?!?!?
float ApproachAngle ( float target , float value , float speed )
{
target = anglemod ( target ) ;
value = anglemod ( value ) ;
float delta = target - value ;
// Speed is assumed to be positive
if ( speed < 0 )
speed = - speed ;
if ( delta < - 180 )
delta + = 360 ;
else if ( delta > 180 )
delta - = 360 ;
if ( delta > speed )
value + = speed ;
else if ( delta < - speed )
value - = speed ;
else
value = target ;
return value ;
}
// BUGBUG: Why do we need both of these?
float AngleDiff ( float destAngle , float srcAngle )
{
float delta ;
delta = fmodf ( destAngle - srcAngle , 360.0f ) ;
if ( destAngle > srcAngle )
{
if ( delta > = 180 )
delta - = 360 ;
}
else
{
if ( delta < = - 180 )
delta + = 360 ;
}
return delta ;
}
float AngleDistance ( float next , float cur )
{
float delta = next - cur ;
if ( delta < - 180 )
delta + = 360 ;
else if ( delta > 180 )
delta - = 360 ;
return delta ;
}
float AngleNormalize ( float angle )
{
angle = fmodf ( angle , 360.0f ) ;
if ( angle > 180 )
{
angle - = 360 ;
}
if ( angle < - 180 )
{
angle + = 360 ;
}
return angle ;
}
//--------------------------------------------------------------------------------------------------------------
// ensure that 0 <= angle <= 360
float AngleNormalizePositive ( float angle )
{
angle = fmodf ( angle , 360.0f ) ;
if ( angle < 0.0f )
{
angle + = 360.0f ;
}
return angle ;
}
//--------------------------------------------------------------------------------------------------------------
bool AnglesAreEqual ( float a , float b , float tolerance )
{
return ( fabs ( AngleDiff ( a , b ) ) < tolerance ) ;
}
void RotationDeltaAxisAngle ( const QAngle & srcAngles , const QAngle & destAngles , Vector & deltaAxis , float & deltaAngle )
{
Quaternion srcQuat , destQuat , srcQuatInv , out ;
AngleQuaternion ( srcAngles , srcQuat ) ;
AngleQuaternion ( destAngles , destQuat ) ;
QuaternionScale ( srcQuat , - 1 , srcQuatInv ) ;
QuaternionMult ( destQuat , srcQuatInv , out ) ;
QuaternionNormalize ( out ) ;
QuaternionAxisAngle ( out , deltaAxis , deltaAngle ) ;
}
void RotationDelta ( const QAngle & srcAngles , const QAngle & destAngles , QAngle * out )
{
matrix3x4_t src , srcInv ;
matrix3x4_t dest ;
AngleMatrix ( srcAngles , src ) ;
AngleMatrix ( destAngles , dest ) ;
// xform = src(-1) * dest
MatrixInvert ( src , srcInv ) ;
matrix3x4_t xform ;
ConcatTransforms ( dest , srcInv , xform ) ;
QAngle xformAngles ;
MatrixAngles ( xform , xformAngles ) ;
if ( out )
{
* out = xformAngles ;
}
}
//-----------------------------------------------------------------------------
// Purpose: Computes a triangle normal
//-----------------------------------------------------------------------------
void ComputeTrianglePlane ( const Vector & v1 , const Vector & v2 , const Vector & v3 , Vector & normal , float & intercept )
{
Vector e1 , e2 ;
VectorSubtract ( v2 , v1 , e1 ) ;
VectorSubtract ( v3 , v1 , e2 ) ;
CrossProduct ( e1 , e2 , normal ) ;
VectorNormalize ( normal ) ;
intercept = DotProduct ( normal , v1 ) ;
}
//-----------------------------------------------------------------------------
// Purpose: This is a clone of BaseWindingForPlane()
// Input : *outVerts - an array of preallocated verts to build the polygon in
// normal - the plane normal
// dist - the plane constant
// Output : int - vert count (always 4)
//-----------------------------------------------------------------------------
int PolyFromPlane ( Vector * outVerts , const Vector & normal , float dist , float fHalfScale )
{
int i , x ;
vec_t max , v ;
Vector org , vright , vup ;
// find the major axis
max = - 16384 ; //MAX_COORD_INTEGER
x = - 1 ;
for ( i = 0 ; i < 3 ; i + + )
{
v = fabs ( normal [ i ] ) ;
if ( v > max )
{
x = i ;
max = v ;
}
}
if ( x = = - 1 )
return 0 ;
// Build a unit vector along something other than the major axis
VectorCopy ( vec3_origin , vup ) ;
switch ( x )
{
case 0 :
case 1 :
vup [ 2 ] = 1 ;
break ;
case 2 :
vup [ 0 ] = 1 ;
break ;
}
// Remove the component of this vector along the normal
v = DotProduct ( vup , normal ) ;
VectorMA ( vup , - v , normal , vup ) ;
// Make it a unit (perpendicular)
VectorNormalize ( vup ) ;
// Center of the poly is at normal * dist
VectorScale ( normal , dist , org ) ;
// Calculate the third orthonormal basis vector for our plane space (this one and vup are in the plane)
CrossProduct ( vup , normal , vright ) ;
// Make the plane's basis vectors big (these are the half-sides of the polygon we're making)
VectorScale ( vup , fHalfScale , vup ) ;
VectorScale ( vright , fHalfScale , vright ) ;
// Move diagonally away from org to create the corner verts
VectorSubtract ( org , vright , outVerts [ 0 ] ) ; // left
VectorAdd ( outVerts [ 0 ] , vup , outVerts [ 0 ] ) ; // up
VectorAdd ( org , vright , outVerts [ 1 ] ) ; // right
VectorAdd ( outVerts [ 1 ] , vup , outVerts [ 1 ] ) ; // up
VectorAdd ( org , vright , outVerts [ 2 ] ) ; // right
VectorSubtract ( outVerts [ 2 ] , vup , outVerts [ 2 ] ) ; // down
VectorSubtract ( org , vright , outVerts [ 3 ] ) ; // left
VectorSubtract ( outVerts [ 3 ] , vup , outVerts [ 3 ] ) ; // down
// The four corners form a planar quadrilateral normal to "normal"
return 4 ;
}
//-----------------------------------------------------------------------------
// Purpose: clip a poly to the plane and return the poly on the front side of the plane
// Input : *inVerts - input polygon
// vertCount - # verts in input poly
// *outVerts - destination poly
// normal - plane normal
// dist - plane constant
// Output : int - # verts in output poly
//-----------------------------------------------------------------------------
int ClipPolyToPlane ( Vector * inVerts , int vertCount , Vector * outVerts , const Vector & normal , float dist , float fOnPlaneEpsilon )
{
vec_t * dists = ( vec_t * ) stackalloc ( sizeof ( vec_t ) * vertCount * 4 ) ; //4x vertcount should cover all cases
int * sides = ( int * ) stackalloc ( sizeof ( vec_t ) * vertCount * 4 ) ;
int counts [ 3 ] ;
vec_t dot ;
int i , j ;
Vector mid = vec3_origin ;
int outCount ;
counts [ 0 ] = counts [ 1 ] = counts [ 2 ] = 0 ;
// determine sides for each point
for ( i = 0 ; i < vertCount ; i + + )
{
dot = DotProduct ( inVerts [ i ] , normal ) - dist ;
dists [ i ] = dot ;
if ( dot > fOnPlaneEpsilon )
{
sides [ i ] = SIDE_FRONT ;
}
else if ( dot < - fOnPlaneEpsilon )
{
sides [ i ] = SIDE_BACK ;
}
else
{
sides [ i ] = SIDE_ON ;
}
counts [ sides [ i ] ] + + ;
}
sides [ i ] = sides [ 0 ] ;
dists [ i ] = dists [ 0 ] ;
if ( ! counts [ 0 ] )
return 0 ;
if ( ! counts [ 1 ] )
{
// Copy to output verts
for ( i = 0 ; i < vertCount ; i + + )
{
VectorCopy ( inVerts [ i ] , outVerts [ i ] ) ;
}
return vertCount ;
}
outCount = 0 ;
for ( i = 0 ; i < vertCount ; i + + )
{
Vector & p1 = inVerts [ i ] ;
if ( sides [ i ] = = SIDE_ON )
{
VectorCopy ( p1 , outVerts [ outCount ] ) ;
outCount + + ;
continue ;
}
if ( sides [ i ] = = SIDE_FRONT )
{
VectorCopy ( p1 , outVerts [ outCount ] ) ;
outCount + + ;
}
if ( sides [ i + 1 ] = = SIDE_ON | | sides [ i + 1 ] = = sides [ i ] )
continue ;
// generate a split point
Vector & p2 = inVerts [ ( i + 1 ) % vertCount ] ;
dot = dists [ i ] / ( dists [ i ] - dists [ i + 1 ] ) ;
for ( j = 0 ; j < 3 ; j + + )
{ // avoid round off error when possible
if ( normal [ j ] = = 1 )
mid [ j ] = dist ;
else if ( normal [ j ] = = - 1 )
mid [ j ] = - dist ;
else
mid [ j ] = p1 [ j ] + dot * ( p2 [ j ] - p1 [ j ] ) ;
}
VectorCopy ( mid , outVerts [ outCount ] ) ;
outCount + + ;
}
return outCount ;
}
int ClipPolyToPlane_Precise ( double * inVerts , int vertCount , double * outVerts , const double * normal , double dist , double fOnPlaneEpsilon )
{
double * dists = ( double * ) stackalloc ( sizeof ( double ) * vertCount * 4 ) ; //4x vertcount should cover all cases
int * sides = ( int * ) stackalloc ( sizeof ( double ) * vertCount * 4 ) ;
int counts [ 3 ] ;
double dot ;
int i , j ;
//Vector mid = vec3_origin;
double mid [ 3 ] ;
mid [ 0 ] = 0.0 ;
mid [ 1 ] = 0.0 ;
mid [ 2 ] = 0.0 ;
int outCount ;
counts [ 0 ] = counts [ 1 ] = counts [ 2 ] = 0 ;
// determine sides for each point
for ( i = 0 ; i < vertCount ; i + + )
{
//dot = DotProduct( inVerts[i], normal) - dist;
dot = ( ( inVerts [ i * 3 + 0 ] * normal [ 0 ] ) + ( inVerts [ i * 3 + 1 ] * normal [ 1 ] ) + ( inVerts [ i * 3 + 2 ] * normal [ 2 ] ) ) - dist ;
dists [ i ] = dot ;
if ( dot > fOnPlaneEpsilon )
{
sides [ i ] = SIDE_FRONT ;
}
else if ( dot < - fOnPlaneEpsilon )
{
sides [ i ] = SIDE_BACK ;
}
else
{
sides [ i ] = SIDE_ON ;
}
counts [ sides [ i ] ] + + ;
}
sides [ i ] = sides [ 0 ] ;
dists [ i ] = dists [ 0 ] ;
if ( ! counts [ 0 ] )
return 0 ;
if ( ! counts [ 1 ] )
{
// Copy to output verts
//for ( i = 0; i < vertCount; i++ )
for ( i = 0 ; i < vertCount * 3 ; i + + )
{
//VectorCopy( inVerts[i], outVerts[i] );
outVerts [ i ] = inVerts [ i ] ;
}
return vertCount ;
}
outCount = 0 ;
for ( i = 0 ; i < vertCount ; i + + )
{
//Vector& p1 = inVerts[i];
double * p1 = & inVerts [ i * 3 ] ;
//p1[0] = inVerts[i*3 + 0];
//p1[1] = inVerts[i*3 + 1];
//p1[2] = inVerts[i*3 + 2];
if ( sides [ i ] = = SIDE_ON )
{
//VectorCopy( p1, outVerts[outCount]);
outVerts [ outCount * 3 + 0 ] = p1 [ 0 ] ;
outVerts [ outCount * 3 + 1 ] = p1 [ 1 ] ;
outVerts [ outCount * 3 + 2 ] = p1 [ 2 ] ;
outCount + + ;
continue ;
}
if ( sides [ i ] = = SIDE_FRONT )
{
//VectorCopy( p1, outVerts[outCount]);
outVerts [ outCount * 3 + 0 ] = p1 [ 0 ] ;
outVerts [ outCount * 3 + 1 ] = p1 [ 1 ] ;
outVerts [ outCount * 3 + 2 ] = p1 [ 2 ] ;
outCount + + ;
}
if ( sides [ i + 1 ] = = SIDE_ON | | sides [ i + 1 ] = = sides [ i ] )
continue ;
// generate a split point
//Vector& p2 = inVerts[(i+1)%vertCount];
int wrappedindex = ( i + 1 ) % vertCount ;
double * p2 = & inVerts [ wrappedindex * 3 ] ;
//p2[0] = inVerts[wrappedindex*3 + 0];
//p2[1] = inVerts[wrappedindex*3 + 1];
//p2[2] = inVerts[wrappedindex*3 + 2];
dot = dists [ i ] / ( dists [ i ] - dists [ i + 1 ] ) ;
for ( j = 0 ; j < 3 ; j + + )
{
mid [ j ] = ( double ) p1 [ j ] + dot * ( ( double ) p2 [ j ] - ( double ) p1 [ j ] ) ;
}
//VectorCopy (mid, outVerts[outCount]);
outVerts [ outCount * 3 + 0 ] = mid [ 0 ] ;
outVerts [ outCount * 3 + 1 ] = mid [ 1 ] ;
outVerts [ outCount * 3 + 2 ] = mid [ 2 ] ;
outCount + + ;
}
return outCount ;
}
void ColorRGBExp32ToVector ( const ColorRGBExp32 & in , Vector & out )
{
Assert ( s_bMathlibInitialized ) ;
// FIXME: Why is there a factor of 255 built into this?
out . x = 255.0f * TexLightToLinear ( in . r , in . exponent ) ;
out . y = 255.0f * TexLightToLinear ( in . g , in . exponent ) ;
out . z = 255.0f * TexLightToLinear ( in . b , in . exponent ) ;
}
// assumes that the desired mantissa range is 0..255
static int VectorToColorRGBExp32_CalcExponent ( float in )
{
int power = 0 ;
if ( in ! = 0.0f )
{
while ( in > 255.0f )
{
power + = 1 ;
in * = 0.5f ;
}
while ( in < 127.0f )
{
power - = 1 ;
in * = 2.0f ;
}
}
return power ;
}
void VectorToColorRGBExp32 ( const Vector & vin , ColorRGBExp32 & c )
{
Vector v = vin ;
Assert ( s_bMathlibInitialized ) ;
Assert ( v . x > = 0.0f & & v . y > = 0.0f & & v . z > = 0.0f ) ;
int i ;
float max = v [ 0 ] ;
for ( i = 1 ; i < 3 ; i + + )
{
// Get the maximum value.
if ( v [ i ] > max )
{
max = v [ i ] ;
}
}
// figure out the exponent for this luxel.
int exponent = VectorToColorRGBExp32_CalcExponent ( max ) ;
// make the exponent fits into a signed byte.
if ( exponent < - 128 )
{
exponent = - 128 ;
}
else if ( exponent > 127 )
{
exponent = 127 ;
}
// undone: optimize with a table
float scalar = pow ( 2.0f , - exponent ) ;
// convert to mantissa x 2^exponent format
for ( i = 0 ; i < 3 ; i + + )
{
v [ i ] * = scalar ;
// clamp
if ( v [ i ] > 255.0f )
{
v [ i ] = 255.0f ;
}
}
c . r = ( unsigned char ) v [ 0 ] ;
c . g = ( unsigned char ) v [ 1 ] ;
c . b = ( unsigned char ) v [ 2 ] ;
c . exponent = ( signed char ) exponent ;
}
int CeilPow2 ( int in )
{
int retval ;
retval = 1 ;
while ( retval < in )
retval < < = 1 ;
return retval ;
}
int FloorPow2 ( int in )
{
int retval ;
retval = 1 ;
while ( retval < in )
retval < < = 1 ;
return retval > > 1 ;
}
bool R_CullBox ( const Vector & mins , const Vector & maxs , const Frustum_t & frustum )
{
return ( ( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_RIGHT ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_LEFT ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_TOP ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_BOTTOM ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_NEARZ ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_FARZ ) ) = = 2 ) ) ;
}
bool R_CullBoxSkipNear ( const Vector & mins , const Vector & maxs , const Frustum_t & frustum )
{
return ( ( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_RIGHT ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_LEFT ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_TOP ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_BOTTOM ) ) = = 2 ) | |
( BoxOnPlaneSide ( mins , maxs , frustum . GetPlane ( FRUSTUM_FARZ ) ) = = 2 ) ) ;
}
# endif // !_STATIC_LINKED || _SHARED_LIB