This commit is contained in:
FluorescentCIAAfricanAmerican
2020-04-22 12:56:21 -04:00
commit 3bf9df6b27
15370 changed files with 5489726 additions and 0 deletions

197
mathlib/3dnow.cpp Normal file
View File

@ -0,0 +1,197 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: 3DNow Math primitives.
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/amd3dx.h"
#include "mathlib/vector.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
#if !defined(COMPILER_MSVC64) && !defined(LINUX)
// Implement for 64-bit Windows if needed.
// Clang hits "fatal error: error in backend:" and other errors when trying
// to compile the inline assembly below. 3DNow support is highly unlikely to
// be useful/used, so it's not worth spending time on fixing.
#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
//-----------------------------------------------------------------------------
// 3D Now Implementations of optimized routines:
//-----------------------------------------------------------------------------
// Square root of x computed with 3DNow! instructions.
// The _WIN32 path forms x * PFRSQRT(x). No refinement instruction follows the
// reciprocal-square-root estimate, so the result has only the precision of that
// estimate.
// Asserts that s_bMathlibInitialized is set. Returns the value left in 'root'
// (initialized to 0).
float _3DNow_Sqrt(float x)
{
Assert( s_bMathlibInitialized );
float root = 0.f;
#ifdef _WIN32
_asm
{
// femms brackets the MMX/3DNow! register use on both sides of the computation.
femms
movd mm0, x
// PFRSQRT/PFMUL are macros from mathlib/amd3dx.h; presumably (dst, src) order - confirm there.
PFRSQRT (mm1,mm0)
punpckldq mm0, mm0
PFMUL (mm0, mm1)
movd root, mm0
femms
}
#elif LINUX
// NOTE(review): the file-level #if surrounding these routines requires LINUX to be
// undefined, so this branch is not compiled as the file stands.
__asm __volatile__( "femms" );
__asm __volatile__
(
"pfrsqrt %y0, %y1 \n\t"
"punpckldq %y1, %y1 \n\t"
"pfmul %y1, %y0 \n\t"
: "=y" (root), "=y" (x)
:"0" (x)
);
__asm __volatile__( "femms" );
#else
#error
#endif
return root;
}
// NJS FIXME: Need to test reciprocal square root performance and accuracy
// on AMD CPUs before using the specialized instruction.
float _3DNow_RSqrt(float x)
{
	Assert( s_bMathlibInitialized );

	// Reciprocal square root, obtained by inverting the 3DNow! square root
	// rather than by using the dedicated reciprocal-sqrt instruction directly.
	const float root = _3DNow_Sqrt( x );
	return 1.f / root;
}
// Normalizes 'vec' in place using 3DNow! instructions and returns the value left
// in 'radius'.
// If all three components compare equal to zero, the vector is left untouched and
// 0 is returned.
// Reading of the _WIN32 path (assuming the amd3dx.h macros take (dst, src) - confirm):
// the squared components are summed, PFRSQRT estimates 1/length, the three
// components are scaled by that estimate, and radius = squared length * estimate.
float FASTCALL _3DNow_VectorNormalize (Vector& vec)
{
Assert( s_bMathlibInitialized );
// The asm below addresses the components as three consecutive floats starting at v.
float *v = &vec[0];
float radius = 0.f;
if ( v[0] || v[1] || v[2] )
{
#ifdef _WIN32
_asm
{
mov eax, v
femms
// mm0 <- first two floats, mm1 <- third float; mm2/mm3 keep unsquared copies.
movq mm0, QWORD PTR [eax]
movd mm1, DWORD PTR [eax+8]
movq mm2, mm0
movq mm3, mm1
PFMUL (mm0, mm0)
PFMUL (mm1, mm1)
PFACC (mm0, mm0)
PFADD (mm1, mm0)
PFRSQRT (mm0, mm1)
punpckldq mm1, mm1
PFMUL (mm1, mm0)
PFMUL (mm2, mm0)
PFMUL (mm3, mm0)
// Write the scaled components back over the input and export the length.
movq QWORD PTR [eax], mm2
movd DWORD PTR [eax+8], mm3
movd radius, mm1
femms
}
#elif LINUX
// NOTE(review): the file-level #if surrounding these routines requires LINUX to be
// undefined, so this branch is not compiled as the file stands.
// a and c each receive the first two floats of vec; b and d each receive the third.
long long a,c;
int b,d;
memcpy(&a,&vec[0],sizeof(a));
memcpy(&b,&vec[2],sizeof(b));
memcpy(&c,&vec[0],sizeof(c));
memcpy(&d,&vec[2],sizeof(d));
__asm __volatile__( "femms" );
__asm __volatile__
(
"pfmul %y3, %y3\n\t"
"pfmul %y0, %y0 \n\t"
"pfacc %y3, %y3 \n\t"
"pfadd %y3, %y0 \n\t"
"pfrsqrt %y0, %y3 \n\t"
"punpckldq %y0, %y0 \n\t"
"pfmul %y3, %y0 \n\t"
"pfmul %y3, %y2 \n\t"
"pfmul %y3, %y1 \n\t"
: "=y" (radius), "=y" (c), "=y" (d)
: "y" (a), "0" (b), "1" (c), "2" (d)
);
// c and d are copied back over the vector's storage.
memcpy(&vec[0],&c,sizeof(c));
memcpy(&vec[2],&d,sizeof(d));
__asm __volatile__( "femms" );
#else
#error
#endif
}
return radius;
}
void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
{
	// There is no separate fast path: this forwards to the regular 3DNow!
	// normalize and discards the length it returns.
	(void)_3DNow_VectorNormalize( vec );
}
// JAY: This complains with the latest processor pack
#pragma warning(disable: 4730)
// Inverse squared length of the three floats at 'v', computed with 3DNow! instructions.
// Reading of the _WIN32 path (assuming the amd3dx.h macros take (dst, src) - confirm):
// the sum of squares is clamped from below with PFMAX against the initial value of
// r2 (1.0), then PFRCP produces the reciprocal estimate, i.e. roughly
// 1 / max(1, x*x + y*y + z*z).
// Asserts that s_bMathlibInitialized is set. 'v' is only read.
float _3DNow_InvRSquared(const float* v)
{
Assert( s_bMathlibInitialized );
// Doubles as the lower clamp for the squared length and as the result slot.
float r2 = 1.f;
#ifdef _WIN32
_asm { // AMD 3DNow only routine
mov eax, v
femms
// mm0 <- first two floats, mm1 <- third float, mm2 <- 1.0 (clamp value).
movq mm0, QWORD PTR [eax]
movd mm1, DWORD PTR [eax+8]
movd mm2, [r2]
PFMUL (mm0, mm0)
PFMUL (mm1, mm1)
PFACC (mm0, mm0)
PFADD (mm1, mm0)
PFMAX (mm1, mm2)
PFRCP (mm0, mm1)
movd [r2], mm0
femms
}
#elif LINUX
// NOTE(review): the file-level #if surrounding these routines requires LINUX to be
// undefined, so this branch is not compiled as the file stands.
// a and c each receive the first two floats of v; b receives the third.
long long a,c;
int b;
memcpy(&a,&v[0],sizeof(a));
memcpy(&b,&v[2],sizeof(b));
memcpy(&c,&v[0],sizeof(c));
__asm __volatile__( "femms" );
__asm __volatile__
(
"PFMUL %y2, %y2 \n\t"
"PFMUL %y3, %y3 \n\t"
"PFACC %y2, %y2 \n\t"
"PFADD %y2, %y3 \n\t"
"PFMAX %y3, %y4 \n\t"
"PFRCP %y3, %y2 \n\t"
"movq %y2, %y0 \n\t"
: "=y" (r2)
: "0" (r2), "y" (a), "y" (b), "y" (c)
);
__asm __volatile__( "femms" );
#else
#error
#endif
return r2;
}
#endif // !COMPILER_MSVC64 && !LINUX

16
mathlib/3dnow.h Normal file
View File

@ -0,0 +1,16 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Declarations of the 3DNow! math primitives implemented in 3dnow.cpp.
//
//=====================================================================================//
#ifndef _3DNOW_H
#define _3DNOW_H
// Square root of x, built from the 3DNow! reciprocal-square-root estimate.
float _3DNow_Sqrt(float x);
// Reciprocal square root of x, computed as 1 / _3DNow_Sqrt(x).
float _3DNow_RSqrt(float x);
// Normalizes vec in place and returns its length; an all-zero vector is left
// unchanged and 0 is returned.
float FASTCALL _3DNow_VectorNormalize (Vector& vec);
// Same as _3DNow_VectorNormalize, but the length is not returned.
void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec);
// Reciprocal of the squared length of the three floats at v; v is only read.
float _3DNow_InvRSquared(const float* v);
#endif // _3DNOW_H

393
mathlib/IceKey.cpp Normal file
View File

@ -0,0 +1,393 @@
// Purpose: C++ implementation of the ICE encryption algorithm.
// Taken from public domain code, as written by Matthew Kwan - July 1996
// http://www.darkside.com.au/ice/
#if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
#include "mathlib/IceKey.H"
#include "tier0/memdbgon.h"
#pragma warning(disable: 4244)
/* Structure of a single round subkey */
// One round's subkey material: three words, each filled with 20 key bits by
// IceKey::scheduleBuild. In ice_f, val[0] and val[1] are XORed into the two
// expanded halves and val[2] selects which bits are swapped between the halves.
class IceSubkey {
public:
unsigned long val[3];
};
/* The S-boxes: four tables of 1024 entries, filled once by ice_sboxes_init() */
static unsigned long ice_sbox[4][1024];
/* Set to 1 by the IceKey constructor after the S-boxes have been built */
static int ice_sboxes_initialised = 0;
/* Modulo values for the S-boxes, indexed [S-box][row]; passed to gf_exp7() as m */
static const int ice_smod[4][4] = {
{333, 313, 505, 369},
{379, 375, 319, 391},
{361, 445, 451, 397},
{397, 425, 395, 505}};
/* XOR values for the S-boxes, indexed [S-box][row]; XORed with the column before exponentiation */
static const int ice_sxor[4][4] = {
{0x83, 0x85, 0x9b, 0xcd},
{0xcc, 0xa7, 0xad, 0x41},
{0x4b, 0x2e, 0xd4, 0x33},
{0xea, 0xcb, 0x2e, 0x04}};
/* Permutation values for the P-box: entry i is the output bit mask for input bit i */
static const unsigned long ice_pbox[32] = {
0x00000001, 0x00000080, 0x00000400, 0x00002000,
0x00080000, 0x00200000, 0x01000000, 0x40000000,
0x00000008, 0x00000020, 0x00000100, 0x00004000,
0x00010000, 0x00800000, 0x04000000, 0x20000000,
0x00000004, 0x00000010, 0x00000200, 0x00008000,
0x00020000, 0x00400000, 0x08000000, 0x10000000,
0x00000002, 0x00000040, 0x00000800, 0x00001000,
0x00040000, 0x00100000, 0x02000000, 0x80000000};
/* The key rotation schedule: IceKey::set() uses the first 8 entries for one set
 * of rounds and entries 8..15 for the mirrored set */
static const int ice_keyrot[16] = {
0, 1, 2, 3, 2, 1, 3, 0,
1, 3, 2, 0, 3, 1, 0, 2};
/*
* 8-bit Galois Field multiplication of a by b, modulo m.
* Just like arithmetic multiplication, except that additions and
* subtractions are replaced by XOR.
*/
static unsigned int
gf_mult (
	unsigned int	a,
	unsigned int	b,
	unsigned int	m
) {
	unsigned int	product = 0;

	/* Shift-and-XOR multiply: consume b one bit at a time, lowest bit first. */
	for (; b != 0; b >>= 1) {
		if ((b & 1) != 0)
			product ^= a;

		/* Double a, reducing by m as soon as it leaves the 8-bit range. */
		a <<= 1;
		if (a >= 256)
			a ^= m;
	}

	return product;
}
/*
* Galois Field exponentiation.
* Raise the base to the power of 7, modulo m.
*/
static unsigned long
gf_exp7 (
	unsigned int	b,
	unsigned int	m
) {
	/* Zero stays zero; no multiplications needed. */
	if (b == 0)
		return 0;

	/* b^7 = b * (b * b^2)^2, i.e. four multiplications. */
	const unsigned int	squared = gf_mult (b, b, m);
	const unsigned int	cubed = gf_mult (b, squared, m);
	const unsigned int	sixth = gf_mult (cubed, cubed, m);

	return gf_mult (b, sixth, m);
}
/*
* Carry out the ICE 32-bit P-box permutation.
*/
static unsigned long
ice_perm32 (
	unsigned long	x
) {
	unsigned long	permuted = 0;
	int		bit;

	/* Walk the set bits of x from the lowest upwards; each one contributes
	 * the P-box mask stored at its bit position. Stops once x is exhausted. */
	for (bit = 0; x != 0; bit++, x >>= 1) {
		if ((x & 1) != 0)
			permuted |= ice_pbox[bit];
	}

	return permuted;
}
/*
* Initialise the ICE S-boxes.
* This only has to be done once.
*/
static void
ice_sboxes_init (void)
{
	for (int i = 0; i < 1024; i++) {
		/* Bits 1..8 of the index select the column; bits 0 and 9 select the row. */
		const int	col = (i >> 1) & 0xff;
		const int	row = (i & 0x1) | ((i & 0x200) >> 8);

		/* S-box 0 lands in the top byte, S-box 3 in the bottom byte,
		 * before the P-box permutation is applied. */
		for (int box = 0; box < 4; box++) {
			const int	shift = 24 - 8 * box;
			unsigned long	x;

			x = gf_exp7 (col ^ ice_sxor[box][row], ice_smod[box][row]) << shift;
			ice_sbox[box][i] = ice_perm32 (x);
		}
	}
}
/*
* Create a new ICE key.
*/
IceKey::IceKey (int n)
{
	// Build the shared S-boxes the first time any key is created.
	if (!ice_sboxes_initialised) {
		ice_sboxes_init ();
		ice_sboxes_initialised = 1;
	}

	// Levels below 1 select the reduced 8-round variant with a single
	// 8-byte key block; otherwise level n uses 16*n rounds.
	const bool reduced = (n < 1);
	_size = reduced ? 1 : n;
	_rounds = reduced ? 8 : n * 16;

	_keysched = new IceSubkey[_rounds];
}
/*
* Destroy an ICE key.
*/
IceKey::~IceKey ()
{
	// Wipe the key schedule before releasing it. The stores go through a
	// volatile pointer because plain stores to memory that is freed right
	// afterwards are dead stores the optimizer is allowed to remove, which
	// would leave the subkeys readable in freed heap memory.
	for (int i = 0; i < _rounds; i++) {
		volatile unsigned long *val = _keysched[i].val;
		for (int j = 0; j < 3; j++)
			val[j] = 0;
	}

	_rounds = _size = 0;
	delete[] _keysched;
}
/*
* The single round ICE f function.
*/
static unsigned long
ice_f (
	unsigned long	p,
	const IceSubkey	*sk
) {
	/* Expand the 32-bit input into two 20-bit halves (two 10-bit fields each). */
	const unsigned long	tl = ((p >> 16) & 0x3ff) | (((p >> 14) | (p << 18)) & 0xffc00);
	const unsigned long	tr = (p & 0x3ff) | ((p << 2) & 0xffc00);

	/* Salt permutation: wherever sk->val[2] has a bit set, the bits of tl and
	 * tr trade places. Equivalent to
	 *   al = (tr & salt) | (tl & ~salt);  ar = (tl & salt) | (tr & ~salt);
	 * done with a single XOR-swap mask. */
	const unsigned long	swap = sk->val[2] & (tl ^ tr);

	/* Apply the swap, then XOR each half with its subkey word. */
	const unsigned long	al = (tl ^ swap) ^ sk->val[0];
	const unsigned long	ar = (tr ^ swap) ^ sk->val[1];

	/* Each 10-bit field indexes its own S-box; the S-box entries already
	 * include the P-box permutation, so the results are simply ORed. */
	return (ice_sbox[0][al >> 10] | ice_sbox[1][al & 0x3ff]
		| ice_sbox[2][ar >> 10] | ice_sbox[3][ar & 0x3ff]);
}
/*
* Encrypt a block of 8 bytes of data with the given ICE key.
*/
void
IceKey::encrypt (
const unsigned char *ptext,
unsigned char *ctext
) const
{
int i;
unsigned long l, r;
l = (((unsigned long) ptext[0]) << 24)
| (((unsigned long) ptext[1]) << 16)
| (((unsigned long) ptext[2]) << 8) | ptext[3];
r = (((unsigned long) ptext[4]) << 24)
| (((unsigned long) ptext[5]) << 16)
| (((unsigned long) ptext[6]) << 8) | ptext[7];
for (i = 0; i < _rounds; i += 2) {
l ^= ice_f (r, &_keysched[i]);
r ^= ice_f (l, &_keysched[i + 1]);
}
for (i = 0; i < 4; i++) {
ctext[3 - i] = r & 0xff;
ctext[7 - i] = l & 0xff;
r >>= 8;
l >>= 8;
}
}
/*
* Decrypt a block of 8 bytes of data with the given ICE key.
*/
void
IceKey::decrypt (
const unsigned char *ctext,
unsigned char *ptext
) const
{
int i;
unsigned long l, r;
l = (((unsigned long) ctext[0]) << 24)
| (((unsigned long) ctext[1]) << 16)
| (((unsigned long) ctext[2]) << 8) | ctext[3];
r = (((unsigned long) ctext[4]) << 24)
| (((unsigned long) ctext[5]) << 16)
| (((unsigned long) ctext[6]) << 8) | ctext[7];
for (i = _rounds - 1; i > 0; i -= 2) {
l ^= ice_f (r, &_keysched[i]);
r ^= ice_f (l, &_keysched[i - 1]);
}
for (i = 0; i < 4; i++) {
ptext[3 - i] = r & 0xff;
ptext[7 - i] = l & 0xff;
r >>= 8;
l >>= 8;
}
}
/*
* Set 8 rounds [n, n+7] of the key schedule of an ICE key.
*/
void
IceKey::scheduleBuild (
unsigned short *kb,
int n,
const int *keyrot
) {
int i;
for (i=0; i<8; i++) {
int j;
int kr = keyrot[i];
IceSubkey *isk = &_keysched[n + i];
for (j=0; j<3; j++)
isk->val[j] = 0;
for (j=0; j<15; j++) {
int k;
unsigned long *curr_sk = &isk->val[j % 3];
for (k=0; k<4; k++) {
unsigned short *curr_kb = &kb[(kr + k) & 3];
int bit = *curr_kb & 1;
*curr_sk = (*curr_sk << 1) | bit;
*curr_kb = (*curr_kb >> 1) | ((bit ^ 1) << 15);
}
}
}
}
/*
* Set the key schedule of an ICE key.
*/
void
IceKey::set (
	const unsigned char	*key
) {
	// The 8-round variant consumes a single 8-byte key block and builds only
	// the forward half of the schedule.
	const bool	reduced = (_rounds == 8);
	const int	blocks = reduced ? 1 : _size;

	for (int block = 0; block < blocks; block++) {
		const unsigned char	*src = key + block * 8;
		unsigned short		kb[4];

		// Pack the 8 key bytes into four big-endian 16-bit words, stored in
		// reverse order.
		for (int w = 0; w < 4; w++)
			kb[3 - w] = (src[w*2] << 8) | src[w*2 + 1];

		scheduleBuild (kb, block * 8, ice_keyrot);

		// Full-strength keys also fill the mirrored rounds counted from the
		// end of the schedule, using the second half of the rotation table.
		if (!reduced)
			scheduleBuild (kb, _rounds - 8 - block * 8, &ice_keyrot[8]);
	}
}
/*
* Return the key size, in bytes.
*/
int
IceKey::keySize () const
{
	// Each unit of _size corresponds to one 8-byte key block.
	const int bytesPerKeyBlock = 8;
	return _size * bytesPerKeyBlock;
}
/*
* Return the block size, in bytes.
*/
int
IceKey::blockSize () const
{
	// encrypt() and decrypt() always process exactly 8 bytes.
	const int bytesPerBlock = 8;
	return bytesPerBlock;
}
#endif // !_STATIC_LINKED || _SHARED_LIB

97
mathlib/almostequal.cpp Normal file
View File

@ -0,0 +1,97 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Fast ways to compare equality of two floats. Assumes
// sizeof(float) == sizeof(int) and we are using IEEE format.
//
// Source: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
//=====================================================================================//
#include <float.h>
#include <math.h>
#include <string.h>
#include "mathlib/mathlib.h"
static inline bool AE_IsInfinite(float a)
{
const int kInfAsInt = 0x7F800000;
// An infinity has an exponent of 255 (shift left 23 positions) and
// a zero mantissa. There are two infinities - positive and negative.
if ((*(int*)&a & 0x7FFFFFFF) == kInfAsInt)
return true;
return false;
}
// Returns true when a is a NaN.
static inline bool AE_IsNan(float a)
{
	// Copy the bit pattern out with memcpy; reading the float through an
	// int pointer violates the strict-aliasing rules.
	int bits;
	memcpy( &bits, &a, sizeof( bits ) );

	// a NAN has an exponent of 255 (shifted left 23 positions) and
	// a non-zero mantissa.
	const int exp = bits & 0x7F800000;
	const int mantissa = bits & 0x007FFFFF;
	return exp == 0x7F800000 && mantissa != 0;
}
// Returns the sign bit of a in place (bit 31): zero for positive values and
// +0, non-zero for negative values and -0. Only meaningful for comparing the
// signs of two floats.
static inline int AE_Sign(float a)
{
	// Copy the bit pattern out with memcpy; reading the float through an
	// int pointer violates the strict-aliasing rules.
	unsigned int bits;
	memcpy( &bits, &a, sizeof( bits ) );

	// The sign bit of a number is the high bit.
	return (int)( bits & 0x80000000u );
}
// This is the 'final' version of the AlmostEqualUlps function.
// The optional checks are included for completeness, but in many
// cases they are not necessary, or even not desirable.
// Compares two floats by the distance between their IEEE bit patterns.
// Returns true when a and b are at most maxUlps representable values apart.
// Infinities only compare equal to themselves, NaNs never compare equal, and
// values of opposite sign only compare equal when a == b (i.e. +0 and -0).
bool AlmostEqual(float a, float b, int maxUlps)
{
	// There are several optional checks that you can do, depending
	// on what behavior you want from your floating point comparisons.
	// These checks should not be necessary and they are included
	// mainly for completeness.

	// If a or b are infinity (positive or negative) then
	// only return true if they are exactly equal to each other -
	// that is, if they are both infinities of the same sign.
	// This check is only needed if you will be generating
	// infinities and you don't want them 'close' to numbers
	// near FLT_MAX.
	if (AE_IsInfinite(a) || AE_IsInfinite(b))
		return a == b;

	// If a or b are a NAN, return false. NANs are equal to nothing,
	// not even themselves.
	// This check is only needed if you will be generating NANs
	// and you use a maxUlps greater than 4 million or you want to
	// ensure that a NAN does not equal itself.
	if (AE_IsNan(a) || AE_IsNan(b))
		return false;

	// After adjusting floats so their representations are lexicographically
	// ordered as twos-complement integers a very small positive number
	// will compare as 'close' to a very small negative number. If this is
	// not desirable, and if you are on a platform that supports
	// subnormals (which is the only place the problem can show up) then
	// you need this check.
	// The check for a == b is because zero and negative zero have different
	// signs but are equal to each other.
	if (AE_Sign(a) != AE_Sign(b))
		return a == b;

	// Copy the bit patterns out with memcpy; reading a float through an
	// int pointer violates the strict-aliasing rules.
	int aInt;
	memcpy( &aInt, &a, sizeof( aInt ) );
	// Make aInt lexicographically ordered as a twos-complement int.
	// The subtraction is done in unsigned arithmetic, where wrap-around is
	// well defined, and yields minus the magnitude bits.
	if (aInt < 0)
		aInt = (int)( 0x80000000u - (unsigned int)aInt );

	// Make bInt lexicographically ordered as a twos-complement int
	int bInt;
	memcpy( &bInt, &b, sizeof( bInt ) );
	if (bInt < 0)
		bInt = (int)( 0x80000000u - (unsigned int)bInt );

	// Now we can compare aInt and bInt to find out how far apart a and b
	// are. Both have the same sign at this point, so the difference cannot
	// overflow.
	int intDiff = aInt - bInt;
	if (intDiff < 0)
		intDiff = -intDiff;

	return intDiff <= maxUlps;
}

181
mathlib/anorms.cpp Normal file
View File

@ -0,0 +1,181 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
//=============================================================================//
#if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
#include "mathlib/vector.h"
#include "mathlib/anorms.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// Table of precomputed direction vectors, one Vector per entry.
// 162 initializers are listed here; NUMVERTEXNORMALS comes from mathlib/anorms.h
// (not visible in this file) and is presumably 162 - confirm.
// NOTE(review): entries appear to be unit length (e.g. (0,0,1), (1,0,0)); verify
// before relying on that.
Vector g_anorms[NUMVERTEXNORMALS] =
{
Vector(-0.525731, 0.000000, 0.850651),
Vector(-0.442863, 0.238856, 0.864188),
Vector(-0.295242, 0.000000, 0.955423),
Vector(-0.309017, 0.500000, 0.809017),
Vector(-0.162460, 0.262866, 0.951056),
Vector(0.000000, 0.000000, 1.000000),
Vector(0.000000, 0.850651, 0.525731),
Vector(-0.147621, 0.716567, 0.681718),
Vector(0.147621, 0.716567, 0.681718),
Vector(0.000000, 0.525731, 0.850651),
Vector(0.309017, 0.500000, 0.809017),
Vector(0.525731, 0.000000, 0.850651),
Vector(0.295242, 0.000000, 0.955423),
Vector(0.442863, 0.238856, 0.864188),
Vector(0.162460, 0.262866, 0.951056),
Vector(-0.681718, 0.147621, 0.716567),
Vector(-0.809017, 0.309017, 0.500000),
Vector(-0.587785, 0.425325, 0.688191),
Vector(-0.850651, 0.525731, 0.000000),
Vector(-0.864188, 0.442863, 0.238856),
Vector(-0.716567, 0.681718, 0.147621),
Vector(-0.688191, 0.587785, 0.425325),
Vector(-0.500000, 0.809017, 0.309017),
Vector(-0.238856, 0.864188, 0.442863),
Vector(-0.425325, 0.688191, 0.587785),
Vector(-0.716567, 0.681718, -0.147621),
Vector(-0.500000, 0.809017, -0.309017),
Vector(-0.525731, 0.850651, 0.000000),
Vector(0.000000, 0.850651, -0.525731),
Vector(-0.238856, 0.864188, -0.442863),
Vector(0.000000, 0.955423, -0.295242),
Vector(-0.262866, 0.951056, -0.162460),
Vector(0.000000, 1.000000, 0.000000),
Vector(0.000000, 0.955423, 0.295242),
Vector(-0.262866, 0.951056, 0.162460),
Vector(0.238856, 0.864188, 0.442863),
Vector(0.262866, 0.951056, 0.162460),
Vector(0.500000, 0.809017, 0.309017),
Vector(0.238856, 0.864188, -0.442863),
Vector(0.262866, 0.951056, -0.162460),
Vector(0.500000, 0.809017, -0.309017),
Vector(0.850651, 0.525731, 0.000000),
Vector(0.716567, 0.681718, 0.147621),
Vector(0.716567, 0.681718, -0.147621),
Vector(0.525731, 0.850651, 0.000000),
Vector(0.425325, 0.688191, 0.587785),
Vector(0.864188, 0.442863, 0.238856),
Vector(0.688191, 0.587785, 0.425325),
Vector(0.809017, 0.309017, 0.500000),
Vector(0.681718, 0.147621, 0.716567),
Vector(0.587785, 0.425325, 0.688191),
Vector(0.955423, 0.295242, 0.000000),
Vector(1.000000, 0.000000, 0.000000),
Vector(0.951056, 0.162460, 0.262866),
Vector(0.850651, -0.525731, 0.000000),
Vector(0.955423, -0.295242, 0.000000),
Vector(0.864188, -0.442863, 0.238856),
Vector(0.951056, -0.162460, 0.262866),
Vector(0.809017, -0.309017, 0.500000),
Vector(0.681718, -0.147621, 0.716567),
Vector(0.850651, 0.000000, 0.525731),
Vector(0.864188, 0.442863, -0.238856),
Vector(0.809017, 0.309017, -0.500000),
Vector(0.951056, 0.162460, -0.262866),
Vector(0.525731, 0.000000, -0.850651),
Vector(0.681718, 0.147621, -0.716567),
Vector(0.681718, -0.147621, -0.716567),
Vector(0.850651, 0.000000, -0.525731),
Vector(0.809017, -0.309017, -0.500000),
Vector(0.864188, -0.442863, -0.238856),
Vector(0.951056, -0.162460, -0.262866),
Vector(0.147621, 0.716567, -0.681718),
Vector(0.309017, 0.500000, -0.809017),
Vector(0.425325, 0.688191, -0.587785),
Vector(0.442863, 0.238856, -0.864188),
Vector(0.587785, 0.425325, -0.688191),
Vector(0.688191, 0.587785, -0.425325),
Vector(-0.147621, 0.716567, -0.681718),
Vector(-0.309017, 0.500000, -0.809017),
Vector(0.000000, 0.525731, -0.850651),
Vector(-0.525731, 0.000000, -0.850651),
Vector(-0.442863, 0.238856, -0.864188),
Vector(-0.295242, 0.000000, -0.955423),
Vector(-0.162460, 0.262866, -0.951056),
Vector(0.000000, 0.000000, -1.000000),
Vector(0.295242, 0.000000, -0.955423),
Vector(0.162460, 0.262866, -0.951056),
Vector(-0.442863, -0.238856, -0.864188),
Vector(-0.309017, -0.500000, -0.809017),
Vector(-0.162460, -0.262866, -0.951056),
Vector(0.000000, -0.850651, -0.525731),
Vector(-0.147621, -0.716567, -0.681718),
Vector(0.147621, -0.716567, -0.681718),
Vector(0.000000, -0.525731, -0.850651),
Vector(0.309017, -0.500000, -0.809017),
Vector(0.442863, -0.238856, -0.864188),
Vector(0.162460, -0.262866, -0.951056),
Vector(0.238856, -0.864188, -0.442863),
Vector(0.500000, -0.809017, -0.309017),
Vector(0.425325, -0.688191, -0.587785),
Vector(0.716567, -0.681718, -0.147621),
Vector(0.688191, -0.587785, -0.425325),
Vector(0.587785, -0.425325, -0.688191),
Vector(0.000000, -0.955423, -0.295242),
Vector(0.000000, -1.000000, 0.000000),
Vector(0.262866, -0.951056, -0.162460),
Vector(0.000000, -0.850651, 0.525731),
Vector(0.000000, -0.955423, 0.295242),
Vector(0.238856, -0.864188, 0.442863),
Vector(0.262866, -0.951056, 0.162460),
Vector(0.500000, -0.809017, 0.309017),
Vector(0.716567, -0.681718, 0.147621),
Vector(0.525731, -0.850651, 0.000000),
Vector(-0.238856, -0.864188, -0.442863),
Vector(-0.500000, -0.809017, -0.309017),
Vector(-0.262866, -0.951056, -0.162460),
Vector(-0.850651, -0.525731, 0.000000),
Vector(-0.716567, -0.681718, -0.147621),
Vector(-0.716567, -0.681718, 0.147621),
Vector(-0.525731, -0.850651, 0.000000),
Vector(-0.500000, -0.809017, 0.309017),
Vector(-0.238856, -0.864188, 0.442863),
Vector(-0.262866, -0.951056, 0.162460),
Vector(-0.864188, -0.442863, 0.238856),
Vector(-0.809017, -0.309017, 0.500000),
Vector(-0.688191, -0.587785, 0.425325),
Vector(-0.681718, -0.147621, 0.716567),
Vector(-0.442863, -0.238856, 0.864188),
Vector(-0.587785, -0.425325, 0.688191),
Vector(-0.309017, -0.500000, 0.809017),
Vector(-0.147621, -0.716567, 0.681718),
Vector(-0.425325, -0.688191, 0.587785),
Vector(-0.162460, -0.262866, 0.951056),
Vector(0.442863, -0.238856, 0.864188),
Vector(0.162460, -0.262866, 0.951056),
Vector(0.309017, -0.500000, 0.809017),
Vector(0.147621, -0.716567, 0.681718),
Vector(0.000000, -0.525731, 0.850651),
Vector(0.425325, -0.688191, 0.587785),
Vector(0.587785, -0.425325, 0.688191),
Vector(0.688191, -0.587785, 0.425325),
Vector(-0.955423, 0.295242, 0.000000),
Vector(-0.951056, 0.162460, 0.262866),
Vector(-1.000000, 0.000000, 0.000000),
Vector(-0.850651, 0.000000, 0.525731),
Vector(-0.955423, -0.295242, 0.000000),
Vector(-0.951056, -0.162460, 0.262866),
Vector(-0.864188, 0.442863, -0.238856),
Vector(-0.951056, 0.162460, -0.262866),
Vector(-0.809017, 0.309017, -0.500000),
Vector(-0.864188, -0.442863, -0.238856),
Vector(-0.951056, -0.162460, -0.262866),
Vector(-0.809017, -0.309017, -0.500000),
Vector(-0.681718, 0.147621, -0.716567),
Vector(-0.681718, -0.147621, -0.716567),
Vector(-0.850651, 0.000000, -0.525731),
Vector(-0.688191, 0.587785, -0.425325),
Vector(-0.587785, 0.425325, -0.688191),
Vector(-0.425325, 0.688191, -0.587785),
Vector(-0.425325, -0.688191, -0.587785),
Vector(-0.587785, -0.425325, -0.688191),
Vector(-0.688191, -0.587785, -0.425325)
};
#endif // !_STATIC_LINKED || _SHARED_LIB

69
mathlib/bumpvects.cpp Normal file
View File

@ -0,0 +1,69 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $Workfile: $
// $Date: $
//
//-----------------------------------------------------------------------------
// $Log: $
//
// $NoKeywords: $
//=============================================================================//
#if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
#ifdef QUIVER
#include "r_local.h"
#endif
#include "mathlib/bumpvects.h"
#include "mathlib/vector.h"
#include <assert.h>
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// z is coming out of the face.
void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal,
const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] )
{
Vector tmpNormal;
bool leftHanded;
int i;
assert( NUM_BUMP_VECTS == 3 );
// Are we left or right handed?
CrossProduct( sVect, tVect, tmpNormal );
if( DotProduct( flatNormal, tmpNormal ) < 0.0f )
{
leftHanded = true;
}
else
{
leftHanded = false;
}
// Build a basis for the face around the phong normal
matrix3x4_t smoothBasis;
CrossProduct( phongNormal.Base(), sVect.Base(), smoothBasis[1] );
VectorNormalize( smoothBasis[1] );
CrossProduct( smoothBasis[1], phongNormal.Base(), smoothBasis[0] );
VectorNormalize( smoothBasis[0] );
VectorCopy( phongNormal.Base(), smoothBasis[2] );
if( leftHanded )
{
VectorNegate( smoothBasis[1] );
}
// move the g_localBumpBasis into world space to create bumpNormals
for( i = 0; i < 3; i++ )
{
VectorIRotate( g_localBumpBasis[i], smoothBasis, bumpNormals[i] );
}
}
#endif // !_STATIC_LINKED || _SHARED_LIB

View File

@ -0,0 +1,637 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Color conversion routines.
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
//-----------------------------------------------------------------------------
// Gamma conversion support
//-----------------------------------------------------------------------------
// Lookup tables for gamma conversion. In the part of BuildGammaTable visible in
// this file, texgammatable, lineartoscreen, texturetolinear,
// g_Mathlib_LinearToGamma and g_Mathlib_GammaToLinear are filled.
// The 256-entry tables are indexed by a 0..255 value; lineartoscreen is filled
// with index i standing for the linear value i/1023.
static byte texgammatable[256]; // palette is sent through this to convert to screen gamma
static float texturetolinear[256]; // texture (0..255) to linear (0..1)
static int lineartotexture[1024]; // linear (0..1) to texture (0..255)
static int lineartoscreen[1024]; // linear (0..1) to gamma corrected vertex light (0..255)
// build a lightmap texture to combine with surface texture, adjust for src*dst+dst*src, ramp reprogramming, etc
// NOTE(review): the two tables below are not static, so they have external linkage.
float lineartovertex[4096]; // linear (0..4) to screen corrected vertex space (0..1?)
unsigned char lineartolightmap[4096]; // linear (0..4) to screen corrected texture value (0..255)
static float g_Mathlib_GammaToLinear[256]; // gamma (0..1) to linear (0..1)
static float g_Mathlib_LinearToGamma[256]; // linear (0..1) to gamma (0..1)
// Precomputed table: power2_n[index] = 2**(index - 128) / 255, for index 0..255
// (so power2_n[128] is 1/255).
// This is aligned to 16-byte boundaries so that we can load it
// onto SIMD registers easily if needed (used by SSE version of lightmaps)
// NOTE(review): the ALIGN128 macro name suggests a 128-byte alignment rather than
// the 16 bytes stated above - confirm against the macro's definition.
// NOTE(review): roughly the first ten entries are smaller than FLT_MIN, i.e. they
// are subnormal as floats; CheckExponentTable below allows for them reading as 0.
// TODO: move this into the one DLL that actually uses it, instead of statically
// linking it everywhere via mathlib.
ALIGN128 float power2_n[256] = // 2**(index - 128) / 255
{
1.152445441982634800E-041, 2.304890883965269600E-041, 4.609781767930539200E-041, 9.219563535861078400E-041,
1.843912707172215700E-040, 3.687825414344431300E-040, 7.375650828688862700E-040, 1.475130165737772500E-039,
2.950260331475545100E-039, 5.900520662951090200E-039, 1.180104132590218000E-038, 2.360208265180436100E-038,
4.720416530360872100E-038, 9.440833060721744200E-038, 1.888166612144348800E-037, 3.776333224288697700E-037,
7.552666448577395400E-037, 1.510533289715479100E-036, 3.021066579430958200E-036, 6.042133158861916300E-036,
1.208426631772383300E-035, 2.416853263544766500E-035, 4.833706527089533100E-035, 9.667413054179066100E-035,
1.933482610835813200E-034, 3.866965221671626400E-034, 7.733930443343252900E-034, 1.546786088668650600E-033,
3.093572177337301200E-033, 6.187144354674602300E-033, 1.237428870934920500E-032, 2.474857741869840900E-032,
4.949715483739681800E-032, 9.899430967479363700E-032, 1.979886193495872700E-031, 3.959772386991745500E-031,
7.919544773983491000E-031, 1.583908954796698200E-030, 3.167817909593396400E-030, 6.335635819186792800E-030,
1.267127163837358600E-029, 2.534254327674717100E-029, 5.068508655349434200E-029, 1.013701731069886800E-028,
2.027403462139773700E-028, 4.054806924279547400E-028, 8.109613848559094700E-028, 1.621922769711818900E-027,
3.243845539423637900E-027, 6.487691078847275800E-027, 1.297538215769455200E-026, 2.595076431538910300E-026,
5.190152863077820600E-026, 1.038030572615564100E-025, 2.076061145231128300E-025, 4.152122290462256500E-025,
8.304244580924513000E-025, 1.660848916184902600E-024, 3.321697832369805200E-024, 6.643395664739610400E-024,
1.328679132947922100E-023, 2.657358265895844200E-023, 5.314716531791688300E-023, 1.062943306358337700E-022,
2.125886612716675300E-022, 4.251773225433350700E-022, 8.503546450866701300E-022, 1.700709290173340300E-021,
3.401418580346680500E-021, 6.802837160693361100E-021, 1.360567432138672200E-020, 2.721134864277344400E-020,
5.442269728554688800E-020, 1.088453945710937800E-019, 2.176907891421875500E-019, 4.353815782843751100E-019,
8.707631565687502200E-019, 1.741526313137500400E-018, 3.483052626275000900E-018, 6.966105252550001700E-018,
1.393221050510000300E-017, 2.786442101020000700E-017, 5.572884202040001400E-017, 1.114576840408000300E-016,
2.229153680816000600E-016, 4.458307361632001100E-016, 8.916614723264002200E-016, 1.783322944652800400E-015,
3.566645889305600900E-015, 7.133291778611201800E-015, 1.426658355722240400E-014, 2.853316711444480700E-014,
5.706633422888961400E-014, 1.141326684577792300E-013, 2.282653369155584600E-013, 4.565306738311169100E-013,
9.130613476622338300E-013, 1.826122695324467700E-012, 3.652245390648935300E-012, 7.304490781297870600E-012,
1.460898156259574100E-011, 2.921796312519148200E-011, 5.843592625038296500E-011, 1.168718525007659300E-010,
2.337437050015318600E-010, 4.674874100030637200E-010, 9.349748200061274400E-010, 1.869949640012254900E-009,
3.739899280024509800E-009, 7.479798560049019500E-009, 1.495959712009803900E-008, 2.991919424019607800E-008,
5.983838848039215600E-008, 1.196767769607843100E-007, 2.393535539215686200E-007, 4.787071078431372500E-007,
9.574142156862745000E-007, 1.914828431372549000E-006, 3.829656862745098000E-006, 7.659313725490196000E-006,
1.531862745098039200E-005, 3.063725490196078400E-005, 6.127450980392156800E-005, 1.225490196078431400E-004,
2.450980392156862700E-004, 4.901960784313725400E-004, 9.803921568627450800E-004, 1.960784313725490200E-003,
3.921568627450980300E-003, 7.843137254901960700E-003, 1.568627450980392100E-002, 3.137254901960784300E-002,
6.274509803921568500E-002, 1.254901960784313700E-001, 2.509803921568627400E-001, 5.019607843137254800E-001,
1.003921568627451000E+000, 2.007843137254901900E+000, 4.015686274509803900E+000, 8.031372549019607700E+000,
1.606274509803921500E+001, 3.212549019607843100E+001, 6.425098039215686200E+001, 1.285019607843137200E+002,
2.570039215686274500E+002, 5.140078431372548900E+002, 1.028015686274509800E+003, 2.056031372549019600E+003,
4.112062745098039200E+003, 8.224125490196078300E+003, 1.644825098039215700E+004, 3.289650196078431300E+004,
6.579300392156862700E+004, 1.315860078431372500E+005, 2.631720156862745100E+005, 5.263440313725490100E+005,
1.052688062745098000E+006, 2.105376125490196000E+006, 4.210752250980392100E+006, 8.421504501960784200E+006,
1.684300900392156800E+007, 3.368601800784313700E+007, 6.737203601568627400E+007, 1.347440720313725500E+008,
2.694881440627450900E+008, 5.389762881254901900E+008, 1.077952576250980400E+009, 2.155905152501960800E+009,
4.311810305003921500E+009, 8.623620610007843000E+009, 1.724724122001568600E+010, 3.449448244003137200E+010,
6.898896488006274400E+010, 1.379779297601254900E+011, 2.759558595202509800E+011, 5.519117190405019500E+011,
1.103823438081003900E+012, 2.207646876162007800E+012, 4.415293752324015600E+012, 8.830587504648031200E+012,
1.766117500929606200E+013, 3.532235001859212500E+013, 7.064470003718425000E+013, 1.412894000743685000E+014,
2.825788001487370000E+014, 5.651576002974740000E+014, 1.130315200594948000E+015, 2.260630401189896000E+015,
4.521260802379792000E+015, 9.042521604759584000E+015, 1.808504320951916800E+016, 3.617008641903833600E+016,
7.234017283807667200E+016, 1.446803456761533400E+017, 2.893606913523066900E+017, 5.787213827046133800E+017,
1.157442765409226800E+018, 2.314885530818453500E+018, 4.629771061636907000E+018, 9.259542123273814000E+018,
1.851908424654762800E+019, 3.703816849309525600E+019, 7.407633698619051200E+019, 1.481526739723810200E+020,
2.963053479447620500E+020, 5.926106958895241000E+020, 1.185221391779048200E+021, 2.370442783558096400E+021,
4.740885567116192800E+021, 9.481771134232385600E+021, 1.896354226846477100E+022, 3.792708453692954200E+022,
7.585416907385908400E+022, 1.517083381477181700E+023, 3.034166762954363400E+023, 6.068333525908726800E+023,
1.213666705181745400E+024, 2.427333410363490700E+024, 4.854666820726981400E+024, 9.709333641453962800E+024,
1.941866728290792600E+025, 3.883733456581585100E+025, 7.767466913163170200E+025, 1.553493382632634000E+026,
3.106986765265268100E+026, 6.213973530530536200E+026, 1.242794706106107200E+027, 2.485589412212214500E+027,
4.971178824424429000E+027, 9.942357648848857900E+027, 1.988471529769771600E+028, 3.976943059539543200E+028,
7.953886119079086300E+028, 1.590777223815817300E+029, 3.181554447631634500E+029, 6.363108895263269100E+029,
1.272621779052653800E+030, 2.545243558105307600E+030, 5.090487116210615300E+030, 1.018097423242123100E+031,
2.036194846484246100E+031, 4.072389692968492200E+031, 8.144779385936984400E+031, 1.628955877187396900E+032,
3.257911754374793800E+032, 6.515823508749587500E+032, 1.303164701749917500E+033, 2.606329403499835000E+033,
5.212658806999670000E+033, 1.042531761399934000E+034, 2.085063522799868000E+034, 4.170127045599736000E+034,
8.340254091199472000E+034, 1.668050818239894400E+035, 3.336101636479788800E+035, 6.672203272959577600E+035
};
// You can use this to double check the exponent table and assert that
// the precomputation is correct.
#ifdef DBGFLAG_ASSERT
#pragma warning(push)
#pragma warning( disable : 4189 ) // disable unused local variable warning
// Debug-only sanity check of the precomputed power2_n table: recomputes every
// entry as pow( 2, i - 128 ) / 255 at runtime and asserts that the stored
// value agrees with it.
static void CheckExponentTable()
{
for( int i = 0; i < 256; i++ )
{
// reference value for table slot i
float testAgainst = pow( 2.0f, i - 128 ) / 255.0f;
// diff and relativeDiff are never read by the code below (hence the C4189
// suppression wrapped around this function); presumably they are kept so the
// error can be inspected in a debugger.
float diff = testAgainst - power2_n[i] ;
float relativeDiff = diff / testAgainst;
// When the reference value comes out as exactly zero, only require the table
// entry to be tiny; otherwise require exact equality.
Assert( testAgainst == 0 ?
power2_n[i] < 1.16E-041 :
power2_n[i] == testAgainst );
}
}
#pragma warning(pop)
#endif
// Rebuilds the gamma / brightness lookup tables: texgammatable, lineartoscreen,
// texturetolinear, g_Mathlib_LinearToGamma, g_Mathlib_GammaToLinear,
// lineartotexture, lineartovertex and lineartolightmap.
//   gamma      - display gamma; clamped to at most 3.0 before 1/gamma is taken
//                for texgammatable and lineartoscreen
//   texGamma   - gamma exponent applied to texture values
//   brightness - shapes the lineartoscreen curve (see g3 and the "scale up" step)
//   overbright - 2 or 4 scale the vertex/lightmap tables by 0.5 / 0.25; any
//                other value leaves them unscaled
void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright )
{
int i, inf;
float g1, g3;
// Con_Printf("BuildGammaTable %.1f %.1f %.1f\n", g, v_lightgamma.GetFloat(), v_texgamma.GetFloat() );
float g = gamma;
if (g > 3.0)
{
g = 3.0;
}
// NOTE(review): there is no lower bound on gamma; a value of 0 divides by zero here.
g = 1.0 / g;
g1 = texGamma * g;
// g3 is the input level that the "shift up" step below maps to 0.125.
if (brightness <= 0.0)
{
g3 = 0.125;
}
else if (brightness > 1.0)
{
g3 = 0.05;
}
else
{
g3 = 0.125 - (brightness * brightness) * 0.075;
}
// texgammatable: 0..255 input raised to texGamma / gamma, result clamped to 0..255
for (i=0 ; i<256 ; i++)
{
inf = 255 * pow ( i/255.f, g1 );
if (inf < 0)
inf = 0;
if (inf > 255)
inf = 255;
texgammatable[i] = inf;
}
// lineartoscreen: 1024 linear steps -> brightness-adjusted, gamma-corrected 0..255
for (i=0 ; i<1024 ; i++)
{
float f;
f = i / 1023.0;
// scale up
if (brightness > 1.0)
f = f * brightness;
// shift up
if (f <= g3)
f = (f / g3) * 0.125;
else
f = 0.125 + ((f - g3) / (1.0 - g3)) * 0.875;
// convert linear space to desired gamma space
inf = 255 * pow ( f, g );
if (inf < 0)
inf = 0;
if (inf > 255)
inf = 255;
lineartoscreen[i] = inf;
}
/*
for (i=0 ; i<1024 ; i++)
{
// convert from screen gamma space to linear space
lineargammatable[i] = 1023 * pow ( i/1023.0, v_gamma.GetFloat() );
// convert from linear gamma space to screen space
screengammatable[i] = 1023 * pow ( i/1023.0, 1.0 / v_gamma.GetFloat() );
}
*/
for (i=0 ; i<256 ; i++)
{
// convert from nonlinear texture space (0..255) to linear space (0..1)
texturetolinear[i] = pow( i / 255.f, texGamma );
// convert from linear space (0..1) to nonlinear (sRGB) space (0..1)
g_Mathlib_LinearToGamma[i] = LinearToGammaFullRange( i / 255.f );
// convert from sRGB gamma space (0..1) to linear space (0..1)
g_Mathlib_GammaToLinear[i] = GammaToLinearFullRange( i / 255.f );
}
for (i=0 ; i<1024 ; i++)
{
// convert from linear space (0..1) to nonlinear texture space (0..255)
lineartotexture[i] = pow( i / 1023.0, 1.0 / texGamma ) * 255;
}
#if 0
for (i=0 ; i<256 ; i++)
{
float f;
// convert from nonlinear lightmap space (0..255) to linear space (0..4)
// f = (i / 255.0) * sqrt( 4 );
f = i * (2.0 / 255.0);
f = f * f;
texlighttolinear[i] = f;
}
#endif
{
float f;
float overbrightFactor = 1.0f;
// Can't do overbright without texcombine
// UNDONE: Add GAMMA ramp to rectify this
if ( overbright == 2 )
{
overbrightFactor = 0.5;
}
else if ( overbright == 4 )
{
overbrightFactor = 0.25;
}
for (i=0 ; i<4096 ; i++)
{
// convert from linear 0..4 (x1024) to screen corrected vertex space (0..1?)
// NOTE(review): this uses the raw gamma argument, not the value clamped to
// 3.0 that the earlier tables use - confirm that the difference is intended.
f = pow ( i/1024.0, 1.0 / gamma );
lineartovertex[i] = f * overbrightFactor;
if (lineartovertex[i] > 1)
lineartovertex[i] = 1;
// same value scaled to 0..255, rounded and clamped, stored as a byte
int nLightmap = RoundFloatToInt( f * 255 * overbrightFactor );
nLightmap = clamp( nLightmap, 0, 255 );
lineartolightmap[i] = (unsigned char)nLightmap;
}
}
}
// Decodes a gamma-space value to linear space with a fixed 2.2 power curve.
// The input is handed straight to pow(); no clamping or table lookup is done.
float GammaToLinearFullRange( float gamma )
{
	const float flDecodeExponent = 2.2f;
	return pow( gamma, flDecodeExponent );
}
// Encodes a linear value into gamma space with a fixed 1/2.2 power curve.
// The input is handed straight to pow(); no clamping or table lookup is done.
float LinearToGammaFullRange( float linear )
{
	const float flEncodeExponent = 1.0f / 2.2f;
	return pow( linear, flEncodeExponent );
}
// Table-driven gamma -> linear conversion (256-entry g_Mathlib_GammaToLinear).
// Negative inputs return 0. Inputs of 0.95 or more return 1 without consulting
// the table; the original code tags this cutoff "X360TEMP" and leaves its
// "gamma <= 1.0f" assert disabled. Use GammaToLinearFullRange for values the
// table does not cover.
float GammaToLinear( float gamma )
{
	Assert( s_bMathlibInitialized );

	float flLinear;
	if ( gamma < 0.0f )
	{
		flLinear = 0.0f;
	}
	else if ( gamma >= 0.95f )
	{
		flLinear = 1.0f;
	}
	else
	{
		// quantize the input to one of the 256 table slots
		int index = RoundFloatToInt( gamma * 255.0f );
		Assert( index >= 0 && index < 256 );
		flLinear = g_Mathlib_GammaToLinear[index];
	}
	return flLinear;
}
// Table-driven linear -> gamma conversion (256-entry g_Mathlib_LinearToGamma).
// Negative inputs return 0. Inputs above 1 trip an assert and return 1;
// use LinearToGammaFullRange for such values.
float LinearToGamma( float linear )
{
	Assert( s_bMathlibInitialized );

	float flGamma;
	if ( linear < 0.0f )
	{
		flGamma = 0.0f;
	}
	else if ( linear > 1.0f )
	{
		// Use LinearToGammaFullRange maybe if you trip this.
		Assert( 0 );
		flGamma = 1.0f;
	}
	else
	{
		// quantize the input to one of the 256 table slots
		int index = RoundFloatToInt( linear * 255.0f );
		Assert( index >= 0 && index < 256 );
		flGamma = g_Mathlib_LinearToGamma[index];
	}
	return flGamma;
}
//-----------------------------------------------------------------------------
// Helper functions to convert between sRGB and 360 gamma space
//-----------------------------------------------------------------------------
// Decodes an sRGB-encoded value to linear. The input is clamped to 0..1;
// values at or below 0.04045 use the linear segment ( x / 12.92 ), the rest
// use the ( ( x + 0.055 ) / 1.055 ) ^ 2.4 power segment.
float SrgbGammaToLinear( float flSrgbGammaValue )
{
	const float flEncoded = clamp( flSrgbGammaValue, 0.0f, 1.0f );
	if ( flEncoded <= 0.04045f )
	{
		return flEncoded / 12.92f;
	}
	return pow( ( flEncoded + 0.055f ) / 1.055f, 2.4f );
}
// Encodes a linear value as sRGB. The input is clamped to 0..1; values at or
// below 0.0031308 use the linear segment ( x * 12.92 ), the rest use
// 1.055 * x ^ ( 1 / 2.4 ) - 0.055.
float SrgbLinearToGamma( float flLinearValue )
{
	const float flLinear = clamp( flLinearValue, 0.0f, 1.0f );
	if ( flLinear <= 0.0031308f )
	{
		return flLinear * 12.92f;
	}
	return ( 1.055f * pow( flLinear, ( 1.0f / 2.4f ) ) ) - 0.055f;
}
// Decodes a "360 gamma" value (0..1) to linear (0..1). The curve is evaluated
// in four segments split at 64/255, 96/255 and 192/255; each segment produces
// a value on a 0..1023 scale, which is then normalized and clamped.
float X360GammaToLinear( float fl360GammaValue )
{
	const float flGamma = clamp( fl360GammaValue, 0.0f, 1.0f );

	float flLinear;
	if ( flGamma < ( 64.0f / 255.0f ) )
	{
		// lowest segment: straight scale, no correction term
		flLinear = flGamma * 255.0f;
	}
	else if ( flGamma < ( 96.0f / 255.0f ) )
	{
		flLinear = flGamma * ( 255.0f * 2.0f ) - 64.0f;
		flLinear += floor( flLinear * ( 1.0f / 512.0f ) );
	}
	else if ( flGamma < ( 192.0f / 255.0f ) )
	{
		flLinear = flGamma * ( 255.0f * 4.0f ) - 256.0f;
		flLinear += floor( flLinear * ( 1.0f / 256.0f ) );
	}
	else
	{
		flLinear = flGamma * ( 255.0f * 8.0f ) - 1024.0f;
		flLinear += floor( flLinear * ( 1.0f / 128.0f ) );
	}

	// bring the 0..1023 value back to 0..1
	flLinear *= 1.0f / 1023.0f;
	return clamp( flLinear, 0.0f, 1.0f );
}
// Encodes a linear value (0..1) as a "360 gamma" value (0..1). The curve is
// evaluated in four linear segments split at 64/1023, 128/1023 and 512/1023;
// the result is clamped to 0..1.
float X360LinearToGamma( float flLinearValue )
{
	const float flLinear = clamp( flLinearValue, 0.0f, 1.0f );

	float flGamma;
	if ( flLinear < ( 64.0f / 1023.0f ) )
	{
		flGamma = flLinear * ( 1023.0f * ( 1.0f / 255.0f ) );
	}
	else if ( flLinear < ( 128.0f / 1023.0f ) )
	{
		flGamma = flLinear * ( ( 1023.0f / 2.0f ) * ( 1.0f / 255.0f ) ) + ( 32.0f / 255.0f );
	}
	else if ( flLinear < ( 512.0f / 1023.0f ) )
	{
		flGamma = flLinear * ( ( 1023.0f / 4.0f ) * ( 1.0f / 255.0f ) ) + ( 64.0f / 255.0f );
	}
	else
	{
		// an input of 1.0 lands at 1.0034313725490196078431372549016 here, so cap it
		flGamma = flLinear * ( ( 1023.0f /8.0f ) * ( 1.0f / 255.0f ) ) + ( 128.0f /255.0f );
		if ( flGamma > 1.0f )
		{
			flGamma = 1.0f;
		}
	}

	return clamp( flGamma, 0.0f, 1.0f );
}
// Re-encodes an sRGB gamma value as a 360 gamma value by decoding it to linear
// with SrgbGammaToLinear and encoding the result with X360LinearToGamma.
float SrgbGammaTo360Gamma( float flSrgbGammaValue )
{
	return X360LinearToGamma( SrgbGammaToLinear( flSrgbGammaValue ) );
}
// Converts a texture-space value (0..255) to linear via the texturetolinear
// table. Values below 0 return 0 and values above 255 return 1.
float TextureToLinear( int c )
{
	Assert( s_bMathlibInitialized );
	if ( c < 0 )
	{
		return 0.0f;
	}
	if ( c > 255 )
	{
		return 1.0f;
	}
	return texturetolinear[c];
}
// Converts a linear value (assumed 0..1) to texture space via the
// lineartotexture table. The input is scaled to a 0..1023 table index
// (truncating) and the index is clamped to the table bounds.
int LinearToTexture( float f )
{
	Assert( s_bMathlibInitialized );
	int nSlot = (int)( f * 1023 );	// assume 0..1 range
	nSlot = ( nSlot < 0 ) ? 0 : ( ( nSlot > 1023 ) ? 1023 : nSlot );
	return lineartotexture[nSlot];
}
// Converts a linear value (assumed 0..1) to screen gamma (0..255) via the
// lineartoscreen table. The input is scaled to a 0..1023 table index
// (truncating) and the index is clamped to the table bounds.
int LinearToScreenGamma( float f )
{
	Assert( s_bMathlibInitialized );
	int nSlot = (int)( f * 1023 );	// assume 0..1 range
	nSlot = ( nSlot < 0 ) ? 0 : ( ( nSlot > 1023 ) ? 1023 : nSlot );
	return lineartoscreen[nSlot];
}
// Expands a ColorRGBExp32 into a Vector: each of r, g, b is passed through
// TexLightToLinear together with the shared exponent, then scaled by 255.
void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out )
{
	Assert( s_bMathlibInitialized );

	// FIXME: Why is there a factor of 255 built into this?
	const float flScale = 255.0f;
	out.x = flScale * TexLightToLinear( in.r, in.exponent );
	out.y = flScale * TexLightToLinear( in.g, in.exponent );
	out.z = flScale * TexLightToLinear( in.b, in.exponent );
}
#if 0
// Loop-based exponent search. This version sits in the "#if 0" half of the
// conditional, so it is compiled out.
// Returns the power of two p such that in * 2^(-p) falls in the target range;
// returns 0 for an input of exactly zero.
// assumes that the desired mantissa range is 128..255
static int VectorToColorRGBExp32_CalcExponent( float in )
{
int power = 0;
if( in != 0.0f )
{
// too large: halve until the value is at most 255
while( in > 255.0f )
{
power += 1;
in *= 0.5f;
}
// too small: double until the value is at least 128
// NOTE(review): a negative input never reaches 128, so this loop would not terminate.
while( in < 128.0f )
{
power -= 1;
in *= 2.0f;
}
}
return power;
}
// Loop/pow based conversion of a float colour to the shared-exponent
// ColorRGBExp32 format. This version sits in the "#if 0" half of the
// conditional, so it is compiled out.
void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
{
Vector v = vin;
Assert( s_bMathlibInitialized );
Assert( v.x >= 0.0f && v.y >= 0.0f && v.z >= 0.0f );
int i;
float max = v[0];
for( i = 1; i < 3; i++ )
{
// Get the maximum value.
if( v[i] > max )
{
max = v[i];
}
}
// figure out the exponent for this luxel.
int exponent = VectorToColorRGBExp32_CalcExponent( max );
// make the exponent fits into a signed byte.
if( exponent < -128 )
{
exponent = -128;
}
else if( exponent > 127 )
{
exponent = 127;
}
// undone: optimize with a table
// scalar = 2^(-exponent): brings the largest channel into the mantissa range
float scalar = pow( 2.0f, -exponent );
// convert to mantissa x 2^exponent format
for( i = 0; i < 3; i++ )
{
v[i] *= scalar;
// clamp
if( v[i] > 255.0f )
{
v[i] = 255.0f;
}
}
// truncate each scaled channel to a byte and store the shared exponent
c.r = ( unsigned char )v[0];
c.g = ( unsigned char )v[1];
c.b = ( unsigned char )v[2];
c.exponent = ( signed char )exponent;
}
#else
// given a floating point number f, return an exponent e such that
// for f' = f * 2^(-e), f' is on [128..256).
// Uses IEEE 754 representation to directly extract this information
// from the float. Returns 0 when f is exactly zero.
inline static int VectorToColorRGBExp32_CalcExponent( const float *pin )
{
// The thing we will take advantage of here is that the exponent component
// is stored in the float itself, and because we want to map to 128..255, we
// want an "ideal" exponent of 2^7. So, we return the input's exponent minus 7.
// Thus if you pass in 32 (represented in IEEE 754 as 2^5), this function will
// return -2; the caller scales by 2^(-e) = 2^2 (because 32 * 2^2 = 128).
if (*pin == 0.0f)
return 0;
// NOTE(review): reads the float's bits through a cast pointer; this relies on
// the compiler tolerating that form of type punning.
unsigned int fbits = *reinterpret_cast<const unsigned int *>(pin);
// the exponent component is bits 23..30, and biased by +127
const unsigned int biasedSeven = 7 + 127;
// the mask drops the sign bit (bit 31) and the mantissa (bits 0..22)
signed int expComponent = ( fbits & 0x7F800000 ) >> 23;
expComponent -= biasedSeven; // now the input exponent minus seven (negative if the input exponent was less than 7)
return expComponent;
}
/// Slightly faster version of the function to turn a float-vector color into
/// a compressed-exponent notation 32bit color. However, still not SIMD optimized.
/// PS3 developer: note there is a movement of a float onto an int here, which is
/// bad on the base registers -- consider doing this as Altivec code, or better yet
/// moving it onto the cell.
/// \warning: Assumes an IEEE 754 single-precision float representation! Those of you
/// porting to an 8080 are out of luck.
/// The largest of vin.x/y/z picks a shared power-of-two exponent; all three
/// channels are scaled by 2^(-exponent), truncated to integers and stored in
/// c.r/c.g/c.b, with the exponent stored in c.exponent.
void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
{
Assert( s_bMathlibInitialized );
Assert( vin.x >= 0.0f && vin.y >= 0.0f && vin.z >= 0.0f );
// work out which of the channels is the largest ( we will use that to map the exponent )
// this is a sluggish branch-based decision tree -- most architectures will offer a [max]
// assembly opcode to do this faster.
const float *pMax;
if (vin.x > vin.y)
{
if (vin.x > vin.z)
{
pMax = &vin.x;
}
else
{
pMax = &vin.z;
}
}
else
{
if (vin.y > vin.z)
{
pMax = &vin.y;
}
else
{
pMax = &vin.z;
}
}
// now work out the exponent for this luxel.
signed int exponent = VectorToColorRGBExp32_CalcExponent( pMax );
// make sure the exponent fits into a signed byte.
// (in single precision format this is assured because it was a signed byte to begin with)
// NOTE(review): a non-zero input whose exponent bits are all zero (a subnormal)
// makes CalcExponent return -134, which this assert rejects - confirm that
// callers never pass such values.
Assert(exponent > -128 && exponent <= 127);
// promote the exponent back onto a scalar that we'll use to normalize all the numbers
float scalar;
{
// build the float 2^(-exponent) directly: biased exponent field 127 - exponent,
// zero sign bit and zero mantissa
unsigned int fbits = (127 - exponent) << 23;
scalar = *reinterpret_cast<float *>(&fbits);
}
// We can totally wind up above 255 and that's okay--but above 256 would be right out.
Assert(vin.x * scalar < 256.0f &&
vin.y * scalar < 256.0f &&
vin.z * scalar < 256.0f);
// This awful construction is necessary to prevent VC2005 from using the
// fldcw/fnstcw control words around every float-to-unsigned-char operation.
{
// go through int first, then narrow on assignment to the colour channels
int red = (vin.x * scalar);
int green = (vin.y * scalar);
int blue = (vin.z * scalar);
c.r = red;
c.g = green;
c.b = blue;
}
/*
c.r = ( unsigned char )(vin.x * scalar);
c.g = ( unsigned char )(vin.y * scalar);
c.b = ( unsigned char )(vin.z * scalar);
*/
c.exponent = ( signed char )exponent;
}
#endif

63
mathlib/datagen.pl Normal file
View File

@ -0,0 +1,63 @@
#! perl
use Text::Wrap;
# generate output data for noise generators
# Prints a C header to stdout: four integer permutation tables (perm_a..perm_d)
# followed by three float coordinate tables (impulse_[xyz]coords).
# The seed is fixed, so the generated tables are the same on every run as long
# as the sequence of rand() calls below is left unchanged.
srand(31456);
# File banner for the generated header (heredoc body is emitted verbatim).
print <<END
//========= Copyright <20> 1996-2006, Valve Corporation, All rights reserved. ============//
//
// Purpose: static data for noise() primitives.
//
// \$Workfile: \$
// \$NoKeywords: \$
//=============================================================================//
//
// **** DO NOT EDIT THIS FILE. GENERATED BY DATAGEN.PL ****
//
END
;
# Start from the identity permutation of 0..255; every table below is a
# further in-place shuffle of this same array.
@perm_a=0..255;
&fisher_yates_shuffle(\@perm_a);
# Wrap the comma-joined values at 78 columns, breaking only at commas.
$Text::Wrap::Columns=78;
$Text::Wrap::break=",";
$Text::Wrap::separator=",\n";
print "static int perm_a[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
&fisher_yates_shuffle(\@perm_a);
print "static int perm_b[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
&fisher_yates_shuffle(\@perm_a);
print "static int perm_c[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
&fisher_yates_shuffle(\@perm_a);
print "static int perm_d[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
# Replace each integer 0..255 with its value divided by 255, formatted with
# "%f", so the remaining tables hold floats in 0..1.
for ($i=0;$i<256;$i++)
{
$float_perm=(1.0/255.0)*$perm_a[$i];
$perm_a[$i] = sprintf("%f",$float_perm);
}
&fisher_yates_shuffle(\@perm_a);
print "static float impulse_xcoords[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
&fisher_yates_shuffle(\@perm_a);
print "static float impulse_ycoords[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
&fisher_yates_shuffle(\@perm_a);
print "static float impulse_zcoords[]={\n",wrap(' ',' ',join(",",@perm_a)),"\n};\n\n";
# fisher_yates_shuffle( \@array ) : shuffle @array in place into a random
# permutation. Walks from the last index down to 1, swapping each element
# with one at a randomly chosen index no greater than the current one.
# rand is called exactly once per step.
sub fisher_yates_shuffle {
    my ($aref) = @_;
    my $idx = scalar @$aref;
    while (--$idx) {
        my $pick = int rand($idx + 1);
        next if $pick == $idx;
        @$aref[$idx, $pick] = @$aref[$pick, $idx];
    }
}

30
mathlib/halton.cpp Normal file
View File

@ -0,0 +1,30 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
//=====================================================================================//
#include <halton.h>
// Sets up a generator for base b: stores the base both as an integer and as a
// float (the float copy is used for the divisions in GetElement) and starts
// the seed at 1.
HaltonSequenceGenerator_t::HaltonSequenceGenerator_t(int b)
{
	seed = 1;
	base = b;
	fbase = static_cast<float>( b );
}
// Returns element number elem of the sequence for this generator's base: the
// base-`base` digits of elem are mirrored around the radix point, so digit k
// (counting from the least significant) contributes digit / base^(k+1).
// Element 0 is 0.
//
// The index comes from the elem argument. Previously the argument was ignored
// and the member `seed` was used instead, so any caller that passes the member
// `seed` as elem may see its sequence shift with this fix.
// NOTE(review): a base of 0 divides by zero and a base of 1 never terminates;
// presumably callers only construct generators with base >= 2.
float HaltonSequenceGenerator_t::GetElement(int elem)
{
	int remaining = elem;
	float ret = 0.0;
	float base_inv = 1.0 / fbase;	// weight of the current digit: 1 / base^(k+1)
	while ( remaining )
	{
		int dig = remaining % base;
		ret += ( (float) dig ) * base_inv;
		base_inv /= fbase;
		remaining /= base;
	}
	return ret;
}

96
mathlib/imagequant.cpp Normal file
View File

@ -0,0 +1,96 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//
//=============================================================================//
#include <quantize.h>
#include <minmax.h>
// Number of derived values appended to each pixel's R,G,B when quantizing.
#define N_EXTRAVALUES 1
// Total values per sample handed to the quantizer.
#define N_DIMENSIONS (3+N_EXTRAVALUES)
// Channel c of pixel (x,y); the source image has 4 bytes per pixel and rows of
// Width pixels. Relies on Image and Width being in scope at the point of use.
#define PIXEL(x,y,c) Image[4*((x)+((Width*(y))))+c]
// Weights passed to Quantize and FindMatch; four entries, matching N_DIMENSIONS.
static uint8 Weights[]={5,7,4,8};
// Per-extra-value R,G,B coefficients; the weighted sum is shifted right by 8.
// The single row sums to 255 - presumably a luminance approximation.
static int ExtraValueXForms[3*N_EXTRAVALUES]={
76,151,28,
};
// Sizes the per-column dithering error buffer in ColorQuantize.
#define MAX_QUANTIZE_IMAGE_WIDTH 4096
// Quantizes a 4-bytes-per-pixel image (see PIXEL) to a palettized image.
//   Image       - source pixels, Width * Height, 4 bytes each (channels 0..2 are used)
//   flags       - QUANTFLAGS_NODITHER disables error-diffusion dithering
//   ncolors     - passed through to Quantize
//   out_pixels  - receives one palette index per pixel (Width * Height bytes)
//   out_palette - receives 256 RGB triples (768 bytes); entries with no
//                 quantizer node are left filled with 0x55
//   firstcolor  - passed through to Quantize
// Dithering needs Width + 1 columns of error storage, so images wider than
// MAX_QUANTIZE_IMAGE_WIDTH are quantized without dithering instead of
// overrunning the on-stack error buffer.
void ColorQuantize(uint8 const *Image,
				   int Width,
				   int Height,
				   int flags, int ncolors,
				   uint8 *out_pixels,
				   uint8 *out_palette,
				   int firstcolor)
{
	// Error-diffusion accumulators: [column][channel][row parity].
	int Error[MAX_QUANTIZE_IMAGE_WIDTH+1][3][2];

	// The dither pass writes Error[x+1] for x up to Width-1; only allow it when
	// that stays inside the buffer.
	const bool bDither = !( flags & QUANTFLAGS_NODITHER ) &&
		( Width <= MAX_QUANTIZE_IMAGE_WIDTH );

	struct Sample *s=AllocSamples(Width*Height,N_DIMENSIONS);
	int x,y,c;

	// Build one N_DIMENSIONS-wide sample per pixel: R,G,B plus the derived extras.
	for(y=0;y<Height;y++)
		for(x=0;x<Width;x++)
		{
			for(c=0;c<3;c++)
				NthSample(s,y*Width+x,N_DIMENSIONS)->Value[c]=PIXEL(x,y,c);
			// now, let's generate extra values to quantize on
			for(int i=0;i<N_EXTRAVALUES;i++)
			{
				int val1=0;
				for(c=0;c<3;c++)
					val1+=PIXEL(x,y,c)*ExtraValueXForms[i*3+c];
				val1>>=8;
				// Extra value i lives in slot 3+i. (The slot used to be addressed
				// through the leftover loop counter c, which is only right for
				// the first extra value.)
				NthSample(s,y*Width+x,N_DIMENSIONS)->Value[3+i]=(uint8)
					(min(255,max(0,val1)));
			}
		}

	struct QuantizedValue *q=Quantize(s,Width*Height,N_DIMENSIONS,
									  ncolors,Weights,firstcolor);
	// NOTE(review): assumes AllocSamples allocates with new[] - confirm.
	delete[] s;

	// Fill the palette: mark every entry with 0x55, then overwrite the ones the
	// quantizer actually produced.
	memset(out_palette,0x55,768);
	for(int p=0;p<256;p++)
	{
		struct QuantizedValue *v=FindQNode(q,p);
		if (v)
			for(c=0;c<3;c++)
				out_palette[p*3+c]=v->Mean[c];
	}

	// Map every pixel to its closest palette entry, diffusing the error to the
	// right and to the next row when dithering is enabled.
	memset(Error,0,sizeof(Error));
	for(y=0;y<Height;y++)
	{
		// The two parity slots alternate roles each row: one holds the error
		// consumed by this row, the other collects error for the next row.
		int ErrorUse=y & 1;
		int ErrorUpdate=ErrorUse^1;
		for(x=0;x<Width;x++)
		{
			uint8 samp[3];
			for(c=0;c<3;c++)
			{
				int tryc=PIXEL(x,y,c);
				if (bDither)
				{
					tryc+=Error[x][c][ErrorUse];
					Error[x][c][ErrorUse]=0;
				}
				samp[c]=(uint8) min(255,max(0,tryc));
			}
			struct QuantizedValue *f=FindMatch(samp,3,Weights,q);
			out_pixels[Width*y+x]=(uint8) (f->value);
			if (bDither)
				for(int i=0;i<3;i++)
				{
					// 3/8 of the error goes right, 3/8 goes down, and the
					// remainder goes down-right
					int newerr=samp[i]-f->Mean[i];
					int orthog_error=(newerr*3)/8;
					Error[x+1][i][ErrorUse]+=orthog_error;
					Error[x][i][ErrorUpdate]=orthog_error;
					Error[x+1][i][ErrorUpdate]=newerr-2*orthog_error;
				}
		}
	}
	if (q) FreeQuantization(q);
}

312
mathlib/lightdesc.cpp Normal file
View File

@ -0,0 +1,312 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
//=====================================================================================//
#include <ssemath.h>
#include <lightdesc.h>
#include "mathlib.h"
// Recomputes everything that is derived from the light's primary settings:
// the optimization flags (which attenuation terms are non-zero), the cone
// cosines and their reciprocal spread for spot lights, a far-away position
// for directional lights, and the squared range.
void LightDesc_t::RecalculateDerivedValues(void)
{
	m_Flags = LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED;
	if ( m_Attenuation0 )
	{
		m_Flags |= LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0;
	}
	if ( m_Attenuation1 )
	{
		m_Flags |= LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1;
	}
	if ( m_Attenuation2 )
	{
		m_Flags |= LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2;
	}

	if ( m_Type == MATERIAL_LIGHT_SPOT )
	{
		m_ThetaDot = cos( m_Theta );
		m_PhiDot = cos( m_Phi );
		const float flConeSpread = m_ThetaDot - m_PhiDot;
		const bool bDegenerateCone = !( flConeSpread > 1.0e-10 );
		if ( bDegenerateCone )
		{
			// hard falloff instead of divide by zero
			OneOver_ThetaDot_Minus_PhiDot = 1.0;
		}
		else
		{
			// note - this quantity is very sensitive to round off error. the sse
			// reciprocal approximation won't cut it here.
			OneOver_ThetaDot_Minus_PhiDot = 1.0 / flConeSpread;
		}
	}
	else if ( m_Type == MATERIAL_LIGHT_DIRECTIONAL )
	{
		// set position to be real far away in the right direction
		m_Position = m_Direction;
		m_Position *= 2.0e6;
	}

	m_RangeSquared = m_Range * m_Range;
}
void LightDesc_t::ComputeLightAtPointsForDirectional(
const FourVectors &pos, const FourVectors &normal,
FourVectors &color, bool DoHalfLambert ) const
{
FourVectors delta;
delta.DuplicateVector(m_Direction);
// delta.VectorNormalizeFast();
fltx4 strength=delta*normal;
if (DoHalfLambert)
{
strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives);
}
else
strength=MaxSIMD(Four_Zeros,delta*normal);
color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(m_Color.x)));
color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(m_Color.y)));
color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(m_Color.z)));
}
// Adds this light's contribution to `color` for four points at once.
//   pos, normal   - the four sample positions and their surface normals
//   color         - accumulated into (not overwritten)
//   DoHalfLambert - use 0.5 * dot + 0.5 instead of max( 0, dot ) for the
//                   incidence term
// Directional lights are forwarded to ComputeLightAtPointsForDirectional;
// point and spot lights apply distance attenuation, an optional range cutoff
// and, for spot lights, cone falloff.
void LightDesc_t::ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal,
FourVectors &color, bool DoHalfLambert ) const
{
FourVectors delta;
Assert((m_Type==MATERIAL_LIGHT_POINT) || (m_Type==MATERIAL_LIGHT_SPOT) || (m_Type==MATERIAL_LIGHT_DIRECTIONAL));
switch (m_Type)
{
case MATERIAL_LIGHT_POINT:
case MATERIAL_LIGHT_SPOT:
// delta = light position - sample position
delta.DuplicateVector(m_Position);
delta-=pos;
break;
case MATERIAL_LIGHT_DIRECTIONAL:
ComputeLightAtPointsForDirectional( pos, normal, color, DoHalfLambert );
return;
}
// squared distance, floored at 1; the range test below uses this floored value
fltx4 dist2 = delta*delta;
dist2=MaxSIMD( Four_Ones, dist2 );
// falloff starts as the attenuation polynomial a0 + a1*d + a2*d^2 (only the
// terms flagged as non-zero are added) and is then inverted
fltx4 falloff;
if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 )
{
falloff = ReplicateX4(m_Attenuation0);
}
else
falloff= Four_Epsilons;
if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 )
{
falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation1),SqrtEstSIMD(dist2)));
}
if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 )
{
falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation2),dist2));
}
falloff=ReciprocalEstSIMD(falloff);
// Cull out light beyond this radius
// now, zero out elements for which dist2 was > range^2. !!speed!! lights should store dist^2 in sse format
// (a range of exactly 0 skips the cutoff)
if (m_Range != 0.f)
{
fltx4 RangeSquared=ReplicateX4(m_RangeSquared); // !!speed!!
falloff=AndSIMD(falloff,CmpLtSIMD(dist2,RangeSquared));
}
delta.VectorNormalizeFast();
// incidence term: dot of the direction to the light with the normal
fltx4 strength=delta*normal;
if (DoHalfLambert)
{
strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives);
}
else
strength=MaxSIMD(Four_Zeros,delta*normal);
switch(m_Type)
{
case MATERIAL_LIGHT_POINT:
// nothing further to apply for point lights
break;
case MATERIAL_LIGHT_SPOT:
{
fltx4 dot2=SubSIMD(Four_Zeros,delta*m_Direction); // dot position with spot light dir for cone falloff
// ( dot2 - m_PhiDot ) / ( m_ThetaDot - m_PhiDot ), capped at 1
fltx4 cone_falloff_scale=MulSIMD(ReplicateX4(OneOver_ThetaDot_Minus_PhiDot),
SubSIMD(dot2,ReplicateX4(m_PhiDot)));
cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones);
if ((m_Falloff!=0.0) && (m_Falloff!=1.0))
{
// !!speed!! could compute integer exponent needed by powsimd and store in light
cone_falloff_scale=PowSIMD(cone_falloff_scale,m_Falloff);
}
strength=MulSIMD(cone_falloff_scale,strength);
// now, zero out lighting where dot2<phidot. This will mask out any invalid results
// from pow function, etc
// Despite its name, the mask is set for lanes where dot2 > m_PhiDot, and
// those are the lanes the AND below keeps.
fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(m_PhiDot)); // outside light cone?
strength=AndSIMD(OutsideMask,strength);
}
break;
}
strength=MulSIMD(strength,falloff);
color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(m_Color.x)));
color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(m_Color.y)));
color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(m_Color.z)));
}
// Adds this light's contribution to `color` for four points at once, ignoring
// the surface normal: the incidence term is fixed at 1, so only distance
// attenuation, the optional range cutoff and (for spot lights) cone falloff
// apply. Directional lights contribute nothing here.
void LightDesc_t::ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const
{
FourVectors delta;
Assert((m_Type==MATERIAL_LIGHT_POINT) || (m_Type==MATERIAL_LIGHT_SPOT) || (m_Type==MATERIAL_LIGHT_DIRECTIONAL));
switch (m_Type)
{
case MATERIAL_LIGHT_POINT:
case MATERIAL_LIGHT_SPOT:
// delta = light position - sample position
delta.DuplicateVector(m_Position);
delta-=pos;
break;
case MATERIAL_LIGHT_DIRECTIONAL:
return;
}
// squared distance, floored at 1; the range test below uses this floored value
fltx4 dist2 = delta*delta;
dist2=MaxSIMD( Four_Ones, dist2 );
// falloff starts as the attenuation polynomial a0 + a1*d + a2*d^2 (only the
// terms flagged as non-zero are added) and is then inverted
fltx4 falloff;
if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 )
{
falloff = ReplicateX4(m_Attenuation0);
}
else
falloff= Four_Epsilons;
if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 )
{
falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation1),SqrtEstSIMD(dist2)));
}
if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 )
{
falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation2),dist2));
}
falloff=ReciprocalEstSIMD(falloff);
// Cull out light beyond this radius
// now, zero out elements for which dist2 was > range^2. !!speed!! lights should store dist^2 in sse format
// (a range of exactly 0 skips the cutoff)
if (m_Range != 0.f)
{
fltx4 RangeSquared=ReplicateX4(m_RangeSquared); // !!speed!!
falloff=AndSIMD(falloff,CmpLtSIMD(dist2,RangeSquared));
}
delta.VectorNormalizeFast();
// no incidence term: every lane starts at full strength
fltx4 strength = Four_Ones;
//fltx4 strength=delta;
//fltx4 strength = MaxSIMD(Four_Zeros,delta);
switch(m_Type)
{
case MATERIAL_LIGHT_POINT:
// nothing further to apply for point lights
break;
case MATERIAL_LIGHT_SPOT:
{
fltx4 dot2=SubSIMD(Four_Zeros,delta*m_Direction); // dot position with spot light dir for cone falloff
// ( dot2 - m_PhiDot ) / ( m_ThetaDot - m_PhiDot ), capped at 1
fltx4 cone_falloff_scale=MulSIMD(ReplicateX4(OneOver_ThetaDot_Minus_PhiDot),
SubSIMD(dot2,ReplicateX4(m_PhiDot)));
cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones);
if ((m_Falloff!=0.0) && (m_Falloff!=1.0))
{
// !!speed!! could compute integer exponent needed by powsimd and store in light
cone_falloff_scale=PowSIMD(cone_falloff_scale,m_Falloff);
}
strength=MulSIMD(cone_falloff_scale,strength);
// now, zero out lighting where dot2<phidot. This will mask out any invalid results
// from pow function, etc
// Despite its name, the mask is set for lanes where dot2 > m_PhiDot, and
// those are the lanes the AND below keeps.
fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(m_PhiDot)); // outside light cone?
strength=AndSIMD(OutsideMask,strength);
}
break;
}
strength=MulSIMD(strength,falloff);
color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(m_Color.x)));
color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(m_Color.y)));
color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(m_Color.z)));
}
void LightDesc_t::SetupOldStyleAttenuation( float fQuadraticAttn, float fLinearAttn, float fConstantAttn )
{
// old-style manually typed quadrtiac coefficients
if ( fQuadraticAttn < EQUAL_EPSILON )
fQuadraticAttn = 0;
if ( fLinearAttn < EQUAL_EPSILON)
fLinearAttn = 0;
if ( fConstantAttn < EQUAL_EPSILON)
fConstantAttn = 0;
if ( ( fConstantAttn < EQUAL_EPSILON ) &&
( fLinearAttn < EQUAL_EPSILON ) &&
( fQuadraticAttn < EQUAL_EPSILON ) )
fConstantAttn = 1;
m_Attenuation2=fQuadraticAttn;
m_Attenuation1=fLinearAttn;
m_Attenuation0=fConstantAttn;
float fScaleFactor = fQuadraticAttn * 10000 + fLinearAttn * 100 + fConstantAttn;
if ( fScaleFactor > 0 )
m_Color *= fScaleFactor;
}
// Configures attenuation from the distances at which the light reaches 50%
// and 0%. A quadratic a*d^2 + b*d + c is fitted through (0, 1), (d50, 2) and
// (d0, 256) by SolveInverseQuadraticMonotonic, then rescaled so that it
// evaluates to exactly 2 at the 50% distance. If the 0% distance is smaller
// than the 50% distance it is replaced by twice the 50% distance.
void LightDesc_t::SetupNewStyleAttenuation( float fFiftyPercentDistance,
											float fZeroPercentDistance )
{
	if ( fZeroPercentDistance < fFiftyPercentDistance )
	{
		// !!warning in lib code???!!!
		Warning("light has _fifty_percent_distance of %f but no zero_percent_distance\n",fFiftyPercentDistance);
		fZeroPercentDistance = 2.0*fFiftyPercentDistance;
	}

	// defaults (pure linear term) stay in place unless the solver overwrites them
	float a = 0, b = 1, c = 0;
	const bool bSolved = SolveInverseQuadraticMonotonic( 0, 1.0, fFiftyPercentDistance, 2.0,
														  fZeroPercentDistance, 256.0, a, b, c );
	if ( !bSolved )
	{
		Warning("can't solve quadratic for light %f %f\n",fFiftyPercentDistance,fZeroPercentDistance);
	}

	// value of the fitted curve at the 50% distance, and the factor that makes it 2
	float v50 = c+fFiftyPercentDistance*(b+fFiftyPercentDistance*a);
	float scale = 2.0/v50;
	a *= scale;
	b *= scale;
	c *= scale;

	m_Attenuation0 = c;
	m_Attenuation1 = b;
	m_Attenuation2 = a;
}

82
mathlib/mathlib.vpc Normal file
View File

@ -0,0 +1,82 @@
//-----------------------------------------------------------------------------
// MATHLIB.VPC
//
// Project Script
//-----------------------------------------------------------------------------
// Root of the source tree relative to this script; used by the paths below.
$macro SRCDIR ".."
// Pull in the shared script at the path below.
$include "$SRCDIR\vpc_scripts\source_lib_base.vpc"
$Configuration
{
$Compiler
{
// Extend the inherited values ($BASE) with the public mathlib headers and
// the MATHLIB_LIB define.
$AdditionalIncludeDirectories "$BASE;..\public\mathlib"
$PreprocessorDefinitions "$BASE;MATHLIB_LIB"
}
}
// File list for the mathlib project. Entries followed by a [...] expression
// are conditional - presumably included only on the named platforms.
$Project "mathlib"
{
// Implementation files.
$Folder "Source Files"
{
$File "color_conversion.cpp"
$File "halton.cpp"
$File "lightdesc.cpp"
$File "mathlib_base.cpp"
$File "powsse.cpp"
$File "sparse_convolution_noise.cpp"
$File "sseconst.cpp"
$File "sse.cpp" [$WINDOWS||$POSIX]
$File "ssenoise.cpp"
$File "3dnow.cpp" [$WINDOWS||$LINUX]
$File "anorms.cpp"
$File "bumpvects.cpp"
$File "IceKey.cpp"
$File "imagequant.cpp"
$File "polyhedron.cpp"
$File "quantize.cpp"
$File "randsse.cpp"
$File "spherical.cpp"
$File "simdvectormatrix.cpp"
$File "vector.cpp"
$File "vmatrix.cpp"
$File "almostequal.cpp"
}
// Headers under $SRCDIR\public\mathlib.
$Folder "Public Header Files"
{
$File "$SRCDIR\public\mathlib\amd3dx.h" [$WINDOWS||$LINUX]
$File "$SRCDIR\public\mathlib\anorms.h"
$File "$SRCDIR\public\mathlib\bumpvects.h"
$File "$SRCDIR\public\mathlib\compressed_3d_unitvec.h"
$File "$SRCDIR\public\mathlib\compressed_light_cube.h"
$File "$SRCDIR\public\mathlib\compressed_vector.h"
$File "$SRCDIR\public\mathlib\halton.h"
$File "$SRCDIR\public\mathlib\IceKey.H"
$File "$SRCDIR\public\mathlib\lightdesc.h"
$File "$SRCDIR\public\mathlib\math_pfns.h"
$File "$SRCDIR\public\mathlib\mathlib.h"
$File "$SRCDIR\public\mathlib\noise.h"
$File "$SRCDIR\public\mathlib\polyhedron.h"
$File "$SRCDIR\public\mathlib\quantize.h"
$File "$SRCDIR\public\mathlib\simdvectormatrix.h"
$File "$SRCDIR\public\mathlib\spherical_geometry.h"
$File "$SRCDIR\public\mathlib\ssemath.h"
$File "$SRCDIR\public\mathlib\ssequaternion.h"
$File "$SRCDIR\public\mathlib\vector.h"
$File "$SRCDIR\public\mathlib\vector2d.h"
$File "$SRCDIR\public\mathlib\vector4d.h"
$File "$SRCDIR\public\mathlib\vmatrix.h"
$File "$SRCDIR\public\mathlib\vplane.h"
}
// Headers that live next to the sources.
$Folder "Header Files"
{
$File "noisedata.h"
$File "sse.h" [$WINDOWS||$POSIX]
$File "3dnow.h" [$WINDOWS||$LINUX]
}
}

4303
mathlib/mathlib_base.cpp Normal file

File diff suppressed because it is too large Load Diff

180
mathlib/noisedata.h Normal file
View File

@ -0,0 +1,180 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: static data for noise() primitives.
//
// $Workfile: $
// $NoKeywords: $
//=============================================================================//
//
// **** DO NOT EDIT THIS FILE. GENERATED BY DATAGEN.PL ****
//
static int perm_a[]={
66,147,106,213,89,115,239,25,171,175,9,114,141,226,118,128,41,208,4,56,
180,248,43,82,246,219,94,245,133,131,222,103,160,130,168,145,238,38,23,6,
236,67,99,2,70,232,80,209,1,3,68,65,102,210,13,73,55,252,187,170,22,36,
52,181,117,163,46,79,166,224,148,75,113,95,156,185,220,164,51,142,161,35,
206,251,45,136,197,190,132,32,218,127,63,27,137,93,242,20,189,108,183,
122,139,191,249,253,87,98,69,0,144,64,24,214,97,116,158,42,107,15,53,212,
83,111,152,240,74,237,62,77,205,149,26,151,178,204,91,176,234,49,154,203,
33,221,125,134,165,124,86,39,37,60,150,157,179,109,110,44,159,153,5,100,
10,207,40,186,96,215,143,162,230,184,101,54,174,247,76,59,241,223,192,84,
104,78,169,146,138,30,48,85,233,19,29,92,126,17,199,250,31,81,188,225,28,
112,88,11,182,173,211,129,194,172,14,120,200,167,135,12,177,227,229,155,
201,61,105,195,193,244,235,58,8,196,123,254,16,18,50,121,71,243,90,57,
202,119,255,47,7,198,228,21,217,216,231,140,72,34
};
static int perm_b[]={
123,108,201,64,40,75,24,221,137,110,191,142,9,69,230,83,7,247,51,54,115,
133,180,248,109,116,62,99,251,55,89,253,65,106,228,167,131,132,58,143,
97,102,163,202,149,234,12,117,174,94,121,74,32,113,20,60,159,182,204,29,
244,118,3,178,255,38,6,114,36,93,30,134,213,90,245,209,88,232,162,125,
84,166,70,136,208,231,27,71,157,80,76,0,170,225,203,176,33,161,196,128,
252,236,246,2,138,1,250,197,77,243,218,242,19,164,68,212,14,237,144,63,
46,103,177,188,85,223,8,160,222,4,216,219,35,15,44,23,126,127,100,226,
235,37,168,101,49,22,11,73,61,135,111,183,72,96,185,239,82,18,50,155,
186,153,17,233,146,156,107,5,254,10,192,198,148,207,104,13,124,48,95,
129,120,206,199,81,249,91,150,210,119,240,122,194,92,34,28,205,175,227,
179,220,140,152,79,26,195,47,66,173,169,241,53,184,187,145,112,238,214,
147,98,171,229,200,151,25,67,78,189,217,130,224,57,172,59,41,43,16,105,
158,165,21,45,56,141,139,215,190,86,42,52,39,87,181,31,154,193,211
};
static int perm_c[]={
97,65,96,25,122,26,219,85,148,251,102,0,140,130,136,213,138,60,236,52,
178,131,115,183,144,78,147,168,39,45,169,70,57,146,67,142,252,216,28,54,
86,222,194,200,48,5,205,125,214,56,181,255,196,155,37,218,153,208,66,
242,73,248,206,61,62,246,177,2,197,107,162,152,89,41,6,160,94,8,201,38,
235,228,165,93,111,239,74,231,121,47,166,221,157,64,77,244,29,105,150,
123,190,191,225,118,133,42,10,84,185,159,124,132,240,180,44,1,9,19,99,
254,12,207,186,71,234,184,11,20,16,193,139,175,98,59,113,27,170,230,91,
187,46,156,249,108,195,171,114,14,188,82,192,233,24,32,241,87,164,90,43,
163,245,92,40,215,55,226,15,3,112,158,250,172,22,227,137,35,128,145,247,
161,119,80,217,189,81,7,63,202,120,223,83,179,4,106,199,229,95,53,50,33,
182,72,143,23,243,75,18,173,141,167,198,204,58,174,237,17,129,238,127,
31,101,176,36,30,110,209,34,203,135,232,68,149,49,134,126,212,79,76,117,
104,210,211,224,253,100,220,109,116,88,13,151,154,69,21,51,103
};
static int perm_d[]={
94,234,145,235,151,166,187,238,4,5,128,115,87,107,229,175,190,108,218,
32,17,220,97,90,122,121,71,109,64,227,225,75,81,19,27,162,3,89,139,69,
92,26,48,215,116,191,114,2,104,157,66,39,1,127,96,124,30,0,82,233,219,
42,131,173,35,201,182,144,14,98,148,244,160,159,179,91,31,68,119,154,
205,113,149,167,44,60,18,228,251,245,43,10,80,15,129,67,181,174,6,45,
194,237,213,52,99,232,211,212,164,217,57,153,156,102,134,20,249,132,55,
204,65,33,231,85,61,37,163,193,189,170,226,63,168,236,165,224,242,195,
41,200,40,70,112,100,36,172,130,74,137,252,243,135,230,161,207,16,146,
198,118,150,24,29,250,188,25,209,103,23,105,47,7,46,133,83,184,50,79,
110,120,53,253,206,214,9,240,101,147,152,183,254,59,126,216,197,171,51,
208,248,202,58,176,28,72,177,185,141,12,11,56,222,86,178,155,223,88,111,
73,142,210,138,239,221,199,192,84,93,241,125,76,77,255,95,8,78,247,186,
123,196,13,140,180,143,54,106,136,34,62,169,38,117,22,21,49,203,158,246
};
// X coordinates of the noise impulses. CellNoise indexes this table with the
// value returned by Hash4D and compares the entry against the x offset within
// a cell. The values listed here lie between 0.000000 and 1.000000.
static float impulse_xcoords[]={
0.788235,0.541176,0.972549,0.082353,0.352941,0.811765,0.286275,0.752941,
0.203922,0.705882,0.537255,0.886275,0.580392,0.137255,0.800000,0.533333,
0.117647,0.447059,0.129412,0.925490,0.086275,0.478431,0.666667,0.568627,
0.678431,0.313725,0.321569,0.349020,0.988235,0.419608,0.898039,0.219608,
0.243137,0.623529,0.501961,0.772549,0.952941,0.517647,0.949020,0.701961,
0.454902,0.505882,0.564706,0.960784,0.207843,0.007843,0.831373,0.184314,
0.576471,0.462745,0.572549,0.247059,0.262745,0.694118,0.615686,0.121569,
0.384314,0.749020,0.145098,0.717647,0.415686,0.607843,0.105882,0.101961,
0.200000,0.807843,0.521569,0.780392,0.466667,0.552941,0.996078,0.627451,
0.992157,0.529412,0.407843,0.011765,0.709804,0.458824,0.058824,0.819608,
0.176471,0.317647,0.392157,0.223529,0.156863,0.490196,0.325490,0.074510,
0.239216,0.164706,0.890196,0.603922,0.921569,0.839216,0.854902,0.098039,
0.686275,0.843137,0.152941,0.372549,0.062745,0.474510,0.486275,0.227451,
0.400000,0.298039,0.309804,0.274510,0.054902,0.815686,0.647059,0.635294,
0.662745,0.976471,0.094118,0.509804,0.650980,0.211765,0.180392,0.003922,
0.827451,0.278431,0.023529,0.525490,0.450980,0.725490,0.690196,0.941176,
0.639216,0.560784,0.196078,0.364706,0.043137,0.494118,0.796078,0.113725,
0.760784,0.729412,0.258824,0.290196,0.584314,0.674510,0.823529,0.905882,
0.917647,0.070588,0.862745,0.345098,0.913725,0.937255,0.031373,0.215686,
0.768627,0.333333,0.411765,0.423529,0.945098,0.721569,0.039216,0.792157,
0.956863,0.266667,0.254902,0.047059,0.294118,0.658824,0.250980,1.000000,
0.984314,0.756863,0.027451,0.305882,0.835294,0.513725,0.360784,0.776471,
0.611765,0.192157,0.866667,0.858824,0.592157,0.803922,0.141176,0.435294,
0.588235,0.619608,0.341176,0.109804,0.356863,0.270588,0.737255,0.847059,
0.050980,0.764706,0.019608,0.870588,0.933333,0.784314,0.549020,0.337255,
0.631373,0.929412,0.231373,0.427451,0.078431,0.498039,0.968627,0.654902,
0.125490,0.698039,0.015686,0.878431,0.713725,0.368627,0.431373,0.874510,
0.403922,0.556863,0.443137,0.964706,0.909804,0.301961,0.035294,0.850980,
0.882353,0.741176,0.380392,0.133333,0.470588,0.643137,0.282353,0.396078,
0.980392,0.168627,0.149020,0.235294,0.670588,0.596078,0.733333,0.160784,
0.376471,0.682353,0.545098,0.482353,0.745098,0.894118,0.188235,0.329412,
0.439216,0.901961,0.000000,0.600000,0.388235,0.172549,0.090196,0.066667
};
// Y coordinates of the noise impulses. CellNoise indexes this table with the
// value returned by Hash4D and compares the entry against the y offset within
// a cell. The values listed here lie between 0.000000 and 1.000000.
static float impulse_ycoords[]={
0.827451,0.337255,0.941176,0.886275,0.878431,0.239216,0.400000,0.164706,
0.490196,0.411765,0.964706,0.349020,0.803922,0.317647,0.647059,0.431373,
0.933333,0.156863,0.094118,0.219608,0.039216,0.521569,0.498039,0.705882,
0.717647,0.047059,0.631373,0.517647,0.984314,0.847059,0.482353,0.439216,
0.250980,0.862745,0.690196,0.913725,0.270588,0.070588,0.027451,0.694118,
0.811765,0.000000,0.494118,0.823529,0.800000,0.600000,0.003922,0.443137,
0.639216,0.376471,0.031373,0.035294,0.552941,0.215686,0.305882,0.133333,
0.564706,0.176471,0.211765,0.874510,0.360784,0.654902,0.223529,0.807843,
0.372549,0.137255,0.321569,0.015686,0.007843,0.262745,0.125490,0.078431,
0.396078,0.976471,0.929412,1.000000,0.937255,0.509804,0.188235,0.850980,
0.831373,0.392157,0.741176,0.541176,0.592157,0.286275,0.345098,0.572549,
0.537255,0.725490,0.839216,0.184314,0.772549,0.149020,0.505882,0.423529,
0.780392,0.011765,0.890196,0.086275,0.427451,0.023529,0.788235,0.050980,
0.760784,0.603922,0.066667,0.643137,0.623529,0.960784,0.172549,0.333333,
0.082353,0.290196,0.992157,0.709804,0.894118,0.596078,0.243137,0.752941,
0.486275,0.670588,0.949020,0.784314,0.145098,0.560784,0.513725,0.180392,
0.580392,0.996078,0.380392,0.556863,0.407843,0.945098,0.117647,0.058824,
0.678431,0.129412,0.192157,0.105882,0.968627,0.545098,0.462745,0.227451,
0.019608,0.866667,0.674510,0.207843,0.627451,0.819608,0.921569,0.356863,
0.447059,0.533333,0.435294,0.341176,0.054902,0.529412,0.235294,0.764706,
0.615686,0.043137,0.745098,0.266667,0.501961,0.619608,0.776471,0.450980,
0.309804,0.325490,0.200000,0.635294,0.247059,0.698039,0.721569,0.168627,
0.854902,0.141176,0.611765,0.525490,0.415686,0.298039,0.254902,0.858824,
0.568627,0.329412,0.062745,0.843137,0.588235,0.733333,0.607843,0.478431,
0.576471,0.662745,0.470588,0.666667,0.980392,0.113725,0.898039,0.203922,
0.294118,0.152941,0.098039,0.909804,0.796078,0.768627,0.713725,0.196078,
0.368627,0.419608,0.352941,0.090196,0.749020,0.121569,0.882353,0.278431,
0.388235,0.917647,0.701961,0.729412,0.835294,0.258824,0.301961,0.101961,
0.792157,0.474510,0.686275,0.658824,0.364706,0.682353,0.458824,0.815686,
0.282353,0.160784,0.870588,0.988235,0.756863,0.549020,0.274510,0.384314,
0.650980,0.737255,0.901961,0.956863,0.972549,0.584314,0.925490,0.403922,
0.074510,0.454902,0.952941,0.109804,0.313725,0.905882,0.231373,0.466667
};
// Z coordinates of the noise impulses. CellNoise indexes this table with the
// value returned by Hash4D and compares the entry against the z offset within
// a cell. The values listed here lie between 0.000000 and 1.000000.
static float impulse_zcoords[]={
0.082353,0.643137,0.415686,0.929412,0.568627,0.509804,0.537255,0.815686,
0.698039,0.941176,0.776471,0.752941,0.737255,0.525490,0.498039,0.423529,
0.792157,0.125490,0.619608,0.164706,0.368627,0.870588,0.137255,0.372549,
0.466667,0.486275,0.501961,0.513725,0.709804,0.576471,0.203922,0.258824,
0.152941,0.556863,0.223529,0.047059,0.235294,0.474510,0.764706,0.552941,
0.847059,0.145098,0.176471,0.937255,0.654902,0.894118,0.729412,0.054902,
0.666667,0.749020,0.262745,0.560784,0.431373,0.286275,0.352941,0.239216,
0.156863,0.839216,0.427451,0.949020,0.384314,0.227451,0.180392,0.074510,
0.172549,0.356863,0.066667,0.517647,0.447059,0.184314,0.062745,0.670588,
0.603922,0.219608,0.270588,0.976471,0.505882,0.627451,0.819608,0.854902,
0.843137,0.019608,0.713725,0.035294,0.925490,0.349020,0.866667,0.701961,
0.909804,0.811765,0.717647,0.141176,0.917647,0.023529,0.098039,0.803922,
0.733333,0.658824,0.827451,0.133333,0.858824,0.800000,0.635294,1.000000,
0.078431,0.450980,0.835294,0.321569,0.360784,0.529412,0.725490,0.572549,
0.639216,0.341176,0.533333,0.094118,0.149020,0.545098,0.101961,0.901961,
0.278431,0.694118,0.521569,0.490196,0.454902,0.329412,0.274510,0.027451,
0.745098,0.933333,0.443137,0.168627,0.192157,0.988235,0.070588,0.972549,
0.768627,0.400000,0.470588,0.207843,0.215686,0.388235,0.439216,0.780392,
0.482353,0.121569,0.964706,0.086275,0.890196,0.337255,0.109804,0.305882,
0.113725,0.435294,0.721569,0.772549,0.807843,0.741176,0.254902,0.596078,
0.494118,0.317647,0.419608,0.000000,0.188235,0.031373,0.376471,0.380392,
0.611765,0.945098,0.411765,0.313725,0.874510,0.588235,0.678431,0.160784,
0.007843,0.090196,0.850980,0.788235,0.705882,0.266667,0.309804,0.541176,
0.231373,0.129412,0.294118,0.243137,0.913725,0.996078,0.117647,0.478431,
0.290196,0.549020,0.682353,0.784314,0.396078,0.831373,0.984314,0.584314,
0.039216,0.250980,0.600000,0.392157,0.298039,0.050980,0.364706,0.105882,
0.623529,0.886275,0.980392,0.325490,0.247059,0.690196,0.674510,0.960784,
0.647059,0.211765,0.882353,0.686275,0.823529,0.058824,0.956863,0.043137,
0.345098,0.301961,0.592157,0.862745,0.607843,0.458824,0.282353,0.003922,
0.580392,0.760784,0.564706,0.011765,0.968627,0.905882,0.756863,0.952941,
0.662745,0.015686,0.898039,0.196078,0.333333,0.992157,0.650980,0.407843,
0.796078,0.615686,0.878431,0.921569,0.631373,0.200000,0.403922,0.462745
};

2293
mathlib/polyhedron.cpp Normal file

File diff suppressed because it is too large Load Diff

96
mathlib/powsse.cpp Normal file
View File

@ -0,0 +1,96 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
//=====================================================================================//
#include <string.h> // memcpy
#include "mathlib/ssemath.h"
// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"
// Raises each lane of x to a power given in fixed point with two fractional
// bits, i.e. the real exponent is exponent/4. Bit 0 of |exponent| contributes
// x^.25, bit 1 contributes x^.5, and the remaining bits are the whole part,
// handled by repeated squaring. Square roots and the reciprocal use the
// "Est" SIMD variants. A negative exponent returns the reciprocal of the
// positive-exponent result.
fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent)
{
	const int nMagnitude = abs( exponent );
	fltx4 flResult = Four_Ones;						// x^0

	// fractional part: combine x^.25 and/or x^.5
	if ( ( nMagnitude & 3 ) != 0 )
	{
		fltx4 flSqrt = SqrtEstSIMD( x );			// x^.5
		if ( nMagnitude & 1 )
			flResult = SqrtEstSIMD( flSqrt );		// x^.25
		if ( nMagnitude & 2 )
			flResult = MulSIMD( flResult, flSqrt );
	}

	// whole part: flPower walks x, x^2, x^4, ... one step per exponent bit
	int nWhole = nMagnitude >> 2;
	fltx4 flPower = x;
	for ( ;; )
	{
		if ( nWhole & 1 )
			flResult = MulSIMD( flResult, flPower );
		nWhole >>= 1;
		if ( nWhole == 0 )
			break;
		flPower = MulSIMD( flPower, flPower );
	}

	if ( exponent < 0 )
		return ReciprocalEstSaturateSIMD( flResult );	// pow(x,-b)=1/pow(x,b)
	return flResult;
}
/*
* (c) Ian Stephenson
*
* ian@dctsystems.co.uk
*
* Fast pow() reference implementation
*/
// 2^23 and 2^-23. The code below treats the raw bits of a float as an integer
// and scales by these so that the biased exponent lands in the integer part
// (it assumes a 32-bit float with a 23-bit mantissa and an exponent bias of 127).
static float shift23=(1<<23);
static float OOshift23=1.0/(1<<23);

// Fast approximate log2(i).
// The float's bit pattern, read as an integer and scaled by 2^-23, is a
// piecewise-linear approximation of log2(i)+127; a small quadratic term in the
// fractional part (weighted by LogBodge) is added as a correction.
// Exact powers of two return their exact exponent. No special handling is
// done for zero, negative or non-finite input.
float FastLog2(float i)
{
	const float LogBodge=0.346607f;

	// Copy the bits out with memcpy; dereferencing an int* that points at a
	// float violates the strict aliasing rules.
	int bits;
	memcpy(&bits,&i,sizeof(bits));

	float x=(float)bits;
	x*= OOshift23; //1/pow(2,23);
	x=x-127;

	float y=x-floorf(x);	// fractional part of the estimate
	y=(y-y*y)*LogBodge;
	return x+y;
}
// Fast approximate 2^i; the inverse of the bit trick used by FastLog2.
// Builds the value (i+127)*2^23, minus a quadratic correction in the
// fractional part of i (weighted by PowBodge), converts it to an integer and
// reinterprets that integer's bits as a float.
// No range checking is done: i must be such that the scaled value fits in an int.
float FastPow2(float i)
{
	const float PowBodge=0.33971f;
	float y=i-floorf(i);	// fractional part of the exponent
	y=(y-y*y)*PowBodge;

	float x=i+127-y;
	x*= shift23; //pow(2,23);

	// Store the integer's bit pattern into the float with memcpy; writing
	// through an int* that points at a float violates strict aliasing.
	int bits=(int)x;
	memcpy(&x,&bits,sizeof(x));
	return x;
}
// Fast approximate pow(a,b), computed as FastPow2( b * FastLog2(a) ).
// Any base at or below 2^-23 (including zero and negative values) yields 0.
float FastPow(float a, float b)
{
	return ( a <= OOshift23 ) ? 0.0f : FastPow2( b*FastLog2(a) );
}
// Fast approximate 10^i, computed as 2^(i*log2(10)) via FastPow2.
float FastPow10( float i )
{
	const float flLog2Of10 = 3.321928f;
	return FastPow2( i * flLog2Of10 );
}

679
mathlib/quantize.cpp Normal file
View File

@ -0,0 +1,679 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//
//=============================================================================//
#ifndef STDIO_H
#include <stdio.h>
#endif
#ifndef STRING_H
#include <string.h>
#endif
#ifndef QUANTIZE_H
#include <quantize.h>
#endif
#include <stdlib.h>
#include <minmax.h>
#include <math.h>
// Number of dimensions per sample; set by SetNDims().
static int current_ndims;
// Root of the tree currently being built; set by Quantize().
static struct QuantizedValue *current_root;
// Size in bytes of one sample record for current_ndims; set by SetNDims().
static int current_ssize;
// Per-dimension error weights; set by Quantize().
static uint8 *current_weights;
// Running total of the best-match error, accumulated by FindMatch().
double SquaredError;
// Selects which splitting strategy SubdivideNode() is compiled with.
#define SPLIT_THEN_SORT 1
// Square of x. Note: the argument is evaluated twice.
#define SQ(x) ((x)*(x))
// Allocates a tree node with no samples and no children, plus its
// per-dimension arrays sized by current_ndims (so SetNDims must have been
// called first). Sums is zeroed; ErrorMeasure, Mean, Mins and Maxs are left
// uninitialized. Release with FreeQuantization().
static struct QuantizedValue *AllocQValue(void)
{
	struct QuantizedValue *node = new QuantizedValue;

	node->Samples = 0;
	node->NSamples = 0;
	node->Children[0] = 0;
	node->Children[1] = 0;

	node->ErrorMeasure = new double[current_ndims];
	node->Mean = new uint8[current_ndims];
	node->Mins = new uint8[current_ndims];
	node->Maxs = new uint8[current_ndims];
	node->Sums = new int[current_ndims];
	for (int d = 0; d < current_ndims; d++)
		node->Sums[d] = 0;

	node->NQuant = 0;
	node->sortdim = -1;
	return node;
}
// Recursively frees a quantization tree built from AllocQValue() nodes:
// both subtrees, the node's per-dimension arrays, and the node itself.
// The Samples pointer is not freed here. Passing NULL is a no-op.
void FreeQuantization(struct QuantizedValue *t)
{
	if (!t)
		return;

	FreeQuantization(t->Children[0]);
	FreeQuantization(t->Children[1]);

	delete[] t->ErrorMeasure;
	delete[] t->Mean;
	delete[] t->Mins;
	delete[] t->Maxs;
	delete[] t->Sums;

	// The node is allocated with scalar `new QuantizedValue` in AllocQValue,
	// so it must be released with scalar delete, not delete[].
	delete t;
}
static int QNumSort(void const *a, void const *b)
{
int32 as=((struct Sample *) a)->QNum;
int32 bs=((struct Sample *) b)->QNum;
if (as==bs) return 0;
return (as>bs)?1:-1;
}
// The comparator below is only compiled when SPLIT_THEN_SORT is 0.
#if SPLIT_THEN_SORT
#else
// Dimension that samplesort() compares on; set by SubdivideNode() before sorting.
static int current_sort_dim;
// qsort comparator: orders samples by ascending value along current_sort_dim.
static int samplesort(void const *a, void const *b)
{
uint8 as=((struct Sample *) a)->Value[current_sort_dim];
uint8 bs=((struct Sample *) b)->Value[current_sort_dim];
if (as==bs) return 0;
return (as>bs)?1:-1;
}
#endif
static int sortlong(void const *a, void const *b)
{
// treat the entire vector of values as a long integer for duplicate removal.
return memcmp(((struct Sample *) a)->Value,
((struct Sample *) b)->Value,current_ndims);
}
// Pointer to the sample record that follows s, stepping current_ssize bytes.
#define NEXTSAMPLE(s) ( (struct Sample *) (((uint8 *) s)+current_ssize))
// i'th sample record of array s, using the current dimension count.
#define SAMPLE(s,i) NthSample(s,i,current_ndims)
// Records the number of dimensions per sample and the matching record size:
// sizeof(struct Sample) plus n-1 extra bytes.
static void SetNDims(int n)
{
	current_ndims = n;
	current_ssize = sizeof(struct Sample)+(n-1);
}
// Sorts the sample array in place and collapses runs of identical value
// vectors into one record each, incrementing the surviving record's Count
// once per duplicate removed.
//   s        - array of nsamples records, each sized for ndims dimensions
//   nsamples - number of records in s
//   ndims    - number of dimensions per sample
// Returns the number of distinct records now at the front of s (0 when
// nsamples <= 0).
int CompressSamples(struct Sample *s, int nsamples, int ndims)
{
	SetNDims(ndims);

	// Nothing to compress. Without this guard the countdown below would
	// start from a non-positive value and walk off the end of the array.
	if (nsamples <= 0)
		return 0;

	qsort(s,nsamples,current_ssize,sortlong);
	// now, they are all sorted by treating all dimensions as a large number.
	// we may now remove duplicates.
	struct Sample *lastdst=s;				// last record kept so far
	struct Sample *dst=NEXTSAMPLE(s);		// the first record is always kept
	struct Sample *src=NEXTSAMPLE(s);
	int noutput=1;
	for (int remaining=nsamples-1; remaining>0; remaining--)
	{
		if (memcmp(src->Value,lastdst->Value,current_ndims))
		{
			// a new distinct value: keep it. src and dst are either the same
			// record or whole records apart, so skip the self-copy (memcpy
			// must not be given overlapping regions).
			if (dst!=src)
				memcpy(dst,src,current_ssize);
			lastdst=dst;
			dst=NEXTSAMPLE(dst);
			noutput++;
		}
		else
			lastdst->Count++;
		src=NEXTSAMPLE(src);
	}
	return noutput;
}
// Debug helper: prints each sample's index, Count and value bytes (in hex)
// to stdout. Also sets the current dimension count to ndims.
void PrintSamples(struct Sample const *s, int nsamples, int ndims)
{
	SetNDims(ndims);
	for (int index=0; nsamples--; index++)
	{
		printf("sample #%d, count=%d, values=\n { ",index,s->Count);
		int d=0;
		while (d<ndims)
		{
			printf("%02x,",s->Value[d]);
			d++;
		}
		printf("}\n");
		s=NEXTSAMPLE(s);
	}
}
// Debug helper: prints the subtree rooted at p to stdout, one node per three
// lines (identity/mean, errors, mins/maxs), each line preceded by idlevel
// indent characters. Children are printed with idlevel+2. NULL prints nothing.
void PrintQTree(struct QuantizedValue const *p,int idlevel)
{
	if (!p)
		return;

	int col;
	int d;

	for (col=0; col<idlevel; col++)
		printf(" ");
	printf("node=%p NSamples=%d value=%d Mean={",p,p->NSamples,p->value);
	for (d=0; d<current_ndims; d++)
		printf("%x,",p->Mean[d]);
	printf("}\n");

	for (col=0; col<idlevel; col++)
		printf(" ");
	printf("Errors={");
	for (d=0; d<current_ndims; d++)
		printf("%f,",p->ErrorMeasure[d]);
	printf("}\n");

	for (col=0; col<idlevel; col++)
		printf(" ");
	printf("Mins={");
	for (d=0; d<current_ndims; d++)
		printf("%d,",p->Mins[d]);
	printf("} Maxs={");
	for (d=0; d<current_ndims; d++)
		printf("%d,",p->Maxs[d]);
	printf("}\n");

	PrintQTree(p->Children[0],idlevel+2);
	PrintQTree(p->Children[1],idlevel+2);
}
static void UpdateStats(struct QuantizedValue *v)
{
// first, find mean
int32 Means[MAXDIMS];
double Errors[MAXDIMS];
double WorstError[MAXDIMS];
int i,j;
memset(Means,0,sizeof(Means));
int N=0;
for(i=0;i<v->NSamples;i++)
{
struct Sample *s=SAMPLE(v->Samples,i);
N+=s->Count;
for(j=0;j<current_ndims;j++)
{
uint8 val=s->Value[j];
Means[j]+=val*s->Count;
}
}
for(j=0;j<current_ndims;j++)
{
if (N) v->Mean[j]=(uint8) (Means[j]/N);
Errors[j]=WorstError[j]=0.;
}
for(i=0;i<v->NSamples;i++)
{
struct Sample *s=SAMPLE(v->Samples,i);
double c=s->Count;
for(j=0;j<current_ndims;j++)
{
double diff=SQ(s->Value[j]-v->Mean[j]);
Errors[j]+=c*diff; // charles uses abs not sq()
if (diff>WorstError[j])
WorstError[j]=diff;
}
}
v->TotalError=0.;
double ErrorScale=1.; // /sqrt((double) (N));
for(j=0;j<current_ndims;j++)
{
v->ErrorMeasure[j]=(ErrorScale*Errors[j]*current_weights[j]);
v->TotalError+=v->ErrorMeasure[j];
#if SPLIT_THEN_SORT
v->ErrorMeasure[j]*=WorstError[j];
#endif
}
v->TotSamples=N;
}
// Results of the most recent FindWorst()/UpdateWorst() search:
// the dimension with the largest ErrorMeasure in the worst leaf,
static int ErrorDim;
// that leaf's TotalError (FindWorst resets it to -1 before searching),
static double ErrorVal;
// and the worst leaf itself.
static struct QuantizedValue *ErrorNode;
// Walks the subtree under q looking for the leaf with the largest TotalError.
// When a leaf beats the current ErrorVal, it is recorded in ErrorVal and
// ErrorNode, and ErrorDim is set to the index of its largest ErrorMeasure
// (the lowest index wins ties). q must not be NULL.
static void UpdateWorst(struct QuantizedValue *q)
{
	if (q->Children[0])
	{
		// interior node: only leaves are candidates
		UpdateWorst(q->Children[0]);
		UpdateWorst(q->Children[1]);
		return;
	}

	if (q->TotalError>ErrorVal)
	{
		int worstDim=0;
		for (int d=0; d<current_ndims; d++)
		{
			if (q->ErrorMeasure[d]>q->ErrorMeasure[worstDim])
				worstDim=d;
		}
		ErrorVal=q->TotalError;
		ErrorNode=q;
		ErrorDim=worstDim;
	}
}
// Searches the tree under current_root for the leaf with the largest
// TotalError, leaving the result in ErrorNode/ErrorDim/ErrorVal.
// Returns nonzero only if that largest error is greater than zero.
static int FindWorst(void)
{
	ErrorVal=-1.;
	UpdateWorst(current_root);
	if (ErrorVal>0)
		return 1;
	return 0;
}
// Splits node n into two children. n's sample array is reordered in place so
// that the first NAdded records go to Children[0] and the rest to
// Children[1]; both children share n's storage (nothing is copied) and get
// their statistics computed with UpdateStats().
//   n        - node to split
//   whichdim - dimension along which the split is made
static void SubdivideNode(struct QuantizedValue *n, int whichdim)
{
	int NAdded=0;	// number of sample records given to Children[0]
	int i;
#if SPLIT_THEN_SORT
	// we will try the "split then sort" method. This works by finding the
	// means for all samples above and below the mean along the given axis.
	// samples are then split into two groups, with the selection based upon
	// which of the n-dimensional means the sample is closest to.
	double LocalMean[MAXDIMS][2];
	int totsamps[2];
	for(i=0;i<current_ndims;i++)
		LocalMean[i][0]=LocalMean[i][1]=0.;
	totsamps[0]=totsamps[1]=0;
	uint8 minv=255;
	uint8 maxv=0;
	struct Sample *minS=0,*maxS=0;
	for(i=0;i<n->NSamples;i++)
	{
		struct Sample *sl=SAMPLE(n->Samples,i);
		uint8 v=sl->Value[whichdim];
		// Track the extreme samples along whichdim. The NULL checks make sure
		// the first sample is always recorded, even when its value equals the
		// initial bound (255 for the minimum, 0 for the maximum); otherwise
		// the extrema fallback below could dereference a NULL pointer.
		if (!minS || v<minv) { minv=v; minS=sl; }
		if (!maxS || v>maxv) { maxv=v; maxS=sl; }
		int whichside=(v<n->Mean[whichdim])?0:1;
		totsamps[whichside]+=sl->Count;
		for(int d=0;d<current_ndims;d++)
			LocalMean[d][whichside]+=
				sl->Count*sl->Value[d];
	}
	if (totsamps[0] && totsamps[1])
	{
		for(i=0;i<current_ndims;i++)
		{
			LocalMean[i][0]/=totsamps[0];
			LocalMean[i][1]/=totsamps[1];
		}
	}
	else if (minS && maxS)
	{
		// it is possible that the clustering failed to split the samples.
		// this can happen with a heavily biased sample (i.e. all black
		// with a few stars). If this happens, we will cluster around the
		// extrema instead. LocalMean[i][0] will be the point with the lowest
		// value on the dimension and LocalMean[i][1] the one with the highest
		// value.
		for(i=0;i<current_ndims;i++)
		{
			LocalMean[i][0]=minS->Value[i];
			LocalMean[i][1]=maxS->Value[i];
		}
	}
	// now, we have 2 n-dimensional means. We will label each sample
	// for which one it is nearer to by using the QNum field.
	for(i=0;i<n->NSamples;i++)
	{
		double dist[2];
		dist[0]=dist[1]=0.;
		struct Sample *s=SAMPLE(n->Samples,i);
		for(int d=0;d<current_ndims;d++)
			for(int w=0;w<2;w++)
				dist[w]+=current_weights[d]*SQ(LocalMean[d][w]-s->Value[d]);
		s->QNum=(dist[0]<dist[1]);
	}
	// hey ho! we have now labelled each one with a candidate bin. Let's
	// sort the array by moving the 0-labelled ones to the head of the array.
	n->sortdim=-1;
	qsort(n->Samples,n->NSamples,current_ssize,QNumSort);
	// count the leading 0-labelled records
	for(i=0;i<n->NSamples;i++,NAdded++)
		if (SAMPLE(n->Samples,i)->QNum)
			break;
#else
	if (whichdim != n->sortdim)
	{
		current_sort_dim=whichdim;
		qsort(n->Samples,n->NSamples,current_ssize,samplesort);
		n->sortdim=whichdim;
	}
	// now, the samples are sorted along the proper dimension. we need
	// to find the place to cut in order to split the node. this is
	// complicated by the fact that each sample entry can represent many
	// samples. What we will do is start at the beginning of the array,
	// adding samples to the first node, until either the number added
	// is >=TotSamples/2, or there is only one left.
	int TotAdded=0;
	for(;;)
	{
		if (NAdded==n->NSamples-1)
			break;
		if (TotAdded>=n->TotSamples/2)
			break;
		TotAdded+=SAMPLE(n->Samples,NAdded)->Count;
		NAdded++;
	}
#endif
	// first child: records [0, NAdded)
	struct QuantizedValue *a=AllocQValue();
	a->sortdim=n->sortdim;
	a->Samples=n->Samples;
	a->NSamples=NAdded;
	n->Children[0]=a;
	UpdateStats(a);
	// second child: records [NAdded, NSamples)
	a=AllocQValue();
	a->Samples=SAMPLE(n->Samples,NAdded);
	a->NSamples=n->NSamples-NAdded;
	a->sortdim=n->sortdim;
	n->Children[1]=a;
	UpdateStats(a);
}
static int colorid=0;
static void Label(struct QuantizedValue *q, int updatecolor)
{
// fill in max/min values for tree, etc.
if (q)
{
Label(q->Children[0],updatecolor);
Label(q->Children[1],updatecolor);
if (! q->Children[0]) // leaf node?
{
if (updatecolor)
{
q->value=colorid++;
for(int j=0;j<q->NSamples;j++)
{
SAMPLE(q->Samples,j)->QNum=q->value;
SAMPLE(q->Samples,j)->qptr=q;
}
}
for(int i=0;i<current_ndims;i++)
{
q->Mins[i]=q->Mean[i];
q->Maxs[i]=q->Mean[i];
}
}
else
for(int i=0;i<current_ndims;i++)
{
q->Mins[i]=min(q->Children[0]->Mins[i],q->Children[1]->Mins[i]);
q->Maxs[i]=max(q->Children[0]->Maxs[i],q->Children[1]->Maxs[i]);
}
}
}
// Depth-first search for the leaf whose value equals code. The left subtree
// is searched before the right one. Returns the leaf, or 0 when no leaf
// matches. q must not be NULL.
struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code)
{
	if (q->Children[0])
	{
		struct QuantizedValue *hit=FindQNode(q->Children[0],code);
		if (hit)
			return hit;
		return FindQNode(q->Children[1],code);
	}

	// leaf
	if (code==q->value)
		return (struct QuantizedValue *) q;
	return 0;
}
// Debug consistency check: prints "error1"/"error2" to stdout for every
// dimension in which q's Maxs exceed max[] or q's Mins fall below min[].
// For an interior node both children are first checked against q's own box
// and then again against the box passed in by the caller.
void CheckInRange(struct QuantizedValue *q, uint8 *max, uint8 *min)
{
	if (!q)
		return;

	if (q->Children[0])
	{
		// non-leaf node
		int side;
		for (side=0; side<2; side++)
			CheckInRange(q->Children[side],q->Maxs, q->Mins);
		for (side=0; side<2; side++)
			CheckInRange(q->Children[side],max, min);
	}

	for (int d=0; d<current_ndims; d++)
	{
		if (q->Maxs[d]>max[d])
			printf("error1\n");
		if (q->Mins[d]<min[d])
			printf("error2\n");
	}
}
struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims,
int nvalues, uint8 *weights, int firstvalue)
{
SetNDims(ndims);
current_weights=weights;
current_root=AllocQValue();
current_root->Samples=s;
current_root->NSamples=nsamples;
UpdateStats(current_root);
while(--nvalues)
{
if (! FindWorst())
break; // if <n unique ones, stop now
SubdivideNode(ErrorNode,ErrorDim);
}
colorid=firstvalue;
Label(current_root,1);
return current_root;
}
// Lower bound on the weighted squared error between sample and anything
// inside q's [Mins,Maxs] box: per dimension, the distance from the sample to
// the nearest point of the box (zero when the sample lies inside it).
double MinimumError(struct QuantizedValue const *q, uint8 const *sample,
					int ndims, uint8 const *weights)
{
	double total=0;
	for (int d=0; d<ndims; d++)
	{
		int const v=sample[d];
		int nearest=v;
		if ((q->Mins[d]>v) || (q->Maxs[d]<v))
		{
			// outside the box: snap to the closer face
			if (v<=q->Mins[d])
				nearest=q->Mins[d];
			else
				nearest=q->Maxs[d];
		}
		total+=weights[d]*SQ(nearest-v);
	}
	return total;
}
// Upper bound on the weighted squared error between sample and anything
// inside q's [Mins,Maxs] box: per dimension, the distance from the sample to
// whichever of Mins/Maxs is farther away (Maxs on a tie).
double MaximumError(struct QuantizedValue const *q, uint8 const *sample,
					int ndims, uint8 const *weights)
{
	double total=0;
	for (int d=0; d<ndims; d++)
	{
		int const v=sample[d];
		int const distToMin=abs(v-q->Mins[d]);
		int const distToMax=abs(v-q->Maxs[d]);
		int farthest;
		if (distToMin>distToMax)
			farthest=q->Mins[d];
		else
			farthest=q->Maxs[d];
		total+=weights[d]*SQ(v-farthest);
	}
	return total;
}
// heap (priority queue) routines used for nearest-neighbor searches
// A binary min-heap of pointers to doubles, ordered by the pointed-to value.
// The routines below use 1-based indexing: the root is heap[1], and slot 0 is
// never used. No routine checks heap_n against MAXQUANT.
struct FHeap {
int heap_n;	// number of entries currently stored (entries live in heap[1..heap_n])
double *heap[MAXQUANT];
};
// Empties the heap by resetting its entry count; the slots are not cleared.
void InitHeap(struct FHeap *h)
{
	h->heap_n = 0;
}
// Sifts the entry at index k up towards the root: parents whose key is
// greater than or equal to the entry's key are moved down one level, and the
// entry is dropped into the slot that opens up.
void UpHeap(int k, struct FHeap *h)
{
	double *moving=h->heap[k];
	double const key=*moving;
	for (int parent=k/2; (k>1) && (key <= *(h->heap[parent])); parent=k/2)
	{
		h->heap[k]=h->heap[parent];
		k=parent;
	}
	h->heap[k]=moving;
}
// Adds elem to the heap: appends it in the next free slot and sifts it up.
// The heap stores the pointer itself, keyed by the double it points to.
// No capacity check is made.
void HeapInsert(struct FHeap *h,double *elem)
{
	int const slot=h->heap_n+1;
	h->heap_n=slot;
	h->heap[slot]=elem;
	UpHeap(slot,h);
}
// Sifts the entry at index k down: at each level the child with the smaller
// key is chosen (the right child on a tie) and moved up, until the entry's
// key is strictly less than that child's key or a node without children is
// reached.
void DownHeap(int k, struct FHeap *h)
{
	double *sinking=h->heap[k];
	int const lastParent=h->heap_n/2;
	while (k<=lastParent)
	{
		int child=2*k;
		if ((child<h->heap_n) && (*(h->heap[child]) >= *(h->heap[child+1])))
			child++;
		if (*sinking < *(h->heap[child]))
			break;
		h->heap[k]=h->heap[child];
		k=child;
	}
	h->heap[k]=sinking;
}
// Removes and returns the root entry (the pointer with the smallest key),
// or 0 when the heap is empty. The last entry is moved to the root and
// sifted down to restore heap order.
void *RemoveHeapItem(struct FHeap *h)
{
	if (h->heap_n==0)
		return 0;

	void *top=h->heap[1];
	h->heap[1]=h->heap[h->heap_n];
	h->heap_n--;
	DownHeap(1,h);
	return top;
}
// now, nearest neighbor finder. Use a heap to traverse the tree, stopping
// when there are no nodes with a minimum error < the current error.
// File-global work queue; FindMatch() re-initialises it on every call.
struct FHeap TheQueue;
// Computes node a's MinError for the current sample and queues the node if it
// could still beat the best error so far. Expands in place, so it relies on
// the names sample, ndims, weights and besterror being in scope at the call site.
#define PUSHNODE(a) { \
(a)->MinError=MinimumError(a,sample,ndims,weights); \
if ((a)->MinError < besterror) HeapInsert(&TheQueue,&(a)->MinError); \
}
// Finds the leaf under q with the smallest MinimumError for the given sample,
// using a best-first search driven by the TheQueue heap.
//   sample  - ndims values to match
//   ndims   - number of dimensions
//   weights - per-dimension error weights
//   q       - root of the tree to search
// Returns the best leaf, or 0 if none was found. On success the leaf's
// NQuant is incremented, the sample is added into its Sums, and the match
// error is added to the global SquaredError.
// Uses the file-global TheQueue, so calls share that state.
struct QuantizedValue *FindMatch(uint8 const *sample, int ndims,
uint8 *weights, struct QuantizedValue *q)
{
InitHeap(&TheQueue);
struct QuantizedValue *bestmatch=0;
double besterror=1.0e63;	// sentinel: larger than any error expected here
PUSHNODE(q);
for(;;)
{
// The heap holds pointers to nodes' MinError fields; the pointer is cast
// straight back to the node.
// NOTE(review): this presumably relies on MinError being the first member
// of struct QuantizedValue - confirm in quantize.h.
struct QuantizedValue *test=(struct QuantizedValue *)
RemoveHeapItem(&TheQueue);
if (! test) break; // heap empty
// printf("got pop node =%p minerror=%f\n",test,test->MinError);
// the heap yields the smallest MinError first, so nothing left can do better
if (test->MinError>besterror) break;
if (test->Children[0])
{
// it's a parent node. put the children on the queue
struct QuantizedValue *c1=test->Children[0];
struct QuantizedValue *c2=test->Children[1];
c1->MinError=MinimumError(c1,sample,ndims,weights);
if (c1->MinError < besterror)
HeapInsert(&TheQueue,&(c1->MinError));
c2->MinError=MinimumError(c2,sample,ndims,weights);
if (c2->MinError < besterror)
HeapInsert(&TheQueue,&(c2->MinError));
}
else
{
// it's a leaf node. This must be a new minimum or the MinError
// test would have failed.
if (test->MinError < besterror)
{
bestmatch=test;
besterror=test->MinError;
}
}
}
if (bestmatch)
{
// record the match so RecalcMeans() can later re-centre the leaf
SquaredError+=besterror;
bestmatch->NQuant++;
for(int i=0;i<ndims;i++)
bestmatch->Sums[i]+=sample[i];
}
return bestmatch;
}
// Re-centres every leaf under q on the samples that FindMatch() assigned to
// it: for a leaf with NQuant > 0, Mean becomes Sums/NQuant per dimension, and
// Sums and NQuant are reset to zero for the next round. Leaves with no
// matches are left unchanged. NULL is a no-op.
static void RecalcMeans(struct QuantizedValue *q)
{
	if (!q)
		return;

	if (q->Children[0])
	{
		// not a leaf: visit BOTH subtrees (the right subtree must not be
		// skipped, or its leaves keep stale means and accumulators).
		RecalcMeans(q->Children[0]);
		RecalcMeans(q->Children[1]);
		return;
	}

	// it's a leaf. Set the means
	if (q->NQuant)
	{
		for(int i=0;i<current_ndims;i++)
		{
			q->Mean[i]=(uint8) (q->Sums[i]/q->NQuant);
			q->Sums[i]=0;
		}
		q->NQuant=0;
	}
}
// Refines an existing tree after a round of FindMatch() calls: recomputes the
// leaf means from the accumulated sums, then refreshes the Mins/Maxs boxes
// without reassigning leaf values.
void OptimizeQuantizer(struct QuantizedValue *q, int ndims)
{
	SetNDims( ndims );
	RecalcMeans( q );	// reset q values
	Label( q, 0 );		// update max/mins
}
// Pre-order walk that calls UpdateStats() on every node under q.
// NULL is a no-op.
static void RecalcStats(struct QuantizedValue *q)
{
	if (!q)
		return;

	UpdateStats(q);
	for (int side=0; side<2; side++)
		RecalcStats(q->Children[side]);
}
// Recomputes the statistics of every node under q from its samples, then
// refreshes the Mins/Maxs boxes without reassigning leaf values.
void RecalculateValues(struct QuantizedValue *q, int ndims)
{
	SetNDims( ndims );
	RecalcStats( q );
	Label( q, 0 );
}

109
mathlib/randsse.cpp Normal file
View File

@ -0,0 +1,109 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: generates 4 random numbers in the range 0..1 quickly, using SIMD
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#include "mathlib/ssemath.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// see knuth volume 3 for insight.
// One independent stream of SIMD random numbers. Keeps a ring of 55 four-wide
// values and two cursors into it; each draw adds the two values under the
// cursors, wraps the sum back below 1.0, stores it over the K slot and
// returns it. Seed() must be called before RandSIMD(), since that is the only
// place the cursors are set.
class SIMDRandStreamContext
{
	fltx4 m_RandY[55];
	fltx4 *m_pRand_J, *m_pRand_K;

public:
	// Fills all 55x4 lanes from a simple integer recurrence on seed (each lane
	// gets the top 16 bits of the current seed scaled by 1/65536) and resets
	// the cursors to slots 23 and 54.
	void Seed( uint32 seed )
	{
		m_pRand_J=m_RandY+23; m_pRand_K=m_RandY+54;
		for(int i=0;i<55;i++)
		{
			for(int j=0;j<4;j++)
			{
				SubFloat( m_RandY[i], j) = (seed>>16)/65536.0;
				seed=(seed+1)*3141592621u;
			}
		}
	}

	// Returns the next four values of the stream.
	inline fltx4 RandSIMD( void )
	{
		// ret= rand[k]+rand[j]
		fltx4 retval=AddSIMD( *m_pRand_K, *m_pRand_J );

		// if ( ret>=1.0) ret-=1.0
		fltx4 overflow_mask=CmpGeSIMD( retval, Four_Ones );
		retval=SubSIMD( retval, AndSIMD( Four_Ones, overflow_mask ) );

		*m_pRand_K = retval;

		// update pointers w/ wrap-around. Test before stepping so that a
		// pointer to before the start of m_RandY is never formed (that
		// pointer arithmetic would be undefined behaviour).
		m_pRand_J = ( m_pRand_J == m_RandY ) ? m_RandY+54 : m_pRand_J-1;
		m_pRand_K = ( m_pRand_K == m_RandY ) ? m_RandY+54 : m_pRand_K-1;

		return retval;
	}
};
// Number of independent random streams available.
#define MAX_SIMULTANEOUS_RANDOM_STREAMS 32
// The streams themselves; all are seeded together by SeedRandSIMD().
static SIMDRandStreamContext s_SIMDRandContexts[MAX_SIMULTANEOUS_RANDOM_STREAMS];
// Nonzero when the stream with the same index has been handed out by
// GetSIMDRandContext() and not yet returned by ReleaseSIMDRandContext().
static volatile int s_nRandContextsInUse[MAX_SIMULTANEOUS_RANDOM_STREAMS];
// Seeds every stream; stream i is seeded with seed+i so the streams differ.
void SeedRandSIMD(uint32 seed)
{
	int nStream = 0;
	while ( nStream < MAX_SIMULTANEOUS_RANDOM_STREAMS )
	{
		s_SIMDRandContexts[nStream].Seed( seed+nStream );
		nStream++;
	}
}
// Draws the next four values from the stream with the given index.
// The index is not range-checked.
fltx4 RandSIMD( int nContextIndex )
{
	SIMDRandStreamContext &stream = s_SIMDRandContexts[nContextIndex];
	return stream.RandSIMD();
}
// Claims a free random stream and returns its index. Scans the in-use flags
// and tries to flip a free one from 0 to 1 with an interlocked
// compare-and-assign. If every stream is taken it asserts, sleeps and scans
// again, so it does not return until a stream becomes available.
// Pair with ReleaseSIMDRandContext().
int GetSIMDRandContext( void )
{
	for(;;)
	{
		for ( int nSlot = 0; nSlot < NELEMS( s_SIMDRandContexts ); nSlot++ )
		{
			if ( s_nRandContextsInUse[nSlot] )
				continue;										// already taken

			// looks free - try to take it!
			if ( ThreadInterlockedAssignIf( &( s_nRandContextsInUse[nSlot]), 1, 0 ) )
				return nSlot;									// done!
		}
		Assert(0);												// why don't we have enough buffers?
		ThreadSleep();
	}
}
// Marks the stream with index nContext as free again by clearing its in-use
// flag. The index is not range-checked.
void ReleaseSIMDRandContext( int nContext )
{
	volatile int &bInUse = s_nRandContextsInUse[ nContext ];
	bInUse = 0;
}
// Draws the next four values from stream 0, without claiming it.
fltx4 RandSIMD( void )
{
	SIMDRandStreamContext &stream = s_SIMDRandContexts[0];
	return stream.RandSIMD();
}

View File

@ -0,0 +1,112 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
// for high speed processing in tools.
//
// $NoKeywords: $
//
//=============================================================================//
#include "basetypes.h"
#include "mathlib/mathlib.h"
#include "mathlib/simdvectormatrix.h"
#include "mathlib/ssemath.h"
#include "tier0/dbg.h"
void CSIMDVectorMatrix::CreateFromRGBA_FloatImageData(int srcwidth, int srcheight,
float const *srcdata )
{
Assert( srcwidth && srcheight && srcdata );
SetSize( srcwidth, srcheight );
FourVectors *p_write_ptr=m_pData;
int n_vectors_per_source_line=(srcwidth >> 2);
int ntrailing_pixels_per_source_line=(srcwidth & 3);
for(int y=0;y<srcheight;y++)
{
float const *data_in=srcdata;
float *data_out=reinterpret_cast<float *>( p_write_ptr );
// copy full input blocks
for(int x=0;x<n_vectors_per_source_line;x++)
{
for(int c=0;c<3;c++)
{
data_out[0]=data_in[c]; // x0
data_out[1]=data_in[4+c]; // x1
data_out[2]=data_in[8+c]; // x2
data_out[3]=data_in[12+c]; // x3
data_out+=4;
}
data_in += 16;
}
// now, copy trailing data and pad with copies
if (ntrailing_pixels_per_source_line )
{
for(int c=0;c<3;c++)
{
for(int cp=0;cp<4; cp++)
{
int real_cp=min( cp, ntrailing_pixels_per_source_line-1 );
data_out[4*c+cp]= data_in[c+4*real_cp];
}
}
}
// advance ptrs to next line
p_write_ptr += m_nPaddedWidth;
srcdata += 4 * srcwidth;
}
}
void CSIMDVectorMatrix::RaiseToPower( float power )
{
int nv=NVectors();
if ( nv )
{
int fixed_point_exp=(int) ( 4.0*power );
FourVectors *src=m_pData;
do
{
src->x=Pow_FixedPoint_Exponent_SIMD( src->x, fixed_point_exp );
src->y=Pow_FixedPoint_Exponent_SIMD( src->y, fixed_point_exp );
src->z=Pow_FixedPoint_Exponent_SIMD( src->z, fixed_point_exp );
src++;
} while (--nv);
}
}
// Adds src to this matrix element by element. Both matrices must have the
// same width and height (asserted).
CSIMDVectorMatrix & CSIMDVectorMatrix::operator+=( CSIMDVectorMatrix const &src )
{
	Assert( m_nWidth == src.m_nWidth );
	Assert( m_nHeight == src.m_nHeight );

	FourVectors *pFrom = src.m_pData;
	FourVectors *pTo = m_pData;
	for ( int nLeft = NVectors(); nLeft != 0; --nLeft )	// !! speed !! inline more iters
	{
		*pTo += *pFrom;
		pTo++;
		pFrom++;
	}
	return *this;
}
// Scales every element of the matrix by src: src is replicated into a
// FourVectors and applied to each element with VProduct.
CSIMDVectorMatrix & CSIMDVectorMatrix::operator*=( Vector const &src )
{
	int nLeft = NVectors();
	if ( nLeft == 0 )
		return *this;

	FourVectors scale;
	scale.DuplicateVector( src );

	for ( FourVectors *pCur = m_pData; nLeft != 0; --nLeft, pCur++ )	// !! speed !! inline more iters
	{
		pCur->VProduct( scale );
	}
	return *this;
}

View File

@ -0,0 +1,218 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: noise() primitives.
//
//=====================================================================================//
#include <math.h>
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#include "mathlib/noise.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// generate high quality noise based upon "sparse convolution". Higher quality than perlin noise,
// and no directional artifacts.
#include "noisedata.h"
// Number of impulses CellNoise() evaluates for each lattice cell.
#define N_IMPULSES_PER_CELL 5
// Currently 1.0; the commented-out expression below is an alternative value.
#define NORMALIZING_FACTOR 1.0
//(0.5/N_IMPULSES_PER_CELL)
// Maps a coordinate to its lattice cell index: floor(x), wrapped into 0..255
// by keeping the low eight bits.
static inline int LatticeCoord(float x)
{
	int const nCell = (int) floor(x);
	return nCell & 0xff;
}
// Hashes a lattice cell (ix,iy,iz) and an impulse number idx into a table
// index by chaining the four permutation tables: each stage adds the next
// coordinate to the previous result and looks it up modulo 256.
// Returns the final perm_d entry.
static inline int Hash4D(int ix, int iy, int iz, int idx)
{
	// ix must be wrapped like the other coordinates: callers pass
	// neighbouring cells (cell-1 .. cell+1), so it can be -1 or 256, which
	// would otherwise read outside perm_a.
	int ret=perm_a[ix & 0xff];
	ret=perm_b[(ret+iy) & 0xff];
	ret=perm_c[(ret+iz) & 0xff];
	ret=perm_d[(ret+idx) & 0xff];
	return ret;
}
// Square of x. Note: the argument is evaluated twice.
#define SQ(x) ((x)*(x))

// Sums the contributions of one lattice cell's impulses to a sample point.
//   ix, iy, iz          - lattice cell
//   xfrac, yfrac, zfrac - sample position relative to that cell's origin
//   pNoiseShapeFunction - falloff applied to (1 - distance)
// Each of the N_IMPULSES_PER_CELL impulses is placed using the impulse
// coordinate tables at the index given by Hash4D; only impulses closer than
// 1.0 to the sample point contribute.
static float CellNoise( int ix, int iy, int iz, float xfrac, float yfrac, float zfrac,
						float (*pNoiseShapeFunction)(float) )
{
	float flTotal=0;
	int nImpulse=0;
	while ( nImpulse<N_IMPULSES_PER_CELL )
	{
		int const nTableIdx=Hash4D( ix, iy, iz, nImpulse );
		float flDist=SQ(impulse_xcoords[nTableIdx]-xfrac)+
			SQ(impulse_ycoords[nTableIdx]-yfrac)+
			SQ(impulse_zcoords[nTableIdx]-zfrac);
		flDist = sqrt( flDist );
		if (flDist < 1.0 )
			flTotal += (*pNoiseShapeFunction)( 1-flDist );
		nImpulse++;
	}
	return flTotal;
}
// Sparse convolution noise at pnt using QuinticInterpolatingPolynomial as the
// impulse shape function.
float SparseConvolutionNoise( Vector const &pnt )
{
	float const flNoise = SparseConvolutionNoise( pnt, QuinticInterpolatingPolynomial );
	return flNoise;
}
// Sums n_octaves of sparse convolution noise at pnt. Each successive octave
// samples the noise at twice the frequency and contributes half the
// amplitude; the total is divided by the sum of the amplitudes.
// Returns 0 when n_octaves <= 0.
float FractalNoise( Vector const &pnt, int n_octaves)
{
	// With no octaves the amplitude sum is zero and the normalisation below
	// would compute 0 * (1/0), i.e. NaN.
	if ( n_octaves <= 0 )
		return 0.0f;

	float scale=1.0;	// frequency multiplier for the current octave
	float iscale=1.0;	// amplitude of the current octave
	float ret=0;
	float sumscale=0;
	for(int o=0;o<n_octaves;o++)
	{
		Vector p1=pnt;
		p1 *= scale;
		ret+=iscale * SparseConvolutionNoise( p1 );
		sumscale += iscale;
		scale *= 2.0;
		iscale *= 0.5;
	}
	return ret * ( 1.0/sumscale );
}
// Like FractalNoise, but each octave contributes |2*(noise-0.5)| instead of
// the raw noise value. Each successive octave samples at twice the frequency
// and contributes half the amplitude; the total is divided by the sum of the
// amplitudes.
// Returns 0 when n_octaves <= 0.
float Turbulence( Vector const &pnt, int n_octaves)
{
	// With no octaves the amplitude sum is zero and the normalisation below
	// would compute 0 * (1/0), i.e. NaN.
	if ( n_octaves <= 0 )
		return 0.0f;

	float scale=1.0;	// frequency multiplier for the current octave
	float iscale=1.0;	// amplitude of the current octave
	float ret=0;
	float sumscale=0;
	for(int o=0;o<n_octaves;o++)
	{
		Vector p1=pnt;
		p1 *= scale;
		ret+=iscale * fabs ( 2.0*( SparseConvolutionNoise( p1 )-.5 ) );
		sumscale += iscale;
		scale *= 2.0;
		iscale *= 0.5;
	}
	return ret * ( 1.0/sumscale );
}
#ifdef MEASURE_RANGE
// Smallest and largest raw sums seen so far (only tracked when MEASURE_RANGE
// is defined).
float fmin1=10000000.0;
float fmax1=-1000000.0;
#endif

// Sparse convolution noise at pnt with a caller-supplied impulse shape.
// Sums CellNoise over the 3x3x3 block of lattice cells centred on the cell
// containing pnt, then remaps the raw sum from [.544487, 9.219176] to
// [0, 1] with clamping. Neighbouring cell indices are passed on as
// cell-1 .. cell+1 without wrapping.
float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) )
{
	// integer lattice cell containing the point
	int const nCellX=LatticeCoord(pnt.x);
	int const nCellY=LatticeCoord(pnt.y);
	int const nCellZ=LatticeCoord(pnt.z);

	// position of the point within that cell
	float const flFracX=pnt.x-floor(pnt.x);
	float const flFracY=pnt.y-floor(pnt.y);
	float const flFracZ=pnt.z-floor(pnt.z);

	float flSum=0.;
	for ( int dx=-1; dx<=1; dx++ )
	{
		for ( int dy=-1; dy<=1; dy++ )
		{
			for ( int dz=-1; dz<=1; dz++ )
			{
				flSum += CellNoise( nCellX+dx, nCellY+dy, nCellZ+dz,
									flFracX-dx, flFracY-dy, flFracZ-dz,
									pNoiseShapeFunction );
			}
		}
	}

#ifdef MEASURE_RANGE
	fmin1=min(flSum,fmin1);
	fmax1=max(flSum,fmax1);
#endif
	return RemapValClamped( flSum, .544487, 9.219176, 0.0, 1.0 );
}
// Improved Perlin Noise
// The following code is the c-ification of Ken Perlin's new noise algorithm
// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN"
// as available here: http://mrl.nyu.edu/~perlin/noise/
// Gradient function of the improved Perlin noise: the low four bits of hash
// pick two of the three coordinates and a sign for each, and the signed pair
// is summed.
float NoiseGradient(int hash, float x, float y, float z)
{
	int const h = hash & 15;		// only the low 4 bits matter

	// first term: x for h in 0..7, y otherwise
	float u;
	if ( h < 8 )
		u = x;
	else
		u = y;

	// second term: y for h in 0..3, x for h == 12 or 14, z otherwise
	float v;
	if ( h < 4 )
		v = y;
	else if ( h == 12 || h == 14 )
		v = x;
	else
		v = z;

	// bit 0 negates the first term, bit 1 negates the second
	if ( h & 1 )
		u = -u;
	if ( h & 2 )
		v = -v;
	return u + v;
}
// Looks up entry (i mod 256) of the permutation table used by the improved
// Perlin noise. Only the low eight bits of i are used, so any int is a valid
// argument.
int NoiseHashIndex( int i )
{
	static const int s_permutation[] =
	{
		151,160,137,91,90,15,
		131,13,201,95,96,53,194,233,7,225,140,36,103,30,69,142,8,99,37,240,21,10,23,
		190, 6,148,247,120,234,75,0,26,197,62,94,252,219,203,117,35,11,32,57,177,33,
		88,237,149,56,87,174,20,125,136,171,168, 68,175,74,165,71,134,139,48,27,166,
		77,146,158,231,83,111,229,122,60,211,133,230,220,105,92,41,55,46,245,40,244,
		102,143,54, 65,25,63,161, 1,216,80,73,209,76,132,187,208, 89,18,169,200,196,
		135,130,116,188,159,86,164,100,109,198,173,186, 3,64,52,217,226,250,124,123,
		5,202,38,147,118,126,255,82,85,212,207,206,59,227,47,16,58,17,182,189,28,42,
		223,183,170,213,119,248,152, 2,44,154,163, 70,221,153,101,155,167, 43,172,9,
		129,22,39,253, 19,98,108,110,79,113,224,232,178,185, 112,104,218,246,97,228,
		251,34,242,193,238,210,144,12,191,179,162,241, 81,51,145,235,249,14,239,107,
		49,192,214, 31,181,199,106,157,184, 84,204,176,115,121,50,45,127, 4,150,254,
		138,236,205,93,222,114,67,29,24,72,243,141,128,195,78,66,215,61,156,180
	};

	int const nSlot = i & 0xff;
	return s_permutation[ nSlot ];
}
// Evaluates improved Perlin noise at 'pnt'.
// The point is located inside a unit lattice cell (cell coordinates wrapped
// to 0..255), a gradient value is computed for each of the cell's 8 corners
// via NoiseHashIndex/NoiseGradient, and the 8 values are blended with three
// nested Lerps weighted by QuinticInterpolatingPolynomial of the local offsets.
float ImprovedPerlinNoise( Vector const &pnt )
{
	// Lower corner of the containing unit cube.
	const float flCellX = floor(pnt.x);
	const float flCellY = floor(pnt.y);
	const float flCellZ = floor(pnt.z);

	// Cell coordinates wrapped into the hash table's index range.
	const int nCellX = (int)flCellX & 255;
	const int nCellY = (int)flCellY & 255;
	const int nCellZ = (int)flCellZ & 255;

	// Position of the point relative to the cube's lower corner.
	const float flLocalX = pnt.x - flCellX;
	const float flLocalY = pnt.y - flCellY;
	const float flLocalZ = pnt.z - flCellZ;

	// Blend weights along each axis.
	const float flFadeX = QuinticInterpolatingPolynomial(flLocalX);
	const float flFadeY = QuinticInterpolatingPolynomial(flLocalY);
	const float flFadeZ = QuinticInterpolatingPolynomial(flLocalZ);

	// Chain the hash through x, then y, then z. Suffix digits are the
	// corner's x and y offsets (0 = lower, 1 = upper).
	const int nHashX0 = NoiseHashIndex( nCellX ) + nCellY;
	const int nHashX1 = NoiseHashIndex( nCellX + 1 ) + nCellY;
	const int nHash00 = NoiseHashIndex( nHashX0 ) + nCellZ;
	const int nHash01 = NoiseHashIndex( nHashX0 + 1 ) + nCellZ;
	const int nHash10 = NoiseHashIndex( nHashX1 ) + nCellZ;
	const int nHash11 = NoiseHashIndex( nHashX1 + 1 ) + nCellZ;

	// Gradient contribution of each corner; names read flGrad<x><y><z>.
	const float flGrad000 = NoiseGradient( NoiseHashIndex( nHash00 ), flLocalX, flLocalY, flLocalZ );
	const float flGrad100 = NoiseGradient( NoiseHashIndex( nHash10 ), flLocalX-1, flLocalY, flLocalZ );
	const float flGrad010 = NoiseGradient( NoiseHashIndex( nHash01 ), flLocalX, flLocalY-1, flLocalZ );
	const float flGrad110 = NoiseGradient( NoiseHashIndex( nHash11 ), flLocalX-1, flLocalY-1, flLocalZ );
	const float flGrad001 = NoiseGradient( NoiseHashIndex( nHash00+1 ), flLocalX, flLocalY, flLocalZ-1 );
	const float flGrad101 = NoiseGradient( NoiseHashIndex( nHash10+1 ), flLocalX-1, flLocalY, flLocalZ-1 );
	const float flGrad011 = NoiseGradient( NoiseHashIndex( nHash01+1 ), flLocalX, flLocalY-1, flLocalZ-1 );
	const float flGrad111 = NoiseGradient( NoiseHashIndex( nHash11+1 ), flLocalX-1, flLocalY-1, flLocalZ-1 );

	// Blend along x on each of the four cube edges parallel to x...
	const float flEdgeY0Z0 = Lerp( flFadeX, flGrad000, flGrad100 );
	const float flEdgeY1Z0 = Lerp( flFadeX, flGrad010, flGrad110 );
	const float flEdgeY0Z1 = Lerp( flFadeX, flGrad001, flGrad101 );
	const float flEdgeY1Z1 = Lerp( flFadeX, flGrad011, flGrad111 );

	// ...then along y on the two z faces...
	const float flFaceZ0 = Lerp( flFadeY, flEdgeY0Z0, flEdgeY1Z0 );
	const float flFaceZ1 = Lerp( flFadeY, flEdgeY0Z1, flEdgeY1Z1 );

	// ...and finally along z.
	return Lerp( flFadeZ, flFaceZ0, flFaceZ1 );
}

124
mathlib/spherical.cpp Normal file
View File

@ -0,0 +1,124 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: spherical math routines
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#include "mathlib/spherical_geometry.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// Lookup table of factorials: s_flFactorials[n] == n! for n = 0..33 (34 entries).
// Values are stored as float, so the larger entries are rounded to float precision.
// Indexed by SHNormalizationFactor with ( nL - nM ) and ( nL + nM ); nothing here
// guards against indices outside 0..33.
float s_flFactorials[]={
1.,
1.,
2.,
6.,
24.,
120.,
720.,
5040.,
40320.,
362880.,
3628800.,
39916800.,
479001600.,
6227020800.,
87178291200.,
1307674368000.,
20922789888000.,
355687428096000.,
6402373705728000.,
121645100408832000.,
2432902008176640000.,
51090942171709440000.,
1124000727777607680000.,
25852016738884976640000.,
620448401733239439360000.,
15511210043330985984000000.,
403291461126605635584000000.,
10888869450418352160768000000.,
304888344611713860501504000000.,
8841761993739701954543616000000.,
265252859812191058636308480000000.,
8222838654177922817725562880000000.,
263130836933693530167218012160000000.,
8683317618811886495518194401280000000.
};
// Evaluates the associated Legendre polynomial P(nL, nM) at flX by recurrence.
//   nL  - degree
//   nM  - order
//   flX - evaluation point; sqrt( (1-x)(1+x) ) is taken when nM > 0
// Returns 0 when nL is below nM (the upward recurrence never runs).
float AssociatedLegendrePolynomial( int nL, int nM, float flX )
{
	// Seed value P(m,m): product over i = 0..m-1 of -(2i+1) * sqrt(1 - x^2).
	// For nM <= 0 the seed stays at 1.
	float flLower = 1.;
	if ( nM > 0 )
	{
		const float flRootOneMinusX2 = sqrt( ( 1 - flX ) * ( 1 + flX ) );
		float flOddFactor = 1.;
		int nStep = 0;
		while ( nStep < nM )
		{
			flLower *= -flOddFactor * flRootOneMinusX2;
			flOddFactor += 2.0;
			++nStep;
		}
	}
	if ( nL == nM )
	{
		return flLower;
	}

	// One step up: P(m+1,m) = x * (2m+1) * P(m,m).
	float flUpper = flX * ( 2.0 * nM + 1.0 ) * flLower;
	if ( nL == nM + 1 )
	{
		return flUpper;
	}

	// General step:
	//   (l-m) * P(l,m) = (2l-1) * x * P(l-1,m) - (l+m-1) * P(l-2,m)
	float flCurrent = 0.;
	for ( int nDegree = nM + 2; nDegree <= nL; ++nDegree )
	{
		flCurrent = ( ( 2.0 * nDegree - 1.0 ) * flX * flUpper - ( nDegree + nM - 1.0 ) * flLower ) * ( 1.0 / ( nDegree - nM ) );
		flLower = flUpper;
		flUpper = flCurrent;
	}
	return flCurrent;
}
// Normalization constant for the spherical harmonic of degree nL and order nM:
//   sqrt( (2l+1) * (l-m)! / ( 4 * pi * (l+m)! ) )
// Factorials come from s_flFactorials; the indices ( nL - nM ) and ( nL + nM )
// are not range-checked here.
static float SHNormalizationFactor( int nL, int nM )
{
	const double flNumerator = ( 2. * nL + 1.0 ) * s_flFactorials[ nL - nM ];
	const double flDenominator = 4. * M_PI * s_flFactorials[ nL + nM ];
	return sqrt( flNumerator / flDenominator );
}
#define SQRT_2 1.414213562373095
// Real spherical harmonic of degree nL and order nM.
//   flPhi      - angle fed to the cos( m * phi ) / sin( |m| * phi ) term
//   flCosTheta - argument of the associated Legendre polynomial
//   flTheta    - not read by this overload; only flCosTheta is used
// nM == 0 uses no trigonometric term, nM > 0 uses cos, nM < 0 uses sin with |nM|.
FORCEINLINE float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi, float flCosTheta )
{
	if ( nM == 0 )
	{
		return SHNormalizationFactor( nL, 0 ) * AssociatedLegendrePolynomial( nL, nM, flCosTheta );
	}

	// Positive and negative orders share the same |m| for the normalization
	// and Legendre terms and differ only in the trigonometric factor.
	const int nAbsM = ( nM > 0 ) ? nM : -nM;
	const double flTrigTerm = ( nM > 0 ) ? cos ( nAbsM * flPhi ) : sin( nAbsM * flPhi );
	return SQRT_2 * SHNormalizationFactor( nL, nAbsM ) * flTrigTerm *
		AssociatedLegendrePolynomial( nL, nAbsM, flCosTheta );
}
// Convenience overload: derives cos( flTheta ) and forwards to the
// five-argument SphericalHarmonic.
float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi )
{
	const float flCosTheta = cos( flTheta );
	return SphericalHarmonic( nL, nM, flTheta, flPhi, flCosTheta );
}
// Evaluates the real spherical harmonic of degree nL and order nM in the
// direction vecDirection, which is asserted to be (approximately) unit length.
//
// Angle convention, matching the scalar overloads (Legendre term evaluated at
// cos( theta ), trigonometric term at m * phi):
//   theta - polar angle measured from +Z, so cos( theta ) == vecDirection.z
//   phi   - azimuth in the XY plane, atan2( y, x ); 0 when x and y are both 0
float SphericalHarmonic( int nL, int nM, Vector const &vecDirection )
{
	Assert( fabs( VectorLength( vecDirection ) - 1.0 ) < 0.0001 );

	// The assert tolerates slightly non-unit vectors, so clamp z to keep
	// acos() and the Legendre sqrt( 1 - x^2 ) inside their domains.
	float flCosTheta = vecDirection.z;
	if ( flCosTheta > 1.0f )
	{
		flCosTheta = 1.0f;
	}
	else if ( flCosTheta < -1.0f )
	{
		flCosTheta = -1.0f;
	}
	float flTheta = acos( flCosTheta );

	// Azimuth is undefined at the poles; use 0 there.
	float flPhi = 0;
	float S = Square( vecDirection.x ) + Square( vecDirection.y );
	if ( S > 0 )
	{
		flPhi = atan2( vecDirection.y, vecDirection.x );
	}
	return SphericalHarmonic( nL, nM, flTheta, flPhi, flCosTheta );
}

1107
mathlib/sse.cpp Normal file

File diff suppressed because it is too large Load Diff

27
mathlib/sse.h Normal file
View File

@ -0,0 +1,27 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
//=====================================================================================//
#ifndef _SSE_H
#define _SSE_H
// Declarations for the _SSE_* / _SSE2_* math routines.
// NOTE(review): presumably implemented in mathlib/sse.cpp - confirm.
// NOTE(review): this header includes nothing itself but uses Vector and FASTCALL
// (and matrix3x4_t in the disabled block), so the including file must already
// have those declared.

// Scalar square root / reciprocal square root variants.
float _SSE_Sqrt(float x);
float _SSE_RSqrtAccurate(float a);
float _SSE_RSqrtFast(float x);
// Vector normalization; both take the vector by non-const reference.
// Only the first variant returns a float.
float FASTCALL _SSE_VectorNormalize(Vector& vec);
void FASTCALL _SSE_VectorNormalizeFast(Vector& vec);
float _SSE_InvRSquared(const float* v);
// Sine/cosine; results are written through the s and c pointers.
void _SSE_SinCos(float x, float* s, float* c);
float _SSE_cos( float x);
// SSE2 variants are only declared when PLATFORM_WINDOWS_PC32 is defined.
#ifdef PLATFORM_WINDOWS_PC32
void _SSE2_SinCos(float x, float* s, float* c);
float _SSE2_cos(float x);
#endif
// Disabled declarations (compiled out by #if 0).
#if 0
void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1);
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 );
#endif
#endif // _SSE_H

1164
mathlib/sseconst.cpp Normal file

File diff suppressed because it is too large Load Diff

109
mathlib/ssenoise.cpp Normal file
View File

@ -0,0 +1,109 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Fast low quality noise suitable for real time use
//
//=====================================================================================//
#include <math.h>
#include <float.h> // Needed for FLT_EPSILON
#include "basetypes.h"
#include <memory.h>
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "mathlib/vector.h"
#include "mathlib/ssemath.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
#include "noisedata.h"
// Float-to-fixed-point trick used by NoiseSIMD: MAGIC_NUMBER (2^15) is added to
// each coordinate and the low 16 bits of the resulting float's bit pattern are
// then read as an integer. NoiseSIMD treats the low 8 of those bits as a
// fraction (scaled by 1/256) and the next 8 bits as the lattice index.
// NOTE(review): this presumably relies on IEEE single precision and on inputs
// in the range [0, 32768) so that the sum keeps a fixed exponent - confirm.
#define MAGIC_NUMBER (1<<15) // gives 8 bits of fraction
static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
// Per-lane bit mask. Despite the MASK255 name, each lane holds 0xffff, i.e. it
// keeps the low 16 bits (8 index bits + 8 fraction bits), not the low 8.
static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff};
// Reinterprets the integer mask array as a fltx4 so it can be passed to AndSIMD.
#define MASK255 (*((fltx4 *)(& idx_mask )))
// Looks up the noise value stored for integer lattice point (idx_x, idx_y, idx_z).
// The three coordinates are hashed in sequence through perm_a, perm_b and perm_c
// (each index wrapped to 0..255), and the final hash selects an entry of
// impulse_xcoords. Per the original note, the returned value lies in 0..1
// (the tables come from noisedata.h and are not visible here).
static inline float GetLatticePointValue( int idx_x, int idx_y, int idx_z )
{
	// These noisedata.h tables are not used by this file; mark them as such.
	NOTE_UNUSED(perm_d);
	NOTE_UNUSED(impulse_ycoords);
	NOTE_UNUSED(impulse_zcoords);

	const int nHashX = perm_a[idx_x & 0xff];
	const int nHashXY = perm_b[( idx_y + nHashX ) & 0xff];
	const int nHashXYZ = perm_c[( idx_z + nHashXY ) & 0xff];
	return impulse_xcoords[nHashXYZ];
}
// Evaluates lattice noise for four points at once. x, y and z each carry one
// coordinate for four points. For every point the 8 surrounding lattice values
// are fetched with GetLatticePointValue and blended by trilinear interpolation
// using the fractional part of each coordinate.
fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z )
{
// Add the magic number and mask so the low 16 bits of each lane hold the
// coordinate as 8 bits of lattice index followed by 8 bits of fraction.
fltx4 x_idx = AndSIMD( MASK255, AddSIMD( x, Four_MagicNumbers ) );
fltx4 y_idx = AndSIMD( MASK255, AddSIMD( y, Four_MagicNumbers ) );
fltx4 z_idx = AndSIMD( MASK255, AddSIMD( z, Four_MagicNumbers ) );
// latticeXYZ holds, per lane, the lattice value at cell offset (X, Y, Z).
fltx4 lattice000 = Four_Zeros, lattice001 = Four_Zeros, lattice010 = Four_Zeros, lattice011 = Four_Zeros;
fltx4 lattice100 = Four_Zeros, lattice101 = Four_Zeros, lattice110 = Four_Zeros, lattice111 = Four_Zeros;
// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
// Converting the indexed noise values back to vectors will cause more (128 bytes)
// The noise table could store vectors if we chunked it into 2x2x2 blocks.
fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
// DOPASS(i) processes lane i: it reads the packed index/fraction integers,
// stores the fraction (low 8 bits / 256) into [xyz]frac, shifts the fraction
// away to get the lattice cell, and gathers the cell's 8 corner values.
// Note: the macro is not #undef'd after use.
#define DOPASS(i) \
{ unsigned int xi = SubInt( x_idx, i ); \
unsigned int yi = SubInt( y_idx, i ); \
unsigned int zi = SubInt( z_idx, i ); \
SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0); \
SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0); \
SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0); \
xi>>=8; \
yi>>=8; \
zi>>=8; \
\
SubFloat( lattice000, i ) = GetLatticePointValue( xi,yi,zi ); \
SubFloat( lattice001, i ) = GetLatticePointValue( xi,yi,zi+1 ); \
SubFloat( lattice010, i ) = GetLatticePointValue( xi,yi+1,zi ); \
SubFloat( lattice011, i ) = GetLatticePointValue( xi,yi+1,zi+1 ); \
SubFloat( lattice100, i ) = GetLatticePointValue( xi+1,yi,zi ); \
SubFloat( lattice101, i ) = GetLatticePointValue( xi+1,yi,zi+1 ); \
SubFloat( lattice110, i ) = GetLatticePointValue( xi+1,yi+1,zi ); \
SubFloat( lattice111, i ) = GetLatticePointValue( xi+1,yi+1,zi+1 ); \
}
DOPASS( 0 );
DOPASS( 1 );
DOPASS( 2 );
DOPASS( 3 );
// now, we have 8 lattice values for each of four points as m128s, and interpolant values for
// each axis in m128 form in [xyz]frac. Perform the trilinear interpolation as SIMD ops
// first, do x interpolation
fltx4 l2d00 = AddSIMD( lattice000, MulSIMD( xfrac, SubSIMD( lattice100, lattice000 ) ) );
fltx4 l2d01 = AddSIMD( lattice001, MulSIMD( xfrac, SubSIMD( lattice101, lattice001 ) ) );
fltx4 l2d10 = AddSIMD( lattice010, MulSIMD( xfrac, SubSIMD( lattice110, lattice010 ) ) );
fltx4 l2d11 = AddSIMD( lattice011, MulSIMD( xfrac, SubSIMD( lattice111, lattice011 ) ) );
// now, do y interpolation
fltx4 l1d0 = AddSIMD( l2d00, MulSIMD( yfrac, SubSIMD( l2d10, l2d00 ) ) );
fltx4 l1d1 = AddSIMD( l2d01, MulSIMD( yfrac, SubSIMD( l2d11, l2d01 ) ) );
// final z interpolation
fltx4 rslt = AddSIMD( l1d0, MulSIMD( zfrac, SubSIMD( l1d1, l1d0 ) ) );
// Final remap: ( rslt - Four_PointFives ) * Four_Twos.
// NOTE(review): the original comment here read "map to 0..1". If the constants
// are 0.5 and 2 as their names suggest and the lattice values are in 0..1, the
// result is in -1..1 instead - confirm the intended output range.
return MulSIMD( Four_Twos, SubSIMD( rslt, Four_PointFives ) );
}
// Convenience overload: evaluates NoiseSIMD on the x, y and z members of a
// FourVectors.
fltx4 NoiseSIMD( FourVectors const &pos )
{
	const fltx4 flNoise = NoiseSIMD( pos.x, pos.y, pos.z );
	return flNoise;
}

12
mathlib/vector.cpp Normal file
View File

@ -0,0 +1,12 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//
//=============================================================================//
#include "mathlib/vector.h"
// Global Vector constructed with all three arguments equal to 0.
Vector vec3_origin(0,0,0);

1293
mathlib/vmatrix.cpp Normal file

File diff suppressed because it is too large Load Diff