1745 lines
59 KiB
C++
1745 lines
59 KiB
C++
//
|
|
// ai/spatialarray.cpp
|
|
//
|
|
// Copyright (C) 1999-2011 Rockstar Games. All Rights Reserved.
|
|
//
|
|
|
|
#include "ai/spatialarray.h"
|
|
|
|
#include "math/amath.h"
|
|
#include "system/criticalsection.h"
|
|
#include "system/memory.h"
|
|
#include "system/system.h"
|
|
#include "vector/colors.h"
|
|
#include "vectormath/classes.h"
|
|
|
|
#if __DEV
|
|
#include "grcore/debugdraw.h"
|
|
#endif
|
|
|
|
#define SA_STATS 0
|
|
#define Align16(x) (((x)+15)&~15)
|
|
|
|
#if SA_STATS

#include "profile/profiler.h"

// Profiler page, group and timers for the spatial array. Only compiled in
// when SA_STATS is set to a non-zero value above.
namespace CSpatialArrayStats
{
	PF_PAGE(SpatialArray, "Spatial Array");
	PF_GROUP(Update);
	PF_LINK(SpatialArray, Update);
	PF_TIMER(Insert, Update);
	PF_TIMER(Remove, Update);
	PF_TIMER(Update, Update);
	PF_TIMER(GetTypeFlags, Update);
	PF_TIMER(SetTypeFlags, Update);
	PF_TIMER(FindClosest3, Update);
	PF_TIMER(FindClosest4, Update);
	PF_TIMER(FindInCylinderXY, Update);
	PF_TIMER(FindInSphere, Update);
	PF_TIMER(FindInSphereOfType, Update);
	PF_TIMER(FindNearSegment, Update);
	PF_TIMER(FindBelowZ, Update);
}

using namespace CSpatialArrayStats;

// With stats enabled, the SA_PF_* macros forward to the profiler macros.
#define SA_PF_START(x)	PF_START(x)
#define SA_PF_STOP(x)	PF_STOP(x)
#define SA_PF_FUNC(x)	PF_FUNC(x)

#else	// SA_STATS

// With stats disabled, the SA_PF_* macros expand to nothing.
#define SA_PF_START(x)
#define SA_PF_STOP(x)
#define SA_PF_FUNC(x)

#endif	// SA_STATS
|
|
|
|
#if SPATIALARRAY64BIT
|
|
|
|
namespace
|
|
{
|
|
|
|
u64 sNodePtrToU64(const CSpatialArrayNode* nodePtr)
|
|
{
|
|
#if __64BIT
|
|
return (u64)nodePtr;
|
|
#else
|
|
// Could do this to test:
|
|
// u32 upper = (u32)nodePtr + 1;
|
|
// u32 lower = (u32)nodePtr;
|
|
// return ((u64)(upper) << 32) + (u64)lower;
|
|
return (u64)(u32)nodePtr;
|
|
#endif
|
|
}
|
|
|
|
} // anon namespace
|
|
|
|
#endif // SPATIALARRAY64BIT
|
|
|
|
|
|
bool CSpatialArray::sm_UseLock = false;
|
|
|
|
// Placed at the top of a scope that touches the array contents. Declares a
// scoped sysCriticalSection named 'cs' on m_Lock, passing sm_UseLock as its
// second argument (presumably "actually take the lock" - confirm against
// system/criticalsection.h). When locking is disabled, asserts instead that
// the caller is running on the update thread.
// Note: this intentionally introduces 'cs' into the enclosing scope, so it
// can be used at most once per scope.
#define SPATIALARRAYTHREADLOCK \
	sysCriticalSection cs(m_Lock, sm_UseLock); \
	if (!sm_UseLock) \
	{ \
		Assertf(CSystem::IsThisThreadId(SYS_THREAD_UPDATE), "CSpatialArray used from another thread than the main thread, this may not be safe."); \
	}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// CSpatialArray
|
|
|
|
// Sets up a spatial array on top of caller-provided storage.
//
// PARAMS:
//	buffer	- Memory to use for the internal arrays. Must be non-NULL and
//			  16 byte aligned. It is carved up, in this order, into:
//				maxObj floats	(X positions)
//				maxObj floats	(Y positions)
//				maxObj floats	(Z positions)
//				node addresses	(two u32 arrays, upper then lower halves, if
//								 SPATIALARRAY64BIT, otherwise one array of
//								 CSpatialArrayNodeAddr)
//				maxObj u32s		(type flags)
//	maxObj	- Capacity in objects. Must be a multiple of 4 (the search
//			  functions process four objects per vector), must not exceed
//			  kMaxObjForTempBuffer, and maxObj*sizeof(float) must fit in
//			  16 bits since nodes store their byte offset as a u16.
// NOTES:
//	Only the node address array(s) are cleared here; positions and type
//	flags are left as they were in the buffer.
CSpatialArray::CSpatialArray(void *buffer, int maxObj)
		: m_MaxObj(maxObj)
		, m_NumObj(0)
{
	// The use of vector selection for addresses wouldn't work right
	// away on 64 bit pointers. It should still be possible to vectorize
	// it efficiently with 128 bit vectors, but we would need one high
	// and one low vector register for four pointers or something.
	// That work hasn't been done, so generate some errors if this is about
	// to happen. /FF
#if !SPATIALARRAY64BIT
#if __64BIT
	// To not prevent 64 bit applications to build, this is just a run time error
	// (it's very possible that the application compiles with this class without
	// actually using it). /FF
	Errorf("CSpatialArray was not designed for 64 bit pointers, probably won't work correctly right now.");
#else
	// If we are not running in 64 bit mode, I wouldn't expect pointers to be
	// anything but 32 bits, so if they are, we may as well catch it at compile time. /FF
	CompileTimeAssert(sizeof(void*) == 4);
#endif
#endif	// !SPATIALARRAY64BIT

	// A NULL buffer would pass the alignment checks below, so catch it explicitly.
	Assert(buffer);

	// With maxObj a multiple of 4 and 4 byte elements, every sub-array starts
	// on a 16 byte boundary as long as the buffer itself does.
	Assert((maxObj & 3) == 0);

	float *posXArray = (float*)buffer;
	float *posYArray = posXArray + maxObj;
	float *posZArray = posYArray + maxObj;

#if SPATIALARRAY64BIT
	u32 *nodesUpper = (u32*)(posZArray + maxObj);
	u32 *nodesLower = nodesUpper + maxObj;
	u32 *typeFlagArray = (u32*)(nodesLower + maxObj);
#else
	CSpatialArrayNodeAddr *nodes = (CSpatialArrayNodeAddr*)(posZArray + maxObj);
	u32 *typeFlagArray = (u32*)(nodes + maxObj);
#endif

	// All arrays are read as vectors, so they need to be 16 byte aligned.
	// (Note that Align16() can't be used for this check: it rounds up, and is
	// thus non-zero for any non-NULL address.)
	Assert(((size_t)posXArray & 0xf) == 0);
	Assert(((size_t)posYArray & 0xf) == 0);
	Assert(((size_t)posZArray & 0xf) == 0);
#if SPATIALARRAY64BIT
	Assert(((size_t)nodesUpper & 0xf) == 0);
	Assert(((size_t)nodesLower & 0xf) == 0);
#else
	Assert(((size_t)nodes & 0xf) == 0);
#endif
	Assert(((size_t)typeFlagArray & 0xf) == 0);

	m_PosXArray = posXArray;
	m_PosYArray = posYArray;
	m_PosZArray = posZArray;
#if SPATIALARRAY64BIT
	m_NodeArrayUpper = nodesUpper;
	m_NodeArrayLower = nodesLower;
#else
	m_NodeArray = nodes;
#endif
	m_TypeFlagArray = typeFlagArray;

	// For some of the vector operations to work at the end of the array,
	// we make sure to keep the node pointers to NULL. /FF
#if SPATIALARRAY64BIT
	sysMemSet((void*)nodesUpper, 0, sizeof(u32)*maxObj);
	sysMemSet((void*)nodesLower, 0, sizeof(u32)*maxObj);
#else
	sysMemSet((void*)nodes, 0, sizeof(CSpatialArrayNodeAddr)*maxObj);
#endif

	Assert(kMaxObjForTempBuffer >= maxObj);

	// Nodes store their byte offset into the arrays in 16 bits.
	Assert(maxObj*sizeof(float) <= 0xffff);
}
|
|
|
|
void CSpatialArray::Reset()
|
|
{
|
|
SPATIALARRAYTHREADLOCK;
|
|
m_NumObj = 0;
|
|
}
|
|
|
|
// Appends a node to the end of the array.
//
// PARAMS:
//	node		- The node to add. On success its m_Offs is set to the byte
//				  offset of its slot; if the array is full it is set to
//				  kOffsInvalid.
//	typeFlags	- Initial type flags stored for the node.
//	forceInsert	- If true, skip the check that the node isn't already
//				  marked as inserted.
// NOTES:
//	The position of the new element is not written here.
void CSpatialArray::Insert(CSpatialArrayNode &node, u32 typeFlags, bool forceInsert)
{
	SA_PF_FUNC(Insert);

	const bool mayInsert = forceInsert || Verifyf(node.m_Offs == CSpatialArrayNode::kOffsInvalid, "Tried to insert a spatial array node that's already inserted.");
	if(!mayInsert)
	{
		return;
	}

	SPATIALARRAYTHREADLOCK;

	const int slot = m_NumObj;
	const int capacity = m_MaxObj;

	if(!Verifyf(slot < capacity, "Out of space in spatial array."))
	{
		node.m_Offs = CSpatialArrayNode::kOffsInvalid;
		return;
	}

	// All the arrays have 4 byte elements, so the same byte offset
	// addresses the slot in each of them.
	const unsigned int slotOffs = slot*sizeof(float);

	// Store the node address in the new slot.
#if SPATIALARRAY64BIT
	const u64 nodeAddr = sNodePtrToU64(&node);
	u32 *slotUpper = (u32*)((char*)m_NodeArrayUpper + slotOffs);
	u32 *slotLower = (u32*)((char*)m_NodeArrayLower + slotOffs);
	*slotLower = (u32)nodeAddr;
	*slotUpper = (u32)(nodeAddr >> 32);
#else
	CSpatialArrayNodeAddr *slotAddr = (CSpatialArrayNodeAddr*)((char*)m_NodeArray + slotOffs);
	*slotAddr = ptrdiff_t_to_int((ptrdiff_t)&node);	// Catch truncation on x64 builds
#endif

	u32 *slotFlags = (u32*)((char*)m_TypeFlagArray + slotOffs);

	node.m_Offs = (u16)slotOffs;
	m_NumObj = slot + 1;
	*slotFlags = typeFlags;
}
|
|
|
|
|
|
// Removes a node from the array by moving the last element into its slot.
//
// PARAMS:
//	node	- The node to remove. Its m_Offs is set to kOffsInvalid. The
//			  node that used to be last gets its m_Offs updated to the
//			  vacated slot.
void CSpatialArray::Remove(CSpatialArrayNode &node)
{
	SA_PF_FUNC(Remove);

	if(!Verifyf(node.m_Offs != CSpatialArrayNode::kOffsInvalid, "Removing spatial array node not in array."))
	{
		return;
	}

	SPATIALARRAYTHREADLOCK;

	const int lastIndex = m_NumObj - 1;		// Also the new object count.
	const unsigned int holeOffs = node.m_Offs;

	// Look up the node currently stored in the last slot.
#if SPATIALARRAY64BIT
	u32* const lowerHalves = m_NodeArrayLower;
	u32* const upperHalves = m_NodeArrayUpper;
	const u32 lastLower = lowerHalves[lastIndex];
	const u32 lastUpper = upperHalves[lastIndex];
	CSpatialArrayNode* const lastNode = NodePtrFromUpperLower(lastUpper, lastLower);

	u32* const holeLower = (u32*)((char*)lowerHalves + holeOffs);
	u32* const holeUpper = (u32*)((char*)upperHalves + holeOffs);
#else
	CSpatialArrayNodeAddr* const nodeAddrs = m_NodeArray;
	const CSpatialArrayNodeAddr lastAddr = nodeAddrs[lastIndex];
	CSpatialArrayNode* const lastNode = (CSpatialArrayNode*)lastAddr;

	CSpatialArrayNodeAddr* const holeAddr = (CSpatialArrayNodeAddr*)((char*)nodeAddrs + holeOffs);
#endif

	float* const xArray = m_PosXArray;
	float* const yArray = m_PosYArray;
	float* const zArray = m_PosZArray;
	u32* const flagArray = m_TypeFlagArray;

	// The source of the move is wherever the last node says it lives.
	const unsigned int lastOffs = lastNode->m_Offs;

	// Move position and type flags from the last slot into the hole.
	*(float*)((char*)xArray + holeOffs) = *(float*)((char*)xArray + lastOffs);
	*(float*)((char*)yArray + holeOffs) = *(float*)((char*)yArray + lastOffs);
	*(float*)((char*)zArray + holeOffs) = *(float*)((char*)zArray + lastOffs);
	*(u32*)((char*)flagArray + holeOffs) = *(u32*)((char*)flagArray + lastOffs);

	// Move the node address as well.
#if SPATIALARRAY64BIT
	*holeLower = lastLower;
	*holeUpper = lastUpper;
#else
	*holeAddr = lastAddr;
#endif

	// Order matters here: if 'node' is itself the last node, it has to end
	// up invalid, so the moved node's offset is written first.
	lastNode->m_Offs = (u16)holeOffs;
	node.m_Offs = CSpatialArrayNode::kOffsInvalid;
	m_NumObj = lastIndex;

	// For some of the vector operations to work properly at the end
	// of the array, we make sure to clear out the node pointer at
	// the previous end of the array. /FF
#if SPATIALARRAY64BIT
	upperHalves[lastIndex] = 0;
	lowerHalves[lastIndex] = 0;
#else
	nodeAddrs[lastIndex] = 0;
#endif
}
|
|
|
|
|
|
// Writes a new position for a node that is in the array.
//
// PARAMS:
//	node				- The node to update; must have a valid m_Offs.
//	posX, posY, posZ	- The new position.
void CSpatialArray::Update(CSpatialArrayNode &node, float posX, float posY, float posZ)
{
	SA_PF_FUNC(Update);

	if(!Verifyf(node.m_Offs != CSpatialArrayNode::kOffsInvalid, "Tried to update position of invalid spatial array node."))
	{
		return;
	}

	SPATIALARRAYTHREADLOCK;

	// m_Offs is a byte offset, shared by all the parallel arrays.
	const unsigned int byteOffs = node.m_Offs;

	*(float*)((char*)m_PosXArray + byteOffs) = posX;
	*(float*)((char*)m_PosYArray + byteOffs) = posY;
	*(float*)((char*)m_PosZArray + byteOffs) = posZ;
}
|
|
|
|
|
|
// Writes a new position for a node and modifies its type flags in one go.
//
// PARAMS:
//	node				- The node to update; must have a valid m_Offs.
//	posX, posY, posZ	- The new position.
//	flagsToChange		- Mask of the type flag bits to clear before
//						  flagValues is OR:ed in.
//	flagValues			- Bits to set. These are OR:ed in as they are, without
//						  being masked by flagsToChange.
void CSpatialArray::UpdateWithTypeFlags(CSpatialArrayNode &node, float posX, float posY, float posZ, u32 flagsToChange, u32 flagValues)
{
	SA_PF_FUNC(Update);

	if(!Verifyf(node.m_Offs != CSpatialArrayNode::kOffsInvalid, "Tried to update position of invalid spatial array node."))
	{
		return;
	}

	SPATIALARRAYTHREADLOCK;

	// m_Offs is a byte offset, shared by all the parallel arrays.
	const unsigned int byteOffs = node.m_Offs;

	float *xSlot = (float*)((char*)m_PosXArray + byteOffs);
	float *ySlot = (float*)((char*)m_PosYArray + byteOffs);
	float *zSlot = (float*)((char*)m_PosZArray + byteOffs);
	u32 *flagSlot = (u32*)((char*)m_TypeFlagArray + byteOffs);

	const u32 keptFlags = *flagSlot & ~flagsToChange;

	*xSlot = posX;
	*ySlot = posY;
	*zSlot = posZ;
	*flagSlot = keptFlags | flagValues;
}
|
|
|
|
|
|
void CSpatialArray::GetPosition(const CSpatialArrayNode &node, Vec3V_Ref posOut) const
|
|
{
|
|
// Not sure:
|
|
// SPATIALARRAYTHREADLOCK;
|
|
|
|
const unsigned int offs = node.m_Offs;
|
|
const float *posXArray = m_PosXArray;
|
|
const float *posYArray = m_PosYArray;
|
|
const float *posZArray = m_PosZArray;
|
|
|
|
const float *posXPtr = (const float*)((char*)posXArray + offs);
|
|
const float *posYPtr = (const float*)((char*)posYArray + offs);
|
|
const float *posZPtr = (const float*)((char*)posZArray + offs);
|
|
|
|
posOut.SetXf(*posXPtr);
|
|
posOut.SetYf(*posYPtr);
|
|
posOut.SetZf(*posZPtr);
|
|
}
|
|
|
|
|
|
|
|
// Modifies the type flags stored for a node.
//
// PARAMS:
//	node			- The node to modify; must have a valid m_Offs.
//	flagsToChange	- Mask of the bits to replace.
//	flagValues		- New values for those bits. Must not have bits set
//					  outside of flagsToChange (asserted, not masked).
void CSpatialArray::SetTypeFlags(CSpatialArrayNode &node, u32 flagsToChange, u32 flagValues)
{
	SA_PF_FUNC(SetTypeFlags);	// Maybe not really accurate. /FF

	// If this fails, there are values set in flagValues that are not in flagsToChange,
	// which we are probably better off if the user could avoid, so we don't have to
	// spend time on masking them here. /FF
	Assert((flagValues & ~flagsToChange) == 0);

	if(!Verifyf(node.m_Offs != CSpatialArrayNode::kOffsInvalid, "Tried to set type flags of invalid spatial array node."))
	{
		return;
	}

	SPATIALARRAYTHREADLOCK;

	u32 *flagSlot = (u32*)((char*)m_TypeFlagArray + (unsigned int)node.m_Offs);

	const u32 keptFlags = *flagSlot & ~flagsToChange;
	*flagSlot = keptFlags | flagValues;
}
|
|
|
|
|
|
// Returns the type flags stored for a node.
//
// PARAMS:
//	node	- The node to look up; must have a valid m_Offs.
// RETURNS:
//	The stored flags, or 0 if the node is not in the array.
u32 CSpatialArray::GetTypeFlags(const CSpatialArrayNode &node) const
{
	SA_PF_FUNC(GetTypeFlags);	// Maybe not really accurate. /FF

	u32 flags = 0;

	if(Verifyf(node.m_Offs != CSpatialArrayNode::kOffsInvalid, "Tried to get type flags of invalid spatial array node."))
	{
		SPATIALARRAYTHREADLOCK;

		const unsigned int byteOffs = node.m_Offs;
		flags = *(const u32*)((char*)m_TypeFlagArray + byteOffs);
	}

	return flags;
}
|
|
|
|
#if SPATIALARRAY64BIT
|
|
static int sPickFromSortedArrays(const Vec4V *closestND2V,
|
|
const Vec4V *closestNNodesUpperV,
|
|
const Vec4V *closestNNodesLowerV,
|
|
float maxDist, CSpatialArrayNode **found,
|
|
int numToPick)
|
|
#else
|
|
static int sPickFromSortedArrays(const Vec4V *closestND2V, const Vec4V *closestNNodesV,
|
|
float maxDist, CSpatialArrayNode **found,
|
|
int numToPick)
|
|
#endif
|
|
{
|
|
const float maxDistSq = square(Min(maxDist, LARGE_FLOAT)); // Make sure we don't square FLT_MAX. /FF
|
|
|
|
int numFound = 0;
|
|
|
|
// We now basically have four sorted arrays of length N in memory,
|
|
// and we will do comparisons between these arrays to find the closest
|
|
// three over all. We do the comparisons using u32's, making use of the
|
|
// fact that positive IEEE754 floating point numbers preserve the numerical
|
|
// order when interpreted as integeres. That way, we avoid floating point
|
|
// branches. /FF
|
|
int k1 = 0, k2 = 1, k3 = 2, k4 = 3;
|
|
u32 d1 = ((u32*)closestND2V)[k1];
|
|
u32 d2 = ((u32*)closestND2V)[k2];
|
|
u32 d3 = ((u32*)closestND2V)[k3];
|
|
u32 d4 = ((u32*)closestND2V)[k4];
|
|
for(int i = 0; i < numToPick; i++)
|
|
{
|
|
// Note: should be set in all code paths below. /FF
|
|
int closestIndex;
|
|
|
|
if(d1 < d2)
|
|
{
|
|
if(d1 < d3)
|
|
{
|
|
if(d1 < d4)
|
|
{
|
|
// d1 smallest
|
|
closestIndex = k1;
|
|
k1 += 4;
|
|
d1 = ((u32*)closestND2V)[k1];
|
|
}
|
|
else
|
|
{
|
|
// d4 smallest
|
|
closestIndex = k4;
|
|
k4 += 4;
|
|
d4 = ((u32*)closestND2V)[k4];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(d3 < d4)
|
|
{
|
|
// d3 smallest
|
|
closestIndex = k3;
|
|
k3 += 4;
|
|
d3 = ((u32*)closestND2V)[k3];
|
|
}
|
|
else
|
|
{
|
|
// d4 smallest
|
|
closestIndex = k4;
|
|
k4 += 4;
|
|
d4 = ((u32*)closestND2V)[k4];
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(d2 < d3)
|
|
{
|
|
if(d2 < d4)
|
|
{
|
|
// d2 smallest
|
|
closestIndex = k2;
|
|
k2 += 4;
|
|
d2 = ((u32*)closestND2V)[k2];
|
|
}
|
|
else
|
|
{
|
|
// d4 smallest
|
|
closestIndex = k4;
|
|
k4 += 4;
|
|
d4 = ((u32*)closestND2V)[k4];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(d3 < d4)
|
|
{
|
|
// d3 smallest
|
|
closestIndex = k3;
|
|
k3 += 4;
|
|
d3 = ((u32*)closestND2V)[k3];
|
|
}
|
|
else
|
|
{
|
|
// d4 smallest
|
|
closestIndex = k4;
|
|
k4 += 4;
|
|
d4 = ((u32*)closestND2V)[k4];
|
|
}
|
|
}
|
|
}
|
|
|
|
#if SPATIALARRAY64BIT
|
|
CSpatialArrayNode* closest = CSpatialArray::NodePtrFromUpperLower(
|
|
((u32*)closestNNodesUpperV)[closestIndex],
|
|
((u32*)closestNNodesLowerV)[closestIndex]
|
|
);
|
|
#else
|
|
CSpatialArrayNode* closest = (CSpatialArrayNode*)(((CSpatialArrayNodeAddr*)closestNNodesV)[closestIndex]);
|
|
#endif
|
|
if(closest)
|
|
{
|
|
const float distSq = ((float*)closestND2V)[closestIndex];
|
|
if(distSq <= maxDistSq)
|
|
{
|
|
found[numFound++] = closest;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Note: if it's useful, we could potentially let the code above extract
|
|
// more close objects by continuing to operate on the arrays. It wouldn't
|
|
// be perfectly accurate beyond the first three, but they would still be
|
|
// objects closer than many others. /FF
|
|
|
|
return numFound;
|
|
}
|
|
|
|
|
|
// Finds up to three nodes closest to a point, out of those that match the
// given type flags and are not one of the two excluded nodes.
//
// PARAMS:
//	centerV					- The point to measure distances from.
//	found					- Output array for the node pointers, closest first.
//	maxFound				- Size of found[]; only used to assert that it's
//							  at least 3.
//	typeFlagsToCareAbout	- Mask of the type flag bits to compare.
//	typeFlagValues			- Required values of those bits. Must not have
//							  bits set outside of typeFlagsToCareAbout.
//	excl1, excl2			- Nodes to leave out of the results.
//	maxDist					- Nodes further away than this are not returned.
// RETURNS:
//	The number of nodes written to found[].
int CSpatialArray::FindClosest3(Vec3V_In centerV,
		CSpatialArrayNode **found, int ASSERT_ONLY(maxFound),
		const u32 &typeFlagsToCareAbout, const u32 &typeFlagValues,
		const CSpatialArrayNode* &excl1, const CSpatialArrayNode* &excl2,
		float maxDist) const
{
	SA_PF_FUNC(FindClosest3);

	Assert(maxFound >= 3);

	// If this fails, there are values set in typeFlagValues that are not in
	// typeFlagsToCareAbout, which we are probably better off if the user could
	// avoid, so we don't have to spend time on masking them here. /FF
	Assert((typeFlagValues & ~typeFlagsToCareAbout) == 0);

	SPATIALARRAYTHREADLOCK;

	// Load the type flag stuff into vector registers. Note that we intentionally
	// pass in these by reference, requiring the user to put them in memory, because
	// if they were passed in in general purpose registers, we would need to store
	// them to memory and load them back anyway. Could pass them in in vector
	// registers, of course, but that's probably not worth the trouble. /FF
	const Vec4V careMaskV = Vec4V(LoadScalar32IntoScalarV(typeFlagsToCareAbout));
	const Vec4V wantedFlagsV = Vec4V(LoadScalar32IntoScalarV(typeFlagValues));

#if SPATIALARRAY64BIT

	// Get the exclusion pointers into vector registers. This way
	// of doing it is probably sub-optimal: we should be able to
	// read straight from excl1/excl2 (references to caller's memory)
	// into vector registers like we do in the 32 bit case, but to do so
	// we would have to be really careful to avoid endianness issues.

	const u64 excl1Bits = sNodePtrToU64(excl1);
	const u64 excl2Bits = sNodePtrToU64(excl2);

	ScalarV excl1LoSV, excl1HiSV;
	ScalarV excl2LoSV, excl2HiSV;
	excl1HiSV.Seti((u32)(excl1Bits >> 32));
	excl2HiSV.Seti((u32)(excl2Bits >> 32));
	excl1LoSV.Seti((u32)excl1Bits);
	excl2LoSV.Seti((u32)excl2Bits);

	const Vec4V excl1LoV = Vec4V(excl1LoSV);
	const Vec4V excl2LoV = Vec4V(excl2LoSV);
	const Vec4V excl1HiV = Vec4V(excl1HiSV);
	const Vec4V excl2HiV = Vec4V(excl2HiSV);

#else
	const Vec4V excl1V = Vec4V(LoadScalar32IntoScalarV(*(u32*)&excl1));
	const Vec4V excl2V = Vec4V(LoadScalar32IntoScalarV(*(u32*)&excl2));
#endif

	// The arrays are read four elements at a time.
	const Vec4V* RESTRICT xGroups = (const Vec4V*)m_PosXArray;
	const Vec4V* RESTRICT yGroups = (const Vec4V*)m_PosYArray;
	const Vec4V* RESTRICT zGroups = (const Vec4V*)m_PosZArray;
#if SPATIALARRAY64BIT
	const Vec4V* RESTRICT nodeLoGroups = (const Vec4V*)m_NodeArrayLower;
	const Vec4V* RESTRICT nodeHiGroups = (const Vec4V*)m_NodeArrayUpper;
#else
	const Vec4V* RESTRICT nodeGroups = (const Vec4V*)m_NodeArray;
#endif
	const Vec4V* RESTRICT flagGroups = (const Vec4V*)m_TypeFlagArray;

	const Vec4V centerXV(SplatX(centerV));
	const Vec4V centerYV(SplatY(centerV));
	const Vec4V centerZV(SplatZ(centerV));

	const Vec4V nullV(V_ZERO);
	const Vec4V rejectedD2V(V_FLT_MAX);

	// Number of groups of four to look at, rounding up so that a partially
	// filled last group is included.
	const int numObj = m_NumObj;
	const int numGroups = (numObj + 3) >> 2;

	// These are used to keep track of the three closest objects
	// for each of the components in the vector registers. /FF
#if SPATIALARRAY64BIT
	Vec4V best1NodesLoV(V_ZERO);
	Vec4V best2NodesLoV(V_ZERO);
	Vec4V best3NodesLoV(V_ZERO);
	Vec4V best1NodesHiV(V_ZERO);
	Vec4V best2NodesHiV(V_ZERO);
	Vec4V best3NodesHiV(V_ZERO);
#else
	Vec4V best1NodesV(V_ZERO);
	Vec4V best2NodesV(V_ZERO);
	Vec4V best3NodesV(V_ZERO);
#endif

	// These are the squared distances for the objects in
	// best[1/2/3]Nodes. /FF
	Vec4V best1D2V(V_FLT_MAX);
	Vec4V best2D2V(V_FLT_MAX);
	Vec4V best3D2V(V_FLT_MAX);

	for(int group = 0; group < numGroups; group++)
	{
		// Load from the arrays to the vector registers. /FF
		const Vec4V posXV = xGroups[group];
		const Vec4V posYV = yGroups[group];
		const Vec4V posZV = zGroups[group];
#if SPATIALARRAY64BIT
		const Vec4V nodeHiV = nodeHiGroups[group];
		const Vec4V nodeLoV = nodeLoGroups[group];
#else
		const Vec4V nodeV = nodeGroups[group];
#endif
		const Vec4V flagsV = flagGroups[group];

		// Compute the squared distance to the center. /FF
		const Vec4V deltaXV = Subtract(posXV, centerXV);
		const Vec4V deltaYV = Subtract(posYV, centerYV);
		const Vec4V deltaZV = Subtract(posZV, centerZV);
		const Vec4V sqXV = Scale(deltaXV, deltaXV);
		const Vec4V sqXYV = AddScaled(sqXV, deltaYV, deltaYV);
		const Vec4V rawD2V = AddScaled(sqXYV, deltaZV, deltaZV);

		const Vec4V maskedFlagsV = And(flagsV, careMaskV);

		// This is needed to deal properly with the end of the array if the number
		// of objects is not aligned with 4. The node pointers beyond the end will
		// be NULL, and here we create a mask that's set for the elements where
		// the pointer is NULL, i.e. past the end of the array. /FF
#if SPATIALARRAY64BIT
		const VecBoolV isNullV = IsEqualInt(Or(nodeLoV, nodeHiV), nullV);	// Ptr is NULL only if both halves are 0.
#else
		const VecBoolV isNullV = IsEqualInt(nodeV, nullV);
#endif

		// Match the type flags. This mask is set for the elements where
		//	(objTypeFlags & typeFlagsToCareAbout) == typeFlagValues
		// i.e. the bits we care about have the values we are looking for. /FF
		const VecBoolV flagsMatchV = IsEqualInt(maskedFlagsV, wantedFlagsV);

		// Masks that are set for the elements holding one of the excluded nodes.
#if SPATIALARRAY64BIT
		// Check the upper and lower halves of the addresses separately.
		const VecBoolV isExcl1LoV = IsEqualInt(nodeLoV, excl1LoV);
		const VecBoolV isExcl2LoV = IsEqualInt(nodeLoV, excl2LoV);
		const VecBoolV isExcl1HiV = IsEqualInt(nodeHiV, excl1HiV);
		const VecBoolV isExcl2HiV = IsEqualInt(nodeHiV, excl2HiV);

		// Combine the upper/lower halves together: both halves have to match.
		const VecBoolV isExcl1V = And(isExcl1LoV, isExcl1HiV);
		const VecBoolV isExcl2V = And(isExcl2LoV, isExcl2HiV);
#else
		const VecBoolV isExcl1V = IsEqualInt(nodeV, excl1V);
		const VecBoolV isExcl2V = IsEqualInt(nodeV, excl2V);
#endif

		// The three masks above are set for elements we must not use. OR them
		// together and invert, to get a mask that's set for the elements whose
		// node pointer is usable.
		const VecBoolV nodeUsableV = InvertBits(Or(Or(isExcl1V, isExcl2V), isNullV));

		// To allow use of an element, we require both that it's not past the end
		// of the array or otherwise an ineligible node pointer, and that the type flags match.
		const VecBoolV acceptV = And(nodeUsableV, flagsMatchV);

		// Now, select between the true measured distances and FLT_MAX, depending on whether
		// these objects fit the acceptance criteria above. If FLT_MAX is selected here,
		// it won't be closer than objects we have previously found, so the objects being
		// looked at now won't be chosen. /FF
		const Vec4V d2V = SelectFT(acceptV, rejectedD2V, rawD2V);

		// Compare the squared distance of these objects vs. the squared distances
		// of the closest objects found so far. /FF
		const VecBoolV beats1V = IsLessThan(d2V, best1D2V);
		const VecBoolV beats2V = IsLessThan(d2V, best2D2V);
		const VecBoolV beats3V = IsLessThan(d2V, best3D2V);

		// Compute some temporary vectors for the logic of how to move the elements.
		// For example, into2D2V is used for the squared distance of the 2nd closest
		// object. If we are going to replace that element, it would either be replaced
		// by the current distance (if the new object is closer than the old 2nd closest,
		// but not closer than the #1 closest one) or by the old distance for the #1 closest
		// one (if that's going to get replaced). /FF
		const Vec4V into2D2V = SelectFT(beats1V, d2V, best1D2V);
		const Vec4V into3D2V = SelectFT(beats2V, d2V, best2D2V);
#if SPATIALARRAY64BIT
		const Vec4V into2NodesHiV = SelectFT(beats1V, nodeHiV, best1NodesHiV);
		const Vec4V into3NodesHiV = SelectFT(beats2V, nodeHiV, best2NodesHiV);
		const Vec4V into2NodesLoV = SelectFT(beats1V, nodeLoV, best1NodesLoV);
		const Vec4V into3NodesLoV = SelectFT(beats2V, nodeLoV, best2NodesLoV);
#else
		const Vec4V into2NodesV = SelectFT(beats1V, nodeV, best1NodesV);
		const Vec4V into3NodesV = SelectFT(beats2V, nodeV, best2NodesV);
#endif

		// Finally, compute the new first, second, and third closest objects found
		// so far. /FF
		best3D2V = SelectFT(beats3V, best3D2V, into3D2V);
		best2D2V = SelectFT(beats2V, best2D2V, into2D2V);
		best1D2V = SelectFT(beats1V, best1D2V, d2V);
#if SPATIALARRAY64BIT
		best3NodesHiV = SelectFT(beats3V, best3NodesHiV, into3NodesHiV);
		best2NodesHiV = SelectFT(beats2V, best2NodesHiV, into2NodesHiV);
		best1NodesHiV = SelectFT(beats1V, best1NodesHiV, nodeHiV);

		best3NodesLoV = SelectFT(beats3V, best3NodesLoV, into3NodesLoV);
		best2NodesLoV = SelectFT(beats2V, best2NodesLoV, into2NodesLoV);
		best1NodesLoV = SelectFT(beats1V, best1NodesLoV, nodeLoV);
#else
		best3NodesV = SelectFT(beats3V, best3NodesV, into3NodesV);
		best2NodesV = SelectFT(beats2V, best2NodesV, into2NodesV);
		best1NodesV = SelectFT(beats1V, best1NodesV, nodeV);
#endif
	}

	// Store out the squared distances and pointers to memory, and let the
	// helper pick the closest three out of the twelve candidates. /FF
	Vec4V candidateD2V[3];
	candidateD2V[0] = best1D2V;
	candidateD2V[1] = best2D2V;
	candidateD2V[2] = best3D2V;

#if SPATIALARRAY64BIT
	Vec4V candidateNodesHiV[3];
	candidateNodesHiV[0] = best1NodesHiV;
	candidateNodesHiV[1] = best2NodesHiV;
	candidateNodesHiV[2] = best3NodesHiV;

	Vec4V candidateNodesLoV[3];
	candidateNodesLoV[0] = best1NodesLoV;
	candidateNodesLoV[1] = best2NodesLoV;
	candidateNodesLoV[2] = best3NodesLoV;

	return sPickFromSortedArrays(candidateD2V, candidateNodesHiV, candidateNodesLoV, maxDist, found, 3);
#else
	Vec4V candidateNodesV[3];
	candidateNodesV[0] = best1NodesV;
	candidateNodesV[1] = best2NodesV;
	candidateNodesV[2] = best3NodesV;

	return sPickFromSortedArrays(candidateD2V, candidateNodesV, maxDist, found, 3);
#endif
}
|
|
|
|
|
|
int CSpatialArray::FindClosest4(Vec3V_In centerV,
|
|
CSpatialArrayNode **found, int ASSERT_ONLY(maxFound),
|
|
const u32 &typeFlagsToCareAbout, const u32 &typeFlagValues,
|
|
const CSpatialArrayNode* &excl1, const CSpatialArrayNode* &excl2,
|
|
float maxDist) const
|
|
{
|
|
SA_PF_FUNC(FindClosest4);
|
|
|
|
Assert(maxFound >= 4);
|
|
|
|
// If this fails, there are values set in flagValues that are not in flagsToChange,
|
|
// which we are probably better off if the user could avoid, so we don't have to
|
|
// spend time on masking them here. /FF
|
|
Assert((typeFlagValues & ~typeFlagsToCareAbout) == 0);
|
|
|
|
SPATIALARRAYTHREADLOCK;
|
|
|
|
// Load the type flag stuff into vector registers. Note that we intentionally
|
|
// pass in these by reference, requiring the user to put them in memory, because
|
|
// if they were passed in in general purpose registers, we would need to store
|
|
// them to memory and load them back anyway. Could pass them in in vector
|
|
// registers, of course, but that's probably not worth the trouble. /FF
|
|
const Vec4V typeFlagsToCareAboutV = Vec4V(LoadScalar32IntoScalarV(typeFlagsToCareAbout));
|
|
const Vec4V typeFlagValuesV = Vec4V(LoadScalar32IntoScalarV(typeFlagValues));
|
|
|
|
#if SPATIALARRAY64BIT
|
|
|
|
// Get the exclusion pointers into vector registers. This way
|
|
// of doing it is probably sub-optimal: we should be able to
|
|
// read straight from excl1/excl2 (references to caller's memory)
|
|
// into vector registers like we do in the 32 bit case, but to do so
|
|
// we would have to be really careful to avoid endianness issues.
|
|
|
|
u64 excl1Ptr = sNodePtrToU64(excl1);
|
|
u64 excl2Ptr = sNodePtrToU64(excl2);
|
|
|
|
ScalarV excl1LowerSV, excl1UpperSV;
|
|
ScalarV excl2LowerSV, excl2UpperSV;
|
|
excl1UpperSV.Seti((u32)(excl1Ptr >> 32));
|
|
excl2UpperSV.Seti((u32)(excl2Ptr >> 32));
|
|
excl1LowerSV.Seti((u32)excl1Ptr);
|
|
excl2LowerSV.Seti((u32)excl2Ptr);
|
|
|
|
const Vec4V excl1LowerV = Vec4V(excl1LowerSV);
|
|
const Vec4V excl2LowerV = Vec4V(excl2LowerSV);
|
|
const Vec4V excl1UpperV = Vec4V(excl1UpperSV);
|
|
const Vec4V excl2UpperV = Vec4V(excl2UpperSV);
|
|
|
|
#else
|
|
const Vec4V excl1V = Vec4V(LoadScalar32IntoScalarV(*(u32*)&excl1));
|
|
const Vec4V excl2V = Vec4V(LoadScalar32IntoScalarV(*(u32*)&excl2));
|
|
#endif
|
|
|
|
const Vec4V* RESTRICT objXPtr = (const Vec4V*)m_PosXArray;
|
|
const Vec4V* RESTRICT objYPtr = (const Vec4V*)m_PosYArray;
|
|
const Vec4V* RESTRICT objZPtr = (const Vec4V*)m_PosZArray;
|
|
#if SPATIALARRAY64BIT
|
|
const Vec4V* RESTRICT nodesPtrLower = (const Vec4V*)m_NodeArrayLower;
|
|
const Vec4V* RESTRICT nodesPtrUpper = (const Vec4V*)m_NodeArrayUpper;
|
|
#else
|
|
const Vec4V* RESTRICT nodesPtr = (const Vec4V*)m_NodeArray;
|
|
#endif
|
|
const Vec4V* RESTRICT typeFlagPtr = (const Vec4V*)m_TypeFlagArray;
|
|
|
|
const Vec4V centerxV(SplatX(centerV));
|
|
const Vec4V centeryV(SplatY(centerV));
|
|
const Vec4V centerzV(SplatZ(centerV));
|
|
|
|
const Vec4V zeroV(V_ZERO);
|
|
const Vec4V maxDistV(V_FLT_MAX);
|
|
|
|
const int numObj = m_NumObj;
|
|
|
|
// These are used to keep track of the three closest objects
|
|
// for each of the components in the vector registers. /FF
|
|
#if SPATIALARRAY64BIT
|
|
Vec4V close1NodesLowerV(V_ZERO);
|
|
Vec4V close2NodesLowerV(V_ZERO);
|
|
Vec4V close3NodesLowerV(V_ZERO);
|
|
Vec4V close4NodesLowerV(V_ZERO);
|
|
Vec4V close1NodesUpperV(V_ZERO);
|
|
Vec4V close2NodesUpperV(V_ZERO);
|
|
Vec4V close3NodesUpperV(V_ZERO);
|
|
Vec4V close4NodesUpperV(V_ZERO);
|
|
#else
|
|
Vec4V close1NodesV(V_ZERO);
|
|
Vec4V close2NodesV(V_ZERO);
|
|
Vec4V close3NodesV(V_ZERO);
|
|
Vec4V close4NodesV(V_ZERO);
|
|
#endif
|
|
|
|
// These are the squared distances for the objects in
|
|
// close[1/2/3/4]NodesV. /FF
|
|
Vec4V close1D2V(V_FLT_MAX);
|
|
Vec4V close2D2V(V_FLT_MAX);
|
|
Vec4V close3D2V(V_FLT_MAX);
|
|
Vec4V close4D2V(V_FLT_MAX);
|
|
|
|
for(int i = 0; i < numObj; i += 4)
|
|
{
|
|
// Load from the arrays to the vector registers. /FF
|
|
const Vec4V xxV = *objXPtr;
|
|
const Vec4V yyV = *objYPtr;
|
|
const Vec4V zzV = *objZPtr;
|
|
#if SPATIALARRAY64BIT
|
|
const Vec4V nodesUpperV = *nodesPtrUpper;
|
|
const Vec4V nodesLowerV = *nodesPtrLower;
|
|
#else
|
|
const Vec4V nodesV = *nodesPtr;
|
|
#endif
|
|
const Vec4V objTypeFlagsV = *typeFlagPtr;
|
|
|
|
// Compute the squared distance to the center. /FF
|
|
const Vec4V dxV = Subtract(xxV, centerxV);
|
|
const Vec4V dyV = Subtract(yyV, centeryV);
|
|
const Vec4V dzV = Subtract(zzV, centerzV);
|
|
const Vec4V dx2V = Scale(dxV, dxV);
|
|
const Vec4V dxy2V = AddScaled(dx2V, dyV, dyV);
|
|
const Vec4V d2BeforeMaskV = AddScaled(dxy2V, dzV, dzV);
|
|
|
|
const Vec4V objTypeFlagsCaredAboutV = And(objTypeFlagsV, typeFlagsToCareAboutV);
|
|
|
|
// This is needed to deal properly with the end of the array if the number
|
|
// of objects is not aligned with 4. The node pointers beyond the end will
|
|
// be NULL, and here we create a mask where 0x0000 means that the node was
|
|
// within range (pointer not NULL) while 0xffff indicates a value past the
|
|
// end of the array. /FF
|
|
#if SPATIALARRAY64BIT
|
|
const VecBoolV selectNodePtrZeroV = IsEqualInt(Or(nodesLowerV, nodesUpperV), zeroV); // Ptr is NULL only if both halves are 0.
|
|
#else
|
|
const VecBoolV selectNodePtrZeroV = IsEqualInt(nodesV, zeroV);
|
|
#endif
|
|
|
|
// Match the type flags. 0xffff in this mask indicates that
|
|
// (objTypeFlags & typeFlagsToCareAbout) == typeFlagValues
|
|
// i.e. the bits we care about have the values we are looking for. /FF
|
|
const VecBoolV selectTypeFlagMatchV = IsEqualInt(objTypeFlagsCaredAboutV, typeFlagValuesV);
|
|
|
|
#if SPATIALARRAY64BIT
|
|
// Check for matches on the upper and lower halves of the exclusion addresses.
|
|
const VecBoolV selectNodePtrExcl1LowerV = IsEqualInt(nodesLowerV, excl1LowerV);
|
|
const VecBoolV selectNodePtrExcl2LowerV = IsEqualInt(nodesLowerV, excl2LowerV);
|
|
const VecBoolV selectNodePtrExcl1UpperV = IsEqualInt(nodesUpperV, excl1UpperV);
|
|
const VecBoolV selectNodePtrExcl2UpperV = IsEqualInt(nodesUpperV, excl2UpperV);
|
|
|
|
// Combine the upper/lower halves together: both halves have to match.
|
|
const VecBoolV selectNodePtrExcl1V = And(selectNodePtrExcl1LowerV, selectNodePtrExcl1UpperV);
|
|
const VecBoolV selectNodePtrExcl2V = And(selectNodePtrExcl2LowerV, selectNodePtrExcl2UpperV);
|
|
#else
|
|
const VecBoolV selectNodePtrExcl1V = IsEqualInt(nodesV, excl1V);
|
|
const VecBoolV selectNodePtrExcl2V = IsEqualInt(nodesV, excl2V);
|
|
#endif
|
|
// We have a couple of vectors now that are 0xffff on a mismatch,
|
|
// instead of 0x0000 on a match. We OR them and NOT them so that
|
|
// we get a mask that's 0xffff when they all match. /FF
|
|
const VecBoolV selectNodePtrV = InvertBits(Or(Or(selectNodePtrExcl1V, selectNodePtrExcl2V),
|
|
selectNodePtrZeroV));
|
|
|
|
// To allow use of an element, we require both that it's not past the end
|
|
// of the array or otherwise an ineligible node pointer, and that the type flags match.
|
|
const VecBoolV combinedSelect = And(selectNodePtrV, selectTypeFlagMatchV);
|
|
|
|
// Now, select between the true measured distances and FLT_MAX, depending on whether
|
|
// these objects fit the acceptance criteria above. If FLT_MAX is selected here,
|
|
// it won't be closer than objects we have previously found, so the objects being
|
|
// looked at now won't be chosen. /FF
|
|
const Vec4V d2V = SelectFT(combinedSelect, maxDistV, d2BeforeMaskV);
|
|
|
|
// Compare the squared distance of these objects vs. the squared distances
|
|
// of the closest objects found so far. /FF
|
|
const VecBoolV selectCloserThan1V = IsLessThan(d2V, close1D2V);
|
|
const VecBoolV selectCloserThan2V = IsLessThan(d2V, close2D2V);
|
|
const VecBoolV selectCloserThan3V = IsLessThan(d2V, close3D2V);
|
|
const VecBoolV selectCloserThan4V = IsLessThan(d2V, close4D2V);
|
|
|
|
// Compute some temporary vectors for the logic of how to move the elements.
|
|
// For example, temp2D2V is used for the squared distance of the 2nd closest
|
|
// object. If we are going to replace that element, it would either be replaced
|
|
// by the current distance (if the new object is closest than the old 2nd closest,
|
|
// but not closer than the #1 closest one) or by the old distance for the #1 closest
|
|
// one (if that's going to get replaced). /FF
|
|
const Vec4V temp2D2V = SelectFT(selectCloserThan1V, d2V, close1D2V);
|
|
const Vec4V temp3D2V = SelectFT(selectCloserThan2V, d2V, close2D2V);
|
|
const Vec4V temp4D2V = SelectFT(selectCloserThan3V, d2V, close3D2V);
|
|
#if SPATIALARRAY64BIT
|
|
const Vec4V temp2NodesUpperV = SelectFT(selectCloserThan1V, nodesUpperV, close1NodesUpperV);
|
|
const Vec4V temp3NodesUpperV = SelectFT(selectCloserThan2V, nodesUpperV, close2NodesUpperV);
|
|
const Vec4V temp4NodesUpperV = SelectFT(selectCloserThan3V, nodesUpperV, close3NodesUpperV);
|
|
const Vec4V temp2NodesLowerV = SelectFT(selectCloserThan1V, nodesLowerV, close1NodesLowerV);
|
|
const Vec4V temp3NodesLowerV = SelectFT(selectCloserThan2V, nodesLowerV, close2NodesLowerV);
|
|
const Vec4V temp4NodesLowerV = SelectFT(selectCloserThan3V, nodesLowerV, close3NodesLowerV);
|
|
#else
|
|
const Vec4V temp2NodesV = SelectFT(selectCloserThan1V, nodesV, close1NodesV);
|
|
const Vec4V temp3NodesV = SelectFT(selectCloserThan2V, nodesV, close2NodesV);
|
|
const Vec4V temp4NodesV = SelectFT(selectCloserThan3V, nodesV, close3NodesV);
|
|
#endif
|
|
|
|
// Finally, compute the new first, second, and third closest objects found
|
|
// so far. /FF
|
|
close4D2V = SelectFT(selectCloserThan4V, close4D2V, temp4D2V);
|
|
close3D2V = SelectFT(selectCloserThan3V, close3D2V, temp3D2V);
|
|
close2D2V = SelectFT(selectCloserThan2V, close2D2V, temp2D2V);
|
|
close1D2V = SelectFT(selectCloserThan1V, close1D2V, d2V);
|
|
#if SPATIALARRAY64BIT
|
|
close4NodesUpperV = SelectFT(selectCloserThan4V, close4NodesUpperV, temp4NodesUpperV);
|
|
close3NodesUpperV = SelectFT(selectCloserThan3V, close3NodesUpperV, temp3NodesUpperV);
|
|
close2NodesUpperV = SelectFT(selectCloserThan2V, close2NodesUpperV, temp2NodesUpperV);
|
|
close1NodesUpperV = SelectFT(selectCloserThan1V, close1NodesUpperV, nodesUpperV);
|
|
|
|
close4NodesLowerV = SelectFT(selectCloserThan4V, close4NodesLowerV, temp4NodesLowerV);
|
|
close3NodesLowerV = SelectFT(selectCloserThan3V, close3NodesLowerV, temp3NodesLowerV);
|
|
close2NodesLowerV = SelectFT(selectCloserThan2V, close2NodesLowerV, temp2NodesLowerV);
|
|
close1NodesLowerV = SelectFT(selectCloserThan1V, close1NodesLowerV, nodesLowerV);
|
|
#else
|
|
close4NodesV = SelectFT(selectCloserThan4V, close4NodesV, temp4NodesV);
|
|
close3NodesV = SelectFT(selectCloserThan3V, close3NodesV, temp3NodesV);
|
|
close2NodesV = SelectFT(selectCloserThan2V, close2NodesV, temp2NodesV);
|
|
close1NodesV = SelectFT(selectCloserThan1V, close1NodesV, nodesV);
|
|
#endif
|
|
|
|
// Move on in the arrays. /FF
|
|
objXPtr++;
|
|
objYPtr++;
|
|
objZPtr++;
|
|
#if SPATIALARRAY64BIT
|
|
nodesPtrUpper++;
|
|
nodesPtrLower++;
|
|
#else
|
|
nodesPtr++;
|
|
#endif
|
|
typeFlagPtr++;
|
|
}
|
|
|
|
// Store out the squared distances and pointers to memory. /FF
|
|
Vec4V closest4D2V[4];
|
|
closest4D2V[0] = close1D2V;
|
|
closest4D2V[1] = close2D2V;
|
|
closest4D2V[2] = close3D2V;
|
|
closest4D2V[3] = close4D2V;
|
|
|
|
#if SPATIALARRAY64BIT
|
|
Vec4V closest4NodesUpperV[4];
|
|
closest4NodesUpperV[0] = close1NodesUpperV;
|
|
closest4NodesUpperV[1] = close2NodesUpperV;
|
|
closest4NodesUpperV[2] = close3NodesUpperV;
|
|
closest4NodesUpperV[3] = close4NodesUpperV;
|
|
|
|
Vec4V closest4NodesLowerV[4];
|
|
closest4NodesLowerV[0] = close1NodesLowerV;
|
|
closest4NodesLowerV[1] = close2NodesLowerV;
|
|
closest4NodesLowerV[2] = close3NodesLowerV;
|
|
closest4NodesLowerV[3] = close4NodesLowerV;
|
|
|
|
return sPickFromSortedArrays(closest4D2V, closest4NodesUpperV, closest4NodesLowerV, maxDist, found, 4);
|
|
#else
|
|
Vec4V closest4NodesV[4];
|
|
closest4NodesV[0] = close1NodesV;
|
|
closest4NodesV[1] = close2NodesV;
|
|
closest4NodesV[2] = close3NodesV;
|
|
closest4NodesV[3] = close4NodesV;
|
|
|
|
return sPickFromSortedArrays(closest4D2V, closest4NodesV, maxDist, found, 4);
|
|
#endif
|
|
}
|
|
|
|
|
|
// Gathers every object whose stored position is strictly closer than radiusV
// to centerV. For each hit, found[] receives the node pointer and the squared
// distance to the center, in array order. At most maxFound entries are
// written, and the number of entries written is returned.
int CSpatialArray::FindInSphere(Vec3V_In centerV, ScalarV_In radiusV,
		FindResult *found, int maxFound) const
{
	SA_PF_FUNC(FindInSphere);

	// If the caller gave us no room, nothing may be written to found[]. The
	// gather loop used to store the first hit before comparing against
	// maxFound, which wrote past the end of an empty output array.
	if(maxFound <= 0)
	{
		return 0;
	}

	SPATIALARRAYTHREADLOCK;

	// TODO: Probably operate on 8 objects instead of 4, to keep vector pipeline busy.
	// TODO: Maybe use cache prefetch and/or clear instructions.

	const int numObj = m_NumObj;

	// Squared radius and the center coordinates, each as a Vec4V so that
	// four objects can be tested per iteration.
	const Vec4V radiusSqV(Scale(radiusV, radiusV));
	const Vec4V centerxV(SplatX(centerV));
	const Vec4V centeryV(SplatY(centerV));
	const Vec4V centerzV(SplatZ(centerV));

	const Vec4V* RESTRICT objXPtr = (const Vec4V*)m_PosXArray;
	const Vec4V* RESTRICT objYPtr = (const Vec4V*)m_PosYArray;
	const Vec4V* RESTRICT objZPtr = (const Vec4V*)m_PosZArray;

#if SPATIALARRAY64BIT
	// The upper and lower 32 bit halves of the node pointers go to two
	// separate vector-aligned scratch arrays on the stack.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(u32))/sizeof(Vec4V);
	Vec4V foundArrayBuffUpper[tempArraySize];
	Vec4V foundArrayBuffLower[tempArraySize];
	u32* RESTRICT foundArrayUpper = (u32*)foundArrayBuffUpper;
	u32* RESTRICT foundArrayLower = (u32*)foundArrayBuffLower;

	// Make really sure they got aligned properly. /FF
	Assertf((((size_t)foundArrayUpper) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayUpper);
	Assertf((((size_t)foundArrayLower) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayLower);

	Vec4V* upperOutV = (Vec4V*)foundArrayUpper;
	Vec4V* lowerOutV = (Vec4V*)foundArrayLower;
#else
	// Reserve a vector-aligned array on the stack. /FF
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(CSpatialArrayNodeAddr))/sizeof(Vec4V);
	Vec4V foundArrayBuff[ tempArraySize ];
	CSpatialArrayNodeAddr* RESTRICT foundArray = (CSpatialArrayNodeAddr*)foundArrayBuff;

	// Make really sure it got aligned properly. /FF
	Assertf((((size_t)foundArray) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArray);

	Vec4V* nodesOutV = (Vec4V*)foundArray;
#endif

	// One squared distance (float) per object, i.e. one Vec4V per group of
	// four objects. This used to be declared with kMaxObjForTempBuffer Vec4V
	// elements, four times more stack than is ever touched.
	Vec4V distanceArray[ (kMaxObjForTempBuffer + 3)/4 ];

	// Pass 1: for each group of four objects, store the squared distances and
	// the node pointers, with the pointers zeroed for objects outside the sphere.
	for(int i = 0, block = 0; i < numObj; i += 4, block++)
	{
#if SPATIALARRAY64BIT
		const Vec4V nodesUpperV = *(const Vec4V*)&m_NodeArrayUpper[i];
		const Vec4V nodesLowerV = *(const Vec4V*)&m_NodeArrayLower[i];
#else
		const Vec4V nodesV = *(const Vec4V*)&m_NodeArray[i];
#endif

		const Vec4V dxV = Subtract(objXPtr[block], centerxV);
		const Vec4V dyV = Subtract(objYPtr[block], centeryV);
		const Vec4V dzV = Subtract(objZPtr[block], centerzV);

		const Vec4V dx2V = Scale(dxV, dxV);
		const Vec4V dxy2V = AddScaled(dx2V, dyV, dyV);
		const Vec4V d2V = AddScaled(dxy2V, dzV, dzV);

		// All bits set in the components that are inside the sphere.
		const Vec4V selectWithinSphereV(IsLessThan(d2V, radiusSqV));

#if SPATIALARRAY64BIT
		lowerOutV[block] = And(selectWithinSphereV, nodesLowerV);
		upperOutV[block] = And(selectWithinSphereV, nodesUpperV);
#else
		nodesOutV[block] = And(selectWithinSphereV, nodesV);
#endif
		distanceArray[block] = d2V;
	}

	// Pass 2: compact the surviving (non-NULL) pointers and their squared
	// distances into the caller's array.
	const float* floatDistanceArray = reinterpret_cast<const float*>(distanceArray);
	int numfound = 0;
	for(int i = 0; i < numObj && numfound < maxFound; i++)
	{
#if SPATIALARRAY64BIT
		CSpatialArrayNode* addr = NodePtrFromUpperLower(foundArrayUpper[i], foundArrayLower[i]);
#else
		CSpatialArrayNode* addr = (CSpatialArrayNode*)foundArray[i];
#endif
		if(!addr)
		{
			continue;
		}

		found[numfound].m_Node = addr;
		found[numfound].m_DistanceSq = floatDistanceArray[i];
		numfound++;
	}

	return numfound;
}
|
|
|
|
|
|
// Collects the nodes whose XY position lies strictly within radiusV of
// centerXYV; the Z coordinate is not looked at. Pointers to the matching
// nodes are compacted into found[] by CreateCompactNodePointerArray(), whose
// return value (the number of pointers written) is passed back to the caller.
int CSpatialArray::FindInCylinderXY(Vec2V_In centerXYV, ScalarV_In radiusV,
		CSpatialArrayNode **found, int maxFound) const
{
	SA_PF_FUNC(FindInCylinderXY);

	SPATIALARRAYTHREADLOCK;

	const int numObj = m_NumObj;

	// Squared radius and the center coordinates as Vec4V, so that four
	// objects can be tested per iteration.
	const Vec4V radiusSqV(Scale(radiusV, radiusV));
	const Vec4V centerXSplatV(SplatX(centerXYV));
	const Vec4V centerYSplatV(SplatY(centerXYV));

	const Vec4V* RESTRICT posXBlocks = (const Vec4V*)m_PosXArray;
	const Vec4V* RESTRICT posYBlocks = (const Vec4V*)m_PosYArray;

#if SPATIALARRAY64BIT
	// Separate vector-aligned stack arrays for the upper and lower 32 bit
	// halves of the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(u32))/sizeof(Vec4V);
	Vec4V foundArrayBuffUpper[tempArraySize];
	Vec4V foundArrayBuffLower[tempArraySize];
	u32* RESTRICT foundArrayUpper = (u32*)foundArrayBuffUpper;
	u32* RESTRICT foundArrayLower = (u32*)foundArrayBuffLower;

	// The vector stores below require 16 byte alignment.
	Assertf((((size_t)foundArrayUpper) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayUpper);
	Assertf((((size_t)foundArrayLower) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayLower);

	Vec4V* upperOutV = (Vec4V*)foundArrayUpper;
	Vec4V* lowerOutV = (Vec4V*)foundArrayLower;
#else
	// Vector-aligned stack array for the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(CSpatialArrayNodeAddr))/sizeof(Vec4V);
	Vec4V foundArrayBuff[ tempArraySize ];
	CSpatialArrayNodeAddr* RESTRICT foundArray = (CSpatialArrayNodeAddr*)foundArrayBuff;

	// The vector stores below require 16 byte alignment.
	Assertf((((size_t)foundArray) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArray);

	Vec4V* nodesOutV = (Vec4V*)foundArray;
#endif

	// Four objects per iteration: store their node pointers, zeroed for the
	// objects that fall outside the cylinder.
	for(int i = 0, block = 0; i < numObj; i += 4, block++)
	{
		const Vec4V offsetXV = Subtract(posXBlocks[block], centerXSplatV);
		const Vec4V offsetYV = Subtract(posYBlocks[block], centerYSplatV);

		// Squared distance in the XY plane.
		const Vec4V offsetXSqV = Scale(offsetXV, offsetXV);
		const Vec4V distSqXYV = AddScaled(offsetXSqV, offsetYV, offsetYV);

		const Vec4V insideMaskV(IsLessThan(distSqXYV, radiusSqV));

#if SPATIALARRAY64BIT
		const Vec4V nodeUpperHalvesV = *(const Vec4V*)&m_NodeArrayUpper[i];
		const Vec4V nodeLowerHalvesV = *(const Vec4V*)&m_NodeArrayLower[i];

		lowerOutV[block] = And(insideMaskV, nodeLowerHalvesV);
		upperOutV[block] = And(insideMaskV, nodeUpperHalvesV);
#else
		const Vec4V nodeAddrsV = *(const Vec4V*)&m_NodeArray[i];

		nodesOutV[block] = And(insideMaskV, nodeAddrsV);
#endif
	}

	// Squeeze out the zeroed entries and hand the rest to the caller.
#if SPATIALARRAY64BIT
	return CreateCompactNodePointerArray(foundArrayUpper, foundArrayLower, numObj, found, maxFound);
#else
	return CreateCompactNodePointerArray(foundArray, numObj, found, maxFound);
#endif
}
|
|
|
|
|
|
// Convenience overload taking the radius as a plain float: the radius is
// loaded into a ScalarV and the call is forwarded to the ScalarV version.
int CSpatialArray::FindInSphere(Vec3V_In centerV, float radius, FindResult *found,
		int maxFound) const
{
	const ScalarV radiusAsScalarV = LoadScalar32IntoScalarV(radius);
	const int numWritten = FindInSphere(centerV, radiusAsScalarV, found, maxFound);
	return numWritten;
}
|
|
|
|
|
|
// Collects the nodes that are strictly within radiusV of centerV and whose
// type flags satisfy (flags & typeFlagsToCareAbout) == typeFlagValues.
// Pointers to the matching nodes are compacted into found[] by
// CreateCompactNodePointerArray(), whose return value (the number of
// pointers written) is passed back to the caller.
int CSpatialArray::FindInSphereOfType(Vec3V_In centerV, ScalarV_In radiusV,
		CSpatialArrayNode **found, int maxFound,
		const u32 &typeFlagsToCareAbout, const u32 &typeFlagValues) const
{
	SA_PF_FUNC(FindInSphereOfType);

	// typeFlagValues must not have bits set outside of typeFlagsToCareAbout.
	// Such bits could never match, and we would rather have the caller avoid
	// them than spend time masking them off here.
	Assert((typeFlagValues & ~typeFlagsToCareAbout) == 0);

	SPATIALARRAYTHREADLOCK;

	// TODO: Probably operate on 8 objects instead of 4, to keep vector pipeline busy.
	// TODO: Maybe use cache prefetch and/or clear instructions.

	const int numObj = m_NumObj;

	// The flag mask and the wanted values are taken by reference so they can
	// be loaded straight from memory into vector registers.
	const Vec4V careMaskV = Vec4V(LoadScalar32IntoScalarV(typeFlagsToCareAbout));
	const Vec4V wantedFlagsV = Vec4V(LoadScalar32IntoScalarV(typeFlagValues));

	// Squared radius and the center coordinates as Vec4V, so that four
	// objects can be tested per iteration.
	const Vec4V radiusSqV(Scale(radiusV, radiusV));
	const Vec4V centerXSplatV(SplatX(centerV));
	const Vec4V centerYSplatV(SplatY(centerV));
	const Vec4V centerZSplatV(SplatZ(centerV));

	const Vec4V* RESTRICT posXBlocks = (const Vec4V*)m_PosXArray;
	const Vec4V* RESTRICT posYBlocks = (const Vec4V*)m_PosYArray;
	const Vec4V* RESTRICT posZBlocks = (const Vec4V*)m_PosZArray;
	const Vec4V* RESTRICT typeFlagBlocks = (const Vec4V*)m_TypeFlagArray;

#if SPATIALARRAY64BIT
	// Separate vector-aligned stack arrays for the upper and lower 32 bit
	// halves of the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(u32))/sizeof(Vec4V);
	Vec4V foundArrayBuffUpper[tempArraySize];
	Vec4V foundArrayBuffLower[tempArraySize];
	u32* RESTRICT foundArrayUpper = (u32*)foundArrayBuffUpper;
	u32* RESTRICT foundArrayLower = (u32*)foundArrayBuffLower;

	// The vector stores below require 16 byte alignment.
	Assertf((((size_t)foundArrayUpper) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayUpper);
	Assertf((((size_t)foundArrayLower) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayLower);

	Vec4V* upperOutV = (Vec4V*)foundArrayUpper;
	Vec4V* lowerOutV = (Vec4V*)foundArrayLower;
#else
	// Vector-aligned stack array for the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(CSpatialArrayNodeAddr))/sizeof(Vec4V);
	Vec4V foundArrayBuff[ tempArraySize ];
	CSpatialArrayNodeAddr* RESTRICT foundArray = (CSpatialArrayNodeAddr*)foundArrayBuff;

	// The vector stores below require 16 byte alignment.
	Assertf((((size_t)foundArray) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArray);

	Vec4V* nodesOutV = (Vec4V*)foundArray;
#endif

	// Four objects per iteration: store their node pointers, zeroed for the
	// objects that are outside the sphere or have the wrong type flags.
	for(int i = 0, block = 0; i < numObj; i += 4, block++)
	{
		const Vec4V flagsV = typeFlagBlocks[block];

		// Squared distance from each object to the center.
		const Vec4V offsetXV = Subtract(posXBlocks[block], centerXSplatV);
		const Vec4V offsetYV = Subtract(posYBlocks[block], centerYSplatV);
		const Vec4V offsetZV = Subtract(posZBlocks[block], centerZSplatV);

		const Vec4V offsetXSqV = Scale(offsetXV, offsetXV);
		const Vec4V distSqXYV = AddScaled(offsetXSqV, offsetYV, offsetYV);
		const Vec4V distSqV = AddScaled(distSqXYV, offsetZV, offsetZV);

		const VecBoolV insideSphereV = IsLessThan(distSqV, radiusSqV);

		// Keep only the flag bits we care about and compare them against
		// the wanted values.
		const Vec4V relevantFlagsV = And(flagsV, careMaskV);
		const VecBoolV flagsMatchV = IsEqualInt(relevantFlagsV, wantedFlagsV);

		// An object is accepted only if both tests passed.
		const VecBoolV acceptV = And(insideSphereV, flagsMatchV);

#if SPATIALARRAY64BIT
		const Vec4V nodeUpperHalvesV = *(const Vec4V*)&m_NodeArrayUpper[i];
		const Vec4V nodeLowerHalvesV = *(const Vec4V*)&m_NodeArrayLower[i];

		lowerOutV[block] = And(Vec4V(acceptV), nodeLowerHalvesV);
		upperOutV[block] = And(Vec4V(acceptV), nodeUpperHalvesV);
#else
		const Vec4V nodeAddrsV = *(const Vec4V*)&m_NodeArray[i];

		nodesOutV[block] = And(Vec4V(acceptV), nodeAddrsV);
#endif
	}

	// Squeeze out the zeroed entries and hand the rest to the caller.
#if SPATIALARRAY64BIT
	return CreateCompactNodePointerArray(foundArrayUpper, foundArrayLower, numObj, found, maxFound);
#else
	return CreateCompactNodePointerArray(foundArray, numObj, found, maxFound);
#endif
}
|
|
|
|
|
|
// Collects the nodes whose stored Z coordinate is strictly less than the
// given threshold. Pointers to the matching nodes are compacted into found[]
// by CreateCompactNodePointerArray(), whose return value (the number of
// pointers written) is passed back to the caller.
int CSpatialArray::FindBelowZ(ScalarV_In scalar_thresholdZV,
		CSpatialArrayNode **found, int maxFound) const
{
	SA_PF_FUNC(FindBelowZ);

	SPATIALARRAYTHREADLOCK;

	// TODO: Probably operate on 8 objects instead of 4, to keep vector pipeline busy.
	// TODO: Maybe use cache prefetch and/or clear instructions.

	const int numObj = m_NumObj;

	// The threshold as a Vec4V, so that four objects can be tested per iteration.
	const Vec4V zLimitV(scalar_thresholdZV);

	const Vec4V* RESTRICT posZBlocks = (const Vec4V*)m_PosZArray;

#if SPATIALARRAY64BIT
	// Separate vector-aligned stack arrays for the upper and lower 32 bit
	// halves of the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(u32))/sizeof(Vec4V);
	Vec4V foundArrayBuffUpper[tempArraySize];
	Vec4V foundArrayBuffLower[tempArraySize];
	u32* RESTRICT foundArrayUpper = (u32*)foundArrayBuffUpper;
	u32* RESTRICT foundArrayLower = (u32*)foundArrayBuffLower;

	// The vector stores below require 16 byte alignment.
	Assertf((((size_t)foundArrayUpper) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayUpper);
	Assertf((((size_t)foundArrayLower) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayLower);

	Vec4V* upperOutV = (Vec4V*)foundArrayUpper;
	Vec4V* lowerOutV = (Vec4V*)foundArrayLower;
#else
	// Vector-aligned stack array for the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(CSpatialArrayNodeAddr))/sizeof(Vec4V);
	Vec4V foundArrayBuff[ tempArraySize ];
	CSpatialArrayNodeAddr* RESTRICT foundArray = (CSpatialArrayNodeAddr*)foundArrayBuff;

	// The vector stores below require 16 byte alignment.
	Assertf((((size_t)foundArray) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArray);

	Vec4V* nodesOutV = (Vec4V*)foundArray;
#endif

	// Four objects per iteration: store their node pointers, zeroed for the
	// objects that are not below the threshold.
	for(int i = 0, block = 0; i < numObj; i += 4, block++)
	{
		const VecBoolV belowV = IsLessThan(posZBlocks[block], zLimitV);

#if SPATIALARRAY64BIT
		const Vec4V nodeUpperHalvesV = *(const Vec4V*)&m_NodeArrayUpper[i];
		const Vec4V nodeLowerHalvesV = *(const Vec4V*)&m_NodeArrayLower[i];

		lowerOutV[block] = And(Vec4V(belowV), nodeLowerHalvesV);
		upperOutV[block] = And(Vec4V(belowV), nodeUpperHalvesV);
#else
		const Vec4V nodeAddrsV = *(const Vec4V*)&m_NodeArray[i];

		nodesOutV[block] = And(Vec4V(belowV), nodeAddrsV);
#endif
	}

	// Squeeze out the zeroed entries and hand the rest to the caller.
#if SPATIALARRAY64BIT
	return CreateCompactNodePointerArray(foundArrayUpper, foundArrayLower, numObj, found, maxFound);
#else
	return CreateCompactNodePointerArray(foundArray, numObj, found, maxFound);
#endif
}
|
|
|
|
|
|
// Convenience overload taking the radius as a plain float: the radius is
// loaded into a ScalarV and the call is forwarded to the ScalarV version.
int CSpatialArray::FindInSphereOfType(Vec3V_In centerV, float radius,
		CSpatialArrayNode **found,
		int maxFound, const u32 &typeFlagsToCareAbout, const u32 &typeFlagValues) const
{
	const ScalarV radiusAsScalarV = LoadScalar32IntoScalarV(radius);
	const int numWritten = FindInSphereOfType(centerV, radiusAsScalarV, found, maxFound,
			typeFlagsToCareAbout, typeFlagValues);
	return numWritten;
}
|
|
|
|
|
|
// Collects the nodes whose stored position is strictly closer than
// distSegToObjCenter to the line segment from segPos1V to segPos2V. Pointers
// to the matching nodes are compacted into found[] by
// CreateCompactNodePointerArray(), whose return value (the number of
// pointers written) is passed back to the caller.
int CSpatialArray::FindNearSegment(Vec3V_In segPos1V, Vec3V_In segPos2V, const float& distSegToObjCenter, CSpatialArrayNode** found, int maxFound) const
{
	SA_PF_FUNC(FindNearSegment);

	SPATIALARRAYTHREADLOCK;

	// The comparison is done on squared distances.
	const ScalarV thresholdDistV = LoadScalar32IntoScalarV(distSegToObjCenter);
	const Vec4V thresholdDistSqV = Vec4V(Scale(thresholdDistV, thresholdDistV));

	const int numObj = m_NumObj;

	const Vec4V* RESTRICT objXPtr = (const Vec4V*)m_PosXArray;
	const Vec4V* RESTRICT objYPtr = (const Vec4V*)m_PosYArray;
	const Vec4V* RESTRICT objZPtr = (const Vec4V*)m_PosZArray;

#if SPATIALARRAY64BIT
	// Separate vector-aligned stack arrays for the upper and lower 32 bit
	// halves of the candidate pointers.
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(u32))/sizeof(Vec4V);
	Vec4V foundArrayBuffUpper[tempArraySize];
	Vec4V foundArrayBuffLower[tempArraySize];
	u32* RESTRICT foundArrayUpper = (u32*)foundArrayBuffUpper;
	u32* RESTRICT foundArrayLower = (u32*)foundArrayBuffLower;

	// Make really sure they got aligned properly. /FF
	Assertf((((size_t)foundArrayUpper) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayUpper);
	Assertf((((size_t)foundArrayLower) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArrayLower);

	u32* RESTRICT foundArrayPtrUpper = foundArrayUpper;
	u32* RESTRICT foundArrayPtrLower = foundArrayLower;
#else
	// Reserve a vector-aligned array on the stack. /FF
	const static int tempArraySize = (kMaxObjForTempBuffer*sizeof(CSpatialArrayNodeAddr))/sizeof(Vec4V);
	Vec4V foundArrayBuff[ tempArraySize ];
	CSpatialArrayNodeAddr* RESTRICT foundArray = (CSpatialArrayNodeAddr*)foundArrayBuff;

	// Make really sure it got aligned properly. /FF
	Assertf((((size_t)foundArray) & 0xf) == 0, "Got address %p, expected 16 byte alignment.", foundArray);

	CSpatialArrayNodeAddr* RESTRICT foundArrayPtr = foundArray;
#endif

	// Segment start point and direction (end minus start), one Vec4V per
	// coordinate so that four objects can be tested per iteration.
	const Vec3V segPos1To2V = Subtract(segPos2V, segPos1V);
	const Vec4V point1XV(segPos1V.GetX());
	const Vec4V point1YV(segPos1V.GetY());
	const Vec4V point1ZV(segPos1V.GetZ());

	const Vec4V deltaXV(segPos1To2V.GetX());
	const Vec4V deltaYV(segPos1To2V.GetY());
	const Vec4V deltaZV(segPos1To2V.GetZ());

	const Vec4V zeroV(V_ZERO);
	const Vec4V oneV(V_ONE);

	// Squared length of the segment (delta dot delta). This depends only on
	// the segment, so it is computed once here instead of once per group of
	// four objects.
	const Vec4V bothDotXV = Scale(deltaXV, deltaXV);
	const Vec4V bothDotXYV = AddScaled(bothDotXV, deltaYV, deltaYV);
	const Vec4V bothDotV = AddScaled(bothDotXYV, deltaZV, deltaZV);

	for(int i = 0; i < numObj; i += 4)
	{
		const Vec4V xxV = *objXPtr;
		const Vec4V yyV = *objYPtr;
		const Vec4V zzV = *objZPtr;

#if SPATIALARRAY64BIT
		const Vec4V nodesUpperV = *(Vec4V*)&m_NodeArrayUpper[i];
		const Vec4V nodesLowerV = *(Vec4V*)&m_NodeArrayLower[i];
#else
		const Vec4V nodesV = *(Vec4V*)&m_NodeArray[i];
#endif

		// Here, we will compute the T values of the closest points on the segment,
		// for the four points. It's more or less done with the same operations
		// as in geomTValues::FindTValueSegToOriginV(), and it's even more similar
		// to sFindTValueSegToPoint() in 'TaskNavBase.cpp'.

		// Object positions relative to the segment start.
		const Vec4V ptXV = Subtract(xxV, point1XV);
		const Vec4V ptYV = Subtract(yyV, point1YV);
		const Vec4V ptZV = Subtract(zzV, point1ZV);

		// delta dot (object - start).
		const Vec4V oneDotXV = Scale(deltaXV, ptXV);
		const Vec4V oneDotXYV = AddScaled(oneDotXV, deltaYV, ptYV);
		const Vec4V oneDotV = AddScaled(oneDotXYV, deltaZV, ptZV);

		// T on the infinite line, then clamped to the segment: T >= 1 becomes 1,
		// and T is forced to 0 (by masking) wherever the dot product is not
		// positive.
		// NOTE(review): for a zero-length segment bothDotV is 0. oneDotV is then
		// 0 as well, so the mask below forces T to 0 - confirm that
		// InvScaleFast() is safe to call with a zero divisor.
		const Vec4V tOnInfLineV = InvScaleFast(oneDotV, bothDotV);
		const VecBoolV tMaxMaskV = IsGreaterThanOrEqual(tOnInfLineV, oneV);
		const Vec4V tClampedMaxV = SelectFT(tMaxMaskV, tOnInfLineV, oneV);
		const VecBoolV tMinMaskV = IsGreaterThan(oneDotV, zeroV);
		const Vec4V tClampedV = And(tClampedMaxV, Vec4V(tMinMaskV));

		// Next, compute the X, Y, and Z coordinates of the closest points
		// to each of the four objects.
		const Vec4V closestPtXV = AddScaled(point1XV, deltaXV, tClampedV);
		const Vec4V closestPtYV = AddScaled(point1YV, deltaYV, tClampedV);
		const Vec4V closestPtZV = AddScaled(point1ZV, deltaZV, tClampedV);

		// Compute the squared distance to each of these.
		const Vec4V ptToClosestXV = Subtract(closestPtXV, xxV);
		const Vec4V ptToClosestYV = Subtract(closestPtYV, yyV);
		const Vec4V ptToClosestZV = Subtract(closestPtZV, zzV);
		const Vec4V distSqXV = Scale(ptToClosestXV, ptToClosestXV);
		const Vec4V distSqXYV = AddScaled(distSqXV, ptToClosestYV, ptToClosestYV);
		const Vec4V distSqV = AddScaled(distSqXYV, ptToClosestZV, ptToClosestZV);

		// Compute a mask for which objects are close enough, and AND that with
		// the node addresses, so that the rejected ones become NULL.
		const Vec4V selectNearSegV(IsLessThan(distSqV, thresholdDistSqV));
#if SPATIALARRAY64BIT
		const Vec4V nodesNearSegUpperV = And(selectNearSegV, nodesUpperV);
		const Vec4V nodesNearSegLowerV = And(selectNearSegV, nodesLowerV);
#else
		const Vec4V nodesNearSegV = And(selectNearSegV, nodesV);
#endif

		// Advance to the next four objects.
		objXPtr++;
		objYPtr++;
		objZPtr++;
#if SPATIALARRAY64BIT
		u32* RESTRICT oldFoundArrayPtrUpper = foundArrayPtrUpper;
		foundArrayPtrUpper += 4;

		u32* RESTRICT oldFoundArrayPtrLower = foundArrayPtrLower;
		foundArrayPtrLower += 4;

		*(Vec4V*)oldFoundArrayPtrLower = nodesNearSegLowerV;
		*(Vec4V*)oldFoundArrayPtrUpper = nodesNearSegUpperV;
#else
		CSpatialArrayNodeAddr* RESTRICT oldFoundArrayPtr = foundArrayPtr;
		foundArrayPtr += 4;

		*(Vec4V*)oldFoundArrayPtr = nodesNearSegV;
#endif
	}

	// Create a compact array of pointers to return to the caller.
#if SPATIALARRAY64BIT
	return CreateCompactNodePointerArray(foundArrayUpper, foundArrayLower, numObj, found, maxFound);
#else
	return CreateCompactNodePointerArray(foundArray, numObj, found, maxFound);
#endif
}
|
|
|
|
#if __DEV
|
|
|
|
void CSpatialArray::DebugDraw() const
|
|
{
|
|
SPATIALARRAYTHREADLOCK;
|
|
|
|
Matrix34 mtrx;
|
|
mtrx.Identity();
|
|
|
|
const int numObj = m_NumObj;
|
|
for(int i = 0; i < numObj; i++)
|
|
{
|
|
mtrx.d.x = m_PosXArray[i];
|
|
mtrx.d.y = m_PosYArray[i];
|
|
mtrx.d.z = m_PosZArray[i];
|
|
grcDebugDraw::Axis(mtrx, 1.0f);
|
|
|
|
char buf[16];
|
|
formatf(buf, "%04x", m_TypeFlagArray[i]);
|
|
|
|
grcDebugDraw::Text(mtrx.d, Color_white, buf);
|
|
}
|
|
}
|
|
|
|
#endif // __DEV
|
|
|
|
#if SPATIALARRAY64BIT
|
|
int CSpatialArray::CreateCompactNodePointerArray(const u32* foundArrayUpper, const u32* foundArrayLower, int numObj, CSpatialArrayNode** found, int maxFound)
|
|
#else
|
|
int CSpatialArray::CreateCompactNodePointerArray(const CSpatialArrayNodeAddr* foundArray, int numObj, CSpatialArrayNode** found, int maxFound)
|
|
#endif
|
|
{
|
|
int numfound = 0;
|
|
for(int i = 0; i < numObj; i++)
|
|
{
|
|
#if SPATIALARRAY64BIT
|
|
CSpatialArrayNode* addr = NodePtrFromUpperLower(foundArrayUpper[i], foundArrayLower[i]);
|
|
#else
|
|
CSpatialArrayNode* addr = (CSpatialArrayNode*)foundArray[i];
|
|
#endif
|
|
if(addr)
|
|
{
|
|
found[numfound] = addr;
|
|
numfound++;
|
|
if(numfound >= maxFound)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return numfound;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
/* End of file ai/spatialarray.cpp */
|