Files
GTASource/game/scene/EntityBatch_Def.h
expvintl 419f2e4752 init
2025-02-23 17:40:52 +08:00

335 lines
12 KiB
C++

//
// entity/entitybatch_def.h : Common definitions used by batched entities
//
// Copyright (C) 1999-2012 Rockstar Games. All Rights Reserved.
//
#ifndef SCENE_ENTITY_BATCH_DEF_H_
#define SCENE_ENTITY_BATCH_DEF_H_
#include "shader_source/util/BatchInstancing.fxh" //For GRASS_BATCH_CS_CULLING defines... and some others
#include "system/noncopyable.h"
#include "grcore/effect_typedefs.h"
#include "grmodel/shadergroupvar.h"
#include "system/pix.h"
// Switch for pre-allocated grass compute-shader buffers: non-zero only when both GRASS_BATCH_CS_CULLING and RSG_PC are non-zero.
#define GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS (1 && GRASS_BATCH_CS_CULLING && RSG_PC)
#if GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS
// Pre-allocation enabled: _ONLY(...) emits its arguments verbatim, _SWITCH(a,b) selects a.
# define GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(...) __VA_ARGS__
# define GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_SWITCH(_if_CS_,_else_) _if_CS_
#else
// Pre-allocation disabled: _ONLY(...) emits nothing, _SWITCH(a,b) selects b.
# define GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(...)
# define GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_SWITCH(_if_CS_,_else_) _else_
#endif
// Dynamic buffers are used when CS culling is on but pre-allocation is off, so the two modes are mutually exclusive.
#define GRASS_BATCH_CS_DYNAMIC_BUFFERS (GRASS_BATCH_CS_CULLING && !GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS)
#if GRASS_BATCH_CS_DYNAMIC_BUFFERS
// Dynamic buffers enabled: _ONLY(...) emits its arguments verbatim, _SWITCH(a,b) selects a.
# define GRASS_BATCH_CS_DYNAMIC_BUFFERS_ONLY(...) __VA_ARGS__
# define GRASS_BATCH_CS_DYNAMIC_BUFFERS_SWITCH(_if_CS_,_else_) _if_CS_
#else
// Dynamic buffers disabled: _ONLY(...) emits nothing, _SWITCH(a,b) selects b.
# define GRASS_BATCH_CS_DYNAMIC_BUFFERS_ONLY(...)
# define GRASS_BATCH_CS_DYNAMIC_BUFFERS_SWITCH(_if_CS_,_else_) _else_
#endif
#if RSG_PC &&__D3D11
# include "grcore/buffer_d3d11.h"
#elif RSG_DURANGO
# include "grcore/buffer_d3d11.h"
# include "grcore/buffer_durango.h"
#elif RSG_ORBIS
# include "grcore/buffer_gnm.h"
#endif
#include "grcore/device.h"
namespace rage {
class fwGrassInstanceListDef;
}
namespace EBStatic {
#if GRASS_BATCH_CS_CULLING
#if GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS
// Fixed array with one slot per LOD (LOD_COUNT entries); each slot is a two-level nested array of u32 values.
// NOTE(review): presumably these are offsets into the indirect-args buffer for each piece of a drawable -- confirm against the code that fills the map.
typedef atRangeArray<atArray<atArray<u32> >, LOD_COUNT> IndirectArgsDrawableOffsetMap;
#endif //GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS
#if RSG_ORBIS
BEGIN_ALIGNED(8) //sce::Gnm::kAlignmentOfIndirectArgsInBytes
// Orbis flavour of the indirect draw arguments: extends the Gnm argument struct with
// offset constants (defined elsewhere) and a constructor that marks the instance count as invalid.
struct IndirectArgParams : public sce::Gnm::DrawIndexIndirectArgs
{
	// Offsets of the individual argument fields; the values are defined out of line.
	static const u32 sIndexCountPerInstanceOffset;
	static const u32 sInstanceCountOffset; // = offsetof(IndirectArgParams, m_instanceCount);
	static const u32 sStartIndexLocationOffset;
	static const u32 sBaseVertexLocationOffset;
	static const u32 sStartInstanceLocationOffset;

	// Sentinel (all bits set) stored in m_instanceCount by the constructor.
	static const u32 sInvalidInstanceCount = static_cast<u32>(-1);

	// Zeroes every inherited field except the instance count, which starts out as the invalid sentinel.
	IndirectArgParams()
	{
		m_startInstanceLocation = 0;
		m_baseVertexLocation = 0;
		m_startIndexLocation = 0;
		m_indexCountPerInstance = 0;
		m_instanceCount = sInvalidInstanceCount; //m_instanceCount = 1;
	}
} ALIGNED(8); //sce::Gnm::kAlignmentOfIndirectArgsInBytes
// The hard-coded 8 above must match the alignment Gnm requires for indirect arguments.
CompileTimeAssert(__alignof(EBStatic::IndirectArgParams) == sce::Gnm::kAlignmentOfIndirectArgsInBytes);
#else //RSG_ORBIS
// Indirect draw arguments for non-Orbis platforms: five consecutive fields, all zero-initialised.
// NOTE(review): the field names and order look like D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS -- confirm before reordering anything.
struct IndirectArgParams
{
	// Argument fields. Declaration order defines the memory layout; do not reorder.
	u32 m_indexCountPerInstance;
	u32 m_instanceCount;
	u32 m_startIndexLocation;
	int m_baseVertexLocation;
	u32 m_startInstanceLocation;

	// Offsets of the individual argument fields; the values are defined out of line.
	static const u32 sIndexCountPerInstanceOffset;
	static const u32 sInstanceCountOffset;
	static const u32 sStartIndexLocationOffset;
	static const u32 sBaseVertexLocationOffset;
	static const u32 sStartInstanceLocationOffset;

	// Starts every argument at zero.
	IndirectArgParams()
		: m_indexCountPerInstance(0)
		, m_instanceCount(0)
		, m_startIndexLocation(0)
		, m_baseVertexLocation(0)
		, m_startInstanceLocation(0)
	{
	}
};
#endif //RSG_ORBIS
#if GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS
// Indirect draw-argument buffer whose size is fixed at compile time so it can be created ahead of use.
//   _BufferSizeInBytes: byte size handed to the platform allocation / initialisation call in Create().
template<u32 _BufferSizeInBytes>
class PreAllocatedIndirectArgBufferResource : public grcBufferBasic
{
public:
	typedef grcBufferBasic parent_type;
	static const u32 sSizeInBytes = _BufferSizeInBytes;

	PreAllocatedIndirectArgBufferResource()
		: parent_type(ORBIS_ONLY(grcBuffer_Structured, true))
#if NV_SUPPORT
		, bNeedsNVFlaging(false) // Defined starting value; Create() assigns the real one.
#endif // NV_SUPPORT
	{}

#if NV_SUPPORT
	// Set by Create() when running on NVIDIA hardware; cleared by PreCreate() and FlagForNoSLITransfer().
	bool bNeedsNVFlaging;
#endif // NV_SUPPORT

	// Creates the underlying device buffer of sSizeInBytes bytes.
	//   preCreate (assert builds only): true when called from PreCreate(). It is used only to validate
	//   device-thread ownership: pre-creation off the owning thread asserts on every platform, while
	//   late creation off the owning thread asserts on consoles and merely warns on PC.
	void Create( ASSERT_ONLY( bool preCreate ) )
	{
		Assertf(GRCDEVICE.CheckThreadOwnership() || !preCreate, "WARNING: Make sure to lock the device context before pre-allocating.");
#if RSG_ORBIS
		const sce::Gnm::SizeAlign accumSizeAlign = { sSizeInBytes, sce::Gnm::kAlignmentOfIndirectArgsInBytes };
		Allocate(accumSizeAlign, true);
		Assertf(GRCDEVICE.CheckThreadOwnership() || preCreate, "WARNING: If this happens because of allocation during overflow, then ignore it and just increase the pre-allocated pool size.");
#elif RSG_DURANGO
		Initialise(sSizeInBytes, 1, grcBindNone, NULL, grcBufferMisc_DrawIndirectArgs);
		Assertf(GRCDEVICE.CheckThreadOwnership() || preCreate, "WARNING: If this happens because of allocation during overflow, then ignore it and just increase the pre-allocated pool size.");
#else // RSG_PC
		// url:bugstar:2322748 and url:bugstar:2206695: This is not critical on PC due to more flexible memory requirements + likely due to extreme draw distances. In these situations just rely on
		// dynamic allocation if we overflow + print out a message to say we're overflowing. Ideally this shouldn't happen much but downgrading this from an assert for now on Alan's advice.
		// I've checked the code - and as we only use an ID3D11Device (whose calls are thread-safe) and not a ID3D11DeviceContext - we don't need to do a LockContext() here on PC.
		// See bug for more details.
		Initialise(sSizeInBytes, 1, grcBindNone, grcsBufferCreate_ReadWrite, grcsBufferSync_None, NULL, false, grcBufferMisc_DrawIndirectArgs);
		ASSERT_ONLY( if (!(GRCDEVICE.CheckThreadOwnership() || preCreate)) { Warningf("WARNING: If this happens because of allocation during overflow, then ignore it and just increase the pre-allocated pool size."); } );
#endif
#if NV_SUPPORT
		// A freshly created buffer has not been flagged yet, so on NVIDIA it still needs flagging.
		static const bool isNV = GRCDEVICE.GetManufacturer() == NVIDIA;
		bNeedsNVFlaging = isNV;
#endif // NV_SUPPORT
	}

	// Creates the buffer ahead of time and, on NVIDIA hardware, immediately flags it for no SLI transfer.
	void PreCreate()
	{
		Create( ASSERT_ONLY( true ) );
#if NV_SUPPORT
		// Avoid calling this on non-Nvidia platforms. The NvAPI calls ought to be harmless on all platforms.
		// If this breaks on laptops with Intel and Nvidia graphics, then ask Adrian. :-)
		static const bool isNV = GRCDEVICE.GetManufacturer() == NVIDIA;
		if (isNV)
		{
			FlagForNoSLITransfer();
		}
		bNeedsNVFlaging = false;
#endif // NV_SUPPORT
	}

#if NV_SUPPORT
	// These buffers are written every frame with no inter-frame dependencies. However, for SLI, the Nvidia driver is not able to
	// determine that no inter-frame dependencies exist because the buffer is partially updated. In theory, parts that were updated
	// in the previous frame could be consumed by the draw call. Hence, flag it explicitly as not requiring transfer between GPUs.
	// This makes a huge difference to SLI performance.
	void FlagForNoSLITransfer()
	{
		GRCDEVICE.LockContext();
		NVDX_ObjectHandle pNvAPIObj = 0;
		if (GRCDEVICE.GetCurrent())
		{
			NvAPI_D3D_GetObjectHandleForResource(GRCDEVICE.GetCurrent(), GetD3DBuffer(), &pNvAPIObj);
			// Only set the hint when a handle was actually obtained.
			if (pNvAPIObj)
			{
				NvU32 hint = 1;
				NvAPI_D3D_SetResourceHint(GRCDEVICE.GetCurrent(), pNvAPIObj, NVAPI_D3D_SRH_CATEGORY_SLI, NVAPI_D3D_SRH_SLI_APP_CONTROLLED_INTERFRAME_CONTENT_SYNC, &hint);
			}
		}
		GRCDEVICE.UnlockContext();
		bNeedsNVFlaging = false;
	}
#endif //NV_SUPPORT
};
// Default instantiation: a 4 KiB (1024 * 4 bytes) indirect-argument buffer.
typedef PreAllocatedIndirectArgBufferResource<1024 * 4> PreAllocatedIndirectArgBuffer;
// Structured append (UAV) buffer sized for a compile-time maximum number of grass instance records,
// so it can be created ahead of use.
//   _MaxBufferCount: number of elements, each GrassBatchCSInstanceData_Size bytes.
template<u32 _MaxBufferCount>
class PreAllocatedAppendBufferResource : public grcBufferUAV
{
public:
	typedef grcBufferUAV parent_type;
	static const u32 sMaxBufferCount = _MaxBufferCount;

	PreAllocatedAppendBufferResource() : parent_type(ORBIS_ONLY(grcBuffer_Structured, true)) {}

	// Creates the underlying device buffer with room for sMaxBufferCount elements.
	// The assert-only preCreate argument is accepted but unused.
	void Create(ASSERT_ONLY(bool /*preCreate*/))
	{
#if RSG_ORBIS
		const sce::Gnm::SizeAlign accumSizeAlign = { GrassBatchCSInstanceData_Size * sMaxBufferCount, GrassBatchCSInstanceData_Align };
		Allocate(accumSizeAlign, true, NULL, GrassBatchCSInstanceData_Size);
#elif RSG_DURANGO
		Initialise(sMaxBufferCount, GrassBatchCSInstanceData_Size, grcBindShaderResource|grcBindUnorderedAccess, NULL, grcBufferMisc_BufferStructured, grcBuffer_UAV_FLAG_APPEND);
#else // RSG_PC
		Initialise(sMaxBufferCount, GrassBatchCSInstanceData_Size, grcBindShaderResource|grcBindUnorderedAccess, grcsBufferCreate_ReadWriteOnceOnly, grcsBufferSync_None, NULL, false, grcBufferMisc_BufferStructured, grcBuffer_UAV_FLAG_APPEND);
#endif
	}

	// Creates the buffer ahead of time; forwards to Create() with the assert-only flag set to true.
	void PreCreate()
	{
		Create(ASSERT_ONLY(true));
	}
};
// Default instantiation: room for 4096 (1024 * 4) instance records.
typedef PreAllocatedAppendBufferResource<1024 * 4> PreAllocatedAppendBuffer;
// Per-drawable device resources for the pre-allocated path: one indirect-args buffer plus its per-LOD offset map.
// Non-copyable. The constructor is defined elsewhere.
struct DrawableDeviceResources
{
// NOTE(review): presumably builds m_OffsetMap (and the buffer contents) from the drawable's geometry -- confirm in the .cpp.
DrawableDeviceResources(rmcDrawable &drawable);
// Members are constructed in declaration order: buffer first, then the offset map.
PreAllocatedIndirectArgBuffer m_IndirectArgsBuffer;
IndirectArgsDrawableOffsetMap m_OffsetMap;
NON_COPYABLE(DrawableDeviceResources);
};
#endif //GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS
// Bundle of shader variable handles (grcEffectVar / grmShaderGroupVar) plus a shader index used by the grass compute-shader path.
// The validity flag is private: read it through IsValid() and refresh it through UpdateValidity() (defined elsewhere).
// NOTE(review): presumably the handles are looked up once by name from the grass batch shader -- confirm in the .cpp.
struct GrassCSVars
{
GrassCSVars();
// Returns the flag cached in m_IsValid.
bool IsValid() const { return m_IsValid; }
// Defined elsewhere. NOTE(review): presumably recomputes m_IsValid from the handles below -- confirm.
void UpdateValidity();
int m_ShaderIndex;
// One append-instance-buffer variable per LOD.
atRangeArray<grcEffectVar, LOD_COUNT> m_idVarAppendInstBuffer;
// Present only on Durango builds that use dynamic buffers.
DURANGO_ONLY(GRASS_BATCH_CS_DYNAMIC_BUFFERS_ONLY(grcEffectVar m_idIndirectArgs));
grcEffectVar m_idInstCullParams;
grcEffectVar m_idNumClipPlanes;
grcEffectVar m_idClipPlanes;
grcEffectVar m_idCameraPosition;
grcEffectVar m_idLodInstantTransition;
#if RSG_DURANGO || RSG_PC
grcEffectVar m_idGrassSkipInstance;
#endif // RSG_DURANGO || RSG_PC...
grcEffectVar m_idLodThresholds;
grcEffectVar m_idCrossFadeDistance;
grcEffectVar m_idIsShadowPass;
grcEffectVar m_idLodFadeControlRange;
// Present only on Durango.
DURANGO_ONLY(grcEffectVar m_idIndirectCountPerLod);
// Shader-group level variables.
grmShaderGroupVar m_idVarInstanceBuffer;
grmShaderGroupVar m_idVarRawInstBuffer;
grmShaderGroupVar m_idVarUseCSOutputBuffer;
grmShaderGroupVar m_idAabbMin;
grmShaderGroupVar m_idAabbDelta;
grmShaderGroupVar m_idScaleRange;
private:
// Cached validity, exposed through IsValid().
bool m_IsValid;
};
//These params are set on the update thread and stored in the draw handler for use on the render thread.
// The struct holds non-owning pointers to the instance list, the shader variables and the buffers it refers to.
// The validity flag is private: read it through IsValid() and refresh it through UpdateValidity() (defined elsewhere).
struct GrassCSBaseParams
{
public:
GrassCSBaseParams();
// Returns the flag cached in m_IsValid.
bool IsValid() const { return m_IsValid; }
// Defined elsewhere. NOTE(review): presumably recomputes m_IsValid from the members below -- confirm.
void UpdateValidity();
const fwGrassInstanceListDef *m_InstanceList; //Smaller than saving the Vec3Vs for params (AABB min/delta & scale range) and since we use resources from list anyway, it's safe to keep a pointer.
const GrassCSVars *m_Vars;
u32 m_InstanceCount;
float m_InstAabbRadius;
// Dynamic-buffer builds only: indirect-argument count and one offset per LOD.
GRASS_BATCH_CS_DYNAMIC_BUFFERS_ONLY(u32 m_IndirectArgCount);
GRASS_BATCH_CS_DYNAMIC_BUFFERS_ONLY(atRangeArray<u32, LOD_COUNT> m_IndirectArgLodOffsets);
const grcBufferUAV *m_RawInstBuffer;
// Pre-allocated builds only: pointers to the indirect-args buffer and its offset map.
GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(PreAllocatedIndirectArgBuffer *m_IndirectArgsBuffer);
GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(IndirectArgsDrawableOffsetMap *m_OffsetMap);
// Emitted only when PIX_TAGGING_ONLY is active.
PIX_TAGGING_ONLY(int m_ModelIndex);
private:
// Cached validity, exposed through IsValid().
bool m_IsValid;
};
#if GRASS_BATCH_CS_DYNAMIC_BUFFERS
// Device resources for the dynamic-buffer path: a single append-instance UAV buffer. Non-copyable.
struct GrassCSDeviceResources
{
GrassCSDeviceResources() {}
# if RSG_ORBIS
// On Orbis the buffer is wrapped in grcBufferConstructable, which supplies the (grcBuffer_Structured, true) arguments as template parameters.
grcBufferConstructable<grcBufferUAV, grcBuffer_Structured, true> m_AppendInstBuffer;
# else
grcBufferUAV m_AppendInstBuffer;
# endif
NON_COPYABLE(GrassCSDeviceResources);
};
#endif //GRASS_BATCH_CS_DYNAMIC_BUFFERS
struct GrassCSParams
{
GrassCSParams();
GrassCSParams(const GrassCSBaseParams &base);
inline bool IsActive() const
{
bool active = m_Base.IsValid() && m_Active;
GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(active = active && m_AppendInstBuffer[0] != NULL);
GRASS_BATCH_CS_DYNAMIC_BUFFERS_ONLY(active = active && m_AppendBufferMem[0] != NULL);
WIN32PC_ONLY(active = active && GRCDEVICE.SupportsFeature(COMPUTE_SHADER_50));
return active;
}
const GrassCSBaseParams &m_Base; //Make this a reference to cut down on drawlist memory usage & mem copying
//All non-base value should be setup when used in the frame... Base params can be set once in batch entity and used as a template for DL copy.
const grcViewport *m_CurrVp;
GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(atRangeArray<grcBufferUAV *, LOD_COUNT> m_AppendInstBuffer); //Not in device resources b/c this isn't the owner of the resource. Resource is preallocated and this is just a ptr.
GRASS_BATCH_CS_PRE_ALLOCATE_BUFFERS_ONLY(PreAllocatedIndirectArgBuffer *m_LocalIndirectArgBuffer);
#if GRASS_BATCH_CS_DYNAMIC_BUFFERS
ORBIS_ONLY(atRangeArray<grcBufferConstructable<grcBufferUAV, grcBuffer_Structured, false>, LOD_COUNT> m_AppendDeviceBuffer);
atRangeArray<void *, LOD_COUNT> m_AppendBufferMem;
grcCrossContextAllocTyped<IndirectArgParams> m_IndirectBufferMem;
#endif //GRASS_BATCH_CS_DYNAMIC_BUFFERS
u32 m_phaseLODs : 15;
u32 m_lastLODIdx : 2;
u32 m_LODIdx : 2;
bool m_UseAltfadeDist : 1;
bool m_Active;
static const GrassCSBaseParams sm_InvalidBaseForDefaultConstructor;
};
#endif //GRASS_BATCH_CS_CULLING
}
#endif //SCENE_ENTITY_BATCH_DEF_H_