This commit is contained in:
FluorescentCIAAfricanAmerican
2020-04-22 12:56:21 -04:00
commit 3bf9df6b27
15370 changed files with 5489726 additions and 0 deletions

1215
togl/linuxwin/cglmbuffer.cpp Normal file

File diff suppressed because it is too large Load Diff

355
togl/linuxwin/cglmfbo.cpp Normal file
View File

@ -0,0 +1,355 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// cglmfbo.cpp
//
//===============================================================================
#include "togl/rendermechanism.h"
// memdbgon -must- be the last include file in a .cpp file.
#include "tier0/memdbgon.h"
// Construct an FBO owned by 'ctx': generate the GL framebuffer object name
// and start with an empty attachment table.
CGLMFBO::CGLMFBO( GLMContext *ctx )
{
	m_ctx = ctx;
	m_ctx->CheckCurrent();

	gGL->glGenFramebuffersEXT( 1, &m_name );

	// no textures attached yet
	memset( m_attach, 0, sizeof( m_attach ) );
}
// Tear down the FBO: release every live attachment, then delete the GL name.
CGLMFBO::~CGLMFBO( )
{
	m_ctx->CheckCurrent();

	// Drop any textures still attached before the framebuffer name goes away.
	// (not clearly required by GL, but it keeps the attach table and the
	// per-texture RT attach counts honest)
	for ( int slot = 0; slot < kAttCount; slot++ )
	{
		if ( !m_attach[ slot ].m_tex )
			continue;

		TexDetach( (EGLMFBOAttachment)slot );
	}

	gGL->glDeleteFramebuffersEXT( 1, &m_name );

	m_name = 0;
	m_ctx = NULL;
}
// the tex attach path should also select a specific slice of the texture...
// and we need a way to make renderbuffers..
// Map a logical EGLMFBOAttachment index onto the matching GL attachment enum.
// Color attachments occupy a linear range starting at GL_COLOR_ATTACHMENT0_EXT;
// depth / stencil / packed depth-stencil each have a dedicated enum.
// Any other index is fatal (GLMStop).
static GLenum EncodeAttachmentFBO( EGLMFBOAttachment index )
{
	if (index < kAttDepth)
	{
		// color attachment 0..N
		return GL_COLOR_ATTACHMENT0_EXT + (int) index;
	}

	switch( index )
	{
		case kAttDepth:			return GL_DEPTH_ATTACHMENT_EXT;
		case kAttStencil:		return GL_STENCIL_ATTACHMENT_EXT;
		case kAttDepthStencil:	return GL_DEPTH_STENCIL_ATTACHMENT_EXT;

		default:
			GLMStop();	// bad news - unknown attachment index
			break;
	}

	GLMStop();	// bad news

	// shouldn't get here
	return GL_COLOR_ATTACHMENT0_EXT;
}
//-----------------------------------------------------------------------------
// Attach one texture image (plain 2D, one cube-map face, or one 3D slice) to
// this FBO at the given attachment point.  Whatever is currently attached
// there is detached first; passing a NULL tex in params means "detach only".
//
// params       - texture plus mip / face / zslice selecting the image
// attachIndex  - logical attachment point (color N, depth, stencil, both)
// fboBindPoint - which framebuffer binding to operate on (draw/read/both)
//-----------------------------------------------------------------------------
void CGLMFBO::TexAttach( GLMFBOTexAttachParams *params, EGLMFBOAttachment attachIndex, GLenum fboBindPoint )
{
	// force our parent context to be current
	m_ctx->MakeCurrent();

	// bind to context (will cause FBO object creation on first use)
	m_ctx->BindFBOToCtx( this, fboBindPoint );

	// it's either a plain 2D, a 2D face of a cube map, or a slice of a 3D.
	CGLMTex *tex = params->m_tex;

	// always detach what is currently there, if anything
	this->TexDetach( attachIndex, fboBindPoint );

	if (!tex)
	{
		// and if they pass NULL to us, then we are done.
		return;
	}

	GLMTexLayout *layout = tex->m_layout;
	GLenum target = tex->m_layout->m_key.m_texGLTarget;
	GLenum attachIndexGL = EncodeAttachmentFBO( attachIndex );

	switch( target )
	{
		case GL_TEXTURE_2D:
		{
			// we will attach the underlying RBO on a multisampled tex, iff the tex has one, **and** we're not being asked to attach it to the read buffer.
			// if we get a req to attach an MSAA tex to the read buffer, chances are it's BlitTex calling, and it has already resolved the tex, so in those
			// cases you really do want to attach the texture and not the RBO to the FBO in question.
			bool useRBO = false;	// initial state

			if (layout->m_key.m_texFlags & kGLMTexMultisampled)
			{
				// it is an MSAA tex
				if (fboBindPoint == GL_READ_FRAMEBUFFER_EXT)
				{
					// I think you just want to read a resolved tex.
					// But I will check that it is resolved first..
					Assert( tex->IsRBODirty() == false );
				}
				else
				{
					// you want to draw into it.  You get the RBO bound instead of the tex.
					useRBO = true;
				}
			}

			if (useRBO)
			{
				// MSAA path - attach the RBO, not the texture, and mark the RBO dirty
				if (attachIndexGL==GL_DEPTH_STENCIL_ATTACHMENT_EXT)
				{
					// you have to attach it both places...
					// http://www.opengl.org/wiki/GL_EXT_framebuffer_object

					// bind the RBO to the GL_RENDERBUFFER_EXT target
					gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, tex->m_rboName );

					// attach the GL_RENDERBUFFER_EXT target to the depth and stencil attach points
					gGL->glFramebufferRenderbufferEXT( fboBindPoint, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, tex->m_rboName);
					gGL->glFramebufferRenderbufferEXT( fboBindPoint, GL_STENCIL_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, tex->m_rboName);

					// no need to leave the RBO hanging on
					gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, 0 );
				}
				else
				{
					// color attachment (likely 0)
					gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, tex->m_rboName );
					gGL->glFramebufferRenderbufferEXT( fboBindPoint, attachIndexGL, GL_RENDERBUFFER_EXT, tex->m_rboName);
					gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, 0 );
				}

				// drawing into the RBO makes it stale relative to the texture
				tex->ForceRBODirty();
			}
			else
			{
				// regular path - attaching a texture2d
				if (attachIndexGL==GL_DEPTH_STENCIL_ATTACHMENT_EXT)
				{
					// you have to attach it both places...
					// http://www.opengl.org/wiki/GL_EXT_framebuffer_object
					gGL->glFramebufferTexture2DEXT( fboBindPoint, GL_DEPTH_ATTACHMENT_EXT, target, tex->m_texName, params->m_mip );
					gGL->glFramebufferTexture2DEXT( fboBindPoint, GL_STENCIL_ATTACHMENT_EXT, target, tex->m_texName, params->m_mip );
				}
				else
				{
					gGL->glFramebufferTexture2DEXT( fboBindPoint, attachIndexGL, target, tex->m_texName, params->m_mip );
				}
			}
		}
		break;

		case GL_TEXTURE_3D:
		{
			gGL->glFramebufferTexture3DEXT( fboBindPoint, attachIndexGL, target, tex->m_texName, params->m_mip, params->m_zslice );
		}
		break;

		case GL_TEXTURE_CUBE_MAP:
		{
			// adjust target to steer to the proper face of the cube map
			target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + params->m_face;
			gGL->glFramebufferTexture2DEXT( fboBindPoint, attachIndexGL, target, tex->m_texName, params->m_mip );
		}
		break;
	}

	// log the attached tex
	// NOTE(review): this records the attachment even if 'target' matched none of
	// the cases above - confirm all reachable targets are covered by the switch.
	m_attach[ attachIndex ] = *params;

	// indicate that the tex has been bound to an RT
	tex->m_rtAttachCount++;
}
//-----------------------------------------------------------------------------
// Detach whatever texture (or, for MSAA 2D textures, its renderbuffer) is
// bound at attachIndex, clear the attachment record, and drop the texture's
// RT attach count.  A no-op if nothing is attached at that point.
//-----------------------------------------------------------------------------
void CGLMFBO::TexDetach( EGLMFBOAttachment attachIndex, GLenum fboBindPoint )
{
	// force our parent context to be current
	m_ctx->MakeCurrent();

	// bind to context (will cause FBO object creation on first use)
	m_ctx->BindFBOToCtx( this, fboBindPoint );

	if (m_attach[ attachIndex ].m_tex)
	{
		CGLMTex *tex = m_attach[ attachIndex ].m_tex;
		GLMTexLayout *layout = tex->m_layout;
		GLenum target = tex->m_layout->m_key.m_texGLTarget;
		GLenum attachIndexGL = EncodeAttachmentFBO( attachIndex );

		switch( target )
		{
			case GL_TEXTURE_2D:
			{
				if (layout->m_key.m_texFlags & kGLMTexMultisampled)
				{
					// MSAA path - detach the RBO, not the texture
					// (is this the right time to resolve?  probably better to wait until someone tries to sample the texture)
					gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, 0 );

					// NOTE(review): the renderbuffer detaches below target GL_FRAMEBUFFER_EXT
					// rather than the fboBindPoint parameter used everywhere else in this
					// function and in TexAttach - confirm whether that is intentional.
					if (attachIndexGL==GL_DEPTH_STENCIL_ATTACHMENT_EXT)
					{
						// detach the GL_RENDERBUFFER_EXT target at depth and stencil attach points
						gGL->glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, 0);
						gGL->glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, 0);
					}
					else
					{
						// color attachment (likely 0)
						gGL->glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachIndexGL, GL_RENDERBUFFER_EXT, 0);
					}
				}
				else
				{
					// plain tex detach
					if (attachIndexGL==GL_DEPTH_STENCIL_ATTACHMENT_EXT)
					{
						// you have to detach it both places...
						// http://www.opengl.org/wiki/GL_EXT_framebuffer_object
						gGL->glFramebufferTexture2DEXT( fboBindPoint, GL_DEPTH_ATTACHMENT_EXT, target, 0, 0 );
						gGL->glFramebufferTexture2DEXT( fboBindPoint, GL_STENCIL_ATTACHMENT_EXT, target, 0, 0 );
					}
					else
					{
						gGL->glFramebufferTexture2DEXT( fboBindPoint, attachIndexGL, target, 0, 0 );
					}
				}
			}
			break;

			case GL_TEXTURE_3D:
			{
				gGL->glFramebufferTexture3DEXT( fboBindPoint, attachIndexGL, target, 0, 0, 0 );
			}
			break;

			case GL_TEXTURE_CUBE_MAP:
			{
				gGL->glFramebufferTexture2DEXT( fboBindPoint, attachIndexGL, target, 0, 0 );
			}
			break;
		}

		// un-log the attached tex
		memset( &m_attach[ attachIndex ], 0, sizeof( m_attach[0] ) );

		// drop the RT attach count
		tex->m_rtAttachCount--;
	}
	else
	{
		//Debugger(); // odd, but not harmful - typ comes from D3D code passing NULL into SetRenderTarget
	}
}
// Remove every attachment of 'tex' from this FBO (a texture can in principle
// be bound at more than one attachment point, so scan them all).
void CGLMFBO::TexScrub( CGLMTex *tex )
{
	// see if it's attached anywhere
	for ( int slot = 0; slot < kAttCount; slot++ )
	{
		if ( m_attach[ slot ].m_tex != tex )
			continue;

		// blammo
		TexDetach( (EGLMFBOAttachment)slot, GL_DRAW_FRAMEBUFFER_EXT );
	}
}
bool CGLMFBO::IsReady( void )
{
bool result = false;
// ensure our parent context is current
m_ctx->CheckCurrent();
// bind to context (will cause FBO object creation on first use)
m_ctx->BindFBOToCtx( this );
GLenum status;
status = gGL->glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
switch(status)
{
case GL_FRAMEBUFFER_COMPLETE_EXT:
result = true;
break;
case GL_FRAMEBUFFER_UNSUPPORTED_EXT:
result = false;
DebuggerBreak();
/* choose different formats */
break;
default:
result = false;
DebuggerBreak();
/* programming error; will fail on all hardware */
break;
}
return result;
}

File diff suppressed because it is too large Load Diff

363
togl/linuxwin/cglmquery.cpp Normal file
View File

@ -0,0 +1,363 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// cglmquery.cpp
//
//===============================================================================
#include "togl/rendermechanism.h"
#ifndef _WIN32
#include <unistd.h>
#endif
// memdbgon -must- be the last include file in a .cpp file.
#include "tier0/memdbgon.h"
//===============================================================================

// http://www.opengl.org/registry/specs/ARB/occlusion_query.txt
// Counter for the workaround to: "Calling either GenQueriesARB or DeleteQueriesARB while any query of any target is active causes an INVALID_OPERATION error to be generated."
uint CGLMQuery::s_nTotalOcclusionQueryCreatesOrDeletes;

extern ConVar gl_errorcheckall;
extern ConVar gl_errorcheckqueries;
extern ConVar gl_errorchecknone;

// When non-zero, occlusion queries start "null": no GL query is issued and
// Complete() reports zero samples passed (latched per-query in Start()).
// NOTE(review): the original comment here ("how many microseconds to wait after
// a failed query-available test ... presently on MTGL this doesn't happen")
// does not describe this convar and appears to be stale/misplaced.
ConVar gl_nullqueries( "gl_nullqueries", "0" );
//===============================================================================
//===============================================================================

// Construct a query of the type given in params (occlusion query or fence)
// on context 'ctx', generating whatever GL name the active extension needs.
CGLMQuery::CGLMQuery( GLMContext *ctx, GLMQueryParams *params )
{
	// get the type of query requested
	// generate name(s) needed
	// set initial state appropriately

	m_ctx = ctx;
	m_params = *params;

	m_name = 0;
	m_syncobj = 0;

	m_started = m_stopped = m_done = false;

	// m_nullQuery latches the gl_nullqueries convar in Start() and then does
	// not change during an individual query's lifetime:
	//   started null = stays null
	//   started live = stays live
	m_nullQuery = false;

	switch(m_params.m_type)
	{
		case EOcclusion:
		{
			//make an occlusion query (and a fence to go with it)
			gGL->glGenQueriesARB( 1, &m_name );
			// bump the Gen/Delete counter used for the ARB_occlusion_query workaround
			s_nTotalOcclusionQueryCreatesOrDeletes++;
			GLMPRINTF(("-A- CGLMQuery(OQ) created name %d", m_name));
		}
		break;

		case EFence:
			//make a fence - no aux fence needed
			m_syncobj = 0;
			if (gGL->m_bHave_GL_ARB_sync)
			{ /* GL_ARB_sync doesn't separate gen and set, so we do glFenceSync() later. */ }
			else if (gGL->m_bHave_GL_NV_fence)
				gGL->glGenFencesNV(1, &m_name );
			else if (gGL->m_bHave_GL_APPLE_fence)
				gGL->glGenFencesAPPLE(1, &m_name );
			GLMPRINTF(("-A- CGLMQuery(fence) created name %d", m_name));
		break;
	}
}
// Destroy the query, deleting whichever GL name / sync object the active
// extension path created.
CGLMQuery::~CGLMQuery()
{
	GLMPRINTF(("-A-> ~CGLMQuery"));

	// make sure query has completed (might not be necessary)
	// delete the name(s)

	switch(m_params.m_type)
	{
		case EOcclusion:
		{
			// do a finish occlusion query ?
			GLMPRINTF(("-A- ~CGLMQuery(OQ) deleting name %d", m_name));
			gGL->glDeleteQueriesARB(1, &m_name );
			// counterpart to the increment in the constructor (ARB_occlusion_query workaround)
			s_nTotalOcclusionQueryCreatesOrDeletes++;
		}
		break;

		case EFence:
		{
			// do a finish fence ?
			GLMPRINTF(("-A- ~CGLMQuery(fence) deleting name %llu", gGL->m_bHave_GL_ARB_sync ? (unsigned long long) m_syncobj : (unsigned long long) m_name));
			// exactly one of the three fence flavors was created in the constructor / Start()
#ifdef HAVE_GL_ARB_SYNC
			if (gGL->m_bHave_GL_ARB_sync)
				gGL->glDeleteSync( m_syncobj );
			else
#endif
			if (gGL->m_bHave_GL_NV_fence)
				gGL->glDeleteFencesNV(1, &m_name );
			else if (gGL->m_bHave_GL_APPLE_fence)
				gGL->glDeleteFencesAPPLE(1, &m_name );
		}
		break;
	}

	m_name = 0;
	m_syncobj = 0;

	GLMPRINTF(("-A-< ~CGLMQuery"));
}
// Begin the query.  Occlusion queries issue glBeginQuery (unless running as a
// null query); fences are inserted into the command stream immediately and
// self-stop (callers must not call Stop() on a fence).
void CGLMQuery::Start( void )		// "start counting"
{
	// latch value for remainder of query life
	m_nullQuery = (gl_nullqueries.GetInt() != 0);

	m_started = true;
	m_stopped = false;
	m_done = false;

	switch(m_params.m_type)
	{
		case EOcclusion:
		{
			if (m_nullQuery)
			{
				// do nothing..
			}
			else
			{
				gGL->glBeginQueryARB( GL_SAMPLES_PASSED_ARB, m_name );
			}
		}
		break;

		case EFence:
#ifdef HAVE_GL_ARB_SYNC
			if (gGL->m_bHave_GL_ARB_sync)
			{
				// ARB_sync creates the fence at "set" time; drop any sync object
				// left over from a previous Start of this query.
				if (m_syncobj != 0) gGL->glDeleteSync(m_syncobj);
				m_syncobj = gGL->glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
			}
			else
#endif
			if (gGL->m_bHave_GL_NV_fence)
				gGL->glSetFenceNV( m_name, GL_ALL_COMPLETED_NV );
			else if (gGL->m_bHave_GL_APPLE_fence)
				gGL->glSetFenceAPPLE( m_name );

			m_stopped = true;	// caller should not call Stop on a fence, it self-stops
		break;
	}
}
// End the query.  Idempotent: a second Stop is a no-op.  Only live occlusion
// queries have anything to end; fences self-stopped in Start().
void CGLMQuery::Stop( void )	// "stop counting"
{
	Assert(m_started);

	if ( m_stopped )
		return;

	if ( m_params.m_type == EOcclusion )
	{
		// null queries issued no glBeginQuery, so there is nothing to end
		if ( !m_nullQuery )
		{
			gGL->glEndQueryARB( GL_SAMPLES_PASSED_ARB ); // we are only putting the request-to-stop-counting into the cmd stream.
		}
	}
	// EFence: nop - you don't "end" a fence, you just test it and/or finish it out in Complete

	m_stopped = true;
}
// Non-blocking completion poll.  Once the query reports done, the answer is
// cached and GL is not asked again.  As a side effect, a done NV/APPLE fence
// is finished out here so no set fence goes un-finished.
bool CGLMQuery::IsDone( void )
{
	Assert(m_started);
	Assert(m_stopped);

	if(!m_done)		// you can ask more than once, but we only check until it comes back as done.
	{
		// on occlusion: glGetQueryObjectivARB - large cost on pre SLGU, cheap after
		// on fence: glTestFence* on the fence
		switch(m_params.m_type)
		{
			case EOcclusion:	// just test the fence that was set after the query begin
			{
				if (m_nullQuery)
				{
					// do almost nothing.. but claim work is complete
					m_done = true;
				}
				else
				{
					// prepare to pay a big price on drivers prior to 10.6.4+SLGU
					GLint available = 0;
					gGL->glGetQueryObjectivARB(m_name, GL_QUERY_RESULT_AVAILABLE_ARB, &available );
					m_done = (available != 0);
				}
			}
			break;

			case EFence:
			{
#ifdef HAVE_GL_ARB_SYNC
				// NOTE(review): zero timeout, and only GL_ALREADY_SIGNALED counts as
				// done - assumes a zero-timeout wait can't yield GL_CONDITION_SATISFIED;
				// confirm against the GL_ARB_sync spec.
				if (gGL->m_bHave_GL_ARB_sync)
					m_done = (gGL->glClientWaitSync( m_syncobj, 0, 0 ) == GL_ALREADY_SIGNALED);
				else
#endif
				if ( m_name == 0 )
					m_done = true;		// no fence name was ever generated (no fence extension path in play)
				else if (gGL->m_bHave_GL_NV_fence)
					m_done = gGL->glTestFenceNV( m_name ) != 0;
				else if (gGL->m_bHave_GL_APPLE_fence)
					m_done = gGL->glTestFenceAPPLE( m_name ) != 0;

				if (m_done)
				{
					if (gGL->m_bHave_GL_ARB_sync)
					{ /* no-op; we already know it's set to GL_ALREADY_SIGNALED. */ }
					else
					{
						if (gGL->m_bHave_GL_NV_fence)
							gGL->glFinishFenceNV( m_name );		// no set fence goes un-finished
						else if (gGL->m_bHave_GL_APPLE_fence)
							gGL->glFinishFenceAPPLE( m_name );	// no set fence goes un-finished
					}
				}
			}
			break;
		}
	}

	return m_done;
}
// Block until the query result is available, optionally return it, and reset
// the started/stopped/done flags so the object can be re-used.
//
// result - receives samples-passed for occlusion queries (always 0 for null
//          queries); may be NULL if the caller just wants to retire the query
//          (for example to clear a fence).
void CGLMQuery::Complete( uint *result )
{
	uint resultval = 0;
	//bool bogus_available = false;

	// blocking call if not done
	Assert(m_started);
	Assert(m_stopped);

	switch(m_params.m_type)
	{
		case EOcclusion:
		{
			if (m_nullQuery)
			{
				m_done = true;
				resultval = 0;		// we did say "null queries..."
			}
			else
			{
				// blocks in the driver until the result is available
				gGL->glGetQueryObjectuivARB( m_name, GL_QUERY_RESULT_ARB, &resultval);
				m_done = true;
			}
		}
		break;

		case EFence:
		{
			if(!m_done)
			{
#ifdef HAVE_GL_ARB_SYNC
				if (gGL->m_bHave_GL_ARB_sync)
				{
					if (gGL->glClientWaitSync( m_syncobj, 0, 0 ) != GL_ALREADY_SIGNALED)
					{
						// not signaled yet - spin with GL_SYNC_FLUSH_COMMANDS_BIT until
						// the wait stops reporting GL_TIMEOUT_EXPIRED.
						// NOTE(review): 'timeout' is computed but explicitly unused and each
						// wait is issued with a zero timeout, so this loop busy-waits -
						// confirm that is intended.
						GLenum syncstate;
						do {
							const GLuint64 timeout = 10 * ((GLuint64)1000 * 1000 * 1000); // 10 seconds in nanoseconds.
							(void)timeout;
							syncstate = gGL->glClientWaitSync( m_syncobj, GL_SYNC_FLUSH_COMMANDS_BIT, 0 );
						} while (syncstate == GL_TIMEOUT_EXPIRED);  // any errors or success break out of this loop.
					}
				}
				else
#endif
				if (gGL->m_bHave_GL_NV_fence)
					gGL->glFinishFenceNV( m_name );
				else if (gGL->m_bHave_GL_APPLE_fence)
					gGL->glFinishFenceAPPLE( m_name );

				m_done = true;	// for clarity or if they try to Complete twice
			}
		}
		break;
	}

	Assert( m_done );

	// reset state for re-use - i.e. you have to call Complete if you want to re-use the object
	m_started = m_stopped = m_done = false;

	if (result)	// caller may pass NULL if not interested in result, for example to clear a fence
	{
		*result = resultval;
	}
}
// accessors for the started/stopped state

// True after Start() until Complete() resets the lifecycle flags.
bool CGLMQuery::IsStarted	( void )
{
	return m_started;
}
// True once Stop() has run (fences self-stop in Start()); cleared by Complete().
bool CGLMQuery::IsStopped	( void )
{
	return m_stopped;
}

1990
togl/linuxwin/cglmtex.cpp Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

261
togl/linuxwin/dx9asmtogl2.h Normal file
View File

@ -0,0 +1,261 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//------------------------------------------------------------------------------
// DX9AsmToGL2.h
//------------------------------------------------------------------------------
#ifndef DX9_ASM_TO_GL_2_H
#define DX9_ASM_TO_GL_2_H
#include "tier1/utlstring.h"
#define DISASM_OK 0
#define DISASM_ERROR 1
#define MAX_SHADER_CONSTANTS 512
#define MAX_DECLARED_OUTPUTS 32
#define MAX_DECLARED_INPUTS 32
#define HEXCODE_HEADER "// Hex: "
// Option bits
#define D3DToGL_OptionUseEnvParams 0x0001
#define D3DToGL_OptionDoFixupZ 0x0002 // Add instructions to put Z in the right interval for GL
#define D3DToGL_OptionDoFixupY 0x0004 // Add instructions to flip the Y over for GL
#define D3DToGL_OptionDoUserClipPlanes 0x0008 // ARB mode: Include OPTION vertex_program_2 and append DP4's to write into oCLP[0] and oCLP[1]
// GLSL mode: generate code to write gl_ClipVertex
#define D3DToGL_AddHexComments 0x0020 // Include hex comments in the code for debugging
#define D3DToGL_PutHexCommentsAfterLines 0x0040 // If D3DToGL_AddHexComments is set, this puts the codes to the right, rather than on separate lines
#define D3DToGL_GeneratingDebugText 0x0080 // This tells it that we're just getting info for debugging so go easy on asserts and errors
#define D3DToGL_OptionSRGBWriteSuffix 0x0400 // Tack sRGB conversion suffix on to pixel shaders
#define D3DToGL_OptionGenerateBoneUniformBuffer 0x0800 // if enabled, the vertex shader "bone" registers (all regs DXABSTRACT_VS_FIRST_BONE_SLOT and higher) will be separated out into another uniform buffer (vcbone)
#define D3DToGL_OptionUseBindlessTexturing 0x1000
#define D3DToGL_OptionSpew 0x80000000
// Code for which component of the "dummy" address register is needed by an instruction
#define ARL_DEST_NONE -1
#define ARL_DEST_X 0
#define ARL_DEST_Y 1
#define ARL_DEST_Z 2
#define ARL_DEST_W 3
//------------------------------------------------------------------------------
// D3DToGL - translates a D3D9 shader bytecode token stream into GL shader
// text (ASM or GLSL flavored output, per the D3DToGL_Option* bits handed to
// TranslateShader).  All translation state below is reset per shader.
//------------------------------------------------------------------------------
class D3DToGL
{
private:
	// Pointers for dwToken stream management
	uint32* m_pdwBaseToken;
	uint32* m_pdwNextToken;

	// Vertex shader or pixel shader, and version (necessary because some opcodes alias)
	bool m_bVertexShader;
	uint32 m_dwMinorVersion;
	uint32 m_dwMajorVersion;

	// Option flags
	bool m_bUseEnvParams;				// set D3DToGL_OptionUseEnvParams in 'options' to use
	bool m_bDoFixupZ;					// set D3DToGL_OptionDoFixupZ
	bool m_bDoFixupY;					// set D3DToGL_OptionDoFixupY
	bool m_bDoUserClipPlanes;			// set D3DToGL_OptionDoUserClipPlanes
	bool m_bSpew;						// set D3DToGL_OptionSpew
	bool m_bGenerateSRGBWriteSuffix;	// set D3DToGL_OptionSRGBWriteSuffix
	bool m_bGenerateBoneUniformBuffer;	// set D3DToGL_OptionGenerateBoneUniformBuffer
	bool m_bUseBindlessTexturing;		// set D3DToGL_OptionUseBindlessTexturing

	// Counter for dealing with nested loops
	int m_nLoopDepth;

	// Add "// Hex: 0xFFEEF00"-type statements after each instruction is parsed.
	bool m_bAddHexCodeComments;		// set D3DToGL_AddHexComments

	// Only applicable if m_bAddHexCodeComments is true.
	// If this is true, then it puts the hex code comments to the right of the instructions in a comment
	// rather than preceding the instructions.
	// Defaults to FALSE.
	bool m_bPutHexCodesAfterLines;	// set D3DToGL_PutHexCommentsAfterLines

	// This tells it that we're just getting info for debugging so go easy on asserts and errors.
	// Defaults to FALSE.
	bool m_bGeneratingDebugText;

	// Various scratch temps needed to handle mis-matches in instruction sets between D3D and OpenGL
	bool m_bNeedsD2AddTemp;
	bool m_bNeedsNRMTemp;
	bool m_bDeclareAddressReg;
	bool m_bNeedsLerpTemp;
	bool m_bNeedsSinCosDeclarations;

	// Keep track of which vs outputs are used so we can declare them
	bool m_bDeclareVSOPos;
	bool m_bDeclareVSOFog;
	uint32 m_dwTexCoordOutMask;
	int32 m_nVSPositionOutput;

	// Mask of varyings which need centroid decoration
	uint32 m_nCentroidMask;

	// Keep track of which temps are used so they can be declared
	uint32 m_dwTempUsageMask;
	uint32 m_dwTempBoolUsageMask;
	bool m_bOutputColorRegister[4];
	bool m_bOutputDepthRegister;

	// Declaration of integer and bool constants
	uint32 m_dwConstIntUsageMask;
	uint32 m_dwConstBoolUsageMask;
	uint32 m_dwDefConstIntUsageMask;
	uint32 m_dwDefConstIntIterCount[32];

	// Did we use atomic_temp_var?
	bool m_bUsedAtomicTempVar;

	// Track constants so we know how to declare them
	bool m_bConstantRegisterDefined[MAX_SHADER_CONSTANTS];

	// Track sampler types when declared so we can properly decorate TEX instructions
	uint32 m_dwSamplerTypes[32];

	// Track sampler usage
	uint32 m_dwSamplerUsageMask;

	// Track shadow sampler usage
	int m_nShadowDepthSamplerMask;
	bool m_bDeclareShadowOption;

	// Track attribute references
	// init to 0xFFFFFFFF (unhit)
	// index by (dwRegToken & D3DSP_REGNUM_MASK) in VS DCL insns
	// fill with (usage<<4) | (usage index).
	uint32 m_dwAttribMap[16];

	// Register high water mark
	uint32 m_nHighestRegister;
	int32 m_nHighestBoneRegister;

	// GLSL does indentation for readability
	int m_NumIndentTabs;

	// Output buffers.
	CUtlBuffer *m_pBufHeaderCode;
	CUtlBuffer *m_pBufAttribCode;
	CUtlBuffer *m_pBufParamCode;
	CUtlBuffer *m_pBufALUCode;

	char *m_pFinalAssignmentsCode;
	int m_nFinalAssignmentsBufSize;

	// Recorded positions for debugging.
	uint32* m_pRecordedInputTokenStart;
	int m_nRecordedParamCodeStrlen;
	int m_nRecordedALUCodeStrlen;
	int m_nRecordedAttribCodeStrlen;

	// In GLSL mode, these store the semantic attached to each oN register.
	// They are the values that you pass to GetUsageIndexAndString.
	uint32 m_DeclaredOutputs[MAX_DECLARED_OUTPUTS];
	uint32 m_DeclaredInputs[MAX_DECLARED_INPUTS];

	// Have they used the tangent input semantic (i.e. is g_pTangentAttributeName declared)?
	bool m_bTangentInputUsed;

	bool m_bUsesDSTInstruction;

private:
	// Utilities to aid in decoding token stream
	uint32 GetNextToken( void );
	void SkipTokens( uint32 numToSkip );
	uint32 Opcode( uint32 dwToken );
	uint32 OpcodeSpecificData( uint32 dwToken );
	uint32 TextureType ( uint32 dwToken );
	uint32 GetRegType( uint32 dwRegToken );

	// Write to the different buffers.
	void StrcatToHeaderCode( const char *pBuf );
	void StrcatToALUCode( const char *pBuf );
	void StrcatToParamCode( const char *pBuf );
	void StrcatToAttribCode( const char *pBuf );
	void PrintToBufWithIndents( CUtlBuffer &buf, const char *pFormat, ... );

	// This helps write the token hex codes into the output stream for debugging.
	void AddTokenHexCodeToBuffer( char *pBuffer, int nSize, int nLastStrlen );
	void RecordInputAndOutputPositions();
	void AddTokenHexCode();

	// Utilities for decoding tokens in to strings according to ASM syntax
	void PrintOpcode( uint32 inst, char* buff, int nBufLen );
	// fSemanticFlags is SEMANTIC_INPUT or SEMANTIC_OUTPUT.
	void PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageIndexName, int nBufLen, int fSemanticFlags );
	CUtlString GetUsageAndIndexString( uint32 dwToken, int fSemanticFlags );
	CUtlString GetParameterString( uint32 dwToken, uint32 dwSourceOrDest, bool bForceScalarSource, int *pARLDestReg );
	const char* GetGLSLOperatorString( uint32 inst );
	void PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, char *pRegisterName, int nBufLen, bool bForceScalarSource, int *pARLDestReg );

	void InsertMoveFromAddressRegister( CUtlBuffer *pCode, int nARLComp0, int nARLComp1, int nARLComp2 = ARL_DEST_NONE );
	void InsertMoveInstruction( CUtlBuffer *pCode, int nARLComponent );
	void FlagIndirectRegister( uint32 dwToken, int *pARLDestReg );

	// Utilities for decoding tokens in to strings according to GLSL syntax
	bool OpenIntrinsic( uint32 inst, char* buff, int nBufLen, uint32 destDimension, uint32 nArgumentDimension );
	void PrintIndentation( char *pBuf, int nBufLen );

	uint32 MaintainAttributeMap( uint32 dwToken, uint32 dwRegToken );

	CUtlString FixGLSLSwizzle( const char *pDestRegisterName, const char *pSrcRegisterName );
	void WriteGLSLCmp( const char *pDestReg, const char *pSrc0Reg, const char *pSrc1Reg, const char *pSrc2Reg );
	void WriteGLSLSamplerDefinitions();
	void WriteGLSLOutputVariableAssignments();
	void WriteGLSLInputVariableAssignments();
	void NoteTangentInputUsed();

	// Per-opcode translation handlers.
	void Handle_DCL();
	void Handle_DEF();
	void Handle_DEFIB( uint32 nInstruction );
	void Handle_MAD( uint32 nInstruction );
	void Handle_DP2ADD();
	void Handle_SINCOS();
	void Handle_LRP( uint32 nInstruction );
	void Handle_TEX( uint32 dwToken, bool bIsTexLDL );
	void Handle_TexLDD( uint32 nInstruction );
	void Handle_TexCoord();
	void Handle_UnaryOp( uint32 nInstruction );
	void Handle_BREAKC( uint32 dwToken );
	void HandleBinaryOp_GLSL( uint32 nInstruction );
	void HandleBinaryOp_ASM( uint32 nInstruction );
	void Handle_CMP();
	void Handle_NRM();
	void Handle_DeclarativeNonDclOp( uint32 nInstruction );

public:
	D3DToGL();

	// Sole public entry point: translate the bytecode in 'code' into text in
	// pBufDisassembledCode.  Presumably returns DISASM_OK / DISASM_ERROR
	// (defined above) - confirm against the implementation.
	int TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bool *bVertexShader, uint32 options, int32 nShadowDepthSamplerMask, uint32 nCentroidMask, char *debugLabel );
};
#endif // DX9_ASM_TO_GL_2_H

6845
togl/linuxwin/dxabstract.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,510 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// glentrypoints.cpp
//
//=============================================================================//
// Immediately include gl.h, etc. here to avoid compilation warnings.
#include "togl/rendermechanism.h"
#include "appframework/AppFramework.h"
#include "appframework/IAppSystemGroup.h"
#include "tier0/dbg.h"
#include "tier0/icommandline.h"
#include "tier0/platform.h"
#include "interface.h"
#include "filesystem.h"
#include "filesystem_init.h"
#include "tier1/convar.h"
#include "vstdlib/cvar.h"
#include "inputsystem/ButtonCode.h"
#include "tier1.h"
#include "tier2/tier2.h"
#ifdef _LINUX
#include <GL/glx.h>
#endif
// NOTE: This has to be the last file included!
#include "tier0/memdbgon.h"
#if !defined(DX_TO_GL_ABSTRACTION)
#error
#endif
#if defined(OSX) || defined(LINUX) || (defined (WIN32) && defined( DX_TO_GL_ABSTRACTION ))
#include "appframework/ilaunchermgr.h"
ILauncherMgr *g_pLauncherMgr = NULL;
#endif
// Set to 1 to route every GL entry point through a debug shim that calls
// glGetError() after each call and breaks into the debugger on any failure.
#define DEBUG_ALL_GLCALLS 0
#if DEBUG_ALL_GLCALLS
// Runtime switches for the shims (intended to be flipped from a debugger):
// g_bDebugOpenGLCalls enables the error checking, g_bPrintOpenGLCalls adds
// a printf before/after every GL call.
bool g_bDebugOpenGLCalls = true;
bool g_bPrintOpenGLCalls = false;
// Extensions themselves need no wrapper; only the entry points do.
#define GL_EXT(x,glmajor,glminor)
// Generates, for each value-returning GL function fn, a saved pointer
// fn_gldebugptr plus a wrapper fn_gldebug that forwards the call, then
// checks glGetError(); GL_INVALID_FRAMEBUFFER_OPERATION_EXT additionally
// queries the FBO completeness status before trapping (int3).
#define GL_FUNC(ext,req,ret,fn,arg,call) \
static ret (*fn##_gldebugptr) arg = NULL; \
static ret fn##_gldebug arg { \
if (!g_bDebugOpenGLCalls) { return fn##_gldebugptr call; } \
if (g_bPrintOpenGLCalls) { \
printf("Calling %s ... ", #fn); \
fflush(stdout); \
} \
ret retval = fn##_gldebugptr call; \
if (g_bPrintOpenGLCalls) { \
printf("%s returned!\n", #fn); \
fflush(stdout); \
} \
const GLenum err = glGetError_gldebugptr(); \
if ( err == GL_INVALID_FRAMEBUFFER_OPERATION_EXT ) { \
const GLenum fberr = gGL->glCheckFramebufferStatus( GL_FRAMEBUFFER_EXT ); \
printf("%s triggered error GL_INVALID_FRAMEBUFFER_OPERATION_EXT! (0x%X)\n\n\n", #fn, (int) fberr); \
fflush(stdout); \
__asm__ __volatile__ ( "int $3\n\t" ); \
} else if (err != GL_NO_ERROR) { \
printf("%s triggered error 0x%X!\n\n\n", #fn, (int) err); \
fflush(stdout); \
__asm__ __volatile__ ( "int $3\n\t" ); \
} \
return retval; \
}
// Same wrapper generator for void GL functions (no return value to forward).
#define GL_FUNC_VOID(ext,req,fn,arg,call) \
static void (*fn##_gldebugptr) arg = NULL; \
static void fn##_gldebug arg { \
if (!g_bDebugOpenGLCalls) { fn##_gldebugptr call; return; } \
if (g_bPrintOpenGLCalls) { \
printf("Calling %s ... ", #fn); \
fflush(stdout); \
} \
fn##_gldebugptr call; \
if (g_bPrintOpenGLCalls) { \
printf("%s returned!\n", #fn); \
fflush(stdout); \
} \
const GLenum err = glGetError_gldebugptr(); \
if ( err == GL_INVALID_FRAMEBUFFER_OPERATION_EXT ) { \
const GLenum fberr = gGL->glCheckFramebufferStatus( GL_FRAMEBUFFER_EXT ); \
printf("%s triggered error GL_INVALID_FRAMEBUFFER_OPERATION_EXT! (0x%X)\n\n\n", #fn, (int) fberr); \
fflush(stdout); \
__asm__ __volatile__ ( "int $3\n\t" ); \
} else if (err != GL_NO_ERROR) { \
printf("%s triggered error 0x%X!\n\n\n", #fn, (int) err); \
fflush(stdout); \
__asm__ __volatile__ ( "int $3\n\t" ); \
} \
}
// Expand the X-macro list so one _gldebug wrapper exists per entry point.
#include "togl/glfuncs.inl"
#undef GL_FUNC_VOID
#undef GL_FUNC
#undef GL_EXT
#endif
COpenGLEntryPoints *gGL = NULL;
GL_GetProcAddressCallbackFunc_t gGL_GetProcAddressCallback = NULL;
// Resolves one GL entry point by name through the registered
// GL_GetProcAddressCallback, optionally substituting a caller-supplied
// fallback. 'okay' is an accumulator across a whole table of lookups:
// once false, optional lookups short-circuit, but required lookups still
// run so every missing crucial symbol gets reported.
void *VoidFnPtrLookup_GlMgr(const char *fn, bool &okay, const bool bRequired, void *fallback)
{
	// Optional symbol and a prior lookup already failed: skip the work.
	if ( !bRequired && !okay )
		return NULL;

	// SDL does the right thing, so we never need to use tier0 in this case.
	void *pAddr = (*gGL_GetProcAddressCallback)( fn, okay, bRequired, fallback );
	//printf("CDynamicFunctionOpenGL: SDL_GL_GetProcAddress(\"%s\") returned %p\n", fn, pAddr);

	if ( ( pAddr == NULL ) && ( fallback != NULL ) )
	{
		//printf("CDynamicFunctionOpenGL: Using fallback %p for \"%s\"\n", fallback, fn);
		pAddr = fallback;
	}

	// Note that a non-NULL response doesn't mean it's safe to call the function!
	// You always have to check that the extension is supported;
	// an implementation MAY return NULL in this case, but it doesn't have to (and doesn't, with the DRI drivers).
	okay = okay && ( pAddr != NULL );

	if ( bRequired && !okay )
		fprintf(stderr, "Could not find required OpenGL entry point '%s'!\n", fn);

	return pAddr;
}
// Lazily constructs and returns the global GL entry-point table.
// The GL context you want entry points for must be current on first call;
// aborts via Error() if the base GL feature set is missing.
COpenGLEntryPoints *GetOpenGLEntryPoints(GL_GetProcAddressCallbackFunc_t callback)
{
	if ( !gGL )
	{
		// Stash the resolver first; the COpenGLEntryPoints ctor uses it.
		gGL_GetProcAddressCallback = callback;
		gGL = new COpenGLEntryPoints();
		if ( !gGL->m_bHave_OpenGL )
		{
			Error( "Missing basic required OpenGL functionality." );
		}
	}
	return gGL;
}
void ClearOpenGLEntryPoints()
{
if ( gGL )
{
gGL->ClearEntryPoints();
}
}
// Hooks ToGL up to the engine's interface factory: tier1/tier2 libraries,
// convars, mathlib, and (under SDL) the launcher manager. Returns the
// current global entry-point table, which may still be NULL here —
// GetOpenGLEntryPoints() is what actually creates it.
COpenGLEntryPoints *ToGLConnectLibraries( CreateInterfaceFn factory )
{
ConnectTier1Libraries( &factory, 1 );
ConVar_Register();
ConnectTier2Libraries( &factory, 1 );
// Non-fatal: warn and continue if the filesystem interface is unavailable.
if ( !g_pFullFileSystem )
{
Warning( "ToGL was unable to access the required interfaces!\n" );
}
// NOTE! : Overbright is 1.0 so that Hammer will work properly with the white bumped and unbumped lightmaps.
MathLib_Init( 2.2f, 2.2f, 0.0f, 2.0f );
#if defined( USE_SDL )
g_pLauncherMgr = (ILauncherMgr *)factory( SDLMGR_INTERFACE_VERSION, NULL );
#endif
return gGL;
}
// Tears down the library connections made in ToGLConnectLibraries(),
// in reverse order of initialization.
void ToGLDisconnectLibraries()
{
DisconnectTier2Libraries();
ConVar_Unregister();
DisconnectTier1Libraries();
}
// Packs a major.minor.patch GL version into a single comparable integer.
#define GLVERNUM(Major, Minor, Patch) (((Major) * 100000) + ((Minor) * 1000) + (Patch))
// Parses glGetString(GL_VERSION) into numeric components. All three outputs
// are zeroed up front, so a missing entry point, NULL version string, or a
// partial sscanf match leaves the unmatched components at 0.
static void GetOpenGLVersion(int *major, int *minor, int *patch)
{
*major = *minor = *patch = 0;
static CDynamicFunctionOpenGL< true, const GLubyte *( APIENTRY *)(GLenum name), const GLubyte * > glGetString("glGetString");
if (glGetString)
{
const char *version = (const char *) glGetString(GL_VERSION);
if (version)
{
sscanf( version, "%d.%d.%d", major, minor, patch );
}
}
}
// Convenience accessor: just the major component of the driver's GL version.
static int GetOpenGLVersionMajor()
{
	int nMajor = 0, nMinor = 0, nPatch = 0;
	GetOpenGLVersion( &nMajor, &nMinor, &nPatch );
	return nMajor;
}
// Convenience accessor: just the minor component of the driver's GL version.
static int GetOpenGLVersionMinor()
{
	int nMajor = 0, nMinor = 0, nPatch = 0;
	GetOpenGLVersion( &nMajor, &nMinor, &nPatch );
	return nMinor;
}
// Convenience accessor: just the patch component of the driver's GL version.
static int GetOpenGLVersionPatch()
{
	int nMajor = 0, nMinor = 0, nPatch = 0;
	GetOpenGLVersion( &nMajor, &nMinor, &nPatch );
	return nPatch;
}
// Verifies the driver reports at least the baseline GL version we require
// (2.0.0). Logs a diagnostic to stderr and returns false when it doesn't.
static bool CheckBaseOpenGLVersion()
{
	const int NEED_MAJOR = 2;
	const int NEED_MINOR = 0;
	const int NEED_PATCH = 0;

	int nMajor, nMinor, nPatch;
	GetOpenGLVersion( &nMajor, &nMinor, &nPatch );

	// Compare packed version numbers so 1.9.9 < 2.0.0 works out naturally.
	if ( GLVERNUM( nMajor, nMinor, nPatch ) >= GLVERNUM( NEED_MAJOR, NEED_MINOR, NEED_PATCH ) )
		return true;

	fprintf(stderr, "PROBLEM: You appear to have OpenGL %d.%d.%d, but we need at least %d.%d.%d!\n",
	        nMajor, nMinor, nPatch, NEED_MAJOR, NEED_MINOR, NEED_PATCH);
	return false;
}
// Returns true if the named extension is usable: either the running GL core
// version already subsumes it (coremajor.coreminor), or it appears as a
// whole token in GL_EXTENSIONS — falling back to the WGL (Windows) or GLX
// (X11) platform extension lists when not found there.
static bool CheckOpenGLExtension_internal(const char *ext, const int coremajor, const int coreminor)
{
	if ((coremajor >= 0) && (coreminor >= 0)) // we know that this extension is part of the base spec as of GL_VERSION coremajor.coreminor.
	{
		int major, minor, patch;
		GetOpenGLVersion(&major, &minor, &patch);
		const int need = GLVERNUM(coremajor, coreminor, 0);
		const int have = GLVERNUM(major, minor, patch);
		if (have >= need)
			return true; // we definitely have access to this "extension," as it is part of this version of the GL's core functionality.
	}

	// okay, see if the GL_EXTENSIONS string reports it.
	static CDynamicFunctionOpenGL< true, const GLubyte *( APIENTRY *)(GLenum name), const GLubyte * > glGetString("glGetString");
	if (!glGetString)
		return false;

	// hacky scanning of this string, because I don't want to spend time breaking it into a vector like I should have.
	const char *extensions = (const char *) glGetString(GL_EXTENSIONS);
	const size_t extlen = strlen(ext);
	while ((extensions) && (*extensions))
	{
		const char *ptr = strstr(extensions, ext);
#if _WIN32
		if (!ptr)
		{
			// Not in GL_EXTENSIONS; check the WGL platform extension list.
			static CDynamicFunctionOpenGL< true, const char *( APIENTRY *)( ), const char * > wglGetExtensionsStringEXT("wglGetExtensionsStringEXT");
			if (wglGetExtensionsStringEXT)
			{
				extensions = wglGetExtensionsStringEXT();
				// BUGFIX: the driver may return NULL (e.g. no current
				// context) — never hand NULL to strstr().
				ptr = extensions ? strstr(extensions, ext) : NULL;
			}
			if (!ptr)
			{
				return false;
			}
		}
#elif !defined ( OSX )
		if (!ptr)
		{
			// Not in GL_EXTENSIONS; check the GLX platform extension list.
			static CDynamicFunctionOpenGL< true, Display *( APIENTRY *)( ), Display* > glXGetCurrentDisplay("glXGetCurrentDisplay");
			static CDynamicFunctionOpenGL< true, const char *( APIENTRY *)( Display*, int ), const char * > glXQueryExtensionsString("glXQueryExtensionsString");
			if (glXQueryExtensionsString && glXGetCurrentDisplay)
			{
				extensions = glXQueryExtensionsString(glXGetCurrentDisplay(), 0);
				// BUGFIX: guard against a NULL extension string before strstr().
				ptr = extensions ? strstr(extensions, ext) : NULL;
			}
		}
#endif
		if (!ptr)
			return false;

		// make sure this matches the entire string, and isn't a substring match of some other extension.
		// if ( ( (string is at start of extension list) or (the char before the string is a space) ) and
		//      (the next char after the string is a space or a null terminator) )
		if ( ((ptr == extensions) || (ptr[-1] == ' ')) &&
		     ((ptr[extlen] == ' ') || (ptr[extlen] == '\0')) )
			return true; // found it!

		extensions = ptr + extlen; // skip ahead, search again.
	}

	return false;
}
// Logging wrapper over CheckOpenGLExtension_internal(): announces each
// extension's availability on stdout and passes the result through.
static bool CheckOpenGLExtension(const char *ext, const int coremajor, const int coreminor)
{
	const bool bHasIt = CheckOpenGLExtension_internal( ext, coremajor, coreminor );
	printf("This system %s the OpenGL extension %s.\n", bHasIt ? "supports" : "DOES NOT support", ext);
	return bHasIt;
}
// The GL context you want entry points for must be current when you hit this constructor!
// Resolves every GL entry point and extension flag against the currently
// bound GL context, caches the driver identification strings, and applies
// per-vendor workarounds / command-line overrides.
COpenGLEntryPoints::COpenGLEntryPoints()
: m_nTotalGLCycles(0)
, m_nTotalGLCalls(0)
, m_nOpenGLVersionMajor(GetOpenGLVersionMajor())
, m_nOpenGLVersionMinor(GetOpenGLVersionMinor())
, m_nOpenGLVersionPatch(GetOpenGLVersionPatch())
, m_bHave_OpenGL(CheckBaseOpenGLVersion()) // may reset to false as these lookups happen.
// X-macro expansion of glfuncs.inl: one m_bHave_<ext> capability flag per
// extension, and one dynamically-resolved function member per entry point
// (each tied to its owning extension's flag).
#define GL_EXT(x,glmajor,glminor) , m_bHave_##x(CheckOpenGLExtension(#x, glmajor, glminor))
#define GL_FUNC(ext,req,ret,fn,arg,call) , fn(#fn, m_bHave_##ext)
#define GL_FUNC_VOID(ext,req,fn,arg,call) , fn(#fn, m_bHave_##ext)
#include "togl/glfuncs.inl"
#undef GL_FUNC_VOID
#undef GL_FUNC
#undef GL_EXT
{
// Locally cache the copy of the GL device strings, to avoid needing to call these glGet's (which can be extremely slow) more than once.
const char *pszString = ( const char * )glGetString(GL_VENDOR);
m_pGLDriverStrings[cGLVendorString] = strdup( pszString ? pszString : "" );
// Classify the driver provider from the vendor string; used below (and by
// callers) for vendor-specific workarounds.
m_nDriverProvider = cGLDriverProviderUnknown;
if ( V_stristr( m_pGLDriverStrings[cGLVendorString], "nvidia" ) )
m_nDriverProvider = cGLDriverProviderNVIDIA;
else if ( V_stristr( m_pGLDriverStrings[cGLVendorString], "amd" ) || V_stristr( m_pGLDriverStrings[cGLVendorString], "ati" ) )
m_nDriverProvider = cGLDriverProviderAMD;
else if ( V_stristr( m_pGLDriverStrings[cGLVendorString], "intel" ) )
m_nDriverProvider = cGLDriverProviderIntelOpenSource;
else if ( V_stristr( m_pGLDriverStrings[cGLVendorString], "apple" ) )
m_nDriverProvider = cGLDriverProviderApple;
pszString = ( const char * )glGetString(GL_RENDERER);
m_pGLDriverStrings[cGLRendererString] = strdup( pszString ? pszString : "" );
pszString = ( const char * )glGetString(GL_VERSION);
m_pGLDriverStrings[cGLVersionString] = strdup( pszString ? pszString : "" );
pszString = ( const char * )glGetString(GL_EXTENSIONS);
m_pGLDriverStrings[cGLExtensionsString] = strdup( pszString ? pszString : "" );
printf( "OpenGL: %s %s (%d.%d.%d)\n", m_pGLDriverStrings[ cGLRendererString ], m_pGLDriverStrings[ cGLVersionString ],
m_nOpenGLVersionMajor, m_nOpenGLVersionMinor, m_nOpenGLVersionPatch );
// We need at least one GPU fencing mechanism; any of these three will do.
// !!! FIXME: Alfred says the original GL_APPLE_fence code only exists to
// !!! FIXME: hint Apple's drivers and not because we rely on the
// !!! FIXME: functionality. If so, just remove this check (and the
// !!! FIXME: GL_NV_fence code entirely).
if ((m_bHave_OpenGL) && ((!m_bHave_GL_NV_fence) && (!m_bHave_GL_ARB_sync) && (!m_bHave_GL_APPLE_fence)))
{
Error( "Required OpenGL extension \"GL_NV_fence\", \"GL_ARB_sync\", or \"GL_APPLE_fence\" is not supported. Please upgrade your OpenGL driver." );
}
// same extension, different name.
if (m_bHave_GL_EXT_vertex_array_bgra || m_bHave_GL_ARB_vertex_array_bgra)
{
m_bHave_GL_EXT_vertex_array_bgra = m_bHave_GL_ARB_vertex_array_bgra = true;
}
// GL_ARB_framebuffer_object is a superset of GL_EXT_framebuffer_object,
// (etc) but if you don't call in through the ARB entry points, you won't
// get the relaxed restrictions on mismatched attachment dimensions.
if (m_bHave_GL_ARB_framebuffer_object)
{
m_bHave_GL_EXT_framebuffer_object = true;
m_bHave_GL_EXT_framebuffer_blit = true;
m_bHave_GL_EXT_framebuffer_multisample = true;
// Alias every EXT-suffixed FBO entry point to its ARB/core counterpart.
glBindFramebufferEXT.Force(glBindFramebuffer.Pointer());
glBindRenderbufferEXT.Force(glBindRenderbuffer.Pointer());
glCheckFramebufferStatusEXT.Force(glCheckFramebufferStatus.Pointer());
glDeleteRenderbuffersEXT.Force(glDeleteRenderbuffers.Pointer());
glFramebufferRenderbufferEXT.Force(glFramebufferRenderbuffer.Pointer());
glFramebufferTexture2DEXT.Force(glFramebufferTexture2D.Pointer());
glFramebufferTexture3DEXT.Force(glFramebufferTexture3D.Pointer());
glGenFramebuffersEXT.Force(glGenFramebuffers.Pointer());
glGenRenderbuffersEXT.Force(glGenRenderbuffers.Pointer());
glDeleteFramebuffersEXT.Force(glDeleteFramebuffers.Pointer());
glBlitFramebufferEXT.Force(glBlitFramebuffer.Pointer());
glRenderbufferStorageMultisampleEXT.Force(glRenderbufferStorageMultisample.Pointer());
}
#if DEBUG_ALL_GLCALLS
// push all GL calls through the debug wrappers.
#define GL_EXT(x,glmajor,glminor)
#define GL_FUNC(ext,req,ret,fn,arg,call) \
fn##_gldebugptr = this->fn; \
this->fn.Force(fn##_gldebug);
#define GL_FUNC_VOID(ext,req,fn,arg,call) \
fn##_gldebugptr = this->fn; \
this->fn.Force(fn##_gldebug);
#include "togl/glfuncs.inl"
#undef GL_FUNC_VOID
#undef GL_FUNC
#undef GL_EXT
#endif
#ifdef OSX
// These features are never used on OSX.
m_bHave_GL_NV_bindless_texture = false;
m_bHave_GL_AMD_pinned_memory = false;
#else
// Bindless texturing is opt-in via command line; NULL the entry points so
// any accidental use faults immediately rather than misbehaving.
if ( ( m_bHave_GL_NV_bindless_texture ) && ( !CommandLine()->CheckParm( "-gl_nv_bindless_texturing" ) ) )
{
m_bHave_GL_NV_bindless_texture = false;
glGetTextureHandleNV.Force( NULL );
glGetTextureSamplerHandleNV.Force( NULL );
glMakeTextureHandleResidentNV.Force( NULL );
glMakeTextureHandleNonResidentNV.Force( NULL );
glUniformHandleui64NV.Force( NULL );
glUniformHandleui64vNV.Force( NULL );
glProgramUniformHandleui64NV.Force( NULL );
glProgramUniformHandleui64vNV.Force( NULL );
glIsTextureHandleResidentNV.Force( NULL );
}
// AMD pinned memory is likewise opt-in via command line.
if ( !CommandLine()->CheckParm( "-gl_amd_pinned_memory" ) )
{
m_bHave_GL_AMD_pinned_memory = false;
}
#endif // !OSX
// Getting reports of black screens, etc. with ARB_buffer_storage and AMD drivers. This type of thing:
// http://forums.steampowered.com/forums/showthread.php?t=3266806
// So disable it for now.
if ( ( m_nDriverProvider == cGLDriverProviderAMD ) || CommandLine()->CheckParm( "-gl_disable_arb_buffer_storage" ) )
{
m_bHave_GL_ARB_buffer_storage = false;
}
printf( "GL_NV_bindless_texture: %s\n", m_bHave_GL_NV_bindless_texture ? "ENABLED" : "DISABLED" );
printf( "GL_AMD_pinned_memory: %s\n", m_bHave_GL_AMD_pinned_memory ? "ENABLED" : "DISABLED" );
printf( "GL_ARB_buffer_storage: %s\n", m_bHave_GL_ARB_buffer_storage ? "AVAILABLE" : "NOT AVAILABLE" );
printf( "GL_EXT_texture_sRGB_decode: %s\n", m_bHave_GL_EXT_texture_sRGB_decode ? "AVAILABLE" : "NOT AVAILABLE" );
// S3TC decompression is mandatory: either the single EXT extension, or
// DXT1 plus the two ANGLE DXT3/DXT5 extensions together.
bool bGLCanDecodeS3TCTextures = m_bHave_GL_EXT_texture_compression_s3tc || ( m_bHave_GL_EXT_texture_compression_dxt1 && m_bHave_GL_ANGLE_texture_compression_dxt3 && m_bHave_GL_ANGLE_texture_compression_dxt5 );
if ( !bGLCanDecodeS3TCTextures )
{
Error( "This application requires either the GL_EXT_texture_compression_s3tc, or the GL_EXT_texture_compression_dxt1 + GL_ANGLE_texture_compression_dxt3 + GL_ANGLE_texture_compression_dxt5 OpenGL extensions. Please install S3TC texture support.\n" );
}
#ifdef OSX
// Debug aid: allow forcing the sRGB-decode path off on OSX.
if ( CommandLine()->FindParm( "-glmnosrgbdecode" ) )
{
Msg( "Forcing m_bHave_GL_EXT_texture_sRGB_decode off.\n" );
m_bHave_GL_EXT_texture_sRGB_decode = false;
}
#endif
#ifndef OSX
// Everywhere but OSX, sRGB decode support is a hard requirement.
if ( !m_bHave_GL_EXT_texture_sRGB_decode )
{
Error( "Required OpenGL extension \"GL_EXT_texture_sRGB_decode\" is not supported. Please update your OpenGL driver.\n" );
}
#endif
}
// Frees the strdup()'d driver identification strings captured by the ctor.
COpenGLEntryPoints::~COpenGLEntryPoints()
{
	// BUGFIX: the original loop ran to cGLTotalDriverProviders, but
	// m_pGLDriverStrings[] is written by the constructor only at the four
	// driver-*string* indices (cGLVendorString..cGLExtensionsString). The
	// provider enum is a different, larger set, so the old bound read/freed
	// slots the constructor never initialized. Free exactly what was set.
	const int kStringIndices[] = { cGLVendorString, cGLRendererString, cGLVersionString, cGLExtensionsString };
	for ( uint i = 0; i < sizeof(kStringIndices) / sizeof(kStringIndices[0]); ++i )
	{
		free( m_pGLDriverStrings[ kStringIndices[i] ] );
		m_pGLDriverStrings[ kStringIndices[i] ] = NULL;
	}
}
// NULLs out every GL function pointer in the table (via the X-macro list in
// glfuncs.inl), e.g. when the GL context is going away. The m_bHave_* flags
// are left untouched.
void COpenGLEntryPoints::ClearEntryPoints()
{
#define GL_EXT(x,glmajor,glminor)
#define GL_FUNC(ext,req,ret,fn,arg,call) fn.Force( NULL );
#define GL_FUNC_VOID(ext,req,fn,arg,call) fn.Force( NULL );
#include "togl/glfuncs.inl"
#undef GL_FUNC_VOID
#undef GL_FUNC
#undef GL_EXT
}
// Turn off memdbg macros (turned on up top) since this is included like a header
#include "tier0/memdbgoff.h"

6092
togl/linuxwin/glmgr.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,622 @@
// BE VERY VERY CAREFUL what you do in these function. They are extremely hot, and calling the wrong GL API's in here will crush perf. (especially on NVidia threaded drivers).
// Cheap 32-bit integer mixing step used to spread sampler-state bits across
// the sampler-object hash table. This is a deliberately trimmed mix — the
// original carried the remaining steps of the full sequence commented out —
// kept minimal because it runs on the hot batch path.
FORCEINLINE uint32 bitmix32(uint32 a)
{
	uint32 h = a;
	h -= h << 6;
	h ^= h << 4;
	h ^= h >> 15;
	return h;
}
#ifndef OSX
// Returns (creating on first use) the GL sampler object whose state matches
// desiredParams. Open-addressing hash table with linear probing over a
// fixed-size, power-of-two table; Error()s out when the table fills.
// BE VERY CAREFUL: called per batch — keep this hot path cheap.
FORCEINLINE GLuint GLMContext::FindSamplerObject( const GLMTexSamplingParams &desiredParams )
{
// Hash the packed state bits plus border color, masked to the table size.
int h = bitmix32( desiredParams.m_bits + desiredParams.m_borderColor ) & ( cSamplerObjectHashSize - 1 );
// Probe linearly (with wraparound) until we hit a matching entry or an
// empty (invalid) slot.
while ( ( m_samplerObjectHash[h].m_params.m_bits != desiredParams.m_bits ) || ( m_samplerObjectHash[h].m_params.m_borderColor != desiredParams.m_borderColor ) )
{
if ( !m_samplerObjectHash[h].m_params.m_packed.m_isValid )
break;
if ( ++h >= cSamplerObjectHashSize )
h = 0;
}
// Empty slot: claim it and configure its GL sampler object to match.
if ( !m_samplerObjectHash[h].m_params.m_packed.m_isValid )
{
GLMTexSamplingParams &hashParams = m_samplerObjectHash[h].m_params;
hashParams = desiredParams;
hashParams.SetToSamplerObject( m_samplerObjectHash[h].m_samplerObject );
if ( ++m_nSamplerObjectHashNumEntries == cSamplerObjectHashSize )
{
// TODO: Support resizing
Error( "Sampler object hash is full, increase cSamplerObjectHashSize" );
}
}
return m_samplerObjectHash[h].m_samplerObject;
}
#endif // !OSX
// BE VERY CAREFUL WHAT YOU DO IN HERE. This is called on every batch, even seemingly simple changes can kill perf.
FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex, uint nBaseVertex ) // shadersOn = true for draw calls, false for clear calls
{
Assert( m_drawingLang == kGLMGLSL ); // no support for ARB shaders right now (and NVidia reports that they aren't worth targeting under Windows/Linux for various reasons anyway)
Assert( ( m_drawingFBO == m_boundDrawFBO ) && ( m_drawingFBO == m_boundReadFBO ) ); // this check MUST succeed
Assert( m_pDevice->m_pVertDecl );
#if GLMDEBUG
GLM_FUNC;
#endif
GL_BATCH_PERF( m_FlushStats.m_nTotalBatchFlushes++; )
#if GLMDEBUG
bool tex0_srgb = (m_boundDrawFBO[0].m_attach[0].m_tex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
// you can only actually use the sRGB FB state on some systems.. check caps
if (m_caps.m_hasGammaWrites)
{
GLBlendEnableSRGB_t writeSRGBState;
m_BlendEnableSRGB.Read( &writeSRGBState, 0 ); // the client set value, not the API-written value yet..
bool draw_srgb = writeSRGBState.enable != 0;
if (draw_srgb)
{
if (tex0_srgb)
{
// good - draw mode and color tex agree
}
else
{
// bad
// Client has asked to write sRGB into a texture that can't do it.
// there is no way to satisfy this unless we change the RT tex and we avoid doing that.
// (although we might consider a ** ONE TIME ** promotion.
// this shouldn't be a big deal if the tex format is one where it doesn't matter like 32F.
GLMPRINTF(("-Z- srgb-enabled FBO conflict: attached tex %08x [%s] is not SRGB", m_boundDrawFBO[0].m_attach[0].m_tex, m_boundDrawFBO[0].m_attach[0].m_tex->m_layout->m_layoutSummary ));
// do we shoot down the srgb-write state for this batch?
// I think the runtime will just ignore it.
}
}
else
{
if (tex0_srgb)
{
// odd - client is not writing sRGB into a texture which *can* do it.
//GLMPRINTF(( "-Z- srgb-disabled FBO conflict: attached tex %08x [%s] is SRGB", m_boundFBO[0].m_attach[0].m_tex, m_boundFBO[0].m_attach[0].m_tex->m_layout->m_layoutSummary ));
//writeSRGBState.enable = true;
//m_BlendEnableSRGB.Write( &writeSRGBState );
}
else
{
// good - draw mode and color tex agree
}
}
}
#endif
Assert( m_drawingProgram[ kGLMVertexProgram ] );
Assert( m_drawingProgram[ kGLMFragmentProgram ] );
Assert( ( m_drawingProgram[kGLMVertexProgram]->m_type == kGLMVertexProgram ) && ( m_drawingProgram[kGLMFragmentProgram]->m_type == kGLMFragmentProgram ) );
Assert( m_drawingProgram[ kGLMVertexProgram ]->m_bTranslatedProgram && m_drawingProgram[ kGLMFragmentProgram ]->m_bTranslatedProgram );
#if GLMDEBUG
// Depth compare mode check
uint nCurMask = 1, nShaderSamplerMask = m_drawingProgram[kGLMFragmentProgram]->m_samplerMask;
for ( int nSamplerIndex = 0; nSamplerIndex < GLM_SAMPLER_COUNT; ++nSamplerIndex, nCurMask <<= 1 )
{
if ( !m_samplers[nSamplerIndex].m_pBoundTex )
continue;
if ( m_samplers[nSamplerIndex].m_pBoundTex->m_layout->m_mipCount == 1 )
{
if ( m_samplers[nSamplerIndex].m_samp.m_packed.m_mipFilter == D3DTEXF_LINEAR )
{
GLMDebugPrintf( "Sampler %u has mipmap filtering enabled on a texture without mipmaps! (texture name: %s, pixel shader: %s)!\n",
nSamplerIndex,
m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel ? m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel : "?",
m_drawingProgram[kGLMFragmentProgram]->m_shaderName );
}
}
if ( ( nShaderSamplerMask & nCurMask ) == 0 )
continue;
if ( m_samplers[nSamplerIndex].m_pBoundTex->m_layout->m_mipCount == 1 )
{
if ( m_samplers[nSamplerIndex].m_samp.m_packed.m_mipFilter == D3DTEXF_LINEAR )
{
// Note this is not always an error - shadow buffer debug visualization shaders purposely want to read shadow depths (and not do the comparison)
GLMDebugPrintf( "Sampler %u has mipmap filtering enabled on a texture without mipmaps! (texture name: %s, pixel shader: %s)!\n",
nSamplerIndex,
m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel ? m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel : "?",
m_drawingProgram[kGLMFragmentProgram]->m_shaderName );
}
}
bool bSamplerIsDepth = ( m_samplers[nSamplerIndex].m_pBoundTex->m_layout->m_key.m_texFlags & kGLMTexIsDepth ) != 0;
bool bSamplerShadow = m_samplers[nSamplerIndex].m_samp.m_packed.m_compareMode != 0;
bool bShaderShadow = ( m_drawingProgram[kGLMFragmentProgram]->m_nShadowDepthSamplerMask & nCurMask ) != 0;
if ( bShaderShadow )
{
// Shader expects shadow depth sampling at this sampler index
// Must have a depth texture and compare mode must be enabled
if ( !bSamplerIsDepth || !bSamplerShadow )
{
// FIXME: This occasionally occurs in L4D2 when CShaderAPIDx8::ExecuteCommandBuffer() sets the TEXTURE_WHITE texture in the flashlight depth texture slot.
GLMDebugPrintf( "Sampler %u's compare mode (%u) or format (depth=%u) is not consistent with pixel shader's compare mode (%u) (texture name: %s, pixel shader: %s)!\n",
nSamplerIndex, bSamplerShadow, bSamplerIsDepth, bShaderShadow,
m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel ? m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel : "?",
m_drawingProgram[kGLMFragmentProgram]->m_shaderName );
}
}
else
{
// Shader does not expect shadow depth sampling as this sampler index
// We don't care if comparemode is enabled, but we can't have a depth texture in this sampler
if ( bSamplerIsDepth )
{
GLMDebugPrintf( "Sampler %u is a depth texture but the pixel shader's shadow depth sampler mask does not expect depth here (texture name: %s, pixel shader: %s)!\n",
nSamplerIndex,
m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel ? m_samplers[nSamplerIndex].m_pBoundTex->m_debugLabel : "?",
m_drawingProgram[kGLMFragmentProgram]->m_shaderName );
}
}
}
#endif
if ( m_bDirtyPrograms )
{
m_bDirtyPrograms = false;
CGLMShaderPair *pNewPair = m_pairCache->SelectShaderPair( m_drawingProgram[ kGLMVertexProgram ], m_drawingProgram[ kGLMFragmentProgram ], 0 );
if ( pNewPair != m_pBoundPair )
{
#if GL_BATCH_TELEMETRY_ZONES
tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "NewProgram" );
#endif
if ( !pNewPair->m_valid )
{
if ( !pNewPair->ValidateProgramPair() )
{
goto flush_error_exit;
}
}
gGL->glUseProgram( (GLuint)pNewPair->m_program );
GL_BATCH_PERF( m_FlushStats.m_nTotalProgramPairChanges++; )
if ( !m_pBoundPair )
{
GL_BATCH_PERF( m_FlushStats.m_nNewPS++; )
GL_BATCH_PERF( m_FlushStats.m_nNewVS++; )
}
else
{
GL_BATCH_PERF( if ( pNewPair->m_fragmentProg != m_pBoundPair->m_fragmentProg ) m_FlushStats.m_nNewPS++; )
GL_BATCH_PERF( if ( pNewPair->m_vertexProg != m_pBoundPair->m_vertexProg ) m_FlushStats.m_nNewVS++; )
}
#if GL_BATCH_PERF_ANALYSIS
tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_NOTE, "V:%s (V Regs:%u V Bone Regs:%u) F:%s (F Regs:%u)",
m_drawingProgram[ kGLMVertexProgram ]->m_shaderName,
m_drawingProgram[ kGLMVertexProgram ]->m_descs[kGLMGLSL].m_highWater,
m_drawingProgram[ kGLMVertexProgram ]->m_descs[kGLMGLSL].m_VSHighWaterBone,
m_drawingProgram[ kGLMFragmentProgram ]->m_shaderName,
m_drawingProgram[ kGLMFragmentProgram ]->m_descs[kGLMGLSL].m_highWater );
#endif
m_pBoundPair = pNewPair;
// set the dirty levels appropriately since the program changed and has never seen any of the current values.
m_programParamsF[kGLMVertexProgram].m_firstDirtySlotNonBone = 0;
m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone = m_drawingProgram[ kGLMVertexProgram ]->m_descs[kGLMGLSL].m_highWater;
m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterBone = m_drawingProgram[ kGLMVertexProgram ]->m_descs[kGLMGLSL].m_VSHighWaterBone;
m_programParamsF[kGLMFragmentProgram].m_firstDirtySlotNonBone = 0;
m_programParamsF[kGLMFragmentProgram].m_dirtySlotHighWaterNonBone = m_drawingProgram[ kGLMFragmentProgram ]->m_descs[kGLMGLSL].m_highWater;
// bool and int dirty levels get set to max, we don't have actual high water marks for them
// code which sends the values must clamp on these types.
m_programParamsB[kGLMVertexProgram].m_dirtySlotCount = kGLMProgramParamBoolLimit;
m_programParamsB[kGLMFragmentProgram].m_dirtySlotCount = kGLMProgramParamBoolLimit;
m_programParamsI[kGLMVertexProgram].m_dirtySlotCount = kGLMProgramParamInt4Limit;
m_programParamsI[kGLMFragmentProgram].m_dirtySlotCount = 0;
// check fragment buffers used (MRT)
if( pNewPair->m_fragmentProg->m_fragDataMask != m_fragDataMask )
{
gGL->glDrawBuffers( pNewPair->m_fragmentProg->m_numDrawBuffers, pNewPair->m_fragmentProg->m_drawBuffers );
m_fragDataMask = pNewPair->m_fragmentProg->m_fragDataMask;
}
}
}
Assert( m_ViewportBox.GetData().width == (int)( m_ViewportBox.GetData().widthheight & 0xFFFF ) );
Assert( m_ViewportBox.GetData().height == (int)( m_ViewportBox.GetData().widthheight >> 16 ) );
m_pBoundPair->UpdateScreenUniform( m_ViewportBox.GetData().widthheight );
GL_BATCH_PERF( m_FlushStats.m_nNumChangedSamplers += m_nNumDirtySamplers );
#if !defined( OSX ) // no support for sampler objects in OSX 10.6 (GL 2.1 profile)
if ( m_bUseSamplerObjects)
{
while ( m_nNumDirtySamplers )
{
const uint nSamplerIndex = m_nDirtySamplers[--m_nNumDirtySamplers];
Assert( ( nSamplerIndex < GLM_SAMPLER_COUNT ) && ( !m_nDirtySamplerFlags[nSamplerIndex]) );
m_nDirtySamplerFlags[nSamplerIndex] = 1;
gGL->glBindSampler( nSamplerIndex, FindSamplerObject( m_samplers[nSamplerIndex].m_samp ) );
GL_BATCH_PERF( m_FlushStats.m_nNumSamplingParamsChanged++ );
#if defined( OSX ) // valid for OSX only if using GL 3.3 context
CGLMTex *pTex = m_samplers[nSamplerIndex].m_pBoundTex;
if( pTex && !( gGL->m_bHave_GL_EXT_texture_sRGB_decode ) )
{
// see if requested SRGB state differs from the known one
bool texSRGB = ( pTex->m_layout->m_key.m_texFlags & kGLMTexSRGB ) != 0;
bool glSampSRGB = m_samplers[nSamplerIndex].m_samp.m_packed.m_srgb;
if ( texSRGB != glSampSRGB ) // mismatch
{
pTex->HandleSRGBMismatch( glSampSRGB, pTex->m_srgbFlipCount );
}
}
#endif
}
}
else
#endif // if !defined( OSX )
{
while ( m_nNumDirtySamplers )
{
const uint nSamplerIndex = m_nDirtySamplers[--m_nNumDirtySamplers];
Assert( ( nSamplerIndex < GLM_SAMPLER_COUNT ) && ( !m_nDirtySamplerFlags[nSamplerIndex]) );
m_nDirtySamplerFlags[nSamplerIndex] = 1;
CGLMTex *pTex = m_samplers[nSamplerIndex].m_pBoundTex;
if ( ( pTex ) && ( !( pTex->m_SamplingParams == m_samplers[nSamplerIndex].m_samp ) ) )
{
SelectTMU( nSamplerIndex );
m_samplers[nSamplerIndex].m_samp.DeltaSetToTarget( pTex->m_texGLTarget, pTex->m_SamplingParams );
pTex->m_SamplingParams = m_samplers[nSamplerIndex].m_samp;
#if defined( OSX )
if( pTex && !( gGL->m_bHave_GL_EXT_texture_sRGB_decode ) )
{
// see if requested SRGB state differs from the known one
bool texSRGB = ( pTex->m_layout->m_key.m_texFlags & kGLMTexSRGB ) != 0;
bool glSampSRGB = m_samplers[nSamplerIndex].m_samp.m_packed.m_srgb;
if ( texSRGB != glSampSRGB ) // mismatch
{
pTex->HandleSRGBMismatch( glSampSRGB, pTex->m_srgbFlipCount );
}
}
#endif
}
}
}
// vertex stage --------------------------------------------------------------------
if ( m_bUseBoneUniformBuffers )
{
// vertex stage --------------------------------------------------------------------
if ( m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone )
{
int firstDirtySlot = m_programParamsF[kGLMVertexProgram].m_firstDirtySlotNonBone;
int dirtySlotHighWater = MIN( m_drawingProgram[kGLMVertexProgram]->m_descs[kGLMGLSL].m_highWater, m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone );
GLint vconstLoc = m_pBoundPair->m_locVertexParams;
if ( ( vconstLoc >= 0 ) && ( dirtySlotHighWater > firstDirtySlot ) )
{
#if GL_BATCH_TELEMETRY_ZONES
tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "VSNonBoneUniformUpdate %u %u", firstDirtySlot, dirtySlotHighWater );
#endif
int numSlots = dirtySlotHighWater - DXABSTRACT_VS_FIRST_BONE_SLOT;
// consts after the bones (c217 onwards), since we use the concatenated destination array vc[], upload these consts starting from vc[58]
if( numSlots > 0 )
{
gGL->glUniform4fv( m_pBoundPair->m_UniformBufferParams[kGLMVertexProgram][DXABSTRACT_VS_FIRST_BONE_SLOT], numSlots, &m_programParamsF[kGLMVertexProgram].m_values[(DXABSTRACT_VS_LAST_BONE_SLOT+1)][0] );
dirtySlotHighWater = DXABSTRACT_VS_FIRST_BONE_SLOT;
GL_BATCH_PERF( m_nTotalVSUniformCalls++; )
GL_BATCH_PERF( m_nTotalVSUniformsSet += numSlots; )
GL_BATCH_PERF( m_FlushStats.m_nFirstVSConstant = DXABSTRACT_VS_FIRST_BONE_SLOT; )
GL_BATCH_PERF( m_FlushStats.m_nNumVSConstants += numSlots; )
}
numSlots = dirtySlotHighWater - firstDirtySlot;
// consts before the bones (c0-c57)
if( numSlots > 0 )
{
gGL->glUniform4fv( m_pBoundPair->m_UniformBufferParams[kGLMVertexProgram][firstDirtySlot], dirtySlotHighWater - firstDirtySlot, &m_programParamsF[kGLMVertexProgram].m_values[firstDirtySlot][0] );
GL_BATCH_PERF( m_nTotalVSUniformCalls++; )
GL_BATCH_PERF( m_nTotalVSUniformsSet += dirtySlotHighWater - firstDirtySlot; )
GL_BATCH_PERF( m_FlushStats.m_nFirstVSConstant = firstDirtySlot; )
GL_BATCH_PERF( m_FlushStats.m_nNumVSConstants += (dirtySlotHighWater - firstDirtySlot); )
}
}
m_programParamsF[kGLMVertexProgram].m_firstDirtySlotNonBone = 256;
m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone = 0;
}
if ( m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterBone )
{
const GLint vconstBoneLoc = m_pBoundPair->m_locVertexBoneParams;
if ( vconstBoneLoc >= 0 )
{
int shaderSlotsBone = 0;
if ( ( m_drawingProgram[kGLMVertexProgram]->m_descs[kGLMGLSL].m_VSHighWaterBone > 0 ) && ( m_nMaxUsedVertexProgramConstantsHint > DXABSTRACT_VS_FIRST_BONE_SLOT ) )
{
shaderSlotsBone = MIN( m_drawingProgram[kGLMVertexProgram]->m_descs[kGLMGLSL].m_VSHighWaterBone, m_nMaxUsedVertexProgramConstantsHint - DXABSTRACT_VS_FIRST_BONE_SLOT );
}
int dirtySlotHighWaterBone = MIN( shaderSlotsBone, m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterBone );
if ( dirtySlotHighWaterBone )
{
uint nNumBoneRegs = dirtySlotHighWaterBone;
#if GL_BATCH_TELEMETRY_ZONES
tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "VSBoneUniformUpdate %u", nNumBoneRegs );
#endif
gGL->glUniform4fv( vconstBoneLoc, nNumBoneRegs, &m_programParamsF[kGLMVertexProgram].m_values[DXABSTRACT_VS_FIRST_BONE_SLOT][0] );
GL_BATCH_PERF( m_nTotalVSUniformBoneCalls++; )
GL_BATCH_PERF( m_nTotalVSUniformsBoneSet += nNumBoneRegs; )
GL_BATCH_PERF( m_FlushStats.m_nNumVSBoneConstants += nNumBoneRegs; )
}
m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterBone = 0;
}
}
}
else
{
if ( m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone )
{
const int nMaxUsedShaderSlots = m_drawingProgram[kGLMVertexProgram]->m_descs[kGLMGLSL].m_highWater;
int firstDirtySlot = m_programParamsF[kGLMVertexProgram].m_firstDirtySlotNonBone;
int dirtySlotHighWater = MIN( nMaxUsedShaderSlots, m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone );
GLint vconstLoc = m_pBoundPair->m_locVertexParams;
if ( ( vconstLoc >= 0 ) && ( dirtySlotHighWater > firstDirtySlot ) )
{
#if GL_BATCH_TELEMETRY_ZONES
tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "VSNonBoneUniformUpdate %u %u", firstDirtySlot, dirtySlotHighWater );
#endif
gGL->glUniform4fv( m_pBoundPair->m_UniformBufferParams[kGLMVertexProgram][firstDirtySlot], dirtySlotHighWater - firstDirtySlot, &m_programParamsF[kGLMVertexProgram].m_values[firstDirtySlot][0] );
GL_BATCH_PERF( m_nTotalVSUniformCalls++; )
GL_BATCH_PERF( m_nTotalVSUniformsSet += dirtySlotHighWater - firstDirtySlot; )
GL_BATCH_PERF( m_FlushStats.m_nFirstVSConstant = firstDirtySlot; )
GL_BATCH_PERF( m_FlushStats.m_nNumVSConstants += (dirtySlotHighWater - firstDirtySlot); )
}
m_programParamsF[kGLMVertexProgram].m_firstDirtySlotNonBone = 256;
m_programParamsF[kGLMVertexProgram].m_dirtySlotHighWaterNonBone = 0;
}
}
// see if VS uses i0, b0, b1, b2, b3.
// use a glUniform1i to set any one of these if active. skip all of them if no dirties reported.
// my kingdom for the UBO extension!
// ------- bools ---------- //
if ( m_pBoundPair->m_bHasBoolOrIntUniforms )
{
if ( m_programParamsB[kGLMVertexProgram].m_dirtySlotCount ) // optimize this later after the float param pushes are proven out
{
const uint nLimit = MIN( CGLMShaderPair::cMaxVertexShaderBoolUniforms, m_programParamsB[kGLMVertexProgram].m_dirtySlotCount );
for ( uint i = 0; i < nLimit; ++i )
{
GLint constBoolLoc = m_pBoundPair->m_locVertexBool[i];
if ( constBoolLoc >= 0 )
gGL->glUniform1i( constBoolLoc, m_programParamsB[kGLMVertexProgram].m_values[i] );
}
m_programParamsB[kGLMVertexProgram].m_dirtySlotCount = 0;
}
if ( m_programParamsB[kGLMFragmentProgram].m_dirtySlotCount ) // optimize this later after the float param pushes are proven out
{
const uint nLimit = MIN( CGLMShaderPair::cMaxFragmentShaderBoolUniforms, m_programParamsB[kGLMFragmentProgram].m_dirtySlotCount );
for ( uint i = 0; i < nLimit; ++i )
{
GLint constBoolLoc = m_pBoundPair->m_locFragmentBool[i];
if ( constBoolLoc >= 0 )
gGL->glUniform1i( constBoolLoc, m_programParamsB[kGLMFragmentProgram].m_values[i] );
}
m_programParamsB[kGLMFragmentProgram].m_dirtySlotCount = 0;
}
if ( m_programParamsI[kGLMVertexProgram].m_dirtySlotCount )
{
GLint vconstInt0Loc = m_pBoundPair->m_locVertexInteger0; //glGetUniformLocationARB( prog, "i0");
if ( vconstInt0Loc >= 0 )
{
gGL->glUniform1i( vconstInt0Loc, m_programParamsI[kGLMVertexProgram].m_values[0][0] ); //FIXME magic number
}
m_programParamsI[kGLMVertexProgram].m_dirtySlotCount = 0;
}
}
Assert( ( m_pDevice->m_streams[0].m_vtxBuffer && ( m_pDevice->m_streams[0].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[0] ) ) || ( ( !m_pDevice->m_streams[0].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[0] == m_pDevice->m_pDummy_vtx_buffer ) ) );
Assert( ( m_pDevice->m_streams[1].m_vtxBuffer && ( m_pDevice->m_streams[1].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[1] ) ) || ( ( !m_pDevice->m_streams[1].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[1] == m_pDevice->m_pDummy_vtx_buffer ) ) );
Assert( ( m_pDevice->m_streams[2].m_vtxBuffer && ( m_pDevice->m_streams[2].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[2] ) ) || ( ( !m_pDevice->m_streams[2].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[2] == m_pDevice->m_pDummy_vtx_buffer ) ) );
Assert( ( m_pDevice->m_streams[3].m_vtxBuffer && ( m_pDevice->m_streams[3].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[3] ) ) || ( ( !m_pDevice->m_streams[3].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[3] == m_pDevice->m_pDummy_vtx_buffer ) ) );
uint nCurTotalBufferRevision;
nCurTotalBufferRevision = m_pDevice->m_vtx_buffers[0]->m_nRevision + m_pDevice->m_vtx_buffers[1]->m_nRevision + m_pDevice->m_vtx_buffers[2]->m_nRevision + m_pDevice->m_vtx_buffers[3]->m_nRevision;
// If any of these inputs have changed, we need to enumerate through all of the expected GL vertex attribs and modify anything in the GL layer that have changed.
// This is not always a win, but it is a net win on NVidia (by 1-4.8% depending on whether driver threading is enabled).
if ( ( nCurTotalBufferRevision != m_CurAttribs.m_nTotalBufferRevision ) ||
( m_CurAttribs.m_pVertDecl != m_pDevice->m_pVertDecl ) ||
( m_CurAttribs.m_vtxAttribMap[0] != reinterpret_cast<const uint64 *>(m_pDevice->m_vertexShader->m_vtxAttribMap)[0] ) ||
( m_CurAttribs.m_vtxAttribMap[1] != reinterpret_cast<const uint64 *>(m_pDevice->m_vertexShader->m_vtxAttribMap)[1] ) ||
( memcmp( m_CurAttribs.m_streams, m_pDevice->m_streams, sizeof( m_pDevice->m_streams ) ) != 0 ) )
{
// This branch is taken 52.2% of the time in the L4D2 test1 (long) timedemo.
#if GL_BATCH_TELEMETRY_ZONES
tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "SetVertexAttribs" );
#endif
m_CurAttribs.m_nTotalBufferRevision = nCurTotalBufferRevision;
m_CurAttribs.m_pVertDecl = m_pDevice->m_pVertDecl;
m_CurAttribs.m_vtxAttribMap[0] = reinterpret_cast<const uint64 *>(m_pDevice->m_vertexShader->m_vtxAttribMap)[0];
m_CurAttribs.m_vtxAttribMap[1] = reinterpret_cast<const uint64 *>(m_pDevice->m_vertexShader->m_vtxAttribMap)[1];
memcpy( m_CurAttribs.m_streams, m_pDevice->m_streams, sizeof( m_pDevice->m_streams ) );
unsigned char *pVertexShaderAttribMap = m_pDevice->m_vertexShader->m_vtxAttribMap;
const int nMaxVertexAttributesToCheck = m_drawingProgram[ kGLMVertexProgram ]->m_maxVertexAttrs;
IDirect3DVertexDeclaration9 *pVertDecl = m_pDevice->m_pVertDecl;
const uint8 *pVertexAttribDescToStreamIndex = pVertDecl->m_VertexAttribDescToStreamIndex;
for( int nMask = 1, nIndex = 0; nIndex < nMaxVertexAttributesToCheck; ++nIndex, nMask <<= 1 )
{
uint8 vertexShaderAttrib = pVertexShaderAttribMap[ nIndex ];
uint nDeclIndex = pVertexAttribDescToStreamIndex[vertexShaderAttrib];
if ( nDeclIndex == 0xFF )
{
// Not good - the vertex shader has an attribute which can't be located in the decl!
// The D3D9 debug runtime is also going to complain.
Assert( 0 );
if ( m_lastKnownVertexAttribMask & nMask )
{
m_lastKnownVertexAttribMask &= ~nMask;
gGL->glDisableVertexAttribArray( nIndex );
}
continue;
}
D3DVERTEXELEMENT9_GL *pDeclElem = &pVertDecl->m_elements[nDeclIndex];
Assert( ( ( vertexShaderAttrib >> 4 ) == pDeclElem->m_dxdecl.Usage ) && ( ( vertexShaderAttrib & 0x0F ) == pDeclElem->m_dxdecl.UsageIndex) );
const uint nStreamIndex = pDeclElem->m_dxdecl.Stream;
const D3DStreamDesc *pStream = &m_pDevice->m_streams[ nStreamIndex ];
CGLMBuffer *pBuf = m_pDevice->m_vtx_buffers[ nStreamIndex ];
if ( pBuf == m_pDevice->m_pDummy_vtx_buffer )
{
Assert( pStream->m_vtxBuffer == NULL );
// this shader doesn't use that pair.
if ( m_lastKnownVertexAttribMask & nMask )
{
m_lastKnownVertexAttribMask &= ~nMask;
gGL->glDisableVertexAttribArray( nIndex );
}
continue;
}
Assert( pStream->m_vtxBuffer->m_vtxBuffer == pBuf );
int nBufOffset = pDeclElem->m_gldecl.m_offset + pStream->m_offset;
Assert( nBufOffset >= 0 );
Assert( nBufOffset < (int)pBuf->m_nSize );
if ( pBuf->m_bUsingPersistentBuffer )
{
nBufOffset += pBuf->m_nPersistentBufferStartOffset;
}
SetBufAndVertexAttribPointer( nIndex, pBuf->GetHandle(),
pStream->m_stride, pDeclElem->m_gldecl.m_datatype, pDeclElem->m_gldecl.m_normalized, pDeclElem->m_gldecl.m_nCompCount,
reinterpret_cast< const GLvoid * >( reinterpret_cast< int >( pBuf->m_pPseudoBuf ) + nBufOffset ),
pBuf->m_nRevision );
if ( !( m_lastKnownVertexAttribMask & nMask ) )
{
m_lastKnownVertexAttribMask |= nMask;
gGL->glEnableVertexAttribArray( nIndex );
}
}
for( int nIndex = nMaxVertexAttributesToCheck; nIndex < m_nNumSetVertexAttributes; nIndex++ )
{
gGL->glDisableVertexAttribArray( nIndex );
m_lastKnownVertexAttribMask &= ~(1 << nIndex);
}
m_nNumSetVertexAttributes = nMaxVertexAttributesToCheck;
}
// fragment stage --------------------------------------------------------------------
if ( m_programParamsF[kGLMFragmentProgram].m_dirtySlotHighWaterNonBone )
{
GLint fconstLoc;
fconstLoc = m_pBoundPair->m_locFragmentParams;
if ( fconstLoc >= 0 )
{
const int nMaxUsedShaderSlots = m_drawingProgram[kGLMFragmentProgram]->m_descs[kGLMGLSL].m_highWater;
int firstDirtySlot = m_programParamsF[kGLMFragmentProgram].m_firstDirtySlotNonBone;
int dirtySlotHighWater = MIN( nMaxUsedShaderSlots, m_programParamsF[kGLMFragmentProgram].m_dirtySlotHighWaterNonBone );
if ( dirtySlotHighWater > firstDirtySlot )
{
#if GL_BATCH_TELEMETRY_ZONES
tmZone( TELEMETRY_LEVEL2, TMZF_NONE, "PSUniformUpdate %u %u", firstDirtySlot, dirtySlotHighWater );
#endif
gGL->glUniform4fv( m_pBoundPair->m_UniformBufferParams[kGLMFragmentProgram][firstDirtySlot], dirtySlotHighWater - firstDirtySlot, &m_programParamsF[kGLMFragmentProgram].m_values[firstDirtySlot][0] );
GL_BATCH_PERF( m_nTotalPSUniformCalls++; )
GL_BATCH_PERF( m_nTotalPSUniformsSet += dirtySlotHighWater - firstDirtySlot; )
GL_BATCH_PERF( m_FlushStats.m_nFirstPSConstant = firstDirtySlot; )
GL_BATCH_PERF( m_FlushStats.m_nNumPSConstants += (dirtySlotHighWater - firstDirtySlot); )
}
m_programParamsF[kGLMFragmentProgram].m_firstDirtySlotNonBone = 256;
m_programParamsF[kGLMFragmentProgram].m_dirtySlotHighWaterNonBone = 0;
}
}
return;
flush_error_exit:
m_pBoundPair = NULL;
m_bDirtyPrograms = true;
return;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,34 @@
//========= Copyright 1996-2009, Valve Corporation, All rights reserved. ============//
//
// Purpose: provide some call-out glue to ObjC from the C++ GLMgr code
//
// $Revision: $
// $NoKeywords: $
//=============================================================================//
#include <Cocoa/Cocoa.h>
#include <OpenGL/OpenGL.h>
#include <OpenGL/gl.h>
#include <OpenGL/glext.h>
#undef MIN
#undef MAX
#define DONT_DEFINE_BOOL // Don't define BOOL!
#include "tier0/threadtools.h"
#include "tier1/interface.h"
#include "tier1/strtools.h"
#include "tier1/utllinkedlist.h"
#include "togl/rendermechanism.h"
// ------------------------------------------------------------------------------------ //
// some glue to let GLMgr call into NS/ObjC classes.
// ------------------------------------------------------------------------------------ //
// Return the CGL-layer context object backing the given NSOpenGLContext.
// GLMgr passes the context around as an opaque PseudoNSGLContextPtr so the
// C++ side never has to include Objective-C headers.
CGLContextObj GetCGLContextFromNSGL( PseudoNSGLContextPtr nsglCtx )
{
	return (CGLContextObj)[ (NSOpenGLContext*)nsglCtx CGLContextObj];
}

View File

@ -0,0 +1,29 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef CGLMTEXINLINES_H
#define CGLMTEXINLINES_H
#pragma once
#endif // CGLMTEXINLINES_H

View File

@ -0,0 +1,71 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "intelglmallocworkaround.h"
#include "mach_override.h"
// memdbgon -must- be the last include file in a .cpp file.
#include "tier0/memdbgon.h"
// Lazily-created singleton instance, see Get().
IntelGLMallocWorkaround* IntelGLMallocWorkaround::s_pWorkaround = NULL;

// Replacement malloc installed by Enable(). Forwards to the original
// allocator and zero-fills small allocations to work around the Intel GLSL
// compiler reading uninitialized memory on OS X 10.8 (see the header).
void *IntelGLMallocWorkaround::ZeroingAlloc(size_t size)
{
	// We call into this pointer that resumes the original malloc.
	void *memory = s_pWorkaround->m_pfnMallocReentry(size);

	// Guard against a failed allocation before touching the memory:
	// memset(NULL, 0, n) is undefined behavior.
	if (memory && (size < 96))
	{
		// Since the Intel driver has an issue with a small allocation
		// that's left uninitialized, we use memset to ensure it's zero-initialized.
		memset(memory, 0, size);
	}

	return memory;
}
// Accessor for the process-wide singleton; the instance is created on first
// use and intentionally never freed (it must outlive all GL allocations).
IntelGLMallocWorkaround* IntelGLMallocWorkaround::Get()
{
	if ( s_pWorkaround == NULL )
		s_pWorkaround = new IntelGLMallocWorkaround();

	return s_pWorkaround;
}
// Installs ZeroingAlloc over the C runtime's malloc using mach_override_ptr.
// Idempotent: returns true immediately if the override is already in place.
// Returns false only when the patch could not be applied.
bool IntelGLMallocWorkaround::Enable()
{
	// Already patched — m_pfnMallocReentry is only set on success.
	if ( m_pfnMallocReentry != NULL )
	{
		return true;
	}

	// mach_override_ptr plants a jump to ZeroingAlloc at malloc's entry and
	// hands back a "reentry island" through which the original malloc can
	// still be called.
	mach_error_t error = mach_override_ptr( (void*)&malloc, (const void*)&ZeroingAlloc, (void**)&m_pfnMallocReentry );
	if ( error == err_cannot_override )
	{
		m_pfnMallocReentry = NULL;
		return false;
	}

	// NOTE(review): only err_cannot_override is treated as failure here; any
	// other non-zero mach_error_t falls through to success — confirm this is
	// intentional (m_pfnMallocReentry may then be unset).
	return true;
}

View File

@ -0,0 +1,61 @@
//========= Copyright Valve Corporation, All rights reserved. ============//
// TOGL CODE LICENSE
//
// Copyright 2011-2014 Valve Corporation
// All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// intelglmallocworkaround.h
// class responsible for setting up a malloc override that zeroes allocated
// memory of less than 96 bytes. this is to work around a bug
// in the Intel GLSL compiler on Mac OS X 10.8 due to uninitialized memory.
//
// 96 was chosen due to this quote from Apple:
// "I verified that the size of the structure is exactly 64 bytes on 10.8.3, 10.8.4 and will be on 10.8.5."
//
// certain GLSL shaders would (intermittently) cause a crash the first time they
// were drawn, and the bug has supposedly been fixed in 10.9, but is unlikely to
// ever make it to 10.8.
//
//===============================================================================
#ifndef INTELGLMALLOCWORKAROUND_H
#define INTELGLMALLOCWORKAROUND_H
#include <stdlib.h>
class IntelGLMallocWorkaround
{
public:
	// Returns the process-wide singleton, creating it on first use.
	static IntelGLMallocWorkaround *Get();

	// Installs the malloc override (idempotent). Returns false if the
	// underlying mach_override_ptr call cannot patch malloc.
	bool Enable();

protected:
	IntelGLMallocWorkaround() :m_pfnMallocReentry(NULL) {}
	~IntelGLMallocWorkaround() {}

	static IntelGLMallocWorkaround *s_pWorkaround;	// lazily-created singleton

	// Replacement malloc: forwards to the original allocator, then zeroes
	// allocations smaller than 96 bytes (see file header for rationale).
	static void* ZeroingAlloc(size_t);

	typedef void* (*pfnMalloc_t)(size_t);
	pfnMalloc_t m_pfnMallocReentry;	// reentry island for the original malloc; NULL until Enable() succeeds
};
#endif // INTELGLMALLOCWORKAROUND_H

View File

@ -0,0 +1,765 @@
// mach_override.c semver:1.2.0
// Copyright (c) 2003-2012 Jonathan 'Wolf' Rentzsch: http://rentzsch.com
// Some rights reserved: http://opensource.org/licenses/mit
// https://github.com/rentzsch/mach_override
#include "mach_override.h"
#include <mach-o/dyld.h>
#include <mach/mach_host.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <sys/mman.h>
#include <libkern/OSAtomic.h>
#include <CoreServices/CoreServices.h>
/**************************
*
* Constants
*
**************************/
#pragma mark -
#pragma mark (Constants)
// Branch islands are allocated one per page; the template below is the
// per-architecture machine code that a fresh island is stamped from.
#define kPageSize 4096
#if defined(__ppc__) || defined(__POWERPC__)

// PowerPC island: save r0, load the 32-bit target into r0 (lis/ori), move it
// to CTR, restore r0, optionally replay one displaced instruction, branch
// through CTR. kAddress*/kInstruction* index 16-bit halves of the template.
long kIslandTemplate[] = {
	0x9001FFFC,	//	stw		r0,-4(SP)
	0x3C00DEAD,	//	lis		r0,0xDEAD
	0x6000BEEF,	//	ori		r0,r0,0xBEEF
	0x7C0903A6,	//	mtctr	r0
	0x8001FFFC,	//	lwz		r0,-4(SP)
	0x60000000,	//	nop		; optionally replaced
	0x4E800420	//	bctr
};

#define kAddressHi			3
#define kAddressLo			5
#define kInstructionHi		10
#define kInstructionLo		11
#elif defined(__i386__)

#define kOriginalInstructionsSize 16
// On X86 we might need to insert an add with a 32 bit immediate after the
// original instructions.
#define kMaxFixupSizeIncrease 5

unsigned char kIslandTemplate[] = {
	// kOriginalInstructionsSize nop instructions so that we
	// should have enough space to host original instructions
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	// Now the real jump instruction
	0xE9, 0xEF, 0xBE, 0xAD, 0xDE
};

#define kInstructions	0
#define kJumpAddress    kInstructions + kOriginalInstructionsSize + 1
#elif defined(__x86_64__)

#define kOriginalInstructionsSize 32
// On X86-64 we never need to insert a new instruction.
#define kMaxFixupSizeIncrease 0

// Offset of the 64-bit absolute target stored after the "jmp [rip+0]" pair.
#define kJumpAddress    kOriginalInstructionsSize + 6

unsigned char kIslandTemplate[] = {
	// kOriginalInstructionsSize nop instructions so that we
	// should have enough space to host original instructions
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
	// Now the real jump instruction
	0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00
};

#endif
/**************************
*
* Data Types
*
**************************/
#pragma mark -
#pragma mark (Data Types)
// A branch island: page-allocated scratch holding a stamped copy of
// kIslandTemplate (and, on x86, the relocated original instructions).
typedef struct {
	char	instructions[sizeof(kIslandTemplate)];
} BranchIsland;
/**************************
*
* Funky Protos
*
**************************/
#pragma mark -
#pragma mark (Funky Protos)
static mach_error_t
allocateBranchIsland(
BranchIsland **island,
void *originalFunctionAddress);
mach_error_t
freeBranchIsland(
BranchIsland *island );
#if defined(__ppc__) || defined(__POWERPC__)
mach_error_t
setBranchIslandTarget(
BranchIsland *island,
const void *branchTo,
long instruction );
#endif
#if defined(__i386__) || defined(__x86_64__)
mach_error_t
setBranchIslandTarget_i386(
BranchIsland *island,
const void *branchTo,
char* instructions );
void
atomic_mov64(
uint64_t *targetAddress,
uint64_t value );
static Boolean
eatKnownInstructions(
unsigned char *code,
uint64_t *newInstruction,
int *howManyEaten,
char *originalInstructions,
int *originalInstructionCount,
uint8_t *originalInstructionSizes );
static void
fixupInstructions(
uint32_t offset,
void *instructionsToFix,
int instructionCount,
uint8_t *instructionSizes );
#endif
/*******************************************************************************
*
* Interface
*
*******************************************************************************/
#pragma mark -
#pragma mark (Interface)
#if defined(__i386__) || defined(__x86_64__)
// Make the page containing a freshly-written branch island executable
// (read/write/execute) and flush it so the new code reaches the
// instruction stream. Returns err_cannot_override if either call fails.
mach_error_t makeIslandExecutable(void *address) {
	// Round down to the start of the island's page.
	uintptr_t islandPage = (uintptr_t)address & ~(uintptr_t)(kPageSize-1);

	int failed = err_none;
	failed |= mprotect((void *)islandPage, kPageSize, PROT_EXEC | PROT_READ | PROT_WRITE);
	failed |= msync((void *)islandPage, kPageSize, MS_INVALIDATE );

	return failed ? err_cannot_override : err_none;
}
#endif
// Patch originalFunctionAddress so all calls land in overrideFunctionAddress.
// Optionally returns (via originalFunctionReentryIsland) a "reentry island"
// that runs the displaced original instructions and jumps back, so the
// override can still call the original implementation.
mach_error_t
mach_override_ptr(
	void *originalFunctionAddress,
	const void *overrideFunctionAddress,
	void **originalFunctionReentryIsland )
{
	assert( originalFunctionAddress );
	assert( overrideFunctionAddress );

	// Chase any "jmp [mem]" stub at the entry point down to the real
	// implementation before patching.
	// this addresses overriding such functions as AudioOutputUnitStart()
	// test with modified DefaultOutputUnit project
#if defined(__x86_64__)
	for(;;){
		if(*(uint16_t*)originalFunctionAddress==0x25FF) // jmp qword near [rip+0x????????]
			originalFunctionAddress=*(void**)((char*)originalFunctionAddress+6+*(int32_t *)((uint16_t*)originalFunctionAddress+1));
		else break;
	}
#elif defined(__i386__)
	for(;;){
		if(*(uint16_t*)originalFunctionAddress==0x25FF) // jmp *0x????????
			originalFunctionAddress=**(void***)((uint16_t*)originalFunctionAddress+1);
		else break;
	}
#endif

	long	*originalFunctionPtr = (long*) originalFunctionAddress;
	mach_error_t	err = err_none;

#if defined(__ppc__) || defined(__POWERPC__)
	// Ensure first instruction isn't 'mfctr'.
	#define	kMFCTRMask			0xfc1fffff
	#define	kMFCTRInstruction	0x7c0903a6

	long	originalInstruction = *originalFunctionPtr;
	if( !err && ((originalInstruction & kMFCTRMask) == kMFCTRInstruction) )
		err = err_cannot_override;
#elif defined(__i386__) || defined(__x86_64__)
	// Disassemble enough whole instructions at the entry point to cover a
	// 5-byte rel32 jmp; fail if any instruction is unrecognized.
	int eatenCount = 0;
	int originalInstructionCount = 0;
	char originalInstructions[kOriginalInstructionsSize];
	uint8_t originalInstructionSizes[kOriginalInstructionsSize];
	uint64_t jumpRelativeInstruction = 0; // JMP

	Boolean overridePossible = eatKnownInstructions ((unsigned char *)originalFunctionPtr,
										&jumpRelativeInstruction, &eatenCount,
										originalInstructions, &originalInstructionCount,
										originalInstructionSizes );
	if (eatenCount + kMaxFixupSizeIncrease > kOriginalInstructionsSize) {
		//printf ("Too many instructions eaten\n");
		overridePossible = false;
	}
	if (!overridePossible) err = err_cannot_override;
	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
#endif

	// Make the original function implementation writable.
	if( !err ) {
		err = vm_protect( mach_task_self(),
				(vm_address_t) originalFunctionPtr, 8, false,
				(VM_PROT_ALL | VM_PROT_COPY) );
		if( err )
			err = vm_protect( mach_task_self(),
					(vm_address_t) originalFunctionPtr, 8, false,
					(VM_PROT_DEFAULT | VM_PROT_COPY) );
	}
	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);

	// Allocate and target the escape island to the overriding function.
	BranchIsland	*escapeIsland = NULL;
	if( !err )
		err = allocateBranchIsland( &escapeIsland, originalFunctionAddress );
	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);

#if defined(__ppc__) || defined(__POWERPC__)
	if( !err )
		err = setBranchIslandTarget( escapeIsland, overrideFunctionAddress, 0 );

	// Build the branch absolute instruction to the escape island.
	long	branchAbsoluteInstruction = 0; // Set to 0 just to silence warning.
	if( !err ) {
		long escapeIslandAddress = ((long) escapeIsland) & 0x3FFFFFF;
		branchAbsoluteInstruction = 0x48000002 | escapeIslandAddress;
	}
#elif defined(__i386__) || defined(__x86_64__)
	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);

	if( !err )
		err = setBranchIslandTarget_i386( escapeIsland, overrideFunctionAddress, 0 );

	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
	// Build the jump relative instruction to the escape island
#endif

#if defined(__i386__) || defined(__x86_64__)
	if (!err) {
		// Assemble "E9 rel32" (plus padding) as a single 64-bit value so it
		// can be planted atomically below; byte-swaps put it in memory order.
		uint32_t addressOffset = ((char*)escapeIsland - (char*)originalFunctionPtr - 5);
		addressOffset = OSSwapInt32(addressOffset);

		jumpRelativeInstruction |= 0xE900000000000000LL;
		jumpRelativeInstruction |= ((uint64_t)addressOffset & 0xffffffff) << 24;
		jumpRelativeInstruction = OSSwapInt64(jumpRelativeInstruction);
	}
#endif

	// Optionally allocate & return the reentry island. This may contain relocated
	// jmp instructions and so has all the same addressing reachability requirements
	// the escape island has to the original function, except the escape island is
	// technically our original function.
	BranchIsland	*reentryIsland = NULL;
	if( !err && originalFunctionReentryIsland ) {
		err = allocateBranchIsland( &reentryIsland, escapeIsland);
		if( !err )
			*originalFunctionReentryIsland = reentryIsland;
	}

#if defined(__ppc__) || defined(__POWERPC__)
	// Atomically:
	//	o If the reentry island was allocated:
	//		o Insert the original instruction into the reentry island.
	//		o Target the reentry island at the 2nd instruction of the
	//		  original function.
	//	o Replace the original instruction with the branch absolute.
	if( !err ) {
		int escapeIslandEngaged = false;
		do {
			if( reentryIsland )
				err = setBranchIslandTarget( reentryIsland,
						(void*) (originalFunctionPtr+1), originalInstruction );
			if( !err ) {
				escapeIslandEngaged = CompareAndSwap( originalInstruction,
										branchAbsoluteInstruction,
										(UInt32*)originalFunctionPtr );
				if( !escapeIslandEngaged ) {
					//	Someone replaced the instruction out from under us,
					//	re-read the instruction, make sure it's still not
					//	'mfctr' and try again.
					originalInstruction = *originalFunctionPtr;
					if( (originalInstruction & kMFCTRMask) == kMFCTRInstruction)
						err = err_cannot_override;
				}
			}
		} while( !err && !escapeIslandEngaged );
	}
#elif defined(__i386__) || defined(__x86_64__)
	// Atomically:
	//	o If the reentry island was allocated:
	//		o Insert the original instructions into the reentry island.
	//		o Target the reentry island at the first non-replaced
	//		  instruction of the original function.
	//	o Replace the original first instructions with the jump relative.
	//
	// Note that on i386, we do not support someone else changing the code under our feet
	if ( !err ) {
		uint32_t offset = (uintptr_t)originalFunctionPtr - (uintptr_t)reentryIsland;
		fixupInstructions(offset, originalInstructions,
					originalInstructionCount, originalInstructionSizes );

		if( reentryIsland )
			err = setBranchIslandTarget_i386( reentryIsland,
										 (void*) ((char *)originalFunctionPtr+eatenCount), originalInstructions );
		// try making islands executable before planting the jmp
#if defined(__x86_64__) || defined(__i386__)
		if( !err )
			err = makeIslandExecutable(escapeIsland);
		if( !err && reentryIsland )
			err = makeIslandExecutable(reentryIsland);
#endif
		if ( !err )
			atomic_mov64((uint64_t *)originalFunctionPtr, jumpRelativeInstruction);
	}
#endif

	// Clean up on error.
	if( err ) {
		if( reentryIsland )
			freeBranchIsland( reentryIsland );
		if( escapeIsland )
			freeBranchIsland( escapeIsland );
	}

	return err;
}
/*******************************************************************************
*
* Implementation
*
*******************************************************************************/
#pragma mark -
#pragma mark (Implementation)
// True when a 5-byte rel32 jmp placed at 'from' can reach 'to'; the
// displacement is measured from the end of the jmp instruction.
static bool jump_in_range(intptr_t from, intptr_t to) {
	const intptr_t displacement = to - from - 5;
	return displacement == (intptr_t)(int32_t)displacement;
}
/*******************************************************************************
Implementation: Allocates memory for a branch island.
@param island <- The allocated island.
@result <- mach_error_t
***************************************************************************/
// Walk the VM map page-by-page (forward = after the target's region,
// otherwise backward) looking for a free page to hold a branch island.
// On 64-bit the search stops once a rel32 jmp can no longer reach the
// candidate page from originalFunctionAddress.
static mach_error_t
allocateBranchIslandAux(
		BranchIsland **island,
		void *originalFunctionAddress,
		bool forward)
{
	assert( island );
	assert( sizeof( BranchIsland ) <= kPageSize );

	vm_map_t task_self = mach_task_self();
	vm_address_t original_address = (vm_address_t) originalFunctionAddress;
	vm_address_t address = original_address;

	for (;;) {
		vm_size_t vmsize = 0;
		memory_object_name_t object = 0;
		kern_return_t kr = 0;
		vm_region_flavor_t flavor = VM_REGION_BASIC_INFO;
		// Find the region the address is in.
#if __WORDSIZE == 32
		vm_region_basic_info_data_t info;
		mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT;
		kr = vm_region(task_self, &address, &vmsize, flavor,
			       (vm_region_info_t)&info, &info_count, &object);
#else
		vm_region_basic_info_data_64_t info;
		mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
		kr = vm_region_64(task_self, &address, &vmsize, flavor,
				  (vm_region_info_t)&info, &info_count, &object);
#endif
		if (kr != KERN_SUCCESS)
			return kr;
		// vm_region rounds the queried address to its region start.
		assert((address & (kPageSize - 1)) == 0);

		// Go to the first page before or after this region
		vm_address_t new_address = forward ? address + vmsize : address - kPageSize;
#if __WORDSIZE == 64
		if(!jump_in_range(original_address, new_address))
			break;
#endif
		address = new_address;

		// Try to allocate this page.
		kr = vm_allocate(task_self, &address, kPageSize, 0);
		if (kr == KERN_SUCCESS) {
			*island = (BranchIsland*) address;
			return err_none;
		}
		// KERN_NO_SPACE means the page is occupied — keep scanning; any
		// other error is fatal.
		if (kr != KERN_NO_SPACE)
			return kr;
	}
	return KERN_NO_SPACE;
}
// Allocate a branch island near originalFunctionAddress: prefer a page
// after the target's region, and fall back to searching before it.
static mach_error_t
allocateBranchIsland(
		BranchIsland **island,
		void *originalFunctionAddress)
{
	const mach_error_t forwardErr =
		allocateBranchIslandAux(island, originalFunctionAddress, true);
	if (forwardErr)
		return allocateBranchIslandAux(island, originalFunctionAddress, false);
	return forwardErr;
}
/*******************************************************************************
Implementation: Deallocates memory for a branch island.
@param island -> The island to deallocate.
@result <- mach_error_t
***************************************************************************/
// Release a branch island's page back to the VM map. Islands are always
// allocated one-per-page (see allocateBranchIslandAux).
mach_error_t
freeBranchIsland(
	BranchIsland	*island )
{
	assert( island );
	// NOTE(review): this compares a long's worth of bytes against the first
	// template element; on x86 the island's leading bytes may have been
	// overwritten with relocated instructions — looks PPC-specific, confirm.
	assert( (*(long*)&island->instructions[0]) == kIslandTemplate[0] );
	assert( sizeof( BranchIsland ) <= kPageSize );
	return vm_deallocate( mach_task_self(), (vm_address_t) island,
			      kPageSize );
}
/*******************************************************************************
Implementation: Sets the branch island's target, with an optional
instruction.
@param island -> The branch island to insert target into.
@param branchTo -> The address of the target.
@param instruction -> Optional instruction to execute prior to branch. Set
to zero for nop.
@result <- mach_error_t
***************************************************************************/
#if defined(__ppc__) || defined(__POWERPC__)
// PowerPC: stamp the island from the template, patch the 16-bit halves of
// the branch target into the lis/ori pair, and optionally splice in the one
// displaced original instruction before the bctr.
mach_error_t
setBranchIslandTarget(
	BranchIsland	*island,
	const void		*branchTo,
	long			instruction )
{
	// Copy over the template code.
	bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );

	// Fill in the address.
	((short*)island->instructions)[kAddressLo] = ((long) branchTo) & 0x0000FFFF;
	((short*)island->instructions)[kAddressHi]
		= (((long) branchTo) >> 16) & 0x0000FFFF;

	// Fill in the (optional) instruction.
	if( instruction != 0 ) {
		((short*)island->instructions)[kInstructionLo]
			= instruction & 0x0000FFFF;
		((short*)island->instructions)[kInstructionHi]
			= (instruction >> 16) & 0x0000FFFF;
	}

	//MakeDataExecutable( island->instructions, sizeof( kIslandTemplate ) );
	msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );

	return err_none;
}
#endif
#if defined(__i386__)
// i386: stamp the island from the template, copy in any relocated original
// instructions, then patch the rel32 displacement of the trailing E9 jmp
// (measured from the end of the 5-byte jmp instruction).
mach_error_t
setBranchIslandTarget_i386(
	BranchIsland	*island,
	const void		*branchTo,
	char*			instructions )
{

	// Copy over the template code.
	bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );

	// copy original instructions
	if (instructions) {
		bcopy (instructions, island->instructions + kInstructions, kOriginalInstructionsSize);
	}

	// Fill in the address.
	int32_t addressOffset = (char *)branchTo - (island->instructions + kJumpAddress + 4);
	*((int32_t *)(island->instructions + kJumpAddress)) = addressOffset;

	msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );

	return err_none;
}
#elif defined(__x86_64__)
// x86-64: stamp the island from the template, copy in any relocated original
// instructions, then store the absolute 64-bit target after the template's
// "jmp [rip+0]" — the jmp loads its destination from that slot.
mach_error_t
setBranchIslandTarget_i386(
	BranchIsland	*island,
	const void		*branchTo,
	char*			instructions )
{
	// Copy over the template code.
	bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );

	// Copy original instructions.
	if (instructions) {
		bcopy (instructions, island->instructions, kOriginalInstructionsSize);
	}

	// Fill in the address.
	*((uint64_t *)(island->instructions + kJumpAddress)) = (uint64_t)branchTo;
	msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );

	return err_none;
}
#endif
#if defined(__i386__) || defined(__x86_64__)
// simplistic instruction matching
typedef struct {
unsigned int length; // max 15
unsigned char mask[15]; // sequence of bytes in memory order
unsigned char constraint[15]; // sequence of bytes in memory order
} AsmInstructionMatch;
#if defined(__i386__)
static AsmInstructionMatch possibleInstructions[] = {
{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} }, // jmp 0x????????
{ 0x5, {0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, {0x55, 0x89, 0xe5, 0xc9, 0xc3} }, // push %ebp; mov %esp,%ebp; leave; ret
{ 0x1, {0xFF}, {0x90} }, // nop
{ 0x1, {0xFF}, {0x55} }, // push %esp
{ 0x2, {0xFF, 0xFF}, {0x89, 0xE5} }, // mov %esp,%ebp
{ 0x1, {0xFF}, {0x53} }, // push %ebx
{ 0x3, {0xFF, 0xFF, 0x00}, {0x83, 0xEC, 0x00} }, // sub 0x??, %esp
{ 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, {0x81, 0xEC, 0x00, 0x00, 0x00, 0x00} }, // sub 0x??, %esp with 32bit immediate
{ 0x1, {0xFF}, {0x57} }, // push %edi
{ 0x1, {0xFF}, {0x56} }, // push %esi
{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} }, // xor %eax, %eax
{ 0x3, {0xFF, 0x4F, 0x00}, {0x8B, 0x45, 0x00} }, // mov $imm(%ebp), %reg
{ 0x3, {0xFF, 0x4C, 0x00}, {0x8B, 0x40, 0x00} }, // mov $imm(%eax-%edx), %reg
{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x8B, 0x4C, 0x24, 0x00} }, // mov $imm(%esp), %ecx
{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} }, // mov $imm, %eax
{ 0x6, {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, {0xE8, 0x00, 0x00, 0x00, 0x00, 0x58} }, // call $imm; pop %eax
{ 0x0 }
};
#elif defined(__x86_64__)
// Table of instruction patterns commonly found at the start of x86_64
// functions.  eatKnownInstructions() matches prologue bytes against these
// entries; the zero-length entry terminates the table.
static AsmInstructionMatch possibleInstructions[] = {
	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
	{ 0x1, {0xFF}, {0x90} },							// nop
	{ 0x1, {0xF8}, {0x50} },							// push %rX (0x50..0x57, low 3 bits select register)
	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x89, 0xE5} },	// mov %rsp,%rbp
	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xEC, 0x00} },	// sub 0x??, %rsp
	{ 0x4, {0xFB, 0xFF, 0x00, 0x00}, {0x48, 0x89, 0x00, 0x00} },	// move onto rbp
	{ 0x4, {0xFF, 0xFF, 0xFF, 0xFF}, {0x40, 0x0f, 0xbe, 0xce} },	// movsbl %sil, %ecx
	{ 0x2, {0xFF, 0x00}, {0x41, 0x00} },				// push %rXX (REX.B prefix + opcode)
	{ 0x2, {0xFF, 0x00}, {0x85, 0x00} },				// test %rX,%rX
	{ 0x5, {0xF8, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} },	// mov $imm, %reg
	{ 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },	// pushq $imm(%rdi)
	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },				// xor %eax, %eax
	{ 0x2, {0xFF, 0xFF}, {0x89, 0xF8} },				// mov %edi, %eax
	//leaq offset(%rip),%rax -- RIP-relative, so fixupInstructions must patch it after relocation
	{ 0x7, {0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, {0x48, 0x8d, 0x05, 0x00, 0x00, 0x00, 0x00} },
	{ 0x0 }	// table terminator
};
#endif
// Returns true when the bytes at "code" satisfy the given pattern, i.e.
// (code[i] & mask[i]) == constraint[i] for every significant byte.
// A zero-length pattern trivially matches.
static Boolean codeMatchesInstruction(unsigned char *code, AsmInstructionMatch* instruction)
{
	size_t byteIndex;
	for (byteIndex = 0; byteIndex < instruction->length; byteIndex++)
	{
		// Bail out on the first byte that violates its constraint.
		if ((code[byteIndex] & instruction->mask[byteIndex]) != instruction->constraint[byteIndex])
			return false;
	}
	return true;
}
#if defined(__i386__) || defined(__x86_64__)
// Scans the instruction stream at "code", matching against the
// possibleInstructions table, until at least 5 bytes -- the size of a
// 32-bit relative JMP -- have been consumed.
//
//	code						->	instruction stream to scan.
//	newInstruction				<->	64-bit image of the patch being built;
//									on full success its top 3 bytes are
//									replaced with the original bytes that
//									follow the 5-byte JMP slot.
//	howManyEaten				<-	optional: total bytes consumed.
//	originalInstructionCount	<-	optional: number of instructions consumed.
//	originalInstructions		<-	optional: buffer of
//									kOriginalInstructionsSize bytes receiving
//									a NOP-padded copy of the consumed code.
//	originalInstructionSizes	<-	optional: per-instruction byte sizes.
//
// Returns true only if every instruction encountered was recognized
// (and, when requested, the copy fit in the originalInstructions buffer).
static Boolean
eatKnownInstructions(
	unsigned char *code,
	uint64_t *newInstruction,
	int *howManyEaten,
	char *originalInstructions,
	int *originalInstructionCount,
	uint8_t *originalInstructionSizes )
{
	Boolean allInstructionsKnown = true;
	int totalEaten = 0;
	unsigned char* ptr = code;
	int remainsToEat = 5; // a JMP instruction takes 5 bytes
	int instructionIndex = 0;

	if (howManyEaten) *howManyEaten = 0;
	if (originalInstructionCount) *originalInstructionCount = 0;
	while (remainsToEat > 0) {
		Boolean curInstructionKnown = false;

		// See if instruction matches one we know
		AsmInstructionMatch* curInstr = possibleInstructions;
		do {
			if ((curInstructionKnown = codeMatchesInstruction(ptr, curInstr))) break;
			curInstr++;
		} while (curInstr->length > 0); // sentinel entry has length 0

		// if all instruction matches failed, we don't know current instruction then, stop here
		if (!curInstructionKnown) {
			allInstructionsKnown = false;
			fprintf(stderr, "mach_override: some instructions unknown! Need to update mach_override.c\n");
			break;
		}

		// At this point, we've matched curInstr
		int eaten = curInstr->length;
		ptr += eaten;
		remainsToEat -= eaten;
		totalEaten += eaten;

		if (originalInstructionSizes) originalInstructionSizes[instructionIndex] = eaten;
		instructionIndex += 1;
		if (originalInstructionCount) *originalInstructionCount = instructionIndex;
	}

	if (howManyEaten) *howManyEaten = totalEaten;

	if (originalInstructions) {
		// Strictly less-than: one byte is reserved past the copy
		// (the island template appends its own jump-back sequence).
		Boolean enoughSpaceForOriginalInstructions = (totalEaten < kOriginalInstructionsSize);

		if (enoughSpaceForOriginalInstructions) {
			memset(originalInstructions, 0x90 /* NOP */, kOriginalInstructionsSize); // fill instructions with NOP
			bcopy(code, originalInstructions, totalEaten);
		} else {
			// printf ("Not enough space in island to store original instructions. Adapt the island definition and kOriginalInstructionsSize\n");
			return false;
		}
	}

	if (allInstructionsKnown) {
		// save last 3 bytes of first 64 bits of code we'll replace
		uint64_t currentFirst64BitsOfCode = *((uint64_t *)code);
		currentFirst64BitsOfCode = OSSwapInt64(currentFirst64BitsOfCode); // back to memory representation
		currentFirst64BitsOfCode &= 0x0000000000FFFFFFLL;

		// keep only last 3 instructions bytes, first 5 will be replaced by JMP instr
		*newInstruction &= 0xFFFFFFFFFF000000LL; // clear last 3 bytes
		*newInstruction |= (currentFirst64BitsOfCode & 0x0000000000FFFFFFLL); // set last 3 bytes
	}

	return allInstructionsKnown;
}
// Patches PC-relative displacements in instructions that were copied away
// from their original location, so each still targets its original
// destination.  "offset" is added to every rel32 field.
// NOTE(review): this presumes offset == (original address - relocated
// address) truncated to 32 bits -- confirm against the caller, which is
// not visible in this chunk.
//
//	offset				->	relocation delta added to each rel32 displacement.
//	instructionsToFix	<->	buffer of copied instructions, patched in place.
//	instructionCount	->	number of instructions in the buffer.
//	instructionSizes	->	byte size of each instruction (from
//							eatKnownInstructions).
static void
fixupInstructions(
    uint32_t offset,
    void *instructionsToFix,
	int instructionCount,
	uint8_t *instructionSizes )
{
	// The start of "leaq offset(%rip),%rax"
	static const uint8_t LeaqHeader[] = {0x48, 0x8d, 0x05};

	int index;
	for (index = 0;index < instructionCount;index += 1)
	{
		if (*(uint8_t*)instructionsToFix == 0xE9) // 32-bit jump relative
		{
			uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 1);
			*jumpOffsetPtr += offset;
		}

		// leaq offset(%rip),%rax
		if (memcmp(instructionsToFix, LeaqHeader, 3) == 0) {
			uint32_t *LeaqOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 3);
			*LeaqOffsetPtr += offset;
		}

		// 32-bit call relative to the next addr; pop %eax
		if (*(uint8_t*)instructionsToFix == 0xE8)
		{
			// Just this call is larger than the jump we use, so we
			// know this is the last instruction.
			assert(index == (instructionCount - 1));
			assert(instructionSizes[index] == 6);

			// Insert "addl $offset, %eax" in the end so that when
			// we jump to the rest of the function %eax has the
			// value it would have if eip had been pushed by the
			// call in its original position.
			uint8_t *op = (uint8_t*)instructionsToFix;
			op += 6;
			*op = 0x05; // addl
			uint32_t *addImmPtr = (uint32_t*)(op + 1);
			*addImmPtr = offset;
		}
		// Advance to the next copied instruction.
		instructionsToFix = (void*)((uintptr_t)instructionsToFix + instructionSizes[index]);
	}
}
#endif
#if defined(__i386__)
// Atomically publishes a 64-bit value on 32-bit x86, where a plain 8-byte
// store is not a single instruction.  Retries a 64-bit compare-and-swap
// until it succeeds against the current contents of *targetAddress.
void atomic_mov64(
		uint64_t *targetAddress,
		uint64_t value)
{
	for (;;)
	{
		uint64_t expected = *targetAddress;
		if (OSAtomicCompareAndSwap64( expected, value, (int64_t*)targetAddress ))
			return;
	}
}
#elif defined(__x86_64__)
// Atomically publishes a 64-bit value.  On x86_64 a naturally aligned
// 8-byte store is a single instruction, so plain assignment suffices
// (assumes targetAddress is 8-byte aligned -- TODO confirm at call sites).
void atomic_mov64(
		uint64_t *targetAddress,
		uint64_t value )
{
	*targetAddress = value;
}
#endif
#endif

View File

@ -0,0 +1,76 @@
// mach_override.h semver:1.2.0
// Copyright (c) 2003-2012 Jonathan 'Wolf' Rentzsch: http://rentzsch.com
// Some rights reserved: http://opensource.org/licenses/mit
// https://github.com/rentzsch/mach_override
#ifndef _mach_override_
#define _mach_override_
#include <sys/types.h>
#include <mach/error.h>
#define err_cannot_override (err_local|1)
__BEGIN_DECLS
/****************************************************************************************
Dynamically overrides the function implementation referenced by
	originalFunctionAddress with the implementation pointed to by overrideFunctionAddress.
Optionally returns a pointer to a "reentry island" which, if jumped to, will resume
the original implementation.
@param originalFunctionAddress -> Required address of the function to
override (with overrideFunctionAddress).
@param overrideFunctionAddress -> Required address to the overriding
function.
@param originalFunctionReentryIsland <- Optional pointer to pointer to the
reentry island. Can be NULL.
@result <- err_cannot_override if the original
function's implementation begins with
the 'mfctr' instruction.
************************************************************************************/
mach_error_t
mach_override_ptr(
void *originalFunctionAddress,
const void *overrideFunctionAddress,
void **originalFunctionReentryIsland );
__END_DECLS
/****************************************************************************************
If you're using C++ this macro will ease the tedium of typedef'ing, naming, keeping
track of reentry islands and defining your override code. See test_mach_override.cp
for example usage.
************************************************************************************/
#ifdef __cplusplus
// Opens a scope that overrides ORIGINAL_FUNCTION_NAME with the code
// written between MACH_OVERRIDE and END_MACH_OVERRIDE.  A static
// <name>_reenter function pointer is filled in so the replacement body can
// call through to the original implementation; the static <name>_overriden
// flag makes installation happen at most once.
// NOTE(review): the ERR argument is accepted but unused here --
// END_MACH_OVERRIDE assigns the install result to a variable literally
// named "err" in the enclosing scope.
// (No comments inside the body: "//" would swallow the continuation '\'.)
#define MACH_OVERRIDE( ORIGINAL_FUNCTION_RETURN_TYPE, ORIGINAL_FUNCTION_NAME, ORIGINAL_FUNCTION_ARGS, ERR ) \
	{ \
		static ORIGINAL_FUNCTION_RETURN_TYPE (*ORIGINAL_FUNCTION_NAME##_reenter)ORIGINAL_FUNCTION_ARGS; \
		static bool ORIGINAL_FUNCTION_NAME##_overriden = false; \
		class mach_override_class__##ORIGINAL_FUNCTION_NAME { \
		public: \
			static kern_return_t override(void *originalFunctionPtr) { \
				kern_return_t result = err_none; \
				if (!ORIGINAL_FUNCTION_NAME##_overriden) { \
					ORIGINAL_FUNCTION_NAME##_overriden = true; \
					result = mach_override_ptr( (void*)originalFunctionPtr, \
												(void*)mach_override_class__##ORIGINAL_FUNCTION_NAME::replacement, \
												(void**)&ORIGINAL_FUNCTION_NAME##_reenter ); \
				} \
				return result; \
			} \
			static ORIGINAL_FUNCTION_RETURN_TYPE replacement ORIGINAL_FUNCTION_ARGS {
// Closes the replacement function and local class opened by MACH_OVERRIDE,
// then installs the override.  The kern_return_t result is assigned to a
// variable named "err" that must exist in the enclosing scope.
#define END_MACH_OVERRIDE( ORIGINAL_FUNCTION_NAME ) \
			} \
		}; \
		\
		err = mach_override_class__##ORIGINAL_FUNCTION_NAME::override((void*)ORIGINAL_FUNCTION_NAME); \
	}
#endif
#endif // _mach_override_

115
togl/togl.vpc Normal file
View File

@ -0,0 +1,115 @@
//-----------------------------------------------------------------------------
// TOGL.VPC
//
// Project Script
//-----------------------------------------------------------------------------
$Macro SRCDIR ".." [$WIN32]
$Macro SRCDIR ".." [!$WIN32]
$Macro OUTBINDIR "$SRCDIR\..\game\bin"
$Macro OUTBINNAME "togl"
$Macro TOGL_SRCDIR "$SRCDIR/togl/linuxwin"
$Macro TOGL_INCDIR "$SRCDIR/public/togl/linuxwin"
$include "$SRCDIR\vpc_scripts\source_dll_base.vpc"
// Common Configuration -- settings shared by every build flavor; entries
// with a trailing [$CONDITION] apply only to the matching platforms.
$Configuration
{
	$Compiler
	{
		$AdditionalIncludeDirectories	"$BASE;..\"
		// Non-OSX builds also ban the unsafe CRT string functions.
		$PreprocessorDefinitions		"$BASE;TOGL_DLL_EXPORT;PROTECTED_THINGS_ENABLE;strncpy=use_Q_strncpy_instead;_snprintf=use_Q_snprintf_instead" [!$OSXALL]
		$PreprocessorDefinitions		"$BASE;TOGL_DLL_EXPORT" [$OSXALL]
	}

	$Linker
	{
		$ImportLibrary	"$LIBPUBLIC\$_IMPLIB_PREFIX$OUTBINNAME$_IMPLIB_EXT" [!$X360 && !$OSXALL]
		$ImportLibrary	"$SRCDIR\lib\$PLATFORM\$_IMPLIB_PREFIX$OUTBINNAME$_IMPLIB_EXT" [$OSXALL]
	}

	$Linker [$OSXALL]
	{
		$SystemFrameworks	"Carbon;OpenGL;Quartz;Cocoa;IOKit"
	}

	// togl/tier0/vstdlib traditionally used "lib" prefix though nobody else seems to.
	$Linker [$POSIX]
	{
		$OutputFile	"$(OBJ_DIR)/$_IMPLIB_PREFIX$OUTBINNAME$_DLL_EXT"
	}

	$General [$POSIX]
	{
		$GameOutputFile	"$OUTBINDIR/$_IMPLIB_PREFIX$OUTBINNAME$_DLL_EXT"
	}

	$PreLinkEvent [$WINDOWS]
	{
		$CommandLine	"call $SRCDIR\vpc_scripts\valve_p4_edit.cmd $LIBPUBLIC\$(TargetName).lib $SRCDIR" "\n" \
						"$BASE"
	}
}
// togl project -- the OpenGL translation layer (D3D9-on-GL).  Source and
// header folders are only populated for GL-enabled targets ($GL); files
// tagged [$OSXALL] are Mac-only (Cocoa glue and the malloc workaround).
$Project "togl"
{
	$Folder	"Source Files" [$GL]
	{
		$File	"$TOGL_SRCDIR/dx9asmtogl2.cpp"
		$File	"$TOGL_SRCDIR/dxabstract.cpp"
		$File	"$TOGL_SRCDIR/glentrypoints.cpp"
		$File	"$TOGL_SRCDIR/glmgr.cpp"
		$File	"$TOGL_SRCDIR/glmgrbasics.cpp"
		$File	"$TOGL_SRCDIR/glmgrcocoa.mm" [$OSXALL]
		$File	"$TOGL_SRCDIR/intelglmallocworkaround.cpp" [$OSXALL]
		$File	"$TOGL_SRCDIR/mach_override.c" [$OSXALL]
		$File	"$TOGL_SRCDIR/cglmtex.cpp"
		$File	"$TOGL_SRCDIR/cglmfbo.cpp"
		$File	"$TOGL_SRCDIR/cglmprogram.cpp"
		$File	"$TOGL_SRCDIR/cglmbuffer.cpp"
		$File	"$TOGL_SRCDIR/cglmquery.cpp"
	}

	// Intentionally empty placeholder for non-GL Windows builds.
	$Folder "DirectX Header Files" [$WIN32 && !$GL]
	{
	}

	$Folder	"Header Files" [$GL]
	{
		$File	"$TOGL_SRCDIR/dx9asmtogl2.h"
		$File	"$TOGL_SRCDIR/glmgr_flush.inl"
		$File	"$TOGL_SRCDIR/intelglmallocworkaround.h" [$OSXALL]
		$File	"$TOGL_SRCDIR/mach_override.h" [$OSXALL]
	}

	$Folder	"Public Header Files" [$GL]
	{
		$File	"$SRCDIR/public/togl/rendermechanism.h"
		$File	"$TOGL_INCDIR/dxabstract.h"
		$File	"$TOGL_INCDIR/dxabstract_types.h"
		$File	"$TOGL_INCDIR/glbase.h"
		$File	"$TOGL_INCDIR/glentrypoints.h"
		$File	"$TOGL_INCDIR/glmgr.h"
		$File	"$TOGL_INCDIR/glmdebug.h"
		$File	"$TOGL_INCDIR/glmgrbasics.h"
		$File	"$TOGL_INCDIR/glmgrext.h"
		$File	"$TOGL_INCDIR/glmdisplay.h"
		$File	"$TOGL_INCDIR/glmdisplaydb.h"
		$File	"$TOGL_INCDIR/glfuncs.h"
		$File	"$TOGL_INCDIR/cglmtex.h"
		$File	"$TOGL_INCDIR/cglmfbo.h"
		$File	"$TOGL_INCDIR/cglmprogram.h"
		$File	"$TOGL_INCDIR/cglmbuffer.h"
		$File	"$TOGL_INCDIR/cglmquery.h"
	}

	$Folder	"Link Libraries"
	{
		$Lib	tier2
		$Lib	mathlib
	}
}