radial motion blur, FSR2 support, velocity separation, skybox blurring

Plenty of features added today after a long time in the oven.

- velocity thresholding
- velocity z-based testing
This commit is contained in:
sphynx-owner
2024-07-19 00:43:18 +03:00
parent 6eb5a55469
commit 349b3b7c1a
14 changed files with 1028 additions and 279 deletions

View File

@ -6,8 +6,10 @@
layout(set = 0, binding = 0) uniform sampler2D depth_sampler;
layout(set = 0, binding = 1) uniform sampler2D velocity_sampler;
layout(rgba16f, set = 0, binding = 2) uniform image2D buffer_a;
layout(rgba16f, set = 0, binding = 3) uniform image2D buffer_b;
layout(rgba16f, set = 0, binding = 2) uniform writeonly image2D buffer_a;
layout(rgba16f, set = 0, binding = 3) uniform writeonly image2D buffer_b;
layout(set = 0, binding = 4) uniform sampler2D buffer_a_sampler;
layout(set = 0, binding = 5) uniform sampler2D buffer_b_sampler;
layout(push_constant, std430) uniform Params
{
@ -22,15 +24,18 @@ layout(push_constant, std430) uniform Params
float parallel_sensitivity;
float perpendicular_sensitivity;
float depth_match_threshold;
float nan4;
float step_exponent_modifier;
float step_size;
float max_dilation_radius;
float nan_fl_1;
float nan_fl_2;
} params;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
const int kernel_size = 9;//8;//
const int kernel_size = 8;
const vec2 check_step_kernel[kernel_size] = {
vec2(0, 0),
vec2(1, 1),
vec2(0, 1),
vec2(-1, 1),
@ -41,27 +46,24 @@ const vec2 check_step_kernel[kernel_size] = {
vec2(0, -1),
};
// near plane distance
float npd = 0.05;
vec4 get_value(bool a, ivec2 uvi, ivec2 render_size)
vec4 get_value(bool a, vec2 uv, ivec2 render_size)
{
if ((uvi.x >= render_size.x) || (uvi.x < 0) || (uvi.y >= render_size.y) || (uvi.y < 0))
if (any(notEqual(uv, clamp(uv, vec2(0.0), vec2(1.0)))))
{
return vec4(-1, -1, 0, 1);
}
if(a)
{
return imageLoad(buffer_a, uvi);
return textureLod(buffer_a_sampler, uv, 0.0);
}
return imageLoad(buffer_b, uvi);
return textureLod(buffer_b_sampler, uv, 0.0);
}
void set_value(bool a, ivec2 uvi, vec4 value, ivec2 render_size)
{
if ((uvi.x >= render_size.x) || (uvi.x < 0) || (uvi.y >= render_size.y) || (uvi.y < 0))
if (any(notEqual(uvi, clamp(uvi, ivec2(0), render_size))))
{
return;
}
@ -79,10 +81,12 @@ void set_value(bool a, ivec2 uvi, vec4 value, ivec2 render_size)
float get_motion_difference(vec2 V, vec2 V2, float parallel_sensitivity, float perpendicular_sensitivity)
{
vec2 VO = V - V2;
// parallel offset
float parallel = abs(dot(VO, V) / max(FLT_MIN, dot(V, V)));
vec2 perpen_V = vec2(V.y, -V.x);
float perpendicular = abs(dot(VO, perpen_V) / max(FLT_MIN, dot(V, V)));
float difference = float(parallel) * parallel_sensitivity + float(perpendicular) * perpendicular_sensitivity;
// perpendicular offset
float perpendicular = abs(dot(VO, vec2(V.y, -V.x)) / max(FLT_MIN, dot(V, V)));
// weighted difference
float difference = parallel * parallel_sensitivity + perpendicular * perpendicular_sensitivity;
return clamp(difference, 0, 1);
}
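// worked example (illustrative): with V = (0.1, 0) and V2 = (0.05, 0.05),
// VO = (0.05, -0.05); parallel = |0.005| / 0.01 = 0.5 and
// perpendicular = |dot((0.05, -0.05), (0, -0.1))| / 0.01 = 0.5, so the
// difference is 0.5 * (parallel_sensitivity + perpendicular_sensitivity), clamped to 0 - 1.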
// ----------------------------------------------------------
@ -90,60 +94,56 @@ float get_motion_difference(vec2 V, vec2 V2, float parallel_sensitivity, float p
vec4 sample_fitness(vec2 uv_offset, vec4 uv_sample)
{
vec2 sample_velocity = -uv_sample.xy;
if (dot(sample_velocity, sample_velocity) <= FLT_MIN)
// if velocity is 0, we never reach it (steps never smaller than 1)
if (dot(sample_velocity, sample_velocity) <= FLT_MIN || uv_sample.w == 0)
{
return vec4(FLT_MAX, FLT_MAX, FLT_MAX, 0);
}
// if(dot(uv_offset, uv_offset) <= FLT_MIN)
// {
// uv_offset = normalize(sample_velocity) * FLT_MIN;
// }
float velocity_space_distance = dot(sample_velocity, uv_offset) / max(FLT_MIN, dot(sample_velocity, sample_velocity));
// velocity space distance (projected pixel offset onto velocity vector)
float velocity_space_distance = dot(sample_velocity, uv_offset) / dot(sample_velocity, sample_velocity);
// the velocity space distance to gravitate the JFA to (found more reliable than using a 0 - 1 range)
float mid_point = params.motion_blur_intensity / 2;
// centralize the velocity space distance around that mid point
float absolute_velocity_space_distance = abs(velocity_space_distance - mid_point);
// if that distance is at most the mid point, it's within range (we centered around the mid point)
float within_velocity_range = step(absolute_velocity_space_distance, mid_point);
vec2 perpen_offset = vec2(uv_offset.y, -uv_offset.x);
float side_offset = abs(dot(perpen_offset, sample_velocity)) / max(FLT_MIN, dot(sample_velocity, sample_velocity));
// perpendicular offset
float side_offset = abs(dot(vec2(uv_offset.y, -uv_offset.x), sample_velocity)) / dot(sample_velocity, sample_velocity);
// arbitrary perpendicular limit (lower means tighter dilation, but less reliable)
float within_perpen_error_range = step(side_offset, params.perpen_error_thresh * params.motion_blur_intensity);
return vec4(absolute_velocity_space_distance, velocity_space_distance, uv_sample.z, within_velocity_range * within_perpen_error_range);
// store relevant data for use in conditions
return vec4(absolute_velocity_space_distance, velocity_space_distance, uv_sample.w + uv_sample.z * velocity_space_distance, within_velocity_range * within_perpen_error_range);
}
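// worked example (illustrative): with motion_blur_intensity = 0.5 the mid point is 0.25;
// a sample whose projected offset is 0.3 of its velocity gives |0.3 - 0.25| = 0.05 <= 0.25,
// so within_velocity_range = 1, while a projected offset of 0.6 gives
// |0.6 - 0.25| = 0.35 > 0.25 and is rejected.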
bool is_sample_better(vec4 a, vec4 b)
float is_sample_better(vec4 a, vec4 b)
{
if((a.w == b.w) && (a.w == 1))
{
return a.z < b.z;
}
float nearer = a.z > b.z ? 1 : 0;
return a.x * b.w * nearer < b.x * a.w;
// see explanation at end of code
return mix(1. - step(b.x * a.w, a.x * b.w * (1. - step(b.z, a.z))), (1. - step(a.z, b.z)), step(abs(a.w - b.w), 0.5) * step(0.5, a.w));
}
vec4 get_backtracked_sample(vec2 uvn, vec2 chosen_uv, vec2 chosen_velocity, vec4 best_sample_fitness, vec2 render_size)
vec2 round_uv(vec2 uv, vec2 render_size)
{
//return vec4(chosen_uv, best_sample_fitness.x, 0);// comment this to enable backtracking
return (round((uv * render_size) - vec2(0.5)) + vec2(0.5)) / render_size;
}
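// e.g. (illustrative) with render_size.x = 1920, uv.x = 0.50001 maps to texel 960
// and comes back as (960 + 0.5) / 1920, i.e. snapped to the pixel center so
// repeated samples land on whole texel values.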
// dilation validation and better sample selection
vec4 get_backtracked_sample(vec2 uvn, vec2 chosen_uv, vec3 chosen_velocity, vec4 best_sample_fitness, vec2 render_size)
{
//return vec4(chosen_uv, best_sample_fitness.z, 0);// comment this to enable backtracking
float smallest_step = 1 / max(render_size.x, render_size.y);
float max_dilation_radius = pow(2, params.last_iteration_index) * params.sample_step_multiplier * smallest_step / (length(chosen_velocity) * params.motion_blur_intensity);
float general_velocity_multiplier = min(best_sample_fitness.y, max_dilation_radius);
// choose maximum range to check along (matches with implementation in blur stage)
float general_velocity_multiplier = min(best_sample_fitness.y, params.max_dilation_radius * smallest_step / (length(chosen_velocity) * params.motion_blur_intensity));
vec2 best_uv = chosen_uv;
float best_velocity_match_threshold = params.velocity_match_threshold;
//float best_multiplier = best_sample_fitness.y;
float best_depth = best_sample_fitness.z;
// set temp variable to keep track of better matches
float smallest_velocity_difference = params.velocity_match_threshold;
// minimum number of valid velocities to compare before a decision
int initial_steps_to_compare = 2;
int steps_to_compare = initial_steps_to_compare;
@ -152,43 +152,46 @@ vec4 get_backtracked_sample(vec2 uvn, vec2 chosen_uv, vec2 chosen_velocity, vec4
{
float velocity_multiplier = general_velocity_multiplier * (1 + float(i) / float(params.backtracking_sample_count));
if(velocity_multiplier > params.motion_blur_intensity + 0.2 || velocity_multiplier < FLT_MIN)
if(velocity_multiplier > params.motion_blur_intensity || velocity_multiplier < 0)
{
continue;
}
vec2 new_sample = uvn - chosen_velocity * velocity_multiplier;
vec2 check_uv = round_uv(uvn - chosen_velocity.xy * velocity_multiplier, render_size);
if((new_sample.x < 0.) || (new_sample.x > 1.) || (new_sample.y < 0.) || (new_sample.y > 1.))
if(any(notEqual(check_uv, clamp(check_uv, vec2(0.0), vec2(1.0)))))
{
continue;
}
vec2 velocity_test = textureLod(velocity_sampler, new_sample, 0.0).xy;
// get potential velocity and depth matches
vec3 velocity_test = textureLod(velocity_sampler, check_uv, 0.0).xyz;
float depth_test = textureLod(depth_sampler, new_sample, 0.0).x;
float depth_test = textureLod(depth_sampler, check_uv, 0.0).x;
float velocity_match = get_motion_difference(chosen_velocity, velocity_test, params.parallel_sensitivity, params.perpendicular_sensitivity);
if((abs(depth_test - npd / best_sample_fitness.z) < params.depth_match_threshold) && (velocity_match <= best_velocity_match_threshold))
float velocity_difference = get_motion_difference(chosen_velocity.xy, velocity_test.xy, params.parallel_sensitivity, params.perpendicular_sensitivity);
float current_depth = depth_test + chosen_velocity.z * velocity_multiplier;
// if checked sample matches depth and velocity, it is valid for backtracking
if((abs(current_depth - best_sample_fitness.z) < params.depth_match_threshold) && (velocity_difference <= smallest_velocity_difference))
{
best_uv = new_sample;
best_uv = check_uv;
//best_multiplier = velocity_multiplier;
best_depth = current_depth;
if(steps_to_compare == 0)
{
chosen_uv = best_uv;
best_velocity_match_threshold = velocity_match;
return vec4(chosen_uv, 0, 0);
return vec4(best_uv, best_depth, 0);
}
steps_to_compare--;
}
// if a sample was found and we lost footing after, go with that found sample right away
else if(initial_steps_to_compare > steps_to_compare)
{
chosen_uv = best_uv;
return vec4(chosen_uv, 0, 0);
return vec4(best_uv, best_depth, 0);
}
}
return vec4(uvn, best_sample_fitness.x, 1);
return vec4(uvn, best_sample_fitness.z, 1);
}
void main()
@ -199,77 +202,78 @@ void main()
{
return;
}
// must be on pixel center for whole values (tested)
vec2 uvn = (vec2(uvi) + vec2(0.5)) / render_size;
int iteration_index = params.iteration_index;
float step_size = round(pow(2, params.last_iteration_index - iteration_index));
vec2 uv_step = vec2(step_size) * params.sample_step_multiplier / render_size;
vec2 uv_step = vec2(params.step_size) / render_size;
vec4 best_sample_fitness = vec4(FLT_MAX, FLT_MAX, FLT_MAX, 0);
vec2 chosen_uv = uvn;
vec2 chosen_velocity = vec2(0);
vec3 chosen_velocity = vec3(0);
bool set_a = !bool(step(0.5, float(iteration_index % 2)));
bool set_a = !bool(step(0.5, float(params.iteration_index % 2)));
for(int i = 0; i < kernel_size; i++)
{
if((true || params.iteration_index == 0) && i == 0)
{
continue;
}
vec2 step_offset = check_step_kernel[i] * uv_step;
vec2 check_uv = uvn + step_offset;
if((check_uv.x < 0.) || (check_uv.x > 1.) || (check_uv.y < 0.) || (check_uv.y > 1.))
if(any(notEqual(check_uv, clamp(check_uv, vec2(0.0), vec2(1.0)))))
{
continue;
}
if(iteration_index > 0)
{
ivec2 check_uv2 = ivec2(check_uv * render_size);
vec4 buffer_load = get_value(!set_a, check_uv2, render_size);
if(params.iteration_index > 0)
{
vec4 buffer_load = get_value(!set_a, check_uv, render_size);
check_uv = buffer_load.xy;
step_offset = check_uv - uvn;
}
vec4 uv_sample = vec4(textureLod(velocity_sampler, check_uv, 0.0).xy, npd / textureLod(depth_sampler, check_uv, 0.0).x, 0);
vec4 uv_sample = vec4(textureLod(velocity_sampler, check_uv, 0.0).xyz, textureLod(depth_sampler, check_uv, 0.0).x);
vec4 current_sample_fitness = sample_fitness(step_offset, uv_sample);
if (is_sample_better(current_sample_fitness, best_sample_fitness))
if (is_sample_better(current_sample_fitness, best_sample_fitness) > 0.5)
{
best_sample_fitness = current_sample_fitness;
chosen_uv = check_uv;
chosen_velocity = uv_sample.xy;
chosen_velocity = uv_sample.xyz;
}
}
if(iteration_index < params.last_iteration_index)
if(params.iteration_index < params.last_iteration_index)
{
set_value(set_a, uvi, vec4(chosen_uv, best_sample_fitness.x, best_sample_fitness.w), render_size);
set_value(set_a, uvi, vec4(chosen_uv, 0, 0), render_size);
return;
}
float depth = npd / textureLod(depth_sampler, uvn, 0.0).x;
if(best_sample_fitness.w == 0 || depth < best_sample_fitness.z)
{
set_value(set_a, uvi, vec4(uvn, best_sample_fitness.x, 0), render_size);
return;
}
float depth = textureLod(depth_sampler, uvn, 0.0).x;
// best_sample_fitness.z contains the sampled depth plus the velocity z offset
vec4 backtracked_sample = get_backtracked_sample(uvn, chosen_uv, chosen_velocity, best_sample_fitness, render_size);
set_value(set_a, uvi, backtracked_sample, render_size);
if(best_sample_fitness.w == 0 || depth > backtracked_sample.z)
{
set_value(set_a, uvi, vec4(uvn, 0, 0), render_size);
return;
}
set_value(set_a, uvi, vec4(backtracked_sample.xy, 0, backtracked_sample.w), render_size);
return;
}
}
// ------ sample fitness conditions -------
// if((a.w == b.w) && (a.w == 1))
// {
// return a.z < b.z ? 1. : 0.;
// }
//
// float nearer = a.z > b.z ? 1 : 0;
//
// return a.x * b.w * nearer < b.x * a.w ? 1. : 0.;
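// mapping the branchless ops above to the reference branches (w is always 0 or 1):
//   step(abs(a.w - b.w), 0.5) * step(0.5, a.w)  ->  (a.w == b.w) && (a.w == 1)
//   1. - step(a.z, b.z)                          ->  a.z > b.z
//   1. - step(b.z, a.z)                          ->  a.z < b.z
//   1. - step(b.x * a.w, a.x * b.w * nearer)     ->  a.x * b.w * nearer < b.x * a.w
// mix() picks the depth comparison when both samples are valid hits and the
// fitness comparison otherwise; the depth comparisons read mirrored against the
// reference branch because .z now packs raw (reversed) depth, where larger
// values are presumably closer.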

View File

@ -0,0 +1,14 @@
[remap]
importer="glsl"
type="RDShaderFile"
uid="uid://yfjbb3iso4jt"
path="res://.godot/imported/jfp_backtracking_experimental.glsl-d67307f6899fed647164be410a7debc8.res"
[deps]
source_file="res://MyJumpFloodIteration/jfp_backtracking_experimental.glsl"
dest_files=["res://.godot/imported/jfp_backtracking_experimental.glsl-d67307f6899fed647164be410a7debc8.res"]
[params]

View File

@ -32,8 +32,16 @@ class_name MotionBlurSphynxJumpFlood
## an initial step size that can increase the dilation radius proportionally, at the
## sacrifice of some quality in the final resolution of the dilation.[br][br]
## the formula for the maximum radius of the dilation (in pixels) is: pow(2, JFA_pass_count) * sample_step_multiplier
@export var sample_step_multiplier : float = 8
## the formula for the maximum radius of the dilation (in pixels) is: pow(2 + step_exponent_modifier, JFA_pass_count) * sample_step_multiplier
@export var sample_step_multiplier : float = 4
## by default, the jump flood samples along distances that start
## at 2 to the power of the pass count you want to perform, which is also
## the dilation radius you get. with this variable you can raise that base
## above 2 and reach a higher dilation radius, at the sacrifice of
## some accuracy in the dilation.
## the formula for the maximum radius of the dilation (in pixels) is: pow(2 + step_exponent_modifier, JFA_pass_count) * sample_step_multiplier
@export var step_exponent_modifier : float = 1
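# worked example (illustrative): with the defaults above
# (step_exponent_modifier = 1, sample_step_multiplier = 4) and JFA_pass_count = 3,
# the documented formula gives pow(2 + 1, 3) * 4 = 108 pixels of maximum
# dilation radius.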
## how many steps to take along a range of 2 velocities from the
## dilation target velocity, searching for a better fitting velocity sample
@ -57,7 +65,7 @@ class_name MotionBlurSphynxJumpFlood
## the number of passes performed by the jump flood algorithm based dilation,
## each pass added doubles the maximum radius of dilation available.[br][br]
## the formula for the maximum radius of the dilation (in pixels) is: pow(2, JFA_pass_count) * sample_step_multiplier
## the formula for the maximum radius of the dilation (in pixels) is: pow(2 + step_exponent_modifier, JFA_pass_count) * sample_step_multiplier
@export var JFA_pass_count : int = 3
## whether this motion blur stays the same intensity below
@ -76,6 +84,14 @@ class_name MotionBlurSphynxJumpFlood
## shutter speeds at that framerate, and up.
@export var target_constant_framerate : float = 30
## whether to display debug views for the velocity and depth
## buffers
@export var draw_debug : bool = false
## currently 0 - 1, flips between the velocity buffer
## and depth buffer debug views
@export var debug_page : int = 0
var rd: RenderingDevice
var linear_sampler: RID
@ -97,15 +113,12 @@ var buffer_b : StringName = "buffer_b"
var past_color : StringName = "past_color"
var velocity_3D : StringName = "velocity_3D"
var velocity_curl : StringName = "velocity_curl"
var draw_debug : float = 0
var custom_velocity : StringName = "custom_velocity"
var freeze : bool = false
func _init():
effect_callback_type = CompositorEffect.EFFECT_CALLBACK_TYPE_POST_TRANSPARENT
#effect_callback_type = EFFECT_CALLBACK_TYPE_POST_TRANSPARENT
needs_motion_vectors = true
RenderingServer.call_on_render_thread(_initialize_compute)
@ -178,7 +191,7 @@ func _render_callback(p_effect_callback_type, p_render_data):
temp_motion_blur_intensity = motion_blur_intensity * capped_frame_time / delta_time
if rd and p_effect_callback_type == CompositorEffect.EFFECT_CALLBACK_TYPE_POST_TRANSPARENT:
if rd:
var render_scene_buffers: RenderSceneBuffersRD = p_render_data.get_render_scene_buffers()
var render_scene_data: RenderSceneDataRD = p_render_data.get_render_scene_data()
if render_scene_buffers and render_scene_data:
@ -190,42 +203,59 @@ func _render_callback(p_effect_callback_type, p_render_data):
ensure_texture(buffer_a, render_scene_buffers)
ensure_texture(buffer_b, render_scene_buffers)
ensure_texture(past_color, render_scene_buffers)
ensure_texture(custom_velocity, render_scene_buffers)
rd.draw_command_begin_label("Motion Blur", Color(1.0, 1.0, 1.0, 1.0))
var last_iteration_index : int = JFA_pass_count - 1;
var max_dilation_radius : float = pow(2 + step_exponent_modifier, last_iteration_index) * sample_step_multiplier / motion_blur_intensity;
var push_constant: PackedFloat32Array = [
motion_blur_samples, temp_motion_blur_intensity,
motion_blur_center_fade, draw_debug,
motion_blur_center_fade, 1 if draw_debug else 0,
freeze,
Engine.get_frames_drawn() % 8,
last_iteration_index,
sample_step_multiplier
sample_step_multiplier,
step_exponent_modifier,
max_dilation_radius,
0,
0
]
var int_push_constant : PackedInt32Array = [
debug_page,
0,
0,
0
]
var byte_array = push_constant.to_byte_array()
byte_array.append_array(int_push_constant.to_byte_array())
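# 12 floats (48 bytes) + 4 ints (16 bytes) = 64 bytes, which keeps the push
# constant block padded to a 16-byte multiple (matching the std430 layout of
# Params in the shader).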
var view_count = render_scene_buffers.get_view_count()
for view in range(view_count):
var color_image := render_scene_buffers.get_color_layer(view)
var depth_image := render_scene_buffers.get_depth_layer(view)
var velocity_image := render_scene_buffers.get_velocity_layer(view)
var texture_image := render_scene_buffers.get_texture_slice(context, texture, view, 0, 1, 1)
var buffer_a_image := render_scene_buffers.get_texture_slice(context, buffer_a, view, 0, 1, 1)
var buffer_b_image := render_scene_buffers.get_texture_slice(context, buffer_b, view, 0, 1, 1)
var past_color_image := render_scene_buffers.get_texture_slice(context, past_color, view, 0, 1, 1)
var custom_velocity_image := render_scene_buffers.get_texture_slice(context, custom_velocity, view, 0, 1, 1)
rd.draw_command_begin_label("Construct blur " + str(view), Color(1.0, 1.0, 1.0, 1.0))
var tex_uniform_set
var compute_list
var x_groups := floori((render_size.x - 1) / 8 + 1)
var y_groups := floori((render_size.y - 1) / 8 + 1)
var x_groups := floori((render_size.x - 1) / 16 + 1)
var y_groups := floori((render_size.y - 1) / 16 + 1)
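# ceil-divide the render size into 16x16 workgroups to match the shader's
# local_size: e.g. (illustrative) 1920x1080 -> 120x68 groups.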
tex_uniform_set = UniformSetCacheRD.get_cache(construct_shader, 0, [
get_sampler_uniform(depth_image, 0),
get_sampler_uniform(velocity_image, 1),
get_sampler_uniform(custom_velocity_image, 1),
get_image_uniform(buffer_a_image, 2),
get_image_uniform(buffer_b_image, 3),
get_sampler_uniform(buffer_a_image, 4),
get_sampler_uniform(buffer_b_image, 5)
])
compute_list = rd.compute_list_begin()
@ -240,6 +270,8 @@ func _render_callback(p_effect_callback_type, p_render_data):
16
]
var step_size : float = round(pow(2 + step_exponent_modifier, last_iteration_index - i)) * sample_step_multiplier;
var jf_float_push_constants_test : PackedFloat32Array = [
perpen_error_threshold,
sample_step_multiplier,
@ -248,6 +280,10 @@ func _render_callback(p_effect_callback_type, p_render_data):
backtracking_velocity_match_parallel_sensitivity,
backtracking_velcoity_match_perpendicular_sensitivity,
backtracbing_depth_match_threshold,
step_exponent_modifier,
step_size,
max_dilation_radius,
0,
0
]
@ -266,8 +302,8 @@ func _render_callback(p_effect_callback_type, p_render_data):
tex_uniform_set = UniformSetCacheRD.get_cache(motion_blur_shader, 0, [
get_sampler_uniform(color_image, 0),
get_sampler_uniform(depth_image, 1),
get_sampler_uniform(velocity_image, 2),
get_image_uniform(buffer_b_image if last_iteration_index % 2 else buffer_a_image, 3),
get_sampler_uniform(custom_velocity_image, 2),
get_sampler_uniform(buffer_b_image if last_iteration_index % 2 else buffer_a_image, 3),
get_image_uniform(texture_image, 4),
get_image_uniform(past_color_image, 5),
])
@ -275,7 +311,7 @@ func _render_callback(p_effect_callback_type, p_render_data):
compute_list = rd.compute_list_begin()
rd.compute_list_bind_compute_pipeline(compute_list, motion_blur_pipeline)
rd.compute_list_bind_uniform_set(compute_list, tex_uniform_set, 0)
rd.compute_list_set_push_constant(compute_list, push_constant.to_byte_array(), push_constant.size() * 4)
rd.compute_list_set_push_constant(compute_list, byte_array, byte_array.size())
rd.compute_list_dispatch(compute_list, x_groups, y_groups, 1)
rd.compute_list_end()
rd.draw_command_end_label()
@ -297,7 +333,7 @@ func _render_callback(p_effect_callback_type, p_render_data):
rd.draw_command_end_label()
func ensure_texture(texture_name : StringName, render_scene_buffers : RenderSceneBuffersRD, high_accuracy : bool = false, render_size_multiplier : Vector2 = Vector2(1, 1)):
func ensure_texture(texture_name : StringName, render_scene_buffers : RenderSceneBuffersRD, texture_format : RenderingDevice.DataFormat = RenderingDevice.DATA_FORMAT_R16G16B16A16_SFLOAT, high_accuracy : bool = false, render_size_multiplier : Vector2 = Vector2(1, 1)):
var render_size : Vector2 = Vector2(render_scene_buffers.get_internal_size()) * render_size_multiplier
if render_scene_buffers.has_texture(context, texture_name):
@ -307,5 +343,4 @@ func ensure_texture(texture_name : StringName, render_scene_buffers : RenderScen
if !render_scene_buffers.has_texture(context, texture_name):
var usage_bits: int = RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT | RenderingDevice.TEXTURE_USAGE_STORAGE_BIT
var texture_format = RenderingDevice.DATA_FORMAT_R32G32B32A32_SFLOAT if high_accuracy else RenderingDevice.DATA_FORMAT_R16G16B16A16_SFLOAT
render_scene_buffers.create_texture(context, texture_name, texture_format, usage_bits, RenderingDevice.TEXTURE_SAMPLES_1, render_size, 1, 1, true)

View File

@ -9,8 +9,8 @@
layout(set = 0, binding = 0) uniform sampler2D color_sampler;
layout(set = 0, binding = 1) uniform sampler2D depth_sampler;
layout(set = 0, binding = 2) uniform sampler2D vector_sampler;
layout(rgba16f, set = 0, binding = 3) uniform readonly image2D velocity_map;
layout(rgba16f, set = 0, binding = 4) uniform image2D output_image;
layout(set = 0, binding = 3) uniform sampler2D velocity_map;
layout(rgba16f, set = 0, binding = 4) uniform writeonly image2D output_image;
layout(rgba16f, set = 0, binding = 5) uniform image2D past_color_image;
layout(push_constant, std430) uniform Params
@ -23,18 +23,17 @@ layout(push_constant, std430) uniform Params
float frame;
float last_iteration_index;
float sample_step_multiplier;
float step_exponent_modifier;
float max_dilation_radius;
float nan_fl_3;
float nan_fl_4;
int debug_page;
int nan1;
int nan2;
int nan3;
} params;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// velocity similarity divisors
float vsim_parallel = 20;
float vsim_perpendicular = 20;
// for velocity similarity check
float depth_bias = 0.1;
// sample weight threshold
float sw_threshold = 0.1;
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
// near plane distance
float npd = 0.05;
@ -44,14 +43,6 @@ float sze = 0.1;
// Helper functions
// --------------------------------------------
vec2 get_depth_difference_at_derivative(vec2 uv, vec2 step_size)
{
float base = textureLod(depth_sampler, uv, 0.0).x;
float x = textureLod(depth_sampler, uv + vec2(0, step_size.x), 0.0).x;
float y = textureLod(depth_sampler, uv + vec2(step_size.y, 0), 0.0).x;
return vec2(x - base, y - base);
}
// from https://www.shadertoy.com/view/ftKfzc
float interleaved_gradient_noise(vec2 uv, int FrameId){
uv += float(FrameId) * (vec2(47, 17) * 0.695);
@ -60,94 +51,23 @@ float interleaved_gradient_noise(vec2 uv, int FrameId){
return fract(magic.z * fract(dot(uv, magic.xy)));
}
float get_velocity_convergence(vec2 uv, vec2 step_size)
{
vec2 base = textureLod(vector_sampler, uv, 0.0).xy;
vec2 x = textureLod(vector_sampler, uv + vec2(0, step_size.x), 0.0).xy;
vec2 y = textureLod(vector_sampler, uv + vec2(step_size.y, 0), 0.0).xy;
return (dot(vec2(0, 1), vec2(x - base)) + dot(vec2(1, 0), vec2(y - base)));
}
vec3 get_ndc_velocity(vec2 uv, vec2 render_size, float depth)
{
float ndc_velocity_z = get_velocity_convergence(uv, vec2(1) / render_size) / depth;
vec2 ndc_velocity_xy = textureLod(vector_sampler, uv, 0.0).xy;
return vec3(ndc_velocity_xy, ndc_velocity_z);
}
vec3 get_world_velocity(vec2 uv, vec2 render_size, float depth)
{
return get_ndc_velocity(uv, render_size, depth) / depth;
}
vec3 get_velocity_curl_vector(vec2 uv, vec2 render_size)
{
float depth = textureLod(depth_sampler, uv, 0.0).x;
vec2 step_size = vec2(1) / render_size;
vec3 base = get_world_velocity(uv, render_size, depth);
vec3 x = get_world_velocity(uv + vec2(step_size.x, 0), render_size, depth);
vec3 y = get_world_velocity(uv + vec2(0, step_size.y), render_size, depth);
vec2 depth_derivative = get_depth_difference_at_derivative(uv, step_size) / depth;
vec3 x_vector = normalize(vec3(step_size.x, 0, 0));
vec3 y_vector = normalize(vec3(0, step_size.y, 0));
vec3 cross_x = cross((x - base) / vec3(step_size, 0), x_vector);
vec3 cross_y = cross((y - base) / vec3(step_size, 0), y_vector);
return cross_x + cross_y;
}
float get_velocity_curl(vec2 uv, vec2 render_size)
{
vec2 step_size = vec2(1) / render_size;
vec2 base = textureLod(vector_sampler, uv, 0.0).xy;
vec2 x = textureLod(vector_sampler, uv + vec2(0, step_size.x), 0.0).xy;
vec2 y = textureLod(vector_sampler, uv + vec2(step_size.y, 0), 0.0).xy;
return (cross(vec3(0, 1, 0), vec3(x - base, 0) / vec3(step_size, 0)) + cross(vec3(1, 0, 0), vec3(y - base, 0) / vec3(step_size, 0))).z;
}
// -------------------------------------------------------
// McGuire's functions https://docs.google.com/document/d/1IIlAKTj-O01hcXEdGxTErQbCHO9iBmRx6oFUy_Jm0fI/edit
// ----------------------------------------------------------
// returns 1 when depth_X is at most depth_Y, falling to 0 as depth_X exceeds depth_Y by sze; with smaller meaning closer, 1 means X is at least as close to the camera as Y
float soft_depth_compare(float depth_X, float depth_Y)
{
return clamp(1 - (depth_X - depth_Y) / sze, 0, 1);
}
float soft_depth_compare_custom(float depth_X, float depth_Y, float csze)
{
return clamp(1 - (depth_X - depth_Y) / csze, 0, 1);
}
float cone(vec2 X, vec2 Y, vec2 v)
{
return clamp(1 - length(X - Y) / length(v), 0, 1);
}
float cylinder(vec2 X, vec2 Y, vec2 v)
{
return 1.0 + smoothstep(0.95 * length(v), 1.05 * length(v), length(X - Y));
}
// ----------------------------------------------------------
// Motion similarity
// ----------------------------------------------------------
float get_motion_difference(vec2 V, vec2 V2, float power)
{
vec2 VO = V - V2;
float difference = dot(VO, V) / max(FLT_MIN, dot(V, V));
return pow(clamp(difference, 0, 1), power);
}
// ----------------------------------------------------------
// McGuire's function https://docs.google.com/document/d/1IIlAKTj-O01hcXEdGxTErQbCHO9iBmRx6oFUy_Jm0fI/edit
float soft_depth_compare(float depth_X, float depth_Y)
{
return clamp(1 - (depth_X - depth_Y) / sze, 0, 1);
}
// -------------------------------------------------------
vec2 round_uv(vec2 uv, vec2 render_size)
{
return (round((uv * render_size) - vec2(0.5)) + vec2(0.5)) / render_size;
}
void main()
{
@ -157,91 +77,95 @@ void main()
{
return;
}
// show past image for freeze frame
if(params.freeze > 0)
{
imageStore(output_image, uvi, imageLoad(past_color_image, uvi));
return;
}
// must be on pixel center for whole values (tested)
vec2 uvn = vec2(uvi + vec2(0.5)) / render_size;
int iteration_count = int(params.motion_blur_samples);
vec4 base_color = textureLod(color_sampler, uvn, 0.0);
// get dominant velocity data
vec4 velocity_map_sample = textureLod(velocity_map, uvn, 0.0);
vec4 base = textureLod(color_sampler, uvn, 0.0);
vec4 result_constructed_color = vec4(0);
vec4 velocity_map_sample = imageLoad(velocity_map, uvi);
vec3 velocity = -textureLod(vector_sampler, velocity_map_sample.xy, 0.0).xyz;
vec3 dominant_velocity = -textureLod(vector_sampler, velocity_map_sample.xy, 0.0).xyz;
vec3 naive_velocity = -textureLod(vector_sampler, uvn, 0.0).xyz;
float max_dialtion_radius = pow(2, params.last_iteration_index) * params.sample_step_multiplier * 2 / max(render_size.x, render_size.y);
if ((dot(velocity, velocity) == 0 || params.motion_blur_intensity == 0) && params.debug == 0)
// if velocity is 0 and we don't show debug, return right away.
if ((dot(dominant_velocity, dominant_velocity) == 0 || params.motion_blur_intensity == 0) && params.debug == 0)
{
imageStore(output_image, uvi, base);
imageStore(past_color_image, uvi, base);
imageStore(output_image, uvi, base_color);
imageStore(past_color_image, uvi, base_color);
return;
}
float noise_offset = (interleaved_gradient_noise(uvi, int(params.frame)) - 1);
// offset along velocity to blend between sample steps
float noise_offset = interleaved_gradient_noise(uvi, int(params.frame)) - 1;
// scale of step
float velocity_step_coef = min(params.motion_blur_intensity, params.max_dilation_radius / max(render_size.x, render_size.y) / (length(dominant_velocity) * params.motion_blur_intensity)) / max(1.0, params.motion_blur_samples - 1.0);
float velocity_step_coef = min(params.motion_blur_intensity, max_dialtion_radius / (length(velocity) * params.motion_blur_intensity)) / max(1.0, params.motion_blur_samples - 1.0);
vec3 step_sample = dominant_velocity * velocity_step_coef;
vec3 sample_step = velocity * velocity_step_coef;
vec3 naive_step_sample = naive_velocity * velocity_step_coef;
vec4 velocity_map_sample_step = vec4(0);
vec4 velocity_map_step_sample = vec4(0);
//float d = 1.0 - min(1.0, 2.0 * distance(uvn, vec2(0.5)));
//sample_step *= 1.0 - d * params.fade_padding.x;
float total_weight = 1;
vec2 offset = vec2(sample_step * noise_offset);//vec2(0);//
vec3 dominant_offset = step_sample * noise_offset;
vec4 col = base * total_weight;
vec3 naive_offset = naive_step_sample * noise_offset;
float depth = max(FLT_MIN, textureLod(depth_sampler, velocity_map_sample.xy, 0.0).x);
vec4 col = base_color * total_weight;
float naive_depth = max(FLT_MIN, textureLod(depth_sampler, uvn, 0.0).x);
float naive_background = soft_depth_compare_custom(depth, naive_depth, 0.0001);
for (int i = 1; i < iteration_count; i++)
float dominant_depth = textureLod(depth_sampler, velocity_map_sample.xy, 0.0).x;
float naive_depth = textureLod(depth_sampler, uvn, 0.0).x;
// is dilation in front of ground truth (have we started sampling inside a dilation)
float dilation_foreground = step(naive_depth, dominant_depth - 0.000001);
for (int i = 1; i < params.motion_blur_samples; i++)
{
offset += sample_step.xy;
dominant_offset += step_sample;
vec2 uvo = uvn + offset;
naive_offset += naive_step_sample;
if (any(notEqual(uvo, clamp(uvo, vec2(0.0), vec2(1.0)))))
vec2 dominant_uvo = round_uv(uvn + dominant_offset.xy, render_size);
vec2 naive_uvo = round_uv(uvn + naive_offset.xy, render_size);
if (any(notEqual(dominant_uvo, clamp(dominant_uvo, vec2(0.0), vec2(1.0)))))
{
break;
}
velocity_map_sample_step = imageLoad(velocity_map, ivec2(uvo * render_size));
velocity_map_step_sample = textureLod(velocity_map, dominant_uvo, 0.0);
vec3 current_velocity = -textureLod(vector_sampler, velocity_map_sample_step.xy, 0.0).xyz;
vec3 current_velocity = -textureLod(vector_sampler, velocity_map_step_sample.xy, 0.0).xyz;
float current_depth = max(FLT_MIN, textureLod(depth_sampler, velocity_map_sample_step.xy, 0.0).x);
float current_dominant_depth = textureLod(depth_sampler, velocity_map_step_sample.xy, 0.0).x;
float current_naive_depth = max(FLT_MIN, textureLod(depth_sampler, uvo, 0.0).x);
float current_naive_depth = textureLod(depth_sampler, dominant_uvo, 0.0).x;
// is current velocity different than dilated velocity
float motion_difference = get_motion_difference(dominant_velocity.xy, current_velocity.xy, 0.1);
// is current depth closer than origin of dilation (object in the foreground)
float foreground = step(naive_depth + dominant_offset.z, current_naive_depth - 0.0001);
// is dilation in front of current ground truth (are we within a dilation still)
float naive_foreground = step(0.05 / dominant_depth + 0.1, 0.05 / current_naive_depth);
// if we are sampling a foreground object and its velocity is different, discard this sample (prevent ghosting)
float sample_weight = 1 - (foreground * motion_difference);
float motion_difference = get_motion_difference(velocity.xy, current_velocity.xy, 0.1);
float naive_sample_weight = 1 - (foreground * motion_difference);
// if we started from and are still inside a dilation, choose the naive values for blurring
float dominant_naive_mix = dilation_foreground * naive_foreground;
float foreground = soft_depth_compare(npd / current_depth, npd / depth);
vec2 sample_uv = mix(dominant_uvo, naive_uvo, dominant_naive_mix);
float naive_foreground = soft_depth_compare(npd / current_naive_depth - sze, npd / depth);//soft_depth_compare_custom(depth, current_naive_depth, 0.0001);//
float sample_weight = 1;
sample_weight *= 1 - (foreground * motion_difference);
total_weight += sample_weight;
vec2 sample_uv = mix(uvo, uvn, 1 - max(naive_background, naive_foreground));//uvo;//
total_weight += mix(sample_weight, naive_sample_weight, dominant_naive_mix);
col += textureLod(color_sampler, sample_uv, 0.0) * sample_weight;
}
@ -255,15 +179,28 @@ void main()
return;
}
vec4 tl_col = vec4(abs(textureLod(vector_sampler, uvn, 0.0).xy) * 10, 0, 1);
vec4 tl_col;
vec4 tr_col = vec4(abs(velocity.xy) * 10, 0, 1);//vec4(naive_background);//
vec4 tr_col;
vec4 bl_col = vec4(abs(velocity_map_sample.xyw - vec3(uvn, 0)) * vec3(10, 10, 1), 1);
vec4 bl_col;
vec4 br_col = col;
vec4 br_col;
//imageStore(past_color_image, uvi, imageLoad(output_image, uvi));
if(params.debug_page == 0)
{
tl_col = vec4((textureLod(vector_sampler, uvn, 0.0).xyz) * vec3(10, 10, 10000), 1);
tr_col = vec4(abs(dominant_velocity.xy) * 10, 0, 1);
bl_col = vec4(abs(velocity_map_sample.xyw - vec3(uvn, 0)) * vec3(10, 10, 1), 1);
br_col = col;
}
if(params.debug_page == 1)
{
tl_col = vec4(naive_depth * 10);
tr_col = vec4(dilation_foreground);
bl_col = vec4(dominant_depth * 10);
br_col = col;
}
imageStore(output_image, uvi / 2, tl_col);
imageStore(output_image, uvi / 2 + ivec2(vec2(0.5, 0.5) * render_size), br_col);

View File

@ -0,0 +1,14 @@
[remap]
importer="glsl"
type="RDShaderFile"
uid="uid://c3hemlr50tv6p"
path="res://.godot/imported/jump_flood_blur.glsl-df0c6b7cc65d8b0520871790f9075253.res"
[deps]
source_file="res://MyJumpFloodIteration/jump_flood_blur.glsl"
dest_files=["res://.godot/imported/jump_flood_blur.glsl-df0c6b7cc65d8b0520871790f9075253.res"]
[params]

View File

@ -4,7 +4,7 @@
layout(set = 0, binding = 0) uniform sampler2D blur_sampler;
layout(rgba16f, set = 0, binding = 1) uniform image2D color_image;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
void main()
{
ivec2 render_size = ivec2(textureSize(blur_sampler, 0));

View File

@ -0,0 +1,14 @@
[remap]
importer="glsl"
type="RDShaderFile"
uid="uid://d03sqqhg3n1ys"
path="res://.godot/imported/jump_flood_overlay.glsl-764d3c488a9e9576eab825591e868325.res"
[deps]
source_file="res://MyJumpFloodIteration/jump_flood_overlay.glsl"
dest_files=["res://.godot/imported/jump_flood_overlay.glsl-764d3c488a9e9576eab825591e868325.res"]
[params]

View File

@ -0,0 +1,9 @@
extends Resource
class_name BlurVelocityComponentResource
## the overall multiplier of the velocity component
@export var multiplier : float = 1.0
## a lower threshold for the velocity to be blurred
@export var lower_threshold : float = 0.0
## an upper threshold for the velocity to be blurred
@export var upper_threshold : float = 0.0
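# note: these three values feed the sharp_step ramp in pre_blur_processor.glsl
# (see the push constants below): each component is scaled by
# multiplier * sharp_step(lower_threshold, upper_threshold, length(velocity)).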

View File

@ -0,0 +1,153 @@
extends CompositorEffect
class_name PreBlurProcessor
@export var pre_blur_processor_shader_file : RDShaderFile = preload("res://addons/PreBlurProcessing/pre_blur_processor.glsl"):
set(value):
pre_blur_processor_shader_file = value
_init()
@export var camera_rotation_component : BlurVelocityComponentResource = BlurVelocityComponentResource.new()
@export var camera_movement_component : BlurVelocityComponentResource = BlurVelocityComponentResource.new()
@export var object_movement_component : BlurVelocityComponentResource = BlurVelocityComponentResource.new()
var context: StringName = "MotionBlur"
var rd: RenderingDevice
var linear_sampler: RID
var construct_shader : RID
var construct_pipeline : RID
var pre_blur_processor_shader: RID
var pre_blur_processor_pipeline: RID
var custom_velocity : StringName = "custom_velocity"
func _init():
needs_motion_vectors = true
RenderingServer.call_on_render_thread(_initialize_compute)
func _notification(what):
if what == NOTIFICATION_PREDELETE:
if linear_sampler.is_valid():
rd.free_rid(linear_sampler)
if pre_blur_processor_shader.is_valid():
rd.free_rid(pre_blur_processor_shader)
func _initialize_compute():
rd = RenderingServer.get_rendering_device()
if !rd:
return
var sampler_state := RDSamplerState.new()
sampler_state.min_filter = RenderingDevice.SAMPLER_FILTER_LINEAR
sampler_state.mag_filter = RenderingDevice.SAMPLER_FILTER_LINEAR
sampler_state.repeat_u = RenderingDevice.SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE
sampler_state.repeat_v = RenderingDevice.SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE
linear_sampler = rd.sampler_create(sampler_state)
var shader_spirv: RDShaderSPIRV = pre_blur_processor_shader_file.get_spirv()
pre_blur_processor_shader = rd.shader_create_from_spirv(shader_spirv)
pre_blur_processor_pipeline = rd.compute_pipeline_create(pre_blur_processor_shader)
func get_image_uniform(image: RID, binding: int) -> RDUniform:
var uniform: RDUniform = RDUniform.new()
uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
uniform.binding = binding
uniform.add_id(image)
return uniform
func get_sampler_uniform(image: RID, binding: int) -> RDUniform:
var uniform: RDUniform = RDUniform.new()
uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_SAMPLER_WITH_TEXTURE
uniform.binding = binding
uniform.add_id(linear_sampler)
uniform.add_id(image)
return uniform
func _render_callback(p_effect_callback_type, p_render_data : RenderData):
if rd:
var render_scene_buffers: RenderSceneBuffersRD = p_render_data.get_render_scene_buffers()
var render_scene_data: RenderSceneDataRD = p_render_data.get_render_scene_data()
if render_scene_buffers and render_scene_data:
var render_size: Vector2 = render_scene_buffers.get_internal_size()
if render_size.x == 0.0 or render_size.y == 0.0:
return
ensure_texture(custom_velocity, render_scene_buffers)
rd.draw_command_begin_label("Pre Blur Processing", Color(1.0, 1.0, 1.0, 1.0))
var float_pre_blur_push_constants: PackedFloat32Array = [
camera_rotation_component.multiplier,
camera_movement_component.multiplier,
object_movement_component.multiplier,
camera_rotation_component.lower_threshold,
camera_movement_component.lower_threshold,
object_movement_component.lower_threshold,
camera_rotation_component.upper_threshold,
camera_movement_component.upper_threshold,
object_movement_component.upper_threshold,
1 if true else 0,
0,
0,
]
var int_pre_blur_push_constants : PackedInt32Array = [
]
var byte_array = float_pre_blur_push_constants.to_byte_array()
byte_array.append_array(int_pre_blur_push_constants.to_byte_array())
var view_count = render_scene_buffers.get_view_count()
for view in range(view_count):
var color_image := render_scene_buffers.get_color_layer(view)
var depth_image := render_scene_buffers.get_depth_layer(view)
var velocity_image := render_scene_buffers.get_velocity_layer(view)
var custom_velocity_image := render_scene_buffers.get_texture_slice(context, custom_velocity, view, 0, 1, 1)
var scene_data_buffer : RID = render_scene_data.get_uniform_buffer()
var scene_data_buffer_uniform := RDUniform.new()
scene_data_buffer_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_UNIFORM_BUFFER
scene_data_buffer_uniform.binding = 5
scene_data_buffer_uniform.add_id(scene_data_buffer)
var tex_uniform_set
var compute_list
var x_groups := floori((render_size.x - 1) / 16 + 1)
var y_groups := floori((render_size.y - 1) / 16 + 1)
rd.draw_command_begin_label("Process Velocity Buffer " + str(view), Color(1.0, 1.0, 1.0, 1.0))
tex_uniform_set = UniformSetCacheRD.get_cache(pre_blur_processor_shader, 0, [
get_sampler_uniform(color_image, 0),
get_sampler_uniform(depth_image, 1),
get_sampler_uniform(velocity_image, 2),
get_image_uniform(custom_velocity_image, 3),
get_image_uniform(color_image, 4),
scene_data_buffer_uniform,
])
compute_list = rd.compute_list_begin()
rd.compute_list_bind_compute_pipeline(compute_list, pre_blur_processor_pipeline)
rd.compute_list_bind_uniform_set(compute_list, tex_uniform_set, 0)
rd.compute_list_set_push_constant(compute_list, byte_array, byte_array.size())
rd.compute_list_dispatch(compute_list, x_groups, y_groups, 1)
rd.compute_list_end()
rd.draw_command_end_label()
rd.draw_command_end_label()
func ensure_texture(texture_name : StringName, render_scene_buffers : RenderSceneBuffersRD, high_accuracy : bool = false, render_size_multiplier : Vector2 = Vector2(1, 1)):
var render_size : Vector2 = Vector2(render_scene_buffers.get_internal_size()) * render_size_multiplier
if render_scene_buffers.has_texture(context, texture_name):
var tf: RDTextureFormat = render_scene_buffers.get_texture_format(context, texture_name)
if tf.width != render_size.x or tf.height != render_size.y:
render_scene_buffers.clear_context(context)
if !render_scene_buffers.has_texture(context, texture_name):
var usage_bits: int = RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT | RenderingDevice.TEXTURE_USAGE_STORAGE_BIT
var texture_format = RenderingDevice.DATA_FORMAT_R32G32B32A32_SFLOAT if high_accuracy else RenderingDevice.DATA_FORMAT_R16G16B16A16_SFLOAT
render_scene_buffers.create_texture(context, texture_name, texture_format, usage_bits, RenderingDevice.TEXTURE_SAMPLES_1, render_size, 1, 1, true)

View File

@ -0,0 +1,192 @@
#[compute]
#version 450
#define FLT_MAX 3.402823466e+38
#define FLT_MIN 1.175494351e-38
layout(set = 0, binding = 0) uniform sampler2D color_sampler;
layout(set = 0, binding = 1) uniform sampler2D depth_sampler;
layout(set = 0, binding = 2) uniform sampler2D vector_sampler;
layout(rgba32f, set = 0, binding = 3) uniform writeonly image2D vector_output;
layout(rgba16f, set = 0, binding = 4) uniform writeonly image2D color_output;
struct SceneData {
highp mat4 projection_matrix;
highp mat4 inv_projection_matrix;
highp mat4 inv_view_matrix;
highp mat4 view_matrix;
// only used for multiview
highp mat4 projection_matrix_view[2];
highp mat4 inv_projection_matrix_view[2];
highp vec4 eye_offset[2];
// Used for billboards to cast correct shadows.
highp mat4 main_cam_inv_view_matrix;
highp vec2 viewport_size;
highp vec2 screen_pixel_size;
// Use vec4s because std140 doesn't play nice with vec2s, z and w are wasted.
highp vec4 directional_penumbra_shadow_kernel[32];
highp vec4 directional_soft_shadow_kernel[32];
highp vec4 penumbra_shadow_kernel[32];
highp vec4 soft_shadow_kernel[32];
mediump mat3 radiance_inverse_xform;
mediump vec4 ambient_light_color_energy;
mediump float ambient_color_sky_mix;
bool use_ambient_light;
bool use_ambient_cubemap;
bool use_reflection_cubemap;
highp vec2 shadow_atlas_pixel_size;
highp vec2 directional_shadow_pixel_size;
uint directional_light_count;
mediump float dual_paraboloid_side;
highp float z_far;
highp float z_near;
bool roughness_limiter_enabled;
mediump float roughness_limiter_amount;
mediump float roughness_limiter_limit;
mediump float opaque_prepass_threshold;
bool fog_enabled;
uint fog_mode;
highp float fog_density;
highp float fog_height;
highp float fog_height_density;
highp float fog_depth_curve;
highp float pad;
highp float fog_depth_begin;
mediump vec3 fog_light_color;
highp float fog_depth_end;
mediump float fog_sun_scatter;
mediump float fog_aerial_perspective;
highp float time;
mediump float reflection_multiplier; // one normally, zero when rendering reflections
vec2 taa_jitter;
bool material_uv2_mode;
float emissive_exposure_normalization;
float IBL_exposure_normalization;
bool pancake_shadows;
uint camera_visible_layers;
float pass_alpha_multiplier;
};
layout(set = 0, binding = 5, std140) uniform SceneDataBlock {
SceneData data;
SceneData prev_data;
}
scene;
layout(push_constant, std430) uniform Params
{
float rotation_velocity_multiplier;
float movement_velocity_multiplier;
float object_velocity_multilpier;
float rotation_velocity_lower_threshold;
float movement_velocity_lower_threshold;
float object_velocity_lower_threshold;
float rotation_velocity_upper_threshold;
float movement_velocity_upper_threshold;
float object_velocity_upper_threshold;
float is_fsr2;
float nan_fl_1;
float nan_fl_2;
} params;
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
float sharp_step(float lower, float upper, float x)
{
return clamp((x - lower) / (upper - lower), 0, 1);
}
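// unlike smoothstep, this is a plain linear ramp: 0 below lower, 1 above
// upper, linear in between (no hermite easing).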
float get_view_depth(float depth)
{
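// stub: always returns 0; presumably a placeholder for depth linearization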
return 0.;
}
void main()
{
ivec2 render_size = ivec2(textureSize(color_sampler, 0));
ivec2 uvi = ivec2(gl_GlobalInvocationID.xy);
if ((uvi.x >= render_size.x) || (uvi.y >= render_size.y))
{
return;
}
// must be on pixel center for whole values (tested)
vec2 uvn = vec2(uvi + vec2(0.5)) / render_size;
SceneData scene_data = scene.data;
SceneData previous_scene_data = scene.prev_data;
float depth = textureLod(depth_sampler, uvn, 0.0).x;
vec4 view_position = scene_data.inv_projection_matrix * vec4(uvn * 2.0 - 1.0, depth, 1.0);
view_position.xyz /= view_position.w;
// get full change
vec4 world_local_position = mat4(scene_data.inv_view_matrix) * vec4(view_position.xyz, 1.0);
vec4 view_past_position = mat4(previous_scene_data.view_matrix) * vec4(world_local_position.xyz, 1.0);
vec4 view_past_ndc = previous_scene_data.projection_matrix * view_past_position;
view_past_ndc.xyz /= view_past_ndc.w;
vec3 past_uv = vec3(view_past_ndc.xy * 0.5 + 0.5, view_past_ndc.z);
vec3 camera_uv_change = past_uv - vec3(uvn, depth);
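// i.e. reproject: depth -> view space (inv_projection_matrix), view -> world
// (inv_view_matrix), world -> previous view (prev view_matrix), previous view
// -> previous NDC (prev projection_matrix); the uv delta is the camera-only velocity.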
// get just rotation change
world_local_position = mat4(mat3(scene_data.inv_view_matrix)) * vec4(view_position.xyz, 1.0);
view_past_position = mat4(mat3(previous_scene_data.view_matrix)) * vec4(world_local_position.xyz, 1.0);
view_past_ndc = previous_scene_data.projection_matrix * view_past_position;
view_past_ndc.xyz /= view_past_ndc.w;
past_uv = vec3(view_past_ndc.xy * 0.5 + 0.5, view_past_ndc.z);
vec3 camera_rotation_uv_change = past_uv - vec3(uvn, depth);
// get just movement change
vec3 camera_movement_uv_change = camera_uv_change - camera_rotation_uv_change;
// fill in gaps in base velocity (skybox, z velocity)
vec3 base_velocity = vec3(textureLod(vector_sampler, uvn, 0.0).xy + mix(vec2(0), camera_uv_change.xy, step(depth, 0.)), camera_uv_change.z);
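// note: step(depth, 0.) is 1 only when depth == 0, i.e. the reversed-Z far
// plane, so camera motion is substituted where the skybox wrote no velocity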
// FSR2 just makes it so values are larger than 1; I assume that's the only case when it happens
if(params.is_fsr2 > 0.5 && dot(base_velocity, base_velocity) >= 1)
{
base_velocity = camera_uv_change;
}
// get object velocity
vec3 object_uv_change = base_velocity - camera_uv_change.xyz;
// construct final velocity with user defined weights
vec3 total_velocity = camera_rotation_uv_change * params.rotation_velocity_multiplier * sharp_step(params.rotation_velocity_lower_threshold, params.rotation_velocity_upper_threshold, length(camera_rotation_uv_change))
+ camera_movement_uv_change * params.movement_velocity_multiplier * sharp_step(params.movement_velocity_lower_threshold, params.movement_velocity_upper_threshold, length(camera_movement_uv_change))
+ object_uv_change * params.object_velocity_multilpier * sharp_step(params.object_velocity_lower_threshold, params.object_velocity_upper_threshold, length(object_uv_change));
// if objects move, clear the z direction (z is only correct for a static environment)
if(dot(object_uv_change, object_uv_change) > 0.000001)
{
total_velocity.z = 0;
base_velocity.z = 0;
}
// choose the smaller of the two based on magnitude, seems to work well
if(dot(total_velocity.xy * 99, total_velocity.xy * 100) >= dot(base_velocity.xy * 100, base_velocity.xy * 100))
{
total_velocity = base_velocity;
}
imageStore(vector_output, uvi, vec4(total_velocity, 1.0));
}

View File

@ -0,0 +1,14 @@
[remap]
importer="glsl"
type="RDShaderFile"
uid="uid://tx5dpg4cese4"
path="res://.godot/imported/pre_blur_processor.glsl-15aad2fcd84f6b03da65e9fe7bb8e345.res"
[deps]
source_file="res://PreBlurProcessing/pre_blur_processor.glsl"
dest_files=["res://.godot/imported/pre_blur_processor.glsl-15aad2fcd84f6b03da65e9fe7bb8e345.res"]
[params]

View File

@ -0,0 +1,118 @@
extends MeshInstance3D
class_name RadialBlurMesh
@export var target_node : Node3D
## the rotation axis the current mesh's blur rotates around,
## locally
@export_enum("x", "y", "z") var local_rotation_axis : int
@export var negate_local_rotation_axis : bool = false
## the rotation axis that the target mesh spins around locally
@export_enum("x", "y", "z") var target_local_rotation_axis : int
@export var negate_target_local_rotation_axis : bool = false
## the speed at which the mesh becomes visible and starts blurring
@export var speed_visibility_threshold : float = 0.2
## make mesh visible for debugging
@export var show_debug : bool = false
@onready var local_rotation_vector : Vector3 = Vector3(1 if local_rotation_axis == 0 else 0, 1 if local_rotation_axis == 1 else 0, 1 if local_rotation_axis == 2 else 0) * (1 if !negate_local_rotation_axis else -1)
@onready var target_local_rotation_vector : Vector3 = Vector3(1 if target_local_rotation_axis == 0 else 0, 1 if target_local_rotation_axis == 1 else 0, 1 if target_local_rotation_axis == 2 else 0) * (1 if !negate_target_local_rotation_axis else -1)
var mesh_last_rotation : float = 0;
var previous_mesh_basis : Basis = Basis()
var mesh_has_rotation_signal : bool = false
var signal_rotation_velocity : float = 0
var debug_toggle : float = 0
var axis_offset : float
var shape_radius : float = 0
var shape_depth : float = 0
var shape_axis_offset : float = 0
func _ready():
get_surface_override_material(0).set_shader_parameter("debug_color", Color(0, 0, 0, 0) if !show_debug else Color(1, 0, 0, 0))
previous_mesh_basis = target_node.global_basis
var target_rotation_vector : Vector3 = previous_mesh_basis.orthonormalized() * target_local_rotation_vector
axis_offset = target_rotation_vector.dot(global_position - target_node.global_position)
var mesh_aabb : AABB = mesh.get_aabb()
var extent : Vector3 = mesh_aabb.size * global_basis.get_scale()
var all_axis : Array[float] = [extent.x, extent.y, extent.z]
var center : Vector3 = mesh_aabb.get_center() * global_basis.get_scale()
var all_centers : Array[float] = [center.x, center.y, center.z]
shape_depth = all_axis[local_rotation_axis]
shape_axis_offset = all_centers[local_rotation_axis] * (1 if !negate_local_rotation_axis else -1)
shape_radius = 0
for i in all_axis.size():
if i == local_rotation_axis:
continue
shape_radius = max(shape_radius, all_axis[i] / 2)
#print(name, "has the shape depth of ", shape_depth, ", radius of ", shape_radius, " and axis offset of ", shape_axis_offset)
if target_node.has_signal("rotation_velocity_signal"):
mesh_has_rotation_signal = true
target_node.rotation_velocity_signal.connect(on_rotation_velocity_signal)
deferred_update_cylinder_data.call_deferred()
func on_rotation_velocity_signal(velocity : float):
signal_rotation_velocity = velocity
func deferred_update_cylinder_data():
get_surface_override_material(0).set_shader_parameter("shape_depth", shape_depth)
get_surface_override_material(0).set_shader_parameter("shape_radius", shape_radius)
get_surface_override_material(0).set_shader_parameter("shape_axis_offset", shape_axis_offset)
get_surface_override_material(0).set_shader_parameter("local_rotation_axis", local_rotation_vector)
func _process(delta: float) -> void:
var target_transform : Transform3D = target_node.global_transform
var target_rotation_vector : Vector3 = target_transform.orthonormalized().basis * target_local_rotation_vector
var current_mesh_basis : Basis = target_transform.basis
var difference_quat : Quaternion = Quaternion(current_mesh_basis.get_rotation_quaternion() * previous_mesh_basis.get_rotation_quaternion().inverse())
var centered_angle : float = difference_quat.get_angle() - PI
var angle = (PI - abs(centered_angle)) * abs(target_rotation_vector.dot(difference_quat.get_axis()))
if mesh_has_rotation_signal:
angle = signal_rotation_velocity
visible = abs(angle) > speed_visibility_threshold
get_surface_override_material(0).set_shader_parameter("rotation_speed", clamp(angle, -TAU, TAU))
previous_mesh_basis = current_mesh_basis
global_position = target_transform.origin + target_rotation_vector * axis_offset
var alignment_quaternion : Quaternion = Quaternion(global_basis.orthonormalized() * local_rotation_vector, target_rotation_vector)
global_basis = Basis(alignment_quaternion) * global_basis;

View File

@ -0,0 +1,219 @@
shader_type spatial;
render_mode unshaded, depth_draw_always, fog_disabled;
uniform sampler2D screen_texture : hint_screen_texture, filter_nearest;
uniform sampler2D depth_texture : hint_depth_texture, filter_nearest;
uniform vec3 local_rotation_axis = vec3(0, 1, 0);
uniform float rotation_speed = 0;
uniform int sample_count = 8;
uniform float shape_depth = 1;
uniform float shape_radius = 1;
uniform float shape_axis_offset = 0;
uniform float debug_toggle = 0;
uniform vec4 debug_color : source_color = vec4(0);
//https://www.shadertoy.com/view/fdtfWM
vec3 rotate(float angle, vec3 axis, vec3 point) // NOTE: axis must be unit!
{
float c = cos(angle);
float s = sin(angle);
return c * point + s * cross(axis, point) + (1.0 - c) * (dot(point, axis) * axis); // Rodrigues' Rotation Formula
}
// from https://www.shadertoy.com/view/ftKfzc
float interleaved_gradient_noise(vec2 uv, int FrameId){
uv += float(FrameId) * (vec2(47, 17) * 0.695);
vec3 magic = vec3( 0.06711056, 0.00583715, 52.9829189 );
return fract(magic.z * fract(dot(uv, magic.xy)));
}
vec3 get_projection_onto_plane(vec3 plane_origin, vec3 normal, vec3 vector)
{
float plane_distance = dot(plane_origin, normal);
return vector * plane_distance / dot(normal, vector);
}
float soft_depth_compare(float x, float y, float sze)
{
return clamp(1. - (x - y) / sze, 0., 1.);
}
vec2 intersect_cylinder(vec3 eye_point, vec3 end_point, vec3 origin, vec3 axis, float radius)
{
eye_point -= axis * dot(eye_point - origin, axis) + origin;
end_point -= axis * dot(end_point - origin, axis) + origin;
vec3 direction = end_point - eye_point;
float A = dot(direction, direction);
float B = 2. * dot(eye_point, direction);
float C = dot(eye_point, eye_point) - radius * radius;
float square_component = sqrt(B * B - 4. * A * C);
return vec2(-B + square_component, -B - square_component) / (2. * A);
}
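// derivation sketch (illustrative): after projecting both points onto the
// plane perpendicular to the axis, solving |eye_point + t * direction|^2 = radius^2
// gives the quadratic A*t^2 + B*t + C = 0 with the A, B, C above, so
// t = (-B +- sqrt(B^2 - 4AC)) / (2A); the two returns are the far and near hits.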
vec2 within_cylinder(vec3 point, vec3 origin, vec3 axis, float radius, float depth, float axis_offset)
{
float within_depth = step(abs(dot(point - origin - axis * axis_offset, axis)), depth / 2.);
vec3 perpendicular_component = point - axis * dot(axis, point - origin) - origin;
float within_radius = step(dot(perpendicular_component, perpendicular_component), radius * radius);
return vec2(within_depth * within_radius, step(0, dot(point - origin, axis)));
}
vec3 color_corrected(vec3 color)
{
return color / mix(
pow((vec3(1.) + vec3(0.055)) * (1.0 / (1.0 + 0.055)), vec3(2.4)),
vec3(1.) * (1.0 / 12.92),
lessThan(vec3(1.), vec3(0.04045)));
}
void fragment() {
vec2 screen_uv = SCREEN_UV;
float depth = texture(depth_texture, screen_uv).x;
vec3 ndc = vec3(screen_uv * 2.0 - 1.0, depth);
vec4 world_position = INV_VIEW_MATRIX * INV_PROJECTION_MATRIX * vec4(ndc, 1.0);
world_position.xyz /= world_position.w;
vec4 world_mesh_position = INV_VIEW_MATRIX * INV_PROJECTION_MATRIX * vec4(screen_uv * 2.0 - 1.0, FRAGCOORD.z, 1.0);
world_mesh_position.xyz /= world_mesh_position.w;
vec3 node_relative_position = world_position.xyz - NODE_POSITION_WORLD;
vec3 world_rotation_axis = normalize(mat3(MODEL_MATRIX) * local_rotation_axis);
float axis_parallel_offset = dot(node_relative_position, world_rotation_axis);
vec3 axis_parallel_component = axis_parallel_offset * world_rotation_axis;
vec3 axis_perpendicular_component = node_relative_position - axis_parallel_component;
float axis_perpendicular_offset = length(axis_perpendicular_component);
vec3 camera_node_position = NODE_POSITION_WORLD - CAMERA_POSITION_WORLD;
vec3 camera_cylinder_back_plane_origin = camera_node_position + world_rotation_axis * (clamp(axis_parallel_offset - shape_axis_offset, -shape_depth / 2., shape_depth / 2.) + shape_axis_offset);
vec3 camera_relative_position = world_position.xyz - CAMERA_POSITION_WORLD;
vec3 camera_plane_projected_results = get_projection_onto_plane(camera_cylinder_back_plane_origin, world_rotation_axis, camera_relative_position);
vec2 world_cylinder_intersect_result = intersect_cylinder(CAMERA_POSITION_WORLD, camera_plane_projected_results + CAMERA_POSITION_WORLD, NODE_POSITION_WORLD, world_rotation_axis, shape_radius);
float lands_within_cylinder = step(1, world_cylinder_intersect_result.x);
camera_plane_projected_results *= mix(world_cylinder_intersect_result.x, 1, lands_within_cylinder);
vec3 node_cylinder_clamped_result = camera_plane_projected_results - camera_node_position;
float on_mesh = 1.;
vec3 raw_clamped_difference = node_cylinder_clamped_result - node_relative_position;
if(dot(raw_clamped_difference, raw_clamped_difference) > 0.001)
{
node_relative_position = world_mesh_position.xyz - NODE_POSITION_WORLD;
on_mesh = 0.;
}
float noise_variation = interleaved_gradient_noise(SCREEN_UV * vec2(textureSize(screen_texture, 0)), int(TIME * 100.)) / float(sample_count);
float sum = 1.;
vec4 base_sample = texture(screen_texture, screen_uv);
vec4 col = base_sample;
vec2 nearest_ahead_of_mesh_uv = screen_uv;
float nearest_ahead_of_mesh_set = 0.;
vec2 past_mesh_uv_found = screen_uv;
float was_mesh_uv_found = 0.;
vec3 camera_relative_position_normalized = normalize(node_relative_position.xyz + camera_node_position);
for(int i = 0; i < sample_count; i++)
{
float angle = (float(i) / float(sample_count) + noise_variation) * rotation_speed;
vec3 node_rotated_sample = rotate(-angle, world_rotation_axis.xyz, node_relative_position.xyz);
vec4 current_ndc = (PROJECTION_MATRIX * VIEW_MATRIX * (vec4(node_rotated_sample, 1) + vec4(NODE_POSITION_WORLD, 0)));
current_ndc.xyz /= current_ndc.w;
vec2 current_uv_sample = ((current_ndc + 1.) / 2.).xy ;
float current_depth = texture(depth_texture, current_uv_sample).x;
vec4 current_world_position = INV_VIEW_MATRIX * INV_PROJECTION_MATRIX * vec4(vec3(current_ndc.xy, current_depth), 1.0);
current_world_position.xyz /= current_world_position.w;
vec3 current_camera_unrotated_position = rotate(angle, world_rotation_axis.xyz, current_world_position.xyz - NODE_POSITION_WORLD) + camera_node_position;
vec3 current_unrotated_perpendicular_component = current_camera_unrotated_position - camera_relative_position_normalized * dot(current_camera_unrotated_position, camera_relative_position_normalized);
float current_unrotated_perpendicular_offset = length(current_unrotated_perpendicular_component);
current_depth = 0.05 / current_depth;
float current_sample_depth = 0.05 / current_ndc.z;
vec2 current_sample_inside_cylinder = within_cylinder(current_world_position.xyz, NODE_POSITION_WORLD, world_rotation_axis, shape_radius, shape_depth, shape_axis_offset);
float occluding_mesh = soft_depth_compare(current_depth + 0.1, current_sample_depth, 0.1) * (1. - current_sample_inside_cylinder.x);
float choose_best_uv = on_mesh * (1. - current_sample_inside_cylinder.x);
current_uv_sample = mix(screen_uv, current_uv_sample, 1. - (1. - current_sample_inside_cylinder.x) * (1. - on_mesh));
current_uv_sample = mix(current_uv_sample, past_mesh_uv_found, occluding_mesh);
current_uv_sample = mix(current_uv_sample, nearest_ahead_of_mesh_uv, nearest_ahead_of_mesh_set * choose_best_uv);
if (current_uv_sample.x < 0. || current_uv_sample.x > 1. || current_uv_sample.y < 0. || current_uv_sample.y > 1.)
{
continue;
}
nearest_ahead_of_mesh_uv = mix(nearest_ahead_of_mesh_uv, current_uv_sample, (1. - nearest_ahead_of_mesh_set) * choose_best_uv);
nearest_ahead_of_mesh_set = mix(nearest_ahead_of_mesh_set, 1., (1. - nearest_ahead_of_mesh_set) * choose_best_uv);
past_mesh_uv_found = mix(current_uv_sample, past_mesh_uv_found, current_sample_inside_cylinder.x);
was_mesh_uv_found = mix(1, was_mesh_uv_found, current_sample_inside_cylinder.x);
float unrotated_sample_within_perpendicular_range = step(current_unrotated_perpendicular_offset, 0.1);
float on_mesh_in_front = on_mesh * (1. - soft_depth_compare(current_sample_depth - 0.02, current_depth, 0.01)) * (1. - unrotated_sample_within_perpendicular_range);
float weight = 1. - on_mesh_in_front * (1. - debug_toggle);
//weight = 1. - (1. - was_mesh_uv_found) * (occluding_mesh);
sum += weight;
col += texture(screen_texture, current_uv_sample) * weight;
}
col /= sum;
ALBEDO = col.xyz + debug_color.xyz;//vec3(depth * 10.);//
}

View File

@ -0,0 +1,26 @@
[gd_scene load_steps=5 format=3 uid="uid://b0shum42bqq0y"]
[ext_resource type="Shader" path="res://RadialBlurTest/radial_blur_mesh.gdshader" id="1_20eg7"]
[ext_resource type="Script" path="res://RadialBlurTest/radial_blur_mesh.gd" id="2_wbael"]
[sub_resource type="CylinderMesh" id="CylinderMesh_pgvyt"]
height = 0.25
[sub_resource type="ShaderMaterial" id="ShaderMaterial_stapv"]
resource_local_to_scene = true
render_priority = 0
shader = ExtResource("1_20eg7")
shader_parameter/local_rotation_axis = Vector3(0, 1, 0)
shader_parameter/rotation_speed = 0.0
shader_parameter/sample_count = 8
shader_parameter/shape_depth = 1.0
shader_parameter/shape_radius = 1.0
shader_parameter/shape_axis_offset = 0.0
shader_parameter/debug_toggle = 0.0
shader_parameter/debug_color = Color(1, 0, 0, 0)
[node name="RadialBlurMesh" type="MeshInstance3D"]
process_priority = 1
mesh = SubResource("CylinderMesh_pgvyt")
surface_material_override/0 = SubResource("ShaderMaterial_stapv")
script = ExtResource("2_wbael")