Upstream optimizations from GLSL; split out vert shader

This commit is contained in:
fishku 2024-05-01 18:48:27 +02:00
parent f047ae72aa
commit 9d662ee821
3 changed files with 122 additions and 56 deletions

View file

@ -1,7 +1,7 @@
// See pixel_aa.slang for copyright and other information.
// clang-format off
// User-tunable parameters of the Pixel AA shader.
// PIX_AA_SETTINGS is a header/separator entry and must be declared only once;
// a second declaration with a different label conflicts with the first.
#pragma parameter PIX_AA_SETTINGS "=== Pixel AA v1.5 settings ===" 0.0 0.0 1.0 1.0
#pragma parameter PIX_AA_SHARP "Pixel AA sharpening amount" 1.5 0.0 2.0 0.05
#pragma parameter PIX_AA_GAMMA "Enable gamma-correct blending" 1.0 0.0 1.0 1.0
#pragma parameter PIX_AA_SUBPX "Enable subpixel AA" 0.0 0.0 1.0 1.0

View file

@ -1,7 +1,7 @@
#version 450
/*
Pixel AA v1.4 by fishku
Pixel AA v1.5 by fishku
Copyright (C) 2023
Public domain license (CC0)
@ -24,6 +24,7 @@
subpixel anti-aliasing, results are identical to the "pixellate" shader.
Changelog:
v1.5: Upstream optimizations from GLSL port. Add free transform preset.
v1.4: Enable subpixel sampling for all four pixel layout orientations,
including rotated screens.
v1.3: Account for screen rotation in subpixel sampling.
@ -59,9 +60,8 @@ layout(location = 2) out vec2 tx_to_uv;
// Vertex stage entry point: transforms the vertex position and delegates the
// computation of tx_coord, tx_per_px and tx_to_uv to pixel_aa_vert_shader.
void main() {
    gl_Position = global.MVP * Position;
    // All three outputs are written by the helper below, so they are not
    // assigned here as well (such assignments would be dead stores).
    pixel_aa_vert_shader(TexCoord, param.SourceSize.xy, param.OutputSize.xy,
                         tx_coord, tx_per_px, tx_to_uv);
}
#pragma stage fragment

View file

@ -1,5 +1,13 @@
// See pixel_aa.slang for copyright and other information.
// Computes the per-vertex quantities needed by the pixel AA sampling code.
// Params:
// TexCoord: Texture coordinate of the vertex
// source_size: Size of the source texture (the caller passes SourceSize.xy)
// output_size: Size of the output (the caller passes OutputSize.xy)
// Outputs (write-only, hence `out` rather than `inout`):
// tx_coord: TexCoord scaled by source_size, i.e. in texel coordinates
// tx_per_px: source_size / output_size, per axis
// tx_to_uv: 1.0 / source_size, the factor mapping texel coordinates to UV
void pixel_aa_vert_shader(vec2 TexCoord, vec2 source_size, vec2 output_size,
                          out vec2 tx_coord, out vec2 tx_per_px,
                          out vec2 tx_to_uv) {
    tx_coord = TexCoord * source_size;
    tx_per_px = source_size / output_size;
    tx_to_uv = 1.0 / source_size;
}
// Similar to smoothstep, but has a configurable slope at x = 0.5.
// Original smoothstep has a slope of 1.5 at x = 0.5
#define INSTANTIATE_SLOPESTEP(T) \
@ -9,77 +17,135 @@
const T o = (1.0 + s) * 0.5; \
return o - 0.5 * s * pow(2.0 * (o - s * x), T(slope)); \
}
INSTANTIATE_SLOPESTEP(float)
INSTANTIATE_SLOPESTEP(vec2)
// Decodes a gamma-encoded scalar to linear light using a 2.2 power curve.
float to_lin(float x) {
    float lin = pow(x, 2.2);
    return lin;
}
// Component-wise variant: decodes a gamma-encoded color to linear light
// using a 2.2 power curve.
vec3 to_lin(vec3 x) {
    vec3 lin = pow(x, vec3(2.2));
    return lin;
}
// Encodes a linear-light scalar with the inverse of the 2.2 power curve
// (exponent 1 / 2.2).
float to_srgb(float x) {
    float encoded = pow(x, 1.0 / 2.2);
    return encoded;
}
// Component-wise variant: encodes a linear-light color with the inverse of
// the 2.2 power curve (exponent 1 / 2.2).
vec3 to_srgb(vec3 x) {
    vec3 encoded = pow(x, vec3(1.0 / 2.2));
    return encoded;
}
// Function to get a single sample using the "pixel AA" method.
// Params:
// tex: Texture to sample from
// tx_per_px: Source texels per output pixel, per axis
// tx_to_uv: Factor converting texel coordinates to UV coordinates
// tx_coord: Coordinate in source pixel (texel) coordinates
// sharpness: Values below 1.0 blend towards bilinear filtering; the part
//            above 1.0 is passed to slopestep as its slope argument
// gamma_correct: If true, blend 4 taps manually in linear light; otherwise
//                use a single tap and rely on bilinear texture filtering
// Returns the filtered RGB color.
vec3 sample_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord,
               float sharpness, bool gamma_correct) {
    // Split the sharpness into the part that narrows the transition window
    // (<= 1.0) and the part that steepens the transition (>= 1.0).
    float sharpness_upper = min(1.0, sharpness);
    float sharpness_lower = max(1.0, sharpness);

    // The offset for interpolation is a periodic function with
    // a period length of 1 texel.
    // The input coordinate is shifted so that the center of the texel
    // aligns with the start of the period.
    // First, get the period and phase. floor() is used instead of modf():
    // modf() truncates toward zero and would yield a negative phase for
    // coordinates below the first texel center. This matches pixel_aa.
    vec2 shifted = tx_coord - 0.5;
    vec2 period = floor(shifted);
    vec2 phase = shifted - period;

    // The function starts at 0, then starts transitioning at
    // 0.5 - 0.5 / pixels_per_texel, then reaches 0.5 at 0.5,
    // then reaches 1 at 0.5 + 0.5 / pixels_per_texel.
    // For sharpness values < 1.0, blend to bilinear filtering.
    vec2 window_lb = sharpness_upper * (0.5 - 0.5 * tx_per_px);
    vec2 window_ub = 1.0 - sharpness_upper * (1.0 - (0.5 + 0.5 * tx_per_px));
    vec2 offset = slopestep(window_lb, window_ub, phase, sharpness_lower);

    // With gamma correct blending, we have to do 4 taps and interpolate
    // manually. Without it, we can make use of a single tap using bilinear
    // interpolation. The offsets are shifted back to the texel center before
    // sampling.
    if (gamma_correct) {
        vec3 s00 = to_lin(texture(tex, (period + 0.5) * tx_to_uv).rgb);
        vec3 s10 = to_lin(texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).rgb);
        vec3 s01 = to_lin(texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).rgb);
        vec3 s11 = to_lin(texture(tex, (period + 1.5) * tx_to_uv).rgb);
        return to_srgb(
            mix(mix(s00, s10, offset.x), mix(s01, s11, offset.x), offset.y));
    }
    return texture(tex, (period + 0.5 + offset) * tx_to_uv).rgb;
}
// Function to get a pixel value, taking into consideration possible subpixel
// interpolation.
// Params:
// tex: Texture to sample from
// tx_per_px: Source texels per output pixel, per axis
// tx_to_uv: Factor converting texel coordinates to UV coordinates
// tx_coord: Coordinate in source pixel (texel) coordinates
// sharpness: Values below 1.0 blend towards bilinear filtering; the part
//            above 1.0 is passed to slopestep as its slope argument
// gamma_correct: If true, blend 4 taps manually in linear light; otherwise
//                use a single tap and rely on bilinear texture filtering
// sample_subpx: If true, sample R, G and B at coordinates shifted by
//               -1/3, 0 and +1/3 of an output pixel respectively
// subpx_orientation, screen_rotation: Their sum modulo 4 selects the axis
//               and direction of the subpixel shift
// Returns the filtered color; alpha is 1.0 on every path.
vec4 pixel_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord,
              float sharpness, bool gamma_correct, bool sample_subpx,
              uint subpx_orientation, uint screen_rotation) {
    // Values shared by all sampling paths: the transition window bounds and
    // the slope passed to slopestep.
    const float sharpness_upper = min(1.0, sharpness);
    const vec2 sharp_lb = sharpness_upper * (0.5 - 0.5 * tx_per_px);
    const vec2 sharp_ub =
        1.0 - sharpness_upper * (1.0 - (0.5 + 0.5 * tx_per_px));
    const float sharpness_lower = max(1.0, sharpness);

    if (sample_subpx) {
        // Subpixel sampling: Shift the sampling by 1/3rd of an output pixel for
        // each subpixel, assuming that the output size is at monitor
        // resolution.
        // Compensate for possible rotation of the screen in certain cores.
        // rot_corr[k] and rot_corr[(k + 3) % 4] yield the unit directions
        // (1,0), (0,1), (-1,0), (0,-1) for k = 0..3.
        const vec4 rot_corr = vec4(1.0, 0.0, -1.0, 0.0);
        const vec2 sub_tx_offset =
            tx_per_px / 3.0 *
            vec2(rot_corr[(screen_rotation + subpx_orientation) % 4],
                 rot_corr[(screen_rotation + subpx_orientation + 3) % 4]);

        vec3 res;
        vec2 period, phase, offset;

        // Each channel is handled separately so that only the needed
        // component of every tap is converted and blended.
        if (gamma_correct) {
            // Red
            period = floor(tx_coord - sub_tx_offset - 0.5);
            phase = tx_coord - sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.r = to_srgb(mix(
                mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).r),
                    to_lin(
                        texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).r),
                    offset.x),
                mix(to_lin(
                        texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).r),
                    to_lin(texture(tex, (period + 1.5) * tx_to_uv).r),
                    offset.x),
                offset.y));
            // Green
            period = floor(tx_coord - 0.5);
            phase = tx_coord - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.g = to_srgb(mix(
                mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).g),
                    to_lin(
                        texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).g),
                    offset.x),
                mix(to_lin(
                        texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).g),
                    to_lin(texture(tex, (period + 1.5) * tx_to_uv).g),
                    offset.x),
                offset.y));
            // Blue
            period = floor(tx_coord + sub_tx_offset - 0.5);
            phase = tx_coord + sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.b = to_srgb(mix(
                mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).b),
                    to_lin(
                        texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).b),
                    offset.x),
                mix(to_lin(
                        texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).b),
                    to_lin(texture(tex, (period + 1.5) * tx_to_uv).b),
                    offset.x),
                offset.y));
        } else {
            // Red
            period = floor(tx_coord - sub_tx_offset - 0.5);
            phase = tx_coord - sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.r = texture(tex, (period + 0.5 + offset) * tx_to_uv).r;
            // Green
            period = floor(tx_coord - 0.5);
            phase = tx_coord - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.g = texture(tex, (period + 0.5 + offset) * tx_to_uv).g;
            // Blue
            period = floor(tx_coord + sub_tx_offset - 0.5);
            phase = tx_coord + sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.b = texture(tex, (period + 0.5 + offset) * tx_to_uv).b;
        }

        return vec4(res, 1.0);
    } else {
        // The offset for interpolation is a periodic function with
        // a period length of 1 texel.
        // The input coordinate is shifted so that the center of the texel
        // aligns with the start of the period.
        // First, get the period and phase.
        vec2 period = floor(tx_coord - 0.5);
        vec2 phase = tx_coord - 0.5 - period;
        // The function starts at 0, then starts transitioning at
        // 0.5 - 0.5 / pixels_per_texel, then reaches 0.5 at 0.5,
        // then reaches 1 at 0.5 + 0.5 / pixels_per_texel.
        // For sharpness values < 1.0, blend to bilinear filtering.
        vec2 offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);

        // With gamma correct blending, we have to do 4 taps and interpolate
        // manually. Without it, we can make use of a single tap using bilinear
        // interpolation. The offsets are shifted back to the texel center
        // before sampling.
        if (gamma_correct) {
            return vec4(
                to_srgb(mix(
                    mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).rgb),
                        to_lin(
                            texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv)
                                .rgb),
                        offset.x),
                    mix(to_lin(
                            texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv)
                                .rgb),
                        to_lin(texture(tex, (period + 1.5) * tx_to_uv).rgb),
                        offset.x),
                    offset.y)),
                1.0);
        } else {
            // Only the RGB of the tap is used; alpha is forced to 1.0 so this
            // path agrees with the other three return paths.
            return vec4(
                texture(tex, (period + 0.5 + offset) * tx_to_uv).rgb, 1.0);
        }
    }
}