Add 'Picoscale_256x-320x240' video filter (#12997)

This commit is contained in:
jdgleaver 2021-09-15 19:20:56 +01:00 committed by GitHub
parent 78858a9474
commit 5b13047623
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 546 additions and 2 deletions

View file

@ -2363,7 +2363,8 @@ ifeq ($(HAVE_STATIC_VIDEO_FILTERS), 1)
gfx/video_filters/dot_matrix_3x.o \
gfx/video_filters/dot_matrix_4x.o \
gfx/video_filters/upscale_1_5x.o \
gfx/video_filters/upscale_256x_320x240.o
gfx/video_filters/upscale_256x_320x240.o \
gfx/video_filters/picoscale_256x_320x240.o
endif
ifeq ($(WANT_IOSUHAX), 1)

View file

@ -292,6 +292,7 @@ extern const struct softfilter_implementation *dot_matrix_3x_get_implementation(
extern const struct softfilter_implementation *dot_matrix_4x_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *upscale_1_5x_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *upscale_256x_320x240_get_implementation(softfilter_simd_mask_t simd);
extern const struct softfilter_implementation *picoscale_256x_320x240_get_implementation(softfilter_simd_mask_t simd);
static const softfilter_get_implementation_t soft_plugs_builtin[] = {
blargg_ntsc_snes_get_implementation,
@ -317,6 +318,7 @@ static const softfilter_get_implementation_t soft_plugs_builtin[] = {
dot_matrix_4x_get_implementation,
upscale_1_5x_get_implementation,
upscale_256x_320x240_get_implementation,
picoscale_256x_320x240_get_implementation,
};
static bool append_softfilter_plugs(rarch_softfilter_t *filt,

View file

@ -126,7 +126,8 @@ objects += blargg_ntsc_snes.$(DYLIB) \
dot_matrix_3x.$(DYLIB) \
dot_matrix_4x.$(DYLIB) \
upscale_1_5x.$(DYLIB) \
upscale_256x_320x240.$(DYLIB)
upscale_256x_320x240.$(DYLIB) \
picoscale_256x_320x240.$(DYLIB)
all: build;

View file

@ -0,0 +1,7 @@
filter = picoscale_256x_320x240
# Filter Type:
# - snn: "smoothed" nearest neighbour
# - bl2: 2-level-bilinear with 2 quantized weights
# - bl4: 4-level-bilinear with 4 quantized weights
picoscale_256x_320x240_filter_type = "bl2"

View file

@ -0,0 +1,7 @@
filter = picoscale_256x_320x240
# Filter Type:
# - snn: "smoothed" nearest neighbour
# - bl2: 2-level-bilinear with 2 quantized weights
# - bl4: 4-level-bilinear with 4 quantized weights
picoscale_256x_320x240_filter_type = "bl4"

View file

@ -0,0 +1,7 @@
filter = picoscale_256x_320x240
# Filter Type:
# - snn: "smoothed" nearest neighbour
# - bl2: 2-level-bilinear with 2 quantized weights
# - bl4: 4-level-bilinear with 4 quantized weights
picoscale_256x_320x240_filter_type = "snn"

View file

@ -0,0 +1,518 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
* Copyright (C) 2011-2018 - Daniel De Matteis
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
/* Compile: gcc -o picoscale_256x_320x240.so -shared picoscale_256x_320x240.c -std=c99 -O3 -Wall -pedantic -fPIC */
#include "softfilter.h"
#include <stdlib.h>
#include <string.h>
#ifdef RARCH_INTERNAL
#define softfilter_get_implementation picoscale_256x_320x240_get_implementation
#define softfilter_thread_data picoscale_256x_320x240_softfilter_thread_data
#define filter_data picoscale_256x_320x240_filter_data
#endif
#if defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define PICOSCALE_restrict __restrict
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#define PICOSCALE_restrict __restrict
#else
#define PICOSCALE_restrict
#endif
/* Pair of scaling routines selected for the configured filter type
 * (snn, bl2 or bl4). In every signature di/si are the destination and
 * source pixel pointers and ds/ss the matching strides, counted in
 * uint16_t elements. */
typedef struct
{
/* Converts a 256x224 source image into a 320x240 destination image */
void (*upscale_256_320x224_240)(
uint16_t *PICOSCALE_restrict di, uint16_t ds,
const uint16_t *PICOSCALE_restrict si, uint16_t ss);
/* Converts a 256 pixel wide source image of 'height' rows into a
 * 320 pixel wide destination image with the same number of rows */
void (*upscale_256_320x___)(
uint16_t *PICOSCALE_restrict di, uint16_t ds,
const uint16_t *PICOSCALE_restrict si, uint16_t ss,
uint16_t height);
} picoscale_functions_t;
/* Description of one unit of work handed to the worker callback.
 * It is filled in by picoscale_256x_320x240_generic_packets() and
 * consumed by picoscale_256x_320x240_work_cb_rgb565(). */
struct softfilter_thread_data
{
/* Destination frame buffer */
void *out_data;
/* Source frame buffer */
const void *in_data;
/* Destination row pitch; halved by the worker to obtain a
 * stride in 16-bit pixels */
size_t out_pitch;
/* Source row pitch; halved by the worker to obtain a
 * stride in 16-bit pixels */
size_t in_pitch;
/* Not referenced by this filter */
unsigned colfmt;
/* Source frame width, in pixels */
unsigned width;
/* Source frame height, in rows */
unsigned height;
/* Not referenced by this filter */
int first;
/* Not referenced by this filter */
int last;
};
/* Per-instance filter state, allocated by
 * picoscale_256x_320x240_generic_create() and released by
 * picoscale_256x_320x240_generic_destroy(). */
struct filter_data
{
/* Number of worker threads; create() always sets this to 1 */
unsigned threads;
/* Worker descriptors; create() allocates exactly one entry */
struct softfilter_thread_data *workers;
/* Input pixel format passed to create() */
unsigned in_fmt;
/* Scaling routines chosen from the 'filter_type' setting */
picoscale_functions_t functions;
};
/*******************************************************************
* Image scaling algorithms from picodrive standalone
*
* Scaler types:
* - snn: "smoothed" nearest neighbour (see below)
* - bln: n-level-bilinear with n quantized weights
* quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc
* currently n=2, n=4 are implemented
*
* "smoothed" nearest neighbour: uses the average of the source pixels if no
* source pixel covers more than 65% of the result pixel. It definitely
* looks better than nearest neighbour and is still quite fast. It creates
* a sharper look than a bilinear filter, at the price of some visible jags
* on diagonal edges.
*
* Copyright (C) 2021 kub <derkub@gmail.com>
*******************************************************************/
/* RGB565 pixel mixing, see https://www.compuphase.com/graphic/scale3.htm and
http://blargg.8bitalley.com/info/rgb_mixing.html */
/* 2-level mixing: d = 1/2*p1 + 1/2*p2, evaluated for all three colour
 * channels at once. (a + b)/2 is computed as (a & b) + ((a ^ b) >> 1);
 * masking with ~0x0821 clears the lowest bit of each RGB565 channel
 * (bits 0, 5 and 11) before the shift so that no bit slides into the
 * neighbouring channel. */
#define PICOSCALE_P_05(d,p1,p2) d=(((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1)) /* per-channel average, rounded down */
/* 4-level mixing, 2 times slower
* > 1/4*p1 + 3/4*p2 = 1/2*(1/2*(p1+p2) + p2)
* NOTE: expands to two statements and stores the intermediate result in
* a variable named 't', which must be in scope where the macro is used.
* PICOSCALE_P_075 is the mirrored form: 3/4*p1 + 1/4*p2. */
#define PICOSCALE_P_025(d,p1,p2) PICOSCALE_P_05(t, p1, p2); PICOSCALE_P_05( d, t, p2)
#define PICOSCALE_P_075(d,p1,p2) PICOSCALE_P_025(d,p2,p1)
/* pixel transforms */
/* Identity transform: yields the pixel value unchanged */
#define PICOSCALE_F_NOP(v) (v) /* source already in dest format (CLUT/RGB) */
/*
scalers h:
*/
/* scale 4:5 */
/* "Smoothed" nearest neighbour, one row: every group of 4 source pixels
 * s0..s3 produces 5 destination pixels s0, s1, avg(s1,s2), s2, s3.
 * f is applied to each source pixel before it is used.
 * NOTE: di and si are modified in place. For w a multiple of 4 they end
 * up advanced by ds and ss elements respectively, i.e. at the start of
 * the next row. w is expanded without parentheses, so pass a plain
 * constant or identifier. */
#define PICOSCALE_H_UPSCALE_SNN_4_5(di,ds,si,ss,w,f) do { \
uint16_t i; \
for (i = w/4; i > 0; i--, si += 4, di += 5) { \
di[0] = f(si[0]); \
di[1] = f(si[1]); \
PICOSCALE_P_05(di[2], f(si[1]),f(si[2])); \
di[3] = f(si[2]); \
di[4] = f(si[3]); \
} \
di += ds - w/4*5; \
si += ss - w; \
} while (0)
/* 2-level bilinear, one row: every group of 4 source pixels s0..s3
 * produces 5 destination pixels s0, avg(s0,s1), avg(s1,s2), s2, s3.
 * f is applied to each source pixel before it is used.
 * NOTE: di and si are modified in place. For w a multiple of 4 they end
 * up advanced by ds and ss elements respectively, i.e. at the start of
 * the next row. w is expanded without parentheses, so pass a plain
 * constant or identifier. */
#define PICOSCALE_H_UPSCALE_BL2_4_5(di,ds,si,ss,w,f) do { \
uint16_t i; \
for (i = w/4; i > 0; i--, si += 4, di += 5) { \
di[0] = f(si[0]); \
PICOSCALE_P_05(di[1], f(si[0]),f(si[1])); \
PICOSCALE_P_05(di[2], f(si[1]),f(si[2])); \
di[3] = f(si[2]); \
di[4] = f(si[3]); \
} \
di += ds - w/4*5; \
si += ss - w; \
} while (0)
/* 4-level bilinear, one row: every group of 4 source pixels s0..s3
 * produces 5 destination pixels
 *   1/4*p + 3/4*s0, avg(s0,s1), avg(s1,s2), 3/4*s2 + 1/4*s3, s3
 * where p is the last pixel of the previous group (for the first group
 * p is the first pixel of the row, so the blend yields that pixel).
 * f is applied to each source pixel before it is used; 't' is the
 * scratch variable required by PICOSCALE_P_025/PICOSCALE_P_075.
 * NOTE: di and si are modified in place. For w a multiple of 4 they end
 * up advanced by ds and ss elements respectively, i.e. at the start of
 * the next row. w is expanded without parentheses, so pass a plain
 * constant or identifier. */
#define PICOSCALE_H_UPSCALE_BL4_4_5(di,ds,si,ss,w,f) do { \
uint16_t i, t; uint16_t p = f(si[0]); \
for (i = w/4; i > 0; i--, si += 4, di += 5) { \
PICOSCALE_P_025(di[0], p, f(si[0])); \
PICOSCALE_P_05 (di[1], f(si[0]),f(si[1])); \
PICOSCALE_P_05 (di[2], f(si[1]),f(si[2])); \
PICOSCALE_P_075(di[3], f(si[2]),f(si[3])); \
di[4] = p = f(si[3]); \
} \
di += ds - w/4*5; \
si += ss - w; \
} while (0)
/*
scalers v:
*/
/* Blends two rows into a third: for each of the w pixels,
 * di[i] = p_mix(f(li[i]), f(ri[i])). p_mix is one of the PICOSCALE_P_*
 * mixing macros; 't' and 'u' are scratch variables made available to it
 * (the void casts silence unused-variable warnings when p_mix does not
 * need them). The pointer arguments are only read, never advanced.
 * Four pixels are handled per iteration, so w is expected to be a
 * multiple of 4. */
#define PICOSCALE_V_MIX(di,li,ri,w,p_mix,f) do { \
uint16_t i, t, u; (void)t, (void)u; \
for (i = 0; i < w; i += 4) { \
p_mix((di)[i ], f((li)[i ]),f((ri)[i ])); \
p_mix((di)[i+1], f((li)[i+1]),f((ri)[i+1])); \
p_mix((di)[i+2], f((li)[i+2]),f((ri)[i+2])); \
p_mix((di)[i+3], f((li)[i+3]),f((ri)[i+3])); \
} \
} while (0)
/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC)
 *
 * Horizontal-only upscale using the "smoothed" nearest neighbour scaler.
 * Reads 'height' rows of 256 pixels from 'si' and writes the same number
 * of rows, each 320 pixels wide, to 'di'. 'ds' and 'ss' are the
 * destination and source strides, counted in uint16_t elements. */
void picoscale_upscale_rgb_snn_256_320x___(uint16_t *PICOSCALE_restrict di, uint16_t ds,
      const uint16_t *PICOSCALE_restrict si, uint16_t ss, uint16_t height)
{
   uint16_t rows_left;

   /* Each invocation scales one row and leaves both
    * pointers at the beginning of the following row */
   for (rows_left = height; rows_left != 0; rows_left--)
      PICOSCALE_H_UPSCALE_SNN_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* Horizontal-only 256 -> 320 upscale using the 2-level bilinear scaler.
 * Reads 'height' rows of 256 pixels from 'si' and writes the same number
 * of rows, each 320 pixels wide, to 'di'. 'ds' and 'ss' are the
 * destination and source strides, counted in uint16_t elements. */
void picoscale_upscale_rgb_bl2_256_320x___(uint16_t *PICOSCALE_restrict di, uint16_t ds,
      const uint16_t *PICOSCALE_restrict si, uint16_t ss, uint16_t height)
{
   uint16_t rows_left;

   /* Each invocation scales one row and leaves both
    * pointers at the beginning of the following row */
   for (rows_left = height; rows_left != 0; rows_left--)
      PICOSCALE_H_UPSCALE_BL2_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* Horizontal-only 256 -> 320 upscale using the 4-level bilinear scaler.
 * Reads 'height' rows of 256 pixels from 'si' and writes the same number
 * of rows, each 320 pixels wide, to 'di'. 'ds' and 'ss' are the
 * destination and source strides, counted in uint16_t elements. */
void picoscale_upscale_rgb_bl4_256_320x___(uint16_t *PICOSCALE_restrict di, uint16_t ds,
      const uint16_t *PICOSCALE_restrict si, uint16_t ss, uint16_t height)
{
   uint16_t rows_left;

   /* Each invocation scales one row and leaves both
    * pointers at the beginning of the following row */
   for (rows_left = height; rows_left != 0; rows_left--)
      PICOSCALE_H_UPSCALE_BL4_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL)
 *
 * "Smoothed" nearest neighbour scaler. 'di'/'si' are the destination and
 * source pixel pointers, 'ds'/'ss' the matching strides counted in
 * uint16_t elements.
 *
 * The source is processed in groups of 16 rows. Each group is written
 * to 17 destination rows (numbered 0-16 below): rows 0-7, one unwritten
 * gap row (8) and rows 9-16. The gap and its two neighbours are then
 * filled/softened by vertical mixing. 14 groups give 238 rows, so the
 * final two destination rows are cleared. */
void picoscale_upscale_rgb_snn_256_320x224_240(uint16_t *PICOSCALE_restrict di, uint16_t ds,
      const uint16_t *PICOSCALE_restrict si, uint16_t ss)
{
   uint16_t y, j;

   for (y = 0; y < 224; y += 16)
   {
      /* Rows 0-7 of the group */
      for (j = 0; j < 8; j++)
      {
         PICOSCALE_H_UPSCALE_SNN_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
      }

      /* Leave row 8 unwritten for now */
      di += ds;

      /* Rows 9-16 of the group */
      for (j = 0; j < 8; j++)
      {
         PICOSCALE_H_UPSCALE_SNN_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
      }

      /* mix lines 6-8
       * di is stepped back to the gap row. The gap is computed first,
       * from its two still unmodified neighbours; afterwards each
       * neighbour is averaged with the row on its far side. */
      di -= 9*ds;
      PICOSCALE_V_MIX(&di[0], &di[-ds], &di[ds], 320, PICOSCALE_P_05, PICOSCALE_F_NOP);
      PICOSCALE_V_MIX(&di[-ds], &di[-2*ds], &di[-ds], 320, PICOSCALE_P_05, PICOSCALE_F_NOP);
      PICOSCALE_V_MIX(&di[ ds], &di[ ds], &di[ 2*ds], 320, PICOSCALE_P_05, PICOSCALE_F_NOP);

      /* Return to the first row of the next group */
      di += 9*ds;
   }

   /* The above scaling produces an output image 238 pixels high
    * > Last two rows must be zeroed out
    *   (the whole stride of each row is cleared) */
   memset(di, 0, sizeof(uint16_t) * ds);
   memset(di + ds, 0, sizeof(uint16_t) * ds);
}
/* 256x224 -> 320x240 using the 2-level bilinear scaler.
 * 'di'/'si' are the destination and source pixel pointers, 'ds'/'ss'
 * the matching strides counted in uint16_t elements.
 *
 * The source is processed in groups of 16 rows. Each group is written
 * to 17 destination rows (numbered 0-16 below): rows 0-3, one unwritten
 * gap row (4) and rows 5-16. Rows 4-11 are then blended in place.
 * 14 groups give 238 rows, so the final two destination rows are
 * cleared. */
void picoscale_upscale_rgb_bl2_256_320x224_240(uint16_t *PICOSCALE_restrict di, uint16_t ds,
const uint16_t *PICOSCALE_restrict si, uint16_t ss)
{
uint16_t y, j;
for (y = 0; y < 224; y += 16)
{
/* Rows 0-3 of the group */
for (j = 0; j < 4; j++)
{
PICOSCALE_H_UPSCALE_BL2_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* Leave row 4 unwritten for now */
di += ds;
/* Rows 5-16 of the group */
for (j = 0; j < 12; j++)
{
PICOSCALE_H_UPSCALE_BL2_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* mix lines 3-10 */
/* Step back to the gap row and fill it with the
 * average of the rows directly above and below */
di -= 13*ds;
PICOSCALE_V_MIX(&di[0], &di[-ds], &di[ds], 320, PICOSCALE_P_05, PICOSCALE_F_NOP);
/* Rows 5-11: each row becomes the average of itself and the row
 * below it. Working top to bottom means the row below has not
 * been blended yet when it is read. */
for (j = 0; j < 7; j++)
{
di += ds;
PICOSCALE_V_MIX(&di[0], &di[0], &di[ds], 320, PICOSCALE_P_05, PICOSCALE_F_NOP);
}
/* di is at row 11; move on to the first row of the next group */
di += 6*ds;
}
/* The above scaling produces an output image 238 pixels high
* > Last two rows must be zeroed out */
memset(di, 0, sizeof(uint16_t) * ds);
memset(di + ds, 0, sizeof(uint16_t) * ds);
}
/* 256x224 -> 320x240 using the 4-level bilinear scaler.
 * 'di'/'si' are the destination and source pixel pointers, 'ds'/'ss'
 * the matching strides counted in uint16_t elements.
 *
 * The source is processed in groups of 16 rows. Each group is written
 * to 17 destination rows (numbered 0-16 in this header): rows 0-1, one
 * unwritten gap row (2) and rows 3-16. Rows 2-13 are then blended in
 * place, top to bottom, with weights that move from 1/4:3/4 through
 * 1/2:1/2 to 3/4:1/4 (current row : row below). Because the row below
 * is always read before it is itself blended, every blend uses
 * unmodified data for that row. 14 groups give 238 rows, so the final
 * two destination rows are cleared. */
void picoscale_upscale_rgb_bl4_256_320x224_240(uint16_t *PICOSCALE_restrict di, uint16_t ds,
const uint16_t *PICOSCALE_restrict si, uint16_t ss)
{
uint16_t y, j;
for (y = 0; y < 224; y += 16)
{
/* Rows 0-1 of the group */
for (j = 0; j < 2; j++)
{
PICOSCALE_H_UPSCALE_BL4_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* Leave row 2 unwritten for now */
di += ds;
/* Rows 3-16 of the group */
for (j = 0; j < 14; j++)
{
PICOSCALE_H_UPSCALE_BL4_4_5(di, ds, si, ss, 256, PICOSCALE_F_NOP);
}
/* Step back to the gap row */
di -= 15*ds;
/* mixing line 2: line 1 = -ds, line 2 = +ds */
/* gap = 1/4 * row above + 3/4 * row below */
PICOSCALE_V_MIX(&di[0], &di[-ds], &di[ds], 320, PICOSCALE_P_025, PICOSCALE_F_NOP);
di += ds;
/* mixing lines 3-5: line n-1 = 0, line n = +ds */
/* row = 1/4 * row + 3/4 * row below */
for (j = 0; j < 3; j++)
{
PICOSCALE_V_MIX(&di[0], &di[0], &di[ds], 320, PICOSCALE_P_025, PICOSCALE_F_NOP);
di += ds;
}
/* mixing lines 6-9 */
/* row = 1/2 * row + 1/2 * row below */
for (j = 0; j < 4; j++)
{
PICOSCALE_V_MIX(&di[0], &di[0], &di[ds], 320, PICOSCALE_P_05, PICOSCALE_F_NOP);
di += ds;
}
/* mixing lines 10-13 */
/* row = 3/4 * row + 1/4 * row below */
for (j = 0; j < 4; j++)
{
PICOSCALE_V_MIX(&di[0], &di[0], &di[ds], 320, PICOSCALE_P_075, PICOSCALE_F_NOP);
di += ds;
}
/* lines 14-16, already in place */
/* Skip them to reach the first row of the next group */
di += 3*ds;
}
/* The above scaling produces an output image 238 pixels high
* > Last two rows must be zeroed out */
memset(di, 0, sizeof(uint16_t) * ds);
memset(di + ds, 0, sizeof(uint16_t) * ds);
}
/*******************************************************************
*******************************************************************/
/* Reports the input pixel formats accepted by this filter:
 * RGB565 only. */
static unsigned picoscale_256x_320x240_generic_input_fmts(void)
{
   const unsigned accepted_fmts = SOFTFILTER_FMT_RGB565;

   return accepted_fmts;
}
/* Reports the output pixel formats for a given set of input formats.
 * The filter never converts pixel formats, so the input mask is
 * handed back unchanged. */
static unsigned picoscale_256x_320x240_generic_output_fmts(unsigned input_fmts)
{
   const unsigned produced_fmts = input_fmts;

   return produced_fmts;
}
static unsigned picoscale_256x_320x240_generic_threads(void *data)
{
struct filter_data *filt = (struct filter_data*)data;
return filt->threads;
}
static void picoscale_256x_320x240_initialize(struct filter_data *filt,
const struct softfilter_config *config,
void *userdata)
{
char *filter_type = NULL;
/* Assign default scaling functions */
filt->functions.upscale_256_320x224_240 = picoscale_upscale_rgb_snn_256_320x224_240;
filt->functions.upscale_256_320x___ = picoscale_upscale_rgb_snn_256_320x___;
/* Read set filter type */
if (config->get_string(userdata, "filter_type", &filter_type, "snn"))
{
if (!strcmp(filter_type, "bl2"))
{
filt->functions.upscale_256_320x224_240 = picoscale_upscale_rgb_bl2_256_320x224_240;
filt->functions.upscale_256_320x___ = picoscale_upscale_rgb_bl2_256_320x___;
}
else if (!strcmp(filter_type, "bl4"))
{
filt->functions.upscale_256_320x224_240 = picoscale_upscale_rgb_bl4_256_320x224_240;
filt->functions.upscale_256_320x___ = picoscale_upscale_rgb_bl4_256_320x___;
}
}
if (filter_type)
free(filter_type);
}
/* Allocates and initialises a filter instance.
 *
 * Only 'config', 'in_fmt' and 'userdata' are used; the remaining
 * parameters are accepted for interface compatibility and ignored.
 * The instance always runs single threaded, with exactly one worker
 * descriptor.
 *
 * Returns the new instance (to be released with
 * picoscale_256x_320x240_generic_destroy()), or NULL if an
 * allocation fails. */
static void *picoscale_256x_320x240_generic_create(const struct softfilter_config *config,
      unsigned in_fmt, unsigned out_fmt,
      unsigned max_width, unsigned max_height,
      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
{
   struct softfilter_thread_data *worker = NULL;
   struct filter_data *state             =
         (struct filter_data*)calloc(1, sizeof(*state));

   (void)out_fmt;
   (void)max_width;
   (void)max_height;
   (void)threads;
   (void)simd;

   if (!state)
      return NULL;

   /* Apparently the code is not thread-safe,
    * so force single threaded operation... */
   worker = (struct softfilter_thread_data*)calloc(1, sizeof(*worker));

   if (!worker)
   {
      free(state);
      return NULL;
   }

   state->workers = worker;
   state->threads = 1;
   state->in_fmt  = in_fmt;

   /* Assign scaling functions */
   picoscale_256x_320x240_initialize(state, config, userdata);

   return state;
}
/* Reports the output dimensions for a given input frame size.
 *
 * A 256 pixel wide input of 224, 239 or 240 rows is scaled to
 * 320x240. Every other size passes through unchanged.
 *
 * data       - filter instance (unused)
 * out_width  - receives the output width
 * out_height - receives the output height
 * width      - input width, in pixels
 * height     - input height, in rows */
static void picoscale_256x_320x240_generic_output(void *data,
      unsigned *out_width, unsigned *out_height,
      unsigned width, unsigned height)
{
   /* Pass-through unless the input is a supported size */
   unsigned result_width  = width;
   unsigned result_height = height;

   (void)data;

   if (width == 256)
   {
      switch (height)
      {
         case 224:
         case 239:
         case 240:
            result_width  = 320;
            result_height = 240;
            break;
         default:
            break;
      }
   }

   *out_width  = result_width;
   *out_height = result_height;
}
/* Releases a filter instance created by
 * picoscale_256x_320x240_generic_create(), together with its
 * worker descriptor. A NULL 'data' pointer is ignored. */
static void picoscale_256x_320x240_generic_destroy(void *data)
{
   struct filter_data *state = (struct filter_data*)data;

   if (state)
   {
      free(state->workers);
      free(state);
   }
}
/* Worker callback: processes one RGB565 frame.
 *
 * data        - filter instance (struct filter_data)
 * thread_data - work description (struct softfilter_thread_data)
 *
 * A 256 pixel wide frame of 224, 240 or 239 rows is scaled to 320x240
 * with the routines selected at creation time. Any other frame is
 * copied to the output unchanged. */
static void picoscale_256x_320x240_work_cb_rgb565(void *data, void *thread_data)
{
   struct filter_data *filt           = (struct filter_data*)data;
   struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
   const uint16_t *input              = (const uint16_t*)thr->in_data;
   uint16_t *output                   = (uint16_t*)thr->out_data;
   /* Strides in 16-bit pixels, kept at full width so that the
    * pass-through copy below cannot be affected by truncation */
   size_t in_stride                   = thr->in_pitch >> 1;
   size_t out_stride                  = thr->out_pitch >> 1;
   unsigned width                     = thr->width;
   unsigned height                    = thr->height;

   if (width == 256)
   {
      /* The scaling routines take 16-bit strides */
      uint16_t in_stride_16  = (uint16_t)in_stride;
      uint16_t out_stride_16 = (uint16_t)out_stride;

      if (height == 224)
      {
         filt->functions.upscale_256_320x224_240(output, out_stride_16, input, in_stride_16);
         return;
      }
      else if (height == 240)
      {
         filt->functions.upscale_256_320x___(output, out_stride_16, input, in_stride_16, 240);
         return;
      }
      else if (height == 239)
      {
         filt->functions.upscale_256_320x___(output, out_stride_16, input, in_stride_16, 239);
         /* The above scaling function produces an output
          * image 239 pixels high
          * > Last row must be zeroed out */
         memset(output + (239 * out_stride_16), 0, sizeof(uint16_t) * out_stride_16);
         return;
      }
   }

   /* Input buffer is of dimensions that cannot be upscaled
    * > Simply copy input to output */

   /* If source and destination buffers have the
    * same pitch, perform fast copy of raw pixel data */
   if (in_stride == out_stride)
      memcpy(output, input, thr->out_pitch * height);
   else
   {
      /* Otherwise copy pixel data line-by-line */
      unsigned y;
      for (y = 0; y < height; y++)
      {
         memcpy(output, input, width * sizeof(uint16_t));
         input  += in_stride;
         output += out_stride;
      }
   }
}
/* Prepares the work packet for one frame.
 *
 * The frame buffers, pitches and dimensions are stored in the
 * instance's single worker descriptor, which is then attached to
 * packets[0]. The work callback is only assigned when the instance
 * was created for RGB565 input.
 *
 * We are guaranteed single threaded operation (filt->threads = 1)
 * so we don't need to loop over threads and can cull some code.
 * This only makes the tiniest performance difference, but every
 * little helps when running on an o3DS... */
static void picoscale_256x_320x240_generic_packets(void *data,
      struct softfilter_work_packet *packets,
      void *output, size_t output_stride,
      const void *input, unsigned width, unsigned height, size_t input_stride)
{
   struct filter_data *state             = (struct filter_data*)data;
   struct softfilter_thread_data *job    = state->workers;
   struct softfilter_work_packet *packet = packets;

   /* Describe the frame to be processed */
   job->out_data  = (uint8_t*)output;
   job->in_data   = (const uint8_t*)input;
   job->out_pitch = output_stride;
   job->in_pitch  = input_stride;
   job->width     = width;
   job->height    = height;

   if (state->in_fmt == SOFTFILTER_FMT_RGB565)
      packet->work = picoscale_256x_320x240_work_cb_rgb565;

   packet->thread_data = job;
}
/* Filter descriptor returned by softfilter_get_implementation().
 * The initializer is positional, so the entries must stay in the
 * member order of struct softfilter_implementation (declared in
 * softfilter.h, which is not visible from here). */
static const struct softfilter_implementation picoscale_256x_320x240_generic = {
picoscale_256x_320x240_generic_input_fmts,
picoscale_256x_320x240_generic_output_fmts,
picoscale_256x_320x240_generic_create,
picoscale_256x_320x240_generic_destroy,
picoscale_256x_320x240_generic_threads,
picoscale_256x_320x240_generic_output,
picoscale_256x_320x240_generic_packets,
SOFTFILTER_API_VERSION,
/* NOTE(review): presumably the human-readable filter name - confirm against softfilter.h */
"Picoscale_256x-320x240",
/* NOTE(review): presumably the short identifier used to look the filter up - confirm against softfilter.h */
"picoscale_256x_320x240",
};
/* Plug-in entry point: returns the descriptor of this filter.
 * There is only a generic implementation, so the SIMD capability
 * mask is ignored. */
const struct softfilter_implementation *softfilter_get_implementation(
      softfilter_simd_mask_t simd)
{
   const struct softfilter_implementation *impl = &picoscale_256x_320x240_generic;

   (void)simd;

   return impl;
}
#ifdef RARCH_INTERNAL
#undef softfilter_get_implementation
#undef softfilter_thread_data
#undef filter_data
#endif

View file

@ -1036,6 +1036,7 @@ FILTERS
#include "../gfx/video_filters/dot_matrix_4x.c"
#include "../gfx/video_filters/upscale_1_5x.c"
#include "../gfx/video_filters/upscale_256x_320x240.c"
#include "../gfx/video_filters/picoscale_256x_320x240.c"
#endif
#ifdef HAVE_DSP_FILTER