/* Mirror of https://github.com/libretro/RetroArch.git
 * (synced 2024-05-20 13:27:22 -04:00; 476 lines, 16 KiB, C) */
/*  RetroArch - A frontend for libretro.
 *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
 *  Copyright (C) 2011-2018 - Daniel De Matteis
 *
 *  RetroArch is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with RetroArch.
 *  If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
/* Compile: gcc -o upscale_mix_240x160_320x240.so -shared upscale_mix_240x160_320x240.c -std=c99 -O3 -Wall -pedantic -fPIC */
|
|
|
|
#include "softfilter.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#ifdef RARCH_INTERNAL
|
|
#define softfilter_get_implementation upscale_mix_240x160_320x240_get_implementation
|
|
#define softfilter_thread_data upscale_mix_240x160_320x240_softfilter_thread_data
|
|
#define filter_data upscale_mix_240x160_320x240_filter_data
|
|
#endif
|
|
|
|
/* Holds the scaler routine chosen for this filter instance. */
typedef struct
{
   /* Converts one 240x160 frame at 'src' into a 320x240 frame at 'dst'.
    * Both strides are counted in uint16_t pixels, not bytes. */
   void (*upscale_mix_240x160_320x240)(
         uint16_t *dst, const uint16_t *src,
         uint16_t dst_stride, uint16_t src_stride);
} upscale_mix_function_t;
|
|
|
|
/* Per-worker description of one frame to process. */
struct softfilter_thread_data
{
   void *out_data;        /* Destination frame buffer */
   const void *in_data;   /* Source frame buffer */
   size_t out_pitch;      /* Bytes per destination row */
   size_t in_pitch;       /* Bytes per source row */
   unsigned colfmt;       /* Not referenced by this filter */
   unsigned width;        /* Source width in pixels */
   unsigned height;       /* Source height in pixels */
   int first;             /* Not referenced by this filter */
   int last;              /* Not referenced by this filter */
};
|
|
|
|
struct filter_data
|
|
{
|
|
unsigned threads;
|
|
struct softfilter_thread_data *workers;
|
|
unsigned in_fmt;
|
|
upscale_mix_function_t function;
|
|
};
|
|
|
|
/*******************************************************************
|
|
* Approximately bilinear scaler, 240x160 to 320x240
|
|
* Copyright (C) 2014 hi-ban, Nebuleon <nebuleon.fumika@gmail.com>
|
|
* (Optimisations by jdgleaver)
|
|
*******************************************************************/
|
|
|
|
/* RGB565 pixel blending helpers.
 *
 * 0x821 has bits 0, 5 and 11 set, i.e. the least significant bit of
 * each RGB565 colour channel. Adding ((A ^ B) & 0x821) before the
 * halving shift rounds every channel's average up; subtracting it
 * rounds down. Either way the low bit of a channel cannot leak into
 * the top of the channel below it.
 *
 * A, B : source pixels (uint16_t)
 * out  : pointer to the uint16_t receiving the result
 * tmp  : uint16_t scratch lvalue (unused by WEIGHT_1_1)
 *
 * WEIGHT_1_3 and WEIGHT_3_1 expand to two statements: only invoke
 * them as complete statements inside a braced block. */

/* out = average of A and B (rounded up per channel) */
#define UPSCALE_240__WEIGHT_1_1(A, B, out, tmp) \
   *(out) = ((A + B + ((A ^ B) & 0x821)) >> 1)

/* out = approximately 1/4 A + 3/4 B */
#define UPSCALE_240__WEIGHT_1_3(A, B, out, tmp) \
   tmp = ((A + B + ((A ^ B) & 0x821)) >> 1); \
   *(out) = ((tmp + B - ((tmp ^ B) & 0x821)) >> 1)

/* out = approximately 3/4 A + 1/4 B */
#define UPSCALE_240__WEIGHT_3_1(A, B, out, tmp) \
   tmp = ((A + B + ((A ^ B) & 0x821)) >> 1); \
   *(out) = ((A + tmp - ((A ^ tmp) & 0x821)) >> 1)

/* Upscales a 240x160 image to 320x240 using an approximate bilinear
 * resampling algorithm that only uses integer math.
 *
 * dst        : destination buffer, at least 240 rows of dst_stride pixels
 * src        : source buffer, at least 160 rows of src_stride pixels
 * dst_stride : destination row length in uint16_t pixels
 * src_stride : source row length in uint16_t pixels */
void upscale_mix_240x160_to_320x240(uint16_t *dst, const uint16_t *src,
      uint16_t dst_stride, uint16_t src_stride)
{
   /* There are 80 blocks of 3 pixels horizontally,
    * and 80 blocks of 2 pixels vertically
    * Each block of 3x2 becomes 4x3 */
   uint32_t block_x;
   uint32_t block_y;

   for (block_y = 0; block_y < 80; block_y++)
   {
      const uint16_t *block_src = src + block_y * src_stride * 2;
      uint16_t *block_dst       = dst + block_y * dst_stride * 3;

      for (block_x = 0; block_x < 80; block_x++)
      {
         uint16_t _4, _5, _6;
         uint16_t _1_2_weight_1_3;
         uint16_t _2_3_weight_1_1;
         uint16_t _4_5_weight_1_3;
         uint16_t _5_6_weight_1_1;
         uint16_t tmp;
         const uint16_t *block_src_ptr = block_src;
         uint16_t *block_dst_ptr       = block_dst;

         /* Horizontally:
          * Before(3):
          * (a)(b)(c)
          * After(4):
          * (a)(ab)(bc)(c)
          *
          * Vertically:
          * Before(2): After(3):
          * (a)        (a)
          * (b)        (ab)
          *            (b)
          */

         /* -- Row 1 -- */
         uint16_t _1 = *(block_src_ptr    );
         uint16_t _2 = *(block_src_ptr + 1);
         uint16_t _3 = *(block_src_ptr + 2);

         *(block_dst_ptr    ) = _1;
         UPSCALE_240__WEIGHT_1_3(_1, _2, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_2, _3, block_dst_ptr + 2, tmp);
         *(block_dst_ptr + 3) = _3;

         block_src_ptr += src_stride;
         block_dst_ptr += dst_stride;

         /* -- Row 2 --
          * Each column is the vertical blend of the horizontally
          * scaled upper and lower source rows, so both rows must
          * use the same horizontal weights per column. */
         _4 = *(block_src_ptr    );
         _5 = *(block_src_ptr + 1);
         _6 = *(block_src_ptr + 2);

         UPSCALE_240__WEIGHT_1_3(_1, _4, block_dst_ptr, tmp);
         UPSCALE_240__WEIGHT_1_3(_1, _2, &_1_2_weight_1_3, tmp);
         UPSCALE_240__WEIGHT_1_3(_4, _5, &_4_5_weight_1_3, tmp);
         UPSCALE_240__WEIGHT_1_3(_1_2_weight_1_3, _4_5_weight_1_3, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_2, _3, &_2_3_weight_1_1, tmp);
         UPSCALE_240__WEIGHT_1_1(_5, _6, &_5_6_weight_1_1, tmp);
         UPSCALE_240__WEIGHT_1_3(_2_3_weight_1_1, _5_6_weight_1_1, block_dst_ptr + 2, tmp);
         UPSCALE_240__WEIGHT_1_3(_3, _6, block_dst_ptr + 3, tmp);

         block_src_ptr += src_stride;
         block_dst_ptr += dst_stride;

         /* -- Row 3 -- */
         *(block_dst_ptr    ) = _4;
         UPSCALE_240__WEIGHT_1_3(_4, _5, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_5, _6, block_dst_ptr + 2, tmp);
         *(block_dst_ptr + 3) = _6;

         block_src += 3;
         block_dst += 4;
      }
   }
}

/* Upscales a 240x160 image to 320x213 (padding the result
 * to 320x240 via letterboxing) using an approximate bilinear
 * resampling algorithm that only uses integer math.
 *
 * dst        : destination buffer, at least 240 rows of dst_stride pixels
 * src        : source buffer, at least 160 rows of src_stride pixels
 * dst_stride : destination row length in uint16_t pixels
 * src_stride : source row length in uint16_t pixels
 *
 * Output layout: rows 0-12 black, rows 13-225 image, rows 226-239 black.
 * The letterbox memsets clear whole strides, not just 320 pixels. */
void upscale_mix_240x160_to_320x240_aspect(uint16_t *dst, const uint16_t *src,
      uint16_t dst_stride, uint16_t src_stride)
{
   /* There are 80 blocks of 3 pixels horizontally,
    * and 53 blocks of 3 pixels vertically
    * Each block of 3x3 becomes 4x4 */
   uint32_t block_x;
   uint32_t block_y;
   const uint16_t *block_src = NULL;
   uint16_t *block_dst       = NULL;

   /* Letterboxing - zero out first 13 rows */
   memset(dst, 0, sizeof(uint16_t) * dst_stride * 13);

   /* Scale blocks from 3x3 to 4x4 */
   for (block_y = 0; block_y < 53; block_y++)
   {
      block_src = src + block_y * src_stride * 3;
      block_dst = (dst + (13 * dst_stride)) + block_y * dst_stride * 4;

      for (block_x = 0; block_x < 80; block_x++)
      {
         const uint16_t *block_src_ptr = block_src;
         uint16_t *block_dst_ptr       = block_dst;

         uint16_t _1, _2, _3,
                  _4, _5, _6,
                  _7, _8, _9;

         uint16_t _1_2_weight_1_3;
         uint16_t _2_3_weight_1_1;
         uint16_t _4_5_weight_1_3;
         uint16_t _5_6_weight_1_1;
         uint16_t _7_8_weight_1_3;
         uint16_t _8_9_weight_1_1;

         uint16_t tmp;

         /* Horizontally:
          * Before(3):
          * (a)(b)(c)
          * After(4):
          * (a)(ab)(bc)(c)
          *
          * Vertically:
          * Before(3): After(4):
          * (a)        (a)
          * (b)        (ab)
          * (c)        (bc)
          *            (c)
          */

         /* -- Row 1 -- */
         _1 = *(block_src_ptr    );
         _2 = *(block_src_ptr + 1);
         _3 = *(block_src_ptr + 2);

         *(block_dst_ptr    ) = _1;
         UPSCALE_240__WEIGHT_1_3(_1, _2, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_2, _3, block_dst_ptr + 2, tmp);
         *(block_dst_ptr + 3) = _3;

         block_src_ptr += src_stride;
         block_dst_ptr += dst_stride;

         /* -- Row 2 --
          * Vertical blend of the horizontally scaled source rows
          * 1 and 2; both rows use the same horizontal weights. */
         _4 = *(block_src_ptr    );
         _5 = *(block_src_ptr + 1);
         _6 = *(block_src_ptr + 2);

         UPSCALE_240__WEIGHT_1_3(_1, _4, block_dst_ptr, tmp);
         UPSCALE_240__WEIGHT_1_3(_1, _2, &_1_2_weight_1_3, tmp);
         UPSCALE_240__WEIGHT_1_3(_4, _5, &_4_5_weight_1_3, tmp);
         UPSCALE_240__WEIGHT_1_3(_1_2_weight_1_3, _4_5_weight_1_3, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_2, _3, &_2_3_weight_1_1, tmp);
         UPSCALE_240__WEIGHT_1_1(_5, _6, &_5_6_weight_1_1, tmp);
         UPSCALE_240__WEIGHT_1_3(_2_3_weight_1_1, _5_6_weight_1_1, block_dst_ptr + 2, tmp);
         UPSCALE_240__WEIGHT_1_3(_3, _6, block_dst_ptr + 3, tmp);

         block_src_ptr += src_stride;
         block_dst_ptr += dst_stride;

         /* -- Row 3 --
          * Vertical blend of the horizontally scaled source rows
          * 2 and 3. */
         _7 = *(block_src_ptr    );
         _8 = *(block_src_ptr + 1);
         _9 = *(block_src_ptr + 2);

         UPSCALE_240__WEIGHT_1_3(_4, _7, block_dst_ptr, tmp);
         UPSCALE_240__WEIGHT_1_3(_4, _5, &_4_5_weight_1_3, tmp);
         UPSCALE_240__WEIGHT_1_3(_7, _8, &_7_8_weight_1_3, tmp);
         UPSCALE_240__WEIGHT_1_3(_4_5_weight_1_3, _7_8_weight_1_3, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_5, _6, &_5_6_weight_1_1, tmp);
         UPSCALE_240__WEIGHT_1_1(_8, _9, &_8_9_weight_1_1, tmp);
         UPSCALE_240__WEIGHT_1_3(_5_6_weight_1_1, _8_9_weight_1_1, block_dst_ptr + 2, tmp);
         UPSCALE_240__WEIGHT_1_3(_6, _9, block_dst_ptr + 3, tmp);

         block_src_ptr += src_stride;
         block_dst_ptr += dst_stride;

         /* -- Row 4 -- */
         *(block_dst_ptr    ) = _7;
         UPSCALE_240__WEIGHT_1_3(_7, _8, block_dst_ptr + 1, tmp);
         UPSCALE_240__WEIGHT_1_1(_8, _9, block_dst_ptr + 2, tmp);
         *(block_dst_ptr + 3) = _9;

         block_src += 3;
         block_dst += 4;
      }
   }

   /* Above scaling excludes the last row of the
    * source image (53 * 3 = 159 rows consumed).
    * Handle this separately: it is scaled horizontally
    * only and lands on output row 13 + 53 * 4 = 225. */
   block_src = src + (src_stride * 159);
   block_dst = dst + (225 * dst_stride);

   for (block_x = 0; block_x < 80; block_x++)
   {
      uint16_t tmp;
      const uint16_t *block_src_ptr = block_src;
      uint16_t *block_dst_ptr       = block_dst;

      /* Horizontally:
       * Before(3):
       * (a)(b)(c)
       * After(4):
       * (a)(ab)(bc)(c)
       */

      /* -- Row 1 -- */
      uint16_t _1 = *(block_src_ptr    );
      uint16_t _2 = *(block_src_ptr + 1);
      uint16_t _3 = *(block_src_ptr + 2);

      *(block_dst_ptr    ) = _1;
      UPSCALE_240__WEIGHT_1_3(_1, _2, block_dst_ptr + 1, tmp);
      UPSCALE_240__WEIGHT_1_1(_2, _3, block_dst_ptr + 2, tmp);
      *(block_dst_ptr + 3) = _3;

      block_src += 3;
      block_dst += 4;
   }

   /* Letterboxing - zero out last 14 rows */
   memset(dst + (226 * dst_stride), 0, sizeof(uint16_t) * dst_stride * 14);
}
|
|
|
|
/*******************************************************************
|
|
*******************************************************************/
|
|
|
|
static unsigned upscale_mix_240x160_320x240_generic_input_fmts(void)
|
|
{
|
|
return SOFTFILTER_FMT_RGB565;
|
|
}
|
|
|
|
/* Reports the output pixel formats for the given input formats.
 * The filter never converts between formats, so the mask is
 * returned unchanged. */
static unsigned upscale_mix_240x160_320x240_generic_output_fmts(unsigned input_fmts)
{
   return input_fmts;
}
|
|
|
|
static unsigned upscale_mix_240x160_320x240_generic_threads(void *data)
|
|
{
|
|
struct filter_data *filt = (struct filter_data*)data;
|
|
return filt->threads;
|
|
}
|
|
|
|
static void upscale_mix_240x160_320x240_initialize(struct filter_data *filt,
|
|
const struct softfilter_config *config,
|
|
void *userdata)
|
|
{
|
|
int keep_aspect = 1;
|
|
|
|
/* Assign default scaling functions */
|
|
filt->function.upscale_mix_240x160_320x240 = upscale_mix_240x160_to_320x240_aspect;
|
|
|
|
/* Read aspect ratio correction setting */
|
|
if (config->get_int(userdata, "keep_aspect", &keep_aspect, 1) && !keep_aspect)
|
|
filt->function.upscale_mix_240x160_320x240 = upscale_mix_240x160_to_320x240;
|
|
}
|
|
|
|
/* Allocates and configures a filter instance.
 *
 * config   : configuration accessor, used to read "keep_aspect"
 * in_fmt   : input pixel format, stored in the instance
 * userdata : opaque pointer forwarded to the configuration accessor
 *
 * out_fmt, max_width, max_height, threads and simd are not used.
 *
 * Returns the new instance (release it with the matching destroy
 * function), or NULL if an allocation fails. */
static void *upscale_mix_240x160_320x240_generic_create(const struct softfilter_config *config,
      unsigned in_fmt, unsigned out_fmt,
      unsigned max_width, unsigned max_height,
      unsigned threads, softfilter_simd_mask_t simd, void *userdata)
{
   struct filter_data *filt = (struct filter_data*)calloc(1, sizeof(*filt));

   if (!filt)
      return NULL;

   /* A single worker slot is enough: see filt->threads below */
   filt->workers = (struct softfilter_thread_data*)
      calloc(1, sizeof(struct softfilter_thread_data));

   if (!filt->workers)
   {
      free(filt);
      return NULL;
   }

   /* Apparently the code is not thread-safe,
    * so force single threaded operation... */
   filt->threads = 1;
   filt->in_fmt  = in_fmt;

   /* Assign scaling functions */
   upscale_mix_240x160_320x240_initialize(filt, config, userdata);

   return filt;
}
|
|
|
|
/* Computes the output frame size for a given input frame size.
 *
 * data       : filter instance (unused)
 * out_width  : receives the output width in pixels
 * out_height : receives the output height in pixels
 * width      : input width in pixels
 * height     : input height in pixels
 *
 * Only 240x160 input is upscaled (to 320x240); every other
 * size is passed through unchanged. */
static void upscale_mix_240x160_320x240_generic_output(void *data,
      unsigned *out_width, unsigned *out_height,
      unsigned width, unsigned height)
{
   if ((width == 240) && (height == 160))
   {
      *out_width  = 320;
      *out_height = 240;
   }
   else
   {
      *out_width  = width;
      *out_height = height;
   }
}
|
|
|
|
static void upscale_mix_240x160_320x240_generic_destroy(void *data)
|
|
{
|
|
struct filter_data *filt = (struct filter_data*)data;
|
|
if (!filt)
|
|
return;
|
|
free(filt->workers);
|
|
free(filt);
|
|
}
|
|
|
|
static void upscale_mix_240x160_320x240_work_cb_rgb565(void *data, void *thread_data)
|
|
{
|
|
struct filter_data *filt = (struct filter_data*)data;
|
|
struct softfilter_thread_data *thr = (struct softfilter_thread_data*)thread_data;
|
|
const uint16_t *input = (const uint16_t*)thr->in_data;
|
|
uint16_t *output = (uint16_t*)thr->out_data;
|
|
uint16_t in_stride = (uint16_t)(thr->in_pitch >> 1);
|
|
uint16_t out_stride = (uint16_t)(thr->out_pitch >> 1);
|
|
unsigned width = thr->width;
|
|
unsigned height = thr->height;
|
|
|
|
if ((width == 240) && (height == 160))
|
|
{
|
|
filt->function.upscale_mix_240x160_320x240(output, input, out_stride, in_stride);
|
|
return;
|
|
}
|
|
|
|
/* Input buffer is of dimensions that cannot be upscaled
|
|
* > Simply copy input to output */
|
|
|
|
/* If source and destination buffers have the
|
|
* same pitch, perform fast copy of raw pixel data */
|
|
if (in_stride == out_stride)
|
|
memcpy(output, input, thr->out_pitch * height);
|
|
else
|
|
{
|
|
/* Otherwise copy pixel data line-by-line */
|
|
unsigned y;
|
|
for (y = 0; y < height; y++)
|
|
{
|
|
memcpy(output, input, width * sizeof(uint16_t));
|
|
input += in_stride;
|
|
output += out_stride;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void upscale_mix_240x160_320x240_generic_packets(void *data,
|
|
struct softfilter_work_packet *packets,
|
|
void *output, size_t output_stride,
|
|
const void *input, unsigned width, unsigned height, size_t input_stride)
|
|
{
|
|
/* We are guaranteed single threaded operation
|
|
* (filt->threads = 1) so we don't need to loop
|
|
* over threads and can cull some code. This only
|
|
* makes the tiniest performance difference, but
|
|
* every little helps when running on an o3DS... */
|
|
struct filter_data *filt = (struct filter_data*)data;
|
|
struct softfilter_thread_data *thr = (struct softfilter_thread_data*)&filt->workers[0];
|
|
|
|
thr->out_data = (uint8_t*)output;
|
|
thr->in_data = (const uint8_t*)input;
|
|
thr->out_pitch = output_stride;
|
|
thr->in_pitch = input_stride;
|
|
thr->width = width;
|
|
thr->height = height;
|
|
|
|
/* TODO/FIXME - no XRGB8888 codepath? */
|
|
if (filt->in_fmt == SOFTFILTER_FMT_RGB565)
|
|
packets[0].work = upscale_mix_240x160_320x240_work_cb_rgb565;
|
|
packets[0].thread_data = thr;
|
|
}
|
|
|
|
static const struct softfilter_implementation upscale_mix_240x160_320x240_generic = {
|
|
upscale_mix_240x160_320x240_generic_input_fmts,
|
|
upscale_mix_240x160_320x240_generic_output_fmts,
|
|
|
|
upscale_mix_240x160_320x240_generic_create,
|
|
upscale_mix_240x160_320x240_generic_destroy,
|
|
|
|
upscale_mix_240x160_320x240_generic_threads,
|
|
upscale_mix_240x160_320x240_generic_output,
|
|
upscale_mix_240x160_320x240_generic_packets,
|
|
|
|
SOFTFILTER_API_VERSION,
|
|
"upscale_mix_240x160-320x240",
|
|
"upscale_mix_240x160_320x240",
|
|
};
|
|
|
|
const struct softfilter_implementation *softfilter_get_implementation(
|
|
softfilter_simd_mask_t simd)
|
|
{
|
|
return &upscale_mix_240x160_320x240_generic;
|
|
}
|
|
|
|
#ifdef RARCH_INTERNAL
|
|
#undef softfilter_get_implementation
|
|
#undef softfilter_thread_data
|
|
#undef filter_data
|
|
#endif
|