// Mirror of https://github.com/SourMesen/Mesen.git
// (synced 2024-06-01 19:08:17 -04:00; 294 lines, 10 KiB, C++)
//NTSC filter based on Bisqwit's code/algorithm
//As described here:
//http://forums.nesdev.com/viewtopic.php?p=172329
|
|
#include "stdafx.h"
|
|
#include <cmath>
|
|
#include "BisqwitNtscFilter.h"
|
|
#include "PPU.h"
|
|
#include "EmulationSettings.h"
|
|
#include "Console.h"
|
|
|
|
//Builds the per-color signal level tables and starts the worker thread that
//decodes rows 120 and below of each frame.
//  console:    forwarded to BaseVideoFilter
//  resDivider: stored in _resDivider (divides the 8 samples generated per PPU pixel)
BisqwitNtscFilter::BisqwitNtscFilter(shared_ptr<Console> console, int resDivider) : BaseVideoFilter(console)
{
	_resDivider = resDivider;
	_stopThread = false;
	_workDone = false;

	const int8_t signalLumaLow[4] = { -29, -15, 22, 71 };
	const int8_t signalLumaHigh[4] = { 32, 66, 105, 105 };

	//Precalculate the low and high signal chosen for each 64 base colors
	for(int i = 0; i <= 0x3F; i++) {
		//Colors whose low nibble is $E or $F use the levels of color $1D
		const int color = (i & 0x0F) >= 0x0E ? 0x1D : i;
		const int hue = color & 0x0F;

		int low = signalLumaLow[color / 0x10];
		int high = signalLumaHigh[color / 0x10];
		if(hue == 13) {
			//Hue $D: the signal stays at the low level
			high = low;
		} else if(hue == 0) {
			//Hue $0: the signal stays at the high level
			low = high;
		}
		_signalLow[i] = low;
		_signalHigh[i] = high;
	}

	//Worker thread to improve decode speed.
	//"this" is captured explicitly: the thread dereferences the filter until the
	//destructor joins it (implicit capture of this through [=] is deprecated in C++20).
	_extraThread = std::thread([this]() {
		while(!_stopThread) {
			_waitWork.Wait();
			if(_stopThread) {
				break;
			}

			//Read the overscan once so every value used for this frame is consistent
			OverscanDimensions overscan = GetOverscan();
			const auto rowsAbove = 120 - overscan.Top;

			//Adjust outputbuffer to start at the middle of the picture
			uint32_t* outputBuffer = GetOutputBuffer();
			if(_keepVerticalRes) {
				outputBuffer += overscan.GetScreenWidth() * 8 / _resDivider * rowsAbove;
			} else {
				outputBuffer += overscan.GetScreenWidth() * 64 / _resDivider / _resDivider * rowsAbove;
			}

			//327360 = 120 rows * 341 * 8, i.e. the phase reached at the start of row 120
			DecodeFrame(120, 239 - overscan.Bottom, _ppuOutputBuffer, outputBuffer, (IsOddFrame() ? 8 : 0) + 327360);

			_workDone = true;
		}
	});
}
|
|
|
|
//Stops the worker thread and waits for it to exit.
BisqwitNtscFilter::~BisqwitNtscFilter()
{
	//The flag must be set before the signal: the worker re-checks it right after waking up
	_stopThread = true;

	//Wake the worker in case it is blocked waiting for work
	_waitWork.Signal();

	_extraThread.join();
}
|
|
|
|
void BisqwitNtscFilter::ApplyFilter(uint16_t *ppuOutputBuffer)
|
|
{
|
|
_ppuOutputBuffer = ppuOutputBuffer;
|
|
|
|
_workDone = false;
|
|
_waitWork.Signal();
|
|
DecodeFrame(GetOverscan().Top, 120, ppuOutputBuffer, GetOutputBuffer(), (IsOddFrame() ? 8 : 0) + GetOverscan().Top*341*8);
|
|
while(!_workDone) {}
|
|
}
|
|
|
|
//Returns the dimensions of the filtered output.
//The width is always scaled by 8 / _resDivider; the height is only scaled
//when _keepVerticalRes is false.
FrameInfo BisqwitNtscFilter::GetFrameInfo()
{
	OverscanDimensions overscan = GetOverscan();
	if(!_keepVerticalRes) {
		return { overscan.GetScreenWidth() * 8 / _resDivider, overscan.GetScreenHeight() * 8 / _resDivider, PPU::ScreenWidth, PPU::ScreenHeight, 4 };
	}

	//Vertical resolution kept as-is
	return { overscan.GetScreenWidth() * 8 / _resDivider, overscan.GetScreenHeight(), PPU::ScreenWidth, PPU::ScreenHeight, 4 };
}
|
|
|
|
void BisqwitNtscFilter::OnBeforeApplyFilter()
|
|
{
|
|
PictureSettings pictureSettings = _console->GetSettings()->GetPictureSettings();
|
|
NtscFilterSettings ntscSettings = _console->GetSettings()->GetNtscFilterSettings();
|
|
|
|
_keepVerticalRes = ntscSettings.KeepVerticalResolution;
|
|
|
|
const double pi = std::atan(1.0) * 4;
|
|
int contrast = (int)((pictureSettings.Contrast + 1.0) * (pictureSettings.Contrast + 1.0) * 167941);
|
|
int saturation = (int)((pictureSettings.Saturation + 1.0) * (pictureSettings.Saturation + 1.0) * 144044);
|
|
for(int i = 0; i < 27; i++) {
|
|
_sinetable[i] = (int8_t)(8 * std::sin(i * 2 * pi / 12 + pictureSettings.Hue * pi));
|
|
}
|
|
|
|
_yWidth = (int)(12 + ntscSettings.YFilterLength * 22);
|
|
_iWidth = (int)(12 + ntscSettings.IFilterLength * 22);
|
|
_qWidth = (int)(12 + ntscSettings.QFilterLength * 22);
|
|
|
|
_y = contrast / _yWidth;
|
|
|
|
_ir = (int)(contrast * 1.994681e-6 * saturation / _iWidth);
|
|
_qr = (int)(contrast * 9.915742e-7 * saturation / _qWidth);
|
|
|
|
_ig = (int)(contrast * 9.151351e-8 * saturation / _iWidth);
|
|
_qg = (int)(contrast * -6.334805e-7 * saturation / _qWidth);
|
|
|
|
_ib = (int)(contrast * -1.012984e-6 * saturation / _iWidth);
|
|
_qb = (int)(contrast * 1.667217e-6 * saturation / _qWidth);
|
|
}
|
|
|
|
void BisqwitNtscFilter::RecursiveBlend(int iterationCount, uint64_t *output, uint64_t *currentLine, uint64_t *nextLine, int pixelsPerCycle, bool verticalBlend)
|
|
{
|
|
//Blend 2 pixels at once
|
|
uint32_t width = GetOverscan().GetScreenWidth() * pixelsPerCycle / 2;
|
|
|
|
double scanlineIntensity = 1.0 - _console->GetSettings()->GetPictureSettings().ScanlineIntensity;
|
|
if(scanlineIntensity < 1.0 && (iterationCount == 2 || _resDivider == 4)) {
|
|
//Most likely extremely inefficient scanlines, but works
|
|
for(uint32_t x = 0; x < width; x++) {
|
|
uint64_t mixed;
|
|
if(verticalBlend) {
|
|
mixed = ((((currentLine[x] ^ nextLine[x]) & 0xfefefefefefefefeL) >> 1) + (currentLine[x] & nextLine[x]));
|
|
} else {
|
|
mixed = currentLine[x];
|
|
}
|
|
|
|
uint8_t r = (mixed >> 16) & 0xFF, g = (mixed >> 8) & 0xFF, b = mixed & 0xFF;
|
|
uint8_t r2 = (mixed >> 48) & 0xFF, g2 = (mixed >> 40) & 0xFF, b2 = (mixed >> 32) & 0xFF;
|
|
r = (uint8_t)(r * scanlineIntensity);
|
|
g = (uint8_t)(g * scanlineIntensity);
|
|
b = (uint8_t)(b * scanlineIntensity);
|
|
r2 = (uint8_t)(r2 * scanlineIntensity);
|
|
g2 = (uint8_t)(g2 * scanlineIntensity);
|
|
b2 = (uint8_t)(b2 * scanlineIntensity);
|
|
|
|
output[x] = ((uint64_t)r2 << 48) | ((uint64_t)g2 << 40) | ((uint64_t)b2 << 32) | (r << 16) | (g << 8) | b;
|
|
}
|
|
} else {
|
|
if(verticalBlend) {
|
|
for(uint32_t x = 0; x < width; x++) {
|
|
output[x] = ((((currentLine[x] ^ nextLine[x]) & 0xfefefefefefefefeL) >> 1) + (currentLine[x] & nextLine[x]));
|
|
}
|
|
} else {
|
|
memcpy(output, currentLine, width * sizeof(uint64_t));
|
|
}
|
|
}
|
|
|
|
iterationCount /= 2;
|
|
if(iterationCount > 0) {
|
|
RecursiveBlend(iterationCount, output - width * iterationCount, currentLine, output, pixelsPerCycle, verticalBlend);
|
|
RecursiveBlend(iterationCount, output + width * iterationCount, output, nextLine, pixelsPerCycle, verticalBlend);
|
|
}
|
|
}
|
|
|
|
void BisqwitNtscFilter::GenerateNtscSignal(int8_t *ntscSignal, int &phase, int rowNumber)
|
|
{
|
|
for(int x = -_paddingSize; x < 256 + _paddingSize; x++) {
|
|
uint16_t color = _ppuOutputBuffer[(rowNumber << 8) | (x < 0 ? 0 : (x >= 256 ? 255 : x))];
|
|
|
|
int8_t low = _signalLow[color & 0x3F];
|
|
int8_t high = _signalHigh[color & 0x3F];
|
|
int8_t emphasis = color >> 6;
|
|
|
|
uint16_t phaseBitmask = _bitmaskLut[std::abs(phase - (color & 0x0F)) % 12];
|
|
|
|
uint8_t voltage;
|
|
for(int j = 0; j < 8; j++) {
|
|
phaseBitmask <<= 1;
|
|
voltage = high;
|
|
if(phaseBitmask >= 0x40) {
|
|
if(phaseBitmask == 0x1000) {
|
|
phaseBitmask = 1;
|
|
} else {
|
|
voltage = low;
|
|
}
|
|
}
|
|
|
|
if(phaseBitmask & emphasis) {
|
|
voltage -= voltage / 4;
|
|
}
|
|
|
|
ntscSignal[((x + _paddingSize) << 3) | j] = voltage;
|
|
}
|
|
|
|
phase += _signalsPerPixel;
|
|
}
|
|
phase += (341 - 256 - _paddingSize * 2) * _signalsPerPixel;
|
|
}
|
|
|
|
void BisqwitNtscFilter::DecodeFrame(int startRow, int endRow, uint16_t *ppuOutputBuffer, uint32_t* outputBuffer, int startPhase)
|
|
{
|
|
int pixelsPerCycle = 8 / _resDivider;
|
|
int phase = startPhase;
|
|
constexpr int lineWidth = 256 + _paddingSize * 2;
|
|
int8_t rowSignal[lineWidth * _signalsPerPixel];
|
|
uint32_t rowPixelGap = GetOverscan().GetScreenWidth() * pixelsPerCycle;
|
|
if(!_keepVerticalRes) {
|
|
rowPixelGap *= pixelsPerCycle;
|
|
}
|
|
|
|
uint32_t* orgBuffer = outputBuffer;
|
|
|
|
for(int y = startRow; y <= endRow; y++) {
|
|
int startCycle = phase % 12;
|
|
|
|
//Convert the PPU's output to an NTSC signal
|
|
GenerateNtscSignal(rowSignal, phase, y);
|
|
|
|
//Convert the NTSC signal to RGB
|
|
NtscDecodeLine(lineWidth * _signalsPerPixel, rowSignal, outputBuffer, (startCycle + 7) % 12);
|
|
|
|
outputBuffer += rowPixelGap;
|
|
}
|
|
|
|
if(!_keepVerticalRes) {
|
|
//Generate the missing vertical lines
|
|
outputBuffer = orgBuffer;
|
|
int lastRow = 239 - GetOverscan().Bottom;
|
|
bool verticalBlend = _console->GetSettings()->GetNtscFilterSettings().VerticalBlend;
|
|
for(int y = startRow; y <= endRow; y++) {
|
|
uint64_t* currentLine = (uint64_t*)outputBuffer;
|
|
uint64_t* nextLine = y == lastRow ? currentLine : (uint64_t*)(outputBuffer + rowPixelGap);
|
|
uint64_t* buffer = (uint64_t*)(outputBuffer + rowPixelGap / 2);
|
|
|
|
RecursiveBlend(4 / _resDivider, buffer, currentLine, nextLine, pixelsPerCycle, verticalBlend);
|
|
|
|
outputBuffer += rowPixelGap;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* NTSC_DecodeLine(Width, Signal, Target, Phase0)
|
|
*
|
|
* Convert NES NTSC graphics signal into RGB using integer arithmetics only.
|
|
*
|
|
* Width: Number of NTSC signal samples.
|
|
* For a 256 pixels wide screen, this would be 256*8. 283*8 if you include borders.
|
|
*
|
|
* Signal: An array of Width samples.
|
|
* The following sample values are recognized:
|
|
* -29 = Luma 0 low 32 = Luma 0 high (-38 and 6 when attenuated)
|
|
* -15 = Luma 1 low 66 = Luma 1 high (-28 and 31 when attenuated)
|
|
* 22 = Luma 2 low 105 = Luma 2 high ( -1 and 58 when attenuated)
|
|
* 71 = Luma 3 low 105 = Luma 3 high ( 34 and 58 when attenuated)
|
|
* In this scale, sync signal would be -59 and colorburst would be -40 and 19,
|
|
* but these are not interpreted specially in this function.
|
|
* The value is calculated from the relative voltage with:
|
|
* floor((voltage-0.518)*1000/12)-15
|
|
*
|
|
* Target: Pointer to a storage for Width RGB32 samples (00rrggbb).
|
|
* Note that the function will produce a RGB32 value for _every_ half-clock-cycle.
|
|
* This means 2264 RGB samples if you render 283 pixels per scanline (incl. borders).
|
|
* The caller can pick and choose those columns they want from the signal
|
|
* to render the picture at their desired resolution.
|
|
*
|
|
* Phase0: An integer in range 0-11 that describes the phase offset into colors on this scanline.
|
|
* Would be generated from the PPU clock cycle counter at the start of the scanline.
|
|
* In essence it conveys in one integer the same information that real NTSC signal
|
|
* would convey in the colorburst period in the beginning of each scanline.
|
|
*/
|
|
void BisqwitNtscFilter::NtscDecodeLine(int width, const int8_t* signal, uint32_t* target, int phase0)
|
|
{
|
|
auto Read = [=](int pos) -> char { return pos >= 0 ? signal[pos] : 0; };
|
|
auto Cos = [=](int pos) -> char { return _sinetable[(pos + 36) % 12 + phase0]; };
|
|
auto Sin = [=](int pos) -> char { return _sinetable[(pos + 36) % 12 + 3 + phase0]; };
|
|
|
|
int brightness = (int)(_console->GetSettings()->GetPictureSettings().Brightness * 750);
|
|
int ysum = brightness, isum = 0, qsum = 0;
|
|
int offset = _resDivider + 4;
|
|
int leftOverscan = (GetOverscan().Left + _paddingSize) * 8 + offset;
|
|
int rightOverscan = width - (GetOverscan().Right + _paddingSize) * 8 + offset;
|
|
|
|
for(int s = 0; s < rightOverscan; s++) {
|
|
ysum += Read(s) - Read(s - _yWidth);
|
|
isum += Read(s) * Cos(s) - Read(s - _iWidth) * Cos(s - _iWidth);
|
|
qsum += Read(s) * Sin(s) - Read(s - _qWidth) * Sin(s - _qWidth);
|
|
|
|
if(!(s % _resDivider) && s >= leftOverscan) {
|
|
int r = std::min(255, std::max(0, (ysum*_y + isum*_ir + qsum*_qr) / 65536));
|
|
int g = std::min(255, std::max(0, (ysum*_y + isum*_ig + qsum*_qg) / 65536));
|
|
int b = std::min(255, std::max(0, (ysum*_y + isum*_ib + qsum*_qb) / 65536));
|
|
|
|
*target = 0xFF000000 | (r << 16) | (g << 8) | b;
|
|
target++;
|
|
}
|
|
}
|
|
}
|