Linux: use futex_waitv syscall for atomic waiting

In order to make this possible, some features of the generic atomic-wait engine that futex_waitv cannot express (per-wait bit masks, non-equality comparison ops, and wait values wider than 32 bits) were removed.
Ivan Chikish authored on 2023-07-31 23:57:26 +03:00; committed by Ivan
parent 831a9fe012
commit d34287b2cc
51 changed files with 441 additions and 574 deletions
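For context, here is a minimal standalone sketch (not part of this commit) of the futex_waitv interface the new code path builds on: up to FUTEX_WAITV_MAX 32-bit futex words are passed to a single syscall together with an absolute timeout on a chosen clock. It assumes a kernel (5.16+) and uapi headers that provide SYS_futex_waitv, FUTEX_32 and struct futex_waitv (the commit adds fallback definitions for older headers); the variable names and the 100 ms timeout are purely illustrative.

#include <linux/futex.h>   // FUTEX_32, FUTEX_PRIVATE_FLAG, struct futex_waitv (recent uapi headers)
#include <sys/syscall.h>   // SYS_futex_waitv
#include <unistd.h>        // syscall
#include <ctime>           // clock_gettime, timespec
#include <cstdint>

static std::uint32_t word_a = 0;
static std::uint32_t word_b = 0;

// Block while both words still read 0. Returns the index of the entry that was woken,
// or -1 with errno set (EAGAIN if a value already differed, ETIMEDOUT on timeout, ...).
long wait_on_both()
{
    futex_waitv vec[2]{};
    vec[0].val = 0;
    vec[0].uaddr = reinterpret_cast<__u64>(&word_a);
    vec[0].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
    vec[1].val = 0;
    vec[1].uaddr = reinterpret_cast<__u64>(&word_b);
    vec[1].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;

    // Unlike plain FUTEX_WAIT, futex_waitv takes an absolute timeout on the given clock
    timespec ts{};
    ::clock_gettime(CLOCK_MONOTONIC, &ts);
    ts.tv_nsec += 100'000'000; // ~100 ms from now
    if (ts.tv_nsec >= 1'000'000'000)
    {
        ts.tv_sec++;
        ts.tv_nsec -= 1'000'000'000;
    }

    return syscall(SYS_futex_waitv, +vec, 2, 0, &ts, CLOCK_MONOTONIC);
}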

View file

@ -2172,14 +2172,14 @@ u64 thread_base::finalize(thread_state result_state) noexcept
const u64 _self = m_thread;
// Set result state (errored or finalized)
m_sync.fetch_op([&](u64& v)
m_sync.fetch_op([&](u32& v)
{
v &= -4;
v |= static_cast<u32>(result_state);
});
// Signal waiting threads
m_sync.notify_all(2);
m_sync.notify_all();
return _self;
}
@ -2266,8 +2266,18 @@ thread_state thread_ctrl::state()
void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
{
if (!usec)
{
return;
}
auto _this = g_tls_this_thread;
if (!alert && usec > 50000)
{
usec = 50000;
}
#ifdef __linux__
static thread_local struct linux_timer_handle_t
{
@ -2296,13 +2306,13 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
}
} fd_timer;
if (!alert && usec > 0 && usec <= 1000 && fd_timer != -1)
if (!alert && fd_timer != -1)
{
struct itimerspec timeout;
u64 missed;
timeout.it_value.tv_nsec = usec * 1'000ull;
timeout.it_value.tv_sec = 0;
timeout.it_value.tv_nsec = usec % 1'000'000 * 1'000ull;
timeout.it_value.tv_sec = usec / 1'000'000;
timeout.it_interval.tv_sec = 0;
timeout.it_interval.tv_nsec = 0;
timerfd_settime(fd_timer, 0, &timeout, NULL);
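For reference, a self-contained sketch (not from the diff; Linux-only, with invented names) of the timerfd-based sleep this path performs, now used for any non-alert wait rather than only sub-millisecond ones: arm a one-shot CLOCK_MONOTONIC timer using the same microsecond split and block on read() until it fires.

#include <sys/timerfd.h>   // timerfd_create, timerfd_settime, itimerspec
#include <unistd.h>        // read, close
#include <cstdint>

// Caller guarantees usec != 0 (a zero it_value would disarm the timer and block forever).
static void sleep_us_via_timerfd(std::uint64_t usec)
{
    const int fd = timerfd_create(CLOCK_MONOTONIC, 0);
    if (fd < 0)
    {
        return; // creation failed; a real caller would fall back to another waiting method
    }

    itimerspec timeout{};
    timeout.it_value.tv_sec = usec / 1'000'000;              // whole seconds
    timeout.it_value.tv_nsec = usec % 1'000'000 * 1'000ull;  // remaining microseconds as nanoseconds
    timerfd_settime(fd, 0, &timeout, nullptr);               // one-shot, relative timeout

    std::uint64_t expirations = 0;
    if (read(fd, &expirations, sizeof(expirations)) < 0)     // blocks until the timer expires
    {
        // interrupted by a signal or failed; nothing to do in this sketch
    }

    close(fd);
}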
@ -2312,15 +2322,27 @@ void thread_ctrl::wait_for(u64 usec, [[maybe_unused]] bool alert /* true */)
}
#endif
if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
if (alert)
{
return;
if (_this->m_sync.bit_test_reset(2) || _this->m_taskq)
{
return;
}
}
// Wait for signal and thread state abort
atomic_wait::list<2> list{};
list.set<0>(_this->m_sync, 0, 4 + 1);
list.set<1>(_this->m_taskq, nullptr);
if (alert)
{
list.set<0>(_this->m_sync, 0);
list.set<1>(utils::bless<atomic_t<u32>>(&_this->m_taskq)[1], 0);
}
else
{
list.set<0>(_this->m_dummy, 0);
}
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
}
@ -2331,29 +2353,27 @@ void thread_ctrl::wait_for_accurate(u64 usec)
return;
}
if (usec > 50000)
{
fmt::throw_exception("thread_ctrl::wait_for_accurate: unsupported amount");
}
#ifdef __linux__
return wait_for(usec, false);
#else
using namespace std::chrono_literals;
const auto until = std::chrono::steady_clock::now() + 1us * usec;
while (true)
{
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = usec <= 1000 ? 10 : 50;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
constexpr u64 host_min_quantum = 500;
#endif
if (usec >= host_min_quantum)
{
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
wait_for(usec - ((usec % host_min_quantum) + host_min_quantum), false);
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
wait_for(usec - (usec % host_min_quantum), false);
#endif
}
// TODO: Determine best value for yield delay
else if (usec >= host_min_quantum / 2)
@ -2374,6 +2394,7 @@ void thread_ctrl::wait_for_accurate(u64 usec)
usec = (until - current).count();
}
#endif
}
std::string thread_ctrl::get_name_cached()
@ -2440,7 +2461,7 @@ bool thread_base::join(bool dtor) const
for (u64 i = 0; (m_sync & 3) <= 1; i++)
{
m_sync.wait(0, 2, timeout);
m_sync.wait(m_sync & ~2, timeout);
if (m_sync & 2)
{
@ -2460,7 +2481,7 @@ void thread_base::notify()
{
// Set notification
m_sync |= 4;
m_sync.notify_one(4);
m_sync.notify_all();
}
u64 thread_base::get_native_id() const
@ -2497,7 +2518,7 @@ u64 thread_base::get_cycles()
{
cycles = static_cast<u64>(thread_time.tv_sec) * 1'000'000'000 + thread_time.tv_nsec;
#endif
if (const u64 old_cycles = m_sync.fetch_op([&](u64& v){ v &= 7; v |= (cycles << 3); }) >> 3)
if (const u64 old_cycles = m_cycles.exchange(cycles))
{
return cycles - old_cycles;
}
@ -2507,7 +2528,7 @@ u64 thread_base::get_cycles()
}
else
{
return m_sync >> 3;
return m_cycles;
}
}
@ -2560,8 +2581,8 @@ void thread_base::exec()
}
// Notify waiters
ptr->exec.release(nullptr);
ptr->exec.notify_all();
ptr->done.release(1);
ptr->done.notify_all();
}
if (ptr->next)

View file

@ -100,17 +100,19 @@ class thread_future
protected:
atomic_t<void(*)(thread_base*, thread_future*)> exec{};
atomic_t<u32> done{0};
public:
// Get reference to the atomic variable for inspection and waiting for
const auto& get_wait() const
{
return exec;
return done;
}
// Wait (preset)
void wait() const
{
exec.wait<atomic_wait::op_ne>(nullptr);
done.wait(0);
}
};
@ -131,8 +133,13 @@ private:
// Thread handle (platform-specific)
atomic_t<u64> m_thread{0};
// Thread state and cycles
atomic_t<u64> m_sync{0};
// Thread cycles
atomic_t<u64> m_cycles{0};
atomic_t<u32> m_dummy{0};
// Thread state
atomic_t<u32> m_sync{0};
// Thread name
atomic_ptr<std::string> m_tname;
@ -284,16 +291,22 @@ public:
}
atomic_wait::list<Max + 2> list{};
list.template set<Max>(_this->m_sync, 0, 4 + 1);
list.template set<Max + 1>(_this->m_taskq, nullptr);
list.template set<Max>(_this->m_sync, 0);
list.template set<Max + 1>(_this->m_taskq);
setter(list);
list.wait(atomic_wait_timeout{usec <= 0xffff'ffff'ffff'ffff / 1000 ? usec * 1000 : 0xffff'ffff'ffff'ffff});
}
template <atomic_wait::op Op = atomic_wait::op::eq, typename T, typename U>
template <typename T, typename U>
static inline void wait_on(T& wait, U old, u64 usec = -1)
{
wait_on_custom<1>([&](atomic_wait::list<3>& list){ list.set<0, Op>(wait, old); }, usec);
wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait, old); }, usec);
}
template <typename T>
static inline void wait_on(T& wait)
{
wait_on_custom<1>([&](atomic_wait::list<3>& list) { list.template set<0>(wait); });
}
// Exit.
@ -637,7 +650,7 @@ public:
{
bool notify_sync = false;
if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u64& v){ return !(v & 3) && (v |= 1); }).second)
if (s >= thread_state::aborting && thread::m_sync.fetch_op([](u32& v) { return !(v & 3) && (v |= 1); }).second)
{
notify_sync = true;
}
@ -650,7 +663,7 @@ public:
if (notify_sync)
{
// Notify after context abortion has been made so all conditions for wake-up be satisfied by the time of notification
thread::m_sync.notify_one(1);
thread::m_sync.notify_all();
}
if (s == thread_state::finished)

View file

@ -9,7 +9,7 @@ void cond_variable::imp_wait(u32 _old, u64 _timeout) noexcept
ensure(_old);
// Wait with timeout
m_value.wait(_old, c_signal_mask, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});
m_value.wait(_old, atomic_wait_timeout{_timeout > max_timeout ? umax : _timeout * 1000});
// Cleanup
m_value.atomic_op([](u32& value)
@ -47,10 +47,10 @@ void cond_variable::imp_wake(u32 _count) noexcept
if (_count > 1 || ((_old + (c_signal_mask & (0 - c_signal_mask))) & c_signal_mask) == c_signal_mask)
{
// Resort to notify_all if signal count reached max
m_value.notify_all(c_signal_mask);
m_value.notify_all();
}
else
{
m_value.notify_one(c_signal_mask);
m_value.notify_one();
}
}

View file

@ -2,6 +2,7 @@
#include "util/types.hpp"
#include "util/atomic.hpp"
#include "util/asm.hpp"
//! Simple unshrinkable array base for concurrent access. Only growths automatically.
//! There is no way to know the current size. The smaller index is, the faster it's accessed.
@ -280,12 +281,17 @@ public:
template <typename T>
class lf_queue final
{
atomic_t<lf_queue_item<T>*> m_head{nullptr};
atomic_t<u64> m_head{0};
lf_queue_item<T>* load(u64 value) const noexcept
{
return reinterpret_cast<lf_queue_item<T>*>(value >> 16);
}
// Extract all elements and reverse element order (FILO to FIFO)
lf_queue_item<T>* reverse() noexcept
{
if (auto* head = m_head.load() ? m_head.exchange(nullptr) : nullptr)
if (auto* head = load(m_head) ? load(m_head.exchange(0)) : nullptr)
{
if (auto* prev = head->m_link)
{
@ -311,35 +317,35 @@ public:
~lf_queue()
{
delete m_head.load();
delete load(m_head);
}
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(std::nullptr_t /*null*/ = nullptr) noexcept
{
if (m_head == nullptr)
if (m_head == 0)
{
m_head.template wait<Flags>(nullptr);
utils::bless<atomic_t<u32>>(&m_head)[1].wait(0);
}
}
const volatile void* observe() const noexcept
{
return m_head.load();
return load(m_head);
}
explicit operator bool() const noexcept
{
return m_head != nullptr;
return m_head != 0;
}
template <typename... Args>
void push(Args&&... args)
{
auto _old = m_head.load();
auto oldv = m_head.load();
auto _old = load(oldv);
auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);
while (!m_head.compare_exchange(_old, item))
while (!m_head.compare_exchange(oldv, reinterpret_cast<u64>(item) << 16))
{
item->m_link = _old;
}
@ -347,7 +353,7 @@ public:
if (!_old)
{
// Notify only if queue was empty
m_head.notify_one();
utils::bless<atomic_t<u32>>(&m_head)[1].notify_one();
}
}
@ -363,7 +369,7 @@ public:
lf_queue_slice<T> pop_all_reversed()
{
lf_queue_slice<T> result;
result.m_head = m_head.exchange(nullptr);
result.m_head = load(m_head.exchange(0));
return result;
}
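A standalone sketch (not part of the diff) of the pointer-packing scheme introduced above, assuming the 48-bit user-space pointers and little-endian layout of the x86-64/ARM64 hosts RPCS3 targets: the element pointer is stored in bits 16..63 of the 64-bit head word, so the upper 32-bit half (the word utils::bless<atomic_t<u32>>(&m_head)[1] exposes) is nonzero for any realistic queued pointer and zero while the queue is empty, making it usable as a plain 32-bit futex word. The sample pointer value is made up.

#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical heap pointer; user-space allocations fit in 48 bits on x86-64/ARM64
    void* item = reinterpret_cast<void*>(0x7f12'3456'7890ull);

    // push(): shift the pointer into bits 16..63 of the head word
    const std::uint64_t head = reinterpret_cast<std::uint64_t>(item) << 16;

    // load(): shift back down to recover the pointer
    void* restored = reinterpret_cast<void*>(head >> 16);

    // On a little-endian host the second u32 half is the high half (bits 32..63);
    // this is the value the queue waits on and notifies through the futex path.
    const std::uint32_t waitable_half = static_cast<std::uint32_t>(head >> 32);

    std::printf("restored=%p waitable_half=0x%x\n", restored, waitable_half);
    return 0;
}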

View file

@ -74,14 +74,14 @@ void shared_mutex::imp_wait()
break;
}
m_value.wait(old, c_sig);
m_value.wait(old);
}
}
void shared_mutex::imp_signal()
{
m_value += c_sig;
m_value.notify_one(c_sig);
m_value.notify_one();
}
void shared_mutex::imp_lock(u32 val)

View file

@ -38,7 +38,29 @@ constexpr NTSTATUS NTSTATUS_ALERTED = 0x101;
constexpr NTSTATUS NTSTATUS_TIMEOUT = 0x102;
#endif
#ifndef __linux__
#ifdef __linux__
#ifndef SYS_futex_waitv
#if defined(ARCH_X64) || defined(ARCH_ARM64)
#define SYS_futex_waitv 449
#endif
#endif
#ifndef FUTEX_32
#define FUTEX_32 2
#endif
#ifndef FUTEX_WAITV_MAX
#define FUTEX_WAITV_MAX 128
#endif
struct futex_waitv
{
__u64 val;
__u64 uaddr;
__u32 flags;
__u32 __reserved;
};
#else
enum
{
FUTEX_PRIVATE_FLAG = 0,
@ -113,7 +135,7 @@ inline int futex(volatile void* uaddr, int futex_op, uint val, const timespec* t
}
else
{
// TODO
// TODO: absolute timeout
}
map.erase(std::find(map.find(uaddr), map.end(), ref));

View file

@ -261,7 +261,7 @@ struct cpu_prof
if (threads.empty())
{
// Wait for messages if no work (don't waste CPU)
thread_ctrl::wait_on(registered, nullptr);
thread_ctrl::wait_on(registered);
continue;
}
@ -939,7 +939,7 @@ bool cpu_thread::check_state() noexcept
else
{
// TODO: fix the workaround
g_suspend_counter.wait(ctr, -4, atomic_wait_timeout{100});
g_suspend_counter.wait(ctr, atomic_wait_timeout{10'000});
}
}
else
@ -972,8 +972,7 @@ bool cpu_thread::check_state() noexcept
}
// Short sleep when yield flag is present alone (makes no sense when other methods which can stop thread execution have been done)
// Pass a mask of a single bit which is often unused to avoid notifications
s_dummy_atomic.wait(0, 1u << 30, atomic_wait_timeout{80'000});
s_dummy_atomic.wait(0, atomic_wait_timeout{80'000});
}
}
}
@ -1010,13 +1009,13 @@ cpu_thread& cpu_thread::operator=(thread_state)
if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit))
{
state.notify_one(cpu_flag::exit);
state.notify_one();
if (auto thread = try_get<spu_thread>())
{
if (u32 resv = atomic_storage<u32>::load(thread->raddr))
{
vm::reservation_notifier(resv).notify_all(-128);
vm::reservation_notifier(resv).notify_all();
}
}
}

View file

@ -73,7 +73,7 @@ void mic_context::operator()()
// Timestep in microseconds
constexpr u64 TIMESTEP = 256ull * 1'000'000ull / 48000ull;
u64 timeout = 0;
u64 oldvalue = 0;
u32 oldvalue = 0;
while (thread_ctrl::state() != thread_state::aborting)
{

View file

@ -374,7 +374,7 @@ public:
static constexpr auto thread_name = "Microphone Thread"sv;
protected:
atomic_t<u64> wakey = 0;
atomic_t<u32> wakey = 0;
// u32 signalStateLocalTalk = 9; // value is in range 0-10. 10 indicates talking, 0 indicating none.
// u32 signalStateFarTalk = 0; // value is in range 0-10. 10 indicates talking from far away, 0 indicating none.

View file

@ -164,7 +164,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr<char> msgString,
return CellSysutilError{ret + 0u};
}
const auto notify = std::make_shared<atomic_t<bool>>(false);
const auto notify = std::make_shared<atomic_t<u32>>(0);
const auto res = manager->create<rsx::overlays::message_dialog>()->show(is_blocking, msgString.get_ptr(), _type, [callback, userData, &return_code, is_blocking, notify](s32 status)
{
@ -186,7 +186,7 @@ error_code open_msg_dialog(bool is_blocking, u32 type, vm::cptr<char> msgString,
if (is_blocking && notify)
{
*notify = true;
*notify = 1;
notify->notify_one();
}
});

View file

@ -256,7 +256,7 @@ error_code cell_music_decode_read(vm::ptr<void> buf, vm::ptr<u32> startTime, u64
{
dec.read_pos = 0;
dec.decoder.clear();
dec.decoder.track_fully_consumed = true;
dec.decoder.track_fully_consumed = 1;
dec.decoder.track_fully_consumed.notify_one();
break;
}

View file

@ -2477,8 +2477,8 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, vm::ptr<u32>
spurs_res += 127;
spurs_res2 += 127;
spurs_res.notify_all(-128);
spurs_res2.notify_all(-128);
spurs_res.notify_all();
spurs_res2.notify_all();
u32 res_wkl;
const auto wkl = &spurs->wklInfo(wnum);

View file

@ -303,7 +303,7 @@ struct vdec_context final
return;
}
thread_ctrl::wait_on(in_cmd, nullptr);
thread_ctrl::wait_on(in_cmd);
slice = in_cmd.pop_all(); // Pop new command list
}())
{
@ -921,7 +921,7 @@ static error_code vdecOpen(ppu_thread& ppu, T type, U res, vm::cptr<CellVdecCb>
});
thrd->state -= cpu_flag::stop;
thrd->state.notify_one(cpu_flag::stop);
thrd->state.notify_one();
return CELL_OK;
}

View file

@ -1821,7 +1821,11 @@ void ppu_thread::cpu_task()
// Wait until the progress dialog is closed.
// We don't want to open a cell dialog while a native progress dialog is still open.
thread_ctrl::wait_on<atomic_wait::op_ne>(g_progr_ptotal, 0);
while (u32 v = g_progr_ptotal)
{
g_progr_ptotal.wait(v);
}
g_fxo->get<progress_dialog_workaround>().show_overlay_message_only = true;
// Sadly we can't postpone initializing guest time because we need to run PPU threads
@ -1839,7 +1843,7 @@ void ppu_thread::cpu_task()
}
ensure(spu.state.test_and_reset(cpu_flag::stop));
spu.state.notify_one(cpu_flag::stop);
spu.state.notify_one();
}
});
@ -2051,7 +2055,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
struct init_pushed
{
bool pushed = false;
atomic_t<bool> inited = false;
atomic_t<u32> inited = false;
};
call_history.data.resize(g_cfg.core.ppu_call_history ? call_history_max_size : 1);
@ -2100,7 +2104,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
{
while (!Emu.IsStopped() && !g_fxo->get<init_pushed>().inited)
{
thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, false);
thread_ctrl::wait_on(g_fxo->get<init_pushed>().inited, 0);
}
return false;
}
@ -2117,7 +2121,7 @@ ppu_thread::ppu_thread(utils::serial& ar)
{ppu_cmd::ptr_call, 0}, +[](ppu_thread&) -> bool
{
auto& inited = g_fxo->get<init_pushed>().inited;
inited = true;
inited = 1;
inited.notify_all();
return true;
}
@ -3046,7 +3050,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
if (ppu.cia < liblv2_begin || ppu.cia >= liblv2_end)
{
res.notify_all(-128);
res.notify_all();
}
if (addr == ppu.last_faddr)

View file

@ -21,7 +21,7 @@ inline void try_start(spu_thread& spu)
}).second)
{
spu.state -= cpu_flag::stop;
spu.state.notify_one(cpu_flag::stop);
spu.state.notify_one();
}
};
@ -273,7 +273,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value)
for (status_npc_sync_var old; (old = status_npc).status & SPU_STATUS_RUNNING;)
{
status_npc.wait(old);
utils::bless<atomic_t<u32>>(&status_npc)[0].wait(old.status);
}
}
}

View file

@ -647,7 +647,10 @@ void spu_cache::initialize()
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
{
// Initialize progress dialog (wait for previous progress done)
thread_ctrl::wait_on<atomic_wait::op_ne>(g_progr_ptotal, 0);
while (u32 v = g_progr_ptotal)
{
g_progr_ptotal.wait(v);
}
g_progr_ptotal += ::size32(func_list);
progr.emplace("Building SPU cache...");
@ -7795,7 +7798,7 @@ public:
{
minusb = eval(x);
}
const auto minusbx = bitcast<u8[16]>(minusb);
// Data with swapped endian from a load instruction
@ -11011,7 +11014,7 @@ struct spu_llvm_worker
return;
}
thread_ctrl::wait_on(registered, nullptr);
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
slice = registered.pop_all();
}())
{
@ -11178,7 +11181,7 @@ struct spu_llvm
{
// Interrupt profiler thread and put it to sleep
static_cast<void>(prof_mutex.reset());
thread_ctrl::wait_on(registered, nullptr);
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&registered)[1], 0);
continue;
}

View file

@ -2418,7 +2418,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
}
}
if (++i < 10)
if (true || ++i < 10)
{
busy_wait(500);
}
@ -2426,7 +2426,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
{
// Wait
_cpu->state += cpu_flag::wait + cpu_flag::temp;
bits->wait(old, wmask);
// bits->wait(old, wmask);
_cpu->check_state();
}
}())
@ -2542,7 +2542,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
v &= ~wmask;
});
bits->notify_all(wmask);
// bits->notify_all(wmask);
if (size == size0)
{
@ -3588,7 +3588,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
{
if (raddr)
{
vm::reservation_notifier(addr).notify_all(-128);
vm::reservation_notifier(addr).notify_all();
raddr = 0;
}
@ -3775,7 +3775,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args)
}
do_cell_atomic_128_store(addr, _ptr<spu_rdata_t>(args.lsa & 0x3ff80));
vm::reservation_notifier(addr).notify_all(-128);
vm::reservation_notifier(addr).notify_all();
}
bool spu_thread::do_mfc(bool can_escape, bool must_finish)
@ -4908,7 +4908,11 @@ s64 spu_thread::get_ch_value(u32 ch)
}
}
#ifdef __linux__
const bool reservation_busy_waiting = false;
#else
const bool reservation_busy_waiting = ((utils::get_tsc() >> 8) % 100 + ((raddr == spurs_addr) ? 50 : 0)) < g_cfg.core.spu_reservation_busy_waiting_percentage;
#endif
for (; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true))
{
@ -4930,8 +4934,11 @@ s64 spu_thread::get_ch_value(u32 ch)
if (raddr && (mask1 & ~SPU_EVENT_TM) == SPU_EVENT_LR)
{
// Don't busy-wait with TSX - memory is sensitive
if (!reservation_busy_waiting)
if (g_use_rtm || !reservation_busy_waiting)
{
#ifdef __linux__
vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{50'000});
#else
if (raddr - spurs_addr <= 0x80 && !g_cfg.core.spu_accurate_reservations && mask1 == SPU_EVENT_LR)
{
atomic_wait_engine::set_one_time_use_wait_callback(+[](u64) -> bool
@ -4944,7 +4951,7 @@ s64 spu_thread::get_ch_value(u32 ch)
// Wait without timeout, in this situation we have notifications for all writes making it possible
// Abort notifications are handled specially for performance reasons
vm::reservation_notifier(raddr).wait(rtime, -128);
vm::reservation_notifier(raddr).wait(rtime);
continue;
}
@ -4976,7 +4983,8 @@ s64 spu_thread::get_ch_value(u32 ch)
return true;
});
vm::reservation_notifier(raddr).wait(rtime, -128, atomic_wait_timeout{80'000});
vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{80'000});
#endif
}
else
{
@ -5464,7 +5472,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu)
{
group->run_state = SPU_THREAD_GROUP_STATUS_SUSPENDED;
spu.state += cpu_flag::signal;
spu.state.notify_one(cpu_flag::signal);
spu.state.notify_one();
return;
}
@ -5482,7 +5490,7 @@ extern void resume_spu_thread_group_from_waiting(spu_thread& spu)
thread->state -= cpu_flag::suspend;
}
thread->state.notify_one(cpu_flag::suspend + cpu_flag::signal);
thread->state.notify_one();
}
}
}
@ -6244,7 +6252,7 @@ s64 spu_channel::pop_wait(cpu_thread& spu, bool pop)
while (true)
{
thread_ctrl::wait_on(data, bit_wait);
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32{bit_wait >> 32});
old = data;
if (!(old & bit_wait))
@ -6325,7 +6333,7 @@ bool spu_channel::push_wait(cpu_thread& spu, u32 value, bool push)
return false;
}
thread_ctrl::wait_on(data, state);
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&data)[1], u32(state >> 32));
state = data;
}
}
@ -6369,7 +6377,7 @@ std::pair<u32, u32> spu_channel_4_t::pop_wait(cpu_thread& spu)
while (true)
{
thread_ctrl::wait_on(values, old);
thread_ctrl::wait_on(utils::bless<atomic_t<u32>>(&values)[0], u32(u64(std::bit_cast<u128>(old))));
old = values;
if (!old.waiting)

View file

@ -235,7 +235,7 @@ public:
// Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushed to jostling_value)
ensure(this->data.bit_test_reset(off_wait));
data.notify_one();
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
}
// Return true if count has changed from 0 to 1, this condition is considered satisfied even if we pushed a value directly to the special storage for waiting SPUs
@ -294,7 +294,7 @@ public:
if ((old & mask) == mask)
{
data.notify_one();
utils::bless<atomic_t<u32>>(&data)[1].notify_one();
}
return static_cast<u32>(old);
@ -386,7 +386,7 @@ struct spu_channel_4_t
// Turn off waiting bit manually (must succeed because waiting bit can only be resetted by the thread pushing to jostling_value)
ensure(atomic_storage<u8>::exchange(values.raw().waiting, 0));
values.notify_one();
utils::bless<atomic_t<u32>>(&values)[0].notify_one();
}
return;

View file

@ -1631,7 +1631,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
if (is_paused(target->state - cpu_flag::suspend))
{
target->state.notify_one(cpu_flag::suspend);
target->state.notify_one();
}
}
}
@ -1684,7 +1684,7 @@ void lv2_obj::schedule_all(u64 current_time)
if (notify_later_idx == std::size(g_to_notify))
{
// Out of notification slots, notify locally (resizable container is not worth it)
target->state.notify_one(cpu_flag::signal + cpu_flag::suspend);
target->state.notify_one();
}
else
{
@ -1718,7 +1718,7 @@ void lv2_obj::schedule_all(u64 current_time)
if (notify_later_idx == std::size(g_to_notify))
{
// Out of notification slots, notify locally (resizable container is not worth it)
target->state.notify_one(cpu_flag::notify);
target->state.notify_one();
}
else
{
@ -1948,7 +1948,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
u64 remaining = usec - passed;
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = is_usleep && remaining <= 1000 ? 10 : 50;
constexpr u64 host_min_quantum = 10;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
@ -1965,8 +1965,7 @@ bool lv2_obj::wait_timeout(u64 usec, ppu_thread* cpu, bool scale, bool is_usleep
if (remaining > host_min_quantum)
{
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum));
wait_for(remaining);
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
wait_for(remaining - (remaining % host_min_quantum));

View file

@ -183,7 +183,7 @@ error_code _sys_interrupt_thread_establish(ppu_thread& ppu, vm::ptr<u32> ih, u32
});
it->state -= cpu_flag::stop;
it->state.notify_one(cpu_flag::stop);
it->state.notify_one();
return result;
});

View file

@ -862,7 +862,7 @@ error_code mmapper_thread_recover_page_fault(cpu_thread* cpu)
if (cpu->state & cpu_flag::signal)
{
cpu->state.notify_one(cpu_flag::signal);
cpu->state.notify_one();
}
return CELL_OK;

View file

@ -1042,7 +1042,7 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id)
{
for (; index != umax; index--)
{
threads[index]->state.notify_one(cpu_flag::stop);
threads[index]->state.notify_one();
}
}
} notify_threads;
@ -1216,7 +1216,7 @@ error_code sys_spu_thread_group_resume(ppu_thread& ppu, u32 id)
{
for (; index != umax; index--)
{
threads[index]->state.notify_one(cpu_flag::suspend);
threads[index]->state.notify_one();
}
}
} notify_threads;
@ -1397,7 +1397,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value)
if (prev_resv && prev_resv != resv)
{
// Batch reservation notifications if possible
vm::reservation_notifier(prev_resv).notify_all(-128);
vm::reservation_notifier(prev_resv).notify_all();
}
prev_resv = resv;
@ -1407,7 +1407,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value)
if (prev_resv)
{
vm::reservation_notifier(prev_resv).notify_all(-128);
vm::reservation_notifier(prev_resv).notify_all();
}
group->exit_status = value;

View file

@ -434,7 +434,7 @@ public:
{
// Note: by the time of notification the thread could have been deallocated which is why the direct function is used
// TODO: Pass a narrower mask
atomic_wait_engine::notify_one(cpu, 4, atomic_wait::default_mask<atomic_bs_t<cpu_flag>>);
atomic_wait_engine::notify_one(cpu);
}
}

View file

@ -135,7 +135,7 @@ namespace vm
_xend();
#endif
if constexpr (Ack)
res.notify_all(-128);
res.notify_all();
return;
}
else
@ -149,7 +149,7 @@ namespace vm
_xend();
#endif
if constexpr (Ack)
res.notify_all(-128);
res.notify_all();
return result;
}
else
@ -204,7 +204,7 @@ namespace vm
#endif
res += 127;
if (Ack)
res.notify_all(-128);
res.notify_all();
return;
}
else
@ -218,7 +218,7 @@ namespace vm
#endif
res += 127;
if (Ack)
res.notify_all(-128);
res.notify_all();
return result;
}
else
@ -253,7 +253,7 @@ namespace vm
});
if constexpr (Ack)
res.notify_all(-128);
res.notify_all();
return;
}
else
@ -273,7 +273,7 @@ namespace vm
});
if (Ack && result)
res.notify_all(-128);
res.notify_all();
return result;
}
}
@ -293,7 +293,7 @@ namespace vm
}
if constexpr (Ack)
res.notify_all(-128);
res.notify_all();
return;
}
else
@ -313,7 +313,7 @@ namespace vm
}
if (Ack && result)
res.notify_all(-128);
res.notify_all();
return result;
}
}
@ -405,7 +405,7 @@ namespace vm
if constexpr (Ack)
{
res.notify_all(-128);
res.notify_all();
}
}
else
@ -415,7 +415,7 @@ namespace vm
if constexpr (Ack)
{
res.notify_all(-128);
res.notify_all();
}
return result;

View file

@ -55,7 +55,7 @@ namespace gl
job.completion_callback(result);
}
thread_ctrl::wait_on(m_work_queue, nullptr);
thread_ctrl::wait_on(m_work_queue);
}
}

View file

@ -161,7 +161,7 @@ namespace rsx
this->on_close = std::move(on_close);
visible = true;
const auto notify = std::make_shared<atomic_t<bool>>(false);
const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>();
overlayman.attach_thread_input(

View file

@ -295,7 +295,7 @@ namespace rsx
}
else if (!m_input_thread_abort)
{
thread_ctrl::wait_on(m_input_token_stack, nullptr);
thread_ctrl::wait_on(m_input_token_stack);
}
}
}

View file

@ -295,7 +295,7 @@ namespace rsx
{
if (!m_stop_input_loop)
{
const auto notify = std::make_shared<atomic_t<bool>>(false);
const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>();
if (interactive)

View file

@ -1621,7 +1621,7 @@ namespace rsx
update_panel();
const auto notify = std::make_shared<atomic_t<bool>>(false);
const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>();
overlayman.attach_thread_input(
@ -1631,7 +1631,7 @@ namespace rsx
while (!Emu.IsStopped() && !*notify)
{
notify->wait(false, atomic_wait_timeout{1'000'000});
notify->wait(0, atomic_wait_timeout{1'000'000});
}
}
}

View file

@ -240,7 +240,7 @@ namespace rsx
this->on_close = std::move(on_close);
visible = true;
const auto notify = std::make_shared<atomic_t<bool>>(false);
const auto notify = std::make_shared<atomic_t<u32>>(0);
auto& overlayman = g_fxo->get<display_manager>();
overlayman.attach_thread_input(
@ -250,7 +250,7 @@ namespace rsx
while (!Emu.IsStopped() && !*notify)
{
notify->wait(false, atomic_wait_timeout{1'000'000});
notify->wait(0, atomic_wait_timeout{1'000'000});
}
return CELL_OK;

View file

@ -20,7 +20,7 @@ namespace rsx
u64 user_interface::alloc_thread_bit()
{
auto [_old, ok] = this->thread_bits.fetch_op([](u64& bits)
auto [_old, ok] = this->thread_bits.fetch_op([](u32& bits)
{
if (~bits)
{
@ -385,7 +385,7 @@ namespace rsx
m_stop_pad_interception.release(stop_pad_interception);
m_stop_input_loop.release(true);
while (u64 b = thread_bits)
while (u32 b = thread_bits)
{
if (b == g_thread_bit)
{

View file

@ -85,7 +85,7 @@ namespace rsx
bool m_start_pad_interception = true;
atomic_t<bool> m_stop_pad_interception = false;
atomic_t<bool> m_input_thread_detached = false;
atomic_t<u64> thread_bits = 0;
atomic_t<u32> thread_bits = 0;
bool m_keyboard_input_enabled = false; // Allow keyboard input
bool m_keyboard_pad_handler_active = true; // Initialized as true to prevent keyboard input until proven otherwise.
bool m_allow_input_on_pause = false;

View file

@ -853,9 +853,8 @@ namespace rsx
g_fxo->get<vblank_thread>().set_thread(std::shared_ptr<named_thread<std::function<void()>>>(new named_thread<std::function<void()>>("VBlank Thread"sv, [this]() -> void
{
// See sys_timer_usleep for details
#ifdef __linux__
constexpr u32 host_min_quantum = 50;
constexpr u32 host_min_quantum = 10;
#else
constexpr u32 host_min_quantum = 500;
#endif
@ -878,8 +877,12 @@ namespace rsx
// Calculate time remaining to that time (0 if we passed it)
const u64 wait_for = current >= post_event_time ? 0 : post_event_time - current;
#ifdef __linux__
const u64 wait_sleep = wait_for;
#else
// Substract host operating system min sleep quantom to get sleep time
const u64 wait_sleep = wait_for - u64{wait_for >= host_min_quantum} * host_min_quantum;
#endif
if (!wait_for)
{
@ -3116,7 +3119,7 @@ namespace rsx
{
#ifdef __linux__
// NOTE: Assumption that timer initialization has succeeded
u64 host_min_quantum = remaining <= 1000 ? 10 : 50;
constexpr u64 host_min_quantum = 10;
#else
// Host scheduler quantum for windows (worst case)
// NOTE: On ps3 this function has very high accuracy
@ -3125,8 +3128,7 @@ namespace rsx
if (remaining >= host_min_quantum)
{
#ifdef __linux__
// Do not wait for the last quantum to avoid loss of accuracy
thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false);
thread_ctrl::wait_for(remaining, false);
#else
// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false);

View file

@ -213,7 +213,7 @@ namespace rsx
u32 last_known_code_start = 0;
atomic_t<u32> external_interrupt_lock{ 0 };
atomic_t<bool> external_interrupt_ack{ false };
atomic_t<bool> is_initialized{ false };
atomic_t<u32> is_initialized{0};
bool is_fifo_idle() const;
void flush_fifo();

View file

@ -48,7 +48,7 @@ namespace vk
}
}
thread_ctrl::wait_on(m_work_queue, nullptr);
thread_ctrl::wait_on(m_work_queue);
}
}

View file

@ -1699,7 +1699,7 @@ struct registers_decoder<NV4097_SET_SHADER_CONTROL>
static void dump(std::string& out, const decoded_type& decoded)
{
fmt::append(out, "Shader control: raw_value: 0x%x reg_count: %u%s%s",
decoded.shader_ctrl(), ((decoded.shader_ctrl() >> 24) & 0xFF), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) ? " depth_replace" : ""),
decoded.shader_ctrl(), ((decoded.shader_ctrl() >> 24) & 0xFF), ((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) ? " depth_replace" : ""),
((decoded.shader_ctrl() & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) ? " 32b_exports" : ""));
}
};

View file

@ -153,7 +153,7 @@ void fmt_class_string<cfg_mode>::format(std::string& out, u64 arg)
});
}
void Emulator::CallFromMainThread(std::function<void()>&& func, atomic_t<bool>* wake_up, bool track_emu_state, u64 stop_ctr) const
void Emulator::CallFromMainThread(std::function<void()>&& func, atomic_t<u32>* wake_up, bool track_emu_state, u64 stop_ctr) const
{
if (!track_emu_state)
{
@ -174,14 +174,14 @@ void Emulator::CallFromMainThread(std::function<void()>&& func, atomic_t<bool>*
void Emulator::BlockingCallFromMainThread(std::function<void()>&& func) const
{
atomic_t<bool> wake_up = false;
atomic_t<u32> wake_up = 0;
CallFromMainThread(std::move(func), &wake_up);
while (!wake_up)
{
ensure(thread_ctrl::get_current());
wake_up.wait(false);
wake_up.wait(0);
}
}
@ -424,7 +424,7 @@ void Emulator::Init()
make_path_verbose(dev_flash, true);
make_path_verbose(dev_flash2, true);
make_path_verbose(dev_flash3, true);
if (make_path_verbose(dev_usb, true))
{
make_path_verbose(dev_usb + "MUSIC/", false);
@ -2152,7 +2152,7 @@ void Emulator::RunPPU()
}
ensure(cpu.state.test_and_reset(cpu_flag::stop));
cpu.state.notify_one(cpu_flag::stop);
cpu.state.notify_one();
signalled_thread = true;
});
@ -2165,7 +2165,7 @@ void Emulator::RunPPU()
if (auto thr = g_fxo->try_get<named_thread<rsx::rsx_replay_thread>>())
{
thr->state -= cpu_flag::stop;
thr->state.notify_one(cpu_flag::stop);
thr->state.notify_one();
}
}
@ -2234,7 +2234,7 @@ void Emulator::FinalizeRunRequest()
}
ensure(spu.state.test_and_reset(cpu_flag::stop));
spu.state.notify_one(cpu_flag::stop);
spu.state.notify_one();
};
if (m_savestate_extension_flags1 & SaveStateExtentionFlags1::ShouldCloseMenu)
@ -2437,7 +2437,7 @@ void Emulator::Resume()
auto on_select = [](u32, cpu_thread& cpu)
{
cpu.state -= cpu_flag::dbg_global_pause;
cpu.state.notify_one(cpu_flag::dbg_global_pause);
cpu.state.notify_one();
};
idm::select<named_thread<ppu_thread>>(on_select);

View file

@ -54,7 +54,7 @@ constexpr bool is_error(game_boot_result res)
struct EmuCallbacks
{
std::function<void(std::function<void()>, atomic_t<bool>*)> call_from_main_thread;
std::function<void(std::function<void()>, atomic_t<u32>*)> call_from_main_thread;
std::function<void(bool)> on_run; // (start_playtime) continuing or going ingame, so start the clock
std::function<void()> on_pause;
std::function<void()> on_resume;
@ -180,7 +180,7 @@ public:
}
// Call from the GUI thread
void CallFromMainThread(std::function<void()>&& func, atomic_t<bool>* wake_up = nullptr, bool track_emu_state = true, u64 stop_ctr = umax) const;
void CallFromMainThread(std::function<void()>&& func, atomic_t<u32>* wake_up = nullptr, bool track_emu_state = true, u64 stop_ctr = umax) const;
// Blocking call from the GUI thread
void BlockingCallFromMainThread(std::function<void()>&& func) const;

View file

@ -68,7 +68,7 @@ void progress_dialog_server::operator()()
{
// Some backends like OpenGL actually initialize a lot of driver objects in the "on_init" method.
// Wait for init to complete within reasonable time. Abort just in case we have hardware/driver issues.
renderer->is_initialized.wait(false, atomic_wait_timeout(5 * 1000000000ull));
renderer->is_initialized.wait(0, atomic_wait_timeout(5 * 1000000000ull));
auto manager = g_fxo->try_get<rsx::overlays::display_manager>();
show_overlay_message = g_fxo->get<progress_dialog_workaround>().show_overlay_message_only;

View file

@ -60,7 +60,7 @@ void headless_application::InitializeCallbacks()
return false;
};
callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<bool>* wake_up)
callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<u32>* wake_up)
{
RequestCallFromMainThread(std::move(func), wake_up);
};
@ -166,7 +166,7 @@ void headless_application::InitializeCallbacks()
/**
* Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct.
*/
void headless_application::CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up)
void headless_application::CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up)
{
func();

View file

@ -30,8 +30,8 @@ private:
}
Q_SIGNALS:
void RequestCallFromMainThread(std::function<void()> func, atomic_t<bool>* wake_up);
void RequestCallFromMainThread(std::function<void()> func, atomic_t<u32>* wake_up);
private Q_SLOTS:
static void CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up);
static void CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up);
};

View file

@ -55,6 +55,7 @@ DYNAMIC_IMPORT("ntdll.dll", NtSetTimerResolution, NTSTATUS(ULONG DesiredResoluti
#ifdef __linux__
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/prctl.h>
#endif
#if defined(__APPLE__)
@ -443,6 +444,11 @@ int main(int argc, char** argv)
const u64 intro_time = (intro_stats.ru_utime.tv_sec + intro_stats.ru_stime.tv_sec) * 1000000000ull + (intro_stats.ru_utime.tv_usec + intro_stats.ru_stime.tv_usec) * 1000ull;
#endif
#ifdef __linux__
// Set timerslack value for Linux. The default value is 50,000ns. Change this to just 1 since we value precise timers.
prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
#endif
s_argv0 = argv[0]; // Save for report_fatal_error
// Only run RPCS3 to display an error

View file

@ -1366,7 +1366,7 @@ void debugger_frame::DoStep(bool step_over)
}
});
cpu->state.notify_one(s_pause_flags);
cpu->state.notify_one();
}
}
@ -1412,7 +1412,7 @@ void debugger_frame::RunBtnPress()
Emu.Resume();
}
cpu->state.notify_one(s_pause_flags);
cpu->state.notify_one();
m_debugger_list->EnableThreadFollowing();
}
}

View file

@ -421,7 +421,7 @@ void gui_application::InitializeCallbacks()
return false;
};
callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<bool>* wake_up)
callbacks.call_from_main_thread = [this](std::function<void()> func, atomic_t<u32>* wake_up)
{
RequestCallFromMainThread(std::move(func), wake_up);
};
@ -792,7 +792,7 @@ void gui_application::OnChangeStyleSheetRequest()
/**
* Using connects avoids timers being unable to be used in a non-qt thread. So, even if this looks stupid to just call func, it's succinct.
*/
void gui_application::CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up)
void gui_application::CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up)
{
func();

View file

@ -118,8 +118,8 @@ Q_SIGNALS:
void OnEnableDiscEject(bool enabled);
void OnEnableDiscInsert(bool enabled);
void RequestCallFromMainThread(std::function<void()> func, atomic_t<bool>* wake_up);
void RequestCallFromMainThread(std::function<void()> func, atomic_t<u32>* wake_up);
private Q_SLOTS:
static void CallFromMainThread(const std::function<void()>& func, atomic_t<bool>* wake_up);
static void CallFromMainThread(const std::function<void()>& func, atomic_t<u32>* wake_up);
};

View file

@ -1,6 +1,7 @@
#include "atomic.hpp"
#if defined(__linux__)
// This definition is unused on Linux
#define USE_FUTEX
#elif !defined(_WIN32)
#define USE_STD
@ -40,8 +41,8 @@ namespace utils
// Total number of entries.
static constexpr usz s_hashtable_size = 1u << 17;
// Reference counter combined with shifted pointer (which is assumed to be 47 bit)
static constexpr uptr s_ref_mask = (1u << 17) - 1;
// Reference counter combined with shifted pointer (which is assumed to be 48 bit)
static constexpr uptr s_ref_mask = 0xffff;
// Fix for silly on-first-use initializer
static bool s_null_wait_cb(const void*, u64, u64){ return true; };
@ -55,163 +56,17 @@ static thread_local bool(*s_tls_one_time_wait_cb)(u64 attempts) = nullptr;
// Callback for notification functions for optimizations
static thread_local void(*s_tls_notify_cb)(const void* data, u64 progress) = nullptr;
static inline bool operator &(atomic_wait::op lhs, atomic_wait::op_flag rhs)
{
return !!(static_cast<u8>(lhs) & static_cast<u8>(rhs));
}
// Compare data in memory with old value, and return true if they are equal
static NEVER_INLINE bool ptr_cmp(const void* data, u32 _size, u128 old128, u128 mask128, atomic_wait::info* ext = nullptr)
static NEVER_INLINE bool ptr_cmp(const void* data, u32 old, atomic_wait::info* ext = nullptr)
{
using atomic_wait::op;
using atomic_wait::op_flag;
const u8 size = static_cast<u8>(_size);
const op flag{static_cast<u8>(_size >> 8)};
bool result = false;
if (size <= 8)
{
u64 new_value = 0;
u64 old_value = static_cast<u64>(old128);
u64 mask = static_cast<u64>(mask128) & (u64{umax} >> ((64 - size * 8) & 63));
// Don't load memory on empty mask
switch (mask ? size : 0)
{
case 0: break;
case 1: new_value = reinterpret_cast<const atomic_t<u8>*>(data)->load(); break;
case 2: new_value = reinterpret_cast<const atomic_t<u16>*>(data)->load(); break;
case 4: new_value = reinterpret_cast<const atomic_t<u32>*>(data)->load(); break;
case 8: new_value = reinterpret_cast<const atomic_t<u64>*>(data)->load(); break;
default:
{
fmt::throw_exception("Bad size (arg=0x%x)", _size);
}
}
if (flag & op_flag::bit_not)
{
new_value = ~new_value;
}
if (!mask) [[unlikely]]
{
new_value = 0;
old_value = 0;
}
else
{
if (flag & op_flag::byteswap)
{
switch (size)
{
case 2:
{
new_value = stx::se_storage<u16>::swap(static_cast<u16>(new_value));
old_value = stx::se_storage<u16>::swap(static_cast<u16>(old_value));
mask = stx::se_storage<u16>::swap(static_cast<u16>(mask));
break;
}
case 4:
{
new_value = stx::se_storage<u32>::swap(static_cast<u32>(new_value));
old_value = stx::se_storage<u32>::swap(static_cast<u32>(old_value));
mask = stx::se_storage<u32>::swap(static_cast<u32>(mask));
break;
}
case 8:
{
new_value = stx::se_storage<u64>::swap(new_value);
old_value = stx::se_storage<u64>::swap(old_value);
mask = stx::se_storage<u64>::swap(mask);
break;
}
default:
{
break;
}
}
}
// Make most significant bit sign bit
const auto shv = std::countl_zero(mask);
new_value &= mask;
old_value &= mask;
new_value <<= shv;
old_value <<= shv;
}
s64 news = new_value;
s64 olds = old_value;
u64 newa = news < 0 ? (0ull - new_value) : new_value;
u64 olda = olds < 0 ? (0ull - old_value) : old_value;
switch (op{static_cast<u8>(static_cast<u8>(flag) & 0xf)})
{
case op::eq: result = old_value == new_value; break;
case op::slt: result = olds < news; break;
case op::sgt: result = olds > news; break;
case op::ult: result = old_value < new_value; break;
case op::ugt: result = old_value > new_value; break;
case op::alt: result = olda < newa; break;
case op::agt: result = olda > newa; break;
case op::pop:
{
// Count is taken from least significant byte and ignores some flags
const u64 count = static_cast<u64>(old128) & 0xff;
result = count < utils::popcnt64(new_value);
break;
}
default:
{
fmt::throw_exception("ptr_cmp(): unrecognized atomic wait operation.");
}
}
}
else if (size == 16 && (flag == op::eq || flag == (op::eq | op_flag::inverse)))
{
u128 new_value = 0;
u128 old_value = old128;
u128 mask = mask128;
// Don't load memory on empty mask
if (mask) [[likely]]
{
new_value = atomic_storage<u128>::load(*reinterpret_cast<const u128*>(data));
}
// TODO
result = !((old_value ^ new_value) & mask);
}
else if (size > 16 && !~mask128 && (flag == op::eq || flag == (op::eq | op_flag::inverse)))
{
// Interpret old128 as a pointer to the old value
ensure(!(old128 >> (64 + 17)));
result = std::memcmp(data, reinterpret_cast<const void*>(static_cast<uptr>(old128)), size) == 0;
}
else
{
fmt::throw_exception("ptr_cmp(): no alternative operations are supported for non-standard atomic wait yet.");
}
if (flag & op_flag::inverse)
{
result = !result;
}
// Check other wait variables if provided
if (result)
if (reinterpret_cast<const atomic_t<u32>*>(data)->load() == old)
{
if (ext) [[unlikely]]
{
for (auto e = ext; e->data; e++)
{
if (!ptr_cmp(e->data, e->size, e->old, e->mask))
if (!ptr_cmp(e->data, e->old))
{
return false;
}
@ -283,18 +138,15 @@ namespace
#endif
// Essentially a fat semaphore
struct cond_handle
struct alignas(64) cond_handle
{
// Combined pointer (most significant 47 bits) and ref counter (17 least significant bits)
// Combined pointer (most significant 48 bits) and ref counter (16 least significant bits)
atomic_t<u64> ptr_ref;
u64 tid;
u128 mask;
u128 oldv;
u32 oldv;
u64 tsc0;
u16 link;
u8 size;
u8 flag;
atomic_t<u32> sync;
#ifdef USE_STD
@ -316,7 +168,7 @@ namespace
mtx.init(mtx);
#endif
ensure(!ptr_ref.exchange((iptr << 17) | 1));
ensure(!ptr_ref.exchange((iptr << 16) | 1));
}
void destroy()
@ -324,10 +176,7 @@ namespace
tid = 0;
tsc0 = 0;
link = 0;
size = 0;
flag = 0;
sync.release(0);
mask = 0;
oldv = 0;
#ifdef USE_STD
@ -517,7 +366,7 @@ namespace
// TLS storage for few allocaded "semaphores" to allow skipping initialization
static thread_local tls_cond_handler s_tls_conds{};
static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1)
static u32 cond_alloc(uptr iptr, u32 tls_slot = -1)
{
// Try to get cond from tls slot instead
u16* ptls = tls_slot >= std::size(s_tls_conds.cond) ? nullptr : s_tls_conds.cond + tls_slot;
@ -526,8 +375,7 @@ static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1)
{
// Fast reinitialize
const u32 id = std::exchange(*ptls, 0);
s_cond_list[id].mask = mask;
s_cond_list[id].ptr_ref.release((iptr << 17) | 1);
s_cond_list[id].ptr_ref.release((iptr << 16) | 1);
return id;
}
@ -581,7 +429,6 @@ static u32 cond_alloc(uptr iptr, u128 mask, u32 tls_slot = -1)
const u32 id = level3 * 64 + std::countr_one(bits);
// Initialize new "semaphore"
s_cond_list[id].mask = mask;
s_cond_list[id].init(iptr);
return id;
}
@ -625,8 +472,6 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
{
// Fast finalization
cond->sync.release(0);
cond->size = 0;
cond->mask = 0;
*ptls = static_cast<u16>(cond_id);
return;
}
@ -652,7 +497,7 @@ static void cond_free(u32 cond_id, u32 tls_slot = -1)
s_cond_sem1.atomic_op(FN(x -= u128{1} << (level1 * 14)));
}
static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
static cond_handle* cond_id_lock(u32 cond_id, uptr iptr = 0)
{
bool did_ref = false;
@ -673,7 +518,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
return false;
}
if (iptr && (val >> 17) != iptr)
if (iptr && (val >> 16) != iptr)
{
// Pointer mismatch
return false;
@ -686,11 +531,6 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
return false;
}
if (!(mask & cond->mask) && cond->size)
{
return false;
}
if (!did_ref)
{
val++;
@ -702,7 +542,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
if (ok)
{
// Check other fields again
if (const u32 sync_val = cond->sync; sync_val == 0 || sync_val == 3 || (cond->size && !(mask & cond->mask)))
if (const u32 sync_val = cond->sync; sync_val == 0 || sync_val == 3)
{
did_ref = true;
continue;
@ -713,7 +553,7 @@ static cond_handle* cond_id_lock(u32 cond_id, u128 mask, uptr iptr = 0)
if ((old & s_ref_mask) == s_ref_mask)
{
fmt::throw_exception("Reference count limit (131071) reached in an atomic notifier.");
fmt::throw_exception("Reference count limit (%u) reached in an atomic notifier.", s_ref_mask);
}
break;
@ -736,8 +576,8 @@ namespace
u64 bits: 24; // Allocated bits
u64 prio: 24; // Reserved
u64 ref : 17; // Ref counter
u64 iptr: 47; // First pointer to use slot (to count used slots)
u64 ref : 16; // Ref counter
u64 iptr: 48; // First pointer to use slot (to count used slots)
};
// Need to spare 16 bits for ref counter
@ -760,7 +600,7 @@ namespace
static void slot_free(uptr ptr, atomic_t<u16>* slot, u32 tls_slot) noexcept;
template <typename F>
static auto slot_search(uptr iptr, u128 mask, F func) noexcept;
static auto slot_search(uptr iptr, F func) noexcept;
};
static_assert(sizeof(root_info) == 64);
@ -944,7 +784,7 @@ void root_info::slot_free(uptr iptr, atomic_t<u16>* slot, u32 tls_slot) noexcept
}
template <typename F>
FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept
FORCE_INLINE auto root_info::slot_search(uptr iptr, F func) noexcept
{
u32 index = 0;
[[maybe_unused]] u32 total = 0;
@ -974,7 +814,7 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept
for (u32 i = 0; i < cond_count; i++)
{
if (cond_id_lock(cond_ids[i], mask, iptr))
if (cond_id_lock(cond_ids[i], iptr))
{
if (func(cond_ids[i]))
{
@ -994,18 +834,82 @@ FORCE_INLINE auto root_info::slot_search(uptr iptr, u128 mask, F func) noexcept
}
}
SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext)
SAFE_BUFFERS(void)
atomic_wait_engine::wait(const void* data, u32 old_value, u64 timeout, atomic_wait::info* ext)
{
const auto stamp0 = utils::get_unique_tsc();
uint ext_size = 0;
if (!s_tls_wait_cb(data, 0, stamp0))
#ifdef __linux__
::timespec ts{};
if (timeout + 1)
{
if (ext) [[unlikely]]
{
// futex_waitv uses absolute timeout
::clock_gettime(CLOCK_MONOTONIC, &ts);
}
ts.tv_sec += timeout / 1'000'000'000;
ts.tv_nsec += timeout % 1'000'000'000;
if (ts.tv_nsec > 1'000'000'000)
{
ts.tv_sec++;
ts.tv_nsec -= 1'000'000'000;
}
}
futex_waitv vec[atomic_wait::max_list]{};
vec[0].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
vec[0].uaddr = reinterpret_cast<__u64>(data);
vec[0].val = old_value;
if (ext) [[unlikely]]
{
for (auto e = ext; e->data; e++)
{
ext_size++;
vec[ext_size].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
vec[ext_size].uaddr = reinterpret_cast<__u64>(e->data);
vec[ext_size].val = e->old;
}
}
if (ext_size) [[unlikely]]
{
if (syscall(SYS_futex_waitv, +vec, ext_size + 1, 0, timeout + 1 ? &ts : nullptr, CLOCK_MONOTONIC) == -1)
{
if (errno == ENOSYS)
{
fmt::throw_exception("futex_waitv is not supported (Linux kernel is too old)");
}
if (errno == EINVAL)
{
fmt::throw_exception("futex_waitv: bad param");
}
}
}
else
{
if (futex(const_cast<void*>(data), FUTEX_WAIT_PRIVATE, old_value, timeout + 1 ? &ts : nullptr) == -1)
{
if (errno == EINVAL)
{
fmt::throw_exception("futex: bad param");
}
}
}
return;
#endif
if (!s_tls_wait_cb(data, 0, 0))
{
return;
}
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
const auto stamp0 = utils::get_unique_tsc();
uint ext_size = 0;
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
uptr iptr_ext[atomic_wait::max_list - 1]{};
@ -1026,18 +930,18 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
}
}
iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data) & (~s_ref_mask >> 17);
iptr_ext[ext_size] = reinterpret_cast<uptr>(e->data) & (~s_ref_mask >> 16);
ext_size++;
}
}
const u32 cond_id = cond_alloc(iptr, mask, 0);
const u32 cond_id = cond_alloc(iptr, 0);
u32 cond_id_ext[atomic_wait::max_list - 1]{};
for (u32 i = 0; i < ext_size; i++)
{
cond_id_ext[i] = cond_alloc(iptr_ext[i], ext[i].mask, i + 1);
cond_id_ext[i] = cond_alloc(iptr_ext[i], i + 1);
}
const auto slot = root_info::slot_alloc(iptr);
@ -1060,8 +964,6 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
// Store some info for notifiers (some may be unused)
cond->link = 0;
cond->size = static_cast<u8>(size);
cond->flag = static_cast<u8>(size >> 8);
cond->oldv = old_value;
cond->tsc0 = stamp0;
@ -1071,8 +973,6 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
{
// Extensions point to original cond_id, copy remaining info
cond_ext[i]->link = cond_id;
cond_ext[i]->size = static_cast<u8>(ext[i].size);
cond_ext[i]->flag = static_cast<u8>(ext[i].size >> 8);
cond_ext[i]->oldv = ext[i].old;
cond_ext[i]->tsc0 = stamp0;
@ -1105,7 +1005,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
u64 attempts = 0;
while (ptr_cmp(data, size, old_value, mask, ext))
while (ptr_cmp(data, old_value, ext))
{
if (s_tls_one_time_wait_cb)
{
@ -1263,7 +1163,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::wait(const void* data, u32 size, u128 old
}
template <bool NoAlert = false>
static u32 alert_sema(u32 cond_id, u32 size, u128 mask)
static u32 alert_sema(u32 cond_id, u32 size)
{
ensure(cond_id);
@ -1271,11 +1171,11 @@ static u32 alert_sema(u32 cond_id, u32 size, u128 mask)
u32 ok = 0;
if (!cond->size || mask & cond->mask)
if (true)
{
// Redirect if necessary
const auto _old = cond;
const auto _new = _old->link ? cond_id_lock(_old->link, u128(-1)) : _old;
const auto _new = _old->link ? cond_id_lock(_old->link) : _old;
if (_new && _new->tsc0 == _old->tsc0)
{
@ -1336,50 +1236,58 @@ void atomic_wait_engine::set_notify_callback(void(*cb)(const void*, u64))
s_tls_notify_cb = cb;
}
void atomic_wait_engine::notify_one(const void* data, u32 size, u128 mask)
void atomic_wait_engine::notify_one(const void* data)
{
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
if (s_tls_notify_cb)
s_tls_notify_cb(data, 0);
root_info::slot_search(iptr, mask, [&](u32 cond_id)
#ifdef __linux__
futex(const_cast<void*>(data), FUTEX_WAKE_PRIVATE, 1);
#else
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
root_info::slot_search(iptr, [&](u32 cond_id)
{
if (alert_sema(cond_id, size, mask))
if (alert_sema(cond_id, 4))
{
return true;
}
return false;
});
#endif
if (s_tls_notify_cb)
s_tls_notify_cb(data, -1);
}
SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u128 mask)
SAFE_BUFFERS(void)
atomic_wait_engine::notify_all(const void* data)
{
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 17);
if (s_tls_notify_cb)
s_tls_notify_cb(data, 0);
#ifdef __linux__
futex(const_cast<void*>(data), FUTEX_WAKE_PRIVATE, 1);
#else
const uptr iptr = reinterpret_cast<uptr>(data) & (~s_ref_mask >> 16);
// Array count for batch notification
u32 count = 0;
// Array itself.
u32 cond_ids[128];
root_info::slot_search(iptr, mask, [&](u32 cond_id)
root_info::slot_search(iptr, [&](u32 cond_id)
{
if (count >= 128)
{
// Unusual big amount of sema: fallback to notify_one alg
alert_sema(cond_id, size, mask);
alert_sema(cond_id, 4);
return false;
}
u32 res = alert_sema<true>(cond_id, size, mask);
u32 res = alert_sema<true>(cond_id, 4);
if (~res <= u16{umax})
{
@ -1395,7 +1303,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u1
{
const u32 cond_id = *(std::end(cond_ids) - i - 1);
if (!s_cond_list[cond_id].wakeup(size ? 1 : 2))
if (!s_cond_list[cond_id].wakeup(1))
{
*(std::end(cond_ids) - i - 1) = ~cond_id;
}
@ -1434,6 +1342,7 @@ SAFE_BUFFERS(void) atomic_wait_engine::notify_all(const void* data, u32 size, u1
{
cond_free(~*(std::end(cond_ids) - i - 1));
}
#endif
if (s_tls_notify_cb)
s_tls_notify_cb(data, -1);

View file

@ -129,54 +129,21 @@ enum class atomic_wait_timeout : u64
inf = 0xffffffffffffffff,
};
template <typename T>
class lf_queue;
namespace stx
{
template <typename T>
class atomic_ptr;
}
// Various extensions for atomic_t::wait
namespace atomic_wait
{
// Max number of simultaneous atomic variables to wait on (can be extended if really necessary)
constexpr uint max_list = 8;
enum class op : u8
{
eq, // Wait while value is bitwise equal to
slt, // Wait while signed value is less than
sgt, // Wait while signed value is greater than
ult, // Wait while unsigned value is less than
ugt, // Wait while unsigned value is greater than
alt, // Wait while absolute value is less than
agt, // Wait while absolute value is greater than
pop, // Wait while set bit count of the value is less than
__max
};
static_assert(static_cast<u8>(op::__max) == 8);
enum class op_flag : u8
{
inverse = 1 << 4, // Perform inverse operation (negate the result)
bit_not = 1 << 5, // Perform bitwise NOT on loaded value before operation
byteswap = 1 << 6, // Perform byteswap on both arguments and masks when applicable
};
constexpr op_flag op_be = std::endian::native == std::endian::little ? op_flag::byteswap : op_flag{0};
constexpr op_flag op_le = std::endian::native == std::endian::little ? op_flag{0} : op_flag::byteswap;
constexpr op operator |(op_flag lhs, op_flag rhs)
{
return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
}
constexpr op operator |(op_flag lhs, op rhs)
{
return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
}
constexpr op operator |(op lhs, op_flag rhs)
{
return op{static_cast<u8>(static_cast<u8>(lhs) | static_cast<u8>(rhs))};
}
constexpr op op_ne = op::eq | op_flag::inverse;
constexpr struct any_value_t
{
template <typename T>
@ -186,46 +153,10 @@ namespace atomic_wait
}
} any_value;
template <typename X>
using payload_type = decltype(std::declval<X>().observe());
template <typename X, typename T = payload_type<X>>
constexpr u128 default_mask = sizeof(T) <= 8 ? u128{u64{umax} >> ((64 - sizeof(T) * 8) & 63)} : u128(-1);
template <typename X, typename T = payload_type<X>>
constexpr u128 get_value(X&, T value = T{}, ...)
{
static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
static_assert(sizeof(T) <= 16);
return std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
}
struct info
{
const void* data;
u32 size;
u128 old;
u128 mask;
template <typename X, typename T = payload_type<X>>
constexpr void set_value(X& a, T value = T{})
{
old = get_value(a, value);
}
template <typename X, typename T = payload_type<X>>
constexpr void set_mask(T value)
{
static_assert((sizeof(T) & (sizeof(T) - 1)) == 0);
static_assert(sizeof(T) <= 16);
mask = std::bit_cast<get_uint_t<sizeof(T)>, T>(value);
}
template <typename X, typename T = payload_type<X>>
constexpr void set_mask()
{
mask = default_mask<X>;
}
u32 old;
};
template <uint Max, typename... T>
@ -243,9 +174,9 @@ namespace atomic_wait
constexpr list& operator=(const list&) noexcept = default;
template <typename... U, typename = std::void_t<decltype(std::declval<U>().template wait<op::eq>(any_value))...>>
template <typename... U, typename = std::void_t<decltype(std::declval<U>().wait(any_value))...>>
constexpr list(U&... vars)
: m_info{{&vars, sizeof(vars.observe()), get_value(vars), default_mask<U>}...}
: m_info{{&vars, 0}...}
{
static_assert(sizeof...(U) == Max, "Inconsistent amount of atomics.");
}
@ -256,40 +187,37 @@ namespace atomic_wait
static_assert(sizeof...(U) == Max, "Inconsistent amount of values.");
auto* ptr = m_info;
((ptr->template set_value<T>(*static_cast<T*>(ptr->data), values), ptr++), ...);
(((ptr->old = std::bit_cast<u32>(values)), ptr++), ...);
return *this;
}
template <typename... U>
constexpr list& masks(U... masks)
{
static_assert(sizeof...(U) <= Max, "Too many masks.");
auto* ptr = m_info;
((ptr++)->template set_mask<T>(masks), ...);
return *this;
}
template <uint Index, op Flags = op::eq, typename T2, typename U, typename = std::void_t<decltype(std::declval<T2>().template wait<op::eq>(any_value))>>
template <uint Index, typename T2, typename U, typename = std::void_t<decltype(std::declval<T2>().wait(any_value))>>
constexpr void set(T2& var, U value)
{
static_assert(Index < Max);
m_info[Index].data = &var;
m_info[Index].size = sizeof(var.observe()) | (static_cast<u8>(Flags) << 8);
m_info[Index].template set_value<T2>(var, value);
m_info[Index].template set_mask<T2>();
m_info[Index].old = std::bit_cast<u32>(value);
}
template <uint Index, op Flags = op::eq, typename T2, typename U, typename V, typename = std::void_t<decltype(std::declval<T2>().template wait<op::eq>(any_value))>>
constexpr void set(T2& var, U value, V mask)
template <uint Index, typename T2>
constexpr void set(lf_queue<T2>& var, std::nullptr_t = nullptr)
{
static_assert(Index < Max);
static_assert(sizeof(var) == sizeof(uptr));
m_info[Index].data = &var;
m_info[Index].size = sizeof(var.observe()) | (static_cast<u8>(Flags) << 8);
m_info[Index].template set_value<T2>(var, value);
m_info[Index].template set_mask<T2>(mask);
m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
m_info[Index].old = 0;
}
template <uint Index, typename T2>
constexpr void set(stx::atomic_ptr<T2>& var, std::nullptr_t = nullptr)
{
static_assert(Index < Max);
static_assert(sizeof(var) == sizeof(uptr));
m_info[Index].data = reinterpret_cast<char*>(&var) + sizeof(u32);
m_info[Index].old = 0;
}
// Timeout is discouraged
@ -302,7 +230,7 @@ namespace atomic_wait
}
};
template <typename... T, typename = std::void_t<decltype(std::declval<T>().template wait<op::eq>(any_value))...>>
template <typename... T, typename = std::void_t<decltype(std::declval<T>().wait(any_value))...>>
list(T&... vars) -> list<sizeof...(T), T...>;
}
@ -322,20 +250,15 @@ private:
template <uint Max, typename... T>
friend class atomic_wait::list;
static void wait(const void* data, u32 size, u128 old_value, u64 timeout, u128 mask, atomic_wait::info* ext = nullptr);
static void wait(const void* data, u32 old_value, u64 timeout, atomic_wait::info* ext = nullptr);
public:
static void notify_one(const void* data, u32 size, u128 mask128);
static void notify_all(const void* data, u32 size, u128 mask128);
static void notify_one(const void* data);
static void notify_all(const void* data);
static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0));
static void set_notify_callback(void(*cb)(const void* data, u64 progress));
static void set_one_time_use_wait_callback(bool(*cb)(u64 progress));
static void notify_all(const void* data)
{
notify_all(data, 0, u128(-1));
}
static void set_one_time_use_wait_callback(bool (*cb)(u64 progress));
};
template <uint Max, typename... T>
@ -343,7 +266,7 @@ void atomic_wait::list<Max, T...>::wait(atomic_wait_timeout timeout)
{
static_assert(!!Max, "Cannot initiate atomic wait with empty list.");
atomic_wait_engine::wait(m_info[0].data, m_info[0].size, m_info[0].old, static_cast<u64>(timeout), m_info[0].mask, m_info + 1);
atomic_wait_engine::wait(m_info[0].data, m_info[0].old, static_cast<u64>(timeout), m_info + 1);
}
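
Each slot of the trimmed-down list is now just an address plus a 32-bit expected value, and the first slot is handed straight to the engine with the remaining slots as extra info. A minimal usage sketch with the reduced API; the names are illustrative, the timeout value is interpreted as nanoseconds, and this assumes the surrounding header is included:

// Illustrative only: sleep until either 32-bit variable leaves the value given for it.
static void wait_until_ready_or_cancel(atomic_t<u32>& ready, atomic_t<u32>& cancel)
{
    atomic_wait::list<2> waiters{};
    waiters.set<0>(ready, 0);   // sleep while ready == 0
    waiters.set<1>(cancel, 0);  // ...and while cancel == 0
    waiters.wait(atomic_wait_timeout{10'000'000}); // ~10 ms in ns; wakeups may be spurious, so callers re-check
}
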
// Helper class, provides access to compiler-specific atomic intrinsics
@ -1759,46 +1682,31 @@ public:
});
}
// Timeout is discouraged
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const
requires(sizeof(type) == 4)
{
const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value);
const u128 mask = atomic_wait::default_mask<atomic_t>;
atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
atomic_wait_engine::wait(&m_data, std::bit_cast<u32>(old_value), static_cast<u64>(timeout));
}
// Overload with mask (only selected bits are checked), timeout is discouraged
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(type old_value, type mask_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
[[deprecated]] void wait(type old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const
requires(sizeof(type) == 8)
{
const u128 old = std::bit_cast<get_uint_t<sizeof(T)>>(old_value);
const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
atomic_wait_engine::wait(&m_data, sizeof(T) | (static_cast<u8>(Flags) << 8), old, static_cast<u64>(timeout), mask);
atomic_wait::info ext[2]{};
ext[0].data = reinterpret_cast<const char*>(&m_data) + 4;
ext[0].old = std::bit_cast<u64>(old_value) >> 32;
atomic_wait_engine::wait(&m_data, std::bit_cast<u64>(old_value), static_cast<u64>(timeout), ext);
}
void notify_one() noexcept
void notify_one()
requires(sizeof(type) == 4 || sizeof(type) == 8)
{
atomic_wait_engine::notify_one(&m_data, sizeof(T), atomic_wait::default_mask<atomic_t>);
atomic_wait_engine::notify_one(&m_data);
}
// Notify with mask, allowing to not wake up thread which doesn't wait on this mask
void notify_one(type mask_value) noexcept
void notify_all()
requires(sizeof(type) == 4 || sizeof(type) == 8)
{
const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
atomic_wait_engine::notify_one(&m_data, sizeof(T), mask);
}
void notify_all() noexcept
{
atomic_wait_engine::notify_all(&m_data, sizeof(T), atomic_wait::default_mask<atomic_t>);
}
// Notify all threads with mask, allowing to not wake up threads which don't wait on them
void notify_all(type mask_value) noexcept
{
const u128 mask = std::bit_cast<get_uint_t<sizeof(T)>>(mask_value);
atomic_wait_engine::notify_all(&m_data, sizeof(T), mask);
atomic_wait_engine::notify_all(&m_data);
}
};
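
The per-atomic overloads are now a thin wrapper over a single 32-bit futex word: no masks, no comparison ops, and the 8-byte overload survives only as a deprecated shim that registers the upper half of the value as an extra wait slot. The usual pattern on the new API is a checked loop, sketched below with illustrative names:

// Illustrative only: block on a 4-byte atomic with a bounded timeout and re-check after wakeup.
static void wait_for_nonzero(const atomic_t<u32>& value)
{
    while (!value.load())
    {
        value.wait(0, atomic_wait_timeout{1'000'000}); // ~1 ms in nanoseconds; wakeups may be spurious
    }
}
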
@ -1874,23 +1782,6 @@ public:
{
return base::fetch_xor(1) != 0;
}
// Timeout is discouraged
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(bool old_value, atomic_wait_timeout timeout = atomic_wait_timeout::inf) const noexcept
{
base::template wait<Flags>(old_value, 1, timeout);
}
void notify_one() noexcept
{
base::notify_one(1);
}
void notify_all() noexcept
{
base::notify_all(1);
}
};
// Specializations
@ -1904,12 +1795,6 @@ struct std::common_type<atomic_t<T, Align>, T2> : std::common_type<T, std::commo
template <typename T, typename T2, usz Align2>
struct std::common_type<T, atomic_t<T2, Align2>> : std::common_type<std::common_type_t<T>, T2> {};
namespace atomic_wait
{
template <usz Align>
constexpr u128 default_mask<atomic_t<bool, Align>> = 1;
}
#ifndef _MSC_VER
#pragma GCC diagnostic pop
#pragma GCC diagnostic pop

View file

@ -6,33 +6,35 @@
// Mutex that tries to maintain the order of acquisition
class fifo_mutex
{
// Low 8 bits are incremented on acquisition, high 8 bits are incremented on release
atomic_t<u16> m_value{0};
// Low 16 bits are incremented on acquisition, high 16 bits are incremented on release
atomic_t<u32> m_value{0};
public:
constexpr fifo_mutex() noexcept = default;
void lock() noexcept
{
const u16 val = m_value.fetch_op([](u16& val)
// clang-format off
const u32 val = m_value.fetch_op([](u32& val)
{
val = (val & 0xff00) | ((val + 1) & 0xff);
val = (val & 0xffff0000) | ((val + 1) & 0xffff);
});
// clang-format on
if (val >> 8 != (val & 0xff)) [[unlikely]]
if (val >> 16 != (val & 0xffff)) [[unlikely]]
{
// TODO: implement busy waiting along with moving to cpp file
m_value.wait<atomic_wait::op_ne>(((val + 1) & 0xff) << 8, 0xff00);
m_value.wait((val & 0xffff0000) | ((val + 1) & 0xffff));
}
}
bool try_lock() noexcept
{
const u16 val = m_value.load();
const u32 val = m_value.load();
if (val >> 8 == (val & 0xff))
if (val >> 16 == (val & 0xffff))
{
if (m_value.compare_and_swap(val, ((val + 1) & 0xff) | (val & 0xff00)))
if (m_value.compare_and_swap(val, ((val + 1) & 0xffff) | (val & 0xffff0000)))
{
return true;
}
@ -43,9 +45,9 @@ public:
void unlock() noexcept
{
const u16 val = m_value.add_fetch(0x100);
const u32 val = m_value.add_fetch(0x10000);
if (val >> 8 != (val & 0xff))
if (val >> 16 != (val & 0xffff))
{
m_value.notify_one();
}
@ -53,9 +55,9 @@ public:
bool is_free() const noexcept
{
const u16 val = m_value.load();
const u32 val = m_value.load();
return (val >> 8) == (val & 0xff);
return (val >> 16) == (val & 0xffff);
}
void lock_unlock() noexcept
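
The state word is widened here because the wait path above now accepts only 4-byte atomics, so the old u16 ticket word could no longer be slept on directly; the layout is otherwise unchanged, with the low 16 bits counting issued tickets and the high 16 bits counting released ones. A small standalone trace of the field arithmetic (illustration, not project code):

// Illustrative only: trace of the [serving:16 | tickets:16] word for two contenders A and B.
#include <cassert>
#include <cstdint>

int main()
{
    std::uint32_t v = 0; // serving == tickets == 0: unlocked

    auto take_ticket = [&v] { std::uint32_t old = v; v = (v & 0xffff0000) | ((v + 1) & 0xffff); return old; };

    const std::uint32_t a = take_ticket(); // A: serving(0) == tickets(0), so A owns the lock
    assert((a >> 16) == (a & 0xffff));

    const std::uint32_t b = take_ticket(); // B: serving(0) != tickets(1), so B must wait
    assert((b >> 16) != (b & 0xffff));
    assert(v == 0x00000002);               // ...and this is exactly the value lock() passes to wait()

    v += 0x10000;                          // A unlocks: serving becomes 1, the word changes, B is woken
    assert((v >> 16) == (b & 0xffff));     // serving now equals B's ticket, so B owns the lock
    return 0;
}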

View file

@ -259,8 +259,8 @@ namespace utils
void audio_decoder::clear()
{
track_fully_decoded = false;
track_fully_consumed = false;
track_fully_decoded = 0;
track_fully_consumed = 0;
has_error = false;
m_size = 0;
duration_ms = 0;
@ -274,7 +274,7 @@ namespace utils
{
auto& thread = *m_thread;
thread = thread_state::aborting;
track_fully_consumed = true;
track_fully_consumed = 1;
track_fully_consumed.notify_one();
thread();
m_thread.reset();
@ -511,7 +511,7 @@ namespace utils
media_log.notice("audio_decoder: about to decode: %s (index=%d)", ::at32(m_context.playlist, m_context.current_track), m_context.current_track);
decode_track(::at32(m_context.playlist, m_context.current_track));
track_fully_decoded = true;
track_fully_decoded = 1;
if (has_error)
{
@ -521,7 +521,7 @@ namespace utils
// Let's only decode one track at a time. Wait for the consumer to finish reading the track.
media_log.notice("audio_decoder: waiting until track is consumed...");
thread_ctrl::wait_on(track_fully_consumed, false);
thread_ctrl::wait_on(track_fully_consumed, 0);
track_fully_consumed = false;
}
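
The two hand-off flags lose their bool type here because only 4-byte atomics can be waited on now; the signalling pattern itself is unchanged. A minimal sketch of that pattern with placeholder names, using the plain atomic wait where the decoder uses thread_ctrl::wait_on(consumed, 0) in the same role:

// Illustrative only: 32-bit flag hand-off between two threads.
static atomic_t<u32> consumed{0};

static void consumer_done()
{
    consumed = 1;
    consumed.notify_one();
}

static void decoder_wait_for_consumer()
{
    while (!consumed.load())
    {
        consumed.wait(0); // sleeps while consumed == 0
    }
    consumed = 0; // re-arm for the next track
}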

View file

@ -77,8 +77,8 @@ namespace utils
std::vector<u8> data;
atomic_t<u64> m_size = 0;
atomic_t<u64> duration_ms = 0;
atomic_t<bool> track_fully_decoded{false};
atomic_t<bool> track_fully_consumed{false};
atomic_t<u32> track_fully_decoded{0};
atomic_t<u32> track_fully_consumed{0};
atomic_t<bool> has_error{false};
std::deque<std::pair<u64, u64>> timestamps_ms;

View file

@ -3,6 +3,7 @@
#include <cstdint>
#include <memory>
#include "atomic.hpp"
#include "asm.hpp"
namespace stx
{
@ -21,7 +22,7 @@ namespace stx
// Basic assumption of userspace pointer size
constexpr uint c_ptr_size = 48;
// Use lower 17 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
// Use lower 16 bits as atomic_ptr internal counter of borrowed refs (pointer itself is shifted)
constexpr uint c_ref_mask = 0xffff, c_ref_size = 16;
// Remaining pointer bits
@ -1054,20 +1055,19 @@ namespace stx
return observe() == r.get();
}
template <atomic_wait::op Flags = atomic_wait::op::eq>
void wait(const volatile void* value, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
void wait(std::nullptr_t, atomic_wait_timeout timeout = atomic_wait_timeout::inf)
{
m_val.wait<Flags>(reinterpret_cast<uptr>(value) << c_ref_size, c_ptr_mask, timeout);
utils::bless<atomic_t<u32>>(&m_val)[1].wait(0, timeout);
}
void notify_one()
{
m_val.notify_one(c_ptr_mask);
utils::bless<atomic_t<u32>>(&m_val)[1].notify_one();
}
void notify_all()
{
m_val.notify_all(c_ptr_mask);
utils::bless<atomic_t<u32>>(&m_val)[1].notify_all();
}
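
wait(nullptr) and the notifications now go through the upper 32 bits of the packed control word instead of a masked 64-bit wait; utils::bless appears to reinterpret the 64-bit atomic as two adjacent 32-bit atomic words. Because the pointer is stored shifted left by c_ref_size (16) above the borrow counter, any realistic non-null userspace pointer sets bits in the high word, so "high word still 0" is in practice "pointer still null"; the [1] index assumes the little-endian layout targeted here. A small standalone check of that packing, as an illustration:

// Illustrative only: why the high 32-bit word is enough to observe null vs. non-null.
#include <cassert>
#include <cstdint>

int main()
{
    const std::uint64_t ptr_bits = 0x0000'7f12'3456'7000ull; // a typical 48-bit userspace address
    const std::uint64_t packed   = ptr_bits << 16;           // low 16 bits hold borrowed references
    assert(static_cast<std::uint32_t>(packed >> 32) != 0);   // a stored pointer is visible in the high word
    assert(static_cast<std::uint32_t>(std::uint64_t{0} >> 32) == 0); // the null state keeps it at zero
    return 0;
}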
};
@ -1110,18 +1110,6 @@ namespace stx
} null_ptr;
}
namespace atomic_wait
{
template <typename T>
constexpr u128 default_mask<stx::atomic_ptr<T>> = stx::c_ptr_mask;
template <typename T>
constexpr u128 get_value(stx::atomic_ptr<T>&, const volatile void* value = nullptr)
{
return reinterpret_cast<uptr>(value) << stx::c_ref_size;
}
}
using stx::null_ptr;
using stx::single_ptr;
using stx::shared_ptr;