Merge pull request #2260 from Subv/scheduling

Threading: Reworked the way our scheduler works.
2016-12-16 00:41:22 -05:00 · 2016-12-16 00:41:22 -05:00 · cda7210fad
commit cda7210fad
parent a89471621b 5b1edc6ae7
8 changed files with 221 additions and 206 deletions
--- a/src/citra_qt/debugger/wait_tree.cpp
+++ b/src/citra_qt/debugger/wait_tree.cpp
@ -229,7 +229,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
        list.push_back(std::make_unique<WaitTreeMutexList>(thread.held_mutexes));
    }
    if (thread.status == THREADSTATUS_WAIT_SYNCH) {
-        list.push_back(std::make_unique<WaitTreeObjectList>(thread.wait_objects, thread.wait_all));
+        list.push_back(std::make_unique<WaitTreeObjectList>(thread.wait_objects,
+                                                            thread.IsSleepingOnWaitAll()));
    }

    return list;
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@ -79,8 +79,6 @@ ResultCode AddressArbiter::ArbitrateAddress(ArbitrationType type, VAddr address,
                          ErrorSummary::WrongArgument, ErrorLevel::Usage);
    }

-    HLE::Reschedule(__func__);
-
    // The calls that use a timeout seem to always return a Timeout error even if they did not put
    // the thread to sleep
    if (type == ArbitrationType::WaitIfLessThanWithTimeout ||
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <boost/range/algorithm_ext/erase.hpp>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/config_mem.h"
@ -31,13 +32,60 @@ void WaitObject::RemoveWaitingThread(Thread* thread) {
        waiting_threads.erase(itr);
 }

+SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() {
+    // Prune threads that were already woken up (status ready or running) from our waitlist
+    boost::range::remove_erase_if(waiting_threads, [](const SharedPtr<Thread>& thread) {
+        return thread->status == THREADSTATUS_RUNNING || thread->status == THREADSTATUS_READY;
+    });
+
+    // TODO(Subv): This call should be performed inside the loop below to check if an object can be
+    // acquired by a particular thread. This is useful for things like recursive locking of Mutexes.
+    if (ShouldWait())
+        return nullptr; // This object cannot be acquired right now, so no waiter can be woken
+
+    Thread* candidate = nullptr;
+    s32 candidate_priority = THREADPRIO_LOWEST + 1; // Sentinel one past THREADPRIO_LOWEST
+
+    for (const auto& thread : waiting_threads) {
+        if (thread->current_priority >= candidate_priority) // Not better than the candidate
+            continue;
+
+        bool ready_to_run =
+            std::none_of(thread->wait_objects.begin(), thread->wait_objects.end(),
+                         [](const SharedPtr<WaitObject>& object) { return object->ShouldWait(); });
+        if (ready_to_run) { // None of the objects in the thread's wait_objects requires waiting
+            candidate = thread.get();
+            candidate_priority = thread->current_priority;
+        }
+    }
+
+    return candidate; // nullptr when no waiting thread qualified
+}
+
 void WaitObject::WakeupAllWaitingThreads() {
-    for (auto thread : waiting_threads)
+    while (auto thread = GetHighestPriorityReadyThread()) {
+        if (!thread->IsSleepingOnWaitAll()) {
+            Acquire(); // The thread's wait_objects is empty: only this object is acquired
+            // Set the output index of the WaitSynchronizationN call to the index of this object.
+            if (thread->wait_set_output) {
+                thread->SetWaitSynchronizationOutput(thread->GetWaitObjectIndex(this));
+                thread->wait_set_output = false;
+            }
+        } else {
+            for (auto& object : thread->wait_objects) {
+                object->Acquire();
+                object->RemoveWaitingThread(thread.get());
+            }
+            // Note: This case doesn't update the output index of WaitSynchronizationN.
+            // Clear the thread's waitlist, so IsSleepingOnWaitAll() no longer reports true
+            thread->wait_objects.clear();
+        }
+
+        thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
         thread->ResumeFromWait();
-
-    waiting_threads.clear();
-
-    HLE::Reschedule(__func__);
+        // Note: The thread is not erased from this object's waitlist here; the next call to
+        // GetHighestPriorityReadyThread prunes threads whose status is ready or running.
+    }
 }

 const std::vector<SharedPtr<Thread>>& WaitObject::GetWaitingThreads() const {
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@ -151,9 +151,15 @@ public:
     */
    void RemoveWaitingThread(Thread* thread);

-    /// Wake up all threads waiting on this object
+    /**
+     * Wake up all threads waiting on this object that can be awoken, in priority order,
+     * and set the synchronization result and output of the thread.
+     */
    void WakeupAllWaitingThreads();

+    /// Obtains the highest priority thread that is ready to run from this object's waiting list.
+    SharedPtr<Thread> GetHighestPriorityReadyThread();
+
    /// Get a const reference to the waiting threads list for debug use
    const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const;

--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@ -120,8 +120,6 @@ void Thread::Stop() {
    u32 tls_slot =
        ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
    Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
-
-    HLE::Reschedule(__func__);
 }

 Thread* ArbitrateHighestPriorityThread(u32 address) {
@ -180,50 +178,6 @@ static void PriorityBoostStarvedThreads() {
    }
 }

-/**
- * Gets the registers for timeout parameter of the next WaitSynchronization call.
- * @param thread a pointer to the thread that is ready to call WaitSynchronization
- * @returns a tuple of two register pointers to low and high part of the timeout parameter
- */
-static std::tuple<u32*, u32*> GetWaitSynchTimeoutParameterRegister(Thread* thread) {
-    bool thumb_mode = (thread->context.cpsr & TBIT) != 0;
-    u16 thumb_inst = Memory::Read16(thread->context.pc & 0xFFFFFFFE);
-    u32 inst = Memory::Read32(thread->context.pc & 0xFFFFFFFC) & 0x0FFFFFFF;
-
-    if ((thumb_mode && thumb_inst == 0xDF24) || (!thumb_mode && inst == 0x0F000024)) {
-        // svc #0x24 (WaitSynchronization1)
-        return std::make_tuple(&thread->context.cpu_registers[2],
-                               &thread->context.cpu_registers[3]);
-    } else if ((thumb_mode && thumb_inst == 0xDF25) || (!thumb_mode && inst == 0x0F000025)) {
-        // svc #0x25 (WaitSynchronizationN)
-        return std::make_tuple(&thread->context.cpu_registers[0],
-                               &thread->context.cpu_registers[4]);
-    }
-
-    UNREACHABLE();
-}
-
-/**
- * Updates the WaitSynchronization timeout parameter according to the difference
- * between ticks of the last WaitSynchronization call and the incoming one.
- * @param timeout_low a pointer to the register for the low part of the timeout parameter
- * @param timeout_high a pointer to the register for the high part of the timeout parameter
- * @param last_tick tick of the last WaitSynchronization call
- */
-static void UpdateTimeoutParameter(u32* timeout_low, u32* timeout_high, u64 last_tick) {
-    s64 timeout = ((s64)*timeout_high << 32) | *timeout_low;
-
-    if (timeout != -1) {
-        timeout -= cyclesToUs(CoreTiming::GetTicks() - last_tick) * 1000; // in nanoseconds
-
-        if (timeout < 0)
-            timeout = 0;
-
-        *timeout_low = timeout & 0xFFFFFFFF;
-        *timeout_high = timeout >> 32;
-    }
-}
-
 /**
 * Switches the CPU's active thread context to that of the specified thread
 * @param new_thread The thread to switch to
@ -254,32 +208,6 @@ static void SwitchContext(Thread* new_thread) {

        current_thread = new_thread;

-        // If the thread was waited by a svcWaitSynch call, step back PC by one instruction to rerun
-        // the SVC when the thread wakes up. This is necessary to ensure that the thread can acquire
-        // the requested wait object(s) before continuing.
-        if (new_thread->waitsynch_waited) {
-            // CPSR flag indicates CPU mode
-            bool thumb_mode = (new_thread->context.cpsr & TBIT) != 0;
-
-            // SVC instruction is 2 bytes for THUMB, 4 bytes for ARM
-            new_thread->context.pc -= thumb_mode ? 2 : 4;
-
-            // Get the register for timeout parameter
-            u32 *timeout_low, *timeout_high;
-            std::tie(timeout_low, timeout_high) = GetWaitSynchTimeoutParameterRegister(new_thread);
-
-            // Update the timeout parameter
-            UpdateTimeoutParameter(timeout_low, timeout_high, new_thread->last_running_ticks);
-        }
-
-        // Clean up the thread's wait_objects, they'll be restored if needed during
-        // the svcWaitSynchronization call
-        for (size_t i = 0; i < new_thread->wait_objects.size(); ++i) {
-            SharedPtr<WaitObject> object = new_thread->wait_objects[i];
-            object->RemoveWaitingThread(new_thread);
-        }
-        new_thread->wait_objects.clear();
-
        ready_queue.remove(new_thread->current_priority, new_thread);
        new_thread->status = THREADSTATUS_RUNNING;

@ -319,17 +247,13 @@ static Thread* PopNextReadyThread() {
 void WaitCurrentThread_Sleep() {
    Thread* thread = GetCurrentThread();
    thread->status = THREADSTATUS_WAIT_SLEEP;
-
-    HLE::Reschedule(__func__);
 }

 void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects,
-                                           bool wait_set_output, bool wait_all) {
+                                           bool wait_set_output) {
    Thread* thread = GetCurrentThread();
    thread->wait_set_output = wait_set_output;
-    thread->wait_all = wait_all;
    thread->wait_objects = std::move(wait_objects);
-    thread->waitsynch_waited = true;
    thread->status = THREADSTATUS_WAIT_SYNCH;
 }

@ -351,15 +275,15 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
        return;
    }

-    thread->waitsynch_waited = false;
-
    if (thread->status == THREADSTATUS_WAIT_SYNCH || thread->status == THREADSTATUS_WAIT_ARB) {
+        thread->wait_set_output = false;
+        // Remove the thread from each of its waiting objects' waitlists
+        for (auto& object : thread->wait_objects)
+            object->RemoveWaitingThread(thread.get());
+        thread->wait_objects.clear();
        thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS,
                                                        ErrorSummary::StatusChanged,
                                                        ErrorLevel::Info));
-
-        if (thread->wait_set_output)
-            thread->SetWaitSynchronizationOutput(-1);
    }

    thread->ResumeFromWait();
@ -399,6 +323,7 @@ void Thread::ResumeFromWait() {

    ready_queue.push_back(current_priority, this);
    status = THREADSTATUS_READY;
+    HLE::Reschedule(__func__);
 }

 /**
@ -494,13 +419,11 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
    thread->last_running_ticks = CoreTiming::GetTicks();
    thread->processor_id = processor_id;
    thread->wait_set_output = false;
-    thread->wait_all = false;
    thread->wait_objects.clear();
    thread->wait_address = 0;
    thread->name = std::move(name);
    thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
    thread->owner_process = g_current_process;
-    thread->waitsynch_waited = false;

    // Find the next available TLS index, and mark it as used
    auto& tls_slots = Kernel::g_current_process->tls_slots;
@ -555,8 +478,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
    ready_queue.push_back(thread->current_priority, thread.get());
    thread->status = THREADSTATUS_READY;

-    HLE::Reschedule(__func__);
-
    return MakeResult<SharedPtr<Thread>>(std::move(thread));
 }

@ -619,14 +540,6 @@ void Reschedule() {

    HLE::DoneRescheduling();

-    // Don't bother switching to the same thread.
-    // But if the thread was waiting on objects, we still need to switch it
-    // to perform PC modification, change state to RUNNING, etc.
-    // This occurs in the case when an object the thread is waiting on immediately wakes up
-    // the current thread before Reschedule() is called.
-    if (next == cur && (next == nullptr || next->waitsynch_waited == false))
-        return;
-
    if (cur && next) {
        LOG_TRACE(Kernel, "context switch %u -> %u", cur->GetObjectId(), next->GetObjectId());
    } else if (cur) {
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@ -5,7 +5,9 @@
 #pragma once

 #include <string>
+#include <unordered_map>
 #include <vector>
+#include <boost/container/flat_map.hpp>
 #include <boost/container/flat_set.hpp>
 #include "common/common_types.h"
 #include "core/core.h"
@ -124,6 +126,16 @@ public:
     */
    void SetWaitSynchronizationOutput(s32 output);

+    /**
+     * Retrieves the index that this particular object occupies in the list of objects that the
+     * thread passed to WaitSynchronizationN; used as that call's output value on wakeup.
+     * @param object Object to query, looked up by its id (at() expects the id to be recorded).
+     * @returns The position stored in wait_objects_index for the object's id.
+     */
+    s32 GetWaitObjectIndex(const WaitObject* object) const {
+        return wait_objects_index.at(object->GetObjectId());
+    }
+
    /**
     * Stops a thread, invalidating it from further use
     */
@ -137,6 +149,15 @@ public:
        return tls_address;
    }

+    /**
+     * Returns whether this thread is waiting for all the objects in its wait list to become
+     * ready (WaitSynchronizationN with wait_all = true, or a ReplyAndReceive call). The answer
+     * is derived from wait_objects being non-empty, as it is only populated for such waits.
+     */
+    bool IsSleepingOnWaitAll() const {
+        return !wait_objects.empty();
+    }
+
    Core::ThreadContext context;

    u32 thread_id;
@ -154,16 +175,22 @@ public:

    VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread

-    bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
-
    /// Mutexes currently held by this thread, which will be released when it exits.
    boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;

    SharedPtr<Process> owner_process; ///< Process that owns this thread
-    std::vector<SharedPtr<WaitObject>> wait_objects; ///< Objects that the thread is waiting on
+
+    /// Objects that the thread is waiting on.
+    /// This is only populated when the thread should wait for all the objects to become ready.
+    std::vector<SharedPtr<WaitObject>> wait_objects;
+
+    /// Mapping of Object ids to their position in the last waitlist that this thread waited on.
+    boost::container::flat_map<int, s32> wait_objects_index;
+
    VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address
-    bool wait_all;        ///< True if the thread is waiting on all objects before resuming
-    bool wait_set_output; ///< True if the output parameter should be set on thread wakeup
+
+    /// True if the WaitSynchronizationN output parameter should be set on thread wakeup.
+    bool wait_set_output;

    std::string name;

@ -215,10 +242,9 @@ void WaitCurrentThread_Sleep();
 * @param wait_objects Kernel objects that we are waiting on
 * @param wait_set_output If true, set the output parameter on thread wakeup (for
 * WaitSynchronizationN only)
- * @param wait_all If true, wait on all objects before resuming (for WaitSynchronizationN only)
 */
 void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects,
-                                           bool wait_set_output, bool wait_all);
+                                           bool wait_set_output);

 /**
 * Waits the current thread from an ArbitrateAddress call
--- a/src/core/hle/kernel/timer.cpp
+++ b/src/core/hle/kernel/timer.cpp
@ -60,14 +60,10 @@ void Timer::Set(s64 initial, s64 interval) {
    u64 initial_microseconds = initial / 1000;
    CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type,
                              callback_handle);
-
-    HLE::Reschedule(__func__);
 }

 void Timer::Cancel() {
    CoreTiming::UnscheduleEvent(timer_callback_event_type, callback_handle);
-
-    HLE::Reschedule(__func__);
 }

 void Timer::Clear() {
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@ -43,6 +43,9 @@ const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS,
                                        ErrorSummary::InvalidArgument,
                                        ErrorLevel::Usage); // 0xE0E0181E

+const ResultCode ERR_SYNC_TIMEOUT(ErrorDescription::Timeout, ErrorModule::OS,
+                                  ErrorSummary::StatusChanged, ErrorLevel::Info);
+
 const ResultCode ERR_MISALIGNED_ADDRESS{// 0xE0E01BF1
                                        ErrorDescription::MisalignedAddress, ErrorModule::OS,
                                        ErrorSummary::InvalidArgument, ErrorLevel::Usage};
@ -260,27 +263,30 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) {
    auto object = Kernel::g_handle_table.GetWaitObject(handle);
    Kernel::Thread* thread = Kernel::GetCurrentThread();

-    thread->waitsynch_waited = false;
-
    if (object == nullptr)
        return ERR_INVALID_HANDLE;

    LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle,
              object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds);

-    HLE::Reschedule(__func__);
-
-    // Check for next thread to schedule
    if (object->ShouldWait()) {

+        if (nano_seconds == 0)
+            return ERR_SYNC_TIMEOUT;
+
        object->AddWaitingThread(thread);
-        Kernel::WaitCurrentThread_WaitSynchronization({object}, false, false);
+        // TODO(Subv): Perform things like update the mutex lock owner's priority to
+        // prevent priority inversion. Currently this is done in Mutex::ShouldWait,
+        // but it should be moved to a function that is called from here.
+        thread->status = THREADSTATUS_WAIT_SYNCH;

        // Create an event to wake the thread up after the specified nanosecond delay has passed
        thread->WakeAfterDelay(nano_seconds);

-        // NOTE: output of this SVC will be set later depending on how the thread resumes
-        return HLE::RESULT_INVALID;
+        // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread
+        // resumes due to a signal in its wait objects.
+        // Otherwise we retain the default value of timeout.
+        return ERR_SYNC_TIMEOUT;
    }

    object->Acquire();
@ -291,11 +297,7 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) {
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
 static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_count, bool wait_all,
                                       s64 nano_seconds) {
-    bool wait_thread = !wait_all;
-    int handle_index = 0;
    Kernel::Thread* thread = Kernel::GetCurrentThread();
-    bool was_waiting = thread->waitsynch_waited;
-    thread->waitsynch_waited = false;

    // Check if 'handles' is invalid
    if (handles == nullptr)
@ -311,90 +313,113 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou
        return ResultCode(ErrorDescription::OutOfRange, ErrorModule::OS,
                          ErrorSummary::InvalidArgument, ErrorLevel::Usage);

-    // If 'handle_count' is non-zero, iterate through each handle and wait the current thread if
-    // necessary
-    if (handle_count != 0) {
-        bool selected = false; // True once an object has been selected
-
-        Kernel::SharedPtr<Kernel::WaitObject> wait_object;
+    using ObjectPtr = Kernel::SharedPtr<Kernel::WaitObject>;
+    std::vector<ObjectPtr> objects(handle_count);

    for (int i = 0; i < handle_count; ++i) {
        auto object = Kernel::g_handle_table.GetWaitObject(handles[i]);
        if (object == nullptr)
            return ERR_INVALID_HANDLE;
-
-            // Check if the current thread should wait on this object...
-            if (object->ShouldWait()) {
-
-                // Check we are waiting on all objects...
-                if (wait_all)
-                    // Wait the thread
-                    wait_thread = true;
-            } else {
-                // Do not wait on this object, check if this object should be selected...
-                if (!wait_all && (!selected || (wait_object == object && was_waiting))) {
-                    // Do not wait the thread
-                    wait_thread = false;
-                    handle_index = i;
-                    wait_object = object;
-                    selected = true;
-                }
-            }
-        }
-    } else {
-        // If no handles were passed in, put the thread to sleep only when 'wait_all' is false
-        // NOTE: This should deadlock the current thread if no timeout was specified
-        if (!wait_all) {
-            wait_thread = true;
-        }
+        objects[i] = object;
    }

-    SCOPE_EXIT({
-        HLE::Reschedule("WaitSynchronizationN");
-    }); // Reschedule after putting the threads to sleep.
+    // Clear the mapping of wait object indices.
+    // We don't want any lingering state in this map.
+    // It will be repopulated later in the wait_all = false case.
+    thread->wait_objects_index.clear();

-    // If thread should wait, then set its state to waiting
-    if (wait_thread) {
-
-        // Actually wait the current thread on each object if we decided to wait...
-        std::vector<SharedPtr<Kernel::WaitObject>> wait_objects;
-        wait_objects.reserve(handle_count);
-
-        for (int i = 0; i < handle_count; ++i) {
-            auto object = Kernel::g_handle_table.GetWaitObject(handles[i]);
-            object->AddWaitingThread(Kernel::GetCurrentThread());
-            wait_objects.push_back(object);
+    if (wait_all) {
+        bool all_available =
+            std::all_of(objects.begin(), objects.end(),
+                        [](const ObjectPtr& object) { return !object->ShouldWait(); });
+        if (all_available) {
+            // We can acquire all objects right now, do so.
+            for (auto& object : objects)
+                object->Acquire();
+            // Note: In this case, the `out` parameter is not set,
+            // and retains whatever value it had before.
+            return RESULT_SUCCESS;
        }

-        Kernel::WaitCurrentThread_WaitSynchronization(std::move(wait_objects), true, wait_all);
+        // Not all objects were available right now, prepare to suspend the thread.
+
+        // If a timeout value of 0 was provided, just return the Timeout error code instead of
+        // suspending the thread.
+        if (nano_seconds == 0)
+            return ERR_SYNC_TIMEOUT;
+
+        // Put the thread to sleep
+        thread->status = THREADSTATUS_WAIT_SYNCH;
+
+        // Add the thread to each of the objects' waiting threads.
+        for (auto& object : objects) {
+            object->AddWaitingThread(thread);
+            // TODO(Subv): Perform things like update the mutex lock owner's priority to
+            // prevent priority inversion. Currently this is done in Mutex::ShouldWait,
+            // but it should be moved to a function that is called from here.
+        }
+
+        // Set the thread's waitlist to the list of objects passed to WaitSynchronizationN
+        thread->wait_objects = std::move(objects);

        // Create an event to wake the thread up after the specified nanosecond delay has passed
-        Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds);
+        thread->WakeAfterDelay(nano_seconds);

-        // NOTE: output of this SVC will be set later depending on how the thread resumes
-        return HLE::RESULT_INVALID;
-    }
+        // The output index is set to -1 by default in this case and is not modified afterwards.
+        *out = -1;
+        // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to
+        // a signal in one of its wait objects.
+        return ERR_SYNC_TIMEOUT;
+    } else {
+        // Find the first object that is acquirable in the provided list of objects
+        auto itr = std::find_if(objects.begin(), objects.end(),
+                                [](const ObjectPtr& object) { return !object->ShouldWait(); });

-    // Acquire objects if we did not wait...
-    for (int i = 0; i < handle_count; ++i) {
-        auto object = Kernel::g_handle_table.GetWaitObject(handles[i]);
-
-        // Acquire the object if it is not waiting...
-        if (!object->ShouldWait()) {
+        if (itr != objects.end()) {
+            // We found a ready object, acquire it and set the result value
+            Kernel::WaitObject* object = itr->get();
            object->Acquire();
-
-            // If this was the first non-waiting object and 'wait_all' is false, don't acquire
-            // any other objects
-            if (!wait_all)
-                break;
-        }
-    }
-
-    // TODO(bunnei): If 'wait_all' is true, this is probably wrong. However, real hardware does
-    // not seem to set it to any meaningful value.
-    *out = handle_count != 0 ? (wait_all ? -1 : handle_index) : 0;
-
+            *out = std::distance(objects.begin(), itr);
            return RESULT_SUCCESS;
+        }
+
+        // No objects were ready to be acquired, prepare to suspend the thread.
+
+        // If a timeout value of 0 was provided, just return the Timeout error code instead of
+        // suspending the thread.
+        if (nano_seconds == 0)
+            return ERR_SYNC_TIMEOUT;
+
+        // Put the thread to sleep
+        thread->status = THREADSTATUS_WAIT_SYNCH;
+
+        // Clear the thread's waitlist, we won't use it for wait_all = false
+        thread->wait_objects.clear();
+
+        // Add the thread to each of the objects' waiting threads.
+        for (size_t i = 0; i < objects.size(); ++i) {
+            Kernel::WaitObject* object = objects[i].get();
+            // Set the index of this object in the mapping of Objects -> index for this thread.
+            thread->wait_objects_index[object->GetObjectId()] = static_cast<int>(i);
+            object->AddWaitingThread(thread);
+            // TODO(Subv): Perform things like update the mutex lock owner's priority to
+            // prevent priority inversion. Currently this is done in Mutex::ShouldWait,
+            // but it should be moved to a function that is called from here.
+        }
+
+        // Note: If no handles and no timeout were given, then the thread will deadlock, this is
+        // consistent with hardware behavior.
+
+        // Create an event to wake the thread up after the specified nanosecond delay has passed
+        thread->WakeAfterDelay(nano_seconds);
+
+        // Note: The output of this SVC will be set to RESULT_SUCCESS if the thread resumes due to a
+        // signal in one of its wait objects.
+        // Otherwise we retain the default value of timeout, and -1 in the out parameter
+        thread->wait_set_output = true;
+        *out = -1;
+        return ERR_SYNC_TIMEOUT;
+    }
 }

 /// Create an address arbiter (to allocate access to shared resources)
@ -1159,6 +1184,8 @@ void CallSVC(u32 immediate) {
    if (info) {
        if (info->func) {
            info->func();
+            // TODO(Subv): Not all service functions should cause a reschedule in all cases.
+            HLE::Reschedule(__func__);
        } else {
            LOG_ERROR(Kernel_SVC, "unimplemented SVC function %s(..)", info->name);
        }