summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/CMakeLists.txt5
-rw-r--r--src/audio_core/audio_core.cpp9
-rw-r--r--src/audio_core/audio_core.h2
-rw-r--r--src/audio_core/hle/common.h9
-rw-r--r--src/audio_core/hle/dsp.h12
-rw-r--r--src/audio_core/hle/pipe.cpp32
-rw-r--r--src/audio_core/hle/pipe.h4
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/debugger/profiler.cpp39
-rw-r--r--src/citra_qt/debugger/profiler.h3
-rw-r--r--src/citra_qt/main.cpp9
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/microprofile.h4
-rw-r--r--src/common/microprofileui.h3
-rw-r--r--src/common/profiler.cpp82
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h27
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp7
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/dsp_dsp.cpp191
-rw-r--r--src/core/hle/service/dsp_dsp.h19
-rw-r--r--src/core/hle/service/gsp_gpu.cpp1
-rw-r--r--src/core/hle/service/y2r_u.cpp480
-rw-r--r--src/core/hle/service/y2r_u.h20
-rw-r--r--src/core/hle/svc.cpp4
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp133
-rw-r--r--src/video_core/debug_utils/debug_utils.h30
-rw-r--r--src/video_core/rasterizer.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1
-rw-r--r--src/video_core/shader/shader.cpp3
-rw-r--r--src/video_core/shader/shader.h2
-rw-r--r--src/video_core/vertex_loader.cpp140
-rw-r--r--src/video_core/vertex_loader.h28
35 files changed, 875 insertions, 587 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index b1c86462f..a965af291 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -18,6 +18,9 @@ set(HEADERS
sink.h
)
+include_directories(../../externals/soundtouch/include)
+
create_directory_groups(${SRCS} ${HEADERS})
-add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file
+add_library(audio_core STATIC ${SRCS} ${HEADERS})
+target_link_libraries(audio_core SoundTouch)
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 894f46990..b512b0f9b 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -4,6 +4,7 @@
#include "audio_core/audio_core.h"
#include "audio_core/hle/dsp.h"
+#include "audio_core/hle/pipe.h"
#include "core/core_timing.h"
#include "core/hle/kernel/vm_manager.h"
@@ -17,10 +18,10 @@ static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
if (DSP::HLE::Tick()) {
- // HACK: We're not signaling the interrups when they should be, but just firing them all off together.
- // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here.
- // TODO(merry): Understand when the other interrupts are fired.
- DSP_DSP::SignalAllInterrupts();
+ // TODO(merry): Signal all the other interrupts as appropriate.
+ DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Audio);
+ // HACK(merry): Added to prevent regressions. Will remove soon.
+ DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Binary);
}
// Reschedule recurrent event
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 64c330914..b349895ea 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -10,8 +10,6 @@ class VMManager;
namespace AudioCore {
-constexpr int num_sources = 24;
-constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
constexpr int native_sample_rate = 32728; ///< 32kHz
/// Initialise Audio Core
diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h
index 37d441eb2..7910f42ae 100644
--- a/src/audio_core/hle/common.h
+++ b/src/audio_core/hle/common.h
@@ -7,18 +7,19 @@
#include <algorithm>
#include <array>
-#include "audio_core/audio_core.h"
-
#include "common/common_types.h"
namespace DSP {
namespace HLE {
+constexpr int num_sources = 24;
+constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
+
/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo.
-using StereoFrame16 = std::array<std::array<s16, 2>, AudioCore::samples_per_frame>;
+using StereoFrame16 = std::array<std::array<s16, 2>, samples_per_frame>;
/// The DSP is quadraphonic internally.
-using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_frame>;
+using QuadFrame32 = std::array<std::array<s32, 4>, samples_per_frame>;
/**
* This performs the filter operation defined by FilterT::ProcessSample on the frame in-place.
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index c15ef0b7a..c76350bdd 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -7,7 +7,7 @@
#include <cstddef>
#include <type_traits>
-#include "audio_core/audio_core.h"
+#include "audio_core/hle/common.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
@@ -305,7 +305,7 @@ struct SourceConfiguration {
u16_le buffer_id;
};
- Configuration config[AudioCore::num_sources];
+ Configuration config[num_sources];
};
ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
@@ -320,7 +320,7 @@ struct SourceStatus {
INSERT_PADDING_DSPWORDS(1);
};
- Status status[AudioCore::num_sources];
+ Status status[num_sources];
};
ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
@@ -413,7 +413,7 @@ ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
struct AdpcmCoefficients {
/// Coefficients are signed fixed point with 11 fractional bits.
/// Each source has 16 coefficients associated with it.
- s16_le coeff[AudioCore::num_sources][16];
+ s16_le coeff[num_sources][16];
};
ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
@@ -427,7 +427,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
/// When the application writes to this region it has no effect.
struct FinalMixSamples {
- s16_le pcm16[2 * AudioCore::samples_per_frame];
+ s16_le pcm16[2 * samples_per_frame];
};
ASSERT_DSP_STRUCT(FinalMixSamples, 640);
@@ -437,7 +437,7 @@ ASSERT_DSP_STRUCT(FinalMixSamples, 640);
/// Values that exceed s16 range will be clipped by the DSP after further processing.
struct IntermediateMixSamples {
struct Samples {
- s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
+ s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
};
Samples mix1;
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
index 9381883b4..03280780f 100644
--- a/src/audio_core/hle/pipe.cpp
+++ b/src/audio_core/hle/pipe.cpp
@@ -12,12 +12,14 @@
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "core/hle/service/dsp_dsp.h"
+
namespace DSP {
namespace HLE {
static DspState dsp_state = DspState::Off;
-static std::array<std::vector<u8>, static_cast<size_t>(DspPipe::DspPipe_MAX)> pipe_data;
+static std::array<std::vector<u8>, NUM_DSP_PIPE> pipe_data;
void ResetPipes() {
for (auto& data : pipe_data) {
@@ -27,16 +29,18 @@ void ResetPipes() {
}
std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
- if (pipe_number >= DspPipe::DspPipe_MAX) {
- LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number);
+ const size_t pipe_index = static_cast<size_t>(pipe_number);
+
+ if (pipe_index >= NUM_DSP_PIPE) {
+ LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
return {};
}
- std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)];
+ std::vector<u8>& data = pipe_data[pipe_index];
if (length > data.size()) {
- LOG_WARNING(Audio_DSP, "pipe_number = %u is out of data, application requested read of %u but %zu remain",
- pipe_number, length, data.size());
+ LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain",
+ pipe_index, length, data.size());
length = data.size();
}
@@ -49,16 +53,20 @@ std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
}
size_t GetPipeReadableSize(DspPipe pipe_number) {
- if (pipe_number >= DspPipe::DspPipe_MAX) {
- LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number);
+ const size_t pipe_index = static_cast<size_t>(pipe_number);
+
+ if (pipe_index >= NUM_DSP_PIPE) {
+ LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
return 0;
}
- return pipe_data[static_cast<size_t>(pipe_number)].size();
+ return pipe_data[pipe_index].size();
}
static void WriteU16(DspPipe pipe_number, u16 value) {
- std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)];
+ const size_t pipe_index = static_cast<size_t>(pipe_number);
+
+ std::vector<u8>& data = pipe_data.at(pipe_index);
// Little endian
data.emplace_back(value & 0xFF);
data.emplace_back(value >> 8);
@@ -91,6 +99,8 @@ static void AudioPipeWriteStructAddresses() {
for (u16 addr : struct_addresses) {
WriteU16(DspPipe::Audio, addr);
}
+ // Signal that we have data on this pipe.
+ DSP_DSP::SignalPipeInterrupt(DspPipe::Audio);
}
void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
@@ -145,7 +155,7 @@ void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
return;
}
default:
- LOG_CRITICAL(Audio_DSP, "pipe_number = %u unimplemented", pipe_number);
+ LOG_CRITICAL(Audio_DSP, "pipe_number = %zu unimplemented", static_cast<size_t>(pipe_number));
UNIMPLEMENTED();
return;
}
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
index 382d35e87..64d97f8ba 100644
--- a/src/audio_core/hle/pipe.h
+++ b/src/audio_core/hle/pipe.h
@@ -19,9 +19,9 @@ enum class DspPipe {
Debug = 0,
Dma = 1,
Audio = 2,
- Binary = 3,
- DspPipe_MAX
+ Binary = 3
};
+constexpr size_t NUM_DSP_PIPE = 8;
/**
* Read a DSP pipe.
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 6660d9879..cc9e0c624 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -92,7 +92,7 @@ else()
endif()
target_link_libraries(citra-qt core video_core audio_core common qhexedit)
target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
-target_link_libraries(citra-qt ${PLATFORM_LIBRARIES})
+target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 8e60b9cad..01b81c11c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -71,7 +71,9 @@ void EmuThread::run() {
// Shutdown the core emulation
System::Shutdown();
+#if MICROPROFILE_ENABLED
MicroProfileOnThreadExit();
+#endif
render_window->moveContext();
}
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 4f6ba0e1f..7bb010f77 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -9,13 +9,16 @@
#include "citra_qt/debugger/profiler.h"
#include "citra_qt/util/util.h"
+#include "common/common_types.h"
#include "common/microprofile.h"
#include "common/profiler_reporting.h"
// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
+#if MICROPROFILE_ENABLED
#define MICROPROFILEUI_IMPL 1
#include "common/microprofileui.h"
+#endif
using namespace Common::Profiling;
@@ -34,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
}
}
-static const TimingCategoryInfo* GetCategoryInfo(int id)
-{
- const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
- if ((size_t)id >= categories.size()) {
- return nullptr;
- } else {
- return &categories[id];
- }
-}
-
ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
{
updateProfilingInfo();
- const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
- results.time_per_category.resize(categories.size());
}
QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@@ -85,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
if (parent.isValid()) {
return 0;
} else {
- return static_cast<int>(results.time_per_category.size() + 2);
+ return 2;
}
}
@@ -104,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
} else {
return GetDataForColumn(index.column(), results.interframe_time);
}
- } else {
- if (index.column() == 0) {
- const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
- return info != nullptr ? QString(info->name) : QVariant();
- } else {
- if (index.row() - 2 < (int)results.time_per_category.size()) {
- return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
- } else {
- return QVariant();
- }
- }
}
}
@@ -148,6 +128,8 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
}
}
+#if MICROPROFILE_ENABLED
+
class MicroProfileWidget : public QWidget {
public:
MicroProfileWidget(QWidget* parent = nullptr);
@@ -171,6 +153,8 @@ private:
QTimer update_timer;
};
+#endif
+
MicroProfileDialog::MicroProfileDialog(QWidget* parent)
: QWidget(parent, Qt::Dialog)
{
@@ -180,6 +164,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
// Remove the "?" button from the titlebar and enable the maximize button
setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
+#if MICROPROFILE_ENABLED
+
MicroProfileWidget* widget = new MicroProfileWidget(this);
QLayout* layout = new QVBoxLayout(this);
@@ -191,6 +177,7 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
setFocusProxy(widget);
widget->setFocusPolicy(Qt::StrongFocus);
widget->setFocus();
+#endif
}
QAction* MicroProfileDialog::toggleViewAction() {
@@ -218,6 +205,9 @@ void MicroProfileDialog::hideEvent(QHideEvent* ev) {
QWidget::hideEvent(ev);
}
+
+#if MICROPROFILE_ENABLED
+
/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
/// QPainter available inside the drawing callbacks.
static QPainter* mp_painter = nullptr;
@@ -337,3 +327,4 @@ void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color)
mp_painter->drawPolyline(point_buf.data(), vertices_length);
point_buf.clear();
}
+#endif
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index 036054740..3b38ed8ec 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -7,8 +7,10 @@
#include <QAbstractItemModel>
#include <QDockWidget>
#include <QTimer>
+
#include "ui_profiler.h"
+#include "common/microprofile.h"
#include "common/profiler_reporting.h"
class ProfilerModel : public QAbstractItemModel
@@ -49,6 +51,7 @@ private:
QTimer update_timer;
};
+
class MicroProfileDialog : public QWidget {
Q_OBJECT
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 2ca1e51f6..f1ab29755 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -69,8 +69,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
profilerWidget->hide();
+#if MICROPROFILE_ENABLED
microProfileDialog = new MicroProfileDialog(this);
microProfileDialog->hide();
+#endif
disasmWidget = new DisassemblerWidget(this, emu_thread.get());
addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
@@ -110,7 +112,9 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
debug_menu->addAction(profilerWidget->toggleViewAction());
+#if MICROPROFILE_ENABLED
debug_menu->addAction(microProfileDialog->toggleViewAction());
+#endif
debug_menu->addAction(disasmWidget->toggleViewAction());
debug_menu->addAction(registersWidget->toggleViewAction());
debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -136,8 +140,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
restoreGeometry(UISettings::values.geometry);
restoreState(UISettings::values.state);
render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
+#if MICROPROFILE_ENABLED
microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
microProfileDialog->setVisible(UISettings::values.microprofile_visible);
+#endif
game_list->LoadInterfaceLayout();
@@ -511,9 +517,10 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
UISettings::values.geometry = saveGeometry();
UISettings::values.state = saveState();
UISettings::values.renderwindow_geometry = render_window->saveGeometry();
+#if MICROPROFILE_ENABLED
UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
UISettings::values.microprofile_visible = microProfileDialog->isVisible();
-
+#endif
UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked();
UISettings::values.first_start = false;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c839ce173..aa6eee2a3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ set(HEADERS
microprofile.h
microprofileui.h
platform.h
- profiler.h
profiler_reporting.h
scm_rev.h
scope_exit.h
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
index d3b6cb97c..ef312c6e1 100644
--- a/src/common/microprofile.h
+++ b/src/common/microprofile.h
@@ -4,6 +4,10 @@
#pragma once
+// Uncomment this to disable microprofile. This will get you cleaner profiles when using
+// external sampling profilers like "Very Sleepy", and will improve performance somewhat.
+// #define MICROPROFILE_ENABLED 0
+
// Customized Citra settings.
// This file wraps the MicroProfile header so that these are consistent everywhere.
#define MICROPROFILE_WEBSERVER 0
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
index 97c369bd9..41abe6b75 100644
--- a/src/common/microprofileui.h
+++ b/src/common/microprofileui.h
@@ -13,4 +13,7 @@
#define MICROPROFILE_HELP_ALT "Right-Click"
#define MICROPROFILE_HELP_MOD "Ctrl"
+// This isn't included by microprofileui.h :(
+#include <cstdlib> // For std::abs
+
#include <microprofileui.h>
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
index 7792edd2f..49eb3f40c 100644
--- a/src/common/profiler.cpp
+++ b/src/common/profiler.cpp
@@ -7,71 +7,16 @@
#include <vector>
#include "common/assert.h"
-#include "common/profiler.h"
#include "common/profiler_reporting.h"
#include "common/synchronized_wrapper.h"
-#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
- #define WIN32_LEAN_AND_MEAN
- #include <Windows.h> // For QueryPerformanceCounter/Frequency
-#endif
-
namespace Common {
namespace Profiling {
-#if ENABLE_PROFILING
-thread_local Timer* Timer::current_timer = nullptr;
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
-QPCClock::time_point QPCClock::now() {
- static LARGE_INTEGER freq;
- // Use this dummy local static to ensure this gets initialized once.
- static BOOL dummy = QueryPerformanceFrequency(&freq);
-
- LARGE_INTEGER ticks;
- QueryPerformanceCounter(&ticks);
-
- // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
- // correct way to approach this would be to just return ticks as a time_point and then subtract
- // and do this conversion when creating a duration from two time_points, however, as far as I
- // could tell the C++ requirements for these types are incompatible with this approach.
- return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
-}
-#endif
-
-TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
- : accumulated_duration(0) {
-
- ProfilingManager& manager = GetProfilingManager();
- category_id = manager.RegisterTimingCategory(this, name);
- if (parent != nullptr)
- manager.SetTimingCategoryParent(category_id, parent->category_id);
-}
-
ProfilingManager::ProfilingManager()
: last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
}
-unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
- TimingCategoryInfo info;
- info.category = category;
- info.name = name;
- info.parent = TimingCategoryInfo::NO_PARENT;
-
- unsigned int id = (unsigned int)timing_categories.size();
- timing_categories.push_back(std::move(info));
-
- return id;
-}
-
-void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
- ASSERT(category < timing_categories.size());
- ASSERT(parent < timing_categories.size());
-
- timing_categories[category].parent = parent;
-}
-
void ProfilingManager::BeginFrame() {
this_frame_start = Clock::now();
}
@@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
results.interframe_time = now - last_frame_end;
results.frame_time = now - this_frame_start;
- results.time_per_category.resize(timing_categories.size());
- for (size_t i = 0; i < timing_categories.size(); ++i) {
- results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
- }
-
last_frame_end = now;
}
@@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
window_size = cursor = 0;
}
-void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
- size_t old_size = times_per_category.size();
- if (n == old_size)
- return;
-
- times_per_category.resize(n);
-
- for (size_t i = old_size; i < n; ++i) {
- times_per_category[i].resize(max_window_size, Duration::zero());
- }
-}
-
void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
- SetNumberOfCategories(frame_result.time_per_category.size());
-
interframe_times[cursor] = frame_result.interframe_time;
frame_times[cursor] = frame_result.frame_time;
- for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
- times_per_category[i][cursor] = frame_result.time_per_category[i];
- }
++cursor;
if (cursor == max_window_size)
@@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
result.fps = 0.0f;
}
- result.time_per_category.resize(times_per_category.size());
- for (size_t i = 0; i < times_per_category.size(); ++i) {
- result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
- }
-
return result;
}
diff --git a/src/common/profiler.h b/src/common/profiler.h
deleted file mode 100644
index 3e967b4bc..000000000
--- a/src/common/profiler.h
+++ /dev/null
@@ -1,152 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <chrono>
-
-#include "common/assert.h"
-#include "common/thread.h"
-
-namespace Common {
-namespace Profiling {
-
-// If this is defined to 0, it turns all Timers into no-ops.
-#ifndef ENABLE_PROFILING
-#define ENABLE_PROFILING 1
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
-// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
-// precision. We manually implement a clock based on QPC to get good results.
-
-struct QPCClock {
- using duration = std::chrono::microseconds;
- using time_point = std::chrono::time_point<QPCClock>;
- using rep = duration::rep;
- using period = duration::period;
- static const bool is_steady = false;
-
- static time_point now();
-};
-
-using Clock = QPCClock;
-#else
-using Clock = std::chrono::high_resolution_clock;
-#endif
-
-using Duration = Clock::duration;
-
-/**
- * Represents a timing category that measured time can be accounted towards. Should be declared as a
- * global variable and passed to Timers.
- */
-class TimingCategory final {
-public:
- TimingCategory(const char* name, TimingCategory* parent = nullptr);
-
- unsigned int GetCategoryId() const {
- return category_id;
- }
-
- /// Adds some time to this category. Can safely be called from multiple threads at the same time.
- void AddTime(Duration amount) {
- std::atomic_fetch_add_explicit(
- &accumulated_duration, amount.count(),
- std::memory_order_relaxed);
- }
-
- /**
- * Atomically retrieves the accumulated measured time for this category and resets the counter
- * to zero. Can be safely called concurrently with AddTime.
- */
- Duration GetAccumulatedTime() {
- return Duration(std::atomic_exchange_explicit(
- &accumulated_duration, (Duration::rep)0,
- std::memory_order_relaxed));
- }
-
-private:
- unsigned int category_id;
- std::atomic<Duration::rep> accumulated_duration;
-};
-
-/**
- * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
- * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
- * appropriately paired.
- *
- * When a Timer is started, it automatically pauses a previously running timer on the same thread,
- * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
- * double-accounting of time on two categories.
- */
-class Timer {
-public:
- Timer(TimingCategory& category) : category(category) {
- }
-
- void Start() {
-#if ENABLE_PROFILING
- ASSERT(!running);
- previous_timer = current_timer;
- current_timer = this;
- if (previous_timer != nullptr)
- previous_timer->StopTiming();
-
- StartTiming();
-#endif
- }
-
- void Stop() {
-#if ENABLE_PROFILING
- ASSERT(running);
- StopTiming();
-
- if (previous_timer != nullptr)
- previous_timer->StartTiming();
- current_timer = previous_timer;
-#endif
- }
-
-private:
-#if ENABLE_PROFILING
- void StartTiming() {
- start = Clock::now();
- running = true;
- }
-
- void StopTiming() {
- auto duration = Clock::now() - start;
- running = false;
- category.AddTime(std::chrono::duration_cast<Duration>(duration));
- }
-
- Clock::time_point start;
- bool running = false;
-
- Timer* previous_timer;
- static thread_local Timer* current_timer;
-#endif
-
- TimingCategory& category;
-};
-
-/**
- * A Timer that automatically starts timing when created and stops at the end of the scope. Should
- * be used in the majority of cases.
- */
-class ScopeTimer : public Timer {
-public:
- ScopeTimer(TimingCategory& category) : Timer(category) {
- Start();
- }
-
- ~ScopeTimer() {
- Stop();
- }
-};
-
-} // namespace Profiling
-} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
index df98e05b7..fa1ac883f 100644
--- a/src/common/profiler_reporting.h
+++ b/src/common/profiler_reporting.h
@@ -4,22 +4,17 @@
#pragma once
+#include <chrono>
#include <cstddef>
#include <vector>
-#include "common/profiler.h"
#include "common/synchronized_wrapper.h"
namespace Common {
namespace Profiling {
-struct TimingCategoryInfo {
- static const unsigned int NO_PARENT = -1;
-
- TimingCategory* category;
- const char* name;
- unsigned int parent;
-};
+using Clock = std::chrono::high_resolution_clock;
+using Duration = Clock::duration;
struct ProfilingFrameResult {
/// Time since the last delivered frame
@@ -27,22 +22,12 @@ struct ProfilingFrameResult {
/// Time spent processing a frame, excluding VSync
Duration frame_time;
-
- /// Total amount of time spent inside each category in this frame. Indexed by the category id
- std::vector<Duration> time_per_category;
};
class ProfilingManager final {
public:
ProfilingManager();
- unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
- void SetTimingCategoryParent(unsigned int category, unsigned int parent);
-
- const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
- return timing_categories;
- }
-
/// This should be called after swapping screen buffers.
void BeginFrame();
/// This should be called before swapping screen buffers.
@@ -54,7 +39,6 @@ public:
}
private:
- std::vector<TimingCategoryInfo> timing_categories;
Clock::time_point last_frame_end;
Clock::time_point this_frame_start;
@@ -73,9 +57,6 @@ struct AggregatedFrameResult {
AggregatedDuration frame_time;
float fps;
-
- /// Total amount of time spent inside each category in this frame. Indexed by the category id
- std::vector<AggregatedDuration> time_per_category;
};
class TimingResultsAggregator final {
@@ -83,7 +64,6 @@ public:
TimingResultsAggregator(size_t window_size);
void Clear();
- void SetNumberOfCategories(size_t n);
void AddFrame(const ProfilingFrameResult& frame_result);
@@ -95,7 +75,6 @@ public:
std::vector<Duration> interframe_times;
std::vector<Duration> frame_times;
- std::vector<std::vector<Duration>> times_per_category;
};
ProfilingManager& GetProfilingManager();
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 647784208..8d4b26815 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -10,7 +10,6 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "core/memory.h"
#include "core/hle/svc.h"
@@ -25,9 +24,6 @@
#include "core/gdbstub/gdbstub.h"
-Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
-Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
-
enum {
COND = (1 << 0),
NON_BRANCH = (1 << 1),
@@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
}
static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
- Common::Profiling::ScopeTimer timer_decode(profile_decode);
MICROPROFILE_SCOPE(DynCom_Decode);
// Decode instruction, get index
@@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
}
static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
- Common::Profiling::ScopeTimer timer_decode(profile_decode);
MICROPROFILE_SCOPE(DynCom_Decode);
ARM_INST_PTR inst_base = nullptr;
@@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
unsigned InterpreterMainLoop(ARMul_State* cpu) {
- Common::Profiling::ScopeTimer timer_execute(profile_execute);
MICROPROFILE_SCOPE(DynCom_Execute);
GDBStub::BreakpointAddress breakpoint_data;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 2d22652d9..53931a106 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -18,6 +18,7 @@
/// Detailed description of the error. This listing is likely incomplete.
enum class ErrorDescription : u32 {
Success = 0,
+ OS_InvalidBufferDescriptor = 48,
WrongAddress = 53,
FS_NotFound = 120,
FS_AlreadyExists = 190,
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 08e437125..995bee3f9 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <cinttypes>
#include "audio_core/hle/pipe.h"
@@ -12,37 +13,80 @@
#include "core/hle/kernel/event.h"
#include "core/hle/service/dsp_dsp.h"
+using DspPipe = DSP::HLE::DspPipe;
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// Namespace DSP_DSP
namespace DSP_DSP {
-static u32 read_pipe_count;
static Kernel::SharedPtr<Kernel::Event> semaphore_event;
-struct PairHash {
- template <typename T, typename U>
- std::size_t operator()(const std::pair<T, U> &x) const {
- // TODO(yuriks): Replace with better hash combining function.
- return std::hash<T>()(x.first) ^ std::hash<U>()(x.second);
+/// There are three types of interrupts
+enum class InterruptType {
+ Zero, One, Pipe
+};
+constexpr size_t NUM_INTERRUPT_TYPE = 3;
+
+class InterruptEvents final {
+public:
+ void Signal(InterruptType type, DspPipe pipe) {
+ Kernel::SharedPtr<Kernel::Event>& event = Get(type, pipe);
+ if (event) {
+ event->Signal();
+ }
}
+
+ Kernel::SharedPtr<Kernel::Event>& Get(InterruptType type, DspPipe dsp_pipe) {
+ switch (type) {
+ case InterruptType::Zero:
+ return zero;
+ case InterruptType::One:
+ return one;
+ case InterruptType::Pipe: {
+ const size_t pipe_index = static_cast<size_t>(dsp_pipe);
+ ASSERT(pipe_index < DSP::HLE::NUM_DSP_PIPE);
+ return pipe[pipe_index];
+ }
+ }
+
+ UNREACHABLE_MSG("Invalid interrupt type = %zu", static_cast<size_t>(type));
+ }
+
+ bool HasTooManyEventsRegistered() const {
+ // Actual service implementation only has 6 'slots' for interrupts.
+ constexpr size_t max_number_of_interrupt_events = 6;
+
+ size_t number = std::count_if(pipe.begin(), pipe.end(), [](const auto& evt) {
+ return evt != nullptr;
+ });
+
+ if (zero != nullptr)
+ number++;
+ if (one != nullptr)
+ number++;
+
+ return number >= max_number_of_interrupt_events;
+ }
+
+private:
+ /// Currently unknown purpose
+ Kernel::SharedPtr<Kernel::Event> zero = nullptr;
+ /// Currently unknown purpose
+ Kernel::SharedPtr<Kernel::Event> one = nullptr;
+ /// Each DSP pipe has an associated interrupt
+ std::array<Kernel::SharedPtr<Kernel::Event>, DSP::HLE::NUM_DSP_PIPE> pipe = {{}};
};
-/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents
-static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
+static InterruptEvents interrupt_events;
// DSP Interrupts:
-// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting
-// for an interrupt event. Immediately after this interrupt event, userland normally updates the
-// state in the next region and increments the relevant frame counter by two.
-void SignalAllInterrupts() {
- // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case.
- for (auto& interrupt_event : interrupt_events)
- interrupt_event.second->Signal();
-}
-
-void SignalInterrupt(u32 interrupt, u32 channel) {
- interrupt_events[std::make_pair(interrupt, channel)]->Signal();
+// The audio-pipe interrupt occurs every frame tick. Userland programs normally have a thread
+// that's waiting for an interrupt event. Immediately after this interrupt event, userland
+// normally updates the state in the next region and increments the relevant frame counter by
+// two.
+void SignalPipeInterrupt(DspPipe pipe) {
+ interrupt_events.Signal(InterruptType::Pipe, pipe);
}
/**
@@ -58,7 +102,10 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
u32 addr = cmd_buff[1];
+ cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+
+ // TODO(merry): There is a per-region offset missing in this calculation (that seems to be always zero).
cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
LOG_DEBUG(Service_DSP, "addr=0x%08X", addr);
@@ -113,7 +160,9 @@ static void LoadComponent(Service::Interface* self) {
static void GetSemaphoreEventHandle(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x16, 1, 2);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ // cmd_buff[2] not set
cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle
LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -138,8 +187,7 @@ static void FlushDataCache(Service::Interface* self) {
u32 size = cmd_buff[2];
u32 process = cmd_buff[4];
- // TODO(purpasmart96): Verify return header on HW
-
+ cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process);
@@ -148,8 +196,8 @@ static void FlushDataCache(Service::Interface* self) {
/**
* DSP_DSP::RegisterInterruptEvents service function
* Inputs:
- * 1 : Interrupt Number
- * 2 : Channel Number
+ * 1 : Interrupt Type
+ * 2 : Pipe Number
* 4 : Interrupt event handle
* Outputs:
* 1 : Result of function, 0 on success, otherwise error code
@@ -157,23 +205,40 @@ static void FlushDataCache(Service::Interface* self) {
static void RegisterInterruptEvents(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- u32 interrupt = cmd_buff[1];
- u32 channel = cmd_buff[2];
+ u32 type_index = cmd_buff[1];
+ u32 pipe_index = cmd_buff[2];
u32 event_handle = cmd_buff[4];
+ ASSERT_MSG(type_index < NUM_INTERRUPT_TYPE && pipe_index < DSP::HLE::NUM_DSP_PIPE,
+ "Invalid type or pipe: type = %u, pipe = %u", type_index, pipe_index);
+
+ InterruptType type = static_cast<InterruptType>(cmd_buff[1]);
+ DspPipe pipe = static_cast<DspPipe>(cmd_buff[2]);
+
+ cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
+
if (event_handle) {
auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
- if (evt) {
- interrupt_events[std::make_pair(interrupt, channel)] = evt;
- cmd_buff[1] = RESULT_SUCCESS.raw;
- LOG_INFO(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
- } else {
- LOG_CRITICAL(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
- ASSERT(false); // This should really be handled at a IPC translation layer.
+
+ if (!evt) {
+ LOG_INFO(Service_DSP, "Invalid event handle! type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
+ ASSERT(false); // TODO: This should really be handled at an IPC translation layer.
+ }
+
+ if (interrupt_events.HasTooManyEventsRegistered()) {
+ LOG_INFO(Service_DSP, "Ran out of space to register interrupts (Attempted to register type=%u, pipe=%u, event_handle=0x%08X)",
+ type_index, pipe_index, event_handle);
+ cmd_buff[1] = ResultCode(ErrorDescription::InvalidResultValue, ErrorModule::DSP, ErrorSummary::OutOfResource, ErrorLevel::Status).raw;
+ return;
}
+
+ interrupt_events.Get(type, pipe) = evt;
+ LOG_INFO(Service_DSP, "Registered type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
} else {
- interrupt_events.erase(std::make_pair(interrupt, channel));
- LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
+ interrupt_events.Get(type, pipe) = nullptr;
+ LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
}
}
@@ -187,6 +252,7 @@ static void RegisterInterruptEvents(Service::Interface* self) {
static void SetSemaphore(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -195,7 +261,7 @@ static void SetSemaphore(Service::Interface* self) {
/**
* DSP_DSP::WriteProcessPipe service function
* Inputs:
- * 1 : Channel
+ * 1 : Pipe Number
* 2 : Size
* 3 : (size << 14) | 0x402
* 4 : Buffer
@@ -206,24 +272,32 @@ static void SetSemaphore(Service::Interface* self) {
static void WriteProcessPipe(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]);
+ u32 pipe_index = cmd_buff[1];
u32 size = cmd_buff[2];
u32 buffer = cmd_buff[4];
- ASSERT_MSG(IPC::StaticBufferDesc(size, 1) == cmd_buff[3], "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe, size, buffer);
- ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
+ DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
- std::vector<u8> message(size);
+ if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
+ LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe_index, size, buffer);
+ cmd_buff[0] = IPC::MakeHeader(0, 1, 0);
+ cmd_buff[1] = ResultCode(ErrorDescription::OS_InvalidBufferDescriptor, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent).raw;
+ return;
+ }
+
+ ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
+ std::vector<u8> message(size);
for (size_t i = 0; i < size; i++) {
message[i] = Memory::Read8(buffer + i);
}
DSP::HLE::PipeWrite(pipe, message);
+ cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
- LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
+ LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
}
/**
@@ -243,13 +317,16 @@ static void WriteProcessPipe(Service::Interface* self) {
static void ReadPipeIfPossible(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]);
+ u32 pipe_index = cmd_buff[1];
u32 unknown = cmd_buff[2];
u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
VAddr addr = cmd_buff[0x41];
- ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr);
+ DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
+
+ ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
+ cmd_buff[0] = IPC::MakeHeader(0x10, 1, 2);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
@@ -260,8 +337,10 @@ static void ReadPipeIfPossible(Service::Interface* self) {
} else {
cmd_buff[2] = 0; // Return no data
}
+ cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
+ cmd_buff[4] = addr;
- LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]);
+ LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
}
/**
@@ -278,26 +357,31 @@ static void ReadPipeIfPossible(Service::Interface* self) {
static void ReadPipe(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]);
+ u32 pipe_index = cmd_buff[1];
u32 unknown = cmd_buff[2];
u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
VAddr addr = cmd_buff[0x41];
- ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr);
+ DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
+
+ ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
Memory::WriteBlock(addr, response.data(), response.size());
+ cmd_buff[0] = IPC::MakeHeader(0xE, 2, 2);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
cmd_buff[2] = static_cast<u32>(response.size());
+ cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
+ cmd_buff[4] = addr;
} else {
// No more data is in pipe. Hardware hangs in this case; this should never happen.
UNREACHABLE();
}
- LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]);
+ LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
}
/**
@@ -312,13 +396,16 @@ static void ReadPipe(Service::Interface* self) {
static void GetPipeReadableSize(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]);
+ u32 pipe_index = cmd_buff[1];
u32 unknown = cmd_buff[2];
+ DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
+
+ cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe);
- LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, cmd_buff[2]);
+ LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]);
}
/**
@@ -333,6 +420,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
u32 mask = cmd_buff[1];
+ cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask);
@@ -350,6 +438,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
static void GetHeadphoneStatus(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
cmd_buff[2] = 0; // Not using headphones?
@@ -376,6 +465,7 @@ static void RecvData(Service::Interface* self) {
// Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept.
+ cmd_buff[0] = IPC::MakeHeader(0x1, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
switch (DSP::HLE::GetDspState()) {
case DSP::HLE::DspState::On:
@@ -411,6 +501,7 @@ static void RecvDataIsReady(Service::Interface* self) {
ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number);
+ cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
cmd_buff[2] = 1; // Ready to read
@@ -458,14 +549,14 @@ const Interface::FunctionInfo FunctionTable[] = {
Interface::Interface() {
semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event");
- read_pipe_count = 0;
+ interrupt_events = {};
Register(FunctionTable);
}
Interface::~Interface() {
semaphore_event = nullptr;
- interrupt_events.clear();
+ interrupt_events = {};
}
} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 32b89e9bb..22f6687cc 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -8,6 +8,12 @@
#include "core/hle/service/service.h"
+namespace DSP {
+namespace HLE {
+enum class DspPipe;
+}
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// Namespace DSP_DSP
@@ -23,15 +29,10 @@ public:
}
};
-/// Signal all audio related interrupts.
-void SignalAllInterrupts();
-
/**
- * Signal a specific audio related interrupt based on interrupt id and channel id.
- * @param interrupt_id The interrupt id
- * @param channel_id The channel id
- * The significance of various values of interrupt_id and channel_id is not yet known.
+ * Signal a specific DSP related interrupt of type == InterruptType::Pipe, pipe == pipe.
+ * @param pipe The DSP pipe for which to signal an interrupt for.
*/
-void SignalInterrupt(u32 interrupt_id, u32 channel_id);
+void SignalPipeInterrupt(DSP::HLE::DspPipe pipe);
-} // namespace
+} // namespace DSP_DSP
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 211fcf599..233592d7f 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -4,7 +4,6 @@
#include "common/bit_field.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "core/memory.h"
#include "core/hle/kernel/event.h"
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 1672ad775..d16578f87 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -4,6 +4,7 @@
#include <cstring>
+#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -25,13 +26,17 @@ struct ConversionParameters {
u16 input_line_width;
u16 input_lines;
StandardCoefficient standard_coefficient;
- u8 reserved;
+ u8 padding;
u16 alpha;
};
static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
static Kernel::SharedPtr<Kernel::Event> completion_event;
static ConversionConfiguration conversion;
+static DitheringWeightParams dithering_weight_params;
+static u32 temporal_dithering_enabled = 0;
+static u32 transfer_end_interrupt_enabled = 0;
+static u32 spacial_dithering_enabled = 0;
static const CoefficientSet standard_coefficients[4] = {
{{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
@@ -70,7 +75,7 @@ ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
size_t index = static_cast<size_t>(standard_coefficient);
- if (index >= 4) {
+ if (index >= ARRAY_SIZE(standard_coefficients)) {
return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
}
@@ -83,44 +88,183 @@ static void SetInputFormat(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
+
+ cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
+}
+static void GetInputFormat(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = static_cast<u32>(conversion.input_format);
+
+ LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
}
static void SetOutputFormat(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
+
+ cmd_buff[0] = IPC::MakeHeader(0x3, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
+}
+
+static void GetOutputFormat(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x4, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = static_cast<u32>(conversion.output_format);
+
+ LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
}
static void SetRotation(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
+
+ cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
+}
+
+static void GetRotation(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x6, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = static_cast<u32>(conversion.rotation);
+
+ LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
}
static void SetBlockAlignment(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
- LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
+ cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
+}
+
+static void GetBlockAlignment(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = static_cast<u32>(conversion.block_alignment);
+
+ LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
+}
+
+/**
+ * Y2R_U::SetSpacialDithering service function
+ * Inputs:
+ * 1 : u8, 0 = Disabled, 1 = Enabled
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+static void SetSpacialDithering(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ spacial_dithering_enabled = cmd_buff[1] & 0xF;
+
+ cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R_U::GetSpacialDithering service function
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Disabled, 1 = Enabled
+ */
+static void GetSpacialDithering(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = spacial_dithering_enabled;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R_U::SetTemporalDithering service function
+ * Inputs:
+ * 1 : u8, 0 = Disabled, 1 = Enabled
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+static void SetTemporalDithering(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ temporal_dithering_enabled = cmd_buff[1] & 0xF;
+
+ cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R_U::GetTemporalDithering service function
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Disabled, 1 = Enabled
+ */
+static void GetTemporalDithering(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = temporal_dithering_enabled;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
}
+/**
+ * Y2R_U::SetTransferEndInterrupt service function
+ * Inputs:
+ * 1 : u8, 0 = Disabled, 1 = Enabled
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
static void SetTransferEndInterrupt(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ transfer_end_interrupt_enabled = cmd_buff[1] & 0xf;
cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
- LOG_DEBUG(Service_Y2R, "(STUBBED) called");
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R_U::GetTransferEndInterrupt service function
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Disabled, 1 = Enabled
+ */
+static void GetTransferEndInterrupt(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0xE, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = transfer_end_interrupt_enabled;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
}
/**
@@ -132,8 +276,10 @@ static void SetTransferEndInterrupt(Service::Interface* self) {
static void GetTransferEndEvent(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom();
+
LOG_DEBUG(Service_Y2R, "called");
}
@@ -144,12 +290,12 @@ static void SetSendingY(Service::Interface* self) {
conversion.src_Y.image_size = cmd_buff[2];
conversion.src_Y.transfer_unit = cmd_buff[3];
conversion.src_Y.gap = cmd_buff[4];
- u32 src_process_handle = cmd_buff[6];
- LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
- "src_process_handle=0x%08X", conversion.src_Y.image_size,
- conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
+ cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
+ conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, cmd_buff[6]);
}
static void SetSendingU(Service::Interface* self) {
@@ -159,12 +305,12 @@ static void SetSendingU(Service::Interface* self) {
conversion.src_U.image_size = cmd_buff[2];
conversion.src_U.transfer_unit = cmd_buff[3];
conversion.src_U.gap = cmd_buff[4];
- u32 src_process_handle = cmd_buff[6];
- LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
- "src_process_handle=0x%08X", conversion.src_U.image_size,
- conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
+ cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
+ conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, cmd_buff[6]);
}
static void SetSendingV(Service::Interface* self) {
@@ -174,12 +320,12 @@ static void SetSendingV(Service::Interface* self) {
conversion.src_V.image_size = cmd_buff[2];
conversion.src_V.transfer_unit = cmd_buff[3];
conversion.src_V.gap = cmd_buff[4];
- u32 src_process_handle = cmd_buff[6];
- LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
- "src_process_handle=0x%08X", conversion.src_V.image_size,
- conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
+ cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
+ conversion.src_V.image_size, conversion.src_V.transfer_unit, conversion.src_V.gap, cmd_buff[6]);
}
static void SetSendingYUYV(Service::Interface* self) {
@@ -189,12 +335,76 @@ static void SetSendingYUYV(Service::Interface* self) {
conversion.src_YUYV.image_size = cmd_buff[2];
conversion.src_YUYV.transfer_unit = cmd_buff[3];
conversion.src_YUYV.gap = cmd_buff[4];
- u32 src_process_handle = cmd_buff[6];
- LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
- "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
- conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
+ cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
+ conversion.src_YUYV.image_size, conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, cmd_buff[6]);
+}
+
+/**
+ * Y2R::IsFinishedSendingYuv service function
+ * Output:
+ * 1 : Result of the function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Not Finished, 1 = Finished
+ */
+static void IsFinishedSendingYuv(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x14, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = 1;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R::IsFinishedSendingY service function
+ * Output:
+ * 1 : Result of the function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Not Finished, 1 = Finished
+ */
+static void IsFinishedSendingY(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x15, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = 1;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R::IsFinishedSendingU service function
+ * Output:
+ * 1 : Result of the function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Not Finished, 1 = Finished
+ */
+static void IsFinishedSendingU(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = 1;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
+}
+
+/**
+ * Y2R::IsFinishedSendingV service function
+ * Output:
+ * 1 : Result of the function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Not Finished, 1 = Finished
+ */
+static void IsFinishedSendingV(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x17, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = 1;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
}
static void SetReceiving(Service::Interface* self) {
@@ -204,27 +414,66 @@ static void SetReceiving(Service::Interface* self) {
conversion.dst.image_size = cmd_buff[2];
conversion.dst.transfer_unit = cmd_buff[3];
conversion.dst.gap = cmd_buff[4];
- u32 dst_process_handle = cmd_buff[6];
- LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
- "dst_process_handle=0x%08X", conversion.dst.image_size,
- conversion.dst.transfer_unit, conversion.dst.gap,
- dst_process_handle);
+ cmd_buff[0] = IPC::MakeHeader(0x18, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, dst_process_handle=0x%08X",
+ conversion.dst.image_size, conversion.dst.transfer_unit, conversion.dst.gap, cmd_buff[6]);
+}
+
+/**
+ * Y2R::IsFinishedReceiving service function
+ * Output:
+ * 1 : Result of the function, 0 on success, otherwise error code
+ * 2 : u8, 0 = Not Finished, 1 = Finished
+ */
+static void IsFinishedReceiving(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x19, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = 1;
+
+ LOG_WARNING(Service_Y2R, "(STUBBED) called");
}
static void SetInputLineWidth(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
+ cmd_buff[0] = IPC::MakeHeader(0x1A, 1, 0);
cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
+
+ LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
+}
+
+static void GetInputLineWidth(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x1B, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = conversion.input_line_width;
+
+ LOG_DEBUG(Service_Y2R, "called input_line_width=%u", conversion.input_line_width);
}
static void SetInputLines(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]);
+ cmd_buff[0] = IPC::MakeHeader(0x1C, 1, 0);
cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
+
+ LOG_DEBUG(Service_Y2R, "called input_lines=%u", cmd_buff[1]);
+}
+
+static void GetInputLines(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x1D, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = static_cast<u32>(conversion.input_lines);
+
+ LOG_DEBUG(Service_Y2R, "called input_lines=%u", conversion.input_lines);
}
static void SetCoefficient(Service::Interface* self) {
@@ -232,44 +481,111 @@ static void SetCoefficient(Service::Interface* self) {
const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
+
+ cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
coefficients[0], coefficients[1], coefficients[2], coefficients[3],
coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
+}
+
+static void GetCoefficient(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x1F, 5, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+ std::memcpy(&cmd_buff[2], conversion.coefficients.data(), sizeof(CoefficientSet));
+
+ LOG_DEBUG(Service_Y2R, "called");
}
static void SetStandardCoefficient(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]);
+ u32 index = cmd_buff[1];
+
+ cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
+ cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)index).raw;
- cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw;
+ LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", index);
+}
+
+static void GetStandardCoefficient(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ u32 index = cmd_buff[1];
+
+ if (index < ARRAY_SIZE(standard_coefficients)) {
+ cmd_buff[0] = IPC::MakeHeader(0x21, 5, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ std::memcpy(&cmd_buff[2], &standard_coefficients[index], sizeof(CoefficientSet));
+
+ LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u ", index);
+ } else {
+ cmd_buff[0] = IPC::MakeHeader(0x21, 1, 0);
+ cmd_buff[1] = -1; // TODO(bunnei): Identify the correct error code for this
+
+ LOG_ERROR(Service_Y2R, "called standard_coefficient=%u The argument is invalid!", index);
+ }
}
static void SetAlpha(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
conversion.alpha = cmd_buff[1];
+
+ cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
+}
+
+static void GetAlpha(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x23, 2, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = conversion.alpha;
+
LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
+}
+
+static void SetDitheringWeightParams(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams));
+ cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called");
+}
+
+static void GetDitheringWeightParams(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x25, 9, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ std::memcpy(&cmd_buff[2], &dithering_weight_params, sizeof(DitheringWeightParams));
+
+ LOG_DEBUG(Service_Y2R, "called");
}
static void StartConversion(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
// dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
- u32 total_output_size = conversion.input_lines *
- (conversion.dst.transfer_unit + conversion.dst.gap);
+ u32 total_output_size = conversion.input_lines * (conversion.dst.transfer_unit + conversion.dst.gap);
Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
HW::Y2R::PerformConversion(conversion);
- LOG_DEBUG(Service_Y2R, "called");
completion_event->Signal();
+ cmd_buff[0] = IPC::MakeHeader(0x26, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called");
}
static void StopConversion(Service::Interface* self) {
@@ -277,6 +593,7 @@ static void StopConversion(Service::Interface* self) {
cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
LOG_DEBUG(Service_Y2R, "called");
}
@@ -289,50 +606,61 @@ static void StopConversion(Service::Interface* self) {
static void IsBusyConversion(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x28, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
cmd_buff[2] = 0; // StartConversion always finishes immediately
+
LOG_DEBUG(Service_Y2R, "called");
}
/**
- * Y2R_U::SetConversionParams service function
+ * Y2R_U::SetPackageParameter service function
*/
-static void SetConversionParams(Service::Interface* self) {
+static void SetPackageParameter(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
- LOG_DEBUG(Service_Y2R,
- "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
- "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
- "reserved=%hhu alpha=%hX",
- params->input_format, params->output_format, params->rotation, params->block_alignment,
- params->input_line_width, params->input_lines, params->standard_coefficient,
- params->reserved, params->alpha);
-
- ResultCode result = RESULT_SUCCESS;
conversion.input_format = params->input_format;
conversion.output_format = params->output_format;
conversion.rotation = params->rotation;
conversion.block_alignment = params->block_alignment;
- result = conversion.SetInputLineWidth(params->input_line_width);
- if (result.IsError()) goto cleanup;
+
+ ResultCode result = conversion.SetInputLineWidth(params->input_line_width);
+
+ if (result.IsError())
+ goto cleanup;
+
result = conversion.SetInputLines(params->input_lines);
- if (result.IsError()) goto cleanup;
+
+ if (result.IsError())
+ goto cleanup;
+
result = conversion.SetStandardCoefficient(params->standard_coefficient);
- if (result.IsError()) goto cleanup;
+
+ if (result.IsError())
+ goto cleanup;
+
+ conversion.padding = params->padding;
conversion.alpha = params->alpha;
cleanup:
cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
cmd_buff[1] = result.raw;
+
+ LOG_DEBUG(Service_Y2R, "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
+ "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
+ params->input_format, params->output_format, params->rotation, params->block_alignment,
+ params->input_line_width, params->input_lines, params->standard_coefficient, params->padding, params->alpha);
}
static void PingProcess(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0x2A, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
cmd_buff[2] = 0;
+
LOG_WARNING(Service_Y2R, "(STUBBED) called");
}
@@ -358,6 +686,7 @@ static void DriverInitialize(Service::Interface* self) {
cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
LOG_DEBUG(Service_Y2R, "called");
}
@@ -366,54 +695,67 @@ static void DriverFinalize(Service::Interface* self) {
cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0);
cmd_buff[1] = RESULT_SUCCESS.raw;
+
+ LOG_DEBUG(Service_Y2R, "called");
+}
+
+
+static void GetPackageParameter(Service::Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+
+ cmd_buff[0] = IPC::MakeHeader(0x2D, 4, 0);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ std::memcpy(&cmd_buff[2], &conversion, sizeof(ConversionParameters));
+
LOG_DEBUG(Service_Y2R, "called");
}
const Interface::FunctionInfo FunctionTable[] = {
{0x00010040, SetInputFormat, "SetInputFormat"},
- {0x00020000, nullptr, "GetInputFormat"},
+ {0x00020000, GetInputFormat, "GetInputFormat"},
{0x00030040, SetOutputFormat, "SetOutputFormat"},
- {0x00040000, nullptr, "GetOutputFormat"},
+ {0x00040000, GetOutputFormat, "GetOutputFormat"},
{0x00050040, SetRotation, "SetRotation"},
- {0x00060000, nullptr, "GetRotation"},
+ {0x00060000, GetRotation, "GetRotation"},
{0x00070040, SetBlockAlignment, "SetBlockAlignment"},
- {0x00080000, nullptr, "GetBlockAlignment"},
- {0x00090040, nullptr, "SetSpacialDithering"},
- {0x000A0000, nullptr, "GetSpacialDithering"},
- {0x000B0040, nullptr, "SetTemporalDithering"},
- {0x000C0000, nullptr, "GetTemporalDithering"},
+ {0x00080000, GetBlockAlignment, "GetBlockAlignment"},
+ {0x00090040, SetSpacialDithering, "SetSpacialDithering"},
+ {0x000A0000, GetSpacialDithering, "GetSpacialDithering"},
+ {0x000B0040, SetTemporalDithering, "SetTemporalDithering"},
+ {0x000C0000, GetTemporalDithering, "GetTemporalDithering"},
{0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
+ {0x000E0000, GetTransferEndInterrupt, "GetTransferEndInterrupt"},
{0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
{0x00100102, SetSendingY, "SetSendingY"},
{0x00110102, SetSendingU, "SetSendingU"},
{0x00120102, SetSendingV, "SetSendingV"},
{0x00130102, SetSendingYUYV, "SetSendingYUYV"},
- {0x00140000, nullptr, "IsFinishedSendingYuv"},
- {0x00150000, nullptr, "IsFinishedSendingY"},
- {0x00160000, nullptr, "IsFinishedSendingU"},
- {0x00170000, nullptr, "IsFinishedSendingV"},
+ {0x00140000, IsFinishedSendingYuv, "IsFinishedSendingYuv"},
+ {0x00150000, IsFinishedSendingY, "IsFinishedSendingY"},
+ {0x00160000, IsFinishedSendingU, "IsFinishedSendingU"},
+ {0x00170000, IsFinishedSendingV, "IsFinishedSendingV"},
{0x00180102, SetReceiving, "SetReceiving"},
- {0x00190000, nullptr, "IsFinishedReceiving"},
+ {0x00190000, IsFinishedReceiving, "IsFinishedReceiving"},
{0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
- {0x001B0000, nullptr, "GetInputLineWidth"},
+ {0x001B0000, GetInputLineWidth, "GetInputLineWidth"},
{0x001C0040, SetInputLines, "SetInputLines"},
- {0x001D0000, nullptr, "GetInputLines"},
+ {0x001D0000, GetInputLines, "GetInputLines"},
{0x001E0100, SetCoefficient, "SetCoefficient"},
- {0x001F0000, nullptr, "GetCoefficient"},
+ {0x001F0000, GetCoefficient, "GetCoefficient"},
{0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
- {0x00210040, nullptr, "GetStandardCoefficientParams"},
+ {0x00210040, GetStandardCoefficient, "GetStandardCoefficient"},
{0x00220040, SetAlpha, "SetAlpha"},
- {0x00230000, nullptr, "GetAlpha"},
- {0x00240200, nullptr, "SetDitheringWeightParams"},
- {0x00250000, nullptr, "GetDitheringWeightParams"},
+ {0x00230000, GetAlpha, "GetAlpha"},
+ {0x00240200, SetDitheringWeightParams,"SetDitheringWeightParams"},
+ {0x00250000, GetDitheringWeightParams,"GetDitheringWeightParams"},
{0x00260000, StartConversion, "StartConversion"},
{0x00270000, StopConversion, "StopConversion"},
{0x00280000, IsBusyConversion, "IsBusyConversion"},
- {0x002901C0, SetConversionParams, "SetConversionParams"},
+ {0x002901C0, SetPackageParameter, "SetPackageParameter"},
{0x002A0000, PingProcess, "PingProcess"},
{0x002B0000, DriverInitialize, "DriverInitialize"},
{0x002C0000, DriverFinalize, "DriverFinalize"},
- {0x002D0000, nullptr, "GetPackageParameter"},
+ {0x002D0000, GetPackageParameter, "GetPackageParameter"},
};
////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 3965a5545..95fa2fdb7 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -97,6 +97,7 @@ struct ConversionConfiguration {
u16 input_line_width;
u16 input_lines;
CoefficientSet coefficients;
+ u8 padding;
u16 alpha;
/// Input parameters for the Y (luma) plane
@@ -109,6 +110,25 @@ struct ConversionConfiguration {
ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
};
+struct DitheringWeightParams {
+ u16 w0_xEven_yEven;
+ u16 w0_xOdd_yEven;
+ u16 w0_xEven_yOdd;
+ u16 w0_xOdd_yOdd;
+ u16 w1_xEven_yEven;
+ u16 w1_xOdd_yEven;
+ u16 w1_xEven_yOdd;
+ u16 w1_xOdd_yOdd;
+ u16 w2_xEven_yEven;
+ u16 w2_xOdd_yEven;
+ u16 w2_xEven_yOdd;
+ u16 w2_xOdd_yOdd;
+ u16 w3_xEven_yEven;
+ u16 w3_xOdd_yEven;
+ u16 w3_xEven_yOdd;
+ u16 w3_xOdd_yOdd;
+};
+
class Interface : public Service::Interface {
public:
Interface();
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index ae54afb1c..a9a1a3244 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,7 +6,6 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "common/string_util.h"
#include "common/symbols.h"
@@ -1031,8 +1030,6 @@ static const FunctionDef SVC_Table[] = {
{0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
};
-Common::Profiling::TimingCategory profiler_svc("SVC Calls");
-
static const FunctionDef* GetSVCInfo(u32 func_num) {
if (func_num >= ARRAY_SIZE(SVC_Table)) {
LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@@ -1044,7 +1041,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
void CallSVC(u32 immediate) {
- Common::Profiling::ScopeTimer timer_svc(profiler_svc);
MICROPROFILE_SCOPE(Kernel_SVC);
const FunctionDef* info = GetSVCInfo(immediate);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..de4082b1f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRCS
shader/shader_interpreter.cpp
swrasterizer.cpp
utils.cpp
+ vertex_loader.cpp
video_core.cpp
)
@@ -43,6 +44,7 @@ set(HEADERS
shader/shader_interpreter.h
swrasterizer.h
utils.h
+ vertex_loader.h
video_core.h
)
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3abe79c09..58883e374 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -7,7 +7,6 @@
#include "common/alignment.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "core/settings.h"
#include "core/hle/service/gsp_gpu.h"
@@ -22,6 +21,7 @@
#include "video_core/video_core.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/shader/shader_interpreter.h"
+#include "video_core/vertex_loader.h"
namespace Pica {
@@ -35,8 +35,6 @@ static int default_attr_counter = 0;
static u32 default_attr_write_buffer[3];
-Common::Profiling::TimingCategory category_drawing("Drawing");
-
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
static const u32 expand_bits_to_bytes[] = {
0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@@ -186,60 +184,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw):
case PICA_REG_INDEX(trigger_draw_indexed):
{
- Common::Profiling::ScopeTimer scope_timer(category_drawing);
MICROPROFILE_SCOPE(GPU_Drawing);
#if PICA_LOG_TEV
DebugUtils::DumpTevStageConfig(regs.GetTevStages());
#endif
-
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
- const auto& attribute_config = regs.vertex_attributes;
- const u32 base_address = attribute_config.GetPhysicalBaseAddress();
-
- // Information about internal vertex attributes
- u32 vertex_attribute_sources[16];
- boost::fill(vertex_attribute_sources, 0xdeadbeef);
- u32 vertex_attribute_strides[16] = {};
- Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
-
- u32 vertex_attribute_elements[16] = {};
- u32 vertex_attribute_element_size[16] = {};
-
- // Setup attribute data from loaders
- for (int loader = 0; loader < 12; ++loader) {
- const auto& loader_config = attribute_config.attribute_loaders[loader];
-
- u32 offset = 0;
-
- // TODO: What happens if a loader overwrites a previous one's data?
- for (unsigned component = 0; component < loader_config.component_count; ++component) {
- if (component >= 12) {
- LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
- continue;
- }
-
- u32 attribute_index = loader_config.GetComponent(component);
- if (attribute_index < 12) {
- int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
- offset = Common::AlignUp(offset, element_size);
- vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
- vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
- vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
- vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
- vertex_attribute_element_size[attribute_index] = element_size;
- offset += attribute_config.GetStride(attribute_index);
- } else if (attribute_index < 16) {
- // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
- offset = Common::AlignUp(offset, 4);
- offset += (attribute_index - 11) * 4;
- } else {
- UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
- }
- }
- }
+ // Processes information about internal vertex attributes to figure out how a vertex is loaded.
+ // Later, these can be compiled and cached.
+ VertexLoader loader;
+ const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
+ loader.Setup(regs);
// Load vertices
bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -263,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
}
}
- class {
- /// Combine overlapping and close ranges
- void SimplifyRanges() {
- for (auto it = ranges.begin(); it != ranges.end(); ++it) {
- // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
- auto it2 = std::next(it);
- while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
- it->second = std::max(it->second, it2->first + it2->second - it->first);
- it2 = ranges.erase(it2);
- }
- }
- }
-
- public:
- /// Record a particular memory access in the list
- void AddAccess(u32 paddr, u32 size) {
- // Create new range or extend existing one
- ranges[paddr] = std::max(ranges[paddr], size);
-
- // Simplify ranges...
- SimplifyRanges();
- }
-
- /// Map of accessed ranges (mapping start address to range size)
- std::map<u32, u32> ranges;
- } memory_accesses;
+ DebugUtils::MemoryAccessTracker memory_accesses;
// Simple circular-replacement vertex cache
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -332,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (!vertex_cache_hit) {
// Initialize data for the current vertex
Shader::InputVertex input;
-
- for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
- if (vertex_attribute_elements[i] != 0) {
- // Default attribute values set if array elements have < 4 components. This
- // is *not* carried over from the default attribute settings even if they're
- // enabled for this attribute.
- static const float24 zero = float24::FromFloat32(0.0f);
- static const float24 one = float24::FromFloat32(1.0f);
- input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
-
- // Load per-vertex data from the loader arrays
- for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
- u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
- const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
-
- if (g_debug_context && Pica::g_debug_context->recorder) {
- memory_accesses.AddAccess(source_addr,
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
- : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
- }
-
- const float srcval =
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
- *reinterpret_cast<const float*>(srcdata);
-
- input.attr[i][comp] = float24::FromFloat32(srcval);
- LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
- comp, i, vertex, index,
- attribute_config.GetPhysicalBaseAddress(),
- vertex_attribute_sources[i] - base_address,
- vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
- input.attr[i][comp].ToFloat32());
- }
- } else if (attribute_config.IsDefaultAttribute(i)) {
- // Load the default attribute if we're configured to do so
- input.attr[i] = g_state.vs.default_attributes[i];
- LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
- i, vertex, index,
- input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
- input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
- } else {
- // TODO(yuriks): In this case, no data gets loaded and the vertex
- // remains with the last value it had. This isn't currently maintained
- // as global state, however, and so won't work in Citra yet.
- }
- }
+ loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
// Send to vertex shader
- output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes());
+ output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
if (is_indexed) {
vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 56f9bd958..dd0828cee 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -216,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
+/**
+ * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
+ */
+class MemoryAccessTracker {
+ /// Combine overlapping and close ranges
+ void SimplifyRanges() {
+ for (auto it = ranges.begin(); it != ranges.end(); ++it) {
+ // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
+ auto it2 = std::next(it);
+ while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
+ it->second = std::max(it->second, it2->first + it2->second - it->first);
+ it2 = ranges.erase(it2);
+ }
+ }
+ }
+
+public:
+ /// Record a particular memory access in the list
+ void AddAccess(u32 paddr, u32 size) {
+ // Create new range or extend existing one
+ ranges[paddr] = std::max(ranges[paddr], size);
+
+ // Simplify ranges...
+ SimplifyRanges();
+ }
+
+ /// Map of accessed ranges (mapping start address to range size)
+ std::map<u32, u32> ranges;
+};
+
} // namespace
} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 0434ad05a..9cf77b1f2 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "core/memory.h"
#include "core/hw/gpu.h"
@@ -287,7 +286,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
return Math::Cross(vec1, vec2).z;
};
-static Common::Profiling::TimingCategory rasterization_category("Rasterization");
MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
/**
@@ -300,7 +298,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
bool reversed = false)
{
const auto& regs = g_state.regs;
- Common::Profiling::ScopeTimer timer(rasterization_category);
MICROPROFILE_SCOPE(GPU_Rasterization);
// vertex positions in rasterizer coordinates
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 30187d4cf..a8c775c80 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -11,7 +11,6 @@
#include "common/file_util.h"
#include "common/math_util.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "core/memory.h"
#include "core/settings.h"
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 75301accd..043e99190 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -9,7 +9,6 @@
#include "common/hash.h"
#include "common/microprofile.h"
-#include "common/profiler.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/pica.h"
@@ -57,13 +56,11 @@ void Shutdown() {
#endif // ARCHITECTURE_x86_64
}
-static Common::Profiling::TimingCategory shader_category("Vertex Shader");
MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
- Common::Profiling::ScopeTimer timer(shader_category);
MICROPROFILE_SCOPE(GPU_VertexShader);
state.program_counter = config.main_offset;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..9ce9344d2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -25,7 +25,7 @@ namespace Pica {
namespace Shader {
struct InputVertex {
- Math::Vec4<float24> attr[16];
+ alignas(16) Math::Vec4<float24> attr[16];
};
struct OutputVertex {
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..8a3d91896
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
+#include <cmath>
+#include <string>
+
+#include "boost/range/algorithm/fill.hpp"
+
+#include "common/assert.h"
+#include "common/alignment.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "core/memory.h"
+
+#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/pica.h"
+#include "video_core/pica_state.h"
+#include "video_core/pica_types.h"
+#include "video_core/vertex_loader.h"
+
+namespace Pica {
+
+void VertexLoader::Setup(const Pica::Regs& regs) {
+ const auto& attribute_config = regs.vertex_attributes;
+ num_total_attributes = attribute_config.GetNumTotalAttributes();
+
+ boost::fill(vertex_attribute_sources, 0xdeadbeef);
+
+ for (int i = 0; i < 16; i++) {
+ vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
+ }
+
+ // Setup attribute data from loaders
+ for (int loader = 0; loader < 12; ++loader) {
+ const auto& loader_config = attribute_config.attribute_loaders[loader];
+
+ u32 offset = 0;
+
+ // TODO: What happens if a loader overwrites a previous one's data?
+ for (unsigned component = 0; component < loader_config.component_count; ++component) {
+ if (component >= 12) {
+ LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
+ continue;
+ }
+
+ u32 attribute_index = loader_config.GetComponent(component);
+ if (attribute_index < 12) {
+ offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
+ vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
+ vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
+ vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
+ vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
+ offset += attribute_config.GetStride(attribute_index);
+ } else if (attribute_index < 16) {
+ // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
+ offset = Common::AlignUp(offset, 4);
+ offset += (attribute_index - 11) * 4;
+ } else {
+ UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
+ }
+ }
+ }
+}
+
+void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
+ for (int i = 0; i < num_total_attributes; ++i) {
+ if (vertex_attribute_elements[i] != 0) {
+ // Load per-vertex data from the loader arrays
+ u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
+
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
+ : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
+ }
+
+ switch (vertex_attribute_formats[i]) {
+ case Regs::VertexAttributeFormat::BYTE:
+ {
+ const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
+ for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+ input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
+ }
+ break;
+ }
+ case Regs::VertexAttributeFormat::UBYTE:
+ {
+ const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
+ for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+ input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
+ }
+ break;
+ }
+ case Regs::VertexAttributeFormat::SHORT:
+ {
+ const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
+ for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+ input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
+ }
+ break;
+ }
+ case Regs::VertexAttributeFormat::FLOAT:
+ {
+ const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
+ for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+ input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
+ }
+ break;
+ }
+ }
+
+ // Default attribute values set if array elements have < 4 components. This
+ // is *not* carried over from the default attribute settings even if they're
+ // enabled for this attribute.
+ for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
+ input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
+ }
+
+ LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
+ vertex_attribute_elements[i], i, vertex, index,
+ base_address,
+ vertex_attribute_sources[i],
+ vertex_attribute_strides[i] * vertex,
+ input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
+ } else if (vertex_attribute_is_default[i]) {
+ // Load the default attribute if we're configured to do so
+ input.attr[i] = g_state.vs.default_attributes[i];
+ LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
+ i, vertex, index,
+ input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
+ input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
+ } else {
+ // TODO(yuriks): In this case, no data gets loaded and the vertex
+ // remains with the last value it had. This isn't currently maintained
+ // as global state, however, and so won't work in Citra yet.
+ }
+ }
+}
+
+} // namespace Pica \ No newline at end of file
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..ff42d1596
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <iterator>
+#include <algorithm>
+
+#include "video_core/pica.h"
+#include "video_core/shader/shader.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+namespace Pica {
+
+class VertexLoader {
+public:
+ void Setup(const Pica::Regs& regs);
+ void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
+
+ int GetNumTotalAttributes() const { return num_total_attributes; }
+
+private:
+ u32 vertex_attribute_sources[16];
+ u32 vertex_attribute_strides[16] = {};
+ Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
+ u32 vertex_attribute_elements[16] = {};
+ bool vertex_attribute_is_default[16];
+ int num_total_attributes;
+};
+
+} // namespace Pica