Ryujinx’s audio subsystem emulates the Nintendo Switch audio hardware and provides multi-backend audio output. The architecture separates audio rendering (processing) from output (playback):
Dual-layer design: the renderer system processes audio commands, and the output system handles device playback.
/// <summary>
/// Container for the state of every voice.
/// </summary>
public class VoiceContext
{
    // Backing storage holding one VoiceState per voice slot.
    private Memory<VoiceState> _voices;

    /// <summary>
    /// Per-voice parameters: activity flag, priority, source format, volumes,
    /// sample rate, destination mix, wave buffers and biquad filters.
    /// </summary>
    public struct VoiceState
    {
        public bool IsActive;
        public int Priority;
        public VoiceFormat Format; // PCM16, ADPCM, Opus, etc.
        public float Volume;
        public float[] ChannelVolumes;
        public int SampleRate;
        public int MixId; // Target mix
        public WaveBuffer[] WaveBuffers; // Audio data buffers
        public BiquadFilterParameter[] Filters;
    }
}
Operations:
Decode audio samples (PCM, ADPCM, Opus)
Apply per-voice effects (filters, pitch shift)
Volume control and panning
2. Effect Processing
Effect Context applies audio effects:
/// <summary>
/// Kinds of audio effect that the effect context can apply.
/// </summary>
public enum EffectType
{
    Invalid,
    BufferMix,     // Mix audio between buffers
    Aux,           // Auxiliary send/return
    Delay,         // Delay effect
    Reverb,        // Reverb effect
    Reverb3d,      // 3D reverb
    BiquadFilter,  // 2nd order IIR filter
    Limiter,       // Dynamic range limiter
    CaptureBuffer, // Capture to buffer
    Compressor,    // Dynamic range compressor
}
Example - Reverb:
/// <summary>
/// Skeleton of a reverb effect. The processing steps are only outlined as
/// comments; <see cref="Process"/> currently performs no work.
/// </summary>
public class ReverbEffect
{
    private float _roomSize;
    private float _damping;
    private float _earlyReflection;
    private float _lateReverbGain;
    private float[] _delayLines;
    private float[] _combFilters;

    /// <summary>
    /// Placeholder for the reverb pipeline.
    /// </summary>
    /// <param name="output">Destination samples.</param>
    /// <param name="input">Source samples.</param>
    public void Process(Span<float> output, ReadOnlySpan<float> input)
    {
        // Apply early reflections
        // Process comb filters for late reverb
        // Mix with dry signal
    }
}
3. Mixing
Mix Context combines audio sources:
/// <summary>
/// Combines audio sources into mixes.
/// </summary>
public class MixContext
{
    /// <summary>State of a single mix.</summary>
    public struct MixState
    {
        public int MixId;
        public int DestinationMixId; // Output mix (-1 = final)
        public float Volume;
        public bool IsUsed;

        // Channel mapping (up to 24 channels)
        public float[,] MixVolumes; // [src_channel, dst_channel]
    }

    /// <summary>
    /// Accumulates interleaved <paramref name="input"/> frames into interleaved
    /// <paramref name="output"/> frames, weighting each source/destination
    /// channel pair by <paramref name="volumes"/>.
    /// </summary>
    /// <param name="output">Interleaved destination; the mixed samples are added to its existing content.</param>
    /// <param name="input">Interleaved source samples.</param>
    /// <param name="inputChannels">Number of channels per input frame; must be positive.</param>
    /// <param name="outputChannels">Number of channels per output frame; must be positive.</param>
    /// <param name="volumes">Gain matrix indexed as [input channel, output channel].</param>
    /// <exception cref="ArgumentNullException"><paramref name="volumes"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">A channel count is zero or negative.</exception>
    /// <exception cref="ArgumentException"><paramref name="output"/> cannot hold every mixed frame.</exception>
    public void Mix(Span<float> output, ReadOnlySpan<float> input, int inputChannels, int outputChannels, float[,] volumes)
    {
        if (volumes == null)
        {
            throw new ArgumentNullException(nameof(volumes));
        }

        if (inputChannels <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(inputChannels));
        }

        if (outputChannels <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(outputChannels));
        }

        // The frame count was previously read from an undeclared variable;
        // derive it from the interleaved input length instead.
        int sampleCount = input.Length / inputChannels;

        // Fail before writing anything rather than part-way through the loop.
        if (output.Length < sampleCount * outputChannels)
        {
            throw new ArgumentException("Output buffer is too small for the mixed frames.", nameof(output));
        }

        // Mix with per-channel volumes
        for (int frame = 0; frame < sampleCount; frame++)
        {
            int inputBase = frame * inputChannels;
            int outputBase = frame * outputChannels;

            for (int outCh = 0; outCh < outputChannels; outCh++)
            {
                float sample = 0;

                for (int inCh = 0; inCh < inputChannels; inCh++)
                {
                    sample += input[inputBase + inCh] * volumes[inCh, outCh];
                }

                // Accumulate so that several sources can be mixed into one destination.
                output[outputBase + outCh] += sample;
            }
        }
    }
}
Mixing stages:
Voice → Mix (individual sources to submixes)
Mix → Mix (submix hierarchy)
Mix → Final (master output)
4. Sink Output
Sink Context routes to output devices:
/// <summary>
/// Kinds of sink that rendered audio can be routed to.
/// </summary>
public enum SinkType
{
    Invalid,
    Device,         // Physical output device
    CircularBuffer, // Ring buffer for game reading
}

/// <summary>
/// Sink that forwards rendered buffers to an <c>AudioOutputSystem</c>.
/// </summary>
public class DeviceSink
{
    // NOTE(review): no constructor is visible in this excerpt; these readonly
    // fields are presumably assigned by one defined elsewhere - confirm.
    private readonly AudioOutputSystem _outputSystem;
    private readonly string _deviceName;
    private readonly uint _sampleRate;
    private readonly uint _channelCount;

    /// <summary>
    /// Passes a buffer and its tag straight through to the output system.
    /// </summary>
    /// <param name="buffer">Samples to append.</param>
    /// <param name="bufferTag">Tag forwarded together with the buffer.</param>
    public void AppendBuffer(ReadOnlySpan<float> buffer, uint bufferTag)
    {
        // Send audio to output system
        _outputSystem.AppendBuffer(buffer, bufferTag);
    }
}
/// <summary>
/// A buffer of audio samples together with its tracking information.
/// </summary>
public struct AudioBuffer
{
    public ulong Tag;               // User tag for tracking
    public float[] Data;            // Sample data
    public uint SampleCount;        // Samples per channel
    public ulong PlayedSampleCount; // Progress tracker
}
// Raw uncompressed audio
/// <summary>
/// Converts signed 16-bit PCM samples to floating point.
/// </summary>
public class PcmDecoder
{
    /// <summary>
    /// Writes each input sample divided by 32768 to the same index of
    /// <paramref name="output"/>.
    /// </summary>
    /// <param name="output">Destination; must be at least as long as <paramref name="input"/>.</param>
    /// <param name="input">Signed 16-bit PCM samples.</param>
    /// <param name="channelCount">Not used by the conversion; kept so the signature is unchanged.</param>
    public void Decode(Span<float> output, ReadOnlySpan<short> input, int channelCount)
    {
        for (int i = 0; i < input.Length; i++)
        {
            // Dividing by 2^15 maps short.MinValue to exactly -1.0f.
            output[i] = input[i] / 32768.0f;
        }
    }
}
Formats: 8-bit, 16-bit, 24-bit, 32-bit, float
// Adaptive Differential PCM
/// <summary>
/// Outline of an ADPCM decoder: each input value is combined with a prediction
/// taken from the running decoder state.
/// </summary>
public class AdpcmDecoder
{
    private readonly short[] _coefficients;
    private AdpcmState _state;

    /// <summary>
    /// Walks <paramref name="input"/> and updates the decoder state with each
    /// reconstructed sample. The remainder of the loop body is elided in this
    /// outline.
    /// </summary>
    /// <param name="output">Destination for decoded 16-bit samples.</param>
    /// <param name="input">Encoded ADPCM data.</param>
    public void Decode(Span<short> output, ReadOnlySpan<byte> input)
    {
        // Decode 4-bit samples with prediction
        // NOTE(review): the loop visits whole bytes although the variable is
        // named "nibble"; presumably each byte carries two 4-bit samples - confirm.
        foreach (byte nibble in input)
        {
            short predicted = Predict(_state);

            // Add in int and saturate: the sum of two 16-bit values does not fit
            // in a short without a narrowing conversion and could wrap around.
            int reconstructed = predicted + DecodeDifference(nibble);
            short decoded = (short)Math.Clamp(reconstructed, short.MinValue, short.MaxValue);

            _state.Update(decoded);
            // ...
        }
    }
}
Compression: ~4:1 ratio
// Opus codec via libopus
/// <summary>
/// Thin wrapper around a native libopus decoder handle.
/// </summary>
public class OpusDecoder
{
    // Native decoder handle passed to opus_decode_float.
    private IntPtr _decoder;

    /// <summary>
    /// Decodes one Opus packet into floating-point samples.
    /// </summary>
    /// <param name="output">Destination for the decoded samples.</param>
    /// <param name="input">Encoded Opus packet.</param>
    /// <param name="frameSize">Frame size passed through to libopus.</param>
    /// <exception cref="InvalidOperationException">libopus returned a negative error code.</exception>
    public void Decode(Span<float> output, ReadOnlySpan<byte> input, int frameSize)
    {
        int samples = opus_decode_float(
            _decoder,
            input,
            input.Length,
            output,
            frameSize,
            0);

        // libopus reports failures as negative return values; surface them
        // instead of silently leaving the output undefined.
        if (samples < 0)
        {
            throw new InvalidOperationException($"opus_decode_float failed with error code {samples}.");
        }
    }
}
Compression: 6:1 to 40:1 ratio
Quality: Very high at low bitrates