|
using System; |
|
using System.Buffers.Binary; |
|
using System.Globalization; |
|
using System.IO; |
|
using System.Net.WebSockets; |
|
using System.Text; |
|
using System.Text.Json; |
|
using System.Threading; |
|
using System.Threading.Tasks; |
|
|
|
internal static class Program |
|
{ |
|
private const string DefaultModel = "voxtral-mini-transcribe-realtime-2602"; |
|
private const string DefaultBaseUrl = "wss://api.mistral.ai"; |
|
private const string ApiKeyEnvVar = "MISTRAL_API_KEY"; |
|
private const string EncodingPcmS16Le = "pcm_s16le"; |
|
|
|
/// <summary>
/// Entry point: reads a WAV file, streams it to the realtime transcription websocket and
/// prints the final transcription to standard output.
/// </summary>
/// <param name="args">Command-line arguments, parsed by <see cref="CliOptions.Parse"/>.</param>
/// <returns>
/// 0 on success; 1 when the transcription failed or an unexpected exception occurred;
/// 2 when the API key environment variable is missing; or the exit code carried by a
/// <see cref="UsageException"/>.
/// </returns>
private static async Task<int> Main(string[] args)
{
    // Marks a task's exception as observed so an abandoned background task never
    // surfaces later as an unobserved task exception.
    void ObserveFault(Task task)
    {
        _ = task.ContinueWith(
            finished => { _ = finished.Exception; },
            TaskContinuationOptions.OnlyOnFaulted);
    }

    try
    {
        CliOptions options = CliOptions.Parse(args);
        string? apiKey = Environment.GetEnvironmentVariable(ApiKeyEnvVar);

        if (string.IsNullOrWhiteSpace(apiKey))
        {
            Console.Error.WriteLine("Missing MISTRAL_API_KEY in shell environment.");
            return 2;
        }

        WavPcm16 wav = WavReader.ReadMonoPcm16(options.WavPath);
        Uri wsUri = BuildRealtimeUri(options.BaseUrl, options.Model);

        using ClientWebSocket socket = new ClientWebSocket();
        socket.Options.SetRequestHeader("Authorization", "Bearer " + apiKey.Trim());
        socket.Options.SetRequestHeader("User-Agent", "voxtral-realtime-wav-test/1.0");

        Console.Error.WriteLine("Connecting: " + wsUri.AbsoluteUri);
        await socket.ConnectAsync(wsUri, CancellationToken.None).ConfigureAwait(false);
        Console.Error.WriteLine("Connected.");

        await WaitForSessionCreatedAsync(socket, options.HandshakeTimeoutMs).ConfigureAwait(false);
        await SendSessionUpdateAsync(socket, wav.SampleRate, options.TargetStreamingDelayMs).ConfigureAwait(false);

        using CancellationTokenSource sessionCancellation = new CancellationTokenSource();
        Task<TranscriptionResult> receiveTask = ReceiveEventsAsync(
            socket,
            options.PrintPartials,
            sessionCancellation.Token);
        Task sendTask = SendAudioAsync(
            socket,
            wav.PcmData,
            wav.SampleRate,
            options.ChunkMs,
            options.RealtimePacing,
            sessionCancellation.Token);

        TranscriptionResult result;
        try
        {
            // Race sending against receiving: if the receive loop ends first (API error,
            // closed connection) there is no point streaming the rest of the audio.
            Task firstFinished = await Task.WhenAny(sendTask, receiveTask).ConfigureAwait(false);
            if (firstFinished == sendTask)
            {
                // Propagate any send failure before waiting for the final event.
                await sendTask.ConfigureAwait(false);

                Task timeoutTask = Task.Delay(options.CompletionTimeoutMs, sessionCancellation.Token);
                Task completedTask = await Task.WhenAny(receiveTask, timeoutTask).ConfigureAwait(false);
                if (completedTask != receiveTask)
                {
                    throw new TimeoutException("Timed out waiting for transcription.done.");
                }
            }

            result = await receiveTask.ConfigureAwait(false);
        }
        finally
        {
            // Stop the timeout delay and whichever background task is still running,
            // on success and on every failure path alike.
            sessionCancellation.Cancel();
            ObserveFault(sendTask);
            ObserveFault(receiveTask);
        }

        if (options.PrintPartials)
        {
            // Terminate the line of streamed partial text.
            Console.Out.WriteLine();
        }

        Console.Out.WriteLine("=== FINAL TRANSCRIPTION ===");
        Console.Out.WriteLine(result.Text);
        await Console.Out.FlushAsync().ConfigureAwait(false);

        await TryCloseSocketAsync(socket).ConfigureAwait(false);
        return result.IsError ? 1 : 0;
    }
    catch (UsageException usageException)
    {
        if (!string.IsNullOrEmpty(usageException.Message))
        {
            Console.Error.WriteLine(usageException.Message);
            Console.Error.WriteLine();
        }

        PrintUsage();
        return usageException.ExitCode;
    }
    catch (Exception exception)
    {
        Console.Error.WriteLine("Transcription test failed: " + exception.Message);
        return 1;
    }
}
|
|
|
/// <summary>
/// Reads websocket messages until a <c>session.created</c> message arrives.
/// </summary>
/// <param name="socket">Connected websocket to read from.</param>
/// <param name="timeoutMs">Overall deadline in milliseconds; values below 1 are raised to 1.</param>
/// <exception cref="InvalidOperationException">
/// The socket closed (or delivered an empty message) first, or an <c>error</c> message was received.
/// </exception>
/// <exception cref="TimeoutException">The deadline elapsed before <c>session.created</c>.</exception>
private static async Task WaitForSessionCreatedAsync(ClientWebSocket socket, int timeoutMs)
{
    int deadlineMs = Math.Max(1, timeoutMs);
    using CancellationTokenSource deadline = new CancellationTokenSource(deadlineMs);
    CancellationToken deadlineToken = deadline.Token;

    try
    {
        for (;;)
        {
            string text = await ReceiveTextAsync(socket, deadlineToken).ConfigureAwait(false);
            if (string.IsNullOrEmpty(text))
            {
                throw new InvalidOperationException("Websocket closed before session.created.");
            }

            using (JsonDocument document = JsonDocument.Parse(text))
            {
                JsonElement root = document.RootElement;
                switch (GetMessageType(root))
                {
                    case "session.created":
                        return;

                    case "error":
                        throw new InvalidOperationException("Handshake error: " + ExtractErrorMessage(root));
                }

                // Any other message type is ignored; keep waiting.
            }
        }
    }
    catch (OperationCanceledException) when (deadline.IsCancellationRequested)
    {
        // Translate the deadline cancellation into a timeout for the caller.
        throw new TimeoutException("Timed out waiting for session.created.");
    }
}
|
|
|
/// <summary>
/// Sends a <c>session.update</c> message declaring the audio format and, optionally,
/// the target streaming delay.
/// </summary>
/// <param name="socket">Connected websocket to send on.</param>
/// <param name="sampleRate">Sample rate written to <c>audio_format.sample_rate</c>.</param>
/// <param name="targetStreamingDelayMs">
/// When set, written as <c>session.target_streaming_delay_ms</c>; omitted when null.
/// </param>
private static async Task SendSessionUpdateAsync(
    ClientWebSocket socket,
    int sampleRate,
    int? targetStreamingDelayMs)
{
    string sampleRateText = sampleRate.ToString(CultureInfo.InvariantCulture);

    // Optional sibling of "audio_format" inside the "session" object.
    string delayFragment = string.Empty;
    if (targetStreamingDelayMs is int delayMs)
    {
        delayFragment = ",\"target_streaming_delay_ms\":" + delayMs.ToString(CultureInfo.InvariantCulture);
    }

    string json =
        "{\"type\":\"session.update\",\"session\":{\"audio_format\":{\"encoding\":\""
        + EncodingPcmS16Le
        + "\",\"sample_rate\":"
        + sampleRateText
        + "}"
        + delayFragment
        + "}}";

    await SendJsonAsync(socket, json, CancellationToken.None).ConfigureAwait(false);
}
|
|
|
/// <summary>
/// Streams 16-bit PCM audio as base64 <c>input_audio.append</c> messages, then sends
/// <c>input_audio.flush</c> and <c>input_audio.end</c>.
/// </summary>
/// <param name="socket">Connected websocket to send on.</param>
/// <param name="pcmData">Audio bytes to send (two bytes per frame are assumed when sizing chunks).</param>
/// <param name="sampleRate">Sample rate used to convert <paramref name="chunkMs"/> into a frame count.</param>
/// <param name="chunkMs">Chunk duration in milliseconds; also the pacing delay between chunks.</param>
/// <param name="realtimePacing">When true, waits <paramref name="chunkMs"/> between chunks (not after the last).</param>
/// <param name="cancellationToken">Cancels sending and pacing delays.</param>
/// <exception cref="OperationCanceledException">The token was cancelled.</exception>
private static async Task SendAudioAsync(
    ClientWebSocket socket,
    byte[] pcmData,
    int sampleRate,
    int chunkMs,
    bool realtimePacing,
    CancellationToken cancellationToken)
{
    // 64-bit math: sampleRate * chunkMs overflows Int32 for large chunk durations
    // (e.g. 48000 Hz * 50000 ms), which previously collapsed the chunk to one frame.
    long framesPerChunk = Math.Max(1L, (long)sampleRate * chunkMs / 1000L);

    // A chunk never needs to exceed the payload; capping keeps the size within Int32.
    long cappedBytes = Math.Min(framesPerChunk * 2L, Math.Max(2L, (long)pcmData.Length));
    int bytesPerChunk = (int)cappedBytes;

    int offset = 0;
    while (offset < pcmData.Length)
    {
        cancellationToken.ThrowIfCancellationRequested();

        int count = Math.Min(bytesPerChunk, pcmData.Length - offset);
        string base64 = Convert.ToBase64String(pcmData, offset, count);
        string appendPayload = "{\"type\":\"input_audio.append\",\"audio\":\"" + base64 + "\"}";

        await SendJsonAsync(socket, appendPayload, cancellationToken).ConfigureAwait(false);

        // Advance by the bytes actually sent so the offset can never pass the array length.
        offset += count;

        if (realtimePacing && offset < pcmData.Length)
        {
            await Task.Delay(chunkMs, cancellationToken).ConfigureAwait(false);
        }
    }

    await SendJsonAsync(socket, "{\"type\":\"input_audio.flush\"}", cancellationToken).ConfigureAwait(false);
    await SendJsonAsync(socket, "{\"type\":\"input_audio.end\"}", cancellationToken).ConfigureAwait(false);
}
|
|
|
/// <summary>
/// Reads server events until the transcription finishes, fails, or the connection closes.
/// </summary>
/// <param name="socket">Connected websocket to read from.</param>
/// <param name="printPartials">When true, writes each <c>transcription.text.delta</c> text to standard output.</param>
/// <param name="cancellationToken">Stops the receive loop.</param>
/// <returns>
/// The final text from <c>transcription.done</c> (empty when it has no string <c>text</c>),
/// or an error result for an <c>error</c> event or a closed connection.
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled between messages.</exception>
private static async Task<TranscriptionResult> ReceiveEventsAsync(
    ClientWebSocket socket,
    bool printPartials,
    CancellationToken cancellationToken)
{
    while (!cancellationToken.IsCancellationRequested)
    {
        string text = await ReceiveTextAsync(socket, cancellationToken).ConfigureAwait(false);
        if (string.IsNullOrEmpty(text))
        {
            return new TranscriptionResult("Connection closed before transcription.done.", true);
        }

        using JsonDocument document = JsonDocument.Parse(text);
        JsonElement root = document.RootElement;

        switch (GetMessageType(root))
        {
            case "transcription.text.delta":
                if (printPartials && TryGetStringProperty(root, "text", out string delta))
                {
                    Console.Out.Write(delta);
                    await Console.Out.FlushAsync().ConfigureAwait(false);
                }

                break;

            case "transcription.done":
                return TryGetStringProperty(root, "text", out string finalText)
                    ? new TranscriptionResult(finalText, false)
                    : new TranscriptionResult(string.Empty, false);

            case "error":
                string failure = ExtractErrorMessage(root);
                Console.Error.WriteLine("Realtime API error: " + failure);
                return new TranscriptionResult(failure, true);
        }

        // Unrecognized event types fall through and the loop keeps reading.
    }

    throw new OperationCanceledException(cancellationToken);
}
|
|
|
/// <summary>
/// Sends <paramref name="payload"/> as a single, complete UTF-8 text websocket message.
/// </summary>
/// <param name="socket">Connected websocket to send on.</param>
/// <param name="payload">Message text.</param>
/// <param name="cancellationToken">Cancels the send.</param>
private static async Task SendJsonAsync(
    ClientWebSocket socket,
    string payload,
    CancellationToken cancellationToken)
{
    const bool isFinalFrame = true;
    ArraySegment<byte> encoded = new ArraySegment<byte>(Encoding.UTF8.GetBytes(payload));

    await socket
        .SendAsync(encoded, WebSocketMessageType.Text, isFinalFrame, cancellationToken)
        .ConfigureAwait(false);
}
|
|
|
/// <summary>
/// Receives one complete websocket message and decodes it as UTF-8 text.
/// </summary>
/// <param name="socket">Connected websocket to read from.</param>
/// <param name="cancellationToken">Cancels the pending receive.</param>
/// <returns>
/// The message text, or an empty string when a close frame is received
/// (an empty message also yields an empty string).
/// </returns>
private static async Task<string> ReceiveTextAsync(
    ClientWebSocket socket,
    CancellationToken cancellationToken)
{
    byte[] buffer = new byte[4096];
    ArraySegment<byte> receiveSegment = new ArraySegment<byte>(buffer);

    // Collect raw bytes and decode once at the end. Decoding each fragment separately
    // corrupts any multi-byte UTF-8 character that straddles a fragment boundary.
    using MemoryStream messageBytes = new MemoryStream(buffer.Length);

    while (true)
    {
        WebSocketReceiveResult receiveResult = await socket.ReceiveAsync(receiveSegment, cancellationToken)
            .ConfigureAwait(false);

        if (receiveResult.MessageType == WebSocketMessageType.Close)
        {
            return string.Empty;
        }

        if (receiveResult.Count > 0)
        {
            messageBytes.Write(buffer, 0, receiveResult.Count);
        }

        if (receiveResult.EndOfMessage)
        {
            // GetBuffer avoids a copy; only the first Length bytes hold message data.
            return Encoding.UTF8.GetString(messageBytes.GetBuffer(), 0, (int)messageBytes.Length);
        }
    }
}
|
|
|
/// <summary>
/// Attempts a normal websocket close, best effort. Does nothing unless the socket is
/// <see cref="WebSocketState.Open"/> or <see cref="WebSocketState.CloseReceived"/>.
/// Never throws.
/// </summary>
/// <param name="socket">Websocket to close.</param>
private static async Task TryCloseSocketAsync(ClientWebSocket socket)
{
    if (socket.State != WebSocketState.Open && socket.State != WebSocketState.CloseReceived)
    {
        return;
    }

    // Bound the close handshake: without a deadline the call can wait indefinitely
    // for a peer that never answers the close frame.
    const int closeTimeoutMs = 5000;
    using CancellationTokenSource closeDeadline = new CancellationTokenSource(closeTimeoutMs);

    try
    {
        await socket.CloseAsync(
            WebSocketCloseStatus.NormalClosure,
            string.Empty,
            closeDeadline.Token).ConfigureAwait(false);
    }
    catch (Exception)
    {
        // Deliberately ignored: the work is already done by the time the socket is
        // closed, so a failed or timed-out close must not change the outcome.
    }
}
|
|
|
/// <summary>
/// Builds the realtime transcription websocket URI from a base URL and a model id.
/// </summary>
/// <param name="baseUrl">Base URL; blank falls back to the default base URL. http/https schemes become ws/wss.</param>
/// <param name="model">Model id; blank falls back to the default model. Sent as the escaped <c>model</c> query value.</param>
/// <returns>The base URL with <c>/v1/audio/transcriptions/realtime</c> appended to its path and <c>?model=...</c>.</returns>
private static Uri BuildRealtimeUri(string baseUrl, string model)
{
    string modelId = string.IsNullOrWhiteSpace(model) ? DefaultModel : model.Trim();

    string root = string.IsNullOrWhiteSpace(baseUrl) ? DefaultBaseUrl : baseUrl.Trim();
    if (!root.EndsWith("/", StringComparison.Ordinal))
    {
        root += "/";
    }

    UriBuilder endpoint = new UriBuilder(new Uri(root, UriKind.Absolute));

    // Map HTTP schemes onto their websocket counterparts; other schemes are kept as-is.
    string scheme = endpoint.Scheme;
    if (scheme == Uri.UriSchemeHttps)
    {
        endpoint.Scheme = "wss";
    }
    else if (scheme == Uri.UriSchemeHttp)
    {
        endpoint.Scheme = "ws";
    }

    endpoint.Path = endpoint.Path.TrimEnd('/') + "/v1/audio/transcriptions/realtime";
    endpoint.Query = "model=" + Uri.EscapeDataString(modelId);
    return endpoint.Uri;
}
|
|
|
/// <summary>
/// Returns the string value of the <c>type</c> property of a JSON object.
/// </summary>
/// <param name="json">Element to inspect.</param>
/// <returns>The type string, or an empty string when <paramref name="json"/> is not an object
/// or has no string <c>type</c> property.</returns>
private static string GetMessageType(JsonElement json)
{
    if (json.ValueKind != JsonValueKind.Object)
    {
        return string.Empty;
    }

    if (!json.TryGetProperty("type", out JsonElement typeNode))
    {
        return string.Empty;
    }

    return typeNode.ValueKind == JsonValueKind.String
        ? typeNode.GetString() ?? string.Empty
        : string.Empty;
}
|
|
|
/// <summary>
/// Reads a string-valued property from a JSON object.
/// </summary>
/// <param name="json">Element to inspect.</param>
/// <param name="propertyName">Name of the property to read.</param>
/// <param name="value">The property's string value on success; otherwise an empty string.</param>
/// <returns>True when <paramref name="json"/> is an object holding a string property of that name.</returns>
private static bool TryGetStringProperty(JsonElement json, string propertyName, out string value)
{
    bool isStringProperty =
        json.ValueKind == JsonValueKind.Object
        && json.TryGetProperty(propertyName, out JsonElement node)
        && node.ValueKind == JsonValueKind.String;

    if (!isStringProperty)
    {
        value = string.Empty;
        return false;
    }

    // Re-read the property: the out variable above is only assigned on the success path.
    value = json.GetProperty(propertyName).GetString() ?? string.Empty;
    return true;
}
|
|
|
/// <summary>
/// Extracts a human-readable message from an error payload.
/// </summary>
/// <param name="json">Root element of the error message.</param>
/// <returns>
/// <c>error.message</c> when it is a string; <c>error.message.detail</c> when that is a string;
/// otherwise the raw JSON of <c>error.message</c>, or of <c>error</c> when it has no
/// <c>message</c>. Returns "Unknown error payload" when there is no <c>error</c> property
/// or <paramref name="json"/> is not an object.
/// </returns>
private static string ExtractErrorMessage(JsonElement json)
{
    const string unknownPayload = "Unknown error payload";
    const string unknownError = "Unknown error";

    if (json.ValueKind != JsonValueKind.Object || !json.TryGetProperty("error", out JsonElement error))
    {
        return unknownPayload;
    }

    if (error.ValueKind != JsonValueKind.Object || !error.TryGetProperty("message", out JsonElement message))
    {
        return error.GetRawText();
    }

    switch (message.ValueKind)
    {
        case JsonValueKind.String:
            return message.GetString() ?? unknownError;

        case JsonValueKind.Object
            when message.TryGetProperty("detail", out JsonElement detail)
                && detail.ValueKind == JsonValueKind.String:
            return detail.GetString() ?? unknownError;

        default:
            return message.GetRawText();
    }
}
|
|
|
/// <summary>
/// Writes the command-line usage text to standard error.
/// </summary>
private static void PrintUsage()
{
    // One entry per output line; empty entries produce blank separator lines.
    string[] usageLines =
    {
        "Usage:",
        " dotnet run --project client-cs-cli/VoxtralRealtimeWavTest.csproj -- <audio.wav> [options]",
        string.Empty,
        "Options:",
        " --model <id> Voxtral model (default: voxtral-mini-transcribe-realtime-2602)",
        " --base-url <url> Base API URL (default: wss://api.mistral.ai)",
        " --chunk-ms <int> Audio chunk size in ms (default: 20)",
        " --target-delay-ms <int> Optional target_streaming_delay_ms",
        " --handshake-timeout-ms <int> Handshake timeout (default: 10000)",
        " --completion-timeout-ms <int> Completion wait timeout (default: 30000)",
        " --no-realtime-pacing Send audio as fast as possible",
        " --no-partials Do not print text deltas while streaming",
        " --help Show this help text",
        string.Empty,
        "Environment:",
        " MISTRAL_API_KEY must be set in your shell environment.",
    };

    foreach (string usageLine in usageLines)
    {
        Console.Error.WriteLine(usageLine);
    }
}
|
} |
|
|
|
/// <summary>
/// Command-line options for the realtime WAV transcription test, with their defaults.
/// Instances are created through <see cref="Parse"/>.
/// </summary>
internal sealed class CliOptions
{
    /// <summary>Path of the WAV file to transcribe (the single positional argument).</summary>
    public string WavPath { get; private set; } = string.Empty;

    /// <summary>Model id (<c>--model</c>).</summary>
    public string Model { get; private set; } = "voxtral-mini-transcribe-realtime-2602";

    /// <summary>Base API URL (<c>--base-url</c>).</summary>
    public string BaseUrl { get; private set; } = "wss://api.mistral.ai";

    /// <summary>Audio chunk size in milliseconds (<c>--chunk-ms</c>, minimum 5).</summary>
    public int ChunkMs { get; private set; } = 20;

    /// <summary>Optional target streaming delay (<c>--target-delay-ms</c>, minimum 0); null when not given.</summary>
    public int? TargetStreamingDelayMs { get; private set; }

    /// <summary>Handshake timeout in milliseconds (<c>--handshake-timeout-ms</c>, minimum 1000).</summary>
    public int HandshakeTimeoutMs { get; private set; } = 10000;

    /// <summary>Completion timeout in milliseconds (<c>--completion-timeout-ms</c>, minimum 1000).</summary>
    public int CompletionTimeoutMs { get; private set; } = 30000;

    /// <summary>False when <c>--no-realtime-pacing</c> was given.</summary>
    public bool RealtimePacing { get; private set; } = true;

    /// <summary>False when <c>--no-partials</c> was given.</summary>
    public bool PrintPartials { get; private set; } = true;

    /// <summary>
    /// Parses command-line arguments into a <see cref="CliOptions"/>.
    /// </summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>The parsed options; the WAV path is verified to exist.</returns>
    /// <exception cref="UsageException">
    /// Exit code 0 with an empty message for <c>--help</c>/<c>-h</c>; exit code 1 for a missing
    /// or duplicate WAV path, a missing file, an unknown option, or an invalid option value.
    /// </exception>
    public static CliOptions Parse(string[] args)
    {
        if (args == null || args.Length == 0)
        {
            throw new UsageException("Missing WAV file path.", 1);
        }

        CliOptions parsed = new CliOptions();

        // RequireValue advances the position past an option's value, so the loop
        // increment always lands on the next unread token.
        for (int position = 0; position < args.Length; position++)
        {
            string token = args[position];
            switch (token)
            {
                case "--help":
                case "-h":
                    throw new UsageException(string.Empty, 0);

                case "--model":
                    parsed.Model = RequireValue(args, ref position, token);
                    break;

                case "--base-url":
                    parsed.BaseUrl = RequireValue(args, ref position, token);
                    break;

                case "--chunk-ms":
                    parsed.ChunkMs = ParsePositiveInt(RequireValue(args, ref position, token), token, min: 5);
                    break;

                case "--target-delay-ms":
                    parsed.TargetStreamingDelayMs = ParseNonNegativeInt(RequireValue(args, ref position, token), token);
                    break;

                case "--handshake-timeout-ms":
                    parsed.HandshakeTimeoutMs = ParsePositiveInt(RequireValue(args, ref position, token), token, min: 1000);
                    break;

                case "--completion-timeout-ms":
                    parsed.CompletionTimeoutMs = ParsePositiveInt(RequireValue(args, ref position, token), token, min: 1000);
                    break;

                case "--no-realtime-pacing":
                    parsed.RealtimePacing = false;
                    break;

                case "--no-partials":
                    parsed.PrintPartials = false;
                    break;

                default:
                    if (token.StartsWith("--", StringComparison.Ordinal))
                    {
                        throw new UsageException("Unknown option: " + token, 1);
                    }

                    if (!string.IsNullOrEmpty(parsed.WavPath))
                    {
                        throw new UsageException("Only one WAV path can be provided.", 1);
                    }

                    parsed.WavPath = token;
                    break;
            }
        }

        if (string.IsNullOrWhiteSpace(parsed.WavPath))
        {
            throw new UsageException("Missing WAV file path.", 1);
        }

        if (!File.Exists(parsed.WavPath))
        {
            throw new UsageException("WAV file not found: " + parsed.WavPath, 1);
        }

        return parsed;
    }

    /// <summary>
    /// Returns the argument following the option at <paramref name="index"/> and advances
    /// <paramref name="index"/> onto it.
    /// </summary>
    /// <exception cref="UsageException">The option is the last argument.</exception>
    private static string RequireValue(string[] args, ref int index, string optionName)
    {
        int valueIndex = index + 1;
        if (valueIndex >= args.Length)
        {
            throw new UsageException("Missing value for " + optionName, 1);
        }

        index = valueIndex;
        return args[valueIndex];
    }

    /// <summary>Parses an invariant-culture integer that must be at least <paramref name="min"/>.</summary>
    /// <exception cref="UsageException">The value is not an integer or is below <paramref name="min"/>.</exception>
    private static int ParsePositiveInt(string value, string optionName, int min)
    {
        bool isInteger = int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out int number);
        if (isInteger && number >= min)
        {
            return number;
        }

        throw new UsageException(optionName + " must be an integer >= " + min.ToString(CultureInfo.InvariantCulture), 1);
    }

    /// <summary>Parses an invariant-culture integer that must be zero or greater.</summary>
    /// <exception cref="UsageException">The value is not an integer or is negative.</exception>
    private static int ParseNonNegativeInt(string value, string optionName)
    {
        bool isInteger = int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out int number);
        if (isInteger && number >= 0)
        {
            return number;
        }

        throw new UsageException(optionName + " must be an integer >= 0", 1);
    }
}
|
|
|
/// <summary>
/// Signals that the program should print its usage text and exit with a given code.
/// </summary>
internal sealed class UsageException : Exception
{
    /// <summary>Creates the exception.</summary>
    /// <param name="message">Text describing the problem; may be empty.</param>
    /// <param name="exitCode">Process exit code to return.</param>
    public UsageException(string message, int exitCode)
        : base(message) => ExitCode = exitCode;

    /// <summary>Process exit code associated with this usage error.</summary>
    public int ExitCode { get; }
}
|
|
|
/// <summary>
/// Immutable outcome of a transcription session: a text plus an error flag.
/// </summary>
internal sealed class TranscriptionResult
{
    /// <summary>Creates a result.</summary>
    /// <param name="text">Result text.</param>
    /// <param name="isError">Whether the result represents a failure.</param>
    public TranscriptionResult(string text, bool isError) =>
        (Text, IsError) = (text, isError);

    /// <summary>The text supplied at construction.</summary>
    public string Text { get; }

    /// <summary>The error flag supplied at construction.</summary>
    public bool IsError { get; }
}
|
|
|
/// <summary>
/// Immutable pair of an audio byte buffer and its sample rate.
/// </summary>
internal sealed class WavPcm16
{
    /// <summary>Creates the container; the array is stored by reference, not copied.</summary>
    /// <param name="pcmData">Audio bytes.</param>
    /// <param name="sampleRate">Sample rate of <paramref name="pcmData"/>.</param>
    public WavPcm16(byte[] pcmData, int sampleRate) =>
        (PcmData, SampleRate) = (pcmData, sampleRate);

    /// <summary>The audio bytes supplied at construction.</summary>
    public byte[] PcmData { get; }

    /// <summary>The sample rate supplied at construction.</summary>
    public int SampleRate { get; }
}
|
|
|
internal static class WavReader |
|
{ |
|
private const ushort AudioFormatPcm = 1; |
|
private const ushort AudioFormatIeeeFloat = 3; |
|
|
|
/// <summary>
/// Reads a RIFF/WAVE file and returns its audio converted to mono 16-bit PCM.
/// </summary>
/// <param name="path">Path of the WAV file to read.</param>
/// <returns>The mono PCM bytes together with the sample rate from the <c>fmt </c> chunk.</returns>
/// <exception cref="InvalidDataException">
/// The file is not RIFF/WAVE, a chunk is larger than <see cref="int.MaxValue"/>, or the
/// <c>fmt </c> or <c>data</c> chunk is missing or empty.
/// </exception>
/// <exception cref="EndOfStreamException">The file ends before a header or the data chunk is complete.</exception>
public static WavPcm16 ReadMonoPcm16(string path)
{
    using FileStream stream = File.OpenRead(path);
    using BinaryReader reader = new BinaryReader(stream, Encoding.ASCII, leaveOpen: false);

    string riff = ReadFourCc(reader);
    if (!string.Equals(riff, "RIFF", StringComparison.Ordinal))
    {
        throw new InvalidDataException("Unsupported WAV container (expected RIFF).");
    }

    // The RIFF size field is not needed; chunk walking is bounded by the file length.
    _ = reader.ReadUInt32();

    string wave = ReadFourCc(reader);
    if (!string.Equals(wave, "WAVE", StringComparison.Ordinal))
    {
        throw new InvalidDataException("Unsupported WAV format (expected WAVE).");
    }

    WaveFormat? waveFormat = null;
    byte[]? audioData = null;

    // Walk chunks while at least a full chunk header (id + size) remains.
    while (stream.Position + 8 <= stream.Length)
    {
        string chunkId = ReadFourCc(reader);
        uint chunkSize = reader.ReadUInt32();

        if (chunkSize > int.MaxValue)
        {
            throw new InvalidDataException("WAV chunk is too large.");
        }

        int chunkSizeInt = (int)chunkSize;
        if (chunkId == "fmt ")
        {
            waveFormat = ReadWaveFormat(reader, chunkSizeInt);
        }
        else if (chunkId == "data")
        {
            // Check the declared size against the bytes actually left BEFORE reading:
            // ReadBytes allocates the full requested size up front, so a corrupt header
            // could otherwise force a huge allocation just to report a truncated file.
            long bytesLeft = stream.Length - stream.Position;
            if (chunkSizeInt > bytesLeft)
            {
                throw new EndOfStreamException("Unexpected end of WAV data chunk.");
            }

            audioData = reader.ReadBytes(chunkSizeInt);
            if (audioData.Length != chunkSizeInt)
            {
                throw new EndOfStreamException("Unexpected end of WAV data chunk.");
            }
        }
        else
        {
            // Unknown chunk: skip its payload.
            stream.Seek(chunkSizeInt, SeekOrigin.Current);
        }

        // Chunks are word-aligned: an odd-sized chunk is followed by one pad byte.
        if ((chunkSizeInt & 1) == 1 && stream.Position < stream.Length)
        {
            stream.Seek(1, SeekOrigin.Current);
        }
    }

    if (waveFormat == null)
    {
        throw new InvalidDataException("WAV file missing fmt chunk.");
    }

    if (audioData == null || audioData.Length == 0)
    {
        throw new InvalidDataException("WAV file missing data chunk.");
    }

    byte[] monoPcm = ConvertToMonoPcm16(waveFormat, audioData);
    return new WavPcm16(monoPcm, waveFormat.SampleRate);
}
|
|
|
/// <summary>
/// Reads a <c>fmt </c> chunk body and leaves the reader positioned at the end of the chunk.
/// </summary>
/// <remarks>
/// WAVE_FORMAT_EXTENSIBLE (0xFFFE) chunks are resolved to the format tag embedded in their
/// SubFormat GUID when that GUID has the standard "format tag" layout, so extensible PCM and
/// IEEE-float files are treated like their plain counterparts. Any other SubFormat keeps
/// the 0xFFFE tag.
/// </remarks>
/// <param name="reader">Reader positioned at the start of the chunk body.</param>
/// <param name="chunkSize">Declared size of the chunk body in bytes.</param>
/// <returns>The audio format tag, channel count, sample rate and bits per sample.</returns>
/// <exception cref="InvalidDataException">
/// The chunk is shorter than 16 bytes, declares zero channels, or has an invalid sample rate.
/// </exception>
/// <exception cref="EndOfStreamException">The stream ends inside the chunk fields.</exception>
private static WaveFormat ReadWaveFormat(BinaryReader reader, int chunkSize)
{
    const int baseFormatSize = 16;
    const int extensibleFormatSize = 40;
    const ushort audioFormatExtensible = 0xFFFE;

    if (chunkSize < baseFormatSize)
    {
        throw new InvalidDataException("Invalid fmt chunk size.");
    }

    ushort audioFormat = reader.ReadUInt16();
    ushort channels = reader.ReadUInt16();
    uint sampleRate = reader.ReadUInt32();
    _ = reader.ReadUInt32(); // average bytes per second (unused)
    _ = reader.ReadUInt16(); // block align (unused)
    ushort bitsPerSample = reader.ReadUInt16();

    int consumed = baseFormatSize;
    if (audioFormat == audioFormatExtensible && chunkSize >= extensibleFormatSize)
    {
        _ = reader.ReadUInt16(); // cbSize
        _ = reader.ReadUInt16(); // valid bits per sample
        _ = reader.ReadUInt32(); // channel mask

        byte[] subFormat = reader.ReadBytes(16);
        if (subFormat.Length != 16)
        {
            throw new EndOfStreamException("Unexpected end of WAV file.");
        }

        consumed = extensibleFormatSize;

        // Format-tag GUIDs are XXXXXXXX-0000-0010-8000-00AA00389B71, with the 16-bit
        // format tag in the first two (little-endian) bytes.
        byte[] tagGuidTail =
        {
            0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
            0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71,
        };

        bool isTagGuid = true;
        for (int i = 0; i < tagGuidTail.Length; i++)
        {
            if (subFormat[i + 2] != tagGuidTail[i])
            {
                isTagGuid = false;
                break;
            }
        }

        if (isTagGuid)
        {
            audioFormat = (ushort)(subFormat[0] | (subFormat[1] << 8));
        }
    }

    // Skip extension bytes that were not interpreted above.
    int remainingBytes = chunkSize - consumed;
    if (remainingBytes > 0)
    {
        reader.BaseStream.Seek(remainingBytes, SeekOrigin.Current);
    }

    if (channels == 0)
    {
        throw new InvalidDataException("WAV channels must be >= 1.");
    }

    if (sampleRate == 0 || sampleRate > int.MaxValue)
    {
        throw new InvalidDataException("Invalid WAV sample rate.");
    }

    return new WaveFormat(audioFormat, channels, (int)sampleRate, bitsPerSample);
}
|
|
|
/// <summary>
/// Downmixes interleaved audio frames to mono by averaging the channels and encodes the
/// result as little-endian 16-bit PCM.
/// </summary>
/// <param name="format">Format describing <paramref name="data"/>.</param>
/// <param name="data">Interleaved sample data; trailing bytes that do not fill a frame are ignored.</param>
/// <returns>Two bytes per frame of mono 16-bit PCM.</returns>
/// <exception cref="InvalidDataException">
/// Bits per sample is below 8, the frame size is invalid, the data holds no whole frame,
/// or the sample encoding is unsupported.
/// </exception>
private static byte[] ConvertToMonoPcm16(WaveFormat format, byte[] data)
{
    // Samples are normalized by dividing by 32768 on read, so scaling by the same factor
    // on write makes 16-bit mono input round-trip exactly. The final clamp maps the one
    // unrepresentable value (+32768) to short.MaxValue.
    const float int16Scale = 32768f;

    int bytesPerSample = format.BitsPerSample / 8;
    if (bytesPerSample <= 0)
    {
        throw new InvalidDataException("Unsupported WAV bits per sample: " + format.BitsPerSample);
    }

    int frameSize = bytesPerSample * format.Channels;
    if (frameSize <= 0)
    {
        throw new InvalidDataException("Invalid WAV frame size.");
    }

    int frameCount = data.Length / frameSize;
    if (frameCount == 0)
    {
        throw new InvalidDataException("WAV has no decodable frames.");
    }

    byte[] mono = new byte[frameCount * 2];

    for (int frame = 0; frame < frameCount; frame++)
    {
        int frameOffset = frame * frameSize;
        float mixed = 0f;

        for (int channel = 0; channel < format.Channels; channel++)
        {
            int sampleOffset = frameOffset + (channel * bytesPerSample);
            mixed += ReadNormalizedSample(data, sampleOffset, format.AudioFormat, format.BitsPerSample);
        }

        mixed /= format.Channels;

        // NaN passes through Math.Clamp unchanged and has no defined integer conversion,
        // so treat it as silence.
        if (float.IsNaN(mixed))
        {
            mixed = 0f;
        }

        mixed = Math.Clamp(mixed, -1f, 1f);

        short pcmValue = (short)Math.Clamp(
            (int)Math.Round(mixed * int16Scale),
            short.MinValue,
            short.MaxValue);

        int monoOffset = frame * 2;
        BinaryPrimitives.WriteInt16LittleEndian(new Span<byte>(mono, monoOffset, 2), pcmValue);
    }

    return mono;
}
|
|
|
/// <summary>
/// Decodes one sample at <paramref name="offset"/> into a float.
/// </summary>
/// <param name="data">Raw sample bytes (little-endian).</param>
/// <param name="offset">Byte offset of the sample.</param>
/// <param name="audioFormat">WAV format tag: PCM or IEEE float.</param>
/// <param name="bitsPerSample">Sample width: 8/16/24/32 for PCM, 32/64 for IEEE float.</param>
/// <returns>
/// For PCM, the integer sample divided by its full-scale magnitude (8-bit is treated as
/// unsigned with a 128 bias); for IEEE float, the value clamped to [-1, 1].
/// </returns>
/// <exception cref="InvalidDataException">The format/width combination is not supported.</exception>
private static float ReadNormalizedSample(byte[] data, int offset, ushort audioFormat, ushort bitsPerSample)
{
    switch (audioFormat)
    {
        case AudioFormatPcm:
            switch (bitsPerSample)
            {
                case 8:
                    // 8-bit PCM is unsigned; remove the 128 bias first.
                    return (data[offset] - 128) / 128f;

                case 16:
                    short pcm16 = BinaryPrimitives.ReadInt16LittleEndian(new ReadOnlySpan<byte>(data, offset, 2));
                    return pcm16 / 32768f;

                case 24:
                    int packed = data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16);

                    // Sign-extend bit 23: move it into bit 31, then shift back arithmetically.
                    int pcm24 = (packed << 8) >> 8;
                    return pcm24 / 8388608f;

                case 32:
                    int pcm32 = BinaryPrimitives.ReadInt32LittleEndian(new ReadOnlySpan<byte>(data, offset, 4));
                    return pcm32 / 2147483648f;
            }

            break;

        case AudioFormatIeeeFloat:
            switch (bitsPerSample)
            {
                case 32:
                    int singleBits = BinaryPrimitives.ReadInt32LittleEndian(new ReadOnlySpan<byte>(data, offset, 4));
                    return Math.Clamp(BitConverter.Int32BitsToSingle(singleBits), -1f, 1f);

                case 64:
                    long doubleBits = BinaryPrimitives.ReadInt64LittleEndian(new ReadOnlySpan<byte>(data, offset, 8));
                    return (float)Math.Clamp(BitConverter.Int64BitsToDouble(doubleBits), -1d, 1d);
            }

            break;
    }

    throw new InvalidDataException(
        "Unsupported WAV encoding. Supported: PCM 8/16/24/32-bit, IEEE float 32/64-bit.");
}
|
|
|
/// <summary>
/// Reads a four-byte chunk identifier and returns it as an ASCII string.
/// </summary>
/// <param name="reader">Reader positioned at the identifier.</param>
/// <returns>The four-character code.</returns>
/// <exception cref="EndOfStreamException">Fewer than four bytes remain.</exception>
private static string ReadFourCc(BinaryReader reader)
{
    const int fourCcLength = 4;
    byte[] tag = reader.ReadBytes(fourCcLength);

    return tag.Length == fourCcLength
        ? Encoding.ASCII.GetString(tag, 0, fourCcLength)
        : throw new EndOfStreamException("Unexpected end of WAV file.");
}
|
|
|
/// <summary>
/// Immutable subset of the WAV <c>fmt </c> chunk fields.
/// </summary>
private sealed class WaveFormat
{
    /// <summary>Creates the format description from already-parsed values.</summary>
    public WaveFormat(ushort audioFormat, ushort channels, int sampleRate, ushort bitsPerSample) =>
        (AudioFormat, Channels, SampleRate, BitsPerSample) = (audioFormat, channels, sampleRate, bitsPerSample);

    /// <summary>WAV format tag.</summary>
    public ushort AudioFormat { get; }

    /// <summary>Number of interleaved channels.</summary>
    public ushort Channels { get; }

    /// <summary>Sample rate.</summary>
    public int SampleRate { get; }

    /// <summary>Bits per sample.</summary>
    public ushort BitsPerSample { get; }
}
|
} |