// Copyright (c) Microsoft. All rights reserved.

using System.Reflection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using OpenAI.Chat;
using Resources;

namespace ChatCompletion;

/// <summary>
/// These examples demonstrate how to use audio input and output with OpenAI Chat Completion.
/// </summary>
/// <remarks>
/// Currently, audio input and output are only supported with the following models:
/// <list type="bullet">
/// <item>gpt-4o-audio-preview</item>
/// </list>
/// The samples demonstrate:
/// <list type="bullet">
/// <item>How to send audio input to the model</item>
/// <item>How to receive both text and audio output from the model</item>
/// <item>How to save and process the audio response</item>
/// </list>
/// </remarks>
public class OpenAI_ChatCompletionWithAudio(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// This example demonstrates how to use audio input and receive both text and audio output from the model.
    /// </summary>
    /// <remarks>
    /// This sample shows:
    /// <list type="bullet">
    /// <item>Loading audio data from a resource file</item>
    /// <item>Configuring the chat completion service with audio options</item>
    /// <item>Enabling both text and audio response modalities</item>
    /// <item>Extracting and saving the audio response to a file</item>
    /// <item>Accessing the transcript metadata from the audio response</item>
    /// </list>
    /// </remarks>
    [Fact]
    public async Task UsingChatCompletionWithLocalInputAudioAndOutputAudio()
    {
        Console.WriteLine($"======== OpenAI - {nameof(UsingChatCompletionWithLocalInputAudioAndOutputAudio)} ========\n");

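        // Load the input audio from an embedded resource (a WAV file shipped with the samples).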
        var audioBytes = await EmbeddedResource.ReadAllAsync("test_audio.wav");

        var kernel = Kernel.CreateBuilder()
            .AddOpenAIChatCompletion("gpt-4o-audio-preview", TestConfiguration.OpenAI.ApiKey)
            .Build();

        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();
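
        // ChatAudioOptions selects the voice and format of the audio output; Modalities requests
        // both a text and an audio response in the same call.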
        var settings = new OpenAIPromptExecutionSettings
        {
            Audio = new ChatAudioOptions(ChatOutputAudioVoice.Shimmer, ChatOutputAudioFormat.Mp3),
            Modalities = ChatResponseModalities.Text | ChatResponseModalities.Audio
        };

        var chatHistory = new ChatHistory("You are a friendly assistant.");

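        // The audio is attached to a user message as an AudioContent item with its MIME type.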
        chatHistory.AddUserMessage([new AudioContent(audioBytes, "audio/wav")]);

        var result = await chatCompletionService.GetChatMessageContentAsync(chatHistory, settings);

        // Extract the audio content item from the result.
        var audioReply = result.Items.OfType<AudioContent>().First();

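        // Save the audio response next to the test assembly, replacing any previous output file.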
        var currentDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location)!;
        var audioFile = Path.Combine(currentDirectory, "audio_output.mp3");
        if (File.Exists(audioFile))
        {
            File.Delete(audioFile);
        }
        await File.WriteAllBytesAsync(audioFile, audioReply.Data!.Value.ToArray());

        Console.WriteLine($"Generated audio: {new Uri(audioFile).AbsoluteUri}");
        Console.WriteLine($"Transcript: {audioReply.Metadata!["Transcript"]}");
    }

    /// <summary>
    /// This example demonstrates how to use audio input and receive only text output from the model.
    /// </summary>
    /// <remarks>
    /// This sample shows:
    /// <list type="bullet">
    /// <item>Loading audio data from a resource file</item>
    /// <item>Configuring the chat completion service with audio options</item>
    /// <item>Setting response modalities to Text only</item>
    /// <item>Processing the text response from the model</item>
    /// </list>
    /// </remarks>
    [Fact]
    public async Task UsingChatCompletionWithLocalInputAudioAndTextOutput()
    {
        Console.WriteLine($"======== OpenAI - {nameof(UsingChatCompletionWithLocalInputAudioAndTextOutput)} ========\n");

        var audioBytes = await EmbeddedResource.ReadAllAsync("test_audio.wav");

        var kernel = Kernel.CreateBuilder()
            .AddOpenAIChatCompletion("gpt-4o-audio-preview", TestConfiguration.OpenAI.ApiKey)
            .Build();

        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();
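
        // Only the Text modality is requested here, so the model answers the audio input with
        // text and no audio is generated in the response.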
        var settings = new OpenAIPromptExecutionSettings
        {
            Audio = new ChatAudioOptions(ChatOutputAudioVoice.Shimmer, ChatOutputAudioFormat.Mp3),
            Modalities = ChatResponseModalities.Text
        };

        var chatHistory = new ChatHistory("You are a friendly assistant.");

        chatHistory.AddUserMessage([new AudioContent(audioBytes, "audio/wav")]);

        var result = await chatCompletionService.GetChatMessageContentAsync(chatHistory, settings);

        // Output the text response from the model.
        Console.WriteLine($"Assistant > {result}");
    }
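
    /// <summary>
    /// A hedged sketch, not part of the original sample: sending audio input and streaming the
    /// text response. It assumes that, with only the Text modality requested, streaming with
    /// gpt-4o-audio-preview behaves like a regular streaming chat completion.
    /// </summary>
    [Fact]
    public async Task UsingChatCompletionWithLocalInputAudioAndStreamingTextOutput()
    {
        Console.WriteLine($"======== OpenAI - {nameof(UsingChatCompletionWithLocalInputAudioAndStreamingTextOutput)} ========\n");

        var audioBytes = await EmbeddedResource.ReadAllAsync("test_audio.wav");

        var kernel = Kernel.CreateBuilder()
            .AddOpenAIChatCompletion("gpt-4o-audio-preview", TestConfiguration.OpenAI.ApiKey)
            .Build();

        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();
        var settings = new OpenAIPromptExecutionSettings
        {
            Modalities = ChatResponseModalities.Text
        };

        var chatHistory = new ChatHistory("You are a friendly assistant.");
        chatHistory.AddUserMessage([new AudioContent(audioBytes, "audio/wav")]);

        Console.Write("Assistant > ");

        // Write each streamed text fragment as it arrives.
        await foreach (var update in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory, settings))
        {
            Console.Write(update);
        }

        Console.WriteLine();
    }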
}