diff --git a/dotnet/samples/Concepts/PromptTemplates/HandlebarsVisionPrompts.cs b/dotnet/samples/Concepts/PromptTemplates/HandlebarsVisionPrompts.cs new file mode 100644 index 000000000000..195d281da570 --- /dev/null +++ b/dotnet/samples/Concepts/PromptTemplates/HandlebarsVisionPrompts.cs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.PromptTemplates.Handlebars; + +namespace PromptTemplates; + +// This example shows how to use chat completion handlebars template prompts with base64 encoded images as a parameter. +public class HandlebarsVisionPrompts(ITestOutputHelper output) : BaseTest(output) +{ + [Fact] + public async Task RunAsync() + { + const string HandlebarsTemplate = """ + You are an AI assistant designed to help with image recognition tasks. + + {{request}} + {{imageData}} + + """; + + var kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: TestConfiguration.OpenAI.ChatModelId, + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + + var templateFactory = new HandlebarsPromptTemplateFactory(); + var promptTemplateConfig = new PromptTemplateConfig() + { + Template = HandlebarsTemplate, + TemplateFormat = "handlebars", + Name = "Vision_Chat_Prompt", + }; + var function = kernel.CreateFunctionFromPrompt(promptTemplateConfig, templateFactory); + + var arguments = new KernelArguments(new Dictionary + { + {"request","Describe this image:"}, + {"imageData", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAAXNSR0IArs4c6QAAACVJREFUKFNj/KTO/J+BCMA4iBUyQX1A0I10VAizCj1oMdyISyEAFoQbHwTcuS8AAAAASUVORK5CYII="} + }); + + var response = await kernel.InvokeAsync(function, arguments); + Console.WriteLine(response); + + /* + Output: + The image is a solid block of bright red color. There are no additional features, shapes, or textures present. + */ + } +} diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md index 26eef28982a7..937d832dfcba 100644 --- a/dotnet/samples/Concepts/README.md +++ b/dotnet/samples/Concepts/README.md @@ -142,7 +142,8 @@ Down below you can find the code snippets that demonstrate the usage of many Sem - [MultiplePromptTemplates](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/PromptTemplates/MultiplePromptTemplates.cs) - [PromptFunctionsWithChatGPT](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/PromptTemplates/PromptFunctionsWithChatGPT.cs) - [TemplateLanguage](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/PromptTemplates/TemplateLanguage.cs) -- [PromptyFunction](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/PromptYemplates/PromptyFunction.cs) +- [PromptyFunction](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/PromptTemplates/PromptyFunction.cs) +- [HandlebarsVisionPrompts](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/PromptTemplates/HandlebarsVisionPrompts.cs) ## RAG - Retrieval-Augmented Generation diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatPromptParser.cs b/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatPromptParser.cs index c9cae7acb070..12d63de28d3c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatPromptParser.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatPromptParser.cs @@ -75,7 +75,14 @@ private static ChatMessageContent ParseChatNode(PromptNode node) { if (childNode.TagName.Equals(ImageTagName, StringComparison.OrdinalIgnoreCase)) { - items.Add(new ImageContent(new Uri(childNode.Content!))); + if (childNode.Content!.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) + { + items.Add(new ImageContent(childNode.Content)); + } + else + { + items.Add(new ImageContent(new Uri(childNode.Content!))); + } } else if (childNode.TagName.Equals(TextTagName, StringComparison.OrdinalIgnoreCase)) { diff --git a/dotnet/src/SemanticKernel.UnitTests/Prompt/ChatPromptParserTests.cs b/dotnet/src/SemanticKernel.UnitTests/Prompt/ChatPromptParserTests.cs index ecb051b7d7b1..e3ad0cd53a5c 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Prompt/ChatPromptParserTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Prompt/ChatPromptParserTests.cs @@ -114,6 +114,40 @@ public void ItReturnsChatHistoryWithValidContentItemsIncludeCData() """, c.Content)); } + [Fact] + public void ItReturnsChatHistoryWithValidDataImageContent() + { + // Arrange + string prompt = GetValidPromptWithDataUriImageContent(); + + // Act + bool result = ChatPromptParser.TryParse(prompt, out var chatHistory); + + // Assert + Assert.True(result); + Assert.NotNull(chatHistory); + + Assert.Collection(chatHistory, + c => Assert.Equal("What can I help with?", c.Content), + c => + { + Assert.Equal("Explain this image", c.Content); + Assert.Collection(c.Items, + o => + { + Assert.IsType(o); + Assert.Equal("Explain this image", ((TextContent)o).Text); + }, + o => + { + Assert.IsType(o); + Assert.Equal("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAAXNSR0IArs4c6QAAACVJREFUKFNj/KTO/J+BCMA4iBUyQX1A0I10VAizCj1oMdyISyEAFoQbHwTcuS8AAAAASUVORK5CYII=", ((ImageContent)o).DataUri); + Assert.Equal("image/png", ((ImageContent)o).MimeType); + Assert.NotNull(((ImageContent)o).Data); + }); + }); + } + [Fact] public void ItReturnsChatHistoryWithValidContentItemsIncludeCode() { @@ -210,6 +244,21 @@ Second line. """; } + private static string GetValidPromptWithDataUriImageContent() + { + return + """ + + What can I help with? + + + Explain this image + data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAAXNSR0IArs4c6QAAACVJREFUKFNj/KTO/J+BCMA4iBUyQX1A0I10VAizCj1oMdyISyEAFoQbHwTcuS8AAAAASUVORK5CYII= + + + """; + } + private static string GetValidPromptWithCDataSection() { return