From b66c61e5c316b5dae2ae7e98fef2b1b8729512f7 Mon Sep 17 00:00:00 2001 From: Jacob Alber Date: Mon, 16 Mar 2026 09:56:12 -0400 Subject: [PATCH] fix: FS Checkpoint storage special character support The `sessionId`, an optional parameter when starting a new session when running a workflow is an arbitrary string. This allows consumers to support whatever ids are needed by other systems, but can result in errors when an OS special or forbidden character is included. The fix is to escape the paths, in a 1:1 manner. We rely on EncodeDataString to do this. * Also modifies the index file to make it easier to determine what the name of the file on disk is for a given `sessionId`. --- .../FileSystemJsonCheckpointStore.cs | 33 ++- .../WorkflowsJsonUtilities.cs | 1 + .../FileSystemJsonCheckpointStoreTests.cs | 194 ++++++++++++++---- 3 files changed, 185 insertions(+), 43 deletions(-) diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs index 543fdeb530..c47298f112 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Checkpointing/FileSystemJsonCheckpointStore.cs @@ -5,11 +5,14 @@ using System.IO; using System.Text; using System.Text.Json; +using System.Text.Json.Serialization.Metadata; using System.Threading; using System.Threading.Tasks; namespace Microsoft.Agents.AI.Workflows.Checkpointing; +internal record CheckpointFileIndexEntry(CheckpointInfo CheckpointInfo, string FileName); + /// /// Provides a file system-based implementation of a JSON checkpoint store that persists checkpoint data and index /// information to disk using JSON files. @@ -28,6 +31,8 @@ public sealed class FileSystemJsonCheckpointStore : JsonCheckpointStore, IDispos internal DirectoryInfo Directory { get; } internal HashSet CheckpointIndex { get; } + private static JsonTypeInfo EntryTypeInfo => WorkflowsJsonUtilities.JsonContext.Default.CheckpointFileIndexEntry; + /// /// Initializes a new instance of the class that uses the specified directory /// @@ -64,9 +69,11 @@ public FileSystemJsonCheckpointStore(DirectoryInfo directory) using StreamReader reader = new(this._indexFile, encoding: Encoding.UTF8, detectEncodingFromByteOrderMarks: false, BufferSize, leaveOpen: true); while (reader.ReadLine() is string line) { - if (JsonSerializer.Deserialize(line, KeyTypeInfo) is { } info) + if (JsonSerializer.Deserialize(line, EntryTypeInfo) is { } entry) { - this.CheckpointIndex.Add(info); + // We never actually use the file names from the index entries since they can be derived from the CheckpointInfo, but it is useful to + // have the UrlEncoded file names in the index file for human readability + this.CheckpointIndex.Add(entry.CheckpointInfo); } } } @@ -93,8 +100,14 @@ private void CheckDisposed() } } - private string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key) - => Path.Combine(this.Directory.FullName, $"{sessionId}_{key.CheckpointId}.json"); + internal string GetFileNameForCheckpoint(string sessionId, CheckpointInfo key) + { + string protoPath = $"{sessionId}_{key.CheckpointId}.json"; + + // Escape the protoPath to ensure it is a valid file name, especially if sessionId or CheckpointId contain path separators, etc. + return Uri.EscapeDataString(protoPath) // This takes care of most of the invalid path characters + .Replace(".", "%2E"); // This takes care of escaping the root folder, since EscapeDataString does not escape dots + } private CheckpointInfo GetUnusedCheckpointInfo(string sessionId) { @@ -116,13 +129,16 @@ public override async ValueTask CreateCheckpointAsync(string ses CheckpointInfo key = this.GetUnusedCheckpointInfo(sessionId); string fileName = this.GetFileNameForCheckpoint(sessionId, key); + string filePath = Path.Combine(this.Directory.FullName, fileName); + try { - using Stream checkpointStream = File.Open(fileName, FileMode.Create, FileAccess.Write, FileShare.None); + using Stream checkpointStream = File.Open(filePath, FileMode.Create, FileAccess.Write, FileShare.None); using Utf8JsonWriter jsonWriter = new(checkpointStream, new JsonWriterOptions() { Indented = false }); value.WriteTo(jsonWriter); - JsonSerializer.Serialize(this._indexFile!, key, KeyTypeInfo); + CheckpointFileIndexEntry entry = new(key, fileName); + JsonSerializer.Serialize(this._indexFile!, entry, EntryTypeInfo); byte[] bytes = Encoding.UTF8.GetBytes(Environment.NewLine); await this._indexFile!.WriteAsync(bytes, 0, bytes.Length, CancellationToken.None).ConfigureAwait(false); await this._indexFile!.FlushAsync(CancellationToken.None).ConfigureAwait(false); @@ -136,7 +152,7 @@ public override async ValueTask CreateCheckpointAsync(string ses try { // try to clean up after ourselves - File.Delete(fileName); + File.Delete(filePath); } catch { } @@ -149,6 +165,7 @@ public override async ValueTask RetrieveCheckpointAsync(string sess { this.CheckDisposed(); string fileName = this.GetFileNameForCheckpoint(sessionId, key); + string filePath = Path.Combine(this.Directory.FullName, fileName); if (!this.CheckpointIndex.Contains(key) || !File.Exists(fileName)) @@ -156,7 +173,7 @@ public override async ValueTask RetrieveCheckpointAsync(string sess throw new KeyNotFoundException($"Checkpoint '{key.CheckpointId}' not found in store at '{this.Directory.FullName}'."); } - using FileStream checkpointFileStream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); + using FileStream checkpointFileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.Read); using JsonDocument document = await JsonDocument.ParseAsync(checkpointFileStream).ConfigureAwait(false); return document.RootElement.Clone(); diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs index 08d1dcbcbb..4a94961522 100644 --- a/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs +++ b/dotnet/src/Microsoft.Agents.AI.Workflows/WorkflowsJsonUtilities.cs @@ -71,6 +71,7 @@ private static JsonSerializerOptions CreateDefaultOptions() [JsonSerializable(typeof(PortableValue))] [JsonSerializable(typeof(PortableMessageEnvelope))] [JsonSerializable(typeof(InMemoryCheckpointManager))] + [JsonSerializable(typeof(CheckpointFileIndexEntry))] // Runtime State Types [JsonSerializable(typeof(ScopeKey))] diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs index d21f9af035..10fbfddd64 100644 --- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs +++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/FileSystemJsonCheckpointStoreTests.cs @@ -9,49 +9,173 @@ namespace Microsoft.Agents.AI.Workflows.UnitTests; -public sealed class FileSystemJsonCheckpointStoreTests +internal sealed class TempDirectory : IDisposable { - [Fact] - public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync() + public DirectoryInfo DirectoryInfo { get; } + + public TempDirectory() { - // Arrange - DirectoryInfo tempDir = new(Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString())); - FileSystemJsonCheckpointStore? store = null; + string tempDirPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + this.DirectoryInfo = Directory.CreateDirectory(tempDirPath); + } + + public void Dispose() + { + this.DisposeInternal(); + GC.SuppressFinalize(this); + } + + private void DisposeInternal() + { + if (this.DirectoryInfo.Exists) + { + try + { + // Best efforts + this.DirectoryInfo.Delete(recursive: true); + } + catch { } + } + } + + ~TempDirectory() + { + // Best efforts + this.DisposeInternal(); + } + + public static implicit operator DirectoryInfo(TempDirectory tempDirectory) => tempDirectory.DirectoryInfo; + + public string FullName => this.DirectoryInfo.FullName; - try + public bool IsParentOf(FileInfo candidate) + { + if (candidate.Directory is null) { - store = new(tempDir); - string runId = Guid.NewGuid().ToString("N"); - JsonElement testData = JsonSerializer.SerializeToElement(new { test = "data" }); - - // Act - CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, testData); - - // Assert - Check the file size before disposing to verify data was flushed to disk - // The index.jsonl file is held exclusively by the store, so we check via FileInfo - string indexPath = Path.Combine(tempDir.FullName, "index.jsonl"); - FileInfo indexFile = new(indexPath); - indexFile.Refresh(); - long fileSizeBeforeDispose = indexFile.Length; - - // Data should already be on disk (file size > 0) before we dispose - fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync"); - - // Dispose to release file lock before final verification - store.Dispose(); - store = null; - - string[] lines = File.ReadAllLines(indexPath); - lines.Should().HaveCount(1); - lines[0].Should().Contain(checkpoint.CheckpointId); + return false; } - finally + + if (candidate.Directory.FullName == this.DirectoryInfo.FullName) + { + return true; + } + + return this.IsParentOf(candidate.Directory); + } + + public bool IsParentOf(DirectoryInfo candidate) + { + while (candidate.Parent is not null) { - store?.Dispose(); - if (tempDir.Exists) + if (candidate.Parent.FullName == this.DirectoryInfo.FullName) { - tempDir.Delete(recursive: true); + return true; } + + candidate = candidate.Parent; } + + return false; + } +} +public sealed class FileSystemJsonCheckpointStoreTests +{ + public static JsonElement TestData => JsonSerializer.SerializeToElement(new { test = "data" }); + + [Fact] + public async Task CreateCheckpointAsync_ShouldPersistIndexToDiskBeforeDisposeAsync() + { + // Arrange + using TempDirectory tempDirectory = new(); + using FileSystemJsonCheckpointStore? store = new(tempDirectory); + + string runId = Guid.NewGuid().ToString("N"); + + // Act + CheckpointInfo checkpoint = await store.CreateCheckpointAsync(runId, TestData); + + // Assert - Check the file size before disposing to verify data was flushed to disk + // The index.jsonl file is held exclusively by the store, so we check via FileInfo + string indexPath = Path.Combine(tempDirectory.FullName, "index.jsonl"); + FileInfo indexFile = new(indexPath); + indexFile.Refresh(); + long fileSizeBeforeDispose = indexFile.Length; + + // Data should already be on disk (file size > 0) before we dispose + fileSizeBeforeDispose.Should().BeGreaterThan(0, "index.jsonl should be flushed to disk after CreateCheckpointAsync"); + + // Dispose to release file lock before final verification + store.Dispose(); + + string[] lines = File.ReadAllLines(indexPath); + lines.Should().HaveCount(1); + lines[0].Should().Contain(checkpoint.CheckpointId); + } + + private async ValueTask Run_EscapeRootFolderTestAsync(string escapingPath) + { + // Arrange + using TempDirectory tempDirectory = new(); + using FileSystemJsonCheckpointStore store = new(tempDirectory); + + string naivePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, escapingPath); + + // Check that the naive path is actually outside the temp directory to validate the test is meaningful + FileInfo naiveCheckpointFile = new(naivePath); + tempDirectory.IsParentOf(naiveCheckpointFile).Should().BeFalse("The naive path should be outside the root folder to validate that escaping is necessary."); + + // Act + CheckpointInfo checkpointInfo = await store.CreateCheckpointAsync(escapingPath, TestData); + + // Assert + string naivePathWithCheckpointId = Path.Combine(tempDirectory.DirectoryInfo.FullName, $"{escapingPath}_{checkpointInfo.CheckpointId}.json"); + new FileInfo(naivePathWithCheckpointId).Exists.Should().BeFalse("The naive path should not be used to save a checkpoint file."); + + string actualFileName = store.GetFileNameForCheckpoint(escapingPath, checkpointInfo); + string actualFilePath = Path.Combine(tempDirectory.DirectoryInfo.FullName, actualFileName); + FileInfo actualFile = new(actualFilePath); + + tempDirectory.IsParentOf(actualFile).Should().BeTrue("The actual checkpoint should be saved inside the root folder."); + actualFile.Exists.Should().BeTrue("The actual path should be used to save a checkpoint file."); + } + + [Fact] + public async Task CreateCheckpointAsync_ShouldNotEscapeRootFolderAsync() + { + // The SessionId is used as part of the file name, but if it contains path characters such as /.. it can escape the root folder. + // Testing that such characters are escaped properly to prevent directory traversal attacks, etc. + + await this.Run_EscapeRootFolderTestAsync("../valid_suffix"); + +#if !NETFRAMEWORK + if (OperatingSystem.IsWindows()) + { + // Windows allows both \ and / as path separators, so we test both + await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix"); + } +#else + // .NET Framework is always on Windows + await this.Run_EscapeRootFolderTestAsync("..\\valid_suffix"); +#endif + } + + private const string InvalidPathCharsWin32 = "\\/:*?\"<>|"; + private const string InvalidPathCharsUnix = "/"; + private const string InvalidPathCharsMacOS = "/:"; + + [Theory] + [InlineData(InvalidPathCharsWin32)] + [InlineData(InvalidPathCharsUnix)] + [InlineData(InvalidPathCharsMacOS)] + public async Task CreateCheckpointAsync_EscapesInvalidCharsAsync(string invalidChars) + { + // Arrange + using TempDirectory tempDirectory = new(); + using FileSystemJsonCheckpointStore store = new(tempDirectory); + + string runId = $"prefix_{invalidChars}_suffix"; + + Func createCheckpointAction = async () => await store.CreateCheckpointAsync(runId, TestData); + await createCheckpointAction.Should().NotThrowAsync(); } }