From a1be1ceebff78475c90bb69e9dee3c44603151d8 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 4 Feb 2026 08:38:38 +0000 Subject: [PATCH 01/11] Add progress handler for rootfs unpacking --- Sources/Containerization/ContainerManager.swift | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Sources/Containerization/ContainerManager.swift b/Sources/Containerization/ContainerManager.swift index bbdbd8c0..7ff386eb 100644 --- a/Sources/Containerization/ContainerManager.swift +++ b/Sources/Containerization/ContainerManager.swift @@ -376,6 +376,7 @@ public struct ContainerManager: Sendable { /// - readOnly: Whether to mount the root filesystem as read-only. /// - networking: Whether to create a network interface for this container. Defaults to `true`. /// When `false`, no network resources are allocated and `releaseNetwork`/`delete` remain safe to call. + /// - progress: Optional handler for tracking rootfs unpacking progress. public mutating func create( _ id: String, reference: String, @@ -383,6 +384,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: UInt64? = nil, readOnly: Bool = false, networking: Bool = true, + progress: ProgressHandler? = nil, configuration: (inout LinuxContainer.Configuration) throws -> Void ) async throws -> LinuxContainer { let image = try await imageStore.get(reference: reference, pull: true) @@ -393,6 +395,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: writableLayerSizeInBytes, readOnly: readOnly, networking: networking, + progress: progress, configuration: configuration ) } @@ -407,6 +410,7 @@ public struct ContainerManager: Sendable { /// - readOnly: Whether to mount the root filesystem as read-only. /// - networking: Whether to create a network interface for this container. Defaults to `true`. /// When `false`, no network resources are allocated and `releaseNetwork`/`delete` remain safe to call. + /// - progress: Optional handler for tracking rootfs unpacking progress. public mutating func create( _ id: String, image: Image, @@ -414,6 +418,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: UInt64? = nil, readOnly: Bool = false, networking: Bool = true, + progress: ProgressHandler? = nil, configuration: (inout LinuxContainer.Configuration) throws -> Void ) async throws -> LinuxContainer { let path = try createContainerRoot(id) @@ -421,7 +426,8 @@ public struct ContainerManager: Sendable { var rootfs = try await unpack( image: image, destination: path.appendingPathComponent("rootfs.ext4"), - size: rootfsSizeInBytes + size: rootfsSizeInBytes, + progress: progress ) if readOnly { rootfs.options.append("ro") @@ -511,10 +517,10 @@ public struct ContainerManager: Sendable { return path } - private func unpack(image: Image, destination: URL, size: UInt64) async throws -> Mount { + private func unpack(image: Image, destination: URL, size: UInt64, progress: ProgressHandler? = nil) async throws -> Mount { do { let unpacker = EXT4Unpacker(blockSizeInBytes: size) - return try await unpacker.unpack(image, for: .current, at: destination) + return try await unpacker.unpack(image, for: .current, at: destination, progress: progress) } catch let err as ContainerizationError { if err.code == .exists { return .block( From 60d26a0399b765a1c05c0bde8c968af7fb441739 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 4 Feb 2026 09:06:48 +0000 Subject: [PATCH 02/11] Add accurate progress calculation --- .../Formatter+Unpack.swift | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift index fee04cc3..c793a275 100644 --- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift +++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift @@ -122,6 +122,28 @@ extension EXT4.Formatter { compression: ContainerizationArchive.Filter = .gzip, progress: ProgressHandler? = nil ) throws { + // Optional first pass: scan headers to get total size (fast, metadata only) + if let progress { + let sizeReader = try ArchiveReader( + format: format, + filter: compression, + file: source + ) + var totalSize: Int64 = 0 + for (entry, _) in sizeReader.makeStreamingIterator() { + try Task.checkCancellation() + if entry.fileType == .regular, let size = entry.size { + totalSize += Int64(size) + } + } + if totalSize > 0 { + Task { + await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) + } + } + } + + // Second pass: unpack let reader = try ArchiveReader( format: format, filter: compression, From 359437ab3363e567ef456c108e1deef8929b634f Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 4 Feb 2026 19:29:25 +0000 Subject: [PATCH 03/11] Add tests --- .../TestFormatterUnpack.swift | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index 0407950a..0a1776c2 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -18,6 +18,7 @@ #if os(macOS) import ContainerizationArchive +import ContainerizationExtras import Foundation import Testing import SystemPackage @@ -130,6 +131,159 @@ struct Tar2EXT4Test: ~Copyable { } } +/// Collects progress events in a thread-safe manner. +private actor ProgressCollector { + var events: [ProgressEvent] = [] + + func append(_ newEvents: [ProgressEvent]) { + events.append(contentsOf: newEvents) + } + + func allEvents() -> [ProgressEvent] { + events + } +} + +struct UnpackProgressTest { + @Test func progressReportsAccurateSizes() async throws { + // Create an archive with files of known sizes + let tempDir = FileManager.default.uniqueTemporaryDirectory() + let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) + let fsPath = FilePath(tempDir.appendingPathComponent("test.ext4.img", isDirectory: false)) + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create test data with specific sizes + let file1Data = Data(repeating: 0xAA, count: 1024) // 1 KiB + let file2Data = Data(repeating: 0xBB, count: 4096) // 4 KiB + let file3Data = Data(repeating: 0xCC, count: 512) // 512 bytes + let expectedTotalSize: Int64 = 1024 + 4096 + 512 // 5632 bytes + + // Build the archive + let archiver = try ArchiveWriter( + configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) + try archiver.open(file: archivePath) + + try archiver.writeEntry(entry: WriteEntry.dir(path: "/data", permissions: 0o755), data: nil) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file1.bin", permissions: 0o644, size: Int64(file1Data.count)), + data: file1Data) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file2.bin", permissions: 0o644, size: Int64(file2Data.count)), + data: file2Data) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file3.bin", permissions: 0o644, size: Int64(file3Data.count)), + data: file3Data) + // Include an empty file to verify it doesn't break size calculations + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/empty.bin", permissions: 0o644, size: 0), + data: Data()) + try archiver.finishEncoding() + + // Set up progress collection + let collector = ProgressCollector() + let progressHandler: ProgressHandler = { events in + await collector.append(events) + } + + // Unpack with progress tracking + let formatter = try EXT4.Formatter(fsPath) + try formatter.unpack(source: archivePath, progress: progressHandler) + try formatter.close() + + // Allow async progress tasks to complete + try await Task.sleep(for: .milliseconds(100)) + + // Analyze collected events + let allEvents = await collector.allEvents() + + var reportedTotalSize: Int64 = 0 + var cumulativeSize: Int64 = 0 + var itemCount: Int64 = 0 + + for event in allEvents { + switch event.event { + case "add-total-size": + let value = try #require(event.value as? Int64, "add-total-size value should be Int64") + reportedTotalSize += value + case "add-size": + let value = try #require(event.value as? Int64, "add-size value should be Int64") + cumulativeSize += value + case "add-items": + let value = try #require(event.value as? Int, "add-items value should be Int") + itemCount += Int64(value) + default: + break + } + } + + // Verify the progress contract + #expect( + reportedTotalSize == expectedTotalSize, + "Total size should be \(expectedTotalSize) bytes, got \(reportedTotalSize)") + #expect( + cumulativeSize == expectedTotalSize, + "Cumulative size should equal total size (\(expectedTotalSize)), got \(cumulativeSize)") + #expect( + itemCount == 5, + "Should have processed 5 entries (1 dir + 4 files), got \(itemCount)") + + // Verify incremental progress: we should get separate add-size events for each file + let addSizeEvents = allEvents.filter { $0.event == "add-size" } + #expect( + addSizeEvents.count == 4, + "Should have 4 add-size events (one per file, including empty), got \(addSizeEvents.count)") + + // Verify individual file sizes were reported correctly + let reportedSizes = addSizeEvents.compactMap { $0.value as? Int64 }.sorted() + #expect( + reportedSizes == [0, 512, 1024, 4096], + "Individual file sizes should be [0, 512, 1024, 4096], got \(reportedSizes)") + + // Verify add-total-size comes before add-size events (first pass before second pass) + if let totalSizeIndex = allEvents.firstIndex(where: { $0.event == "add-total-size" }), + let firstAddSizeIndex = allEvents.firstIndex(where: { $0.event == "add-size" }) { + #expect( + totalSizeIndex < firstAddSizeIndex, + "add-total-size should be reported before add-size events") + } + } + + @Test func progressHandlerIsOptional() throws { + // Verify that unpacking works without a progress handler (existing behavior) + let tempDir = FileManager.default.uniqueTemporaryDirectory() + let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) + let fsPath = FilePath(tempDir.appendingPathComponent("test.ext4.img", isDirectory: false)) + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + let archiver = try ArchiveWriter( + configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) + try archiver.open(file: archivePath) + try archiver.writeEntry(entry: WriteEntry.dir(path: "/test", permissions: 0o755), data: nil) + let data = Data(repeating: 0x42, count: 100) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/test/file.bin", permissions: 0o644, size: Int64(data.count)), + data: data) + try archiver.finishEncoding() + + // Unpack without progress handler - should not throw + let formatter = try EXT4.Formatter(fsPath) + try formatter.unpack(source: archivePath) + try formatter.close() + + // Verify the file was unpacked correctly + let reader = try EXT4.EXT4Reader(blockDevice: fsPath) + let children = try reader.children(of: EXT4.RootInode) + let childNames = Set(children.map { $0.0 }) + #expect(childNames.contains("test"), "Directory 'test' should exist in unpacked filesystem") + } +} + extension ContainerizationArchive.WriteEntry { static func dir(path: String, permissions: UInt16) -> WriteEntry { let entry = WriteEntry() From fbdc0355b9812de51fe52c3fc202ea5899d2664c Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Fri, 27 Feb 2026 23:18:02 +0100 Subject: [PATCH 04/11] Fix unpack progress reporting --- .../Image/Unpacker/EXT4Unpacker.swift | 73 +++++++++++++++---- .../TestFormatterUnpack.swift | 41 +++++++++++ 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index 4e588e8d..89313540 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -84,27 +84,24 @@ public struct EXT4Unpacker: Unpacker { ) defer { try? filesystem.close() } + if let progress { + let totalSize = try await totalRegularFileBytes(in: manifest.layers, image: image) + if totalSize > 0 { + await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) + } + } + for layer in manifest.layers { try Task.checkCancellation() let content = try await image.getContent(digest: layer.digest) - let compression: ContainerizationArchive.Filter - switch layer.mediaType { - case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: - compression = .none - case MediaTypes.imageLayerGzip, MediaTypes.dockerImageLayerGzip: - compression = .gzip - case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: - compression = .zstd - default: - throw ContainerizationError(.unsupported, message: "media type \(layer.mediaType) not supported.") - } - try filesystem.unpack( - source: content.path, + let compression = try compressionFilter(for: layer.mediaType) + let reader = try ArchiveReader( format: .paxRestricted, - compression: compression, - progress: progress + filter: compression, + file: content.path ) + try filesystem.unpack(reader: reader, progress: progress) } return .block( @@ -123,4 +120,50 @@ public struct EXT4Unpacker: Unpacker { } return blockPath } + + #if os(macOS) + private func compressionFilter(for mediaType: String) throws -> ContainerizationArchive.Filter { + switch mediaType { + case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: + return .none + case MediaTypes.imageLayerGzip, MediaTypes.dockerImageLayerGzip: + return .gzip + case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: + return .zstd + default: + throw ContainerizationError(.unsupported, message: "media type \(mediaType) not supported.") + } + } + + private func totalRegularFileBytes(in layers: [Descriptor], image: Image) async throws -> Int64 { + var totalSize: Int64 = 0 + + for layer in layers { + try Task.checkCancellation() + + let compression = try compressionFilter(for: layer.mediaType) + let content = try await image.getContent(digest: layer.digest) + let reader = try ArchiveReader( + format: .paxRestricted, + filter: compression, + file: content.path + ) + + for (entry, _) in reader.makeStreamingIterator() { + try Task.checkCancellation() + guard entry.fileType == .regular, let size = entry.size else { + continue + } + + let fileSize = Int64(clamping: size) + if totalSize > Int64.max - fileSize { + return Int64.max + } + totalSize += fileSize + } + } + + return totalSize + } + #endif } diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index 0a1776c2..0a152b2c 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -184,7 +184,13 @@ struct UnpackProgressTest { // Set up progress collection let collector = ProgressCollector() + let shouldPrintProgress = ProcessInfo.processInfo.environment["PRINT_UNPACK_PROGRESS"] == "1" let progressHandler: ProgressHandler = { events in + if shouldPrintProgress { + for event in events { + print("unpack-progress \(event.event): \(event.value)") + } + } await collector.append(events) } @@ -242,6 +248,41 @@ struct UnpackProgressTest { reportedSizes == [0, 512, 1024, 4096], "Individual file sizes should be [0, 512, 1024, 4096], got \(reportedSizes)") + // Verify event-by-event behavior expected by clients: + // total remains stable and written bytes are monotonic as progress updates arrive. + var runningTotal: Int64? + var runningWritten: Int64 = 0 + var previousSnapshot: (written: Int64, total: Int64?)? + var progressSnapshotCount = 0 + + for event in allEvents { + switch event.event { + case "add-total-size": + let value = try #require(event.value as? Int64, "add-total-size value should be Int64") + runningTotal = (runningTotal ?? 0) + value + case "add-size": + let value = try #require(event.value as? Int64, "add-size value should be Int64") + runningWritten += value + let currentSnapshot = (written: runningWritten, total: runningTotal) + if let previousSnapshot { + #expect( + currentSnapshot.written >= previousSnapshot.written, + "Written bytes should be monotonic: \(currentSnapshot.written) < \(previousSnapshot.written)") + #expect( + currentSnapshot.total == previousSnapshot.total, + "Total bytes should remain stable across progress updates") + } + previousSnapshot = currentSnapshot + progressSnapshotCount += 1 + default: + break + } + } + + #expect( + progressSnapshotCount == addSizeEvents.count, + "Should produce one monotonic snapshot per add-size update") + // Verify add-total-size comes before add-size events (first pass before second pass) if let totalSizeIndex = allEvents.firstIndex(where: { $0.event == "add-total-size" }), let firstAddSizeIndex = allEvents.firstIndex(where: { $0.event == "add-size" }) { From b4165335d074063a5bd390b33c524ce19317a679 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 11 Mar 2026 11:26:15 +0100 Subject: [PATCH 05/11] Avoid double zstd decompression when progress is enabled --- .../Image/Unpacker/EXT4Unpacker.swift | 43 +++++++++++++------ .../ArchiveReader.swift | 12 ++++-- .../Formatter+Unpack.swift | 27 ++++++++++-- 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index 89313540..4c9ea881 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -84,22 +84,41 @@ public struct EXT4Unpacker: Unpacker { ) defer { try? filesystem.close() } + // Resolve layer paths upfront. When progress reporting is enabled and a layer + // uses zstd, decompress once so both the size-scanning pass and the unpack + // pass share the same decompressed file. + var resolvedLayers: [(file: URL, filter: ContainerizationArchive.Filter)] = [] + var decompressedFiles: [URL] = [] + for layer in manifest.layers { + let content = try await image.getContent(digest: layer.digest) + let compression = try compressionFilter(for: layer.mediaType) + if progress != nil && compression == .zstd { + let decompressed = try ArchiveReader.decompressZstd(content.path) + decompressedFiles.append(decompressed) + resolvedLayers.append((file: decompressed, filter: .none)) + } else { + resolvedLayers.append((file: content.path, filter: compression)) + } + } + defer { + for file in decompressedFiles { + ArchiveReader.cleanUpDecompressedZstd(file) + } + } + if let progress { - let totalSize = try await totalRegularFileBytes(in: manifest.layers, image: image) + let totalSize = try totalRegularFileBytes(in: resolvedLayers) if totalSize > 0 { await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) } } - for layer in manifest.layers { + for resolved in resolvedLayers { try Task.checkCancellation() - let content = try await image.getContent(digest: layer.digest) - - let compression = try compressionFilter(for: layer.mediaType) let reader = try ArchiveReader( format: .paxRestricted, - filter: compression, - file: content.path + filter: resolved.filter, + file: resolved.file ) try filesystem.unpack(reader: reader, progress: progress) } @@ -135,18 +154,18 @@ public struct EXT4Unpacker: Unpacker { } } - private func totalRegularFileBytes(in layers: [Descriptor], image: Image) async throws -> Int64 { + private func totalRegularFileBytes( + in layers: [(file: URL, filter: ContainerizationArchive.Filter)] + ) throws -> Int64 { var totalSize: Int64 = 0 for layer in layers { try Task.checkCancellation() - let compression = try compressionFilter(for: layer.mediaType) - let content = try await image.getContent(digest: layer.digest) let reader = try ArchiveReader( format: .paxRestricted, - filter: compression, - file: content.path + filter: layer.filter, + file: layer.file ) for (entry, _) in reader.makeStreamingIterator() { diff --git a/Sources/ContainerizationArchive/ArchiveReader.swift b/Sources/ContainerizationArchive/ArchiveReader.swift index b353b209..6c2bfe54 100644 --- a/Sources/ContainerizationArchive/ArchiveReader.swift +++ b/Sources/ContainerizationArchive/ArchiveReader.swift @@ -125,7 +125,7 @@ public final class ArchiveReader { } /// Decompress a zstd file to a temporary location - private static func decompressZstd(_ source: URL) throws -> URL { + public static func decompressZstd(_ source: URL) throws -> URL { guard let tempDir = createTemporaryDirectory(baseName: "zstd-decompress") else { throw ArchiveError.failedToDetectFormat } @@ -148,13 +148,19 @@ public final class ArchiveReader { return tempFile } + /// Clean up the temporary directory created by `decompressZstd`. + /// The decompressed file is placed inside a unique temporary directory, + /// so removing that directory cleans up everything. + public static func cleanUpDecompressedZstd(_ file: URL) { + try? FileManager.default.removeItem(at: file.deletingLastPathComponent()) + } + deinit { archive_read_free(underlying) try? fileHandle?.close() - // Clean up temp decompressed file if let tempFile = tempDecompressedFile { - try? FileManager.default.removeItem(at: tempFile.deletingLastPathComponent()) + Self.cleanUpDecompressedZstd(tempFile) } } } diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift index c793a275..7ec46b8e 100644 --- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift +++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift @@ -122,12 +122,31 @@ extension EXT4.Formatter { compression: ContainerizationArchive.Filter = .gzip, progress: ProgressHandler? = nil ) throws { + // For zstd, decompress once and reuse for both passes to avoid double decompression. + let fileToRead: URL + let readerFilter: ContainerizationArchive.Filter + var decompressedFile: URL? + if compression == .zstd { + let decompressed = try ArchiveReader.decompressZstd(source) + fileToRead = decompressed + readerFilter = .none + decompressedFile = decompressed + } else { + fileToRead = source + readerFilter = compression + } + defer { + if let decompressedFile { + ArchiveReader.cleanUpDecompressedZstd(decompressedFile) + } + } + // Optional first pass: scan headers to get total size (fast, metadata only) if let progress { let sizeReader = try ArchiveReader( format: format, - filter: compression, - file: source + filter: readerFilter, + file: fileToRead ) var totalSize: Int64 = 0 for (entry, _) in sizeReader.makeStreamingIterator() { @@ -146,8 +165,8 @@ extension EXT4.Formatter { // Second pass: unpack let reader = try ArchiveReader( format: format, - filter: compression, - file: source + filter: readerFilter, + file: fileToRead ) try self.unpack(reader: reader, progress: progress) } From b1cfc8635f39659e941f2bcad09c863f1cf5ef10 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 11 Mar 2026 11:26:27 +0100 Subject: [PATCH 06/11] Simplify overflow handling in totalRegularFileBytes --- Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index 4c9ea881..97bc9328 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -174,11 +174,7 @@ public struct EXT4Unpacker: Unpacker { continue } - let fileSize = Int64(clamping: size) - if totalSize > Int64.max - fileSize { - return Int64.max - } - totalSize += fileSize + totalSize += Int64(size) } } From c8f68758d66c539afd6ffab583d65c972dde6d5b Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 11 Mar 2026 11:26:34 +0100 Subject: [PATCH 07/11] Add timing benchmark for header scan overhead --- .../ImageTests/HeaderScanTimingTests.swift | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift diff --git a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift new file mode 100644 index 00000000..98b25659 --- /dev/null +++ b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift @@ -0,0 +1,180 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025-2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +#if os(macOS) +import ContainerizationArchive +import ContainerizationEXT4 +import ContainerizationExtras +import ContainerizationOCI +import Foundation +import SystemPackage +import Testing + +@testable import Containerization + +/// Measures header scan overhead vs. full unpack time using real container images +/// pulled from a registry. +/// +/// Run with: +/// ENABLE_TIMING_TESTS=1 swift test --filter ImageHeaderScanTimingTest +@Suite +struct ImageHeaderScanTimingTest { + private static let isEnabled = ProcessInfo.processInfo.environment["ENABLE_TIMING_TESTS"] != nil + + let store: ImageStore + let dir: URL + let contentStore: ContentStore + + init() throws { + let dir = FileManager.default.uniqueTemporaryDirectory(create: true) + let cs = try LocalContentStore(path: dir) + let store = try ImageStore(path: dir, contentStore: cs) + self.dir = dir + self.store = store + self.contentStore = cs + } + + @Test(.enabled(if: ImageHeaderScanTimingTest.isEnabled)) + func measureAlpineAndUbuntu() async throws { + defer { try? FileManager.default.removeItem(at: dir) } + + let images: [(reference: String, label: String)] = [ + ("ghcr.io/linuxcontainers/alpine:3.20", "Alpine 3.20"), + ("docker.io/library/ubuntu:24.04", "Ubuntu 24.04"), + ] + + for image in images { + print("\n==============================") + print("Image: \(image.label)") + print("==============================") + + let img = try await store.pull(reference: image.reference, platform: .current) + let manifest = try await img.manifest(for: .current) + + for (i, layer) in manifest.layers.enumerated() { + let content = try await img.getContent(digest: layer.digest) + let compression = compressionFilter(for: layer.mediaType) + let compressedSize = try FileManager.default.attributesOfItem(atPath: content.path.path)[.size] as? Int64 ?? 0 + let label = "\(image.label) layer \(i + 1)/\(manifest.layers.count) (\(layer.mediaType), \(formatBytes(compressedSize)) compressed)" + try measureOverhead(url: content.path, compression: compression, label: label) + } + } + } + + // MARK: - Helpers + + private func compressionFilter(for mediaType: String) -> ContainerizationArchive.Filter { + switch mediaType { + case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: + return .zstd + case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: + return .none + default: + return .gzip + } + } + + private func measureOverhead(url: URL, compression: ContainerizationArchive.Filter, label: String) throws { + let clock = ContinuousClock() + + print("\n--- \(label) ---\n") + + // For zstd, pre-decompress once (matching the production code path in EXT4Unpacker). + let scanFile: URL + let scanFilter: ContainerizationArchive.Filter + var decompressedFile: URL? + if compression == .zstd { + var decompressed: URL! + let decompressDuration = try clock.measure { + decompressed = try ArchiveReader.decompressZstd(url) + } + scanFile = decompressed + scanFilter = .none + decompressedFile = decompressed + print(" Zstd decompress: \(decompressDuration)") + } else { + scanFile = url + scanFilter = compression + } + defer { + if let decompressedFile { + ArchiveReader.cleanUpDecompressedZstd(decompressedFile) + } + } + + // 1. Header scan only + let scanDuration = try clock.measure { + let reader = try ArchiveReader(format: .paxRestricted, filter: scanFilter, file: scanFile) + var totalSize: Int64 = 0 + for (entry, _) in reader.makeStreamingIterator() { + if entry.fileType == .regular, let size = entry.size { + totalSize += Int64(size) + } + } + print(" Scanned total size: \(formatBytes(totalSize))") + } + print(" Header scan: \(scanDuration)") + + // 2. Full unpack without progress + let tempDir1 = FileManager.default.uniqueTemporaryDirectory() + let fsPath1 = FilePath(tempDir1.appendingPathComponent("no-progress.ext4.img", isDirectory: false)) + defer { try? FileManager.default.removeItem(at: tempDir1) } + + let unpackOnlyDuration = try clock.measure { + let formatter = try EXT4.Formatter(fsPath1) + try formatter.unpack(source: url, compression: compression) + try formatter.close() + } + print(" Unpack (no progress): \(unpackOnlyDuration)") + + // 3. Full unpack with progress (includes header scan pass) + let tempDir2 = FileManager.default.uniqueTemporaryDirectory() + let fsPath2 = FilePath(tempDir2.appendingPathComponent("with-progress.ext4.img", isDirectory: false)) + defer { try? FileManager.default.removeItem(at: tempDir2) } + + let noopProgress: ProgressHandler = { _ in } + let withProgressDuration = try clock.measure { + let formatter = try EXT4.Formatter(fsPath2) + try formatter.unpack(source: url, compression: compression, progress: noopProgress) + try formatter.close() + } + print(" Unpack (w/ progress): \(withProgressDuration)") + + // Summary + let scanMs = toMs(scanDuration) + let unpackMs = toMs(unpackOnlyDuration) + let withProgressMs = toMs(withProgressDuration) + let overheadMs = withProgressMs - unpackMs + let overheadPct = unpackMs > 0 ? (overheadMs / unpackMs) * 100 : 0 + + print("\n Summary:") + print(" Header scan alone: \(String(format: "%.1f", scanMs)) ms") + print(" Unpack only: \(String(format: "%.1f", unpackMs)) ms") + print(" Unpack + progress: \(String(format: "%.1f", withProgressMs)) ms") + print(" Overhead: \(String(format: "%.1f", overheadMs)) ms (\(String(format: "%.1f", overheadPct))%)") + } + + private func toMs(_ d: Duration) -> Double { + let c = d.components + return Double(c.seconds) * 1000.0 + Double(c.attoseconds) / 1e15 + } + + private func formatBytes(_ bytes: Int64) -> String { + let mb = Double(bytes) / 1_048_576.0 + return "\(String(format: "%.1f", mb)) MB" + } +} +#endif From 4b1140b00a1e463bfecb74864ff09415e5eee42d Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 11 Mar 2026 11:58:23 +0100 Subject: [PATCH 08/11] Make unpack methods async for deterministic progress delivery --- .../Image/Unpacker/EXT4Unpacker.swift | 9 +- .../Formatter+Unpack.swift | 92 +++++++++++-------- Sources/cctl/RootfsCommand.swift | 2 +- .../TestEXT4Unpacker.swift | 8 +- .../TestFormatterUnpack.swift | 37 +++++--- .../ImageTests/HeaderScanTimingTests.swift | 12 +-- 6 files changed, 90 insertions(+), 70 deletions(-) diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index 97bc9328..a7e11922 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -42,7 +42,7 @@ public struct EXT4Unpacker: Unpacker { archive: URL, compression: ContainerizationArchive.Filter, at path: URL - ) throws { + ) async throws { let cleanedPath = try prepareUnpackPath(path: path) let filesystem = try EXT4.Formatter( FilePath(cleanedPath), @@ -50,11 +50,10 @@ public struct EXT4Unpacker: Unpacker { ) defer { try? filesystem.close() } - try filesystem.unpack( + try await filesystem.unpack( source: archive, format: .paxRestricted, - compression: compression, - progress: nil + compression: compression ) } #endif @@ -120,7 +119,7 @@ public struct EXT4Unpacker: Unpacker { filter: resolved.filter, file: resolved.file ) - try filesystem.unpack(reader: reader, progress: progress) + try await filesystem.unpack(reader: reader, progress: progress) } return .block( diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift index 7ec46b8e..2b62095b 100644 --- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift +++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift @@ -25,7 +25,26 @@ private typealias Hardlinks = [FilePath: FilePath] extension EXT4.Formatter { /// Unpack the provided archive on to the ext4 filesystem. - public func unpack(reader: ArchiveReader, progress: ProgressHandler? = nil) throws { + public func unpack(reader: ArchiveReader, progress: ProgressHandler? = nil) async throws { + try await self.unpackEntries(reader: reader, progress: progress) + } + + /// Unpack an archive at the source URL on to the ext4 filesystem. + public func unpack( + source: URL, + format: ContainerizationArchive.Format = .paxRestricted, + compression: ContainerizationArchive.Filter = .gzip + ) async throws { + let reader = try ArchiveReader( + format: format, + filter: compression, + file: source + ) + try await self.unpack(reader: reader) + } + + /// Core unpack logic. When `progress` is nil the handler calls are skipped. + private func unpackEntries(reader: ArchiveReader, progress: ProgressHandler?) async throws { var hardlinks: Hardlinks = [:] // Allocate a single 128KiB reusable buffer for all files to minimize allocations // and reduce the number of read calls to libarchive. @@ -39,35 +58,33 @@ extension EXT4.Formatter { continue } - defer { - // Count the number of entries - if let progress { - Task { - await progress([ - ProgressEvent(event: "add-items", value: 1) - ]) - } - } - } - pathEntry = preProcessPath(s: pathEntry) let path = FilePath(pathEntry) if path.base.hasPrefix(".wh.") { if path.base == ".wh..wh..opq" { // whiteout directory try self.unlink(path: path.dir, directoryWhiteout: true) + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } let startIndex = path.base.index(path.base.startIndex, offsetBy: ".wh.".count) let filePath = String(path.base[startIndex...]) let dir: FilePath = path.dir try self.unlink(path: dir.join(filePath)) + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } if let hardlink = entry.hardlink { let hl = preProcessPath(s: hardlink) hardlinks[path] = FilePath(hl) + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } let ts = FileTimestamps( @@ -84,13 +101,8 @@ extension EXT4.Formatter { uid: entry.owner, gid: entry.group, xattrs: entry.xattrs, fileBuffer: reusableBuffer) - // Count the size of files if let progress, let size = entry.size { - Task { - await progress([ - ProgressEvent(event: "add-size", value: Int64(size)) - ]) - } + await progress([ProgressEvent(event: "add-size", value: Int64(size))]) } case .symbolicLink: var symlinkTarget: FilePath? @@ -104,6 +116,10 @@ extension EXT4.Formatter { default: continue } + + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } } guard hardlinks.acyclic else { throw UnpackError.circularLinks @@ -115,13 +131,13 @@ extension EXT4.Formatter { } } - /// Unpack an archive at the source URL on to the ext4 filesystem. + /// Unpack an archive at the source URL on to the ext4 filesystem with progress reporting. public func unpack( source: URL, format: ContainerizationArchive.Format = .paxRestricted, compression: ContainerizationArchive.Filter = .gzip, - progress: ProgressHandler? = nil - ) throws { + progress: @escaping ProgressHandler + ) async throws { // For zstd, decompress once and reuse for both passes to avoid double decompression. let fileToRead: URL let readerFilter: ContainerizationArchive.Filter @@ -141,26 +157,22 @@ extension EXT4.Formatter { } } - // Optional first pass: scan headers to get total size (fast, metadata only) - if let progress { - let sizeReader = try ArchiveReader( - format: format, - filter: readerFilter, - file: fileToRead - ) - var totalSize: Int64 = 0 - for (entry, _) in sizeReader.makeStreamingIterator() { - try Task.checkCancellation() - if entry.fileType == .regular, let size = entry.size { - totalSize += Int64(size) - } - } - if totalSize > 0 { - Task { - await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) - } + // First pass: scan headers to get total size (fast, metadata only) + let sizeReader = try ArchiveReader( + format: format, + filter: readerFilter, + file: fileToRead + ) + var totalSize: Int64 = 0 + for (entry, _) in sizeReader.makeStreamingIterator() { + try Task.checkCancellation() + if entry.fileType == .regular, let size = entry.size { + totalSize += Int64(size) } } + if totalSize > 0 { + await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) + } // Second pass: unpack let reader = try ArchiveReader( @@ -168,7 +180,7 @@ extension EXT4.Formatter { filter: readerFilter, file: fileToRead ) - try self.unpack(reader: reader, progress: progress) + try await self.unpack(reader: reader, progress: progress) } private func preProcessPath(s: String) -> String { diff --git a/Sources/cctl/RootfsCommand.swift b/Sources/cctl/RootfsCommand.swift index 10ee29f2..45787efa 100644 --- a/Sources/cctl/RootfsCommand.swift +++ b/Sources/cctl/RootfsCommand.swift @@ -96,7 +96,7 @@ extension Application { private func outputExt4(archive: URL, to path: URL) async throws { let unpacker = EXT4Unpacker(blockSizeInBytes: 256.mib()) - try unpacker.unpack(archive: archive, compression: .gzip, at: path) + try await unpacker.unpack(archive: archive, compression: .gzip, at: path) } private func outputImage(path: URL, reference: String) async throws { diff --git a/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift b/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift index eaf8f31e..1c22f54f 100644 --- a/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift +++ b/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift @@ -28,7 +28,7 @@ struct Ext4UnpackerTests { .appendingPathComponent("ext4.unpacked.oci.img.delme", isDirectory: false)) final class MockEXT4Unpacker { - static func Unpack(index: String, fsPath: FilePath) throws { + static func Unpack(index: String, fsPath: FilePath) async throws { let fs = try EXT4.Formatter(fsPath) let bundle = Bundle.module guard let indexPath = bundle.url(forResource: index, withExtension: nil) else { @@ -67,14 +67,14 @@ struct Ext4UnpackerTests { guard let layerPath = bundle.url(forResource: layerDigest, withExtension: nil) else { throw NSError(domain: "layer \(layerDigest) not found", code: 1) } - try fs.unpack(source: layerPath) + try await fs.unpack(source: layerPath) } try fs.close() } } - @Test func eXT4Unpacker() throws { - try MockEXT4Unpacker.Unpack(index: self.indexSHA, fsPath: self.fsPath) + @Test func eXT4Unpacker() async throws { + try await MockEXT4Unpacker.Unpack(index: self.indexSHA, fsPath: self.fsPath) let ext4 = try EXT4.EXT4Reader(blockDevice: self.fsPath) let children = try ext4.children(of: EXT4.RootInode) #expect( diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index 0a152b2c..65d166c4 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -42,10 +42,19 @@ struct Tar2EXT4Test: ~Copyable { "extendedattribute.test": Data([15, 26, 54, 1, 2, 4, 6, 7, 7]), ] + let layer1Path: URL + let layer2Path: URL + init() throws { - // create layer1 + // Compute paths before any throwing code to satisfy ~Copyable initialization rules. let layer1Path = FileManager.default.uniqueTemporaryDirectory() .appendingPathComponent("layer1.tar.gz", isDirectory: false) + let layer2Path = FileManager.default.uniqueTemporaryDirectory() + .appendingPathComponent("layer2.tar.gz", isDirectory: false) + self.layer1Path = layer1Path + self.layer2Path = layer2Path + + // create layer1 let layer1Archiver = try ArchiveWriter( configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) try layer1Archiver.open(file: layer1Path) @@ -57,8 +66,6 @@ struct Tar2EXT4Test: ~Copyable { try layer1Archiver.finishEncoding() // create layer2 - let layer2Path = FileManager.default.uniqueTemporaryDirectory() - .appendingPathComponent("layer2.tar.gz", isDirectory: false) let layer2Archiver = try ArchiveWriter( configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) try layer2Archiver.open(file: layer2Path) @@ -82,18 +89,23 @@ struct Tar2EXT4Test: ~Copyable { // a new layer overwriting over an existing layer try layer2Archiver.writeEntry(entry: WriteEntry.file(path: "/dir2/file1", permissions: 0o644), data: nil) try layer2Archiver.finishEncoding() + } - let unpacker = try EXT4.Formatter(fsPath) - try unpacker.unpack(source: layer1Path) - try unpacker.unpack(source: layer2Path) - try unpacker.close() + private func unpackLayers() async throws { + let formatter = try EXT4.Formatter(fsPath) + try await formatter.unpack(source: layer1Path) + try await formatter.unpack(source: layer2Path) + try formatter.close() } deinit { try? FileManager.default.removeItem(at: fsPath.url) + try? FileManager.default.removeItem(at: layer1Path.deletingLastPathComponent()) + try? FileManager.default.removeItem(at: layer2Path.deletingLastPathComponent()) } - @Test func testUnpackBasic() throws { + @Test func testUnpackBasic() async throws { + try await unpackLayers() let ext4 = try EXT4.EXT4Reader(blockDevice: fsPath) // just a directory let dir1Inode = try ext4.getInode(number: 12) @@ -196,12 +208,9 @@ struct UnpackProgressTest { // Unpack with progress tracking let formatter = try EXT4.Formatter(fsPath) - try formatter.unpack(source: archivePath, progress: progressHandler) + try await formatter.unpack(source: archivePath, progress: progressHandler) try formatter.close() - // Allow async progress tasks to complete - try await Task.sleep(for: .milliseconds(100)) - // Analyze collected events let allEvents = await collector.allEvents() @@ -292,7 +301,7 @@ struct UnpackProgressTest { } } - @Test func progressHandlerIsOptional() throws { + @Test func progressHandlerIsOptional() async throws { // Verify that unpacking works without a progress handler (existing behavior) let tempDir = FileManager.default.uniqueTemporaryDirectory() let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) @@ -314,7 +323,7 @@ struct UnpackProgressTest { // Unpack without progress handler - should not throw let formatter = try EXT4.Formatter(fsPath) - try formatter.unpack(source: archivePath) + try await formatter.unpack(source: archivePath) try formatter.close() // Verify the file was unpacked correctly diff --git a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift index 98b25659..1e4aa3dc 100644 --- a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift +++ b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift @@ -69,7 +69,7 @@ struct ImageHeaderScanTimingTest { let compression = compressionFilter(for: layer.mediaType) let compressedSize = try FileManager.default.attributesOfItem(atPath: content.path.path)[.size] as? Int64 ?? 0 let label = "\(image.label) layer \(i + 1)/\(manifest.layers.count) (\(layer.mediaType), \(formatBytes(compressedSize)) compressed)" - try measureOverhead(url: content.path, compression: compression, label: label) + try await measureOverhead(url: content.path, compression: compression, label: label) } } } @@ -87,7 +87,7 @@ struct ImageHeaderScanTimingTest { } } - private func measureOverhead(url: URL, compression: ContainerizationArchive.Filter, label: String) throws { + private func measureOverhead(url: URL, compression: ContainerizationArchive.Filter, label: String) async throws { let clock = ContinuousClock() print("\n--- \(label) ---\n") @@ -133,9 +133,9 @@ struct ImageHeaderScanTimingTest { let fsPath1 = FilePath(tempDir1.appendingPathComponent("no-progress.ext4.img", isDirectory: false)) defer { try? FileManager.default.removeItem(at: tempDir1) } - let unpackOnlyDuration = try clock.measure { + let unpackOnlyDuration = try await clock.measure { let formatter = try EXT4.Formatter(fsPath1) - try formatter.unpack(source: url, compression: compression) + try await formatter.unpack(source: url, compression: compression) try formatter.close() } print(" Unpack (no progress): \(unpackOnlyDuration)") @@ -146,9 +146,9 @@ struct ImageHeaderScanTimingTest { defer { try? FileManager.default.removeItem(at: tempDir2) } let noopProgress: ProgressHandler = { _ in } - let withProgressDuration = try clock.measure { + let withProgressDuration = try await clock.measure { let formatter = try EXT4.Formatter(fsPath2) - try formatter.unpack(source: url, compression: compression, progress: noopProgress) + try await formatter.unpack(source: url, compression: compression, progress: noopProgress) try formatter.close() } print(" Unpack (w/ progress): \(withProgressDuration)") From 1072ce0e8c69daa6847e9a693c7dd08310e19a92 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Wed, 11 Mar 2026 12:08:12 +0100 Subject: [PATCH 09/11] Use Int64 consistently for all progress event values --- Sources/ContainerizationEXT4/Formatter+Unpack.swift | 8 ++++---- Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift index 2b62095b..965cf64e 100644 --- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift +++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift @@ -65,7 +65,7 @@ extension EXT4.Formatter { if path.base == ".wh..wh..opq" { // whiteout directory try self.unlink(path: path.dir, directoryWhiteout: true) if let progress { - await progress([ProgressEvent(event: "add-items", value: 1)]) + await progress([ProgressEvent(event: "add-items", value: Int64(1))]) } continue } @@ -74,7 +74,7 @@ extension EXT4.Formatter { let dir: FilePath = path.dir try self.unlink(path: dir.join(filePath)) if let progress { - await progress([ProgressEvent(event: "add-items", value: 1)]) + await progress([ProgressEvent(event: "add-items", value: Int64(1))]) } continue } @@ -83,7 +83,7 @@ extension EXT4.Formatter { let hl = preProcessPath(s: hardlink) hardlinks[path] = FilePath(hl) if let progress { - await progress([ProgressEvent(event: "add-items", value: 1)]) + await progress([ProgressEvent(event: "add-items", value: Int64(1))]) } continue } @@ -118,7 +118,7 @@ extension EXT4.Formatter { } if let progress { - await progress([ProgressEvent(event: "add-items", value: 1)]) + await progress([ProgressEvent(event: "add-items", value: Int64(1))]) } } guard hardlinks.acyclic else { diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index 65d166c4..e546287c 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -227,8 +227,8 @@ struct UnpackProgressTest { let value = try #require(event.value as? Int64, "add-size value should be Int64") cumulativeSize += value case "add-items": - let value = try #require(event.value as? Int, "add-items value should be Int") - itemCount += Int64(value) + let value = try #require(event.value as? Int64, "add-items value should be Int64") + itemCount += value default: break } From af5682bbf9ef5b6d3d044d51e5ba01d72c63d286 Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Thu, 12 Mar 2026 09:10:02 +0100 Subject: [PATCH 10/11] Consolidate unpack overloads and add total item count --- .../Image/Unpacker/EXT4Unpacker.swift | 46 +++---- .../Formatter+Unpack.swift | 120 ++++++++++-------- .../TestFormatterUnpack.swift | 39 ++++-- .../ImageTests/HeaderScanTimingTests.swift | 12 +- 4 files changed, 115 insertions(+), 102 deletions(-) diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index a7e11922..8a7fb910 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -106,9 +106,24 @@ public struct EXT4Unpacker: Unpacker { } if let progress { - let totalSize = try totalRegularFileBytes(in: resolvedLayers) + var totalSize: Int64 = 0 + var totalItems: Int64 = 0 + for layer in resolvedLayers { + try Task.checkCancellation() + let totals = try EXT4.Formatter.scanArchiveHeaders( + format: .paxRestricted, filter: layer.filter, file: layer.file) + totalSize += totals.size + totalItems += totals.items + } + var totalEvents: [ProgressEvent] = [] if totalSize > 0 { - await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) + totalEvents.append(ProgressEvent(event: "add-total-size", value: totalSize)) + } + if totalItems > 0 { + totalEvents.append(ProgressEvent(event: "add-total-items", value: totalItems)) + } + if !totalEvents.isEmpty { + await progress(totalEvents) } } @@ -152,32 +167,5 @@ public struct EXT4Unpacker: Unpacker { throw ContainerizationError(.unsupported, message: "media type \(mediaType) not supported.") } } - - private func totalRegularFileBytes( - in layers: [(file: URL, filter: ContainerizationArchive.Filter)] - ) throws -> Int64 { - var totalSize: Int64 = 0 - - for layer in layers { - try Task.checkCancellation() - - let reader = try ArchiveReader( - format: .paxRestricted, - filter: layer.filter, - file: layer.file - ) - - for (entry, _) in reader.makeStreamingIterator() { - try Task.checkCancellation() - guard entry.fileType == .regular, let size = entry.size else { - continue - } - - totalSize += Int64(size) - } - } - - return totalSize - } #endif } diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift index 965cf64e..de6203fb 100644 --- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift +++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift @@ -33,14 +33,71 @@ extension EXT4.Formatter { public func unpack( source: URL, format: ContainerizationArchive.Format = .paxRestricted, - compression: ContainerizationArchive.Filter = .gzip + compression: ContainerizationArchive.Filter = .gzip, + progress: ProgressHandler? = nil ) async throws { + // For zstd, decompress once and reuse for both passes to avoid double decompression. + let fileToRead: URL + let readerFilter: ContainerizationArchive.Filter + var decompressedFile: URL? + if progress != nil && compression == .zstd { + let decompressed = try ArchiveReader.decompressZstd(source) + fileToRead = decompressed + readerFilter = .none + decompressedFile = decompressed + } else { + fileToRead = source + readerFilter = compression + } + defer { + if let decompressedFile { + ArchiveReader.cleanUpDecompressedZstd(decompressedFile) + } + } + + if let progress { + // First pass: scan headers to get totals (fast, metadata only) + let totals = try Self.scanArchiveHeaders(format: format, filter: readerFilter, file: fileToRead) + var totalEvents: [ProgressEvent] = [] + if totals.size > 0 { + totalEvents.append(ProgressEvent(event: "add-total-size", value: totals.size)) + } + if totals.items > 0 { + totalEvents.append(ProgressEvent(event: "add-total-items", value: totals.items)) + } + if !totalEvents.isEmpty { + await progress(totalEvents) + } + } + + // Unpack pass let reader = try ArchiveReader( format: format, - filter: compression, - file: source + filter: readerFilter, + file: fileToRead ) - try await self.unpack(reader: reader) + try await self.unpackEntries(reader: reader, progress: progress) + } + + /// Scan archive headers to count the total number of bytes in regular files + /// and the total number of entries. + public static func scanArchiveHeaders( + format: ContainerizationArchive.Format, + filter: ContainerizationArchive.Filter, + file: URL + ) throws -> (size: Int64, items: Int64) { + let reader = try ArchiveReader(format: format, filter: filter, file: file) + var totalSize: Int64 = 0 + var totalItems: Int64 = 0 + for (entry, _) in reader.makeStreamingIterator() { + try Task.checkCancellation() + guard entry.path != nil else { continue } + totalItems += 1 + if entry.fileType == .regular, let size = entry.size { + totalSize += Int64(size) + } + } + return (size: totalSize, items: totalItems) } /// Core unpack logic. When `progress` is nil the handler calls are skipped. @@ -114,6 +171,9 @@ extension EXT4.Formatter { uid: entry.owner, gid: entry.group, xattrs: entry.xattrs) default: + if let progress { + await progress([ProgressEvent(event: "add-items", value: Int64(1))]) + } continue } @@ -131,58 +191,6 @@ extension EXT4.Formatter { } } - /// Unpack an archive at the source URL on to the ext4 filesystem with progress reporting. - public func unpack( - source: URL, - format: ContainerizationArchive.Format = .paxRestricted, - compression: ContainerizationArchive.Filter = .gzip, - progress: @escaping ProgressHandler - ) async throws { - // For zstd, decompress once and reuse for both passes to avoid double decompression. - let fileToRead: URL - let readerFilter: ContainerizationArchive.Filter - var decompressedFile: URL? - if compression == .zstd { - let decompressed = try ArchiveReader.decompressZstd(source) - fileToRead = decompressed - readerFilter = .none - decompressedFile = decompressed - } else { - fileToRead = source - readerFilter = compression - } - defer { - if let decompressedFile { - ArchiveReader.cleanUpDecompressedZstd(decompressedFile) - } - } - - // First pass: scan headers to get total size (fast, metadata only) - let sizeReader = try ArchiveReader( - format: format, - filter: readerFilter, - file: fileToRead - ) - var totalSize: Int64 = 0 - for (entry, _) in sizeReader.makeStreamingIterator() { - try Task.checkCancellation() - if entry.fileType == .regular, let size = entry.size { - totalSize += Int64(size) - } - } - if totalSize > 0 { - await progress([ProgressEvent(event: "add-total-size", value: totalSize)]) - } - - // Second pass: unpack - let reader = try ArchiveReader( - format: format, - filter: readerFilter, - file: fileToRead - ) - try await self.unpack(reader: reader, progress: progress) - } - private func preProcessPath(s: String) -> String { var p = s if p.hasPrefix("./") { diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index e546287c..5a4b682f 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -215,6 +215,7 @@ struct UnpackProgressTest { let allEvents = await collector.allEvents() var reportedTotalSize: Int64 = 0 + var reportedTotalItems: Int64 = 0 var cumulativeSize: Int64 = 0 var itemCount: Int64 = 0 @@ -223,6 +224,9 @@ struct UnpackProgressTest { case "add-total-size": let value = try #require(event.value as? Int64, "add-total-size value should be Int64") reportedTotalSize += value + case "add-total-items": + let value = try #require(event.value as? Int64, "add-total-items value should be Int64") + reportedTotalItems += value case "add-size": let value = try #require(event.value as? Int64, "add-size value should be Int64") cumulativeSize += value @@ -235,15 +239,19 @@ struct UnpackProgressTest { } // Verify the progress contract + let expectedTotalItems: Int64 = 5 // 1 dir + 4 files #expect( reportedTotalSize == expectedTotalSize, "Total size should be \(expectedTotalSize) bytes, got \(reportedTotalSize)") + #expect( + reportedTotalItems == expectedTotalItems, + "Total items should be \(expectedTotalItems), got \(reportedTotalItems)") #expect( cumulativeSize == expectedTotalSize, "Cumulative size should equal total size (\(expectedTotalSize)), got \(cumulativeSize)") #expect( - itemCount == 5, - "Should have processed 5 entries (1 dir + 4 files), got \(itemCount)") + itemCount == expectedTotalItems, + "Should have processed \(expectedTotalItems) entries (1 dir + 4 files), got \(itemCount)") // Verify incremental progress: we should get separate add-size events for each file let addSizeEvents = allEvents.filter { $0.event == "add-size" } @@ -292,13 +300,26 @@ struct UnpackProgressTest { progressSnapshotCount == addSizeEvents.count, "Should produce one monotonic snapshot per add-size update") - // Verify add-total-size comes before add-size events (first pass before second pass) - if let totalSizeIndex = allEvents.firstIndex(where: { $0.event == "add-total-size" }), - let firstAddSizeIndex = allEvents.firstIndex(where: { $0.event == "add-size" }) { - #expect( - totalSizeIndex < firstAddSizeIndex, - "add-total-size should be reported before add-size events") - } + // Verify totals come before incremental events (first pass before second pass) + let totalSizeIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-total-size" }), + "add-total-size event should be present") + let firstAddSizeIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-size" }), + "add-size event should be present") + #expect( + totalSizeIndex < firstAddSizeIndex, + "add-total-size should be reported before add-size events") + + let totalItemsIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-total-items" }), + "add-total-items event should be present") + let firstAddItemsIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-items" }), + "add-items event should be present") + #expect( + totalItemsIndex < firstAddItemsIndex, + "add-total-items should be reported before add-items events") } @Test func progressHandlerIsOptional() async throws { diff --git a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift index 1e4aa3dc..dcf8641b 100644 --- a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift +++ b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift @@ -116,16 +116,12 @@ struct ImageHeaderScanTimingTest { } // 1. Header scan only + var scannedTotals: (size: Int64, items: Int64) = (0, 0) let scanDuration = try clock.measure { - let reader = try ArchiveReader(format: .paxRestricted, filter: scanFilter, file: scanFile) - var totalSize: Int64 = 0 - for (entry, _) in reader.makeStreamingIterator() { - if entry.fileType == .regular, let size = entry.size { - totalSize += Int64(size) - } - } - print(" Scanned total size: \(formatBytes(totalSize))") + scannedTotals = try EXT4.Formatter.scanArchiveHeaders( + format: .paxRestricted, filter: scanFilter, file: scanFile) } + print(" Scanned total size: \(formatBytes(scannedTotals.size)) (\(scannedTotals.items) items)") print(" Header scan: \(scanDuration)") // 2. Full unpack without progress From 9729f462878a3b9ba5e37bd69e6ae2330cb1fb6e Mon Sep 17 00:00:00 2001 From: Anthony DePasquale Date: Thu, 12 Mar 2026 21:25:18 +0100 Subject: [PATCH 11/11] Format --- Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift | 8 ++++---- .../ImageTests/HeaderScanTimingTests.swift | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index 5a4b682f..8f1ea01b 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -168,10 +168,10 @@ struct UnpackProgressTest { } // Create test data with specific sizes - let file1Data = Data(repeating: 0xAA, count: 1024) // 1 KiB - let file2Data = Data(repeating: 0xBB, count: 4096) // 4 KiB - let file3Data = Data(repeating: 0xCC, count: 512) // 512 bytes - let expectedTotalSize: Int64 = 1024 + 4096 + 512 // 5632 bytes + let file1Data = Data(repeating: 0xAA, count: 1024) // 1 KiB + let file2Data = Data(repeating: 0xBB, count: 4096) // 4 KiB + let file3Data = Data(repeating: 0xCC, count: 512) // 512 bytes + let expectedTotalSize: Int64 = 1024 + 4096 + 512 // 5632 bytes // Build the archive let archiver = try ArchiveWriter( diff --git a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift index dcf8641b..f536267c 100644 --- a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift +++ b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift @@ -1,5 +1,5 @@ //===----------------------------------------------------------------------===// -// Copyright © 2025-2026 Apple Inc. and the Containerization project authors. +// Copyright © 2026 Apple Inc. and the Containerization project authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.