Rework how we decide what to filter out of layer diffs

After narrowing down the list of parent directories which we might need
to exclude to those which are present in the base image, filter them out
of the layer diff as it is generated.

Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
This commit is contained in:
Nalin Dahyabhai 2025-08-07 17:28:45 -04:00 committed by openshift-cherrypick-robot
parent 870c53c695
commit 72e680a9eb
2 changed files with 122 additions and 31 deletions

124
image.go
View File

@ -107,6 +107,8 @@ type containerImageRef struct {
extraImageContent map[string]string extraImageContent map[string]string
compatSetParent types.OptionalBool compatSetParent types.OptionalBool
layerExclusions []copier.ConditionalRemovePath layerExclusions []copier.ConditionalRemovePath
layerMountTargets []copier.ConditionalRemovePath
layerPullUps []copier.EnsureParentPath
unsetAnnotations []string unsetAnnotations []string
setAnnotations []string setAnnotations []string
createdAnnotation types.OptionalBool createdAnnotation types.OptionalBool
@ -784,6 +786,55 @@ func (mb *ociManifestBuilder) manifestAndConfig() ([]byte, []byte, error) {
return omanifestbytes, oconfig, nil return omanifestbytes, oconfig, nil
} }
// filterExclusionsByImage returns a slice of the members of "exclusions" which are present in the image with the specified ID
func (i containerImageRef) filterExclusionsByImage(exclusions []copier.EnsureParentPath, imageID string) ([]copier.EnsureParentPath, error) {
if len(exclusions) == 0 || imageID == "" {
return nil, nil
}
var paths []copier.EnsureParentPath
mountPoint, err := i.store.MountImage(imageID, nil, i.mountLabel)
if err != nil {
return nil, err
}
defer func() {
if _, err := i.store.UnmountImage(imageID, false); err != nil {
logrus.Debugf("unmounting image %q: %v", imageID, err)
}
}()
globs := make([]string, 0, len(exclusions))
for _, exclusion := range exclusions {
globs = append(globs, exclusion.Path)
}
options := copier.StatOptions{}
stats, err := copier.Stat(mountPoint, mountPoint, options, globs)
if err != nil {
return nil, fmt.Errorf("checking for potential exclusion items in image %q: %w", imageID, err)
}
for _, stat := range stats {
for _, exclusion := range exclusions {
if stat.Glob != exclusion.Path {
continue
}
for result, stat := range stat.Results {
if result != exclusion.Path {
continue
}
if exclusion.ModTime != nil && !exclusion.ModTime.Equal(stat.ModTime) {
continue
}
if exclusion.Mode != nil && *exclusion.Mode != stat.Mode {
continue
}
if exclusion.Owner != nil && (int64(exclusion.Owner.UID) != stat.UID && int64(exclusion.Owner.GID) != stat.GID) {
continue
}
paths = append(paths, exclusion)
}
}
}
return paths, nil
}
func (i *containerImageRef) NewImageSource(_ context.Context, _ *types.SystemContext) (src types.ImageSource, err error) { func (i *containerImageRef) NewImageSource(_ context.Context, _ *types.SystemContext) (src types.ImageSource, err error) {
// These maps will let us check if a layer ID is part of one group or another. // These maps will let us check if a layer ID is part of one group or another.
parentLayerIDs := make(map[string]bool) parentLayerIDs := make(map[string]bool)
@ -948,6 +999,7 @@ func (i *containerImageRef) NewImageSource(_ context.Context, _ *types.SystemCon
} }
var rc io.ReadCloser var rc io.ReadCloser
var errChan chan error var errChan chan error
var layerExclusions []copier.ConditionalRemovePath
if i.confidentialWorkload.Convert { if i.confidentialWorkload.Convert {
// Convert the root filesystem into an encrypted disk image. // Convert the root filesystem into an encrypted disk image.
rc, err = i.extractConfidentialWorkloadFS(i.confidentialWorkload) rc, err = i.extractConfidentialWorkloadFS(i.confidentialWorkload)
@ -980,6 +1032,18 @@ func (i *containerImageRef) NewImageSource(_ context.Context, _ *types.SystemCon
if i.emptyLayer && layerID == i.layerID { if i.emptyLayer && layerID == i.layerID {
continue continue
} }
if layerID == i.layerID {
// We need to filter out any mount targets that we created.
layerExclusions = append(slices.Clone(i.layerExclusions), i.layerMountTargets...)
// And we _might_ need to filter out directories that modified
// by creating and removing mount targets, _if_ they were the
// same in the base image for this stage.
layerPullUps, err := i.filterExclusionsByImage(i.layerPullUps, i.fromImageID)
if err != nil {
return nil, fmt.Errorf("checking which exclusions are in base image %q: %w", i.fromImageID, err)
}
layerExclusions = append(layerExclusions, layerPullUps...)
}
// Extract this layer, one of possibly many. // Extract this layer, one of possibly many.
rc, err = i.store.Diff("", layerID, diffOptions) rc, err = i.store.Diff("", layerID, diffOptions)
if err != nil { if err != nil {
@ -1002,7 +1066,7 @@ func (i *containerImageRef) NewImageSource(_ context.Context, _ *types.SystemCon
// At this point, there are multiple ways that can happen. // At this point, there are multiple ways that can happen.
diffBeingAltered := i.compression != archive.Uncompressed diffBeingAltered := i.compression != archive.Uncompressed
diffBeingAltered = diffBeingAltered || i.layerModTime != nil || i.layerLatestModTime != nil diffBeingAltered = diffBeingAltered || i.layerModTime != nil || i.layerLatestModTime != nil
diffBeingAltered = diffBeingAltered || len(i.layerExclusions) != 0 diffBeingAltered = diffBeingAltered || len(layerExclusions) != 0
if diffBeingAltered { if diffBeingAltered {
destHasher = digest.Canonical.Digester() destHasher = digest.Canonical.Digester()
multiWriter = io.MultiWriter(counter, destHasher.Hash()) multiWriter = io.MultiWriter(counter, destHasher.Hash())
@ -1022,7 +1086,7 @@ func (i *containerImageRef) NewImageSource(_ context.Context, _ *types.SystemCon
// Use specified timestamps in the layer, if we're doing that for history // Use specified timestamps in the layer, if we're doing that for history
// entries. // entries.
nestedWriteCloser := ioutils.NewWriteCloserWrapper(writer, writeCloser.Close) nestedWriteCloser := ioutils.NewWriteCloserWrapper(writer, writeCloser.Close)
writeCloser = makeFilteredLayerWriteCloser(nestedWriteCloser, i.layerModTime, i.layerLatestModTime, i.layerExclusions) writeCloser = makeFilteredLayerWriteCloser(nestedWriteCloser, i.layerModTime, i.layerLatestModTime, layerExclusions)
writer = writeCloser writer = writeCloser
// Okay, copy from the raw diff through the filter, compressor, and counter and // Okay, copy from the raw diff through the filter, compressor, and counter and
// digesters. // digesters.
@ -1443,6 +1507,24 @@ func (b *Builder) makeContainerImageRef(options CommitOptions) (*containerImageR
return nil, fmt.Errorf("getting the per-container data directory for %q: %w", b.ContainerID, err) return nil, fmt.Errorf("getting the per-container data directory for %q: %w", b.ContainerID, err)
} }
gatherExclusions := func(excludesFiles []string) ([]copier.ConditionalRemovePath, error) {
var excludes []copier.ConditionalRemovePath
for _, excludesFile := range excludesFiles {
if strings.Contains(excludesFile, containerExcludesSubstring) {
continue
}
excludesData, err := os.ReadFile(excludesFile)
if err != nil {
return nil, fmt.Errorf("reading commit exclusions for %q: %w", b.ContainerID, err)
}
var theseExcludes []copier.ConditionalRemovePath
if err := json.Unmarshal(excludesData, &theseExcludes); err != nil {
return nil, fmt.Errorf("parsing commit exclusions for %q: %w", b.ContainerID, err)
}
excludes = append(excludes, theseExcludes...)
}
return excludes, nil
}
mountTargetFiles, err := filepath.Glob(filepath.Join(cdir, containerExcludesDir, "*")) mountTargetFiles, err := filepath.Glob(filepath.Join(cdir, containerExcludesDir, "*"))
if err != nil { if err != nil {
return nil, fmt.Errorf("checking for commit exclusions for %q: %w", b.ContainerID, err) return nil, fmt.Errorf("checking for commit exclusions for %q: %w", b.ContainerID, err)
@ -1451,29 +1533,27 @@ func (b *Builder) makeContainerImageRef(options CommitOptions) (*containerImageR
if err != nil { if err != nil {
return nil, fmt.Errorf("checking for commit pulled-up items for %q: %w", b.ContainerID, err) return nil, fmt.Errorf("checking for commit pulled-up items for %q: %w", b.ContainerID, err)
} }
excludesFiles := slices.Clone(mountTargetFiles) layerMountTargets, err := gatherExclusions(mountTargetFiles)
if !options.ConfidentialWorkloadOptions.Convert && !options.Squash { if err != nil {
excludesFiles = append(excludesFiles, pulledUpFiles...) return nil, err
}
if len(layerMountTargets) > 0 {
logrus.Debugf("these items were created for use as mount targets: %#v", layerMountTargets)
}
layerPullUps, err := gatherExclusions(pulledUpFiles)
if err != nil {
return nil, err
}
if len(layerPullUps) > 0 {
logrus.Debugf("these items appear to have been pulled up: %#v", layerPullUps)
} }
var layerExclusions []copier.ConditionalRemovePath var layerExclusions []copier.ConditionalRemovePath
for _, excludesFile := range excludesFiles {
if strings.Contains(excludesFile, containerExcludesSubstring) {
continue
}
excludesData, err := os.ReadFile(excludesFile)
if err != nil {
return nil, fmt.Errorf("reading commit exclusions for %q: %w", b.ContainerID, err)
}
var excludes []copier.ConditionalRemovePath
if err := json.Unmarshal(excludesData, &excludes); err != nil {
return nil, fmt.Errorf("parsing commit exclusions for %q: %w", b.ContainerID, err)
}
layerExclusions = append(layerExclusions, excludes...)
}
if options.CompatLayerOmissions == types.OptionalBoolTrue { if options.CompatLayerOmissions == types.OptionalBoolTrue {
layerExclusions = append(layerExclusions, compatLayerExclusions...) layerExclusions = slices.Clone(compatLayerExclusions)
}
if len(layerExclusions) > 0 {
logrus.Debugf("excluding these items from committed layer: %#v", layerExclusions)
} }
logrus.Debugf("excluding these items from committed layer: %#v", layerExclusions)
manifestType := options.PreferredManifestType manifestType := options.PreferredManifestType
if manifestType == "" { if manifestType == "" {
@ -1577,6 +1657,8 @@ func (b *Builder) makeContainerImageRef(options CommitOptions) (*containerImageR
extraImageContent: maps.Clone(options.ExtraImageContent), extraImageContent: maps.Clone(options.ExtraImageContent),
compatSetParent: options.CompatSetParent, compatSetParent: options.CompatSetParent,
layerExclusions: layerExclusions, layerExclusions: layerExclusions,
layerMountTargets: layerMountTargets,
layerPullUps: layerPullUps,
createdAnnotation: options.CreatedAnnotation, createdAnnotation: options.CreatedAnnotation,
} }
if ref.created != nil { if ref.created != nil {

View File

@ -8707,15 +8707,24 @@ RUN --mount=type=bind,ro,src=/Dockerfile,target=/etc/removed-file --mount=type=b
_EOF _EOF
local source_date_epoch=$(date +%s) local source_date_epoch=$(date +%s)
# build a copy of the image that's all squashed # build a copy of the image that's all squashed from the start
run_buildah build --no-cache --squash --source-date-epoch=$source_date_epoch --rewrite-timestamp -t dir:${TEST_SCRATCH_DIR}/squashed-image ${contextdir} run_buildah build --no-cache --squash=true --source-date-epoch=$source_date_epoch --rewrite-timestamp -t dir:${TEST_SCRATCH_DIR}/squashed-image ${contextdir}
# build a copy of the image that's "normal" # build a copy of the image where we only commit at the end
run_buildah build --no-cache --layers --source-date-epoch=$source_date_epoch --rewrite-timestamp -t not-squashed-image ${contextdir} run_buildah build --no-cache --layers=false --source-date-epoch=$source_date_epoch --rewrite-timestamp -t not-layered-image ${contextdir}
# now squash it, with no record of needing to exclude anything carrying over # build a copy of the image where we commit at every step
run_buildah from --name not-squashed-container not-squashed-image run_buildah build --no-cache --layers=true --source-date-epoch=$source_date_epoch --rewrite-timestamp -t layered-image ${contextdir}
run_buildah commit --squash not-squashed-container dir:${TEST_SCRATCH_DIR}/squashed-later-image # now squash them, with no internal record of needing to exclude anything carrying over
# find the diffs for the two versions, which should look the same run_buildah from --name not-layered-container not-layered-image
run_buildah commit --squash not-layered-container dir:${TEST_SCRATCH_DIR}/squashed-not-layered-image
run_buildah from --name layered-container layered-image
run_buildah commit --squash layered-container dir:${TEST_SCRATCH_DIR}/squashed-layered-image
# find the diffs for the three versions, which should look the same thanks to the --source-date-epoch --rewrite-timestamp combo
local squashed=${TEST_SCRATCH_DIR}/squashed-image/$(dir_image_last_diff ${TEST_SCRATCH_DIR}/squashed-image) local squashed=${TEST_SCRATCH_DIR}/squashed-image/$(dir_image_last_diff ${TEST_SCRATCH_DIR}/squashed-image)
local squashedlater=${TEST_SCRATCH_DIR}/squashed-later-image/$(dir_image_last_diff ${TEST_SCRATCH_DIR}/squashed-later-image) local notlayered=${TEST_SCRATCH_DIR}/squashed-not-layered-image/$(dir_image_last_diff ${TEST_SCRATCH_DIR}/squashed-not-layered-image)
cmp ${squashed} ${squashedlater} local layered=${TEST_SCRATCH_DIR}/squashed-layered-image/$(dir_image_last_diff ${TEST_SCRATCH_DIR}/squashed-layered-image)
tar tvf ${squashed} > ${TEST_SCRATCH_DIR}/squashed-image-rootfs.txt
tar tvf ${notlayered} > ${TEST_SCRATCH_DIR}/squashed-not-layered-image-rootfs.txt
tar tvf ${layered} > ${TEST_SCRATCH_DIR}/squashed-layered-image-rootfs.txt
diff -u ${TEST_SCRATCH_DIR}/squashed-layered-image-rootfs.txt ${TEST_SCRATCH_DIR}/squashed-image-rootfs.txt
diff -u ${TEST_SCRATCH_DIR}/squashed-layered-image-rootfs.txt ${TEST_SCRATCH_DIR}/squashed-not-layered-image-rootfs.txt
} }