diff --git a/sanitizer/BUILD.bazel b/sanitizer/BUILD.bazel index 3516f3f..c8ce9db 100644 --- a/sanitizer/BUILD.bazel +++ b/sanitizer/BUILD.bazel @@ -26,6 +26,7 @@ go_test( size = "small", srcs = [ "sanitizer_nix_test.go", + "sanitizer_test.go", "sanitizer_win_test.go", ], embed = [":sanitizer"], diff --git a/sanitizer/sanitizer.go b/sanitizer/sanitizer.go index 3d27f99..9bfedee 100644 --- a/sanitizer/sanitizer.go +++ b/sanitizer/sanitizer.go @@ -18,6 +18,8 @@ package sanitizer import ( "os" + "regexp" + "strings" ) const ( @@ -25,6 +27,10 @@ const ( nixPathSeparator = `/` ) +var ( + winShortFilenameRegex = regexp.MustCompile(`~\d+\.?`) +) + // SanitizePath sanitizes the supplied path by purely lexical processing. // The return value is safe to be joined together with a base directory (if the basedir is empty // and no symlinks are present there). @@ -43,3 +49,20 @@ func SanitizePath(in string) string { return sanitized } + +// HasWindowsShortFilenames reports if any path component look like a Windows short filename. +// Short filenames on Windows may look like this: +// 1(3)~1.PNG 1 (3) (1).png +// DOWNLO~1 Downloads +// FOOOOO~1.JPG fooooooooo.png.gif.jpg +func HasWindowsShortFilenames(in string) bool { + in = strings.ReplaceAll(in, "\\", "/") + parts := strings.Split(in, "/") + for _, part := range parts { + matched := winShortFilenameRegex.MatchString(part) + if matched { + return true + } + } + return false +} diff --git a/sanitizer/sanitizer_test.go b/sanitizer/sanitizer_test.go new file mode 100644 index 0000000..115f2e2 --- /dev/null +++ b/sanitizer/sanitizer_test.go @@ -0,0 +1,53 @@ +// Copyright 2024 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sanitizer + +import ( + "strings" + "testing" +) + +func TestHasWindowsShortFilenames(t *testing.T) { + tests := []struct { + in string + want bool + }{ + {in: "ANDROI~2", want: true}, + {in: "foo/ANDROI~2", want: true}, + {in: "ANDROI~2/bar", want: true}, + {in: "foo/ANDROI~2/bar", want: true}, + // Same with different case + {in: "Androi~2", want: true}, + {in: "foo/Androi~2", want: true}, + {in: "Androi~2/bar", want: true}, + {in: "foo/Androi~2/bar", want: true}, + // File extension + {in: "FOOOOO~1.JPG ", want: true}, + {in: "foo/FOOOOO~1.JPG", want: true}, + {in: "FOOOOO~1.JPG/bar", want: true}, + {in: "foo/FOOOOO~1.JPG/bar", want: true}, + // Not a short filename + {in: "3D Objects", want: false}, + {in: "Some~Stuff", want: false}, + } + for _, tc := range tests { + for _, a := range []string{tc.in, strings.ReplaceAll(tc.in, "\\", "/")} { + got := HasWindowsShortFilenames(a) + if got != tc.want { + t.Errorf("HasWindowsShortFilenames(%q) = %v, want %v", a, got, tc.want) + } + } + } +} diff --git a/tar/tar.go b/tar/tar.go index 7e01875..087a02c 100644 --- a/tar/tar.go +++ b/tar/tar.go @@ -186,11 +186,16 @@ const ( // By default, this is activated only on MacOS and Windows builds. If you are extracting to a // case insensitive filesystem on a Unix platform, you should activate this feature explicitly. PreventCaseInsensitiveSymlinkTraversal SecurityMode = 64 + // SkipWindowsShortFilenames drops archive entries that have a path component that look like a + // Windows short filename (e.g. GIT~1). + // By default, this is activated only on Windows builds. If you are extracting to a Windows + // filesystem on a non-Windows platform, you should activate this feature explicitly. + SkipWindowsShortFilenames SecurityMode = 128 ) // MaximumSecurityMode enables all features for maximum security. // Recommended for integrations that need file contents only (and nothing unix specific). -const MaximumSecurityMode = SkipSpecialFiles | SanitizeFileMode | SanitizeFilenames | PreventSymlinkTraversal | DropXattrs | PreventCaseInsensitiveSymlinkTraversal +const MaximumSecurityMode = SkipSpecialFiles | SanitizeFileMode | SanitizeFilenames | PreventSymlinkTraversal | DropXattrs | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames var ( // ErrHeader invalid tar header @@ -297,6 +302,10 @@ func (tr *Reader) Next() (*tar.Header, error) { h.Name = sanitizer.SanitizePath(h.Name) } + if tr.securityMode&SkipWindowsShortFilenames != 0 && sanitizer.HasWindowsShortFilenames(h.Name) { + continue + } + if tr.securityMode&PreventSymlinkTraversal != 0 { hName := sanitizer.SanitizePath(h.Name) hName = strings.TrimSuffix(hName, "/") diff --git a/tar/tar_test.go b/tar/tar_test.go index 0828ca2..459f842 100644 --- a/tar/tar_test.go +++ b/tar/tar_test.go @@ -93,6 +93,23 @@ var ( */ //go:embed case-insensitive.tar eTraverseViaCaseInsensitiveLinksTar []byte + + /* + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 3D Objects + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 Androi~2 + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 ANDROI~2 + drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/ + drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/ANDROI~2/ + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 foo/ANDROI~2/bar + drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/FOOOOO~1.JPG/ + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 foo/FOOOOO~1.JPG/bar + drwxr-x--- imrer/primarygroup 0 2024-10-11 14:27 foo/Androi~2/ + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 foo/Androi~2/bar + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 FOOOOO~1.JPG + -rw-r----- imrer/primarygroup 5 2024-10-11 14:27 Some~Stuff + */ + //go:embed winshort.tar + eWinShortTar []byte ) func isSlashRune(r rune) bool { return r == '/' || r == '\\' } @@ -493,3 +510,28 @@ func TestSafetarLinksCaseInsensitive(t *testing.T) { t.Fatal(err) } } + +func TestWindowsShortFilenames(t *testing.T) { + buf := bytes.NewBuffer(eWinShortTar[:]) + t.Logf("size of archive: %d", len(buf.Bytes())) + tr := NewReader(buf) + tr.SetSecurityMode(tr.GetSecurityMode() | SkipWindowsShortFilenames) + + for i, want := range []string{"3D Objects", "foo/", "Some~Stuff"} { + hdr, err := tr.Next() + if err != nil { + t.Errorf("No errors were expected at entry %d. Next() = %+v, want nil", i, err) + } + if hdr.Name != want { + t.Errorf("Unexpected entry %d. Next().Name = %v, want %v", i, hdr.Name, want) + } + } + + hdr, err := tr.Next() + if hdr != nil { + t.Errorf("No more tar entries were expected. Next() = %+v, want nil", hdr) + } + if err != io.EOF { + t.Fatal(err) + } +} diff --git a/tar/tar_win.go b/tar/tar_win.go index cc13573..6e77ba4 100644 --- a/tar/tar_win.go +++ b/tar/tar_win.go @@ -18,4 +18,4 @@ package tar // DefaultSecurityMode is a set of security features that are enabled by default. -const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal +const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames diff --git a/tar/winshort.tar b/tar/winshort.tar new file mode 100644 index 0000000..bc4a180 Binary files /dev/null and b/tar/winshort.tar differ diff --git a/zip/winshort.zip b/zip/winshort.zip new file mode 100644 index 0000000..05b691f Binary files /dev/null and b/zip/winshort.zip differ diff --git a/zip/zip.go b/zip/zip.go index 0ef7f6b..d5467f5 100644 --- a/zip/zip.go +++ b/zip/zip.go @@ -130,11 +130,16 @@ const ( // By default, this is activated only on MacOS and Windows builds. If you are extracting to a // case insensitive filesystem on a Unix platform, you should activate this feature explicitly. PreventCaseInsensitiveSymlinkTraversal SecurityMode = 16 + // SkipWindowsShortFilenames drops archive entries that have a path component that look like a + // Windows short filename (e.g. GIT~1). + // By default, this is activated only on Windows builds. If you are extracting to a Windows + // filesystem on a non-Windows platform, you should activate this feature explicitly. + SkipWindowsShortFilenames SecurityMode = 32 ) // MaximumSecurityMode enables all security features. Apps that care about file contents only // and nothing unix specific (e.g. file modes or special devices) should use this mode. -const MaximumSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | SanitizeFileMode | SkipSpecialFiles | PreventCaseInsensitiveSymlinkTraversal +const MaximumSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | SanitizeFileMode | SkipSpecialFiles | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames func isSpecialFile(f zip.File) bool { amode := f.Mode() @@ -163,6 +168,10 @@ func applyMagic(files []*zip.File, securityMode SecurityMode) []*zip.File { f.Name = sanitizer.SanitizePath(f.Name) } + if securityMode&SkipWindowsShortFilenames != 0 && sanitizer.HasWindowsShortFilenames(f.Name) { + continue + } + if securityMode&PreventSymlinkTraversal != 0 { fName := sanitizer.SanitizePath(f.Name) fName = strings.TrimSuffix(fName, "/") diff --git a/zip/zip_test.go b/zip/zip_test.go index 6297475..655b568 100644 --- a/zip/zip_test.go +++ b/zip/zip_test.go @@ -92,6 +92,28 @@ var ( */ //go:embed case-insensitive.zip eCaseInsensitiveSymlinksZip []byte + + /* + Archive: winshort.zip + Length Date Time Name + --------- ---------- ----- ---- + 5 2024-10-11 14:27 3D Objects + 5 2024-10-11 14:27 Androi~2 + 5 2024-10-11 14:27 ANDROI~2 + 0 2024-10-11 14:27 foo/ + 5 2024-10-11 14:27 FOOOOO~1.JPG + 5 2024-10-11 14:27 Some~Stuff + 0 2024-10-11 14:27 foo/ANDROI~2/ + 5 2024-10-11 14:27 foo/ANDROI~2/bar + 0 2024-10-11 14:27 foo/FOOOOO~1.JPG/ + 5 2024-10-11 14:27 foo/FOOOOO~1.JPG/bar + 0 2024-10-11 14:27 foo/Androi~2/ + 5 2024-10-11 14:27 foo/Androi~2/bar + --------- ------- + 40 12 files + */ + //go:embed winshort.zip + eWinShortFilenamesZip []byte ) func TestSafezip(t *testing.T) { @@ -343,3 +365,22 @@ func TestTypes(t *testing.T) { t.Errorf("type of zip.OpenReader().Reader: %v, type of zip.NewReader(): %v", openReaderType, newReaderType) } } + +func TestWindowsShortFilenames(t *testing.T) { + path := archiveToPath(t, eWinShortFilenamesZip) + r, err := OpenReader(path) + if err != nil { + t.Fatalf("Error opening zip. OpenReader(%v) = %v, want nil", path, err) + } + r.SetSecurityMode(r.GetSecurityMode() | SkipWindowsShortFilenames) + + if len(r.File) != 3 { + t.Fatalf("Unexpected number of files in the archive. len(OpenReader(%v).File) = %d, want 2.", path, len(r.File)) + } + + for i, want := range []string{"3D Objects", "foo/", "Some~Stuff"} { + if r.File[i].Name != want { + t.Errorf("Unexpected entry. OpenReader(%v).File[%d].Name = %v, want %v", path, i, r.File[i].Name, want) + } + } +} diff --git a/zip/zip_win.go b/zip/zip_win.go index d875702..2c67ea6 100644 --- a/zip/zip_win.go +++ b/zip/zip_win.go @@ -19,4 +19,4 @@ package zip // DefaultSecurityMode enables path traversal security measures. This mode should be safe for all // existing integrations. -const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal +const DefaultSecurityMode = SanitizeFilenames | PreventSymlinkTraversal | PreventCaseInsensitiveSymlinkTraversal | SkipWindowsShortFilenames