...

Source file src/cuelang.org/go/mod/modzip/zip.go

Documentation: cuelang.org/go/mod/modzip

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package modzip provides functions for creating and extracting module zip files.
     6  //
     7  // WARNING: THIS PACKAGE IS EXPERIMENTAL.
     8  // ITS API MAY CHANGE AT ANY TIME.
     9  //
    10  // Module zip files have several restrictions listed below. These are necessary
    11  // to ensure that module zip files can be extracted consistently on supported
    12  // platforms and file systems.
    13  //
    14  // • All file paths within a zip file must be valid (see cuelang.org/go/mod/module.CheckFilePath).
    15  //
    16  // • No two file paths may be equal under Unicode case-folding (see
    17  // strings.EqualFold).
    18  //
    19  // • A cue.mod/module.cue file must appear in the top-level directory. If present,
    20  // it must be named exactly that, not any other case. Directories or files named "cue.mod"
    21  // are not allowed in any other directory.
    22  //
    23  // • The total size in bytes of a module zip file may be at most MaxZipFile
    24  // bytes (500 MiB). The total uncompressed size of the files within the
    25  // zip may also be at most MaxZipFile bytes.
    26  //
    27  // • Each file's uncompressed size must match its declared 64-bit uncompressed
    28  // size in the zip file header.
    29  //
    30  // • If the zip contains files named "cue.mod/module.cue" or
    31  // "LICENSE", their sizes in bytes may be at most
    32  // MaxCUEMod or MaxLICENSE, respectively (both are 16 MiB).
    33  //
    34  // • Empty directories are ignored. File permissions and timestamps are also
    35  // ignored.
    36  //
    37  // • Symbolic links and other irregular files are not allowed.
    38  //
    39  // Note that this package does not provide hashing functionality. See
    40  // golang.org/x/mod/sumdb/dirhash.
    41  package modzip
    42  
    43  import (
    44  	"archive/zip"
    45  	"bytes"
    46  	"errors"
    47  	"fmt"
    48  	"io"
    49  	"os"
    50  	"path"
    51  	"path/filepath"
    52  	"strings"
    53  	"unicode"
    54  	"unicode/utf8"
    55  
    56  	"cuelang.org/go/mod/module"
    57  )
    58  
const (
	// MaxZipFile is the maximum size in bytes of a module zip file. An error
	// is reported if either the zip file itself or the total uncompressed
	// size of its contents is larger than this (500 MiB).
	MaxZipFile = 500 << 20

	// MaxCUEMod is the maximum size in bytes (16 MiB) of a
	// cue.mod/module.cue file within a module zip file.
	MaxCUEMod = 16 << 20

	// MaxLICENSE is the maximum size in bytes (16 MiB) of a LICENSE file
	// within a module zip file.
	MaxLICENSE = 16 << 20
)
    73  
// FileIO provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file - it knows how to access files represented
// as a particular type F without being a file itself.
//
// Deprecated: this will be removed in a future API iteration that reduces
// dependence on zip archives.
type FileIO[F any] interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file.
	Path(f F) string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	Lstat(f F) (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link.
	Open(f F) (io.ReadCloser, error)
}
    93  
// CheckedFiles reports whether a set of files satisfy the name and size
// constraints required by module zip files. The constraints are listed in the
// package documentation.
//
// Functions that produce this report may include slightly different sets of
// files. See documentation for CheckFiles, CheckDir, and CheckZip for details.
type CheckedFiles struct {
	// Valid is a list of file paths that should be included in a zip file.
	Valid []string

	// Omitted is a list of files that are ignored when creating a module zip
	// file, along with the reason each file is ignored.
	Omitted []FileError

	// Invalid is a list of files that should not be included in a module zip
	// file, along with the reason each file is invalid.
	Invalid []FileError

	// SizeError is non-nil if the total uncompressed size of the valid files
	// exceeds the module zip size limit or if the zip file itself exceeds the
	// limit.
	SizeError error

	// NoModError is non-nil if no valid cue.mod/module.cue file was found
	// among the checked files.
	NoModError error
}
   120  
   121  // Err returns an error if CheckedFiles does not describe a valid module zip
   122  // file. SizeError is returned if that field is set. A FileErrorList is returned
   123  // if there are one or more invalid files. Other errors may be returned in the
   124  // future.
   125  func (cf CheckedFiles) Err() error {
   126  	if cf.SizeError != nil {
   127  		return cf.SizeError
   128  	}
   129  	if len(cf.Invalid) > 0 {
   130  		return FileErrorList(cf.Invalid)
   131  	}
   132  	if cf.NoModError != nil {
   133  		return cf.NoModError
   134  	}
   135  	return nil
   136  }
   137  
   138  type FileErrorList []FileError
   139  
   140  func (el FileErrorList) Error() string {
   141  	buf := &strings.Builder{}
   142  	sep := ""
   143  	for _, e := range el {
   144  		buf.WriteString(sep)
   145  		buf.WriteString(e.Error())
   146  		sep = "\n"
   147  	}
   148  	return buf.String()
   149  }
   150  
   151  type FileError struct {
   152  	Path string
   153  	Err  error
   154  }
   155  
   156  func (e FileError) Error() string {
   157  	return fmt.Sprintf("%s: %s", e.Path, e.Err)
   158  }
   159  
   160  func (e FileError) Unwrap() error {
   161  	return e.Err
   162  }
   163  
   164  var (
   165  	// Predefined error messages for invalid files. Not exhaustive.
   166  	errPathNotClean    = errors.New("file path is not clean")
   167  	errPathNotRelative = errors.New("file path is not relative")
   168  	errCUEModCase      = errors.New("cue.mod directories must have lowercase names")
   169  	errCUEModuleCase   = errors.New("cue.mod/module.cue files must have lowercase names")
   170  	errCUEModSize      = fmt.Errorf("cue.mod/module.cue file too large (max size is %d bytes)", MaxCUEMod)
   171  	errLICENSESize     = fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
   172  
   173  	// Predefined error messages for omitted files. Not exhaustive.
   174  	errVCS           = errors.New("directory is a version control repository")
   175  	errVendored      = errors.New("file is in vendor directory")
   176  	errSubmoduleFile = errors.New("file is in another module")
   177  	errSubmoduleDir  = errors.New("directory is in another module")
   178  	errHgArchivalTxt = errors.New("file is inserted by 'hg archive' and is always omitted")
   179  	errSymlink       = errors.New("file is a symbolic link")
   180  	errNotRegular    = errors.New("not a regular file")
   181  
   182  	// Other errors
   183  	errNoMod = fmt.Errorf("no cue.mod/module.cue file found")
   184  )
   185  
   186  // CheckFiles reports whether a list of files satisfy the name and size
   187  // constraints listed in the package documentation. The returned CheckedFiles
   188  // record contains lists of valid, invalid, and omitted files. Every file in
   189  // the given list will be included in exactly one of those lists.
   190  //
   191  // CheckFiles returns an error if the returned CheckedFiles does not describe
   192  // a valid module zip file (according to CheckedFiles.Err). The returned
   193  // CheckedFiles is still populated when an error is returned.
   194  //
   195  // Note that CheckFiles will not open any files, so Create may still fail when
   196  // CheckFiles is successful due to I/O errors, reported size differences
   197  // or an invalid module.cue file.
   198  //
   199  // Deprecated: this will be removed in a future API iteration that reduces
   200  // dependence on zip archives.
   201  func CheckFiles[F any](files []F, fio FileIO[F]) (CheckedFiles, error) {
   202  	cf, _, _ := checkFiles(files, fio)
   203  	return cf, cf.Err()
   204  }
   205  
   206  // checkFiles implements CheckFiles and also returns lists of valid files and
   207  // their sizes, corresponding to cf.Valid. It omits files in submodules, files
   208  // in vendored packages, symlinked files, and various other unwanted files.
   209  //
   210  // The lists returned are used in Create to avoid repeated calls to File.Lstat.
   211  func checkFiles[F any](files []F, fio FileIO[F]) (cf CheckedFiles, validFiles []F, validSizes []int64) {
   212  	errPaths := make(map[string]struct{})
   213  	addError := func(path string, omitted bool, err error) {
   214  		if _, ok := errPaths[path]; ok {
   215  			return
   216  		}
   217  		errPaths[path] = struct{}{}
   218  		fe := FileError{Path: path, Err: err}
   219  		if omitted {
   220  			cf.Omitted = append(cf.Omitted, fe)
   221  		} else {
   222  			cf.Invalid = append(cf.Invalid, fe)
   223  		}
   224  	}
   225  
   226  	// Find directories containing cue.mod files or directories (other than the root).
   227  	// Files in these directories will be omitted.
   228  	// These directories will not be included in the output zip.
   229  	haveCUEMod := make(map[string]bool)
   230  	for _, f := range files {
   231  		if dir, rest := splitCUEMod(fio.Path(f)); rest != "" {
   232  			haveCUEMod[dir] = true
   233  		}
   234  	}
   235  
   236  	inSubmodule := func(p string) bool {
   237  		for {
   238  			dir, _ := path.Split(p)
   239  			if dir == "" {
   240  				return false
   241  			}
   242  			if haveCUEMod[dir] {
   243  				return true
   244  			}
   245  			p = dir[:len(dir)-1]
   246  		}
   247  	}
   248  
   249  	collisions := make(collisionChecker)
   250  	maxSize := int64(MaxZipFile)
   251  	foundModuleCUE := false
   252  	for _, f := range files {
   253  		p := fio.Path(f)
   254  		info, err := fio.Lstat(f)
   255  		if err != nil {
   256  			addError(p, false, err)
   257  			continue
   258  		}
   259  		if info.IsDir() {
   260  			continue
   261  		}
   262  		if p != path.Clean(p) {
   263  			addError(p, false, errPathNotClean)
   264  			continue
   265  		}
   266  		if path.IsAbs(p) {
   267  			addError(p, false, errPathNotRelative)
   268  			continue
   269  		}
   270  		if isVendoredPackage(p) {
   271  			// Skip files in vendored packages.
   272  			// Note: although CUE doesn't actually include the concept of
   273  			// vendoring yet, this check acts as future-proofing so we can
   274  			// use the vendor directory for that at some future date.
   275  			addError(p, true, errVendored)
   276  			continue
   277  		}
   278  		if inSubmodule(p) {
   279  			// Skip submodule files.
   280  			addError(p, true, errSubmoduleFile)
   281  			continue
   282  		}
   283  		if p == ".hg_archival.txt" {
   284  			// Inserted by hg archive.
   285  			// Drop this regardless of the VCS being used.
   286  			addError(p, true, errHgArchivalTxt)
   287  			continue
   288  		}
   289  		// TODO check for CUE-specific module paths.
   290  		if err := module.CheckFilePath(p); err != nil {
   291  			addError(p, false, err)
   292  			continue
   293  		}
   294  
   295  		if topDir, rest, _ := strings.Cut(p, "/"); strings.EqualFold(topDir, "cue.mod") {
   296  			if topDir != "cue.mod" {
   297  				addError(p, false, errCUEModCase)
   298  				continue
   299  			}
   300  			if strings.EqualFold(rest, "module.cue") && rest != "module.cue" {
   301  				addError(p, false, errCUEModuleCase)
   302  				continue
   303  			}
   304  			switch topDir, _, _ := strings.Cut(topDir, "/"); topDir {
   305  			case "pkg", "usr", "gen":
   306  				// TODO(rogpeppe) link to explanation.
   307  				addError(p, false, fmt.Errorf("cue.mod/pkg, cue.mod/usr, or cue.mod/gen directories are not allowed in a module because they conflict with module dependencies"))
   308  				continue
   309  			}
   310  		}
   311  		if err := collisions.check(p, info.IsDir()); err != nil {
   312  			addError(p, false, err)
   313  			continue
   314  		}
   315  		if info.Mode()&os.ModeType == os.ModeSymlink {
   316  			// Skip symbolic links (golang.org/issue/27093).
   317  			addError(p, true, errSymlink)
   318  			continue
   319  		}
   320  		if !info.Mode().IsRegular() {
   321  			addError(p, true, errNotRegular)
   322  			continue
   323  		}
   324  		size := info.Size()
   325  		if size >= 0 && size <= maxSize {
   326  			maxSize -= size
   327  		} else if cf.SizeError == nil {
   328  			cf.SizeError = fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
   329  		}
   330  		if p == "cue.mod/module.cue" {
   331  			if size > MaxCUEMod {
   332  				addError(p, false, errCUEModSize)
   333  				continue
   334  			}
   335  			foundModuleCUE = true
   336  
   337  		}
   338  		if p == "LICENSE" && size > MaxLICENSE {
   339  			addError(p, false, errLICENSESize)
   340  			continue
   341  		}
   342  
   343  		cf.Valid = append(cf.Valid, p)
   344  		validFiles = append(validFiles, f)
   345  		validSizes = append(validSizes, info.Size())
   346  	}
   347  	if !foundModuleCUE {
   348  		cf.NoModError = errNoMod
   349  	}
   350  	return cf, validFiles, validSizes
   351  }
   352  
   353  // CheckDir reports whether the files in dir satisfy the name and size
   354  // constraints listed in the package documentation. The returned CheckedFiles
   355  // record contains lists of valid, invalid, and omitted files. If a directory is
   356  // omitted (for example, a nested module or vendor directory), it will appear in
   357  // the omitted list, but its files won't be listed.
   358  //
   359  // CheckDir returns an error if it encounters an I/O error or if the returned
   360  // CheckedFiles does not describe a valid module zip file (according to
   361  // CheckedFiles.Err). The returned CheckedFiles is still populated when such
   362  // an error is returned.
   363  //
   364  // Note that CheckDir will not open any files, so CreateFromDir may still fail
   365  // when CheckDir is successful due to I/O errors.
   366  //
   367  // Deprecated: this will be removed in a future API iteration that reduces
   368  // dependence on zip archives.
   369  func CheckDir(dir string) (CheckedFiles, error) {
   370  	// List files (as CreateFromDir would) and check which ones are omitted
   371  	// or invalid.
   372  	files, omitted, err := listFilesInDir(dir)
   373  	if err != nil {
   374  		return CheckedFiles{}, err
   375  	}
   376  	cf, cfErr := CheckFiles[dirFile](files, dirFileIO{})
   377  	_ = cfErr // ignore this error; we'll generate our own after rewriting paths.
   378  
   379  	// Replace all paths with file system paths.
   380  	// Paths returned by CheckFiles will be slash-separated paths relative to dir.
   381  	// That's probably not appropriate for error messages.
   382  	for i := range cf.Valid {
   383  		cf.Valid[i] = filepath.Join(dir, cf.Valid[i])
   384  	}
   385  	cf.Omitted = append(cf.Omitted, omitted...)
   386  	for i := range cf.Omitted {
   387  		cf.Omitted[i].Path = filepath.Join(dir, cf.Omitted[i].Path)
   388  	}
   389  	for i := range cf.Invalid {
   390  		cf.Invalid[i].Path = filepath.Join(dir, cf.Invalid[i].Path)
   391  	}
   392  	return cf, cf.Err()
   393  }
   394  
   395  // CheckZipFile calls CheckZip with the given zip file.
   396  func CheckZipFile(m module.Version, zipFile string) (CheckedFiles, error) {
   397  	f, err := os.Open(zipFile)
   398  	if err != nil {
   399  		return CheckedFiles{}, err
   400  	}
   401  	defer f.Close()
   402  	info, err := f.Stat()
   403  	if err != nil {
   404  		return CheckedFiles{}, err
   405  	}
   406  	_, _, cf, err := CheckZip(m, f, info.Size())
   407  	return cf, err
   408  }
   409  
   410  // CheckZip reports whether the files contained in a zip file satisfy the name
   411  // and size constraints listed in the package documentation.
   412  //
   413  // CheckZip returns an error if the returned CheckedFiles does not describe
   414  // a valid module zip file (according to CheckedFiles.Err). The returned
   415  // CheckedFiles is still populated when an error is returned. CheckZip will
   416  // also return an error if the module path or version is malformed or if it
   417  // encounters an error reading the zip file.
   418  //
   419  // It also returns the file entry for the module.cue file.
   420  //
   421  // Note that checkZip does not read individual files, so zip.Unzip may still fail
   422  // when checkZip is successful due to I/O errors.
   423  func CheckZip(m module.Version, r io.ReaderAt, zipSize int64) (*zip.Reader, *zip.File, CheckedFiles, error) {
   424  	if zipSize > MaxZipFile {
   425  		cf := CheckedFiles{SizeError: fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)}
   426  		return nil, nil, cf, cf.Err()
   427  	}
   428  
   429  	// Check for valid file names, collisions.
   430  	var cf CheckedFiles
   431  	addError := func(zf *zip.File, err error) {
   432  		cf.Invalid = append(cf.Invalid, FileError{Path: zf.Name, Err: err})
   433  	}
   434  	z, err := zip.NewReader(r, zipSize)
   435  	if err != nil {
   436  		return nil, nil, CheckedFiles{}, err
   437  	}
   438  	collisions := make(collisionChecker)
   439  	var size int64
   440  	var modFile *zip.File
   441  	for _, zf := range z.File {
   442  		name := zf.Name
   443  		isDir := strings.HasSuffix(name, "/")
   444  		if isDir {
   445  			name = name[:len(name)-1]
   446  		}
   447  		if path.Clean(name) != name {
   448  			addError(zf, errPathNotClean)
   449  			continue
   450  		}
   451  		if err := module.CheckFilePath(name); err != nil {
   452  			addError(zf, err)
   453  			continue
   454  		}
   455  		if err := collisions.check(name, isDir); err != nil {
   456  			addError(zf, err)
   457  			continue
   458  		}
   459  		prefix, rest := splitCUEMod(name)
   460  		if rest != "" {
   461  			if prefix != "" {
   462  				// cue.mod directories or files aren't allowed to exist anywhere except in the root.
   463  				addError(zf, fmt.Errorf("cue.mod not in module root directory"))
   464  				continue
   465  			}
   466  			if !strings.Contains(rest, "/") {
   467  				addError(zf, fmt.Errorf("cue.mod is not a directory"))
   468  				continue
   469  			}
   470  			if !strings.HasPrefix(rest, "cue.mod/") {
   471  				addError(zf, errCUEModCase)
   472  				continue
   473  			}
   474  			if strings.EqualFold(rest, "cue.mod/module.cue") {
   475  				if rest != "cue.mod/module.cue" {
   476  					addError(zf, errCUEModuleCase)
   477  					continue
   478  				}
   479  				modFile = zf
   480  			}
   481  		}
   482  		if isDir {
   483  			continue
   484  		}
   485  		// TODO check for case-equivalent names too
   486  		sz := int64(zf.UncompressedSize64)
   487  		if sz >= 0 && MaxZipFile-size >= sz {
   488  			size += sz
   489  		} else if cf.SizeError == nil {
   490  			cf.SizeError = fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
   491  		}
   492  		if name == "cue.mod/module.cue" && sz > MaxCUEMod {
   493  			addError(zf, fmt.Errorf("cue.mod/module.cue file too large (max size is %d bytes)", MaxCUEMod))
   494  			continue
   495  		}
   496  		if name == "LICENSE" && sz > MaxLICENSE {
   497  			addError(zf, fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE))
   498  			continue
   499  		}
   500  		cf.Valid = append(cf.Valid, zf.Name)
   501  	}
   502  	if modFile == nil {
   503  		cf.NoModError = errNoMod
   504  	}
   505  
   506  	return z, modFile, cf, cf.Err()
   507  }
   508  
   509  // Create builds a zip archive for module m from an abstract list of files
   510  // and writes it to w.
   511  //
   512  // Note that m.Version is checked for validity but only the major version
   513  // is used for checking correctness of the cue.mod/module.cue file.
   514  //
   515  // Create verifies the restrictions described in the package documentation
   516  // and should not produce an archive that Unzip cannot extract. Create does not
   517  // include files in the output archive if they don't belong in the module zip.
   518  // In particular, Create will not include files in modules found in
   519  // subdirectories, most files in vendor directories, or irregular files (such
   520  // as symbolic links) in the output archive.
   521  //
   522  // Deprecated: this will be removed in a future API iteration that reduces
   523  // dependence on zip archives.
   524  func Create[F any](w io.Writer, m module.Version, files []F, fio FileIO[F]) (err error) {
   525  	defer func() {
   526  		if err != nil {
   527  			err = &zipError{verb: "create zip", err: err}
   528  		}
   529  	}()
   530  
   531  	// Check whether files are valid, not valid, or should be omitted.
   532  	// Also check that the valid files don't exceed the maximum size.
   533  	cf, validFiles, validSizes := checkFiles(files, fio)
   534  	if err := cf.Err(); err != nil {
   535  		return err
   536  	}
   537  
   538  	// Create the module zip file.
   539  	zw := zip.NewWriter(w)
   540  
   541  	addFile := func(f F, path string, size int64) error {
   542  		rc, err := fio.Open(f)
   543  		if err != nil {
   544  			return err
   545  		}
   546  		defer rc.Close()
   547  		w, err := zw.Create(path)
   548  		if err != nil {
   549  			return err
   550  		}
   551  		lr := &io.LimitedReader{R: rc, N: size + 1}
   552  		if _, err := io.Copy(w, lr); err != nil {
   553  			return err
   554  		}
   555  		if lr.N <= 0 {
   556  			return fmt.Errorf("file %q is larger than declared size", path)
   557  		}
   558  		return nil
   559  	}
   560  
   561  	for i, f := range validFiles {
   562  		p := fio.Path(f)
   563  		size := validSizes[i]
   564  		if err := addFile(f, p, size); err != nil {
   565  			return err
   566  		}
   567  	}
   568  
   569  	return zw.Close()
   570  }
   571  
   572  // CreateFromDir creates a module zip file for module m from the contents of
   573  // a directory, dir. The zip content is written to w.
   574  //
   575  // CreateFromDir verifies the restrictions described in the package
   576  // documentation and should not produce an archive that Unzip cannot extract.
   577  // CreateFromDir does not include files in the output archive if they don't
   578  // belong in the module zip. In particular, CreateFromDir will not include
   579  // files in modules found in subdirectories, most files in vendor directories,
   580  // or irregular files (such as symbolic links) in the output archive.
   581  // Additionally, unlike Create, CreateFromDir will not include directories
   582  // named ".bzr", ".git", ".hg", or ".svn".
   583  func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
   584  	defer func() {
   585  		if zerr, ok := err.(*zipError); ok {
   586  			zerr.path = dir
   587  		} else if err != nil {
   588  			err = &zipError{verb: "create zip from directory", path: dir, err: err}
   589  		}
   590  	}()
   591  
   592  	files, _, err := listFilesInDir(dir)
   593  	if err != nil {
   594  		return err
   595  	}
   596  
   597  	return Create[dirFile](w, m, files, dirFileIO{})
   598  }
   599  
   600  type dirFile struct {
   601  	filePath, slashPath string
   602  	info                os.FileInfo
   603  }
   604  
   605  type dirFileIO struct{}
   606  
   607  func (dirFileIO) Path(f dirFile) string                 { return f.slashPath }
   608  func (dirFileIO) Lstat(f dirFile) (os.FileInfo, error)  { return f.info, nil }
   609  func (dirFileIO) Open(f dirFile) (io.ReadCloser, error) { return os.Open(f.filePath) }
   610  
   611  // isVendoredPackage reports whether the given filename is inside
   612  // the cue.mod/vendor directory.
   613  func isVendoredPackage(name string) bool {
   614  	// TODO we have to decide what the vendor directory will actually be
   615  	// called. Maybe cue.mod/pkg is the one.
   616  	return strings.HasPrefix(name, "cue.mod/vendor/")
   617  }
   618  
   619  // Unzip extracts the contents of a module zip file to a directory.
   620  //
   621  // Unzip checks all restrictions listed in the package documentation and returns
   622  // an error if the zip archive is not valid. In some cases, files may be written
   623  // to dir before an error is returned (for example, if a file's uncompressed
   624  // size does not match its declared size).
   625  //
   626  // dir may or may not exist: Unzip will create it and any missing parent
   627  // directories if it doesn't exist. If dir exists, it must be empty.
   628  func Unzip(dir string, m module.Version, zipFile string) (err error) {
   629  	defer func() {
   630  		if err != nil {
   631  			err = &zipError{verb: "unzip", path: zipFile, err: err}
   632  		}
   633  	}()
   634  
   635  	// Check that the directory is empty. Don't create it yet in case there's
   636  	// an error reading the zip.
   637  	if files, _ := os.ReadDir(dir); len(files) > 0 {
   638  		return fmt.Errorf("target directory %v exists and is not empty (contents: %q)", dir, files)
   639  	}
   640  
   641  	// Open the zip and check that it satisfies all restrictions.
   642  	f, err := os.Open(zipFile)
   643  	if err != nil {
   644  		return err
   645  	}
   646  	defer f.Close()
   647  	info, err := f.Stat()
   648  	if err != nil {
   649  		return err
   650  	}
   651  	z, _, cf, err := CheckZip(m, f, info.Size())
   652  	if err != nil {
   653  		return err
   654  	}
   655  	if err := cf.Err(); err != nil {
   656  		return err
   657  	}
   658  
   659  	// Unzip, enforcing sizes declared in the zip file.
   660  	if err := os.MkdirAll(dir, 0777); err != nil {
   661  		return err
   662  	}
   663  	for _, zf := range z.File {
   664  		name := zf.Name
   665  		if name == "" || strings.HasSuffix(name, "/") {
   666  			continue
   667  		}
   668  		dst := filepath.Join(dir, name)
   669  		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
   670  			return err
   671  		}
   672  		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
   673  		if err != nil {
   674  			return err
   675  		}
   676  		r, err := zf.Open()
   677  		if err != nil {
   678  			w.Close()
   679  			return err
   680  		}
   681  		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
   682  		_, err = io.Copy(w, lr)
   683  		r.Close()
   684  		if err != nil {
   685  			w.Close()
   686  			return err
   687  		}
   688  		if err := w.Close(); err != nil {
   689  			return err
   690  		}
   691  		if lr.N <= 0 {
   692  			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
   693  		}
   694  	}
   695  
   696  	return nil
   697  }
   698  
   699  // collisionChecker finds case-insensitive name collisions and paths that
   700  // are listed as both files and directories.
   701  //
   702  // The keys of this map are processed with strToFold. pathInfo has the original
   703  // path for each folded path.
   704  type collisionChecker map[string]pathInfo
   705  
   706  type pathInfo struct {
   707  	path  string
   708  	isDir bool
   709  }
   710  
   711  func (cc collisionChecker) check(p string, isDir bool) error {
   712  	fold := strToFold(p)
   713  	if other, ok := cc[fold]; ok {
   714  		if p != other.path {
   715  			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
   716  		}
   717  		if isDir != other.isDir {
   718  			return fmt.Errorf("entry %q is both a file and a directory", p)
   719  		}
   720  		if !isDir {
   721  			return fmt.Errorf("multiple entries for file %q", p)
   722  		}
   723  		// It's not an error if check is called with the same directory multiple
   724  		// times. check is called recursively on parent directories, so check
   725  		// may be called on the same directory many times.
   726  	} else {
   727  		cc[fold] = pathInfo{path: p, isDir: isDir}
   728  	}
   729  
   730  	if parent := path.Dir(p); parent != "." {
   731  		return cc.check(parent, true)
   732  	}
   733  	return nil
   734  }
   735  
   736  // listFilesInDir walks the directory tree rooted at dir and returns a list of
   737  // files, as well as a list of directories and files that were skipped (for
   738  // example, nested modules and symbolic links).
   739  func listFilesInDir(dir string) (files []dirFile, omitted []FileError, err error) {
   740  	err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
   741  		if err != nil {
   742  			return err
   743  		}
   744  		relPath, err := filepath.Rel(dir, filePath)
   745  		if err != nil {
   746  			return err
   747  		}
   748  		slashPath := filepath.ToSlash(relPath)
   749  
   750  		// We would like Create and CreateFromDir to produce the same result
   751  		// for a set of files, whether expressed as a directory tree or zip.
   752  		if isVendoredPackage(slashPath) {
   753  			omitted = append(omitted, FileError{Path: slashPath, Err: errVendored})
   754  			return nil
   755  		}
   756  
   757  		if info.IsDir() {
   758  			if filePath == dir {
   759  				// Don't skip the top-level directory.
   760  				return nil
   761  			}
   762  
   763  			// Skip VCS directories.
   764  			// fossil repos are regular files with arbitrary names, so we don't try
   765  			// to exclude them.
   766  			switch filepath.Base(filePath) {
   767  			case ".bzr", ".git", ".hg", ".svn":
   768  				omitted = append(omitted, FileError{Path: slashPath, Err: errVCS})
   769  				return filepath.SkipDir
   770  			}
   771  
   772  			// Skip submodules (directories containing go.mod files).
   773  			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
   774  				omitted = append(omitted, FileError{Path: slashPath, Err: errSubmoduleDir})
   775  				return filepath.SkipDir
   776  			}
   777  			return nil
   778  		}
   779  
   780  		// Skip irregular files and files in vendor directories.
   781  		// Irregular files are ignored. They're typically symbolic links.
   782  		if !info.Mode().IsRegular() {
   783  			omitted = append(omitted, FileError{Path: slashPath, Err: errNotRegular})
   784  			return nil
   785  		}
   786  
   787  		files = append(files, dirFile{
   788  			filePath:  filePath,
   789  			slashPath: slashPath,
   790  			info:      info,
   791  		})
   792  		return nil
   793  	})
   794  	if err != nil {
   795  		return nil, nil, err
   796  	}
   797  	return files, omitted, nil
   798  }
   799  
   800  type zipError struct {
   801  	verb, path string
   802  	err        error
   803  }
   804  
   805  func (e *zipError) Error() string {
   806  	if e.path == "" {
   807  		return fmt.Sprintf("%s: %v", e.verb, e.err)
   808  	} else {
   809  		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
   810  	}
   811  }
   812  
   813  func (e *zipError) Unwrap() error {
   814  	return e.err
   815  }
   816  
   817  // strToFold returns a string with the property that
   818  //
   819  //	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
   820  //
   821  // This lets us test a large set of strings for fold-equivalent
   822  // duplicates without making a quadratic number of calls
   823  // to EqualFold. Note that strings.ToUpper and strings.ToLower
   824  // do not have the desired property in some corner cases.
   825  func strToFold(s string) string {
   826  	// Fast path: all ASCII, no upper case.
   827  	// Most paths look like this already.
   828  	for i := 0; i < len(s); i++ {
   829  		c := s[i]
   830  		if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
   831  			goto Slow
   832  		}
   833  	}
   834  	return s
   835  
   836  Slow:
   837  	var buf bytes.Buffer
   838  	for _, r := range s {
   839  		// SimpleFold(x) cycles to the next equivalent rune > x
   840  		// or wraps around to smaller values. Iterate until it wraps,
   841  		// and we've found the minimum value.
   842  		for {
   843  			r0 := r
   844  			r = unicode.SimpleFold(r0)
   845  			if r <= r0 {
   846  				break
   847  			}
   848  		}
   849  		// Exception to allow fast path above: A-Z => a-z
   850  		if 'A' <= r && r <= 'Z' {
   851  			r += 'a' - 'A'
   852  		}
   853  		buf.WriteRune(r)
   854  	}
   855  	return buf.String()
   856  }
   857  
   858  // splitCUEMod splits the path p into two elements:
   859  // the first before any cue.mod directory, and the second after
   860  // including the cue.mod directory itself.
   861  //
   862  // For example splitCUEMod("foo/bar/cue.mod/baz") would
   863  // return "foo/bar/", "cue.mod/baz".
   864  func splitCUEMod(p string) (string, string) {
   865  	s := p
   866  	for {
   867  		dir, f := path.Split(s)
   868  		if strings.EqualFold(f, "cue.mod") {
   869  			return p[:len(dir)], p[len(dir):]
   870  		}
   871  		dir = strings.TrimRight(dir, "/")
   872  		if dir == "" {
   873  			return p, ""
   874  		}
   875  		s = dir
   876  	}
   877  }
   878  

View as plain text