...

Source file src/cuelang.org/go/mod/module/path.go

Documentation: cuelang.org/go/mod/module

     1  package module
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"regexp"
     7  	"strings"
     8  	"unicode"
     9  	"unicode/utf8"
    10  
    11  	"cuelang.org/go/internal/mod/semver"
    12  )
    13  
// The following regular expressions come from https://github.com/opencontainers/distribution-spec/blob/main/spec.md#pulling-manifests
// and ensure that we can store modules inside OCI registries.
//
// basePathPat is matched against a module path without its major version
// (see CheckPathWithoutVersion). It accepts one or more slash-separated
// components, each made of lower-case ASCII alphanumeric runs joined by
// ".", "_", "__" or one or more dashes.
//
// tagPat is matched against the major version of a module path (see
// CheckPath). It accepts 1 to 128 characters drawn from ASCII letters,
// digits, "_", "." and "-", where the first character is neither "." nor "-".
var (
	basePathPat = regexp.MustCompile(`^[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*(/[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*)*$`)
	tagPat      = regexp.MustCompile(`^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$`)
)
    20  
    21  // Check checks that a given module path, version pair is valid.
    22  // In addition to the path being a valid module path
    23  // and the version being a valid semantic version,
    24  // the two must correspond.
    25  // For example, the path "foo.com/bar@v2" only corresponds to
    26  // semantic versions beginning with "v2.".
    27  func Check(path, version string) error {
    28  	if err := CheckPath(path); err != nil {
    29  		return err
    30  	}
    31  	if !semver.IsValid(version) {
    32  		return &ModuleError{
    33  			Path: path,
    34  			Err:  &InvalidVersionError{Version: version, Err: errors.New("not a semantic version")},
    35  		}
    36  	}
    37  	_, pathMajor, _ := SplitPathVersion(path)
    38  	if err := CheckPathMajor(version, pathMajor); err != nil {
    39  		return &ModuleError{Path: path, Err: err}
    40  	}
    41  	return nil
    42  }
    43  
    44  // firstPathOK reports whether r can appear in the first element of a module path.
    45  // The first element of the path must be an LDH domain name, at least for now.
    46  // To avoid case ambiguity, the domain name must be entirely lower case.
    47  func firstPathOK(r rune) bool {
    48  	return r == '-' || r == '.' ||
    49  		'0' <= r && r <= '9' ||
    50  		'a' <= r && r <= 'z'
    51  }
    52  
    53  // modPathOK reports whether r can appear in a module path element.
    54  // Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: - . _ and ~.
    55  //
    56  // This matches what "go get" has historically recognized in import paths,
    57  // and avoids confusing sequences like '%20' or '+' that would change meaning
    58  // if used in a URL.
    59  //
    60  // TODO(rsc): We would like to allow Unicode letters, but that requires additional
    61  // care in the safe encoding (see "escaped paths" above).
    62  func modPathOK(r rune) bool {
    63  	if r < utf8.RuneSelf {
    64  		return r == '-' || r == '.' || r == '_' || r == '~' ||
    65  			'0' <= r && r <= '9' ||
    66  			'A' <= r && r <= 'Z' ||
    67  			'a' <= r && r <= 'z'
    68  	}
    69  	return false
    70  }
    71  
    72  // importPathOK reports whether r can appear in a package import path element.
    73  //
    74  // Import paths are intermediate between module paths and file paths: we allow
    75  // disallow characters that would be confusing or ambiguous as arguments to
    76  // 'go get' (such as '@' and ' ' ), but allow certain characters that are
    77  // otherwise-unambiguous on the command line and historically used for some
    78  // binary names (such as '++' as a suffix for compiler binaries and wrappers).
    79  func importPathOK(r rune) bool {
    80  	return modPathOK(r) || r == '+'
    81  }
    82  
    83  // fileNameOK reports whether r can appear in a file name.
    84  // For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
    85  // If we expand the set of allowed characters here, we have to
    86  // work harder at detecting potential case-folding and normalization collisions.
    87  // See note about "escaped paths" above.
    88  func fileNameOK(r rune) bool {
    89  	if r < utf8.RuneSelf {
    90  		// Entire set of ASCII punctuation, from which we remove characters:
    91  		//     ! " # $ % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ ` { | } ~
    92  		// We disallow some shell special characters: " ' * < > ? ` |
    93  		// (Note that some of those are disallowed by the Windows file system as well.)
    94  		// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
    95  		// We allow spaces (U+0020) in file names.
    96  		const allowed = "!#$%&()+,-.=@[]^_{}~ "
    97  		if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
    98  			return true
    99  		}
   100  		return strings.ContainsRune(allowed, r)
   101  	}
   102  	// It may be OK to add more ASCII punctuation here, but only carefully.
   103  	// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
   104  	return unicode.IsLetter(r)
   105  }
   106  
   107  // CheckPathWithoutVersion is like CheckPath except that
   108  // it expects a module path without a major version.
   109  func CheckPathWithoutVersion(basePath string) (err error) {
   110  	if _, _, ok := SplitPathVersion(basePath); ok {
   111  		return fmt.Errorf("module path inappropriately contains major version")
   112  	}
   113  	if err := checkPath(basePath, modulePath); err != nil {
   114  		return err
   115  	}
   116  	i := strings.Index(basePath, "/")
   117  	if i < 0 {
   118  		i = len(basePath)
   119  	}
   120  	if i == 0 {
   121  		return fmt.Errorf("leading slash")
   122  	}
   123  	if !strings.Contains(basePath[:i], ".") {
   124  		return fmt.Errorf("missing dot in first path element")
   125  	}
   126  	if basePath[0] == '-' {
   127  		return fmt.Errorf("leading dash in first path element")
   128  	}
   129  	for _, r := range basePath[:i] {
   130  		if !firstPathOK(r) {
   131  			return fmt.Errorf("invalid char %q in first path element", r)
   132  		}
   133  	}
   134  	// Sanity check agreement with OCI specs.
   135  	if !basePathPat.MatchString(basePath) {
   136  		return fmt.Errorf("non-conforming path %q", basePath)
   137  	}
   138  	return nil
   139  }
   140  
   141  // CheckPath checks that a module path is valid.
   142  // A valid module path is a valid import path, as checked by CheckImportPath,
   143  // with three additional constraints.
   144  //
   145  // First, the leading path element (up to the first slash, if any),
   146  // by convention a domain name, must contain only lower-case ASCII letters,
   147  // ASCII digits, dots (U+002E), and dashes (U+002D);
   148  // it must contain at least one dot and cannot start with a dash.
   149  //
   150  // Second, there must be a final major version of the form
   151  // @vN where N looks numeric
   152  // (ASCII digits) and must not begin with a leading zero.
   153  //
   154  // Third, no path element may begin with a dot.
   155  func CheckPath(mpath string) (err error) {
   156  	if mpath == "local" {
   157  		return nil
   158  	}
   159  	defer func() {
   160  		if err != nil {
   161  			err = &InvalidPathError{Kind: "module", Path: mpath, Err: err}
   162  		}
   163  	}()
   164  
   165  	basePath, vers, ok := SplitPathVersion(mpath)
   166  	if !ok {
   167  		return fmt.Errorf("no major version found in module path")
   168  	}
   169  	if semver.Major(vers) != vers {
   170  		return fmt.Errorf("path can contain major version only")
   171  	}
   172  	if err := CheckPathWithoutVersion(basePath); err != nil {
   173  		return err
   174  	}
   175  	if !tagPat.MatchString(vers) {
   176  		return fmt.Errorf("non-conforming version %q", vers)
   177  	}
   178  	return nil
   179  }
   180  
   181  // CheckImportPath checks that an import path is valid.
   182  //
   183  // A valid import path consists of one or more valid path elements
   184  // separated by slashes (U+002F), optionally followed by
   185  // an @vN (major version) qualifier.
   186  // The path part must not begin with nor end in a slash.
   187  //
   188  // A valid path element is a non-empty string made up of
   189  // lower case ASCII letters, ASCII digits, and limited ASCII punctuation: - . and _
   190  // Punctuation characters may not be adjacent and must be between non-punctuation
   191  // characters.
   192  //
   193  // The element prefix up to the first dot must not be a reserved file name
   194  // on Windows, regardless of case (CON, com1, NuL, and so on).
   195  func CheckImportPath(path string) error {
   196  	parts := ParseImportPath(path)
   197  	if semver.Major(parts.Version) != parts.Version {
   198  		return &InvalidPathError{
   199  			Kind: "import",
   200  			Path: path,
   201  			Err:  fmt.Errorf("import paths can only contain a major version specifier"),
   202  		}
   203  	}
   204  	if err := checkPath(parts.Path, importPath); err != nil {
   205  		return &InvalidPathError{Kind: "import", Path: path, Err: err}
   206  	}
   207  	return nil
   208  }
   209  
// pathKind indicates what kind of path we're checking. Module paths,
// import paths, and file paths have different restrictions.
// It is consumed by checkPath and checkElem to select the per-rune
// predicate and the kind-specific structural rules.
type pathKind int

const (
	// modulePath selects modPathOK and additionally forbids a leading
	// dot in any path element.
	modulePath pathKind = iota
	// importPath selects importPathOK.
	importPath
	// filePath selects fileNameOK, permits a leading dash, and skips
	// the Windows short-name (tilde plus digits) check.
	filePath
)
   219  
   220  // checkPath checks that a general path is valid. kind indicates what
   221  // specific constraints should be applied.
   222  //
   223  // checkPath returns an error describing why the path is not valid.
   224  // Because these checks apply to module, import, and file paths,
   225  // and because other checks may be applied, the caller is expected to wrap
   226  // this error with InvalidPathError.
   227  func checkPath(path string, kind pathKind) error {
   228  	if !utf8.ValidString(path) {
   229  		return fmt.Errorf("invalid UTF-8")
   230  	}
   231  	if path == "" {
   232  		return fmt.Errorf("empty string")
   233  	}
   234  	if path[0] == '-' && kind != filePath {
   235  		return fmt.Errorf("leading dash")
   236  	}
   237  	if strings.Contains(path, "//") {
   238  		return fmt.Errorf("double slash")
   239  	}
   240  	if path[len(path)-1] == '/' {
   241  		return fmt.Errorf("trailing slash")
   242  	}
   243  	elemStart := 0
   244  	for i, r := range path {
   245  		if r == '/' {
   246  			if err := checkElem(path[elemStart:i], kind); err != nil {
   247  				return err
   248  			}
   249  			elemStart = i + 1
   250  		}
   251  	}
   252  	if err := checkElem(path[elemStart:], kind); err != nil {
   253  		return err
   254  	}
   255  	return nil
   256  }
   257  
   258  // checkElem checks whether an individual path element is valid.
   259  func checkElem(elem string, kind pathKind) error {
   260  	if elem == "" {
   261  		return fmt.Errorf("empty path element")
   262  	}
   263  	if strings.Count(elem, ".") == len(elem) {
   264  		return fmt.Errorf("invalid path element %q", elem)
   265  	}
   266  	if elem[0] == '.' && kind == modulePath {
   267  		return fmt.Errorf("leading dot in path element")
   268  	}
   269  	if elem[len(elem)-1] == '.' {
   270  		return fmt.Errorf("trailing dot in path element")
   271  	}
   272  	for _, r := range elem {
   273  		ok := false
   274  		switch kind {
   275  		case modulePath:
   276  			ok = modPathOK(r)
   277  		case importPath:
   278  			ok = importPathOK(r)
   279  		case filePath:
   280  			ok = fileNameOK(r)
   281  		default:
   282  			panic(fmt.Sprintf("internal error: invalid kind %v", kind))
   283  		}
   284  		if !ok {
   285  			return fmt.Errorf("invalid char %q", r)
   286  		}
   287  	}
   288  	// Windows disallows a bunch of path elements, sadly.
   289  	// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
   290  	short := elem
   291  	if i := strings.Index(short, "."); i >= 0 {
   292  		short = short[:i]
   293  	}
   294  	for _, bad := range badWindowsNames {
   295  		if strings.EqualFold(bad, short) {
   296  			return fmt.Errorf("%q disallowed as path element component on Windows", short)
   297  		}
   298  	}
   299  
   300  	if kind == filePath {
   301  		// don't check for Windows short-names in file names. They're
   302  		// only an issue for import paths.
   303  		return nil
   304  	}
   305  
   306  	// Reject path components that look like Windows short-names.
   307  	// Those usually end in a tilde followed by one or more ASCII digits.
   308  	if tilde := strings.LastIndexByte(short, '~'); tilde >= 0 && tilde < len(short)-1 {
   309  		suffix := short[tilde+1:]
   310  		suffixIsDigits := true
   311  		for _, r := range suffix {
   312  			if r < '0' || r > '9' {
   313  				suffixIsDigits = false
   314  				break
   315  			}
   316  		}
   317  		if suffixIsDigits {
   318  			return fmt.Errorf("trailing tilde and digits in path element")
   319  		}
   320  	}
   321  
   322  	return nil
   323  }
   324  
   325  // CheckFilePath checks that a slash-separated file path is valid.
   326  // The definition of a valid file path is the same as the definition
   327  // of a valid import path except that the set of allowed characters is larger:
   328  // all Unicode letters, ASCII digits, the ASCII space character (U+0020),
   329  // and the ASCII punctuation characters
   330  // “!#$%&()+,-.=@[]^_{}~”.
   331  // (The excluded punctuation characters, " * < > ? ` ' | / \ and :,
   332  // have special meanings in certain shells or operating systems.)
   333  //
   334  // CheckFilePath may be less restrictive in the future, but see the
   335  // top-level package documentation for additional information about
   336  // subtleties of Unicode.
   337  func CheckFilePath(path string) error {
   338  	if err := checkPath(path, filePath); err != nil {
   339  		return &InvalidPathError{Kind: "file", Path: path, Err: err}
   340  	}
   341  	return nil
   342  }
   343  
// badWindowsNames are the reserved file path elements on Windows.
// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
//
// checkElem compares each entry, ignoring case, against the part of a
// path element that precedes its first dot, so for example "con",
// "Con.txt" and "NUL.tar.gz" are all rejected.
var badWindowsNames = []string{
	"CON",
	"PRN",
	"AUX",
	"NUL",
	"COM1",
	"COM2",
	"COM3",
	"COM4",
	"COM5",
	"COM6",
	"COM7",
	"COM8",
	"COM9",
	"LPT1",
	"LPT2",
	"LPT3",
	"LPT4",
	"LPT5",
	"LPT6",
	"LPT7",
	"LPT8",
	"LPT9",
}
   370  
   371  // SplitPathVersion returns a prefix and version suffix such
   372  // that prefix+"@"+version == path.
   373  // SplitPathVersion returns with ok=false when presented
   374  // with a path with an invalid version suffix.
   375  //
   376  // For example, SplitPathVersion("foo.com/bar@v0.1") returns
   377  // ("foo.com/bar", "v0.1", true).
   378  func SplitPathVersion(path string) (prefix, version string, ok bool) {
   379  	i := strings.LastIndex(path, "@")
   380  	split := i
   381  	if i <= 0 || i+2 >= len(path) {
   382  		return "", "", false
   383  	}
   384  	if strings.Contains(path[:i], "@") {
   385  		return "", "", false
   386  	}
   387  	if path[i+1] != 'v' {
   388  		return "", "", false
   389  	}
   390  	if !semver.IsValid(path[i+1:]) {
   391  		return "", "", false
   392  	}
   393  	return path[:split], path[split+1:], true
   394  }
   395  
   396  // ImportPath holds the various components of an import path.
   397  type ImportPath struct {
   398  	// Path holds the base package/directory path, similar
   399  	// to that returned by [Version.BasePath].
   400  	Path string
   401  
   402  	// Version holds the version of the import
   403  	// or empty if not present. Note: in general this
   404  	// will contain a major version only, but there's no
   405  	// guarantee of that.
   406  	Version string
   407  
   408  	// Qualifier holds the package qualifier within the path.
   409  	// This will be derived from the last component of Path
   410  	// if it wasn't explicitly present in the import path.
   411  	// This is not guaranteed to be a valid CUE identifier.
   412  	Qualifier string
   413  
   414  	// ExplicitQualifier holds whether the qualifier was explicitly
   415  	// present in the import path.
   416  	ExplicitQualifier bool
   417  }
   418  
   419  // Canonical returns the canonical form of the import path.
   420  // Specifically, it will only include the package qualifier
   421  // if it's different from the last component of parts.Path.
   422  func (parts ImportPath) Canonical() ImportPath {
   423  	if i := strings.LastIndex(parts.Path, "/"); i >= 0 && parts.Path[i+1:] == parts.Qualifier {
   424  		parts.Qualifier = ""
   425  		parts.ExplicitQualifier = false
   426  	}
   427  	return parts
   428  }
   429  
   430  // Unqualified returns the import path without any package qualifier.
   431  func (parts ImportPath) Unqualified() ImportPath {
   432  	parts.Qualifier = ""
   433  	parts.ExplicitQualifier = false
   434  	return parts
   435  }
   436  
   437  func (parts ImportPath) String() string {
   438  	if parts.Version == "" && !parts.ExplicitQualifier {
   439  		// Fast path.
   440  		return parts.Path
   441  	}
   442  	var buf strings.Builder
   443  	buf.WriteString(parts.Path)
   444  	if parts.Version != "" {
   445  		buf.WriteByte('@')
   446  		buf.WriteString(parts.Version)
   447  	}
   448  	if parts.ExplicitQualifier {
   449  		buf.WriteByte(':')
   450  		buf.WriteString(parts.Qualifier)
   451  	}
   452  	return buf.String()
   453  }
   454  
   455  // ParseImportPath returns the various components of an import path.
   456  func ParseImportPath(p string) ImportPath {
   457  	var parts ImportPath
   458  	pathWithoutQualifier := p
   459  	if i := strings.LastIndexAny(p, "/:"); i >= 0 && p[i] == ':' {
   460  		pathWithoutQualifier = p[:i]
   461  		parts.Qualifier = p[i+1:]
   462  		parts.ExplicitQualifier = true
   463  	}
   464  	parts.Path = pathWithoutQualifier
   465  	if path, version, ok := SplitPathVersion(pathWithoutQualifier); ok {
   466  		parts.Version = version
   467  		parts.Path = path
   468  	}
   469  	if !parts.ExplicitQualifier {
   470  		if i := strings.LastIndex(parts.Path, "/"); i >= 0 {
   471  			parts.Qualifier = parts.Path[i+1:]
   472  		} else {
   473  			parts.Qualifier = parts.Path
   474  		}
   475  	}
   476  	return parts
   477  }
   478  
   479  // CheckPathMajor returns a non-nil error if the semantic version v
   480  // does not match the path major version pathMajor.
   481  func CheckPathMajor(v, pathMajor string) error {
   482  	if m := semver.Major(v); m != pathMajor {
   483  		return &InvalidVersionError{
   484  			Version: v,
   485  			Err:     fmt.Errorf("should be %s, not %s", pathMajor, m),
   486  		}
   487  	}
   488  	return nil
   489  }
   490  

View as plain text