...

Source file src/cuelang.org/go/internal/mod/modresolve/resolve.go

Documentation: cuelang.org/go/internal/mod/modresolve

     1  // Copyright 2024 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package modresolve
    16  
    17  import (
    18  	"crypto/sha256"
    19  	_ "embed"
    20  	"fmt"
    21  	"net"
    22  	"net/netip"
    23  	"path"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  
    28  	"cuelabs.dev/go/oci/ociregistry/ociref"
    29  
    30  	"cuelang.org/go/cue"
    31  	"cuelang.org/go/cue/cuecontext"
    32  	"cuelang.org/go/cue/errors"
    33  	"cuelang.org/go/cue/token"
    34  	"cuelang.org/go/mod/module"
    35  )
    36  
// pathEncoding represents one of the possible types of
// encoding for module paths within a registry.
// It reflects the #registry.pathEncoding disjunction
// in schema.cue.
// TODO it would be nice if this could be auto-generated
// from the schema.
type pathEncoding string

// The supported encodings, as applied by resolver.ResolveToLocation:
//
//   - encPath: the module path (optionally with the matched prefix
//     stripped) is joined onto the registry's repository; the tag is
//     the tag prefix followed by the version.
//   - encHashAsRepo: the hex-encoded SHA256 hash of the module path is
//     used as the final repository element; the tag is the tag prefix
//     followed by the version.
//   - encHashAsTag: the registry's repository is used as is; the tag is
//     the tag prefix, the hex-encoded SHA256 hash of the module path,
//     a hyphen, and the version.
const (
	encPath       pathEncoding = "path"
	encHashAsRepo pathEncoding = "hashAsRepo"
	encHashAsTag  pathEncoding = "hashAsTag"
)
    50  
// LocationResolver resolves module paths to a location
// consisting of a host name of a registry and where
// in that registry the module is to be found.
//
// Note: The implementation in this package operates entirely lexically,
// which is why [Location] contains only a host name and not an actual
// [ociregistry.Interface] implementation.
type LocationResolver interface {
	// ResolveToLocation resolves a base module path (without a version
	// suffix, a.k.a. OCI repository name) and optional version to
	// the location for that path. It reports whether it can find
	// an appropriate location for the module.
	//
	// If the version is empty, the Tag in the returned Location
	// will hold the prefix that all versions of the module in its
	// repository have. That prefix will be followed by the version
	// itself.
	ResolveToLocation(path string, vers string) (Location, bool)

	// AllHosts returns all the registry hosts that the resolver
	// might resolve to, ordered lexically by hostname.
	AllHosts() []Host
}
    74  
// Host represents a registry host name.
type Host struct {
	// Name holds the host name of the registry, in the same form
	// as [Location.Host].
	// If it's an IP v6 address, it will be surrounded with
	// square brackets ([, ]).
	Name string
	// Insecure holds whether this host should be connected
	// to insecurely (with an HTTP rather than HTTPS connection).
	Insecure bool
}
    85  
// Location represents the location for a given module version or versions.
type Location struct {
	// Host holds the host or host:port of the registry.
	Host string

	// Insecure holds whether an insecure connection
	// should be used when connecting to the registry.
	Insecure bool

	// Repository holds the repository to store the module in.
	Repository string

	// Tag holds the tag for the module version.
	// If an empty version was passed to
	// [LocationResolver.ResolveToLocation], it holds the prefix
	// shared by all version tags for the module.
	Tag string
}
   104  
// config mirrors the #File definition in schema.cue.
// TODO it would be nice to be able to generate this
// type directly from the schema.
type config struct {
	// ModuleRegistries maps a module path prefix (without version)
	// to the registry used for modules with that prefix.
	ModuleRegistries map[string]*registryConfig `json:"moduleRegistries,omitempty"`
	// DefaultRegistry is the registry used for modules that match
	// no entry in ModuleRegistries. It may be nil.
	DefaultRegistry *registryConfig `json:"defaultRegistry,omitempty"`
}
   112  
   113  func (cfg *config) init() error {
   114  	for prefix, reg := range cfg.ModuleRegistries {
   115  		if err := module.CheckPathWithoutVersion(prefix); err != nil {
   116  			return fmt.Errorf("invalid module path %q: %v", prefix, err)
   117  		}
   118  		if err := reg.init(); err != nil {
   119  			return fmt.Errorf("invalid registry configuration in %q: %v", prefix, err)
   120  		}
   121  	}
   122  	if cfg.DefaultRegistry != nil {
   123  		if err := cfg.DefaultRegistry.init(); err != nil {
   124  			return fmt.Errorf("invalid default registry configuration: %v", err)
   125  		}
   126  	}
   127  	return nil
   128  }
   129  
// registryConfig holds the configuration for a single registry,
// together with the fields derived from parsing its reference.
type registryConfig struct {
	// Registry holds the registry reference as written in the
	// configuration; it is parsed by parseRegistry.
	Registry string `json:"registry,omitempty"`
	// PathEncoding selects how a module path is mapped to a
	// repository and tag. It must not be empty.
	PathEncoding pathEncoding `json:"pathEncoding,omitempty"`
	// PrefixForTags, when non-empty, is prepended to every tag and
	// must itself be a valid tag.
	PrefixForTags string `json:"prefixForTags,omitempty"`
	// StripPrefix causes the matched module prefix to be removed
	// from the module path before it is joined to the repository.
	// It requires the "path" encoding and a non-empty repository.
	StripPrefix bool `json:"stripPrefix,omitempty"`

	// The following fields are filled in from Registry after parsing.
	host       string
	repository string
	insecure   bool
}
   141  
   142  func (r *registryConfig) init() error {
   143  	r1, err := parseRegistry(r.Registry)
   144  	if err != nil {
   145  		return err
   146  	}
   147  	r.host, r.repository, r.insecure = r1.host, r1.repository, r1.insecure
   148  
   149  	if r.PrefixForTags != "" {
   150  		if !ociref.IsValidTag(r.PrefixForTags) {
   151  			return fmt.Errorf("invalid tag prefix %q", r.PrefixForTags)
   152  		}
   153  	}
   154  	if r.PathEncoding == "" {
   155  		// Shouldn't happen because default should apply.
   156  		return fmt.Errorf("empty pathEncoding")
   157  	}
   158  	if r.StripPrefix {
   159  		if r.PathEncoding != encPath {
   160  			// TODO we could relax this to allow storing of naked tags
   161  			// when the module path matches exactly and hash tags
   162  			// otherwise.
   163  			return fmt.Errorf("cannot strip prefix unless using path encoding")
   164  		}
   165  		if r.repository == "" {
   166  			return fmt.Errorf("use of stripPrefix requires a non-empty repository within the registry")
   167  		}
   168  	}
   169  	return nil
   170  }
   171  
// Lazily-initialised state for the compiled registry configuration
// schema used by ParseConfig.
var (
	configSchemaOnce sync.Once // guards the creation of _configSchema
	// TODO remove this mutex when https://cuelang.org/issue/2733 is fixed.
	configSchemaMutex sync.Mutex // guards any use of _configSchema
	// _configSchema holds the #file definition looked up in the
	// compiled schema.cue; it is set once by ParseConfig.
	_configSchema cue.Value
)

// configSchemaData holds the contents of schema.cue.
//
//go:embed schema.cue
var configSchemaData []byte
   181  
   182  // RegistryConfigSchema returns the CUE schema
   183  // for the configuration parsed by [ParseConfig].
   184  func RegistryConfigSchema() string {
   185  	// Cut out the copyright header and the header that's
   186  	// not pure schema.
   187  	schema := string(configSchemaData)
   188  	i := strings.Index(schema, "\n// #file ")
   189  	if i == -1 {
   190  		panic("no file definition found in schema")
   191  	}
   192  	i++
   193  	return schema[i:]
   194  }
   195  
   196  // ParseConfig parses the registry configuration with the given contents and file name.
   197  // If there is no default registry, then the single registry specified in catchAllDefault
   198  // will be used as a default.
   199  func ParseConfig(configFile []byte, filename string, catchAllDefault string) (LocationResolver, error) {
   200  	configSchemaOnce.Do(func() {
   201  		ctx := cuecontext.New()
   202  		schemav := ctx.CompileBytes(configSchemaData, cue.Filename("cuelang.org/go/internal/mod/modresolve/schema.cue"))
   203  		schemav = schemav.LookupPath(cue.MakePath(cue.Def("#file")))
   204  		if err := schemav.Validate(); err != nil {
   205  			panic(fmt.Errorf("internal error: invalid CUE registry config schema: %v", errors.Details(err, nil)))
   206  		}
   207  		_configSchema = schemav
   208  	})
   209  	configSchemaMutex.Lock()
   210  	defer configSchemaMutex.Unlock()
   211  
   212  	v := _configSchema.Context().CompileBytes(configFile, cue.Filename(filename))
   213  	if err := v.Err(); err != nil {
   214  		return nil, errors.Wrapf(err, token.NoPos, "invalid registry configuration file")
   215  	}
   216  	v = v.Unify(_configSchema)
   217  	if err := v.Err(); err != nil {
   218  		return nil, errors.Wrapf(err, token.NoPos, "invalid configuration file")
   219  	}
   220  	var cfg config
   221  	if err := v.Decode(&cfg); err != nil {
   222  		return nil, errors.Wrapf(err, token.NoPos, "internal error: cannot decode into registry config struct")
   223  	}
   224  	if err := cfg.init(); err != nil {
   225  		return nil, err
   226  	}
   227  	if cfg.DefaultRegistry == nil {
   228  		if catchAllDefault == "" {
   229  			return nil, fmt.Errorf("no default catch-all registry provided")
   230  		}
   231  		// TODO is it too limiting to have the catch-all registry specified as a simple string?
   232  		reg, err := parseRegistry(catchAllDefault)
   233  		if err != nil {
   234  			return nil, fmt.Errorf("invalid catch-all registry %q: %v", catchAllDefault, err)
   235  		}
   236  		cfg.DefaultRegistry = reg
   237  	}
   238  	r := &resolver{
   239  		cfg: cfg,
   240  	}
   241  	if err := r.initHosts(); err != nil {
   242  		return nil, err
   243  	}
   244  	return r, nil
   245  }
   246  
   247  // ParseCUERegistry parses a registry routing specification that
   248  // maps module prefixes to the registry that should be used to
   249  // fetch that module.
   250  //
   251  // The specification consists of an order-independent, comma-separated list.
   252  //
   253  // Each element either maps a module prefix to the registry that will be used
   254  // for all modules that have that prefix (prefix=registry), or a catch-all registry to be used
   255  // for modules that do not match any prefix (registry).
   256  //
   257  // For example:
   258  //
   259  //	myorg.com=myregistry.com/m,catchallregistry.example.org
   260  //
   261  // Any module with a matching prefix will be routed to the given registry.
   262  // A prefix only matches whole path elements.
   263  // In the above example, module myorg.com/foo/bar@v0 will be looked up
   264  // in myregistry.com in the repository m/myorg.com/foo/bar,
   265  // whereas github.com/x/y will be looked up in catchallregistry.example.com.
   266  //
   267  // The registry part is syntactically similar to a [docker reference]
   268  // except that the repository is optional and no tag or digest is allowed.
   269  // Additionally, a +secure or +insecure suffix may be used to indicate
   270  // whether to use a secure or insecure connection. Without that,
   271  // localhost, 127.0.0.1 and [::1] will default to insecure, and anything
   272  // else to secure.
   273  //
   274  // If s does not declare a catch-all registry location, catchAllDefault is
   275  // used. It is an error if s fails to declares a catch-all registry location
   276  // and no catchAllDefault is provided.
   277  //
   278  // [docker reference]: https://pkg.go.dev/github.com/distribution/reference
   279  func ParseCUERegistry(s string, catchAllDefault string) (LocationResolver, error) {
   280  	if s == "" && catchAllDefault == "" {
   281  		return nil, fmt.Errorf("no catch-all registry or default")
   282  	}
   283  	if s == "" {
   284  		s = catchAllDefault
   285  	}
   286  	cfg := config{
   287  		ModuleRegistries: make(map[string]*registryConfig),
   288  	}
   289  	parts := strings.Split(s, ",")
   290  	for _, part := range parts {
   291  		key, val, ok := strings.Cut(part, "=")
   292  		if !ok {
   293  			if part == "" {
   294  				// TODO or just ignore it?
   295  				return nil, fmt.Errorf("empty registry part")
   296  			}
   297  			if _, ok := cfg.ModuleRegistries[""]; ok {
   298  				return nil, fmt.Errorf("duplicate catch-all registry")
   299  			}
   300  			key, val = "", part
   301  		} else {
   302  			if key == "" {
   303  				return nil, fmt.Errorf("empty module prefix")
   304  			}
   305  			if val == "" {
   306  				return nil, fmt.Errorf("empty registry reference")
   307  			}
   308  			if err := module.CheckPathWithoutVersion(key); err != nil {
   309  				return nil, fmt.Errorf("invalid module path %q: %v", key, err)
   310  			}
   311  			if _, ok := cfg.ModuleRegistries[key]; ok {
   312  				return nil, fmt.Errorf("duplicate module prefix %q", key)
   313  			}
   314  		}
   315  		reg, err := parseRegistry(val)
   316  		if err != nil {
   317  			return nil, fmt.Errorf("invalid registry %q: %v", val, err)
   318  		}
   319  		cfg.ModuleRegistries[key] = reg
   320  	}
   321  	if _, ok := cfg.ModuleRegistries[""]; !ok {
   322  		if catchAllDefault == "" {
   323  			return nil, fmt.Errorf("no default catch-all registry provided")
   324  		}
   325  		reg, err := parseRegistry(catchAllDefault)
   326  		if err != nil {
   327  			return nil, fmt.Errorf("invalid catch-all registry %q: %v", catchAllDefault, err)
   328  		}
   329  		cfg.ModuleRegistries[""] = reg
   330  	}
   331  	cfg.DefaultRegistry = cfg.ModuleRegistries[""]
   332  	delete(cfg.ModuleRegistries, "")
   333  
   334  	r := &resolver{
   335  		cfg: cfg,
   336  	}
   337  	if err := r.initHosts(); err != nil {
   338  		return nil, err
   339  	}
   340  	return r, nil
   341  }
   342  
// resolver is the LocationResolver implementation returned by
// ParseConfig and ParseCUERegistry.
type resolver struct {
	// allHosts holds every distinct registry host in cfg, sorted
	// by name; it is computed by initHosts.
	allHosts []Host
	// cfg holds the validated registry configuration.
	cfg config
}
   347  
   348  func (r *resolver) initHosts() error {
   349  	hosts := make(map[string]bool)
   350  	addHost := func(reg *registryConfig) error {
   351  		if insecure, ok := hosts[reg.host]; ok {
   352  			if insecure != reg.insecure {
   353  				return fmt.Errorf("registry host %q is specified both as secure and insecure", reg.host)
   354  			}
   355  		} else {
   356  			hosts[reg.host] = reg.insecure
   357  		}
   358  		return nil
   359  	}
   360  	for _, reg := range r.cfg.ModuleRegistries {
   361  		if err := addHost(reg); err != nil {
   362  			return err
   363  		}
   364  	}
   365  
   366  	if reg := r.cfg.DefaultRegistry; reg != nil {
   367  		if err := addHost(reg); err != nil {
   368  			return err
   369  		}
   370  	}
   371  	allHosts := make([]Host, 0, len(hosts))
   372  	for host, insecure := range hosts {
   373  		allHosts = append(allHosts, Host{
   374  			Name:     host,
   375  			Insecure: insecure,
   376  		})
   377  	}
   378  	sort.Slice(allHosts, func(i, j int) bool {
   379  		return allHosts[i].Name < allHosts[j].Name
   380  	})
   381  	r.allHosts = allHosts
   382  	return nil
   383  }
   384  
// AllHosts implements [LocationResolver.AllHosts].
// The returned slice is the resolver's own slice, not a copy.
func (r *resolver) AllHosts() []Host {
	return r.allHosts
}
   389  
   390  func (r *resolver) ResolveToLocation(mpath, vers string) (Location, bool) {
   391  	if mpath == "" {
   392  		return Location{}, false
   393  	}
   394  	bestMatch := ""
   395  	// Note: there's always a wildcard match.
   396  	bestMatchReg := r.cfg.DefaultRegistry
   397  	for pat, reg := range r.cfg.ModuleRegistries {
   398  		if pat == mpath {
   399  			bestMatch = pat
   400  			bestMatchReg = reg
   401  			break
   402  		}
   403  		if !strings.HasPrefix(mpath, pat) {
   404  			continue
   405  		}
   406  		if len(bestMatch) > len(pat) {
   407  			// We've already found a more specific match.
   408  			continue
   409  		}
   410  		if mpath[len(pat)] != '/' {
   411  			// The path doesn't have a separator at the end of
   412  			// the prefix, which means that it doesn't match.
   413  			// For example, foo.com/bar does not match foo.com/ba.
   414  			continue
   415  		}
   416  		// It's a possible match but not necessarily the longest one.
   417  		bestMatch, bestMatchReg = pat, reg
   418  	}
   419  	if bestMatchReg == nil {
   420  		return Location{}, false
   421  	}
   422  	reg := bestMatchReg
   423  	loc := Location{
   424  		Host:     reg.host,
   425  		Insecure: reg.insecure,
   426  		Tag:      vers,
   427  	}
   428  	switch reg.PathEncoding {
   429  	case encPath:
   430  		if reg.StripPrefix {
   431  			mpath = strings.TrimPrefix(mpath, bestMatch)
   432  			mpath = strings.TrimPrefix(mpath, "/")
   433  		}
   434  		loc.Repository = path.Join(reg.repository, mpath)
   435  	case encHashAsRepo:
   436  		loc.Repository = fmt.Sprintf("%s/%x", reg.repository, sha256.Sum256([]byte(mpath)))
   437  	case encHashAsTag:
   438  		loc.Repository = reg.repository
   439  	default:
   440  		panic("unreachable")
   441  	}
   442  	if reg.PathEncoding == encHashAsTag {
   443  		loc.Tag = fmt.Sprintf("%s%x-%s", reg.PrefixForTags, sha256.Sum256([]byte(mpath)), vers)
   444  	} else {
   445  		loc.Tag = reg.PrefixForTags + vers
   446  	}
   447  	return loc, true
   448  }
   449  
   450  func parseRegistry(env0 string) (*registryConfig, error) {
   451  	env := env0
   452  	var suffix string
   453  	if i := strings.LastIndex(env, "+"); i > 0 {
   454  		suffix = env[i:]
   455  		env = env[:i]
   456  	}
   457  	var r ociref.Reference
   458  	if !strings.Contains(env, "/") {
   459  		// OCI references don't allow a host name on its own without a repo,
   460  		// but we do.
   461  		r.Host = env
   462  		if !ociref.IsValidHost(r.Host) {
   463  			return nil, fmt.Errorf("invalid host name %q in registry", r.Host)
   464  		}
   465  	} else {
   466  		var err error
   467  		r, err = ociref.Parse(env)
   468  		if err != nil {
   469  			return nil, err
   470  		}
   471  		if r.Tag != "" || r.Digest != "" {
   472  			return nil, fmt.Errorf("cannot have an associated tag or digest")
   473  		}
   474  	}
   475  	if suffix == "" {
   476  		if isInsecureHost(r.Host) {
   477  			suffix = "+insecure"
   478  		} else {
   479  			suffix = "+secure"
   480  		}
   481  	}
   482  	insecure := false
   483  	switch suffix {
   484  	case "+insecure":
   485  		insecure = true
   486  	case "+secure":
   487  	default:
   488  		return nil, fmt.Errorf("unknown suffix (%q), need +insecure, +secure or no suffix)", suffix)
   489  	}
   490  	return &registryConfig{
   491  		Registry:     env0,
   492  		PathEncoding: encPath,
   493  		host:         r.Host,
   494  		repository:   r.Repository,
   495  		insecure:     insecure,
   496  	}, nil
   497  }
   498  
   499  var (
   500  	ipV4Localhost = netip.MustParseAddr("127.0.0.1")
   501  	ipV6Localhost = netip.MustParseAddr("::1")
   502  )
   503  
   504  func isInsecureHost(hostPort string) bool {
   505  	host, _, err := net.SplitHostPort(hostPort)
   506  	if err != nil {
   507  		host = hostPort
   508  		if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
   509  			host = host[1 : len(host)-1]
   510  		}
   511  	}
   512  	if host == "localhost" {
   513  		return true
   514  	}
   515  	addr, err := netip.ParseAddr(host)
   516  	if err != nil {
   517  		return false
   518  	}
   519  	// TODO other clients have logic for RFC1918 too, amongst other
   520  	// things. Maybe we should do that too.
   521  	return addr == ipV4Localhost || addr == ipV6Localhost
   522  }
   523  

View as plain text