...

Source file src/cuelabs.dev/go/oci/ociregistry/ociref/reference.go

Documentation: cuelabs.dev/go/oci/ociregistry/ociref

     1  // Copyright 2023 CUE Labs AG
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ociref supports parsing cross-registry OCI registry references.
    16  package ociref
    17  
    18  import (
    19  	"fmt"
    20  	"regexp"
    21  	"strings"
    22  
    23  	"cuelabs.dev/go/oci/ociregistry"
    24  )
    25  
// The following regular expressions are derived from code in the
// [github.com/distribution/distribution/v3/reference] package.
//
// Each constant is a fragment of regular expression source text; the
// fragments are concatenated below (and in the package-level patterns)
// to build the complete expressions.
const (
	// alphanumeric defines the alphanumeric atom, typically a
	// component of names. This only allows lower case characters and digits.
	alphanumeric = `[a-z0-9]+`

	// separator defines the separators allowed to be embedded in name
	// components. This allows one period, one or two underscores and multiple
	// dashes. Repeated dashes and underscores are intentionally treated
	// differently. In order to support valid hostnames as name components,
	// supporting repeated dash was added. Additionally double underscore is
	// now allowed as a separator to loosen the restriction for previously
	// supported names.
	// TODO the distribution spec doesn't allow these variations.
	separator = `(?:[._]|__|[-]+)`

	// domainNameComponent matches a single dot-separated component of a
	// registry domain name: it starts and ends with an ASCII letter or
	// digit, and may contain hyphens in between.
	domainNameComponent = `(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)`

	// ipv6address are enclosed between square brackets and may be represented
	// in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format
	// are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as
	// IPv4-Mapped are deliberately excluded.
	//
	// Note: the pattern only restricts the bracketed text to hexadecimal
	// digits and colons; it does not check that the address is well formed.
	ipv6address = `(?:\[[a-fA-F0-9:]+\])`

	// port matches a decimal port number (e.g. "80"). It does not include
	// the ":" separator, which is added where port is used.
	port = `[0-9]+`

	// domainName defines the structure of potential domain components
	// that may be part of image names. This is purposely a subset of what is
	// allowed by DNS to ensure backwards compatibility with Docker image
	// names. This includes IPv4 addresses in decimal format.
	//
	// Note: we purposely exclude domain names without dots here,
	// because otherwise we can't tell if the first component is
	// a host name or not when it doesn't have a port.
	// When it does have a port, the distinction is clear.
	domainName = `(?:` + domainNameComponent + `(?:\.` + domainNameComponent + `)+` + `)`

	// host defines the structure of potential domains based on the URI
	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
	// or an IPv4 address in decimal format, or an IPv6 address between square
	// brackets (excluding zone identifiers as defined by rfc6874 or special
	// addresses such as IPv4-Mapped).
	host = `(?:` + domainName + `|` + ipv6address + `)`

	// domainAndPort matches the registry part of a reference: either a
	// host (as defined above) with an optional port, or a single
	// dot-less domain name component followed by a mandatory port.
	//
	// Note that we require the port when the host name looks like a regular
	// name component, because that is the only way to tell it apart from
	// the first component of a repository name.
	domainAndPort = `(?:` + host + `(?:` + `:` + port + `)?` + `|` + domainNameComponent + `:` + port + `)`

	// pathComponent restricts path-components to start with an alphanumeric
	// character, with following parts able to be separated by a separator
	// (one period, one or two underscores and multiple dashes).
	pathComponent = `(?:` + alphanumeric + `(?:` + separator + alphanumeric + `)*` + `)`

	// repoName matches the name of a repository. It consists of one
	// or more forward slash (/) delimited path-components:
	//
	//	pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu"
	repoName = pathComponent + `(?:` + `/` + pathComponent + `)*`
)
    93  
    94  var referencePat = regexp.MustCompile(
    95  	`^(?:` +
    96  		`(?:` + `(` + domainAndPort + `)` + `/` + `)?` + // capture 1: host
    97  		`(` + repoName + `)` + // capture 2: repository name
    98  		`(?:` + `:([^@]+))?` + // capture 3: tag; rely on Go logic to test validity.
    99  		`(?:` + `@(.+))?` + // capture 4: digest; rely on go-digest to find issues
   100  		`)$`,
   101  )
   102  
   103  var hostPat = regexp.MustCompile(`^(?:` + domainAndPort + `)$`)
   104  var repoPat = regexp.MustCompile(`^(?:` + repoName + `)$`)
   105  
// Reference represents an entry in an OCI repository.
//
// A Reference is produced by [Parse] or [ParseRelative] and can be
// converted back to its textual form with [Reference.String].
type Reference struct {
	// Host holds the host name of the registry
	// within which the repository is stored, optionally in
	// the form host:port. This might be empty.
	Host string

	// Repository holds the repository name.
	Repository string

	// Tag holds the TAG part of a :TAG or :TAG@DIGEST reference.
	// It is empty when the reference has no tag.
	// When Digest is set as well as Tag, the tag will be verified
	// to exist and have the expected digest.
	Tag string

	// Digest holds the DIGEST part of an @DIGEST reference
	// or of a :TAG@DIGEST reference.
	// It is empty when the reference has no digest.
	Digest ociregistry.Digest
}
   125  
// IsValidHost reports whether s is a valid host (or host:port) part of a reference string.
//
// The whole of s must match: a host is either a dotted domain name or a
// bracketed IPv6 address, each with an optional port, or a single
// dot-less name followed by a mandatory port.
func IsValidHost(s string) bool {
	return hostPat.MatchString(s)
}
   130  
// IsValidRepository reports whether s is a valid repository part
// of a reference string.
//
// Only the syntax of s is checked here; the limit on the length of a
// repository name that ParseRelative enforces is not applied.
func IsValidRepository(s string) bool {
	return repoPat.MatchString(s)
}
   136  
   137  // IsValidTag reports whether s is a valid reference tag.
   138  func IsValidTag(s string) bool {
   139  	return checkTag(s) == nil
   140  }
   141  
   142  // Parse parses a reference string that must include
   143  // a host name (or host:port pair) component.
   144  //
   145  // It is represented in string form as HOST[:PORT]/NAME[:TAG|@DIGEST]
   146  // form: the same syntax accepted by "docker pull".
   147  // Unlike "docker pull" however, there is no default registry: when
   148  // presented with a bare repository name, Parse will return an error.
   149  func Parse(refStr string) (Reference, error) {
   150  	ref, err := ParseRelative(refStr)
   151  	if err != nil {
   152  		return Reference{}, err
   153  	}
   154  	if ref.Host == "" {
   155  		return Reference{}, fmt.Errorf("reference does not contain host name")
   156  	}
   157  	return ref, nil
   158  }
   159  
   160  // ParseRelative parses a reference string that may
   161  // or may not include a host name component.
   162  //
   163  // It is represented in string form as [HOST[:PORT]/]NAME[:TAG|@DIGEST]
   164  // form: the same syntax accepted by "docker pull".
   165  // Unlike "docker pull" however, there is no default registry: when
   166  // presented with a bare repository name, the Host field will be empty.
   167  func ParseRelative(refStr string) (Reference, error) {
   168  	m := referencePat.FindStringSubmatch(refStr)
   169  	if m == nil {
   170  		return Reference{}, fmt.Errorf("invalid reference syntax (%q)", refStr)
   171  	}
   172  	var ref Reference
   173  	ref.Host, ref.Repository, ref.Tag, ref.Digest = m[1], m[2], m[3], ociregistry.Digest(m[4])
   174  	// Check lengths and digest: we don't check these as part of the regexp
   175  	// because it's more efficient to do it in Go and we get
   176  	// nicer error messages as a result.
   177  	if len(ref.Digest) > 0 {
   178  		if err := ref.Digest.Validate(); err != nil {
   179  			return Reference{}, fmt.Errorf("invalid digest %q: %v", ref.Digest, err)
   180  		}
   181  	}
   182  	if len(ref.Tag) > 0 {
   183  		if err := checkTag(ref.Tag); err != nil {
   184  			return Reference{}, err
   185  		}
   186  	}
   187  	if len(ref.Repository) > 255 {
   188  		return Reference{}, fmt.Errorf("repository name too long")
   189  	}
   190  	return ref, nil
   191  }
   192  
   193  func checkTag(s string) error {
   194  	if len(s) > 128 {
   195  		return fmt.Errorf("tag too long")
   196  	}
   197  	if !isWord(s[0]) {
   198  		return fmt.Errorf("tag %q does not start with word character", s)
   199  	}
   200  	for i := 1; i < len(s); i++ {
   201  		c := s[i]
   202  		if !isWord(c) && c != '.' && c != '-' {
   203  			return fmt.Errorf("tag %q contains invalid invalid character %q", s, c)
   204  		}
   205  	}
   206  	return nil
   207  }
   208  
   209  func isWord(c byte) bool {
   210  	return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')
   211  }
   212  
   213  // String returns the string form of a reference in the form
   214  //
   215  //	[HOST/]NAME[:TAG|@DIGEST]
   216  func (ref Reference) String() string {
   217  	var buf strings.Builder
   218  	buf.Grow(len(ref.Host) + 1 + len(ref.Repository) + 1 + len(ref.Tag) + 1 + len(ref.Digest))
   219  	if ref.Host != "" {
   220  		buf.WriteString(ref.Host)
   221  		buf.WriteByte('/')
   222  	}
   223  	buf.WriteString(ref.Repository)
   224  	if len(ref.Tag) > 0 {
   225  		buf.WriteByte(':')
   226  		buf.WriteString(ref.Tag)
   227  	}
   228  	if len(ref.Digest) > 0 {
   229  		buf.WriteByte('@')
   230  		buf.WriteString(string(ref.Digest))
   231  	}
   232  	return buf.String()
   233  }
   234  

View as plain text