// Copyright 2023 CUE Labs AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package ociref supports parsing cross-registry OCI registry references.
package ociref

import (
	"fmt"
	"regexp"
	"strings"

	"cuelabs.dev/go/oci/ociregistry"
)

// The following regular expressions are derived from code in the
// [github.com/distribution/distribution/v3/reference] package.
// Each constant is a regexp fragment; the fragments are concatenated
// to build the anchored patterns compiled below.
const (
	// alphanumeric defines the alphanumeric atom, typically a
	// component of names. This only allows lower case characters and digits.
	alphanumeric = `[a-z0-9]+`

	// separator defines the separators allowed to be embedded in name
	// components. This allows one period, one or two underscores and multiple
	// dashes. Repeated dashes and underscores are intentionally treated
	// differently. In order to support valid hostnames as name components,
	// supporting repeated dashes was added. Additionally double underscore is
	// now allowed as a separator to loosen the restriction for previously
	// supported names.
	// TODO the distribution spec doesn't allow these variations.
	separator = `(?:[._]|__|[-]+)`

	// domainNameComponent matches a single dot-free label of a registry
	// domain name: it starts and ends with a letter or digit (either case)
	// and may contain hyphens in between.
	domainNameComponent = `(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?)`

	// ipv6address are enclosed between square brackets and may be represented
	// in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format
	// are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as
	// IPv4-Mapped are deliberately excluded.
	// Note that the pattern only checks the character set (hex digits and
	// colons) between the brackets; it does not verify the address structure.
	ipv6address = `(?:\[[a-fA-F0-9:]+\])`

	// port matches a port number: one or more decimal digits.
	// It does not include the ":" separator, which is added where
	// the fragment is used.
	port = `[0-9]+`

	// domainName defines the structure of potential domain components
	// that may be part of image names. This is purposely a subset of what is
	// allowed by DNS to ensure backwards compatibility with Docker image
	// names. This includes IPv4 addresses in decimal format.
	//
	// Note: we purposely exclude domain names without dots here,
	// because otherwise we can't tell if the first component is
	// a host name or not when it doesn't have a port.
	// When it does have a port, the distinction is clear.
	domainName = `(?:` + domainNameComponent + `(?:\.` + domainNameComponent + `)+` + `)`

	// host defines the structure of potential domains based on the URI
	// Host subcomponent on rfc3986. It may be a subset of DNS domain name,
	// or an IPv4 address in decimal format, or an IPv6 address between square
	// brackets (excluding zone identifiers as defined by rfc6874 or special
	// addresses such as IPv4-Mapped).
	host = `(?:` + domainName + `|` + ipv6address + `)`

	// domainAndPort matches the registry part of a reference: either a
	// host (dotted domain name or bracketed IPv6 address) with an optional
	// ":port" suffix, or a single dot-free domainNameComponent followed by
	// a mandatory ":port".
	// Note that we require the port when the host name looks like a regular
	// name component, because that is the only way to tell it apart from
	// the first path component of a repository name.
	domainAndPort = `(?:` + host + `(?:` + `:` + port + `)?` + `|` + domainNameComponent + `:` + port + `)`

	// pathComponent restricts path-components to start with an alphanumeric
	// character, with following parts able to be separated by a separator
	// (one period, one or two underscores and multiple dashes).
	pathComponent = `(?:` + alphanumeric + `(?:` + separator + alphanumeric + `)*` + `)`

	// repoName matches the name of a repository. It consists of one
	// or more forward slash (/) delimited path-components:
	//
	//	pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu"
	repoName = pathComponent + `(?:` + `/` + pathComponent + `)*`
)

// referencePat matches a complete reference string. The host, tag and
// digest parts are each optional; the repository name is required.
// The tag and digest captures are deliberately loose: their contents
// are validated in Go code after matching.
var referencePat = regexp.MustCompile(
	`^(?:` +
		`(?:` + `(` + domainAndPort + `)` + `/` + `)?` + // capture 1: host
		`(` + repoName + `)` + // capture 2: repository name
		`(?:` + `:([^@]+))?` + // capture 3: tag; rely on Go logic to test validity.
		`(?:` + `@(.+))?` + // capture 4: digest; rely on go-digest to find issues
		`)$`,
)

// hostPat matches a string that consists entirely of a host or host:port.
var hostPat = regexp.MustCompile(`^(?:` + domainAndPort + `)$`)

// repoPat matches a string that consists entirely of a repository name.
var repoPat = regexp.MustCompile(`^(?:` + repoName + `)$`)

// Reference represents an entry in an OCI repository.
type Reference struct {
	// Host holds the host name of the registry
	// within which the repository is stored, optionally in
	// the form host:port. This might be empty.
	Host string

	// Repository holds the repository name.
	Repository string

	// Tag holds the TAG part of a :TAG or :TAG@DIGEST reference.
	// When Digest is set as well as Tag, the tag will be verified
	// to exist and have the expected digest.
	Tag string

	// Digest holds the DIGEST part of an @DIGEST reference
	// or of a :TAG@DIGEST reference.
	Digest ociregistry.Digest
}

// IsValidHost reports whether s is a valid host (or host:port) part of a reference string.
127 func IsValidHost(s string) bool { 128 return hostPat.MatchString(s) 129 } 130 131 // IsValidHost reports whether s is a valid repository part 132 // of a reference string. 133 func IsValidRepository(s string) bool { 134 return repoPat.MatchString(s) 135 } 136 137 // IsValidTag reports whether s is a valid reference tag. 138 func IsValidTag(s string) bool { 139 return checkTag(s) == nil 140 } 141 142 // Parse parses a reference string that must include 143 // a host name (or host:port pair) component. 144 // 145 // It is represented in string form as HOST[:PORT]/NAME[:TAG|@DIGEST] 146 // form: the same syntax accepted by "docker pull". 147 // Unlike "docker pull" however, there is no default registry: when 148 // presented with a bare repository name, Parse will return an error. 149 func Parse(refStr string) (Reference, error) { 150 ref, err := ParseRelative(refStr) 151 if err != nil { 152 return Reference{}, err 153 } 154 if ref.Host == "" { 155 return Reference{}, fmt.Errorf("reference does not contain host name") 156 } 157 return ref, nil 158 } 159 160 // ParseRelative parses a reference string that may 161 // or may not include a host name component. 162 // 163 // It is represented in string form as [HOST[:PORT]/]NAME[:TAG|@DIGEST] 164 // form: the same syntax accepted by "docker pull". 165 // Unlike "docker pull" however, there is no default registry: when 166 // presented with a bare repository name, the Host field will be empty. 167 func ParseRelative(refStr string) (Reference, error) { 168 m := referencePat.FindStringSubmatch(refStr) 169 if m == nil { 170 return Reference{}, fmt.Errorf("invalid reference syntax (%q)", refStr) 171 } 172 var ref Reference 173 ref.Host, ref.Repository, ref.Tag, ref.Digest = m[1], m[2], m[3], ociregistry.Digest(m[4]) 174 // Check lengths and digest: we don't check these as part of the regexp 175 // because it's more efficient to do it in Go and we get 176 // nicer error messages as a result. 
177 if len(ref.Digest) > 0 { 178 if err := ref.Digest.Validate(); err != nil { 179 return Reference{}, fmt.Errorf("invalid digest %q: %v", ref.Digest, err) 180 } 181 } 182 if len(ref.Tag) > 0 { 183 if err := checkTag(ref.Tag); err != nil { 184 return Reference{}, err 185 } 186 } 187 if len(ref.Repository) > 255 { 188 return Reference{}, fmt.Errorf("repository name too long") 189 } 190 return ref, nil 191 } 192 193 func checkTag(s string) error { 194 if len(s) > 128 { 195 return fmt.Errorf("tag too long") 196 } 197 if !isWord(s[0]) { 198 return fmt.Errorf("tag %q does not start with word character", s) 199 } 200 for i := 1; i < len(s); i++ { 201 c := s[i] 202 if !isWord(c) && c != '.' && c != '-' { 203 return fmt.Errorf("tag %q contains invalid invalid character %q", s, c) 204 } 205 } 206 return nil 207 } 208 209 func isWord(c byte) bool { 210 return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') 211 } 212 213 // String returns the string form of a reference in the form 214 // 215 // [HOST/]NAME[:TAG|@DIGEST] 216 func (ref Reference) String() string { 217 var buf strings.Builder 218 buf.Grow(len(ref.Host) + 1 + len(ref.Repository) + 1 + len(ref.Tag) + 1 + len(ref.Digest)) 219 if ref.Host != "" { 220 buf.WriteString(ref.Host) 221 buf.WriteByte('/') 222 } 223 buf.WriteString(ref.Repository) 224 if len(ref.Tag) > 0 { 225 buf.WriteByte(':') 226 buf.WriteString(ref.Tag) 227 } 228 if len(ref.Digest) > 0 { 229 buf.WriteByte('@') 230 buf.WriteString(string(ref.Digest)) 231 } 232 return buf.String() 233 } 234