1 package reference 2 3 import ( 4 "regexp" 5 "strings" 6 ) 7 8 // DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>"). 9 var DigestRegexp = regexp.MustCompile(digestPat) 10 11 // DomainRegexp matches hostname or IP-addresses, optionally including a port 12 // number. It defines the structure of potential domain components that may be 13 // part of image names. This is purposely a subset of what is allowed by DNS to 14 // ensure backwards compatibility with Docker image names. It may be a subset of 15 // DNS domain name, an IPv4 address in decimal format, or an IPv6 address between 16 // square brackets (excluding zone identifiers as defined by [RFC 6874] or special 17 // addresses such as IPv4-Mapped). 18 // 19 // [RFC 6874]: https://www.rfc-editor.org/rfc/rfc6874. 20 var DomainRegexp = regexp.MustCompile(domainAndPort) 21 22 // IdentifierRegexp is the format for string identifier used as a 23 // content addressable identifier using sha256. These identifiers 24 // are like digests without the algorithm, since sha256 is used. 25 var IdentifierRegexp = regexp.MustCompile(identifier) 26 27 // NameRegexp is the format for the name component of references, including 28 // an optional domain and port, but without tag or digest suffix. 29 var NameRegexp = regexp.MustCompile(namePat) 30 31 // ReferenceRegexp is the full supported format of a reference. The regexp 32 // is anchored and has capturing groups for name, tag, and digest 33 // components. 34 var ReferenceRegexp = regexp.MustCompile(referencePat) 35 36 // TagRegexp matches valid tag names. From [docker/docker:graph/tags.go]. 37 // 38 // [docker/docker:graph/tags.go]: https://github.com/moby/moby/blob/v1.6.0/graph/tags.go#L26-L28 39 var TagRegexp = regexp.MustCompile(tag) 40 41 const ( 42 // alphanumeric defines the alphanumeric atom, typically a 43 // component of names. This only allows lower case characters and digits. 44 alphanumeric = `[a-z0-9]+` 45 46 // separator defines the separators allowed to be embedded in name 47 // components. This allows one period, one or two underscore and multiple 48 // dashes. Repeated dashes and underscores are intentionally treated 49 // differently. In order to support valid hostnames as name components, 50 // supporting repeated dash was added. Additionally double underscore is 51 // now allowed as a separator to loosen the restriction for previously 52 // supported names. 53 separator = `(?:[._]|__|[-]+)` 54 55 // localhost is treated as a special value for domain-name. Any other 56 // domain-name without a "." or a ":port" are considered a path component. 57 localhost = `localhost` 58 59 // domainNameComponent restricts the registry domain component of a 60 // repository name to start with a component as defined by DomainRegexp. 61 domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])` 62 63 // optionalPort matches an optional port-number including the port separator 64 // (e.g. ":80"). 65 optionalPort = `(?::[0-9]+)?` 66 67 // tag matches valid tag names. From docker/docker:graph/tags.go. 68 tag = `[\w][\w.-]{0,127}` 69 70 // digestPat matches well-formed digests, including algorithm (e.g. "sha256:<encoded>"). 71 // 72 // TODO(thaJeztah): this should follow the same rules as https://pkg.go.dev/github.com/opencontainers/go-digest@v1.0.0#DigestRegexp 73 // so that go-digest defines the canonical format. Note that the go-digest is 74 // more relaxed: 75 // - it allows multiple algorithms (e.g. "sha256+b64:<encoded>") to allow 76 // future expansion of supported algorithms. 77 // - it allows the "<encoded>" value to use urlsafe base64 encoding as defined 78 // in [rfc4648, section 5]. 79 // 80 // [rfc4648, section 5]: https://www.rfc-editor.org/rfc/rfc4648#section-5. 81 digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}` 82 83 // identifier is the format for a content addressable identifier using sha256. 84 // These identifiers are like digests without the algorithm, since sha256 is used. 85 identifier = `([a-f0-9]{64})` 86 87 // ipv6address are enclosed between square brackets and may be represented 88 // in many ways, see rfc5952. Only IPv6 in compressed or uncompressed format 89 // are allowed, IPv6 zone identifiers (rfc6874) or Special addresses such as 90 // IPv4-Mapped are deliberately excluded. 91 ipv6address = `\[(?:[a-fA-F0-9:]+)\]` 92 ) 93 94 var ( 95 // domainName defines the structure of potential domain components 96 // that may be part of image names. This is purposely a subset of what is 97 // allowed by DNS to ensure backwards compatibility with Docker image 98 // names. This includes IPv4 addresses on decimal format. 99 domainName = domainNameComponent + anyTimes(`\.`+domainNameComponent) 100 101 // host defines the structure of potential domains based on the URI 102 // Host subcomponent on rfc3986. It may be a subset of DNS domain name, 103 // or an IPv4 address in decimal format, or an IPv6 address between square 104 // brackets (excluding zone identifiers as defined by rfc6874 or special 105 // addresses such as IPv4-Mapped). 106 host = `(?:` + domainName + `|` + ipv6address + `)` 107 108 // allowed by the URI Host subcomponent on rfc3986 to ensure backwards 109 // compatibility with Docker image names. 110 domainAndPort = host + optionalPort 111 112 // anchoredTagRegexp matches valid tag names, anchored at the start and 113 // end of the matched string. 114 anchoredTagRegexp = regexp.MustCompile(anchored(tag)) 115 116 // anchoredDigestRegexp matches valid digests, anchored at the start and 117 // end of the matched string. 118 anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat)) 119 120 // pathComponent restricts path-components to start with an alphanumeric 121 // character, with following parts able to be separated by a separator 122 // (one period, one or two underscore and multiple dashes). 123 pathComponent = alphanumeric + anyTimes(separator+alphanumeric) 124 125 // remoteName matches the remote-name of a repository. It consists of one 126 // or more forward slash (/) delimited path-components: 127 // 128 // pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu" 129 remoteName = pathComponent + anyTimes(`/`+pathComponent) 130 namePat = optional(domainAndPort+`/`) + remoteName 131 132 // anchoredNameRegexp is used to parse a name value, capturing the 133 // domain and trailing components. 134 anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName))) 135 136 referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat))) 137 138 // anchoredIdentifierRegexp is used to check or match an 139 // identifier value, anchored at start and end of string. 140 anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier)) 141 ) 142 143 // optional wraps the expression in a non-capturing group and makes the 144 // production optional. 145 func optional(res ...string) string { 146 return `(?:` + strings.Join(res, "") + `)?` 147 } 148 149 // anyTimes wraps the expression in a non-capturing group that can occur 150 // any number of times. 151 func anyTimes(res ...string) string { 152 return `(?:` + strings.Join(res, "") + `)*` 153 } 154 155 // capture wraps the expression in a capturing group. 156 func capture(res ...string) string { 157 return `(` + strings.Join(res, "") + `)` 158 } 159 160 // anchored anchors the regular expression by adding start and end delimiters. 161 func anchored(res ...string) string { 162 return `^` + strings.Join(res, "") + `$` 163 } 164