1 package storage 2 3 import ( 4 "fmt" 5 "path" 6 "strings" 7 8 "github.com/opencontainers/go-digest" 9 ) 10 11 const ( 12 storagePathVersion = "v2" // fixed storage layout version 13 storagePathRoot = "/docker/registry/" // all driver paths have a prefix 14 15 // TODO(stevvooe): Get rid of the "storagePathRoot". Initially, we though 16 // storage path root would configurable for all drivers through this 17 // package. In reality, we've found it simpler to do this on a per driver 18 // basis. 19 ) 20 21 // pathFor maps paths based on "object names" and their ids. The "object 22 // names" mapped by are internal to the storage system. 23 // 24 // The path layout in the storage backend is roughly as follows: 25 // 26 // <root>/v2 27 // -> repositories/ 28 // -><name>/ 29 // -> _manifests/ 30 // revisions 31 // -> <manifest digest path> 32 // -> link 33 // tags/<tag> 34 // -> current/link 35 // -> index 36 // -> <algorithm>/<hex digest>/link 37 // -> _layers/ 38 // <layer links to blob store> 39 // -> _uploads/<id> 40 // data 41 // startedat 42 // hashstates/<algorithm>/<offset> 43 // -> blob/<algorithm> 44 // <split directory content addressable storage> 45 // 46 // The storage backend layout is broken up into a content-addressable blob 47 // store and repositories. The content-addressable blob store holds most data 48 // throughout the backend, keyed by algorithm and digests of the underlying 49 // content. Access to the blob store is controlled through links from the 50 // repository to blobstore. 51 // 52 // A repository is made up of layers, manifests and tags. The layers component 53 // is just a directory of layers which are "linked" into a repository. A layer 54 // can only be accessed through a qualified repository name if it is linked in 55 // the repository. Uploads of layers are managed in the uploads directory, 56 // which is key by upload id. When all data for an upload is received, the 57 // data is moved into the blob store and the upload directory is deleted. 58 // Abandoned uploads can be garbage collected by reading the startedat file 59 // and removing uploads that have been active for longer than a certain time. 60 // 61 // The third component of the repository directory is the manifests store, 62 // which is made up of a revision store and tag store. Manifests are stored in 63 // the blob store and linked into the revision store. 64 // While the registry can save all revisions of a manifest, no relationship is 65 // implied as to the ordering of changes to a manifest. The tag store provides 66 // support for name, tag lookups of manifests, using "current/link" under a 67 // named tag directory. An index is maintained to support deletions of all 68 // revisions of a given manifest tag. 69 // 70 // We cover the path formats implemented by this path mapper below. 71 // 72 // Manifests: 73 // 74 // manifestRevisionsPathSpec: <root>/v2/repositories/<name>/_manifests/revisions/ 75 // manifestRevisionPathSpec: <root>/v2/repositories/<name>/_manifests/revisions/<algorithm>/<hex digest>/ 76 // manifestRevisionLinkPathSpec: <root>/v2/repositories/<name>/_manifests/revisions/<algorithm>/<hex digest>/link 77 // 78 // Tags: 79 // 80 // manifestTagsPathSpec: <root>/v2/repositories/<name>/_manifests/tags/ 81 // manifestTagPathSpec: <root>/v2/repositories/<name>/_manifests/tags/<tag>/ 82 // manifestTagCurrentPathSpec: <root>/v2/repositories/<name>/_manifests/tags/<tag>/current/link 83 // manifestTagIndexPathSpec: <root>/v2/repositories/<name>/_manifests/tags/<tag>/index/ 84 // manifestTagIndexEntryPathSpec: <root>/v2/repositories/<name>/_manifests/tags/<tag>/index/<algorithm>/<hex digest>/ 85 // manifestTagIndexEntryLinkPathSpec: <root>/v2/repositories/<name>/_manifests/tags/<tag>/index/<algorithm>/<hex digest>/link 86 // 87 // Blobs: 88 // 89 // layerLinkPathSpec: <root>/v2/repositories/<name>/_layers/<algorithm>/<hex digest>/link 90 // 91 // Uploads: 92 // 93 // uploadDataPathSpec: <root>/v2/repositories/<name>/_uploads/<id>/data 94 // uploadStartedAtPathSpec: <root>/v2/repositories/<name>/_uploads/<id>/startedat 95 // uploadHashStatePathSpec: <root>/v2/repositories/<name>/_uploads/<id>/hashstates/<algorithm>/<offset> 96 // 97 // Blob Store: 98 // 99 // blobsPathSpec: <root>/v2/blobs/ 100 // blobPathSpec: <root>/v2/blobs/<algorithm>/<first two hex bytes of digest>/<hex digest> 101 // blobDataPathSpec: <root>/v2/blobs/<algorithm>/<first two hex bytes of digest>/<hex digest>/data 102 // blobMediaTypePathSpec: <root>/v2/blobs/<algorithm>/<first two hex bytes of digest>/<hex digest>/data 103 // 104 // For more information on the semantic meaning of each path and their 105 // contents, please see the path spec documentation. 106 func pathFor(spec pathSpec) (string, error) { 107 108 // Switch on the path object type and return the appropriate path. At 109 // first glance, one may wonder why we don't use an interface to 110 // accomplish this. By keep the formatting separate from the pathSpec, we 111 // keep separate the path generation componentized. These specs could be 112 // passed to a completely different mapper implementation and generate a 113 // different set of paths. 114 // 115 // For example, imagine migrating from one backend to the other: one could 116 // build a filesystem walker that converts a string path in one version, 117 // to an intermediate path object, than can be consumed and mapped by the 118 // other version. 119 120 rootPrefix := []string{storagePathRoot, storagePathVersion} 121 repoPrefix := append(rootPrefix, "repositories") 122 123 switch v := spec.(type) { 124 125 case manifestRevisionsPathSpec: 126 return path.Join(append(repoPrefix, v.name, "_manifests", "revisions")...), nil 127 128 case manifestRevisionPathSpec: 129 components, err := digestPathComponents(v.revision, false) 130 if err != nil { 131 return "", err 132 } 133 134 return path.Join(append(append(repoPrefix, v.name, "_manifests", "revisions"), components...)...), nil 135 case manifestRevisionLinkPathSpec: 136 root, err := pathFor(manifestRevisionPathSpec(v)) 137 138 if err != nil { 139 return "", err 140 } 141 142 return path.Join(root, "link"), nil 143 case manifestTagsPathSpec: 144 return path.Join(append(repoPrefix, v.name, "_manifests", "tags")...), nil 145 case manifestTagPathSpec: 146 root, err := pathFor(manifestTagsPathSpec{ 147 name: v.name, 148 }) 149 150 if err != nil { 151 return "", err 152 } 153 154 return path.Join(root, v.tag), nil 155 case manifestTagCurrentPathSpec: 156 root, err := pathFor(manifestTagPathSpec(v)) 157 158 if err != nil { 159 return "", err 160 } 161 162 return path.Join(root, "current", "link"), nil 163 case manifestTagIndexPathSpec: 164 root, err := pathFor(manifestTagPathSpec(v)) 165 166 if err != nil { 167 return "", err 168 } 169 170 return path.Join(root, "index"), nil 171 case manifestTagIndexEntryLinkPathSpec: 172 root, err := pathFor(manifestTagIndexEntryPathSpec(v)) 173 174 if err != nil { 175 return "", err 176 } 177 178 return path.Join(root, "link"), nil 179 case manifestTagIndexEntryPathSpec: 180 root, err := pathFor(manifestTagIndexPathSpec{ 181 name: v.name, 182 tag: v.tag, 183 }) 184 185 if err != nil { 186 return "", err 187 } 188 189 components, err := digestPathComponents(v.revision, false) 190 if err != nil { 191 return "", err 192 } 193 194 return path.Join(root, path.Join(components...)), nil 195 case layerLinkPathSpec: 196 components, err := digestPathComponents(v.digest, false) 197 if err != nil { 198 return "", err 199 } 200 201 // TODO(stevvooe): Right now, all blobs are linked under "_layers". If 202 // we have future migrations, we may want to rename this to "_blobs". 203 // A migration strategy would simply leave existing items in place and 204 // write the new paths, commit a file then delete the old files. 205 206 blobLinkPathComponents := append(repoPrefix, v.name, "_layers") 207 208 return path.Join(path.Join(append(blobLinkPathComponents, components...)...), "link"), nil 209 case blobsPathSpec: 210 blobsPathPrefix := append(rootPrefix, "blobs") 211 return path.Join(blobsPathPrefix...), nil 212 case blobPathSpec: 213 components, err := digestPathComponents(v.digest, true) 214 if err != nil { 215 return "", err 216 } 217 218 blobPathPrefix := append(rootPrefix, "blobs") 219 return path.Join(append(blobPathPrefix, components...)...), nil 220 case blobDataPathSpec: 221 components, err := digestPathComponents(v.digest, true) 222 if err != nil { 223 return "", err 224 } 225 226 components = append(components, "data") 227 blobPathPrefix := append(rootPrefix, "blobs") 228 return path.Join(append(blobPathPrefix, components...)...), nil 229 230 case uploadDataPathSpec: 231 return path.Join(append(repoPrefix, v.name, "_uploads", v.id, "data")...), nil 232 case uploadStartedAtPathSpec: 233 return path.Join(append(repoPrefix, v.name, "_uploads", v.id, "startedat")...), nil 234 case uploadHashStatePathSpec: 235 offset := fmt.Sprintf("%d", v.offset) 236 if v.list { 237 offset = "" // Limit to the prefix for listing offsets. 238 } 239 return path.Join(append(repoPrefix, v.name, "_uploads", v.id, "hashstates", string(v.alg), offset)...), nil 240 case repositoriesRootPathSpec: 241 return path.Join(repoPrefix...), nil 242 default: 243 // TODO(sday): This is an internal error. Ensure it doesn't escape (panic?). 244 return "", fmt.Errorf("unknown path spec: %#v", v) 245 } 246 } 247 248 // pathSpec is a type to mark structs as path specs. There is no 249 // implementation because we'd like to keep the specs and the mappers 250 // decoupled. 251 type pathSpec interface { 252 pathSpec() 253 } 254 255 // manifestRevisionsPathSpec describes the directory path for 256 // a manifest revision. 257 type manifestRevisionsPathSpec struct { 258 name string 259 } 260 261 func (manifestRevisionsPathSpec) pathSpec() {} 262 263 // manifestRevisionPathSpec describes the components of the directory path for 264 // a manifest revision. 265 type manifestRevisionPathSpec struct { 266 name string 267 revision digest.Digest 268 } 269 270 func (manifestRevisionPathSpec) pathSpec() {} 271 272 // manifestRevisionLinkPathSpec describes the path components required to look 273 // up the data link for a revision of a manifest. If this file is not present, 274 // the manifest blob is not available in the given repo. The contents of this 275 // file should just be the digest. 276 type manifestRevisionLinkPathSpec struct { 277 name string 278 revision digest.Digest 279 } 280 281 func (manifestRevisionLinkPathSpec) pathSpec() {} 282 283 // manifestTagsPathSpec describes the path elements required to point to the 284 // manifest tags directory. 285 type manifestTagsPathSpec struct { 286 name string 287 } 288 289 func (manifestTagsPathSpec) pathSpec() {} 290 291 // manifestTagPathSpec describes the path elements required to point to the 292 // manifest tag links files under a repository. These contain a blob id that 293 // can be used to look up the data and signatures. 294 type manifestTagPathSpec struct { 295 name string 296 tag string 297 } 298 299 func (manifestTagPathSpec) pathSpec() {} 300 301 // manifestTagCurrentPathSpec describes the link to the current revision for a 302 // given tag. 303 type manifestTagCurrentPathSpec struct { 304 name string 305 tag string 306 } 307 308 func (manifestTagCurrentPathSpec) pathSpec() {} 309 310 // manifestTagCurrentPathSpec describes the link to the index of revisions 311 // with the given tag. 312 type manifestTagIndexPathSpec struct { 313 name string 314 tag string 315 } 316 317 func (manifestTagIndexPathSpec) pathSpec() {} 318 319 // manifestTagIndexEntryPathSpec contains the entries of the index by revision. 320 type manifestTagIndexEntryPathSpec struct { 321 name string 322 tag string 323 revision digest.Digest 324 } 325 326 func (manifestTagIndexEntryPathSpec) pathSpec() {} 327 328 // manifestTagIndexEntryLinkPathSpec describes the link to a revisions of a 329 // manifest with given tag within the index. 330 type manifestTagIndexEntryLinkPathSpec struct { 331 name string 332 tag string 333 revision digest.Digest 334 } 335 336 func (manifestTagIndexEntryLinkPathSpec) pathSpec() {} 337 338 // blobLinkPathSpec specifies a path for a blob link, which is a file with a 339 // blob id. The blob link will contain a content addressable blob id reference 340 // into the blob store. The format of the contents is as follows: 341 // 342 // <algorithm>:<hex digest of layer data> 343 // 344 // The following example of the file contents is more illustrative: 345 // 346 // sha256:96443a84ce518ac22acb2e985eda402b58ac19ce6f91980bde63726a79d80b36 347 // 348 // This indicates that there is a blob with the id/digest, calculated via 349 // sha256 that can be fetched from the blob store. 350 type layerLinkPathSpec struct { 351 name string 352 digest digest.Digest 353 } 354 355 func (layerLinkPathSpec) pathSpec() {} 356 357 // blobAlgorithmReplacer does some very simple path sanitization for user 358 // input. Paths should be "safe" before getting this far due to strict digest 359 // requirements but we can add further path conversion here, if needed. 360 var blobAlgorithmReplacer = strings.NewReplacer( 361 "+", "/", 362 ".", "/", 363 ";", "/", 364 ) 365 366 // blobsPathSpec contains the path for the blobs directory 367 type blobsPathSpec struct{} 368 369 func (blobsPathSpec) pathSpec() {} 370 371 // blobPathSpec contains the path for the registry global blob store. 372 type blobPathSpec struct { 373 digest digest.Digest 374 } 375 376 func (blobPathSpec) pathSpec() {} 377 378 // blobDataPathSpec contains the path for the registry global blob store. For 379 // now, this contains layer data, exclusively. 380 type blobDataPathSpec struct { 381 digest digest.Digest 382 } 383 384 func (blobDataPathSpec) pathSpec() {} 385 386 // uploadDataPathSpec defines the path parameters of the data file for 387 // uploads. 388 type uploadDataPathSpec struct { 389 name string 390 id string 391 } 392 393 func (uploadDataPathSpec) pathSpec() {} 394 395 // uploadDataPathSpec defines the path parameters for the file that stores the 396 // start time of an uploads. If it is missing, the upload is considered 397 // unknown. Admittedly, the presence of this file is an ugly hack to make sure 398 // we have a way to cleanup old or stalled uploads that doesn't rely on driver 399 // FileInfo behavior. If we come up with a more clever way to do this, we 400 // should remove this file immediately and rely on the startetAt field from 401 // the client to enforce time out policies. 402 type uploadStartedAtPathSpec struct { 403 name string 404 id string 405 } 406 407 func (uploadStartedAtPathSpec) pathSpec() {} 408 409 // uploadHashStatePathSpec defines the path parameters for the file that stores 410 // the hash function state of an upload at a specific byte offset. If `list` is 411 // set, then the path mapper will generate a list prefix for all hash state 412 // offsets for the upload identified by the name, id, and alg. 413 type uploadHashStatePathSpec struct { 414 name string 415 id string 416 alg digest.Algorithm 417 offset int64 418 list bool 419 } 420 421 func (uploadHashStatePathSpec) pathSpec() {} 422 423 // repositoriesRootPathSpec returns the root of repositories 424 type repositoriesRootPathSpec struct { 425 } 426 427 func (repositoriesRootPathSpec) pathSpec() {} 428 429 // digestPathComponents provides a consistent path breakdown for a given 430 // digest. For a generic digest, it will be as follows: 431 // 432 // <algorithm>/<hex digest> 433 // 434 // If multilevel is true, the first two bytes of the digest will separate 435 // groups of digest folder. It will be as follows: 436 // 437 // <algorithm>/<first two bytes of digest>/<full digest> 438 func digestPathComponents(dgst digest.Digest, multilevel bool) ([]string, error) { 439 if err := dgst.Validate(); err != nil { 440 return nil, err 441 } 442 443 algorithm := blobAlgorithmReplacer.Replace(string(dgst.Algorithm())) 444 hex := dgst.Hex() 445 prefix := []string{algorithm} 446 447 var suffix []string 448 449 if multilevel { 450 suffix = append(suffix, hex[:2]) 451 } 452 453 suffix = append(suffix, hex) 454 455 return append(prefix, suffix...), nil 456 } 457 458 // Reconstructs a digest from a path 459 func digestFromPath(digestPath string) (digest.Digest, error) { 460 461 digestPath = strings.TrimSuffix(digestPath, "/data") 462 dir, hex := path.Split(digestPath) 463 dir = path.Dir(dir) 464 dir, next := path.Split(dir) 465 466 // next is either the algorithm OR the first two characters in the hex string 467 var algo string 468 if next == hex[:2] { 469 algo = path.Base(dir) 470 } else { 471 algo = next 472 } 473 474 dgst := digest.NewDigestFromHex(algo, hex) 475 return dgst, dgst.Validate() 476 } 477