/* Copyright 2018 The Bazel Authors. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package repo import ( "bytes" "encoding/json" "errors" "fmt" "os" "os/exec" "path" "path/filepath" "regexp" "runtime" "strings" "sync" "github.com/bazelbuild/bazel-gazelle/label" "github.com/bazelbuild/bazel-gazelle/pathtools" "golang.org/x/mod/modfile" "golang.org/x/tools/go/vcs" ) // RemoteCache stores information about external repositories. The cache may // be initialized with information about known repositories, i.e., those listed // in the WORKSPACE file and mentioned on the command line. Other information // is retrieved over the network. // // Public methods of RemoteCache may be slow in cases where a network fetch // is needed. Public methods may be called concurrently. // // TODO(jayconrod): this is very Go-centric. It should be moved to language/go. // Unfortunately, doing so would break the resolve.Resolver interface. type RemoteCache struct { // RepoRootForImportPath is vcs.RepoRootForImportPath by default. It may // be overridden so that tests may avoid accessing the network. RepoRootForImportPath func(string, bool) (*vcs.RepoRoot, error) // HeadCmd returns the latest commit on the default branch in the given // repository. This is used by Head. It may be stubbed out for tests. HeadCmd func(remote, vcs string) (string, error) // ModInfo returns the module path and version that provides the package // with the given import path. This is used by Mod. It may be stubbed // out for tests. ModInfo func(importPath string) (modPath string, err error) // ModVersionInfo returns the module path, true version, and sum for // the module that provides the package with the given import path. // This is used by ModVersion. It may be stubbed out for tests. ModVersionInfo func(modPath, query string) (version, sum string, err error) root, remote, head, mod, modVersion remoteCacheMap tmpOnce sync.Once tmpDir string tmpErr error } // remoteCacheMap is a thread-safe, idempotent cache. It is used to store // information which should be fetched over the network no more than once. // This follows the Memo pattern described in The Go Programming Language, // section 9.7. type remoteCacheMap struct { mu sync.Mutex cache map[string]*remoteCacheEntry } type remoteCacheEntry struct { value interface{} err error // ready is nil for entries that were added when the cache was initialized. // It is non-nil for other entries. It is closed when an entry is ready, // i.e., the operation loading the entry completed. ready chan struct{} } type rootValue struct { root, name string } type remoteValue struct { remote, vcs string } type headValue struct { commit, tag string } type modValue struct { path, name string known bool } type modVersionValue struct { path, version, sum string } // Repo describes details of a Go repository known in advance. It is used to // initialize RemoteCache so that some repositories don't need to be looked up. // // DEPRECATED: Go-specific details should be removed from RemoteCache, and // lookup logic should be moved to language/go. This means RemoteCache will // need to be initialized in a different way. type Repo struct { Name, GoPrefix, Remote, VCS string } // NewRemoteCache creates a new RemoteCache with a set of known repositories. // The Root and Remote methods will return information about repositories listed // here without accessing the network. However, the Head method will still // access the network for these repositories to retrieve information about new // versions. // // A cleanup function is also returned. The caller must call this when // RemoteCache is no longer needed. RemoteCache may write files to a temporary // directory. This will delete them. func NewRemoteCache(knownRepos []Repo) (r *RemoteCache, cleanup func() error) { r = &RemoteCache{ RepoRootForImportPath: vcs.RepoRootForImportPath, HeadCmd: defaultHeadCmd, root: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, remote: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, head: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, mod: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, modVersion: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, } r.ModInfo = func(importPath string) (string, error) { return defaultModInfo(r, importPath) } r.ModVersionInfo = func(modPath, query string) (string, string, error) { return defaultModVersionInfo(r, modPath, query) } for _, repo := range knownRepos { r.root.cache[repo.GoPrefix] = &remoteCacheEntry{ value: rootValue{ root: repo.GoPrefix, name: repo.Name, }, } if repo.Remote != "" { r.remote.cache[repo.GoPrefix] = &remoteCacheEntry{ value: remoteValue{ remote: repo.Remote, vcs: repo.VCS, }, } } r.mod.cache[repo.GoPrefix] = &remoteCacheEntry{ value: modValue{ path: repo.GoPrefix, name: repo.Name, known: true, }, } } // Augment knownRepos with additional prefixes for // minimal module compatibility. For example, if repo "com_example_foo_v2" // has prefix "example.com/foo/v2", map "example.com/foo" to the same // entry. // TODO(jayconrod): there should probably be some control over whether // callers can use these mappings: packages within modules should not be // allowed to use them. However, we'll return the same result nearly all // the time, and simpler is better. for _, repo := range knownRepos { newPath := pathWithoutSemver(repo.GoPrefix) if newPath == "" { continue } // Avoid adding the semver-less path for this module if there // is another known module which already covers this path. // See https://github.com/bazelbuild/bazel-gazelle/issues/1595. found := false for prefix := newPath; prefix != "." && prefix != "/"; prefix = path.Dir(prefix) { if _, ok := r.root.cache[prefix]; ok { found = true break } } if found { continue } r.root.cache[newPath] = r.root.cache[repo.GoPrefix] if e := r.remote.cache[repo.GoPrefix]; e != nil { r.remote.cache[newPath] = e } r.mod.cache[newPath] = r.mod.cache[repo.GoPrefix] } return r, r.cleanup } func (r *RemoteCache) cleanup() error { if r.tmpDir == "" { return nil } return os.RemoveAll(r.tmpDir) } // PopulateFromGoMod reads a go.mod file and adds entries to the r.root // map based on the file's require directives. PopulateFromGoMod does not // override entries already in the cache. This should help avoid going // out to the network for external dependency resolution, and it should // let static dependency resolution succeed more often. func (r *RemoteCache) PopulateFromGoMod(goModPath string) (err error) { defer func() { if err != nil { err = fmt.Errorf("reading module paths from %s: %w", goModPath, err) } }() data, err := os.ReadFile(goModPath) if err != nil { return err } var versionFixer modfile.VersionFixer f, err := modfile.Parse(goModPath, data, versionFixer) if err != nil { return err } for _, req := range f.Require { r.root.ensure(req.Mod.Path, func() (any, error) { return rootValue{ root: req.Mod.Path, name: label.ImportPathToBazelRepoName(req.Mod.Path), }, nil }) } return nil } var gopkginPattern = regexp.MustCompile(`^(gopkg.in/(?:[^/]+/)?[^/]+\.v\d+)(?:/|$)`) var knownPrefixes = []struct { prefix string missing int }{ {prefix: "golang.org/x", missing: 1}, {prefix: "google.golang.org", missing: 1}, {prefix: "cloud.google.com", missing: 1}, {prefix: "github.com", missing: 2}, } // RootStatic checks the cache to see if the provided importpath matches any known roots. // If no matches are found, rather than going out to the network to determine the root, // nothing is returned. func (r *RemoteCache) RootStatic(importPath string) (root, name string, err error) { for prefix := importPath; prefix != "." && prefix != "/"; prefix = path.Dir(prefix) { v, ok, err := r.root.get(prefix) if ok { if err != nil { return "", "", err } value := v.(rootValue) return value.root, value.name, nil } } return "", "", nil } // Root returns the portion of an import path that corresponds to the root // directory of the repository containing the given import path. For example, // given "golang.org/x/tools/go/loader", this will return "golang.org/x/tools". // The workspace name of the repository is also returned. This may be a custom // name set in WORKSPACE, or it may be a generated name based on the root path. func (r *RemoteCache) Root(importPath string) (root, name string, err error) { // Try prefixes of the import path in the cache, but don't actually go out // to vcs yet. We do this before handling known special cases because // the cache is pre-populated with repository rules, and we want to use their // names if we can. prefix := importPath for { v, ok, err := r.root.get(prefix) if ok { if err != nil { return "", "", err } value := v.(rootValue) return value.root, value.name, nil } prefix = path.Dir(prefix) if prefix == "." || prefix == "/" { break } } // Try known prefixes. for _, p := range knownPrefixes { if pathtools.HasPrefix(importPath, p.prefix) { rest := pathtools.TrimPrefix(importPath, p.prefix) var components []string if rest != "" { components = strings.Split(rest, "/") } if len(components) < p.missing { return "", "", fmt.Errorf("import path %q is shorter than the known prefix %q", importPath, p.prefix) } root = p.prefix for _, c := range components[:p.missing] { root = path.Join(root, c) } name = label.ImportPathToBazelRepoName(root) return root, name, nil } } // gopkg.in is special, and might have either one or two levels of // missing paths. See http://labix.org/gopkg.in for URL patterns. if match := gopkginPattern.FindStringSubmatch(importPath); len(match) > 0 { root = match[1] name = label.ImportPathToBazelRepoName(root) return root, name, nil } // Find the prefix using vcs and cache the result. v, err := r.root.ensure(importPath, func() (interface{}, error) { res, err := r.RepoRootForImportPath(importPath, false) if err != nil { return nil, err } return rootValue{res.Root, label.ImportPathToBazelRepoName(res.Root)}, nil }) if err != nil { return "", "", err } value := v.(rootValue) return value.root, value.name, nil } // Remote returns the VCS name and the remote URL for a repository with the // given root import path. This is suitable for creating new repository rules. func (r *RemoteCache) Remote(root string) (remote, vcs string, err error) { v, err := r.remote.ensure(root, func() (interface{}, error) { repo, err := r.RepoRootForImportPath(root, false) if err != nil { return nil, err } return remoteValue{remote: repo.Repo, vcs: repo.VCS.Cmd}, nil }) if err != nil { return "", "", err } value := v.(remoteValue) return value.remote, value.vcs, nil } // Head returns the most recent commit id on the default branch and latest // version tag for the given remote repository. The tag "" is returned if // no latest version was found. // // TODO(jayconrod): support VCS other than git. // TODO(jayconrod): support version tags. "" is always returned. func (r *RemoteCache) Head(remote, vcs string) (commit, tag string, err error) { if vcs != "git" { return "", "", fmt.Errorf("could not locate recent commit in repo %q with unknown version control scheme %q", remote, vcs) } v, err := r.head.ensure(remote, func() (interface{}, error) { commit, err := r.HeadCmd(remote, vcs) if err != nil { return nil, err } return headValue{commit: commit}, nil }) if err != nil { return "", "", err } value := v.(headValue) return value.commit, value.tag, nil } func defaultHeadCmd(remote, vcs string) (string, error) { switch vcs { case "local": return "", nil case "git": // Old versions of git ls-remote exit with code 129 when "--" is passed. // We'll try to validate the argument here instead. if strings.HasPrefix(remote, "-") { return "", fmt.Errorf("remote must not start with '-': %q", remote) } cmd := exec.Command("git", "ls-remote", remote, "HEAD") out, err := cmd.Output() if err != nil { return "", fmt.Errorf("git ls-remote for %s: %v", remote, cleanCmdError(err)) } ix := bytes.IndexByte(out, '\t') if ix < 0 { return "", fmt.Errorf("could not parse output for git ls-remote for %q", remote) } return string(out[:ix]), nil default: return "", fmt.Errorf("unknown version control system: %s", vcs) } } // Mod returns the module path for the module that contains the package // named by importPath. The name of the go_repository rule for the module // is also returned. For example, calling Mod on "github.com/foo/bar/v2/baz" // would give the module path "github.com/foo/bar/v2" and the name // "com_github_foo_bar_v2". // // If a known repository *could* provide importPath (because its "importpath" // is a prefix of importPath), Mod will assume that it does. This may give // inaccurate results if importPath is in an undeclared nested module. Run // "gazelle update-repos -from_file=go.mod" first for best results. // // If no known repository could provide importPath, Mod will run "go list" to // find the module. The special patterns that Root uses are ignored. Results are // cached. Use GOPROXY for faster results. func (r *RemoteCache) Mod(importPath string) (modPath, name string, err error) { // Check if any of the known repositories is a prefix. prefix := importPath for { v, ok, err := r.mod.get(prefix) if ok { if err != nil { return "", "", err } value := v.(modValue) if value.known { return value.path, value.name, nil } else { break } } prefix = path.Dir(prefix) if prefix == "." || prefix == "/" { break } } // Ask "go list". v, err := r.mod.ensure(importPath, func() (interface{}, error) { modPath, err := r.ModInfo(importPath) if err != nil { return nil, err } return modValue{ path: modPath, name: label.ImportPathToBazelRepoName(modPath), }, nil }) if err != nil { return "", "", err } value := v.(modValue) return value.path, value.name, nil } func defaultModInfo(rc *RemoteCache, importPath string) (modPath string, err error) { rc.initTmp() if rc.tmpErr != nil { return "", rc.tmpErr } defer func() { if err != nil { err = fmt.Errorf("finding module path for import %s: %v", importPath, cleanCmdError(err)) } }() goTool := findGoTool() env := append(os.Environ(), "GO111MODULE=on") cmd := exec.Command(goTool, "get", "-d", "--", importPath) cmd.Dir = rc.tmpDir cmd.Env = env if _, err := cmd.Output(); err != nil { return "", err } cmd = exec.Command(goTool, "list", "-find", "-f", "{{.Module.Path}}", "--", importPath) cmd.Dir = rc.tmpDir cmd.Env = env out, err := cmd.Output() if err != nil { return "", fmt.Errorf("finding module path for import %s: %v", importPath, cleanCmdError(err)) } return strings.TrimSpace(string(out)), nil } // ModVersion looks up information about a module at a given version. // The path must be the module path, not a package within the module. // The version may be a canonical semantic version, a query like "latest", // or a branch, tag, or revision name. ModVersion returns the name of // the repository rule providing the module (if any), the true version, // and the sum. func (r *RemoteCache) ModVersion(modPath, query string) (name, version, sum string, err error) { // Ask "go list". arg := modPath + "@" + query v, err := r.modVersion.ensure(arg, func() (interface{}, error) { version, sum, err := r.ModVersionInfo(modPath, query) if err != nil { return nil, err } return modVersionValue{ path: modPath, version: version, sum: sum, }, nil }) if err != nil { return "", "", "", err } value := v.(modVersionValue) // Try to find the repository name for the module, if there's already // a repository rule that provides it. v, ok, err := r.mod.get(modPath) if ok && err == nil { name = v.(modValue).name } else { name = label.ImportPathToBazelRepoName(modPath) } return name, value.version, value.sum, nil } func defaultModVersionInfo(rc *RemoteCache, modPath, query string) (version, sum string, err error) { rc.initTmp() if rc.tmpErr != nil { return "", "", rc.tmpErr } defer func() { if err != nil { err = fmt.Errorf("finding module version and sum for %s@%s: %v", modPath, query, cleanCmdError(err)) } }() goTool := findGoTool() cmd := exec.Command(goTool, "mod", "download", "-json", "--", modPath+"@"+query) cmd.Dir = rc.tmpDir cmd.Env = append(os.Environ(), "GO111MODULE=on") out, err := cmd.Output() if err != nil { return "", "", err } var result struct{ Version, Sum string } if err := json.Unmarshal(out, &result); err != nil { return "", "", fmt.Errorf("invalid output from 'go mod download': %v", err) } return result.Version, result.Sum, nil } // get retrieves a value associated with the given key from the cache. ok will // be true if the key exists in the cache, even if it's in the process of // being fetched. func (m *remoteCacheMap) get(key string) (value interface{}, ok bool, err error) { m.mu.Lock() e, ok := m.cache[key] m.mu.Unlock() if !ok { return nil, ok, nil } if e.ready != nil { <-e.ready } return e.value, ok, e.err } // ensure retreives a value associated with the given key from the cache. If // the key does not exist in the cache, the load function will be called, // and its result will be associated with the key. The load function will not // be called more than once for any key. func (m *remoteCacheMap) ensure(key string, load func() (interface{}, error)) (interface{}, error) { m.mu.Lock() e, ok := m.cache[key] if !ok { e = &remoteCacheEntry{ready: make(chan struct{})} m.cache[key] = e m.mu.Unlock() e.value, e.err = load() close(e.ready) } else { m.mu.Unlock() if e.ready != nil { <-e.ready } } return e.value, e.err } func (rc *RemoteCache) initTmp() { rc.tmpOnce.Do(func() { rc.tmpDir, rc.tmpErr = os.MkdirTemp("", "gazelle-remotecache-") if rc.tmpErr != nil { return } rc.tmpErr = os.WriteFile(filepath.Join(rc.tmpDir, "go.mod"), []byte("module gazelle_remote_cache\ngo 1.15\n"), 0o666) }) } var semverRex = regexp.MustCompile(`^.*?(/v\d+)(?:/.*)?$`) // pathWithoutSemver removes a semantic version suffix from path. // For example, if path is "example.com/foo/v2/bar", pathWithoutSemver // will return "example.com/foo/bar". If there is no semantic version suffix, // "" will be returned. // TODO(jayconrod): copied from language/go. This whole type should be // migrated there. func pathWithoutSemver(path string) string { m := semverRex.FindStringSubmatchIndex(path) if m == nil { return "" } v := path[m[2]+2 : m[3]] if v == "0" || v == "1" { return "" } return path[:m[2]] + path[m[3]:] } // findGoTool attempts to locate the go executable. If GOROOT is set, we'll // prefer the one in there; otherwise, we'll rely on PATH. If the wrapper // script generated by the gazelle rule is invoked by Bazel, it will set // GOROOT to the configured SDK. We don't want to rely on the host SDK in // that situation. // // TODO(jayconrod): copied from language/go (though it was originally in this // package). Go-specific details should be removed from RemoteCache, and // this copy should be deleted. func findGoTool() string { path := "go" // rely on PATH by default if goroot, ok := os.LookupEnv("GOROOT"); ok { path = filepath.Join(goroot, "bin", "go") } if runtime.GOOS == "windows" { path += ".exe" } return path } // cleanCmdError simplifies error messages from os/exec.Cmd.Run. // For ExitErrors, it trims and returns stderr. This is useful for go commands // that print well-formatted errors. By default, ExitError prints the exit // status but not stderr. // // cleanCmdError returns other errors unmodified. func cleanCmdError(err error) error { if xerr, ok := err.(*exec.ExitError); ok { if stderr := strings.TrimSpace(string(xerr.Stderr)); stderr != "" { return errors.New(stderr) } } return err }