...

Source file src/sigs.k8s.io/kustomize/api/internal/git/repospec.go

Documentation: sigs.k8s.io/kustomize/api/internal/git

     1  // Copyright 2019 The Kubernetes Authors.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  package git
     5  
     6  import (
     7  	"fmt"
     8  	"log"
     9  	"net/url"
    10  	"path/filepath"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	"sigs.k8s.io/kustomize/kyaml/errors"
    17  	"sigs.k8s.io/kustomize/kyaml/filesys"
    18  )
    19  
// notCloned is a temporary, non-empty occupant of the clone directory
// field of a RepoSpec (NewRepoSpecFromURL stores it in Dir). It is
// something distinguishable from the empty string in various outputs
// (especially tests). An actual directory name is not used here, as
// that's a temporary directory with a unique name that isn't created
// until clone time.
const notCloned = filesys.ConfirmedDir("/notCloned")
    26  
// RepoSpec specifies a git repository and a branch and path therein.
// Instances are produced by NewRepoSpecFromURL.
type RepoSpec struct {
	// Raw, original spec, used to look for cycles.
	// It is exposed read-only through Raw().
	// TODO(monopole): Drop raw, use processed fields instead.
	raw string

	// Host, e.g. https://github.com/
	// It includes any scheme and username and ends with the delimiter
	// that separates the host from the path.
	Host string

	// RepoPath name (Path to repository),
	// e.g. kubernetes-sigs/kustomize
	// Host + RepoPath is what CloneSpec returns.
	RepoPath string

	// Dir is where the repository is cloned to.
	// NewRepoSpecFromURL initializes it to the notCloned placeholder.
	Dir filesys.ConfirmedDir

	// Relative path in the repository, and in the cloneDir,
	// to a Kustomization.
	KustRootPath string

	// Branch or tag reference.
	// Parsed from the "ref" (or "version") query parameter.
	Ref string

	// Submodules indicates whether or not to clone git submodules.
	// Parsed from the "submodules" query parameter.
	Submodules bool

	// Timeout is the maximum duration allowed for execing git commands.
	// Parsed from the "timeout" query parameter.
	Timeout time.Duration
}
    56  
// CloneSpec returns a string suitable for "git clone {spec}".
// It is the plain concatenation of Host and RepoPath; the kustomization
// root path and query parameters are not part of it.
func (x *RepoSpec) CloneSpec() string {
	return x.Host + x.RepoPath
}
    61  
// CloneDir returns Dir, the directory the repository is cloned to.
func (x *RepoSpec) CloneDir() filesys.ConfirmedDir {
	return x.Dir
}
    65  
// Raw returns the raw, original spec string stored in the RepoSpec.
func (x *RepoSpec) Raw() string {
	return x.raw
}
    69  
// AbsPath returns the kustomization root path KustRootPath joined onto
// the clone directory Dir.
func (x *RepoSpec) AbsPath() string {
	return x.Dir.Join(x.KustRootPath)
}
    73  
// Cleaner returns a function that calls fSys.RemoveAll on the clone
// directory and returns its error. Dir is read when the returned function
// is invoked, not when Cleaner is called.
func (x *RepoSpec) Cleaner(fSys filesys.FileSystem) func() error {
	return func() error { return fSys.RemoveAll(x.Dir.String()) }
}
    77  
// Markers and separators used while parsing git-like URLs.
const (
	// refQuery is the query-string prefix "?ref=".
	refQuery = "?ref="
	// gitSuffix is the ".git" marker searched for in the URL path; when
	// found, everything up to and including it is taken as the repo path.
	gitSuffix = ".git"
	// gitRootDelimiter is the "_git/" marker; the path element that
	// follows it is taken as the repo root.
	gitRootDelimiter = "_git/"
	pathSeparator    = "/" // do not use filepath.Separator, as this is a URL
)
    84  
    85  // NewRepoSpecFromURL parses git-like urls.
    86  // From strings like git@github.com:someOrg/someRepo.git or
    87  // https://github.com/someOrg/someRepo?ref=someHash, extract
    88  // the different parts of URL, set into a RepoSpec object and return RepoSpec object.
    89  // It MUST return an error if the input is not a git-like URL, as this is used by some code paths
    90  // to distinguish between local and remote paths.
    91  //
    92  // In particular, NewRepoSpecFromURL separates the URL used to clone the repo from the
    93  // elements Kustomize uses for other purposes (e.g. query params that turn into args, and
    94  // the path to the kustomization root within the repo).
    95  func NewRepoSpecFromURL(n string) (*RepoSpec, error) {
    96  	repoSpec := &RepoSpec{raw: n, Dir: notCloned, Timeout: defaultTimeout, Submodules: defaultSubmodules}
    97  	if filepath.IsAbs(n) {
    98  		return nil, fmt.Errorf("uri looks like abs path: %s", n)
    99  	}
   100  
   101  	// Parse the query first. This is safe because according to rfc3986 "?" is only allowed in the
   102  	// query and is not recognized %-encoded.
   103  	// Note that parseQuery returns default values for empty parameters.
   104  	n, query, _ := strings.Cut(n, "?")
   105  	repoSpec.Ref, repoSpec.Timeout, repoSpec.Submodules = parseQuery(query)
   106  
   107  	var err error
   108  
   109  	// Parse the host (e.g. scheme, username, domain) segment.
   110  	repoSpec.Host, n, err = extractHost(n)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  
   115  	// In some cases, we're given a path to a git repo + a path to the kustomization root within
   116  	// that repo. We need to split them so that we can ultimately give the repo only to the cloner.
   117  	repoSpec.RepoPath, repoSpec.KustRootPath, err = parsePathParts(n, defaultRepoPathLength(repoSpec.Host))
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	return repoSpec, nil
   123  }
   124  
   125  const allSegments = -999999
   126  const orgRepoSegments = 2
   127  
   128  func defaultRepoPathLength(host string) int {
   129  	if strings.HasPrefix(host, fileScheme) {
   130  		return allSegments
   131  	}
   132  	return orgRepoSegments
   133  }
   134  
   135  // parsePathParts splits the repo path that will ultimately be passed to git to clone the
   136  // repo from the kustomization root path, which Kustomize will execute the build in after the repo
   137  // is cloned.
   138  //
   139  // We first try to do this based on explicit markers in the URL (e.g. _git, .git or //).
   140  // If none are present, we try to apply a historical default repo path length that is derived from
   141  // Github URLs. If there aren't enough segments, we have historically considered the URL invalid.
   142  func parsePathParts(n string, defaultSegmentLength int) (string, string, error) {
   143  	repoPath, kustRootPath, success := tryExplicitMarkerSplit(n)
   144  	if !success {
   145  		repoPath, kustRootPath, success = tryDefaultLengthSplit(n, defaultSegmentLength)
   146  	}
   147  
   148  	// Validate the result
   149  	if !success || len(repoPath) == 0 {
   150  		return "", "", fmt.Errorf("failed to parse repo path segment")
   151  	}
   152  	if kustRootPathExitsRepo(kustRootPath) {
   153  		return "", "", fmt.Errorf("url path exits repo: %s", n)
   154  	}
   155  
   156  	return repoPath, strings.TrimPrefix(kustRootPath, pathSeparator), nil
   157  }
   158  
   159  func tryExplicitMarkerSplit(n string) (string, string, bool) {
   160  	// Look for the _git delimiter, which by convention is expected to be ONE directory above the repo root.
   161  	// If found, split on the NEXT path element, which is the repo root.
   162  	// Example: https://username@dev.azure.com/org/project/_git/repo/path/to/kustomization/root
   163  	if gitRootIdx := strings.Index(n, gitRootDelimiter); gitRootIdx >= 0 {
   164  		gitRootPath := n[:gitRootIdx+len(gitRootDelimiter)]
   165  		subpathSegments := strings.Split(n[gitRootIdx+len(gitRootDelimiter):], pathSeparator)
   166  		return gitRootPath + subpathSegments[0], strings.Join(subpathSegments[1:], pathSeparator), true
   167  
   168  		// Look for a double-slash in the path, which if present separates the repo root from the kust path.
   169  		// It is a convention, not a real path element, so do not preserve it in the returned value.
   170  		// Example: https://github.com/org/repo//path/to/kustomozation/root
   171  	} else if repoRootIdx := strings.Index(n, "//"); repoRootIdx >= 0 {
   172  		return n[:repoRootIdx], n[repoRootIdx+2:], true
   173  
   174  		// Look for .git in the path, which if present is part of the directory name of the git repo.
   175  		// This means we want to grab everything up to and including that suffix
   176  		// Example: https://github.com/org/repo.git/path/to/kustomozation/root
   177  	} else if gitSuffixIdx := strings.Index(n, gitSuffix); gitSuffixIdx >= 0 {
   178  		upToGitSuffix := n[:gitSuffixIdx+len(gitSuffix)]
   179  		afterGitSuffix := n[gitSuffixIdx+len(gitSuffix):]
   180  		return upToGitSuffix, afterGitSuffix, true
   181  	}
   182  	return "", "", false
   183  }
   184  
   185  func tryDefaultLengthSplit(n string, defaultSegmentLength int) (string, string, bool) {
   186  	// If the default is to take all segments, do so.
   187  	if defaultSegmentLength == allSegments {
   188  		return n, "", true
   189  
   190  		// If the default is N segments, make sure we have at least that many and take them if so.
   191  		// If we have less than N, we have historically considered the URL invalid.
   192  	} else if segments := strings.Split(n, pathSeparator); len(segments) >= defaultSegmentLength {
   193  		firstNSegments := strings.Join(segments[:defaultSegmentLength], pathSeparator)
   194  		rest := strings.Join(segments[defaultSegmentLength:], pathSeparator)
   195  		return firstNSegments, rest, true
   196  	}
   197  	return "", "", false
   198  }
   199  
   200  func kustRootPathExitsRepo(kustRootPath string) bool {
   201  	cleanedPath := filepath.Clean(strings.TrimPrefix(kustRootPath, string(filepath.Separator)))
   202  	pathElements := strings.Split(cleanedPath, string(filepath.Separator))
   203  	return len(pathElements) > 0 &&
   204  		pathElements[0] == filesys.ParentDir
   205  }
   206  
   207  // Clone git submodules by default.
   208  const defaultSubmodules = true
   209  
   210  // Arbitrary, but non-infinite, timeout for running commands.
   211  const defaultTimeout = 27 * time.Second
   212  
   213  func parseQuery(query string) (string, time.Duration, bool) {
   214  	values, err := url.ParseQuery(query)
   215  	// in event of parse failure, return defaults
   216  	if err != nil {
   217  		return "", defaultTimeout, defaultSubmodules
   218  	}
   219  
   220  	// ref is the desired git ref to target. Can be specified by in a git URL
   221  	// with ?ref=<string> or ?version=<string>, although ref takes precedence.
   222  	ref := values.Get("version")
   223  	if queryValue := values.Get("ref"); queryValue != "" {
   224  		ref = queryValue
   225  	}
   226  
   227  	// depth is the desired git exec timeout. Can be specified by in a git URL
   228  	// with ?timeout=<duration>.
   229  	duration := defaultTimeout
   230  	if queryValue := values.Get("timeout"); queryValue != "" {
   231  		// Attempt to first parse as a number of integer seconds (like "61"),
   232  		// and then attempt to parse as a suffixed duration (like "61s").
   233  		if intValue, err := strconv.Atoi(queryValue); err == nil && intValue > 0 {
   234  			duration = time.Duration(intValue) * time.Second
   235  		} else if durationValue, err := time.ParseDuration(queryValue); err == nil && durationValue > 0 {
   236  			duration = durationValue
   237  		}
   238  	}
   239  
   240  	// submodules indicates if git submodule cloning is desired. Can be
   241  	// specified by in a git URL with ?submodules=<bool>.
   242  	submodules := defaultSubmodules
   243  	if queryValue := values.Get("submodules"); queryValue != "" {
   244  		if boolValue, err := strconv.ParseBool(queryValue); err == nil {
   245  			submodules = boolValue
   246  		}
   247  	}
   248  
   249  	return ref, duration, submodules
   250  }
   251  
   252  func extractHost(n string) (string, string, error) {
   253  	n = ignoreForcedGitProtocol(n)
   254  	scheme, n := extractScheme(n)
   255  	username, n := extractUsername(n)
   256  	stdGithub := isStandardGithubHost(n)
   257  	acceptSCP := acceptSCPStyle(scheme, username, stdGithub)
   258  
   259  	// Validate the username and scheme before attempting host/path parsing, because if the parsing
   260  	// so far has not succeeded, we will not be able to extract the host and path correctly.
   261  	if err := validateScheme(scheme, acceptSCP); err != nil {
   262  		return "", "", err
   263  	}
   264  
   265  	// Now that we have extracted a valid scheme+username, we can parse host itself.
   266  
   267  	// The file protocol specifies an absolute path to a local git repo.
   268  	// Everything after the scheme (including any 'username' we found) is actually part of that path.
   269  	if scheme == fileScheme {
   270  		return scheme, username + n, nil
   271  	}
   272  	var host, rest = n, ""
   273  	if sepIndex := findPathSeparator(n, acceptSCP); sepIndex >= 0 {
   274  		host, rest = n[:sepIndex+1], n[sepIndex+1:]
   275  	}
   276  
   277  	// Github URLs are strictly normalized in a way that may discard scheme and username components.
   278  	if stdGithub {
   279  		scheme, username, host = normalizeGithubHostParts(scheme, username)
   280  	}
   281  
   282  	// Host is required, so do not concat the scheme and username if we didn't find one.
   283  	if host == "" {
   284  		return "", "", errors.Errorf("failed to parse host segment")
   285  	}
   286  	return scheme + username + host, rest, nil
   287  }
   288  
   289  // ignoreForcedGitProtocol strips the "git::" prefix from URLs.
   290  // We used to use go-getter to handle our urls: https://github.com/hashicorp/go-getter.
   291  // The git:: prefix signaled go-getter to use the git protocol to fetch the url's contents.
   292  // We silently strip this prefix to allow these go-getter-style urls to continue to work,
   293  // although the git protocol (which is insecure and unsupported on many platforms, including Github)
   294  // will not actually be used as intended.
   295  func ignoreForcedGitProtocol(n string) string {
   296  	n, found := trimPrefixIgnoreCase(n, "git::")
   297  	if found {
   298  		log.Println("Warning: Forcing the git protocol using the 'git::' URL prefix is not supported. " +
   299  			"Kustomize currently strips this invalid prefix, but will stop doing so in a future release. " +
   300  			"Please remove the 'git::' prefix from your configuration.")
   301  	}
   302  	return n
   303  }
   304  
   305  // acceptSCPStyle returns true if the scheme and username indicate potential use of an SCP-style URL.
   306  // With this style, the scheme is not explicit and the path is delimited by a colon.
   307  // Strictly speaking the username is optional in SCP-like syntax, but Kustomize has always
   308  // required it for non-Github URLs.
   309  // Example: user@host.xz:path/to/repo.git/
   310  func acceptSCPStyle(scheme, username string, isGithubURL bool) bool {
   311  	return scheme == "" && (username != "" || isGithubURL)
   312  }
   313  
   314  func validateScheme(scheme string, acceptSCPStyle bool) error {
   315  	// see https://git-scm.com/docs/git-fetch#_git_urls for info relevant to these validations
   316  	switch scheme {
   317  	case "":
   318  		// Empty scheme is only ok if it's a Github URL or if it looks like SCP-style syntax
   319  		if !acceptSCPStyle {
   320  			return fmt.Errorf("failed to parse scheme")
   321  		}
   322  	case sshScheme, fileScheme, httpsScheme, httpScheme:
   323  		// These are all supported schemes
   324  	default:
   325  		// At time of writing, we should never end up here because we do not parse out
   326  		// unsupported schemes to begin with.
   327  		return fmt.Errorf("unsupported scheme %q", scheme)
   328  	}
   329  	return nil
   330  }
   331  
   332  const fileScheme = "file://"
   333  const httpScheme = "http://"
   334  const httpsScheme = "https://"
   335  const sshScheme = "ssh://"
   336  
   337  func extractScheme(s string) (string, string) {
   338  	for _, prefix := range []string{sshScheme, httpsScheme, httpScheme, fileScheme} {
   339  		if rest, found := trimPrefixIgnoreCase(s, prefix); found {
   340  			return prefix, rest
   341  		}
   342  	}
   343  	return "", s
   344  }
   345  
   346  func extractUsername(s string) (string, string) {
   347  	var userRegexp = regexp.MustCompile(`^([a-zA-Z][a-zA-Z0-9-]*)@`)
   348  	if m := userRegexp.FindStringSubmatch(s); m != nil {
   349  		username := m[1] + "@"
   350  		return username, s[len(username):]
   351  	}
   352  	return "", s
   353  }
   354  
   355  func isStandardGithubHost(s string) bool {
   356  	lowerCased := strings.ToLower(s)
   357  	return strings.HasPrefix(lowerCased, "github.com/") ||
   358  		strings.HasPrefix(lowerCased, "github.com:")
   359  }
   360  
   361  // trimPrefixIgnoreCase returns the rest of s and true if prefix, ignoring case, prefixes s.
   362  // Otherwise, trimPrefixIgnoreCase returns s and false.
   363  func trimPrefixIgnoreCase(s, prefix string) (string, bool) {
   364  	if len(prefix) <= len(s) && strings.ToLower(s[:len(prefix)]) == prefix {
   365  		return s[len(prefix):], true
   366  	}
   367  	return s, false
   368  }
   369  
   370  func findPathSeparator(hostPath string, acceptSCP bool) int {
   371  	sepIndex := strings.Index(hostPath, pathSeparator)
   372  	if acceptSCP {
   373  		colonIndex := strings.Index(hostPath, ":")
   374  		// The colon acts as a delimiter in scp-style ssh URLs only if not prefixed by '/'.
   375  		if sepIndex == -1 || (colonIndex > 0 && colonIndex < sepIndex) {
   376  			sepIndex = colonIndex
   377  		}
   378  	}
   379  	return sepIndex
   380  }
   381  
   382  func normalizeGithubHostParts(scheme, username string) (string, string, string) {
   383  	if strings.HasPrefix(scheme, sshScheme) || username != "" {
   384  		return "", username, "github.com:"
   385  	}
   386  	return httpsScheme, "", "github.com/"
   387  }
   388  

View as plain text