...

Source file src/cuelang.org/go/internal/core/dep/dep.go

Documentation: cuelang.org/go/internal/core/dep

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package dep analyzes dependencies between values.
    16  package dep
    17  
    18  import (
    19  	"cuelang.org/go/cue/errors"
    20  	"cuelang.org/go/internal/core/adt"
    21  )
    22  
    23  // Dependencies
    24  //
    25  // A dependency is a reference relation from one Vertex to another. A Vertex
    26  // has multiple Conjuncts, each of which is associated with an expression.
    27  // Each expression, in turn, may have multiple references, each representing
    28  // a single dependency.
    29  //
    30  // A reference that occurs in a node will point to another node. A reference
    31  // `x.y` may point to a node `x.y` as well as `x`. By default, only the most
    32  // precise node is reported, which is `x.y` if it exists, or `x` otherwise.
    33  // In the latter case, a path is associated with the reference to indicate
    34  // the specific non-existing path that is needed for that dependency. (TODO)
    35  //
    36  // A single reference may point to multiple nodes. For instance,
    37  // (a & b).z may point to both `a.z` and `b.z`. This has to be taken into
    38  // account if dep is used for substitutions.
    39  //
    40  //
    41  //   field: Conjunct
    42  //             |
    43  //           Expr                       Conjunct Expression
    44  //             |- Reference             A reference to led to a target
    45  //             |-    \- Target Node     Pointed to by Reference
    46  //             |-         \- UsedPath   The sole path used within Node
    47  
    48  // TODO: verify that these concepts are correctly reflected in the API:
    49  // Source:
    50  //     The CUE value for which dependencies are analyzed.
    51  //     This may differ per dependency for dynamic and transitive analysis.
    52  // Target:
    53  //     The field to which the found reference resolves.
    54  // Reference:
    55  //     The reference that resolved to the dependency.
    56  //     Replacing this reference in the conjuncts of the source vertex with a
    57  //     link to the target vertex yields the same result if there only a single
    58  //     dependency matching this reference.
    59  // Conjunct:
    60  //     The conjunct in which the Reference was found.
    61  // Used Path:
    62  //     The target vertex may be a parent of the actual, more precise,
    63  //     dependency, if the latter does not yet exist. The target path is the path
    64  //     from the target vertex to the actual dependency.
    65  // Trace:
    66  //     A sequence of dependencies leading to the result in case of transitive
    67  //     dependencies.
    68  
    69  // TODO: for a public API, a better approach seems to be to have a single
    70  // Visit method, with a configuration to set a bunch of orthogonal options.
    71  // Here are some examples of the options:
    72  //   - Dynamic:    evaluate and descend into computed fields.
    73  //   - Recurse:    evaluate dependencies of subfields as well.
    74  //   - Inner:      report dependencies within the root being visited.
    75  //   - RootLess:   report dependencies that do not have a path to the root.
    76  //   - Transitive: get all dependencies, not just the direct ones.
    77  //   - Substitute: do not get precise dependencies, but rather keep them
    78  //         such that each expression needs to be replaced with at most
    79  //         one dependency. Could be a method on Dependency.
    80  //   - ContinueOnError:  continue visiting even if there are errors.
    81  //   [add more as they come up]
    82  //
    83  
    84  type Config struct {
    85  	// Dynamic enables evaluting dependencies Vertex Arcs, recursively
    86  	Dynamic bool
    87  
    88  	// Descend enables recursively descending into fields. This option is
    89  	// implied by Dynamic.
    90  	Descend bool
    91  
    92  	// Cycles allows a Node to reported more than once. This includes the node
    93  	// passed to Visit, which is otherwise never reported. This option can be
    94  	// used to disable cycle checking. TODO: this is not yet implemented.
    95  	AllowCycles bool
    96  
    97  	// Rootless enables reporting nodes that do not have a path from the root.
    98  	// This includes variables of comprehensions and fields of composite literal
    99  	// values that are part of expressions, such as {out: v}.out.
   100  	Rootless bool
   101  
   102  	// TODO:
   103  	// ContinueOnError indicates whether to continue finding dependencies
   104  	// even when there are errors.
   105  	// ContinueOnError bool
   106  
   107  	//  pkg indicates the main package for which the analyzer is configured,
   108  	// which is used for reporting purposes.
   109  	Pkg *adt.ImportReference
   110  }
   111  
   112  // A Dependency is a reference and the node that reference resolves to.
   113  type Dependency struct {
   114  	// Node is the referenced node.
   115  	Node *adt.Vertex
   116  
   117  	// Reference is the expression that referenced the node.
   118  	Reference adt.Resolver
   119  
   120  	pkg *adt.ImportReference
   121  
   122  	top bool
   123  
   124  	visitor *visitor
   125  }
   126  
   127  // Recurse visits the dependencies of d.Node, using the same visit function as
   128  // the original.
   129  func (d *Dependency) Recurse() {
   130  	savedAll := d.visitor.all
   131  	savedTop := d.visitor.top
   132  	d.visitor.all = d.visitor.recurse
   133  	d.visitor.top = true
   134  
   135  	d.visitor.visitReusingVisitor(d.Node, false)
   136  
   137  	d.visitor.all = savedAll
   138  	d.visitor.top = savedTop
   139  }
   140  
   141  // Import returns the import reference or nil if the reference was within
   142  // the same package as the visited Vertex.
   143  func (d *Dependency) Import() *adt.ImportReference {
   144  	return d.pkg
   145  }
   146  
   147  // IsRoot reports whether the dependency is referenced by the root of the
   148  // original Vertex passed to any of the Visit* functions, and not one of its
   149  // descendent arcs. This always returns true for Visit().
   150  func (d *Dependency) IsRoot() bool {
   151  	return d.top
   152  }
   153  
   154  func importRef(r adt.Expr) *adt.ImportReference {
   155  	switch x := r.(type) {
   156  	case *adt.ImportReference:
   157  		return x
   158  	case *adt.SelectorExpr:
   159  		return importRef(x.X)
   160  	case *adt.IndexExpr:
   161  		return importRef(x.X)
   162  	}
   163  	return nil
   164  }
   165  
   166  // VisitFunc is used for reporting dependencies.
   167  type VisitFunc func(Dependency) error
   168  
   169  var empty *adt.Vertex
   170  
   171  func init() {
   172  	// TODO: Consider setting a non-nil BaseValue.
   173  	empty = &adt.Vertex{}
   174  	empty.ForceDone()
   175  }
   176  
   177  var zeroConfig = &Config{}
   178  
   179  // Visit calls f for the dependencies of n as determined by the given
   180  // configuration.
   181  func Visit(cfg *Config, c *adt.OpContext, n *adt.Vertex, f VisitFunc) error {
   182  	if cfg == nil {
   183  		cfg = zeroConfig
   184  	}
   185  	if c == nil {
   186  		panic("nil context")
   187  	}
   188  	v := visitor{
   189  		ctxt:       c,
   190  		fn:         f,
   191  		pkg:        cfg.Pkg,
   192  		recurse:    cfg.Descend,
   193  		all:        cfg.Descend,
   194  		top:        true,
   195  		cfgDynamic: cfg.Dynamic,
   196  	}
   197  	return v.visitReusingVisitor(n, true)
   198  }
   199  
   200  // visitReusingVisitor is factored out of Visit so that we may reuse visitor.
   201  func (v *visitor) visitReusingVisitor(n *adt.Vertex, top bool) error {
   202  	if v.cfgDynamic {
   203  		if v.marked == nil {
   204  			v.marked = marked{}
   205  		}
   206  		v.marked.markExpr(n)
   207  
   208  		v.dynamic(n, top)
   209  	} else {
   210  		v.visit(n, top)
   211  	}
   212  	return v.err
   213  }
   214  
   215  func (v *visitor) visit(n *adt.Vertex, top bool) (err error) {
   216  	savedNode := v.node
   217  	savedTop := v.top
   218  
   219  	v.node = n
   220  	v.top = top
   221  
   222  	defer func() {
   223  		v.node = savedNode
   224  		v.top = savedTop
   225  
   226  		switch x := recover(); x {
   227  		case nil:
   228  		case aborted:
   229  			err = v.err
   230  		default:
   231  			panic(x)
   232  		}
   233  	}()
   234  
   235  	for _, x := range n.Conjuncts {
   236  		v.markExpr(x.Env, x.Elem())
   237  	}
   238  
   239  	return nil
   240  }
   241  
   242  var aborted = errors.New("aborted")
   243  
   244  type visitor struct {
   245  	ctxt *adt.OpContext
   246  	fn   VisitFunc
   247  	node *adt.Vertex
   248  	err  error
   249  	pkg  *adt.ImportReference
   250  
   251  	// recurse indicates whether, during static analysis, to process references
   252  	// that will be unified into different fields.
   253  	recurse bool
   254  	// all indicates wether to process references that would be unified into
   255  	// different fields. This similar to recurse, but sometimes gets temporarily
   256  	// overridden to deal with special cases.
   257  	all       bool
   258  	top       bool
   259  	topRef    adt.Resolver
   260  	pathStack []refEntry
   261  	numRefs   int // count of reported dependencies
   262  
   263  	// cfgDynamic is kept from the original config.
   264  	cfgDynamic bool
   265  
   266  	marked marked
   267  }
   268  
   269  type refEntry struct {
   270  	env *adt.Environment
   271  	ref adt.Resolver
   272  }
   273  
   274  // TODO: factor out the below logic as either a low-level dependency analyzer or
   275  // some walk functionality.
   276  
   277  // markExpr visits all nodes in an expression to mark dependencies.
   278  func (c *visitor) markExpr(env *adt.Environment, expr adt.Elem) {
   279  	if expr, ok := expr.(adt.Resolver); ok {
   280  		c.markResolver(env, expr)
   281  		return
   282  	}
   283  
   284  	saved := c.topRef
   285  	c.topRef = nil
   286  	defer func() { c.topRef = saved }()
   287  
   288  	switch x := expr.(type) {
   289  	case nil:
   290  	case *adt.BinaryExpr:
   291  		c.markExpr(env, x.X)
   292  		c.markExpr(env, x.Y)
   293  
   294  	case *adt.UnaryExpr:
   295  		c.markExpr(env, x.X)
   296  
   297  	case *adt.Interpolation:
   298  		for i := 1; i < len(x.Parts); i += 2 {
   299  			c.markExpr(env, x.Parts[i])
   300  		}
   301  
   302  	case *adt.BoundExpr:
   303  		c.markExpr(env, x.Expr)
   304  
   305  	case *adt.CallExpr:
   306  		c.markExpr(env, x.Fun)
   307  		saved := c.all
   308  		c.all = true
   309  		for _, a := range x.Args {
   310  			c.markExpr(env, a)
   311  		}
   312  		c.all = saved
   313  
   314  	case *adt.DisjunctionExpr:
   315  		for _, d := range x.Values {
   316  			c.markExpr(env, d.Val)
   317  		}
   318  
   319  	case *adt.SliceExpr:
   320  		c.markExpr(env, x.X)
   321  		c.markExpr(env, x.Lo)
   322  		c.markExpr(env, x.Hi)
   323  		c.markExpr(env, x.Stride)
   324  
   325  	case *adt.ListLit:
   326  		env := &adt.Environment{Up: env, Vertex: empty}
   327  		for _, e := range x.Elems {
   328  			switch x := e.(type) {
   329  			case *adt.Comprehension:
   330  				c.markComprehension(env, x)
   331  
   332  			case adt.Expr:
   333  				c.markSubExpr(env, x)
   334  
   335  			case *adt.Ellipsis:
   336  				if x.Value != nil {
   337  					c.markSubExpr(env, x.Value)
   338  				}
   339  			}
   340  		}
   341  
   342  	case *adt.StructLit:
   343  		env := &adt.Environment{Up: env, Vertex: empty}
   344  		for _, e := range x.Decls {
   345  			c.markDecl(env, e)
   346  		}
   347  
   348  	case *adt.Comprehension:
   349  		c.markComprehension(env, x)
   350  	}
   351  }
   352  
   353  // markResolve resolves dependencies.
   354  func (c *visitor) markResolver(env *adt.Environment, r adt.Resolver) {
   355  	// Note: it is okay to pass an empty CloseInfo{} here as we assume that
   356  	// all nodes are finalized already and we need neither closedness nor cycle
   357  	// checks.
   358  	ref, _ := c.ctxt.Resolve(adt.MakeConjunct(env, r, adt.CloseInfo{}), r)
   359  
   360  	// TODO: consider the case where an inlined composite literal does not
   361  	// resolve, but has references. For instance, {a: k, ref}.b would result
   362  	// in a failure during evaluation if b is not defined within ref. However,
   363  	// ref might still specialize to allow b.
   364  
   365  	if ref != nil {
   366  		c.reportDependency(env, r, ref)
   367  		return
   368  	}
   369  
   370  	// It is possible that a reference cannot be resolved because it is
   371  	// incomplete. In this case, we should check whether subexpressions of the
   372  	// reference can be resolved to mark those dependencies. For instance,
   373  	// prefix paths of selectors and the value or index of an index expression
   374  	// may independently resolve to a valid dependency.
   375  
   376  	switch x := r.(type) {
   377  	case *adt.NodeLink:
   378  		panic("unreachable")
   379  
   380  	case *adt.IndexExpr:
   381  		c.markExpr(env, x.X)
   382  		c.markExpr(env, x.Index)
   383  
   384  	case *adt.SelectorExpr:
   385  		c.markExpr(env, x.X)
   386  	}
   387  }
   388  
   389  // reportDependency reports a dependency from r to v.
   390  // v must be the value that is obtained after resolving r.
   391  func (c *visitor) reportDependency(env *adt.Environment, ref adt.Resolver, v *adt.Vertex) {
   392  	if v == c.node || v == empty {
   393  		return
   394  	}
   395  
   396  	reference := ref
   397  	if c.topRef == nil && len(c.pathStack) == 0 {
   398  		saved := c.topRef
   399  		c.topRef = ref
   400  		defer func() { c.topRef = saved }()
   401  	}
   402  
   403  	// TODO: in "All" mode we still report the latest reference used, instead
   404  	// of the reference at the start of the traversal, as the self-contained
   405  	// algorithm (its only user) depends on it.
   406  	// However, if the stack is non-nil, the reference will not correctly
   407  	// reflect the substituted value, so we use the top reference instead.
   408  	if !c.recurse && len(c.pathStack) == 0 && c.topRef != nil {
   409  		reference = c.topRef
   410  	}
   411  
   412  	if !v.Rooted() {
   413  		before := c.numRefs
   414  		c.markInternalResolvers(env, ref, v)
   415  		// TODO: this logic could probably be simplified if we let clients
   416  		// explicitly mark whether to visit rootless nodes. Visiting these
   417  		// may be necessary when substituting values.
   418  		switch _, ok := ref.(*adt.FieldReference); {
   419  		case !ok:
   420  			// Do not report rootless nodes for selectors.
   421  			return
   422  		case c.numRefs > before:
   423  			// For FieldReferences that resolve to something we do not need
   424  			// to report anything intermediate.
   425  			return
   426  		}
   427  	}
   428  	if hasLetParent(v) {
   429  		return
   430  	}
   431  
   432  	// Expand path.
   433  	altRef := reference
   434  	for i := len(c.pathStack) - 1; i >= 0; i-- {
   435  		x := c.pathStack[i]
   436  		var w *adt.Vertex
   437  		// TODO: instead of setting the reference, the proper thing to do is
   438  		// to record a path that still needs to be selected into the recorded
   439  		// dependency. See the Target Path definition at the top of the file.
   440  		if f := c.feature(x.env, x.ref); f != 0 {
   441  			w = v.Lookup(f)
   442  		}
   443  		if w == nil {
   444  			break
   445  		}
   446  		altRef = x.ref
   447  		if i == 0 && c.topRef != nil {
   448  			altRef = c.topRef
   449  		}
   450  		v = w
   451  	}
   452  
   453  	// All resolvers are expressions.
   454  	if p := importRef(ref.(adt.Expr)); p != nil {
   455  		savedPkg := c.pkg
   456  		c.pkg = p
   457  		defer func() { c.pkg = savedPkg }()
   458  	}
   459  
   460  	c.numRefs++
   461  
   462  	d := Dependency{
   463  		Node:      v,
   464  		Reference: altRef,
   465  		pkg:       c.pkg,
   466  		top:       c.top,
   467  		visitor:   c,
   468  	}
   469  	if err := c.fn(d); err != nil {
   470  		c.err = err
   471  		panic(aborted)
   472  	}
   473  }
   474  
   475  // TODO(perf): make this available as a property of vertices to avoid doing
   476  // work repeatedly.
   477  func hasLetParent(v *adt.Vertex) bool {
   478  	for ; v != nil; v = v.Parent {
   479  		if v.Label.IsLet() {
   480  			return true
   481  		}
   482  	}
   483  	return false
   484  }
   485  
   486  // markConjuncts transitively marks all reference of the current node.
   487  func (c *visitor) markConjuncts(v *adt.Vertex) {
   488  	for _, x := range v.Conjuncts {
   489  		// Use Elem instead of Expr to preserve the Comprehension to, in turn,
   490  		// ensure an Environment is inserted for the Value clause.
   491  		c.markExpr(x.Env, x.Elem())
   492  	}
   493  }
   494  
   495  // markInternalResolvers marks dependencies for rootless nodes. As these
   496  // nodes may not be visited during normal traversal, we need to be more
   497  // proactive. For selectors and indices this means we need to evaluate their
   498  // objects to see exactly what the selector or index refers to.
   499  func (c *visitor) markInternalResolvers(env *adt.Environment, r adt.Resolver, v *adt.Vertex) {
   500  	if v.Rooted() {
   501  		panic("node must not be rooted")
   502  	}
   503  
   504  	saved := c.all // recursive traversal already done by this function.
   505  
   506  	// As lets have no path and we otherwise will not process them, we set
   507  	// processing all to true.
   508  	if c.marked != nil && hasLetParent(v) {
   509  		for _, x := range v.Conjuncts {
   510  			c.marked.markExpr(x.Expr())
   511  		}
   512  	}
   513  
   514  	c.markConjuncts(v)
   515  
   516  	// evaluateInner will already process all values recursively, so disable
   517  	// while processing in this case.
   518  	c.all = false
   519  
   520  	switch r := r.(type) {
   521  	case *adt.SelectorExpr:
   522  		c.evaluateInner(env, r.X, r)
   523  	case *adt.IndexExpr:
   524  		c.evaluateInner(env, r.X, r)
   525  	}
   526  
   527  	c.all = saved
   528  }
   529  
   530  // evaluateInner evaluates the LHS of the given selector or index expression,
   531  // and marks all its conjuncts. The reference is pushed on a stack to mark
   532  // the field or index that needs to be selected for any dependencies that are
   533  // subsequently encountered. This is handled by reportDependency.
   534  func (c *visitor) evaluateInner(env *adt.Environment, x adt.Expr, r adt.Resolver) {
   535  	value, _ := c.ctxt.Evaluate(env, x)
   536  	v, _ := value.(*adt.Vertex)
   537  	if v == nil {
   538  		return
   539  	}
   540  	// TODO(perf): one level of  evaluation would suffice.
   541  	v.Finalize(c.ctxt)
   542  
   543  	saved := len(c.pathStack)
   544  	c.pathStack = append(c.pathStack, refEntry{env, r})
   545  	c.markConjuncts(v)
   546  	c.pathStack = c.pathStack[:saved]
   547  }
   548  
   549  func (c *visitor) feature(env *adt.Environment, r adt.Resolver) adt.Feature {
   550  	switch r := r.(type) {
   551  	case *adt.SelectorExpr:
   552  		return r.Sel
   553  	case *adt.IndexExpr:
   554  		v, _ := c.ctxt.Evaluate(env, r.Index)
   555  		v = adt.Unwrap(v)
   556  		return adt.LabelFromValue(c.ctxt, r.Index, v)
   557  	default:
   558  		return adt.InvalidLabel
   559  	}
   560  }
   561  
   562  func (c *visitor) markSubExpr(env *adt.Environment, x adt.Expr) {
   563  	if c.all {
   564  		saved := c.top
   565  		c.top = false
   566  		c.markExpr(env, x)
   567  		c.top = saved
   568  	}
   569  }
   570  
   571  func (c *visitor) markDecl(env *adt.Environment, d adt.Decl) {
   572  	switch x := d.(type) {
   573  	case *adt.Field:
   574  		c.markSubExpr(env, x.Value)
   575  
   576  	case *adt.BulkOptionalField:
   577  		c.markExpr(env, x.Filter)
   578  		// when dynamic, only continue if there is evidence of
   579  		// the field in the parallel actual evaluation.
   580  		c.markSubExpr(env, x.Value)
   581  
   582  	case *adt.DynamicField:
   583  		c.markExpr(env, x.Key)
   584  		// when dynamic, only continue if there is evidence of
   585  		// a matching field in the parallel actual evaluation.
   586  		c.markSubExpr(env, x.Value)
   587  
   588  	case *adt.Comprehension:
   589  		c.markComprehension(env, x)
   590  
   591  	case adt.Expr:
   592  		c.markExpr(env, x)
   593  
   594  	case *adt.Ellipsis:
   595  		if x.Value != nil {
   596  			c.markSubExpr(env, x.Value)
   597  		}
   598  	}
   599  }
   600  
   601  func (c *visitor) markComprehension(env *adt.Environment, y *adt.Comprehension) {
   602  	env = c.markClauses(env, y.Clauses)
   603  
   604  	// Use "live" environments if we have them. This is important if
   605  	// dependencies are computed on a partially evaluated value where a pushed
   606  	// down comprehension is defined outside the root of the dependency
   607  	// analysis. For instance, when analyzing dependencies at path a.b in:
   608  	//
   609  	//  a: {
   610  	//      for value in { test: 1 } {
   611  	//          b: bar: value
   612  	//      }
   613  	//  }
   614  	//
   615  	if envs := y.Envs(); len(envs) > 0 {
   616  		// We use the Environment to get access to the parent chain. It
   617  		// suffices to take any Environment (in this case the first), as all
   618  		// will have the same parent chain.
   619  		env = envs[0]
   620  	}
   621  	for i := y.Nest(); i > 0; i-- {
   622  		env = &adt.Environment{Up: env, Vertex: empty}
   623  	}
   624  	// TODO: consider using adt.EnvExpr and remove the above loop.
   625  	c.markExpr(env, adt.ToExpr(y.Value))
   626  }
   627  
   628  func (c *visitor) markClauses(env *adt.Environment, a []adt.Yielder) *adt.Environment {
   629  	for _, y := range a {
   630  		switch x := y.(type) {
   631  		case *adt.ForClause:
   632  			c.markExpr(env, x.Src)
   633  			env = &adt.Environment{Up: env, Vertex: empty}
   634  			// In dynamic mode, iterate over all actual value and
   635  			// evaluate.
   636  
   637  		case *adt.LetClause:
   638  			c.markExpr(env, x.Expr)
   639  			env = &adt.Environment{Up: env, Vertex: empty}
   640  
   641  		case *adt.IfClause:
   642  			c.markExpr(env, x.Condition)
   643  			// In dynamic mode, only continue if condition is true.
   644  
   645  		case *adt.ValueClause:
   646  			env = &adt.Environment{Up: env, Vertex: empty}
   647  		}
   648  	}
   649  	return env
   650  }
   651  

View as plain text