...

Source file src/cuelang.org/go/internal/core/adt/disjunct.go

Documentation: cuelang.org/go/internal/core/adt

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package adt
    16  
    17  import (
    18  	"cuelang.org/go/cue/errors"
    19  	"cuelang.org/go/cue/token"
    20  )
    21  
    22  // Nodes man not reenter a disjunction.
    23  //
    24  // Copy one layer deep; throw away items on failure.
    25  
    26  // DISJUNCTION ALGORITHM
    27  //
    28  // The basic concept of the algorithm is to use backtracking to find valid
    29  // disjunctions. The algorithm can stop if two matching disjuncts are found
    30  // where one does not subsume the other.
    31  //
    32  // At a later point, we can introduce a filter step to filter out possible
    33  // disjuncts based on, say, discriminator fields or field exclusivity (oneOf
    34  // fields in Protobuf).
    35  //
    36  // To understand the details of the algorithm, it is important to understand
    37  // some properties of disjunction.
    38  //
    39  //
    40  // EVALUATION OF A DISJUNCTION IS SELF CONTAINED
    41  //
    42  // In other words, fields outside of a disjunction cannot bind to values within
    43  // a disjunction whilst evaluating that disjunction. This allows the computation
    44  // of disjunctions to be isolated from side effects.
    45  //
    46  // The intuition behind this is as follows: as a disjunction is not a concrete
    47  // value, it is not possible to lookup a field within a disjunction if it has
    48  // not yet been evaluated. So if a reference within a disjunction that is needed
    49  // to disambiguate that disjunction refers to a field outside the scope of the
    50  // disjunction which, in turn, refers to a field within the disjunction, this
    51  // results in a cycle error. We achieve this by not removing the cycle marker of
    52  // the Vertex of the disjunction until the disjunction is resolved.
    53  //
    54  // Note that the following disjunct is still allowed:
    55  //
    56  //    a: 1
    57  //    b: a
    58  //
    59  // Even though `a` refers to the root of the disjunction, it does not _select
    60  // into_ the disjunction. Implementation-wise, it also doesn't have to, as the
    61  // respective vertex is available within the Environment. Referencing a node
    62  // outside the disjunction that in turn selects the disjunction root, however,
    63  // will result in a detected cycle.
    64  //
    65  // As usual, cycle detection should be interpreted marked as incomplete, so that
    66  // the referring node will not be fixed to an error prematurely.
    67  //
    68  //
    69  // SUBSUMPTION OF AMBIGUOUS DISJUNCTS
    70  //
    71  // A disjunction can be evaluated to a concrete value if only one disjunct
    72  // remains. Aside from disambiguating through unification failure, disjuncts
    73  // may also be disambiguated by taking the least specific of two disjuncts.
    74  // For instance, if a subsumes b, then the result of disjunction may be a.
    75  //
    76  //   NEW ALGORITHM NO LONGER VERIFIES SUBSUMPTION. SUBSUMPTION IS INHERENTLY
    77  //   IMPRECISE (DUE TO BULK OPTIONAL FIELDS). OTHER THAN THAT, FOR SCALAR VALUES
    78  //   IT JUST MEANS THERE IS AMBIGUITY, AND FOR STRUCTS IT CAN LEAD TO STRANGE
    79  //   CONSEQUENCES.
    80  //
    81  //   USE EQUALITY INSTEAD:
    82  //     - Undefined == error for optional fields.
    83  //     - So only need to check exact labels for vertices.
    84  
    85  type envDisjunct struct {
    86  	env         *Environment
    87  	cloneID     CloseInfo
    88  	expr        *DisjunctionExpr
    89  	value       *Disjunction
    90  	hasDefaults bool
    91  
    92  	// These are used for book keeping, tracking whether any of the
    93  	// disjuncts marked with a default marker remains after unification.
    94  	// If no default is used, all other elements are treated as "maybeDefault".
    95  	// Otherwise, elements are treated as is.
    96  	parentDefaultUsed bool
    97  	childDefaultUsed  bool
    98  }
    99  
   100  func (n *nodeContext) addDisjunction(env *Environment, x *DisjunctionExpr, cloneID CloseInfo) {
   101  
   102  	// TODO: precompute
   103  	numDefaults := 0
   104  	for _, v := range x.Values {
   105  		isDef := v.Default // || n.hasDefaults(env, v.Val)
   106  		if isDef {
   107  			numDefaults++
   108  		}
   109  	}
   110  
   111  	n.disjunctions = append(n.disjunctions,
   112  		envDisjunct{env, cloneID, x, nil, numDefaults > 0, false, false})
   113  }
   114  
   115  func (n *nodeContext) addDisjunctionValue(env *Environment, x *Disjunction, cloneID CloseInfo) {
   116  	n.disjunctions = append(n.disjunctions,
   117  		envDisjunct{env, cloneID, nil, x, x.HasDefaults, false, false})
   118  
   119  }
   120  
   121  func (n *nodeContext) expandDisjuncts(
   122  	state vertexStatus,
   123  	parent *nodeContext,
   124  	parentMode defaultMode, // default mode of this disjunct
   125  	recursive, last bool) {
   126  
   127  	unreachableForDev(n.ctx)
   128  
   129  	n.ctx.stats.Disjuncts++
   130  
   131  	// refNode is used to collect cyclicReferences for all disjuncts to be
   132  	// passed up to the parent node. Note that because the node in the parent
   133  	// context is overwritten in the course of expanding disjunction to retain
   134  	// pointer identity, it is not possible to simply record the refNodes in the
   135  	// parent directly.
   136  	var refNode *RefNode
   137  
   138  	node := n.node
   139  	defer func() {
   140  		n.node = node
   141  	}()
   142  
   143  	for n.expandOne(partial) {
   144  	}
   145  
   146  	// save node to snapShot in nodeContex
   147  	// save nodeContext.
   148  
   149  	if recursive || len(n.disjunctions) > 0 {
   150  		n.snapshot = clone(*n.node)
   151  	} else {
   152  		n.snapshot = *n.node
   153  	}
   154  
   155  	defaultOffset := len(n.usedDefault)
   156  
   157  	switch {
   158  	default: // len(n.disjunctions) == 0
   159  		m := *n
   160  		n.postDisjunct(state)
   161  
   162  		switch {
   163  		case n.hasErr():
   164  			// TODO: consider finalizing the node thusly:
   165  			// if recursive {
   166  			// 	n.node.Finalize(n.ctx)
   167  			// }
   168  			x := n.node
   169  			err, ok := x.BaseValue.(*Bottom)
   170  			if !ok {
   171  				err = n.getErr()
   172  			}
   173  			if err == nil {
   174  				// TODO(disjuncts): Is this always correct? Especially for partial
   175  				// evaluation it is okay for child errors to have incomplete errors.
   176  				// Perhaps introduce an Err() method.
   177  				err = x.ChildErrors
   178  			}
   179  			if err != nil {
   180  				parent.disjunctErrs = append(parent.disjunctErrs, err)
   181  			}
   182  			if recursive {
   183  				n.free()
   184  			}
   185  			return
   186  		}
   187  
   188  		if recursive {
   189  			*n = m
   190  			n.result = *n.node // XXX: n.result = snapshotVertex(n.node)?
   191  			n.node = &n.result
   192  			n.disjuncts = append(n.disjuncts, n)
   193  		}
   194  		if n.node.BaseValue == nil {
   195  			n.node.BaseValue = n.getValidators(state)
   196  		}
   197  
   198  		n.usedDefault = append(n.usedDefault, defaultInfo{
   199  			parentMode: parentMode,
   200  			nestedMode: parentMode,
   201  			origMode:   parentMode,
   202  		})
   203  
   204  	case len(n.disjunctions) > 0:
   205  		// Process full disjuncts to ensure that erroneous disjuncts are
   206  		// eliminated as early as possible.
   207  		state = finalized
   208  
   209  		n.disjuncts = append(n.disjuncts, n)
   210  
   211  		n.refCount++
   212  		defer n.free()
   213  
   214  		for i, d := range n.disjunctions {
   215  			a := n.disjuncts
   216  			n.disjuncts = n.buffer[:0]
   217  			n.buffer = a[:0]
   218  
   219  			last := i+1 == len(n.disjunctions)
   220  			skipNonMonotonicChecks := i+1 < len(n.disjunctions)
   221  			if skipNonMonotonicChecks {
   222  				n.ctx.inDisjunct++
   223  			}
   224  
   225  			for _, dn := range a {
   226  				switch {
   227  				case d.expr != nil:
   228  					for _, v := range d.expr.Values {
   229  						cn := dn.clone()
   230  						*cn.node = clone(dn.snapshot)
   231  						cn.node.state = cn
   232  
   233  						c := MakeConjunct(d.env, v.Val, d.cloneID)
   234  						cn.addExprConjunct(c, state)
   235  
   236  						newMode := mode(d.hasDefaults, v.Default)
   237  
   238  						cn.expandDisjuncts(state, n, newMode, true, last)
   239  
   240  						// Record the cyclicReferences of the conjunct in the
   241  						// parent list.
   242  						// TODO: avoid the copy. It should be okay to "steal"
   243  						// this list and avoid the copy. But this change is best
   244  						// done in a separate CL.
   245  						for r := n.node.cyclicReferences; r != nil; r = r.Next {
   246  							s := *r
   247  							s.Next = refNode
   248  							refNode = &s
   249  						}
   250  					}
   251  
   252  				case d.value != nil:
   253  					for i, v := range d.value.Values {
   254  						cn := dn.clone()
   255  						*cn.node = clone(dn.snapshot)
   256  						cn.node.state = cn
   257  
   258  						cn.addValueConjunct(d.env, v, d.cloneID)
   259  
   260  						newMode := mode(d.hasDefaults, i < d.value.NumDefaults)
   261  
   262  						cn.expandDisjuncts(state, n, newMode, true, last)
   263  
   264  						// See comment above.
   265  						for r := n.node.cyclicReferences; r != nil; r = r.Next {
   266  							s := *r
   267  							s.Next = refNode
   268  							refNode = &s
   269  						}
   270  					}
   271  				}
   272  			}
   273  
   274  			if skipNonMonotonicChecks {
   275  				n.ctx.inDisjunct--
   276  			}
   277  
   278  			if len(n.disjuncts) == 0 {
   279  				n.makeError()
   280  			}
   281  
   282  			if recursive || i > 0 {
   283  				for _, x := range a {
   284  					x.free()
   285  				}
   286  			}
   287  
   288  			if len(n.disjuncts) == 0 {
   289  				break
   290  			}
   291  		}
   292  
   293  		// Annotate disjunctions with whether any of the default disjunctions
   294  		// was used.
   295  		for _, d := range n.disjuncts {
   296  			for i, info := range d.usedDefault[defaultOffset:] {
   297  				if info.parentMode == isDefault {
   298  					n.disjunctions[i].parentDefaultUsed = true
   299  				}
   300  				if info.origMode == isDefault {
   301  					n.disjunctions[i].childDefaultUsed = true
   302  				}
   303  			}
   304  		}
   305  
   306  		// Combine parent and child default markers, considering that a parent
   307  		// "notDefault" is treated as "maybeDefault" if none of the disjuncts
   308  		// marked as default remain.
   309  		//
   310  		// NOTE for a parent marked as "notDefault", a child is *never*
   311  		// considered as default. It may either be "not" or "maybe" default.
   312  		//
   313  		// The result for each disjunction is conjoined into a single value.
   314  		for _, d := range n.disjuncts {
   315  			m := maybeDefault
   316  			orig := maybeDefault
   317  			for i, info := range d.usedDefault[defaultOffset:] {
   318  				parent := info.parentMode
   319  
   320  				used := n.disjunctions[i].parentDefaultUsed
   321  				childUsed := n.disjunctions[i].childDefaultUsed
   322  				hasDefaults := n.disjunctions[i].hasDefaults
   323  
   324  				orig = combineDefault(orig, info.parentMode)
   325  				orig = combineDefault(orig, info.nestedMode)
   326  
   327  				switch {
   328  				case childUsed:
   329  					// One of the children used a default. This is "normal"
   330  					// mode. This may also happen when we are in
   331  					// hasDefaults/notUsed mode. Consider
   332  					//
   333  					//      ("a" | "b") & (*(*"a" | string) | string)
   334  					//
   335  					// Here the doubly nested default is called twice, once
   336  					// for "a" and then for "b", where the second resolves to
   337  					// not using a default. The first does, however, and on that
   338  					// basis the "ot default marker cannot be overridden.
   339  					m = combineDefault(m, info.parentMode)
   340  					m = combineDefault(m, info.origMode)
   341  
   342  				case !hasDefaults, used:
   343  					m = combineDefault(m, info.parentMode)
   344  					m = combineDefault(m, info.nestedMode)
   345  
   346  				case hasDefaults && !used:
   347  					Assertf(parent == notDefault, "unexpected default mode")
   348  				}
   349  			}
   350  			d.defaultMode = m
   351  
   352  			d.usedDefault = d.usedDefault[:defaultOffset]
   353  			d.usedDefault = append(d.usedDefault, defaultInfo{
   354  				parentMode: parentMode,
   355  				nestedMode: m,
   356  				origMode:   orig,
   357  			})
   358  
   359  		}
   360  
   361  		// TODO: this is an old trick that seems no longer necessary for the new
   362  		// implementation. Keep around until we finalize the semantics for
   363  		// defaults, though. The recursion of nested defaults is not entirely
   364  		// proper yet.
   365  		//
   366  		// A better approach, that avoids the need for recursion (semantically),
   367  		// would be to only consider default usage for one level, but then to
   368  		// also allow a default to be passed if only one value is remaining.
   369  		// This means that a nested subsumption would first have to be evaluated
   370  		// in isolation, however, to determine that it is not previous
   371  		// disjunctions that cause the disambiguation.
   372  		//
   373  		// HACK alert: this replaces the hack of the previous algorithm with a
   374  		// slightly less worse hack: instead of dropping the default info when
   375  		// the value was scalar before, we drop this information when there is
   376  		// only one disjunct, while not discarding hard defaults. TODO: a more
   377  		// principled approach would be to recognize that there is only one
   378  		// default at a point where this does not break commutativity.
   379  		// if len(n.disjuncts) == 1 && n.disjuncts[0].defaultMode != isDefault {
   380  		// 	n.disjuncts[0].defaultMode = maybeDefault
   381  		// }
   382  	}
   383  
   384  	// Compare to root, but add to this one.
   385  	switch p := parent; {
   386  	case p != n:
   387  		p.disjunctErrs = append(p.disjunctErrs, n.disjunctErrs...)
   388  		n.disjunctErrs = n.disjunctErrs[:0]
   389  
   390  	outer:
   391  		for _, d := range n.disjuncts {
   392  			for k, v := range p.disjuncts {
   393  				// As long as a vertex isn't finalized, it may be that potential
   394  				// errors are not yet detected. This may lead two structs that
   395  				// are identical except for closedness information,
   396  				// for instance, to appear identical.
   397  				if v.result.status < finalized || d.result.status < finalized {
   398  					break
   399  				}
   400  				// Even if a node is finalized, it may still have an
   401  				// "incomplete" component that may change down the line.
   402  				if !d.done() || !v.done() {
   403  					break
   404  				}
   405  				flags := CheckStructural
   406  				if last {
   407  					flags |= IgnoreOptional
   408  				}
   409  				if Equal(n.ctx, &v.result, &d.result, flags) {
   410  					m := maybeDefault
   411  					for _, u := range d.usedDefault {
   412  						m = combineDefault(m, u.nestedMode)
   413  					}
   414  					if m == isDefault {
   415  						p.disjuncts[k] = d
   416  						v.free()
   417  					} else {
   418  						d.free()
   419  					}
   420  					continue outer
   421  				}
   422  			}
   423  
   424  			p.disjuncts = append(p.disjuncts, d)
   425  		}
   426  
   427  		n.disjuncts = n.disjuncts[:0]
   428  	}
   429  
   430  	// Record the refNodes in the parent.
   431  	for r := refNode; r != nil; {
   432  		next := r.Next
   433  		r.Next = parent.node.cyclicReferences
   434  		parent.node.cyclicReferences = r
   435  		r = next
   436  	}
   437  }
   438  
   439  func (n *nodeContext) makeError() {
   440  	code := IncompleteError
   441  
   442  	if len(n.disjunctErrs) > 0 {
   443  		code = EvalError
   444  		for _, c := range n.disjunctErrs {
   445  			if c.Code > code {
   446  				code = c.Code
   447  			}
   448  		}
   449  	}
   450  
   451  	b := &Bottom{
   452  		Code: code,
   453  		Err:  n.disjunctError(),
   454  	}
   455  	n.node.SetValue(n.ctx, b)
   456  }
   457  
   458  func mode(hasDefault, marked bool) defaultMode {
   459  	var mode defaultMode
   460  	switch {
   461  	case !hasDefault:
   462  		mode = maybeDefault
   463  	case marked:
   464  		mode = isDefault
   465  	default:
   466  		mode = notDefault
   467  	}
   468  	return mode
   469  }
   470  
   471  // clone makes a shallow copy of a Vertex. The purpose is to create different
   472  // disjuncts from the same Vertex under computation. This allows the conjuncts
   473  // of an arc to be reset to a previous position and the reuse of earlier
   474  // computations.
   475  //
   476  // Notes: only Arcs need to be copied recursively. Either the arc is finalized
   477  // and can be used as is, or Structs is assumed to not yet be computed at the
   478  // time that a clone is needed and must be nil. Conjuncts no longer needed and
   479  // can become nil. All other fields can be copied shallowly.
   480  func clone(v Vertex) Vertex {
   481  	v.state = nil
   482  	if a := v.Arcs; len(a) > 0 {
   483  		v.Arcs = make([]*Vertex, len(a))
   484  		for i, arc := range a {
   485  			switch arc.status {
   486  			case finalized:
   487  				v.Arcs[i] = arc
   488  
   489  			case 0:
   490  				a := *arc
   491  				v.Arcs[i] = &a
   492  
   493  				a.Conjuncts = make([]Conjunct, len(arc.Conjuncts))
   494  				copy(a.Conjuncts, arc.Conjuncts)
   495  
   496  			default:
   497  				a := *arc
   498  				a.state = arc.state.clone()
   499  				a.state.node = &a
   500  				a.state.snapshot = clone(a)
   501  				v.Arcs[i] = &a
   502  			}
   503  		}
   504  	}
   505  
   506  	if a := v.Structs; len(a) > 0 {
   507  		v.Structs = make([]*StructInfo, len(a))
   508  		copy(v.Structs, a)
   509  	}
   510  
   511  	return v
   512  }
   513  
   514  // Default rules from spec:
   515  //
   516  // U1: (v1, d1) & v2       => (v1&v2, d1&v2)
   517  // U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2)
   518  //
   519  // D1: (v1, d1) | v2       => (v1|v2, d1)
   520  // D2: (v1, d1) | (v2, d2) => (v1|v2, d1|d2)
   521  //
   522  // M1: *v        => (v, v)
   523  // M2: *(v1, d1) => (v1, d1)
   524  //
   525  // NOTE: M2 cannot be *(v1, d1) => (v1, v1), as this has the weird property
   526  // of making a value less specific. This causes issues, for instance, when
   527  // trimming.
   528  //
   529  // The old implementation does something similar though. It will discard
   530  // default information after first determining if more than one conjunct
   531  // has survived.
   532  //
   533  // def + maybe -> def
   534  // not + maybe -> def
   535  // not + def   -> def
   536  
   537  type defaultMode int
   538  
   539  const (
   540  	maybeDefault defaultMode = iota
   541  	isDefault
   542  	notDefault
   543  )
   544  
   545  // combineDefaults combines default modes for unifying conjuncts.
   546  //
   547  // Default rules from spec:
   548  //
   549  // U1: (v1, d1) & v2       => (v1&v2, d1&v2)
   550  // U2: (v1, d1) & (v2, d2) => (v1&v2, d1&d2)
   551  func combineDefault(a, b defaultMode) defaultMode {
   552  	if a > b {
   553  		return a
   554  	}
   555  	return b
   556  }
   557  
   558  // disjunctError returns a compound error for a failed disjunction.
   559  //
   560  // TODO(perf): the set of errors is now computed during evaluation. Eventually,
   561  // this could be done lazily.
   562  func (n *nodeContext) disjunctError() (errs errors.Error) {
   563  	ctx := n.ctx
   564  
   565  	disjuncts := selectErrors(n.disjunctErrs)
   566  
   567  	if disjuncts == nil {
   568  		errs = ctx.Newf("empty disjunction") // XXX: add space to sort first
   569  	} else {
   570  		disjuncts = errors.Sanitize(disjuncts)
   571  		k := len(errors.Errors(disjuncts))
   572  		if k == 1 {
   573  			return disjuncts
   574  		}
   575  		// prefix '-' to sort to top
   576  		errs = ctx.Newf("%d errors in empty disjunction:", k)
   577  		errs = errors.Append(errs, disjuncts)
   578  	}
   579  
   580  	return errs
   581  }
   582  
   583  func selectErrors(a []*Bottom) (errs errors.Error) {
   584  	// return all errors if less than a certain number.
   585  	if len(a) <= 2 {
   586  		for _, b := range a {
   587  			errs = errors.Append(errs, b.Err)
   588  
   589  		}
   590  		return errs
   591  	}
   592  
   593  	// First select only relevant errors.
   594  	isIncomplete := false
   595  	k := 0
   596  	for _, b := range a {
   597  		if !isIncomplete && b.Code >= IncompleteError {
   598  			k = 0
   599  			isIncomplete = true
   600  		}
   601  		a[k] = b
   602  		k++
   603  	}
   604  	a = a[:k]
   605  
   606  	// filter errors
   607  	positions := map[token.Pos]bool{}
   608  
   609  	add := func(b *Bottom, p token.Pos) bool {
   610  		if positions[p] {
   611  			return false
   612  		}
   613  		positions[p] = true
   614  		errs = errors.Append(errs, b.Err)
   615  		return true
   616  	}
   617  
   618  	for _, b := range a {
   619  		// TODO: Should we also distinguish by message type?
   620  		if add(b, b.Err.Position()) {
   621  			continue
   622  		}
   623  		for _, p := range b.Err.InputPositions() {
   624  			if add(b, p) {
   625  				break
   626  			}
   627  		}
   628  	}
   629  
   630  	return errs
   631  }
   632  

View as plain text