1 // Copyright 2022 CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package adt 16 17 // Cycle detection: 18 // 19 // - Current algorithm does not allow for early non-cyclic conjunct detection. 20 // - Record possibly cyclic references. 21 // - Mark as cyclic if no evidence is found. 22 // - Note that this also activates the same reference in other (parent) conjuncts. 23 24 // TODO: 25 // - get rid of nodeContext.{hasCycle|hasNonCycle}. 26 // - compiler support for detecting cross-pattern references. 27 // - handle propagation of cyclic references to root across disjunctions. 28 29 // CYCLE DETECTION ALGORITHM 30 // 31 // BACKGROUND 32 // 33 // The cycle detection is inspired by the cycle detection used by Tomabechi's 34 // [Tomabechi COLING 1992] and Van Lohuizen's [Van Lohuizen ACL 2000] graph 35 // unification algorithms. 36 // 37 // Unlike with traditional graph unification, however, CUE uses references, 38 // which, unlike node equivalence, are unidirectional. This means that the 39 // technique to track equivalence through dereference, as common in graph 40 // unification algorithms like Tomabechi's, does not work unaltered. 41 // 42 // The unidirectional nature of references imply that each reference equates a 43 // facsimile of the value it points to. This renders the original approach of 44 // node-pointer equivalence useless. 45 // 46 // 47 // PRINCIPLE OF ALGORITHM 48 // 49 // The solution for CUE is based on two observations: 50 // 51 // - the CUE algorithm tracks all conjuncts that define a node separately, - 52 // accumulating used references on a per-conjunct basis causes duplicate 53 // references to uniquely identify cycles. 54 // 55 // A structural cycle, as defined by the spec, can then be detected if all 56 // conjuncts are marked as a cycle. 57 // 58 // References are accumulated as follows: 59 // 60 // 1. If a conjunct is a reference the reference is associated with that 61 // conjunct as well as the conjunct corresponding to the value it refers to. 62 // 2. If a conjunct is a struct (including lists), its references are associated 63 // with all embedded values and fields. 64 // 65 // To narrow down the specifics of the reference-based cycle detection, let us 66 // explore structural cycles in a bit more detail. 67 // 68 // 69 // STRUCTURAL CYCLES 70 // 71 // See the language specification for a higher-level and more complete overview. 72 // 73 // We have to define when a cycle is detected. CUE implementations MUST report 74 // an error upon a structural cycle, and SHOULD report cycles at the shortest 75 // possible paths at which they occur, but MAY report these at deeper paths. For 76 // instance, the following CUE has a structural cycle 77 // 78 // f: g: f 79 // 80 // The shortest path at which the cycle can be reported is f.g, but as all 81 // failed configurations are logically equal, it is fine for implementations to 82 // report them at f.g.g, for instance. 83 // 84 // It is not, however, correct to assume that a reference to a parent is always 85 // a cycle. Consider this case: 86 // 87 // a: [string]: b: a 88 // 89 // Even though reference `a` refers to a parent node, the cycle needs to be fed 90 // by a concrete field in struct `a` to persist, meaning it cannot result in a 91 // cycle as defined in the spec as it is defined here. Note however, that a 92 // specialization of this configuration _can_ result in a cycle. Consider 93 // 94 // a: [string]: b: a 95 // a: c: _ 96 // 97 // Here reference `a` is guaranteed to result in a structural cycle, as field 98 // `c` will match the pattern constraint unconditionally. 99 // 100 // In other words, it is not possible to exclude tracking references across 101 // pattern constraints from cycle checking. 102 // 103 // It is tempting to try to find a complete set of these edge cases with the aim 104 // to statically determine cases in which this occurs. But as [Carpenter 1992] 105 // demonstrates, it is possible for cycles to be created as a result of unifying 106 // two graphs that are themselves acyclic. The following example is a 107 // translation of Carpenters example to CUE: 108 // 109 // y: { 110 // f: h: g 111 // g: _ 112 // } 113 // x: { 114 // f: _ 115 // g: f 116 // } 117 // 118 // Even though the above contains no cycles, the result of `x & y` is cyclic: 119 // 120 // f: h: g 121 // g: f 122 // 123 // This means that, in practice, cycle detection has at least partially a 124 // dynamic component to it. 125 // 126 // 127 // ABSTRACT ALGORITHM 128 // 129 // The algorithm is described declaratively by defining what it means for a 130 // field to have a structural cycle. In the below, a _reference_ is uniquely 131 // identified by the pointer identity of a Go Resolver instance. 132 // 133 // Cycles are tracked on a per-conjunct basis and are not aggregated per Vertex: 134 // administrative information is only passed on from parent to child conjunct. 135 // 136 // A conjunct is a _parent_ of another conjunct if is a conjunct of one of the 137 // non-optional fields of the conjunct. For instance, conjunct `x` with value 138 // `{b: y & z}`, is a parent of conjunct `y` as well as `z`. Within field `b`, 139 // the conjuncts `y` and `z` would be tracked individually, though. 140 // 141 // A conjunct is _associated with a reference_ if its value was obtained by 142 // evaluating a reference. Note that a conjunct may be associated with many 143 // references if its evaluation requires evaluating a chain of references. For 144 // instance, consider 145 // 146 // a: {x: d} 147 // b: a 148 // c: b & e 149 // d: y: 1 150 // 151 // the first conjunct of field `c` (reference `b`) has the value `{x: y: 1}` and 152 // is associated with references `b` and `a`. 153 // 154 // The _tracked references_ of a conjunct are all references that are associated 155 // with it or any of its ancestors (parents of parents). For instance, the 156 // tracked references of conjunct `b.x` of field `c.x` are `a`, `b`, and `d`. 157 // 158 // A conjunct is a violating cycle if it is a reference that: 159 // - occurs in the tracked references of the conjunct, or 160 // - directly refers to a parent node of the conjunct. 161 // 162 // A conjunct is cyclic if it is a violating cycle or if any of its ancestors 163 // are a violating cycle. 164 // 165 // A field has a structural cycle if it is composed of at least one conjunct 166 // that is a violating cycle and no conjunct that is not cyclic. 167 // 168 // Note that a field can be composed of only cyclic conjuncts while still not be 169 // structural cycle: as long as there are no conjuncts that are a violating 170 // cycle, it is not a structural cycle. This is important for the following 171 // case: 172 // 173 // a: [string]: b: a 174 // x: a 175 // x: c: b: c: {} 176 // 177 // Here, reference `a` is never a cycle as the recursive references crosses a 178 // pattern constraint that only instantiates if it is unified with something 179 // else. 180 // 181 // 182 // DISCUSSION 183 // 184 // The goal of conjunct cycle marking algorithm is twofold: - mark conjuncts 185 // that are proven to propagate indefinitely - mark them as early as possible 186 // (shortest CUE path). 187 // 188 // TODO: Prove all cyclic conjuncts will eventually be marked as cyclic. 189 // 190 // TODO: 191 // - reference marks whether it crosses a pattern, improving the case 192 // a: [string]: b: c: b 193 // This requires a compile-time detection mechanism. 194 // 195 // 196 // REFERENCES 197 // [Tomabechi COLING 1992]: https://aclanthology.org/C92-2068 198 // Hideto Tomabechi. 1992. Quasi-Destructive Graph Unification with 199 // Structure-Sharing. In COLING 1992 Volume 2: The 14th International 200 // Conference on Computational Linguistics. 201 // 202 // [Van Lohuizen ACL 2000]: https://aclanthology.org/P00-1045/ 203 // Marcel P. van Lohuizen. 2000. "Memory-Efficient and Thread-Safe 204 // Quasi-Destructive Graph Unification". In Proceedings of the 38th Annual 205 // Meeting of the Association for Computational Linguistics, pages 352–359, 206 // Hong Kong. Association for Computational Linguistics. 207 // 208 // [Carpenter 1992]: 209 // Bob Carpenter, "The logic of typed feature structures." 210 // Cambridge University Press, ISBN:0-521-41932-8 211 212 type CycleInfo struct { 213 // IsCyclic indicates whether this conjunct, or any of its ancestors, 214 // had a violating cycle. 215 IsCyclic bool 216 217 // Inline is used to detect expressions referencing themselves, for instance: 218 // {x: out, out: x}.out 219 Inline bool 220 221 // TODO(perf): pack this in with CloseInfo. Make an uint32 pointing into 222 // a buffer maintained in OpContext, using a mark-release mechanism. 223 Refs *RefNode 224 } 225 226 // A RefNode is a linked list of associated references. 227 type RefNode struct { 228 Ref Resolver 229 Arc *Vertex // Ref points to this Vertex 230 231 // Node is the Vertex of which Ref is evaluated as a conjunct. 232 // If there is a cyclic reference (not structural cycle), then 233 // the reference will have the same node. This allows detecting reference 234 // cycles for nodes referring to nodes with an evaluation cycle 235 // (mode tracked to Evaluating status). Examples: 236 // 237 // a: x 238 // Y: x 239 // x: {Y} 240 // 241 // and 242 // 243 // Y: x.b 244 // a: x 245 // x: b: {Y} | null 246 // 247 // In both cases there are not structural cycles and thus need to be 248 // distinguished from regular structural cycles. 249 Node *Vertex 250 251 Next *RefNode 252 Depth int32 253 } 254 255 // cyclicConjunct is used in nodeContext to postpone the computation of 256 // cyclic conjuncts until a non-cyclic conjunct permits it to be processed. 257 type cyclicConjunct struct { 258 c Conjunct 259 arc *Vertex // cached Vertex 260 } 261 262 // markCycle checks whether the reference x is cyclic. There are two cases: 263 // 1. it was previously used in this conjunct, and 264 // 2. it directly references a parent node. 265 // 266 // Other inputs: 267 // 268 // arc the reference to which x points 269 // env, ci the components of the Conjunct from which x originates 270 // 271 // A cyclic node is added to a queue for later processing if no evidence of a 272 // non-cyclic node has so far been found. updateCyclicStatus processes delayed 273 // nodes down the line once such evidence is found. 274 // 275 // If a cycle is the result of "inline" processing (an expression referencing 276 // itself), an error is reported immediately. 277 // 278 // It returns the CloseInfo with tracked cyclic conjuncts updated, and 279 // whether or not its processing should be skipped, which is the case either if 280 // the conjunct seems to be fully cyclic so far or if there is a valid reference 281 // cycle. 282 func (n *nodeContext) markCycle(arc *Vertex, env *Environment, x Resolver, ci CloseInfo) (_ CloseInfo, skip bool) { 283 // TODO(perf): this optimization can work if we also check for any 284 // references pointing to arc within arc. This can be done with compiler 285 // support. With this optimization, almost all references could avoid cycle 286 // checking altogether! 287 // if arc.status == Finalized && arc.cyclicReferences == nil { 288 // return v, false 289 // } 290 291 // Check whether the reference already occurred in the list, signaling 292 // a potential cycle. 293 found := false 294 depth := int32(0) 295 for r := ci.Refs; r != nil; r = r.Next { 296 if r.Ref != x { 297 continue 298 } 299 300 // A reference that is within a graph that is being evaluated 301 // may repeat with a different arc and will point to a 302 // non-finalized arc. A repeating reference that points outside the 303 // graph will always be the same address. Hence, if this is a 304 // finalized arc with a different address, it resembles a reference that 305 // is included through a different path and is not a cycle. 306 if r.Arc != arc && arc.status == finalized { 307 continue 308 } 309 310 // For dynamically created structs we mark this as an error. Otherwise 311 // there is only an error if we have visited the arc before. 312 if ci.Inline && (arc.IsDynamic || r.Arc == arc) { 313 n.reportCycleError() 314 return ci, true 315 } 316 317 // We have a reference cycle, as distinguished from a structural 318 // cycle. Reference cycles represent equality, and thus are equal 319 // to top. We can stop processing here. 320 if r.Node == n.node { 321 return ci, true 322 } 323 324 depth = r.Depth 325 found = true 326 327 // Mark all conjuncts of this Vertex that refer to the same node as 328 // cyclic. This is an extra safety measure to ensure that two conjuncts 329 // cannot work in tandom to circumvent a cycle. It also tightens 330 // structural cycle detection in some cases. Late detection of cycles 331 // can result in a lot of redundant work. 332 // 333 // TODO: this loop is not on a critical path, but it may be evaluated 334 // if it is worthy keeping at some point. 335 for i, c := range n.node.Conjuncts { 336 if c.CloseInfo.IsCyclic { 337 continue 338 } 339 for rr := c.CloseInfo.Refs; rr != nil; rr = rr.Next { 340 // TODO: Is it necessary to find another way to find 341 // "parent" conjuncts? This mechanism seems not entirely 342 // accurate. Maybe a pointer up to find the root and then 343 // "spread" downwards? 344 if r.Ref == x && r.Arc == rr.Arc { 345 n.node.Conjuncts[i].CloseInfo.IsCyclic = true 346 break 347 } 348 } 349 } 350 351 break 352 } 353 354 // The code in this switch statement registers structural cycles caught 355 // through EvaluatingArcs to the root of the cycle. This way, any node 356 // referencing this value can track these nodes early. This is mostly an 357 // optimization to shorten the path for which structural cycles are 358 // detected, which may be critical for performance. 359 outer: 360 switch arc.status { 361 case evaluatingArcs: // also Evaluating? 362 // The reference may already be there if we had no-cyclic structure 363 // invalidating the cycle. 364 for r := arc.cyclicReferences; r != nil; r = r.Next { 365 if r.Ref == x { 366 break outer 367 } 368 } 369 370 arc.cyclicReferences = &RefNode{ 371 Arc: arc, 372 Ref: x, 373 Next: arc.cyclicReferences, 374 } 375 376 case finalized: 377 // Insert cyclic references from found arc, if any. 378 for r := arc.cyclicReferences; r != nil; r = r.Next { 379 if r.Ref == x { 380 // We have detected a cycle, with the only exception if arc is 381 // a disjunction, as evaluation always stops at unresolved 382 // disjunctions. 383 if _, ok := arc.BaseValue.(*Disjunction); !ok { 384 found = true 385 } 386 } 387 ci.Refs = &RefNode{ 388 Arc: r.Arc, 389 Node: n.node, 390 391 Ref: x, 392 Next: ci.Refs, 393 Depth: n.depth, 394 } 395 } 396 } 397 398 // NOTE: we need to add a tracked reference even if arc is not cyclic: it 399 // may still cause a cycle that does not refer to a parent node. For 400 // instance: 401 // 402 // y: [string]: b: y 403 // x: y 404 // x: c: x 405 // -> 406 // x: [string]: b: y 407 // x: c: b: y 408 // x: c: [string]: b: y 409 // x: c: b: b: y 410 // x: c: b: [string]: b: y 411 // x: c: b: b: b: y 412 // .... // structural cycle 1 413 // x: c: c: x // structural cycle 2 414 // 415 // Note that in this example there is a structural cycle at x.c.c, but we 416 // would need go guarantee that cycle is detected before the algorithm 417 // descends into x.c.b. 418 if !found || depth != n.depth { 419 // Adding this in case there is a definite cycle is unnecessary, but 420 // gives somewhat better error messages. 421 // We also need to add the reference again if the depth differs, as 422 // the depth is used for tracking "new structure". 423 ci.Refs = &RefNode{ 424 Arc: arc, 425 Ref: x, 426 Node: n.node, 427 Next: ci.Refs, 428 Depth: n.depth, 429 } 430 } 431 432 if !found && arc.status != evaluatingArcs { 433 // No cycle. 434 return ci, false 435 } 436 437 alreadyCycle := ci.IsCyclic 438 ci.IsCyclic = true 439 440 // TODO: depth might legitimately be 0 if it is a root vertex. 441 // In the worst case, this may lead to a spurious cycle. 442 // Fix this by ensuring the root vertex starts with a depth of 1, for 443 // instance. 444 if depth > 0 { 445 // Look for evidence of "new structure" to invalidate the cycle. 446 // This is done by checking for non-cyclic conjuncts between the 447 // current vertex up to the ancestor to which the reference points. 448 // Note that the cyclic conjunct may not be marked as such, so we 449 // look for at least one other non-cyclic conjunct if this is the case. 450 upCount := n.depth - depth 451 for p := n.node.Parent; p != nil; p = p.Parent { 452 if upCount--; upCount <= 0 { 453 break 454 } 455 a := p.Conjuncts 456 count := 0 457 for _, c := range a { 458 count += getNonCyclicCount(c) 459 } 460 if !alreadyCycle { 461 count-- 462 } 463 if count > 0 { 464 return ci, false 465 } 466 } 467 } 468 469 n.hasCycle = true 470 if !n.hasNonCycle && env != nil { 471 v := Conjunct{env, x, ci} 472 n.cyclicConjuncts = append(n.cyclicConjuncts, cyclicConjunct{v, arc}) 473 return ci, true 474 } 475 476 return ci, false 477 } 478 479 func getNonCyclicCount(c Conjunct) int { 480 switch a, ok := c.x.(*ConjunctGroup); { 481 case ok: 482 count := 0 483 for _, c := range *a { 484 count += getNonCyclicCount(c) 485 } 486 return count 487 488 case !c.CloseInfo.IsCyclic: 489 return 1 490 491 default: 492 return 0 493 } 494 } 495 496 // updateCyclicStatus looks for proof of non-cyclic conjuncts to override 497 // a structural cycle. 498 func (n *nodeContext) updateCyclicStatus(c CloseInfo) { 499 if !c.IsCyclic { 500 n.hasNonCycle = true 501 for _, c := range n.cyclicConjuncts { 502 if n.ctx.isDevVersion() { 503 ci := c.c.CloseInfo 504 ci.cc = n.node.rootCloseContext() 505 n.scheduleVertexConjuncts(c.c, c.arc, ci) 506 } else { 507 n.addVertexConjuncts(c.c, c.arc, false) 508 } 509 } 510 n.cyclicConjuncts = n.cyclicConjuncts[:0] 511 } 512 } 513 514 func assertStructuralCycle(n *nodeContext) bool { 515 if n.hasCycle && !n.hasNonCycle { 516 n.reportCycleError() 517 return true 518 } 519 return false 520 } 521 522 func (n *nodeContext) reportCycleError() { 523 n.node.BaseValue = CombineErrors(nil, 524 n.node.Value(), 525 &Bottom{ 526 Code: StructuralCycleError, 527 Err: n.ctx.Newf("structural cycle"), 528 Value: n.node.Value(), 529 // TODO: probably, this should have the referenced arc. 530 }) 531 n.node.Arcs = nil 532 } 533 534 // makeAnonymousConjunct creates a conjunct that tracks self-references when 535 // evaluating an expression. 536 // 537 // Example: 538 // TODO: 539 func makeAnonymousConjunct(env *Environment, x Expr, refs *RefNode) Conjunct { 540 return Conjunct{ 541 env, x, CloseInfo{CycleInfo: CycleInfo{ 542 Inline: true, 543 Refs: refs, 544 }}, 545 } 546 } 547