1 // Copyright 2023 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package s2intersect efficiently finds common areas shared by sets of S2 16 // CellUnions. Use this package instead of s2.CellUnionFromIntersection when 17 // there are 3 or more CellUnions to compare and all intersections are required, 18 // as the brute force alternative is exponential in time. 19 // 20 // Benchmarks show that direct comparison of a pair of CellUnions is more 21 // efficient with respect to both time and memory when using the standard 22 // s2.CellUnionFromIntersection function. The benefits of s2intersect arise as 23 // the number of potential intersections grows; for n CellUnions, there are 24 // 2^n - n - 1 possible intersections (power set excluding single-member sets 25 // and the empty set). 26 package s2intersect 27 28 import ( 29 "fmt" 30 "sort" 31 32 "github.com/golang/geo/s2" 33 ) 34 35 // Re nomenclature used throughout this file: an "intersection" is a 2D 36 // (spherical) concept in keeping with s2.CellUnionFromIntersection(). The 1D 37 // (Hilbert curve) equivalent is referred to as an "overlap", simply to 38 // differentiate between the two. An overlap interval is closed, and defined by 39 // RangeMin and RangeMax (i.e. leaf Cells) of the first and last CellIDs, 40 // respectively, in the intersection that it maps to—these are referred to as 41 // "limits". Wherever "indices" are referred to, these reference the slice of 42 // CellUnions passed in by a user of this package. 43 44 // An Intersection describes the intersection of a set of CellUnions. 45 // 46 // Intersections returned by Find() are disjoint; i.e. if the Indices of one are 47 // a sub set of the Indices of another, the sub-set Intersection will NOT 48 // include the CellIDs that are included in the super-set Intersection; for 49 // details, see Find() re disjoint Intersections. 50 type Intersection struct { 51 // Indices refer to the slice of CellUnions passed to Find(). 52 Indices []int 53 Intersection s2.CellUnion 54 } 55 56 // Find returns the set of all disjoint intersections of the CellUnions. The 57 // ordering of the output is undefined. 58 // 59 // Whereas the naive approach of finding pairwise intersections is O(n^2) for n 60 // CellUnions, and a naive multi-way search is O(2^n), Find() is 61 // O(max(i log i, c)) for c Cells constituting i intervals on the Hilbert curve 62 // (contiguous Cells form a single interval). 63 // 64 // Find calls Normalize() on all CellUnions, the efficiency of which is not 65 // considered in the previous calculations. 66 // 67 // Note that returned Intersections are disjoint; Cells are only counted towards 68 // the most comprehensive Intersection. For example, the intersection of regions 69 // A and B will NOT include the area in which regions A, B, _and_ C intersect 70 // because {A,B,C} is a super set of {A,B}. Correcting this requires iterating 71 // over the power set of all resulting Intersections and "decomposing" them into 72 // constituent parts (power sets are O(2^n) for a set of size n). 73 // 74 // Consider the following CellUnions, shown as their corresponding intervals on 75 // the straightened Hilbert curve. Cells are denoted as = and grouped into 4 for 76 // ease of viewing (i.e. the pipes | have no meaning). The resulting 77 // Intersections are denoted with the letters X–Z and the disjoint nature of the 78 // output is such that Y and Z will not include the Cells that are instead 79 // reported in X, even though the respective CellUnions do indeed intersect 80 // there. Analogously, each area of the Venn diagram of regions can receive only 81 // one label. 82 // 83 // 0: |====| ==| | 84 // 1: |== | =|=== | 85 // 2: |====|====| ==| 86 // XXYY YX Z 87 // 88 // X: {0,1,2} 89 // Y: {0,2} 90 // Z: {1,2} 91 func Find(cus []s2.CellUnion) []Intersection { 92 return overlapsToIntersections(cellUnionsToOverlaps(cus)) 93 } 94 95 // An overlap describes a closed range on the Hilbert curve, throughout which, 96 // a set of CellUnions intersect. 97 type overlap struct { 98 indices []int 99 start, end s2.CellID 100 } 101 102 func cellUnionsToOverlaps(cus []s2.CellUnion) []overlap { 103 // We convert the 2-dimensional problem of finding intersections into a 1D 104 // problem of overlapping intervals on a line (the Hilbert curve). 105 var lims []*limit 106 for i, cu := range cus { 107 cu.Normalize() 108 lims = append(lims, cellUnionToIntervalLimits(cu, i)...) 109 } 110 if len(lims) == 0 { 111 return nil 112 } 113 114 lims = collapseLimits(lims) 115 return intervalOverlaps(lims) 116 } 117 118 func intervalOverlaps(lims []*limit) []overlap { 119 open := make(map[int]struct{}) 120 openIndices := func() []int { 121 is := make([]int, 0, len(open)) 122 for i := range open { 123 is = append(is, i) 124 } 125 sort.Ints(is) 126 return is 127 } 128 129 var ( 130 lastStart s2.CellID 131 overlaps []overlap 132 ) 133 134 for _, l := range lims { 135 if len(open) > 1 { 136 // The existence of a limit at this point constitutes a new overlap 137 // regardless of the limit type. If it is an end limit then obviously the 138 // overlap is ceasing, but if it's a start limit then the overlap until 139 // here only applies to the already-open values but until the previous 140 // leaf Cell. 141 endLeaf := l.leaf 142 if l.typ == start { 143 endLeaf = endLeaf.Prev() 144 } 145 overlaps = append(overlaps, overlap{ 146 indices: openIndices(), 147 start: lastStart, 148 end: endLeaf, 149 }) 150 } 151 152 switch l.typ { 153 case start: 154 for _, i := range l.indices { 155 open[i] = struct{}{} 156 } 157 case end: 158 for _, i := range l.indices { 159 delete(open, i) 160 } 161 } 162 163 if len(open) > 1 { 164 // As with the rationale at the beginning of the limit loop, we now have a 165 // new interval regardless of the limit type. However, the starting leaf 166 // is iterated if this is currently a close limit because the remaining 167 // indices only overlap _after_ the current leaf by nature of the 168 // intervals being closed. 169 lastStart = l.leaf 170 if l.typ == end { 171 lastStart = lastStart.Next() 172 } 173 } 174 } 175 176 return overlaps 177 } 178 179 // A limit defines either the beginning or end of a closed interval on the 180 // Hilbert curve of leaf Cells. 181 type limit struct { 182 leaf s2.CellID 183 typ limitType // There are always pairs of {start,end} 184 indices []int // Indices within in the slice of CellUnions passed to Find(). 185 } 186 187 type limitType bool 188 189 const ( 190 start limitType = false 191 end limitType = true // end is inclusive 192 ) 193 194 // cellUnionToIntervalLimits converts all Cells into pairs of limits 195 // representing closed intervals on the Hilbert curve of leaf Cells. The value 196 // of idx must be the index of cu within the slice passed to Find(). 197 func cellUnionToIntervalLimits(cu s2.CellUnion, idx int) []*limit { 198 if len(cu) == 0 { 199 return nil 200 } 201 202 var lims []*limit 203 pushLeaf := func(cID s2.CellID, t limitType) { 204 lims = append(lims, &limit{ 205 leaf: cID, 206 indices: []int{idx}, 207 typ: t, 208 }) 209 } 210 211 // Intervals are defined by start and end leaf Cells. Instead of 212 // converting every Cell into an interval, we merge contiguous ones by 213 // checking if lastend.Next() is equal to the next Cell's start leaf. 214 var lastend s2.CellID 215 216 for _, cID := range cu { 217 switch startLeaf := cID.RangeMin(); { 218 case lastend == 0: 219 // Current Cell represents the first interval 220 pushLeaf(startLeaf, start) 221 case lastend.Next() != startLeaf: 222 // Current and last Cells are non-contiguous 223 pushLeaf(lastend, end) 224 pushLeaf(startLeaf, start) 225 } 226 lastend = cID.RangeMax() 227 } 228 229 pushLeaf(lastend, end) 230 return lims 231 } 232 233 // collapseLimits returns a slice of limits such that all of those of the same 234 // type at the same leaf CellID are grouped with their indices in the same 235 // slice. The returned slice will be sorted by CellID and tyoe, 236 // 237 // e.g. {leaf(x),start,indices[2]} and {leaf(x),start,indices[7]} will become 238 // {leaf(x),start,indices[2,7]}. 239 // 240 // The original slice and its contents will be modified, but not in place; 241 // therefore only the returned value is valid. 242 func collapseLimits(lims []*limit) []*limit { 243 if len(lims) == 0 { 244 return nil 245 } 246 247 // Limits at the same leaf MUST be sorted such that start-types come before 248 // end-types. This is because they are closed intervals so we can't have 249 // ending intervals removed before the overlap is accounted for. 250 sort.Slice(lims, func(i, j int) bool { 251 lI, lJ := lims[i], lims[j] 252 if lI.leaf != lJ.leaf { 253 return lI.leaf < lJ.leaf 254 } 255 return lI.typ == start 256 }) 257 258 out := []*limit{lims[0]} 259 last := out[0] 260 for _, l := range lims[1:] { 261 if l.leaf == last.leaf && l.typ == last.typ { 262 last.indices = append(last.indices, l.indices...) 263 continue 264 } 265 sort.Ints(last.indices) 266 last = l 267 out = append(out, l) 268 } 269 return out 270 } 271 272 // indexKey converts a slice of sorted ints into a string for use as a map key. 273 func indexKey(is []int) string { 274 return fmt.Sprintf("%d", is) 275 } 276 277 func overlapsToIntersections(overlaps []overlap) []Intersection { 278 set := make(map[string]*Intersection) 279 280 for _, o := range overlaps { 281 k := indexKey(o.indices) 282 i, ok := set[k] 283 if !ok { 284 i = &Intersection{Indices: o.indices} 285 set[k] = i 286 } 287 // CellUnionFromRange assumes a half-open interval, so use Next() as 288 // suggested in its comment. 289 i.Intersection = append(i.Intersection, s2.CellUnionFromRange(o.start, o.end.Next())...) 290 } 291 292 result := make([]Intersection, 0, len(set)) 293 for _, i := range set { 294 i.Intersection.Normalize() 295 result = append(result, *i) 296 } 297 return result 298 } 299