s2intersect.go

Documentation: github.com/golang/geo/s2/s2intersect

     1  // Copyright 2023 Google Inc. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package s2intersect efficiently finds common areas shared by sets of S2
    16  // CellUnions. Use this package instead of s2.CellUnionFromIntersection when
    17  // there are 3 or more CellUnions to compare and all intersections are required,
    18  // as the brute force alternative is exponential in time.
    19  //
    20  // Benchmarks show that direct comparison of a pair of CellUnions is more
    21  // efficient with respect to both time and memory when using the standard
    22  // s2.CellUnionFromIntersection function. The benefits of s2intersect arise as
    23  // the number of potential intersections grows; for n CellUnions, there are
    24  // 2^n - n - 1 possible intersections (power set excluding single-member sets
    25  // and the empty set).
    26  package s2intersect
    27  
    28  import (
    29  	"fmt"
    30  	"sort"
    31  
    32  	"github.com/golang/geo/s2"
    33  )
    34  
    35  // Re nomenclature used throughout this file: an "intersection" is a 2D
    36  // (spherical) concept in keeping with s2.CellUnionFromIntersection(). The 1D
    37  // (Hilbert curve) equivalent is referred to as an "overlap", simply to
    38  // differentiate between the two. An overlap interval is closed, and defined by
    39  // RangeMin and RangeMax (i.e. leaf Cells) of the first and last CellIDs,
    40  // respectively, in the intersection that it maps to—these are referred to as
    41  // "limits". Wherever "indices" are referred to, these reference the slice of
    42  // CellUnions passed in by a user of this package.
    43  
    44  // An Intersection describes the intersection of a set of CellUnions.
    45  //
    46  // Intersections returned by Find() are disjoint; i.e. if the Indices of one are
    47  // a sub set of the Indices of another, the sub-set Intersection will NOT
    48  // include the CellIDs that are included in the super-set Intersection; for
    49  // details, see Find() re disjoint Intersections.
    50  type Intersection struct {
    51  	// Indices refer to the slice of CellUnions passed to Find().
    52  	Indices      []int
    53  	Intersection s2.CellUnion
    54  }
    55  
    56  // Find returns the set of all disjoint intersections of the CellUnions. The
    57  // ordering of the output is undefined.
    58  //
    59  // Whereas the naive approach of finding pairwise intersections is O(n^2) for n
    60  // CellUnions, and a naive multi-way search is O(2^n), Find() is
    61  // O(max(i log i, c)) for c Cells constituting i intervals on the Hilbert curve
    62  // (contiguous Cells form a single interval).
    63  //
    64  // Find calls Normalize() on all CellUnions, the efficiency of which is not
    65  // considered in the previous calculations.
    66  //
    67  // Note that returned Intersections are disjoint; Cells are only counted towards
    68  // the most comprehensive Intersection. For example, the intersection of regions
    69  // A and B will NOT include the area in which regions A, B, _and_ C intersect
    70  // because {A,B,C} is a super set of {A,B}. Correcting this requires iterating
    71  // over the power set of all resulting Intersections and "decomposing" them into
    72  // constituent parts (power sets are O(2^n) for a set of size n).
    73  //
    74  // Consider the following CellUnions, shown as their corresponding intervals on
    75  // the straightened Hilbert curve. Cells are denoted as = and grouped into 4 for
    76  // ease of viewing (i.e. the pipes | have no meaning). The resulting
    77  // Intersections are denoted with the letters X–Z and the disjoint nature of the
    78  // output is such that Y and Z will not include the Cells that are instead
    79  // reported in X, even though the respective CellUnions do indeed intersect
    80  // there. Analogously, each area of the Venn diagram of regions can receive only
    81  // one label.
    82  //
    83  //	0: |====|  ==|    |
    84  //	1: |==  |   =|=== |
    85  //	2: |====|====|  ==|
    86  //	    XXYY   YX   Z
    87  //
    88  //	X: {0,1,2}
    89  //	Y: {0,2}
    90  //	Z: {1,2}
    91  func Find(cus []s2.CellUnion) []Intersection {
    92  	return overlapsToIntersections(cellUnionsToOverlaps(cus))
    93  }
    94  
    95  // An overlap describes a closed range on the Hilbert curve, throughout which,
    96  // a set of CellUnions intersect.
    97  type overlap struct {
    98  	indices    []int
    99  	start, end s2.CellID
   100  }
   101  
   102  func cellUnionsToOverlaps(cus []s2.CellUnion) []overlap {
   103  	// We convert the 2-dimensional problem of finding intersections into a 1D
   104  	// problem of overlapping intervals on a line (the Hilbert curve).
   105  	var lims []*limit
   106  	for i, cu := range cus {
   107  		cu.Normalize()
   108  		lims = append(lims, cellUnionToIntervalLimits(cu, i)...)
   109  	}
   110  	if len(lims) == 0 {
   111  		return nil
   112  	}
   113  
   114  	lims = collapseLimits(lims)
   115  	return intervalOverlaps(lims)
   116  }
   117  
   118  func intervalOverlaps(lims []*limit) []overlap {
   119  	open := make(map[int]struct{})
   120  	openIndices := func() []int {
   121  		is := make([]int, 0, len(open))
   122  		for i := range open {
   123  			is = append(is, i)
   124  		}
   125  		sort.Ints(is)
   126  		return is
   127  	}
   128  
   129  	var (
   130  		lastStart s2.CellID
   131  		overlaps  []overlap
   132  	)
   133  
   134  	for _, l := range lims {
   135  		if len(open) > 1 {
   136  			// The existence of a limit at this point constitutes a new overlap
   137  			// regardless of the limit type. If it is an end limit then obviously the
   138  			// overlap is ceasing, but if it's a start limit then the overlap until
   139  			// here only applies to the already-open values but until the previous
   140  			// leaf Cell.
   141  			endLeaf := l.leaf
   142  			if l.typ == start {
   143  				endLeaf = endLeaf.Prev()
   144  			}
   145  			overlaps = append(overlaps, overlap{
   146  				indices: openIndices(),
   147  				start:   lastStart,
   148  				end:     endLeaf,
   149  			})
   150  		}
   151  
   152  		switch l.typ {
   153  		case start:
   154  			for _, i := range l.indices {
   155  				open[i] = struct{}{}
   156  			}
   157  		case end:
   158  			for _, i := range l.indices {
   159  				delete(open, i)
   160  			}
   161  		}
   162  
   163  		if len(open) > 1 {
   164  			// As with the rationale at the beginning of the limit loop, we now have a
   165  			// new interval regardless of the limit type. However, the starting leaf
   166  			// is iterated if this is currently a close limit because the remaining
   167  			// indices only overlap _after_ the current leaf by nature of the
   168  			// intervals being closed.
   169  			lastStart = l.leaf
   170  			if l.typ == end {
   171  				lastStart = lastStart.Next()
   172  			}
   173  		}
   174  	}
   175  
   176  	return overlaps
   177  }
   178  
   179  // A limit defines either the beginning or end of a closed interval on the
   180  // Hilbert curve of leaf Cells.
   181  type limit struct {
   182  	leaf    s2.CellID
   183  	typ     limitType // There are always pairs of {start,end}
   184  	indices []int     // Indices within in the slice of CellUnions passed to Find().
   185  }
   186  
   187  type limitType bool
   188  
   189  const (
   190  	start limitType = false
   191  	end   limitType = true // end is inclusive
   192  )
   193  
   194  // cellUnionToIntervalLimits converts all Cells into pairs of limits
   195  // representing closed intervals on the Hilbert curve of leaf Cells. The value
   196  // of idx must be the index of cu within the slice passed to Find().
   197  func cellUnionToIntervalLimits(cu s2.CellUnion, idx int) []*limit {
   198  	if len(cu) == 0 {
   199  		return nil
   200  	}
   201  
   202  	var lims []*limit
   203  	pushLeaf := func(cID s2.CellID, t limitType) {
   204  		lims = append(lims, &limit{
   205  			leaf:    cID,
   206  			indices: []int{idx},
   207  			typ:     t,
   208  		})
   209  	}
   210  
   211  	// Intervals are defined by start and end leaf Cells. Instead of
   212  	// converting every Cell into an interval, we merge contiguous ones by
   213  	// checking if lastend.Next() is equal to the next Cell's start leaf.
   214  	var lastend s2.CellID
   215  
   216  	for _, cID := range cu {
   217  		switch startLeaf := cID.RangeMin(); {
   218  		case lastend == 0:
   219  			// Current Cell represents the first interval
   220  			pushLeaf(startLeaf, start)
   221  		case lastend.Next() != startLeaf:
   222  			// Current and last Cells are non-contiguous
   223  			pushLeaf(lastend, end)
   224  			pushLeaf(startLeaf, start)
   225  		}
   226  		lastend = cID.RangeMax()
   227  	}
   228  
   229  	pushLeaf(lastend, end)
   230  	return lims
   231  }
   232  
   233  // collapseLimits returns a slice of limits such that all of those of the same
   234  // type at the same leaf CellID are grouped with their indices in the same
   235  // slice. The returned slice will be sorted by CellID and tyoe,
   236  //
   237  // e.g. {leaf(x),start,indices[2]} and {leaf(x),start,indices[7]} will become
   238  // {leaf(x),start,indices[2,7]}.
   239  //
   240  // The original slice and its contents will be modified, but not in place;
   241  // therefore only the returned value is valid.
   242  func collapseLimits(lims []*limit) []*limit {
   243  	if len(lims) == 0 {
   244  		return nil
   245  	}
   246  
   247  	// Limits at the same leaf MUST be sorted such that start-types come before
   248  	// end-types. This is because they are closed intervals so we can't have
   249  	// ending intervals removed before the overlap is accounted for.
   250  	sort.Slice(lims, func(i, j int) bool {
   251  		lI, lJ := lims[i], lims[j]
   252  		if lI.leaf != lJ.leaf {
   253  			return lI.leaf < lJ.leaf
   254  		}
   255  		return lI.typ == start
   256  	})
   257  
   258  	out := []*limit{lims[0]}
   259  	last := out[0]
   260  	for _, l := range lims[1:] {
   261  		if l.leaf == last.leaf && l.typ == last.typ {
   262  			last.indices = append(last.indices, l.indices...)
   263  			continue
   264  		}
   265  		sort.Ints(last.indices)
   266  		last = l
   267  		out = append(out, l)
   268  	}
   269  	return out
   270  }
   271  
   272  // indexKey converts a slice of sorted ints into a string for use as a map key.
   273  func indexKey(is []int) string {
   274  	return fmt.Sprintf("%d", is)
   275  }
   276  
   277  func overlapsToIntersections(overlaps []overlap) []Intersection {
   278  	set := make(map[string]*Intersection)
   279  
   280  	for _, o := range overlaps {
   281  		k := indexKey(o.indices)
   282  		i, ok := set[k]
   283  		if !ok {
   284  			i = &Intersection{Indices: o.indices}
   285  			set[k] = i
   286  		}
   287  		// CellUnionFromRange assumes a half-open interval, so use Next() as
   288  		// suggested in its comment.
   289  		i.Intersection = append(i.Intersection, s2.CellUnionFromRange(o.start, o.end.Next())...)
   290  	}
   291  
   292  	result := make([]Intersection, 0, len(set))
   293  	for _, i := range set {
   294  		i.Intersection.Normalize()
   295  		result = append(result, *i)
   296  	}
   297  	return result
   298  }
   299
View as plain text