// Copyright 2023 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package s2intersect efficiently finds common areas shared by sets of S2
// CellUnions. Use this package instead of s2.CellUnionFromIntersection when
// there are 3 or more CellUnions to compare and all intersections are required,
// as the brute force alternative is exponential in time.
//
// Benchmarks show that direct comparison of a pair of CellUnions is more
// efficient with respect to both time and memory when using the standard
// s2.CellUnionFromIntersection function. The benefits of s2intersect arise as
// the number of potential intersections grows; for n CellUnions, there are
// 2^n - n - 1 possible intersections (power set excluding single-member sets
// and the empty set).
package s2intersect

import (
	"fmt"
	"sort"

	"github.com/golang/geo/s2"
)

// Re nomenclature used throughout this file: an "intersection" is a 2D
// (spherical) concept in keeping with s2.CellUnionFromIntersection(). The 1D
// (Hilbert curve) equivalent is referred to as an "overlap", simply to
// differentiate between the two. An overlap interval is closed, and defined by
// RangeMin and RangeMax (i.e. leaf Cells) of the first and last CellIDs,
// respectively, in the intersection that it maps to—these are referred to as
// "limits". Wherever "indices" are referred to, these reference the slice of
// CellUnions passed in by a user of this package.

// An Intersection describes the intersection of a set of CellUnions.
//
// Intersections returned by Find() are disjoint; i.e. if the Indices of one are
// a sub set of the Indices of another, the sub-set Intersection will NOT
// include the CellIDs that are included in the super-set Intersection; for
// details, see Find() re disjoint Intersections.
type Intersection struct {
	// Indices refer to the slice of CellUnions passed to Find().
	Indices      []int
	Intersection s2.CellUnion
}

// Find returns the set of all disjoint intersections of the CellUnions. The
// ordering of the output is undefined.
//
// Whereas the naive approach of finding pairwise intersections is O(n^2) for n
// CellUnions, and a naive multi-way search is O(2^n), Find() is
// O(max(i log i, c)) for c Cells constituting i intervals on the Hilbert curve
// (contiguous Cells form a single interval).
//
// Find calls Normalize() on all CellUnions, the efficiency of which is not
// considered in the previous calculations.
//
// Note that returned Intersections are disjoint; Cells are only counted towards
// the most comprehensive Intersection. For example, the intersection of regions
// A and B will NOT include the area in which regions A, B, _and_ C intersect
// because {A,B,C} is a super set of {A,B}. Correcting this requires iterating
// over the power set of all resulting Intersections and "decomposing" them into
// constituent parts (power sets are O(2^n) for a set of size n).
//
// Consider the following CellUnions, shown as their corresponding intervals on
// the straightened Hilbert curve. Cells are denoted as = and grouped into 4 for
// ease of viewing (i.e. the pipes | have no meaning). The resulting
// Intersections are denoted with the letters X–Z and the disjoint nature of the
// output is such that Y and Z will not include the Cells that are instead
// reported in X, even though the respective CellUnions do indeed intersect
// there. Analogously, each area of the Venn diagram of regions can receive only
// one label.
//
//	0: |====|  ==|    |
//	1: |==  |   =|=== |
//	2: |====|====|  ==|
//	    XXYY   YX   Z
//
//	X: {0,1,2}
//	Y: {0,2}
//	Z: {1,2}
func Find(cus []s2.CellUnion) []Intersection {
	return overlapsToIntersections(cellUnionsToOverlaps(cus))
}

// An overlap describes a closed range on the Hilbert curve, throughout which,
// a set of CellUnions intersect.
type overlap struct {
	indices    []int
	start, end s2.CellID
}

func cellUnionsToOverlaps(cus []s2.CellUnion) []overlap {
	// We convert the 2-dimensional problem of finding intersections into a 1D
	// problem of overlapping intervals on a line (the Hilbert curve).
	var lims []*limit
	for i, cu := range cus {
		cu.Normalize()
		lims = append(lims, cellUnionToIntervalLimits(cu, i)...)
	}
	if len(lims) == 0 {
		return nil
	}

	lims = collapseLimits(lims)
	return intervalOverlaps(lims)
}

func intervalOverlaps(lims []*limit) []overlap {
	open := make(map[int]struct{})
	openIndices := func() []int {
		is := make([]int, 0, len(open))
		for i := range open {
			is = append(is, i)
		}
		sort.Ints(is)
		return is
	}

	var (
		lastStart s2.CellID
		overlaps  []overlap
	)

	for _, l := range lims {
		if len(open) > 1 {
			// The existence of a limit at this point constitutes a new overlap
			// regardless of the limit type. If it is an end limit then obviously the
			// overlap is ceasing, but if it's a start limit then the overlap until
			// here only applies to the already-open values but until the previous
			// leaf Cell.
			endLeaf := l.leaf
			if l.typ == start {
				endLeaf = endLeaf.Prev()
			}
			overlaps = append(overlaps, overlap{
				indices: openIndices(),
				start:   lastStart,
				end:     endLeaf,
			})
		}

		switch l.typ {
		case start:
			for _, i := range l.indices {
				open[i] = struct{}{}
			}
		case end:
			for _, i := range l.indices {
				delete(open, i)
			}
		}

		if len(open) > 1 {
			// As with the rationale at the beginning of the limit loop, we now have a
			// new interval regardless of the limit type. However, the starting leaf
			// is iterated if this is currently a close limit because the remaining
			// indices only overlap _after_ the current leaf by nature of the
			// intervals being closed.
			lastStart = l.leaf
			if l.typ == end {
				lastStart = lastStart.Next()
			}
		}
	}

	return overlaps
}

// A limit defines either the beginning or end of a closed interval on the
// Hilbert curve of leaf Cells.
type limit struct {
	leaf    s2.CellID
	typ     limitType // There are always pairs of {start,end}
	indices []int     // Indices within in the slice of CellUnions passed to Find().
}

type limitType bool

const (
	start limitType = false
	end   limitType = true // end is inclusive
)

// cellUnionToIntervalLimits converts all Cells into pairs of limits
// representing closed intervals on the Hilbert curve of leaf Cells. The value
// of idx must be the index of cu within the slice passed to Find().
func cellUnionToIntervalLimits(cu s2.CellUnion, idx int) []*limit {
	if len(cu) == 0 {
		return nil
	}

	var lims []*limit
	pushLeaf := func(cID s2.CellID, t limitType) {
		lims = append(lims, &limit{
			leaf:    cID,
			indices: []int{idx},
			typ:     t,
		})
	}

	// Intervals are defined by start and end leaf Cells. Instead of
	// converting every Cell into an interval, we merge contiguous ones by
	// checking if lastend.Next() is equal to the next Cell's start leaf.
	var lastend s2.CellID

	for _, cID := range cu {
		switch startLeaf := cID.RangeMin(); {
		case lastend == 0:
			// Current Cell represents the first interval
			pushLeaf(startLeaf, start)
		case lastend.Next() != startLeaf:
			// Current and last Cells are non-contiguous
			pushLeaf(lastend, end)
			pushLeaf(startLeaf, start)
		}
		lastend = cID.RangeMax()
	}

	pushLeaf(lastend, end)
	return lims
}

// collapseLimits returns a slice of limits such that all of those of the same
// type at the same leaf CellID are grouped with their indices in the same
// slice. The returned slice will be sorted by CellID and tyoe,
//
// e.g. {leaf(x),start,indices[2]} and {leaf(x),start,indices[7]} will become
// {leaf(x),start,indices[2,7]}.
//
// The original slice and its contents will be modified, but not in place;
// therefore only the returned value is valid.
func collapseLimits(lims []*limit) []*limit {
	if len(lims) == 0 {
		return nil
	}

	// Limits at the same leaf MUST be sorted such that start-types come before
	// end-types. This is because they are closed intervals so we can't have
	// ending intervals removed before the overlap is accounted for.
	sort.Slice(lims, func(i, j int) bool {
		lI, lJ := lims[i], lims[j]
		if lI.leaf != lJ.leaf {
			return lI.leaf < lJ.leaf
		}
		return lI.typ == start
	})

	out := []*limit{lims[0]}
	last := out[0]
	for _, l := range lims[1:] {
		if l.leaf == last.leaf && l.typ == last.typ {
			last.indices = append(last.indices, l.indices...)
			continue
		}
		sort.Ints(last.indices)
		last = l
		out = append(out, l)
	}
	return out
}

// indexKey converts a slice of sorted ints into a string for use as a map key.
func indexKey(is []int) string {
	return fmt.Sprintf("%d", is)
}

func overlapsToIntersections(overlaps []overlap) []Intersection {
	set := make(map[string]*Intersection)

	for _, o := range overlaps {
		k := indexKey(o.indices)
		i, ok := set[k]
		if !ok {
			i = &Intersection{Indices: o.indices}
			set[k] = i
		}
		// CellUnionFromRange assumes a half-open interval, so use Next() as
		// suggested in its comment.
		i.Intersection = append(i.Intersection, s2.CellUnionFromRange(o.start, o.end.Next())...)
	}

	result := make([]Intersection, 0, len(set))
	for _, i := range set {
		i.Intersection.Normalize()
		result = append(result, *i)
	}
	return result
}