//go:build !launchdarkly_easyjson
// +build !launchdarkly_easyjson

package jreader

// This file defines the default implementation of the low-level JSON tokenizer. If the launchdarkly_easyjson
// build tag is enabled, we use the easyjson adapter in token_reader_easyjson.go instead. These have the same
// methods so the Reader code does not need to know which implementation we're using; however, we don't
// actually define an interface for these, because calling the methods through an interface would limit
// performance.

import (
	"bytes"
	"io"
	"strconv"
	"unicode"
	"unicode/utf8"
)

var (
	tokenNull  = []byte("null")  //nolint:gochecknoglobals
	tokenTrue  = []byte("true")  //nolint:gochecknoglobals
	tokenFalse = []byte("false") //nolint:gochecknoglobals
)

type token struct {
	kind        tokenKind
	boolValue   bool
	numberValue float64
	stringValue []byte
	delimiter   byte
}

type tokenKind int

const (
	nullToken      tokenKind = iota
	boolToken      tokenKind = iota
	numberToken    tokenKind = iota
	stringToken    tokenKind = iota
	delimiterToken tokenKind = iota
)

func (t token) valueKind() ValueKind {
	if t.kind == delimiterToken {
		if t.delimiter == '[' {
			return ArrayValue
		}
		if t.delimiter == '{' {
			return ObjectValue
		}
	}
	return valueKindFromTokenKind(t.kind)
}

func (t token) description() string {
	if t.kind == delimiterToken && t.delimiter != '[' && t.delimiter != '{' {
		return "'" + string(t.delimiter) + "'"
	}
	return t.valueKind().String()
}

type tokenReader struct {
	data        []byte
	pos         int
	len         int
	hasUnread   bool
	unreadToken token
	lastPos     int
}

func newTokenReader(data []byte) tokenReader {
	tr := tokenReader{
		data: data,
		pos:  0,
		len:  len(data),
	}
	return tr
}

// EOF returns true if we are at the end of the input (not counting whitespace).
func (r *tokenReader) EOF() bool {
	if r.hasUnread {
		return false
	}
	_, ok := r.skipWhitespaceAndReadByte()
	if !ok {
		return true
	}
	r.unreadByte()
	return false
}

// LastPos returns the byte offset within the input where we most recently started parsing a token.
func (r *tokenReader) LastPos() int {
	return r.lastPos
}

func (r *tokenReader) getPos() int {
	if r.hasUnread {
		return r.lastPos
	}
	return r.pos
}

// Null returns (true, nil) if the next token is a null (consuming the token); (false, nil) if the next
// token is not a null (not consuming the token); or (false, error) if the next token is not a valid
// JSON value.
//
// This and all other tokenReader methods skip transparently past whitespace between tokens.
func (r *tokenReader) Null() (bool, error) {
	t, err := r.next()
	if err != nil {
		return false, err
	}
	if t.kind == nullToken {
		return true, nil
	}
	r.putBack(t)
	if t.kind == delimiterToken && t.delimiter != '[' && t.delimiter != '{' {
		return false, SyntaxError{Message: errMsgUnexpectedChar, Value: string(t.delimiter), Offset: r.getPos()}
	}
	return false, nil
}

// Bool requires that the next token is a JSON boolean, returning its value if successful (consuming
// the token), or an error if the next token is anything other than a JSON boolean.
//
// This and all other tokenReader methods skip transparently past whitespace between tokens.
func (r *tokenReader) Bool() (bool, error) {
	t, err := r.consumeScalar(boolToken)
	return t.boolValue, err
}

// Bool requires that the next token is a JSON number, returning its value if successful (consuming
// the token), or an error if the next token is anything other than a JSON number.
//
// This and all other tokenReader methods skip transparently past whitespace between tokens.
func (r *tokenReader) Number() (float64, error) {
	t, err := r.consumeScalar(numberToken)
	return t.numberValue, err
}

// String requires that the next token is a JSON string, returning its value if successful (consuming
// the token), or an error if the next token is anything other than a JSON string.
//
// This and all other tokenReader methods skip transparently past whitespace between tokens.
func (r *tokenReader) String() (string, error) {
	t, err := r.consumeScalar(stringToken)
	return string(t.stringValue), err
}

// PropertyName requires that the next token is a JSON string and the token after that is a colon,
// returning the string as a byte slice if successful, or an error otherwise.
//
// Returning the string as a byte slice avoids the overhead of allocating a string, since normally
// the names of properties will not be retained as strings but are only compared to constants while
// parsing an object.
//
// This and all other tokenReader methods skip transparently past whitespace between tokens.
func (r *tokenReader) PropertyName() ([]byte, error) {
	t, err := r.consumeScalar(stringToken)
	if err != nil {
		return nil, err
	}
	b, ok := r.skipWhitespaceAndReadByte()
	if !ok {
		return nil, io.EOF
	}
	if b != ':' {
		r.unreadByte()
		return nil, r.syntaxErrorOnNextToken(errMsgExpectedColon)
	}
	return t.stringValue, nil
}

// Delimiter checks whether the next token is the specified ASCII delimiter character. If so, it
// returns (true, nil) and consumes the token. If it is a delimiter, but not the same one, it
// returns (false, nil) and does not consume the token. For anything else, it returns an error.
//
// This and all other tokenReader methods skip transparently past whitespace between tokens.
func (r *tokenReader) Delimiter(delimiter byte) (bool, error) {
	if r.hasUnread {
		if r.unreadToken.kind == delimiterToken && r.unreadToken.delimiter == delimiter {
			r.hasUnread = false
			return true, nil
		}
		return false, nil
	}
	b, ok := r.skipWhitespaceAndReadByte()
	if !ok {
		return false, nil
	}
	if b == delimiter {
		return true, nil
	}
	r.unreadByte() // we'll back up and try to parse a token, to see if it's valid JSON or not
	token, err := r.next()
	if err != nil {
		return false, err // it was malformed JSON
	}
	r.putBack(token) // it was valid JSON, we just haven't hit that delimiter
	return false, nil
}

// EndDelimiterOrComma checks whether the next token is the specified ASCII delimiter character
// or a comma. If it is the specified delimiter, it returns (true, nil) and consumes the token.
// If it is a comma, it returns (false, nil) and consumes the token. For anything else, it
// returns an error. The delimiter parameter will always be either '}' or ']'.
func (r *tokenReader) EndDelimiterOrComma(delimiter byte) (bool, error) {
	if r.hasUnread {
		if r.unreadToken.kind == delimiterToken &&
			(r.unreadToken.delimiter == delimiter || r.unreadToken.delimiter == ',') {
			r.hasUnread = false
			return r.unreadToken.delimiter == delimiter, nil
		}
		return false, SyntaxError{Message: badArrayOrObjectItemMessage(delimiter == '}'),
			Value: r.unreadToken.description(), Offset: r.lastPos}
	}
	b, ok := r.skipWhitespaceAndReadByte()
	if !ok {
		return false, io.EOF
	}
	if b == delimiter || b == ',' {
		return b == delimiter, nil
	}
	r.unreadByte()
	t, err := r.next()
	if err != nil {
		return false, err
	}
	return false, SyntaxError{Message: badArrayOrObjectItemMessage(delimiter == '}'),
		Value: t.description(), Offset: r.lastPos}
}

func badArrayOrObjectItemMessage(isObject bool) string {
	if isObject {
		return errMsgBadObjectItem
	}
	return errMsgBadArrayItem
}

// Any checks whether the next token is either a valid JSON scalar value or the opening delimiter of
// an array or object value. If so, it returns (AnyValue, nil) and consumes the token; if not, it
// returns an error. Unlike Reader.Any(), for array and object values it does not create an
// ArrayState or ObjectState.
func (r *tokenReader) Any() (AnyValue, error) {
	t, err := r.next()
	if err != nil {
		return AnyValue{}, err
	}
	switch t.kind {
	case boolToken:
		return AnyValue{Kind: BoolValue, Bool: t.boolValue}, nil
	case numberToken:
		return AnyValue{Kind: NumberValue, Number: t.numberValue}, nil
	case stringToken:
		return AnyValue{Kind: StringValue, String: string(t.stringValue)}, nil
	case delimiterToken:
		if t.delimiter == '[' {
			return AnyValue{Kind: ArrayValue}, nil
		}
		if t.delimiter == '{' {
			return AnyValue{Kind: ObjectValue}, nil
		}
		return AnyValue{},
			SyntaxError{Message: errMsgUnexpectedChar, Value: string(t.delimiter), Offset: r.lastPos}
	default:
		return AnyValue{Kind: NullValue}, nil
	}
}

// Attempts to parse and consume the next token, ignoring whitespace. A token is either a valid JSON scalar
// value or an ASCII delimiter character. If a token was previously unread using putBack, it consumes that
// instead.
func (r *tokenReader) next() (token, error) {
	if r.hasUnread {
		r.hasUnread = false
		return r.unreadToken, nil
	}
	b, ok := r.skipWhitespaceAndReadByte()
	if !ok {
		return token{}, io.EOF
	}

	switch {
	// We can get away with reading bytes instead of runes because the JSON spec doesn't allow multi-byte
	// characters except within a string literal.
	case b >= 'a' && b <= 'z':
		n := r.consumeASCIILowercaseAlphabeticChars() + 1
		id := r.data[r.lastPos : r.lastPos+n]
		if b == 'f' && bytes.Equal(id, tokenFalse) {
			return token{kind: boolToken, boolValue: false}, nil
		}
		if b == 't' && bytes.Equal(id, tokenTrue) {
			return token{kind: boolToken, boolValue: true}, nil
		}
		if b == 'n' && bytes.Equal(id, tokenNull) {
			return token{kind: nullToken}, nil
		}
		return token{}, SyntaxError{Message: errMsgUnexpectedSymbol, Value: string(id), Offset: r.lastPos}
	case (b >= '0' && b <= '9') || b == '-':
		if n, ok := r.readNumber(b); ok {
			return token{kind: numberToken, numberValue: n}, nil
		}
		return token{}, SyntaxError{Message: errMsgInvalidNumber, Offset: r.lastPos}
	case b == '"':
		s, err := r.readString()
		if err != nil {
			return token{}, err
		}
		return token{kind: stringToken, stringValue: s}, nil
	case b == '[', b == ']', b == '{', b == '}', b == ':', b == ',':
		return token{kind: delimiterToken, delimiter: b}, nil
	}

	return token{}, SyntaxError{Message: errMsgUnexpectedChar, Value: string(b), Offset: r.lastPos}
}

func (r *tokenReader) putBack(token token) {
	r.unreadToken = token
	r.hasUnread = true
}

func (r *tokenReader) consumeScalar(kind tokenKind) (token, error) {
	t, err := r.next()
	if err != nil {
		return token{}, err
	}
	if t.kind == kind {
		return t, nil
	}
	if t.kind == delimiterToken && t.delimiter != '[' && t.delimiter != '{' {
		return token{}, SyntaxError{Message: errMsgUnexpectedChar, Value: string(t.delimiter), Offset: r.LastPos()}
	}
	return token{}, TypeError{Expected: valueKindFromTokenKind(kind),
		Actual: t.valueKind(), Offset: r.LastPos()}
}

func (r *tokenReader) readByte() (byte, bool) {
	if r.pos >= r.len {
		return 0, false
	}
	b := r.data[r.pos]
	r.pos++
	return b, true
}

func (r *tokenReader) unreadByte() {
	r.pos--
}

func (r *tokenReader) skipWhitespaceAndReadByte() (byte, bool) {
	for {
		ch, ok := r.readByte()
		if !ok {
			return 0, false
		}
		if !unicode.IsSpace(rune(ch)) {
			r.lastPos = r.pos - 1
			return ch, true
		}
	}
}

func (r *tokenReader) consumeASCIILowercaseAlphabeticChars() int {
	n := 0
	for {
		ch, ok := r.readByte()
		if !ok {
			break
		}
		if ch < 'a' || ch > 'z' {
			r.unreadByte()
			break
		}
		n++
	}
	return n
}

func (r *tokenReader) readNumber(first byte) (float64, bool) { //nolint:unparam
	startPos := r.lastPos
	isFloat := false
	var ch byte
	var ok bool
	for {
		ch, ok = r.readByte()
		if !ok {
			break
		}
		if (ch < '0' || ch > '9') && !(ch == '.' && !isFloat) {
			break
		}
		if ch == '.' {
			isFloat = true
		}
	}
	hasExponent := false
	if ch == 'e' || ch == 'E' {
		// exponent must match this regex: [eE][-+]?[0-9]+
		ch, ok = r.readByte()
		if !ok {
			return 0, false
		}
		if ch == '+' || ch == '-' { //nolint:gocritic
		} else if ch >= '0' && ch <= '9' {
			r.unreadByte()
		} else {
			return 0, false
		}
		for {
			ch, ok = r.readByte()
			if !ok {
				break
			}
			if ch < '0' || ch > '9' {
				r.unreadByte()
				break
			}
			hasExponent = true
		}
		if !hasExponent {
			return 0, false
		}
		isFloat = true
	} else { //nolint:gocritic
		if ok {
			r.unreadByte()
		}
	}
	chars := r.data[startPos:r.pos]
	if isFloat {
		// Unfortunately, strconv.ParseFloat requires a string - there is no []byte equivalent. This means we can't
		// avoid a heap allocation here. Easyjson works around this by creating an unsafe string that points directly
		// at the existing bytes, but in our default implementation we can't use unsafe.
		n, err := strconv.ParseFloat(string(chars), 64)
		return n, err == nil
	} else { //nolint:revive
		n, ok := parseIntFromBytes(chars)
		return float64(n), ok
	}
}

func (r *tokenReader) readString() ([]byte, error) {
	startPos := r.pos // the opening quote mark has already been read
	var chars []byte
	haveEscaped := false
	var reader bytes.Reader // bytes.Reader understands multi-byte characters
	reader.Reset(r.data)
	_, _ = reader.Seek(int64(r.pos), io.SeekStart)

	for {
		ch, _, err := reader.ReadRune()
		if err != nil {
			return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
		}
		if ch == '"' {
			break
		}
		if ch != '\\' {
			if haveEscaped {
				chars = appendRune(chars, ch)
			}
			continue
		}
		if !haveEscaped {
			pos := (r.len - reader.Len()) - 1 // don't include the backslash we just read
			chars = make([]byte, pos-startPos, pos-startPos+20)
			if pos > startPos {
				copy(chars, r.data[startPos:pos])
			}
			haveEscaped = true
		}
		ch, _, err = reader.ReadRune()
		if err != nil {
			return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
		}
		switch ch {
		case '"', '\\', '/':
			chars = appendRune(chars, ch)
		case 'b':
			chars = appendRune(chars, '\b')
		case 'f':
			chars = appendRune(chars, '\f')
		case 'n':
			chars = appendRune(chars, '\n')
		case 'r':
			chars = appendRune(chars, '\r')
		case 't':
			chars = appendRune(chars, '\t')
		case 'u':
			if ch, ok := readHexChar(&reader); ok {
				chars = appendRune(chars, ch)
			} else {
				return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
			}
		default:
			return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
		}
	}
	r.pos = r.len - reader.Len()
	if haveEscaped {
		if len(chars) == 0 {
			return nil, nil
		}
		return chars, nil
	} else { //nolint:revive
		pos := r.pos - 1
		if pos <= startPos {
			return nil, nil
		}
		return r.data[startPos:pos], nil
	}
}

func readHexChar(reader *bytes.Reader) (rune, bool) {
	var digits [4]byte
	for i := 0; i < 4; i++ {
		ch, err := reader.ReadByte()
		if err != nil || !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
			return 0, false
		}
		digits[i] = ch
	}
	n, _ := strconv.ParseUint(string(digits[:]), 16, 32)
	return rune(n), true
}

func (r *tokenReader) syntaxErrorOnLastToken(msg string) error { //nolint:unparam
	return SyntaxError{Message: msg, Offset: r.LastPos()}
}

func (r *tokenReader) syntaxErrorOnNextToken(msg string) error {
	t, err := r.next()
	if err != nil {
		return err
	}
	return SyntaxError{Message: msg, Value: t.description(), Offset: r.LastPos()}
}

// This is faster than creating a string to pass to strconv.Atoi.
func parseIntFromBytes(chars []byte) (int64, bool) {
	negate := false
	p := 0
	var ret int64
	if len(chars) == 0 {
		return 0, false
	}
	if chars[0] == '-' {
		negate = true
		p++
		if p == len(chars) {
			return 0, false
		}
	}
	for p < len(chars) {
		ret = ret*10 + int64(chars[p]-'0')
		p++
	}
	if negate {
		ret = -ret
	}
	return ret, true
}

func appendRune(out []byte, ch rune) []byte {
	var encodedRune [10]byte
	n := utf8.EncodeRune(encodedRune[0:10], ch)
	return append(out, encodedRune[0:n]...)
}

func valueKindFromTokenKind(k tokenKind) ValueKind {
	switch k {
	case nullToken:
		return NullValue
	case boolToken:
		return BoolValue
	case numberToken:
		return NumberValue
	case stringToken:
		return StringValue
	}
	return -1
}