// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package json

import (
	"reflect"
	"strings"
	"testing"
	"unicode/utf16"
)

type rawValueTestdataEntry struct {
	name                testName
	in                  string
	wantValid           bool
	wantCompacted       string
	wantCompactErr      error  // implies wantCompacted is in
	wantIndented        string // wantCompacted if empty; uses "\t" for indent prefix and "    " for indent
	wantIndentErr       error  // implies wantCompacted is in
	wantCanonicalized   string // wantCompacted if empty
	wantCanonicalizeErr error  // implies wantCompacted is in
}

var rawValueTestdata = append(func() (out []rawValueTestdataEntry) {
	// Initialize rawValueTestdata from coderTestdata.
	for _, td := range coderTestdata {
		// NOTE: The Compact method preserves the raw formatting of strings,
		// while the Encoder (by default) does not.
		if td.name.name == "ComplicatedString" {
			td.outCompacted = strings.TrimSpace(td.in)
		}
		out = append(out, rawValueTestdataEntry{
			name:              td.name,
			in:                td.in,
			wantValid:         true,
			wantCompacted:     td.outCompacted,
			wantIndented:      td.outIndented,
			wantCanonicalized: td.outCanonicalized,
		})
	}
	return out
}(), []rawValueTestdataEntry{{
	name: name("RFC8785/Primitives"),
	in: `{
		"numbers": [333333333.33333329, 1E30, 4.50,
					2e-3, 0.000000000000000000000000001],
		"string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
		"literals": [null, true, false]
	}`,
	wantValid:     true,
	wantCompacted: `{"numbers":[333333333.33333329,1E30,4.50,2e-3,0.000000000000000000000000001],"string":"\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/","literals":[null,true,false]}`,
	wantIndented: `{
	    "numbers": [
	        333333333.33333329,
	        1E30,
	        4.50,
	        2e-3,
	        0.000000000000000000000000001
	    ],
	    "string": "\u20ac$\u000F\u000aA'\u0042\u0022\u005c\\\"\/",
	    "literals": [
	        null,
	        true,
	        false
	    ]
	}`,
	wantCanonicalized: `{"literals":[null,true,false],"numbers":[333333333.3333333,1e+30,4.5,0.002,1e-27],"string":"€$\u000f\nA'B\"\\\\\"/"}`,
}, {
	name: name("RFC8785/ObjectOrdering"),
	in: `{
		"\u20ac": "Euro Sign",
		"\r": "Carriage Return",
		"\ufb33": "Hebrew Letter Dalet With Dagesh",
		"1": "One",
		"\ud83d\ude00": "Emoji: Grinning Face",
		"\u0080": "Control",
		"\u00f6": "Latin Small Letter O With Diaeresis"
	}`,
	wantValid:     true,
	wantCompacted: `{"\u20ac":"Euro Sign","\r":"Carriage Return","\ufb33":"Hebrew Letter Dalet With Dagesh","1":"One","\ud83d\ude00":"Emoji: Grinning Face","\u0080":"Control","\u00f6":"Latin Small Letter O With Diaeresis"}`,
	wantIndented: `{
	    "\u20ac": "Euro Sign",
	    "\r": "Carriage Return",
	    "\ufb33": "Hebrew Letter Dalet With Dagesh",
	    "1": "One",
	    "\ud83d\ude00": "Emoji: Grinning Face",
	    "\u0080": "Control",
	    "\u00f6": "Latin Small Letter O With Diaeresis"
	}`,
	wantCanonicalized: `{"\r":"Carriage Return","1":"One","":"Control","ö":"Latin Small Letter O With Diaeresis","€":"Euro Sign","😀":"Emoji: Grinning Face","דּ":"Hebrew Letter Dalet With Dagesh"}`,
}, {
	name:          name("LargeIntegers"),
	in:            ` [ -9223372036854775808 , 9223372036854775807 ] `,
	wantValid:     true,
	wantCompacted: `[-9223372036854775808,9223372036854775807]`,
	wantIndented: `[
	    -9223372036854775808,
	    9223372036854775807
	]`,
	wantCanonicalized: `[-9223372036854776000,9223372036854776000]`, // NOTE: Loss of precision due to numbers being treated as floats.
}, {
	name:                name("InvalidUTF8"),
	in:                  `  "living` + "\xde\xad\xbe\xef" + `\ufffd�"  `,
	wantValid:           false, // uses RFC 7493 as the definition; which validates UTF-8
	wantCompacted:       `"living` + "\xde\xad\xbe\xef" + `\ufffd�"`,
	wantCanonicalizeErr: &SyntacticError{str: "invalid UTF-8 within string"},
}, {
	name:                name("InvalidUTF8/SurrogateHalf"),
	in:                  `"\ud800"`,
	wantValid:           false, // uses RFC 7493 as the definition; which validates UTF-8
	wantCompacted:       `"\ud800"`,
	wantCanonicalizeErr: &SyntacticError{str: `invalid escape sequence "\"" within string`},
}, {
	name:              name("UppercaseEscaped"),
	in:                `"\u000B"`,
	wantValid:         true,
	wantCompacted:     `"\u000B"`,
	wantCanonicalized: `"\u000b"`,
}, {
	name:          name("DuplicateNames"),
	in:            ` { "0" : 0 , "1" : 1 , "0" : 0 }`,
	wantValid:     false, // uses RFC 7493 as the definition; which does check for object uniqueness
	wantCompacted: `{"0":0,"1":1,"0":0}`,
	wantIndented: `{
	    "0": 0,
	    "1": 1,
	    "0": 0
	}`,
	wantCanonicalizeErr: &SyntacticError{str: `duplicate name "0" in object`},
}}...)

func TestRawValueMethods(t *testing.T) {
	for _, td := range rawValueTestdata {
		t.Run(td.name.name, func(t *testing.T) {
			if td.wantIndented == "" {
				td.wantIndented = td.wantCompacted
			}
			if td.wantCanonicalized == "" {
				td.wantCanonicalized = td.wantCompacted
			}
			if td.wantCompactErr != nil {
				td.wantCompacted = td.in
			}
			if td.wantIndentErr != nil {
				td.wantIndented = td.in
			}
			if td.wantCanonicalizeErr != nil {
				td.wantCanonicalized = td.in
			}

			v := RawValue(td.in)
			gotValid := v.IsValid()
			if gotValid != td.wantValid {
				t.Errorf("%s: RawValue.IsValid = %v, want %v", td.name.where, gotValid, td.wantValid)
			}

			gotCompacted := RawValue(td.in)
			gotCompactErr := gotCompacted.Compact()
			if string(gotCompacted) != td.wantCompacted {
				t.Errorf("%s: RawValue.Compact = %s, want %s", td.name.where, gotCompacted, td.wantCompacted)
			}
			if !reflect.DeepEqual(gotCompactErr, td.wantCompactErr) {
				t.Errorf("%s: RawValue.Compact error mismatch: got %#v, want %#v", td.name.where, gotCompactErr, td.wantCompactErr)
			}

			gotIndented := RawValue(td.in)
			gotIndentErr := gotIndented.Indent("\t", "    ")
			if string(gotIndented) != td.wantIndented {
				t.Errorf("%s: RawValue.Indent = %s, want %s", td.name.where, gotIndented, td.wantIndented)
			}
			if !reflect.DeepEqual(gotIndentErr, td.wantIndentErr) {
				t.Errorf("%s: RawValue.Indent error mismatch: got %#v, want %#v", td.name.where, gotIndentErr, td.wantIndentErr)
			}

			gotCanonicalized := RawValue(td.in)
			gotCanonicalizeErr := gotCanonicalized.Canonicalize()
			if string(gotCanonicalized) != td.wantCanonicalized {
				t.Errorf("%s: RawValue.Canonicalize = %s, want %s", td.name.where, gotCanonicalized, td.wantCanonicalized)
			}
			if !reflect.DeepEqual(gotCanonicalizeErr, td.wantCanonicalizeErr) {
				t.Errorf("%s: RawValue.Canonicalize error mismatch: got %#v, want %#v", td.name.where, gotCanonicalizeErr, td.wantCanonicalizeErr)
			}
		})
	}
}

var lessUTF16Testdata = []string{"", "\r", "1", "\u0080", "\u00f6", "\u20ac", "\U0001f600", "\ufb33"}

func TestLessUTF16(t *testing.T) {
	for i, si := range lessUTF16Testdata {
		for j, sj := range lessUTF16Testdata {
			got := lessUTF16([]byte(si), []byte(sj))
			want := i < j
			if got != want {
				t.Errorf("lessUTF16(%q, %q) = %v, want %v", si, sj, got, want)
			}
		}
	}
}

func FuzzLessUTF16(f *testing.F) {
	for _, td1 := range lessUTF16Testdata {
		for _, td2 := range lessUTF16Testdata {
			f.Add([]byte(td1), []byte(td2))
		}
	}

	// lessUTF16Simple is identical to lessUTF16,
	// but relies on naively converting a string to a []uint16 codepoints.
	// It is easy to verify as correct, but is slow.
	lessUTF16Simple := func(x, y []byte) bool {
		ux := utf16.Encode([]rune(string(x)))
		uy := utf16.Encode([]rune(string(y)))
		// TODO(https://go.dev/issue/57433): Use slices.Compare.
		for {
			if len(ux) == 0 || len(uy) == 0 {
				if len(ux) == len(uy) {
					return string(x) < string(y)
				}
				return len(ux) < len(uy)
			}
			if ux[0] != uy[0] {
				return ux[0] < uy[0]
			}
			ux, uy = ux[1:], uy[1:]
		}
	}

	f.Fuzz(func(t *testing.T, s1, s2 []byte) {
		// Compare the optimized and simplified implementations.
		got := lessUTF16(s1, s2)
		want := lessUTF16Simple(s1, s2)
		if got != want {
			t.Errorf("lessUTF16(%q, %q) = %v, want %v", s1, s2, got, want)
		}
	})
}