...

Source file src/go.starlark.net/lib/json/json.go

Documentation: go.starlark.net/lib/json

     1  // Copyright 2020 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package json defines utilities for converting Starlark values
     6  // to/from JSON strings. The most recent IETF standard for JSON is
     7  // https://www.ietf.org/rfc/rfc7159.txt.
     8  package json // import "go.starlark.net/lib/json"
     9  
    10  import (
    11  	"bytes"
    12  	"encoding/json"
    13  	"fmt"
    14  	"math"
    15  	"math/big"
    16  	"reflect"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  	"unicode/utf8"
    21  	"unsafe"
    22  
    23  	"go.starlark.net/starlark"
    24  	"go.starlark.net/starlarkstruct"
    25  )
    26  
// Module json is a Starlark module of JSON-related functions.
//
//   json = module(
//      encode,
//      decode,
//      indent,
//   )
//
// def encode(x):
//
// The encode function accepts one required positional argument,
// which it converts to JSON by cases:
// - A Starlark value that implements Go's standard json.Marshaler
//   interface defines its own JSON encoding.
// - None, True, and False are converted to null, true, and false, respectively.
// - Starlark int values, no matter how large, are encoded as decimal integers.
//   Some decoders may not be able to decode very large integers.
// - Starlark float values are encoded using decimal point notation,
//   even if the value is an integer.
//   It is an error to encode a non-finite floating-point value.
// - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
//   It is an error if any key is not a string.
// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
// If an application-defined type matches more than one of the cases described
// above (e.g. it implements both Iterable and HasAttrs), the first case takes
// precedence.
// Encoding any other value yields an error.
// Encoding a value that contains itself (a cycle) is also an error.
//
// def decode(x[, default]):
//
// The decode function has one required positional parameter, a JSON string.
// The string must be passed positionally; passing it as the keyword
// argument x is rejected.
// It returns the Starlark value that the string denotes.
// - Numbers are parsed as int or float, depending on whether they
//   contain a decimal point or an exponent.
// - JSON objects are parsed as new unfrozen Starlark dicts.
// - JSON arrays are parsed as new unfrozen Starlark lists.
// If x is not a valid JSON string, the behavior depends on the "default"
// parameter: if present, decode returns its value; otherwise, decode fails.
//
// def indent(str, *, prefix="", indent="\t"):
//
// The indent function pretty-prints a valid JSON encoding,
// and returns a string containing the indented form.
// It accepts one required positional parameter, the JSON string,
// and two optional keyword-only string parameters, prefix and indent,
// that specify a prefix of each new line, and the unit of indentation.
//
var Module = &starlarkstruct.Module{
	Name: "json",
	Members: starlark.StringDict{
		"encode": starlark.NewBuiltin("json.encode", encode),
		"decode": starlark.NewBuiltin("json.decode", decode),
		"indent": starlark.NewBuiltin("json.indent", indent),
	},
}
    83  
    84  func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
    85  	var x starlark.Value
    86  	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	buf := new(bytes.Buffer)
    91  
    92  	var quoteSpace [128]byte
    93  	quote := func(s string) {
    94  		// Non-trivial escaping is handled by Go's encoding/json.
    95  		if isPrintableASCII(s) {
    96  			buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
    97  		} else {
    98  			// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
    99  			// Can we avoid this call?
   100  			data, _ := json.Marshal(s)
   101  			buf.Write(data)
   102  		}
   103  	}
   104  
   105  	path := make([]unsafe.Pointer, 0, 8)
   106  
   107  	var emit func(x starlark.Value) error
   108  	emit = func(x starlark.Value) error {
   109  
   110  		// It is only necessary to push/pop the item when it might contain
   111  		// itself (i.e. the last three switch cases), but omitting it in the other
   112  		// cases did not show significant improvement on the benchmarks.
   113  		if ptr := pointer(x); ptr != nil {
   114  			if pathContains(path, ptr) {
   115  				return fmt.Errorf("cycle in JSON structure")
   116  			}
   117  
   118  			path = append(path, ptr)
   119  			defer func() { path = path[0 : len(path)-1] }()
   120  		}
   121  
   122  		switch x := x.(type) {
   123  		case json.Marshaler:
   124  			// Application-defined starlark.Value types
   125  			// may define their own JSON encoding.
   126  			data, err := x.MarshalJSON()
   127  			if err != nil {
   128  				return err
   129  			}
   130  			buf.Write(data)
   131  
   132  		case starlark.NoneType:
   133  			buf.WriteString("null")
   134  
   135  		case starlark.Bool:
   136  			if x {
   137  				buf.WriteString("true")
   138  			} else {
   139  				buf.WriteString("false")
   140  			}
   141  
   142  		case starlark.Int:
   143  			fmt.Fprint(buf, x)
   144  
   145  		case starlark.Float:
   146  			if !isFinite(float64(x)) {
   147  				return fmt.Errorf("cannot encode non-finite float %v", x)
   148  			}
   149  			fmt.Fprintf(buf, "%g", x) // always contains a decimal point
   150  
   151  		case starlark.String:
   152  			quote(string(x))
   153  
   154  		case starlark.IterableMapping:
   155  			// e.g. dict (must have string keys)
   156  			buf.WriteByte('{')
   157  			items := x.Items()
   158  			for _, item := range items {
   159  				if _, ok := item[0].(starlark.String); !ok {
   160  					return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
   161  				}
   162  			}
   163  			sort.Slice(items, func(i, j int) bool {
   164  				return items[i][0].(starlark.String) < items[j][0].(starlark.String)
   165  			})
   166  			for i, item := range items {
   167  				if i > 0 {
   168  					buf.WriteByte(',')
   169  				}
   170  				k, _ := starlark.AsString(item[0])
   171  				quote(k)
   172  				buf.WriteByte(':')
   173  				if err := emit(item[1]); err != nil {
   174  					return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
   175  				}
   176  			}
   177  			buf.WriteByte('}')
   178  
   179  		case starlark.Iterable:
   180  			// e.g. tuple, list
   181  			buf.WriteByte('[')
   182  			iter := x.Iterate()
   183  			defer iter.Done()
   184  			var elem starlark.Value
   185  			for i := 0; iter.Next(&elem); i++ {
   186  				if i > 0 {
   187  					buf.WriteByte(',')
   188  				}
   189  				if err := emit(elem); err != nil {
   190  					return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
   191  				}
   192  			}
   193  			buf.WriteByte(']')
   194  
   195  		case starlark.HasAttrs:
   196  			// e.g. struct
   197  			buf.WriteByte('{')
   198  			var names []string
   199  			names = append(names, x.AttrNames()...)
   200  			sort.Strings(names)
   201  			for i, name := range names {
   202  				v, err := x.Attr(name)
   203  				if err != nil {
   204  					return fmt.Errorf("cannot access attribute %s.%s: %w", x.Type(), name, err)
   205  				}
   206  				if v == nil {
   207  					// x.AttrNames() returned name, but x.Attr(name) returned nil, stating
   208  					// that the field doesn't exist.
   209  					return fmt.Errorf("missing attribute %s.%s (despite %q appearing in dir()", x.Type(), name, name)
   210  				}
   211  				if i > 0 {
   212  					buf.WriteByte(',')
   213  				}
   214  				quote(name)
   215  				buf.WriteByte(':')
   216  				if err := emit(v); err != nil {
   217  					return fmt.Errorf("in field .%s: %v", name, err)
   218  				}
   219  			}
   220  			buf.WriteByte('}')
   221  
   222  		default:
   223  			return fmt.Errorf("cannot encode %s as JSON", x.Type())
   224  		}
   225  		return nil
   226  	}
   227  
   228  	if err := emit(x); err != nil {
   229  		return nil, fmt.Errorf("%s: %v", b.Name(), err)
   230  	}
   231  	return starlark.String(buf.String()), nil
   232  }
   233  
   234  func pointer(i interface{}) unsafe.Pointer {
   235  	v := reflect.ValueOf(i)
   236  	switch v.Kind() {
   237  	case reflect.Ptr, reflect.Chan, reflect.Map, reflect.UnsafePointer, reflect.Slice:
   238  		return v.UnsafePointer()
   239  	default:
   240  		return nil
   241  	}
   242  }
   243  
   244  func pathContains(path []unsafe.Pointer, item unsafe.Pointer) bool {
   245  	for _, p := range path {
   246  		if p == item {
   247  			return true
   248  		}
   249  	}
   250  
   251  	return false
   252  }
   253  
   254  // isPrintableASCII reports whether s contains only printable ASCII.
   255  func isPrintableASCII(s string) bool {
   256  	for i := 0; i < len(s); i++ {
   257  		b := s[i]
   258  		if b < 0x20 || b >= 0x80 {
   259  			return false
   260  		}
   261  	}
   262  	return true
   263  }
   264  
   265  // isFinite reports whether f represents a finite rational value.
   266  // It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0).
   267  func isFinite(f float64) bool {
   268  	return math.Abs(f) <= math.MaxFloat64
   269  }
   270  
   271  func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
   272  	prefix, indent := "", "\t" // keyword-only
   273  	if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
   274  		"prefix?", &prefix,
   275  		"indent?", &indent,
   276  	); err != nil {
   277  		return nil, err
   278  	}
   279  	var str string // positional-only
   280  	if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
   281  		return nil, err
   282  	}
   283  
   284  	buf := new(bytes.Buffer)
   285  	if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
   286  		return nil, fmt.Errorf("%s: %v", b.Name(), err)
   287  	}
   288  	return starlark.String(buf.String()), nil
   289  }
   290  
   291  func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (v starlark.Value, err error) {
   292  	var s string
   293  	var d starlark.Value
   294  	if err := starlark.UnpackArgs(b.Name(), args, kwargs, "x", &s, "default?", &d); err != nil {
   295  		return nil, err
   296  	}
   297  	if len(args) < 1 {
   298  		// "x" parameter is positional only; UnpackArgs does not allow us to
   299  		// directly express "def decode(x, *, default)"
   300  		return nil, fmt.Errorf("%s: unexpected keyword argument x", b.Name())
   301  	}
   302  
   303  	// The decoder necessarily makes certain representation choices
   304  	// such as list vs tuple, struct vs dict, int vs float.
   305  	// In principle, we could parameterize it to allow the caller to
   306  	// control the returned types, but there's no compelling need yet.
   307  
   308  	// Use panic/recover with a distinguished type (failure) for error handling.
   309  	// If "default" is set, we only want to return it when encountering invalid
   310  	// json - not for any other possible causes of panic.
   311  	// In particular, if we ever extend the json.decode API to take a callback,
   312  	// a distinguished, private failure type prevents the possibility of
   313  	// json.decode with "default" becoming abused as a try-catch mechanism.
   314  	type failure string
   315  	fail := func(format string, args ...interface{}) {
   316  		panic(failure(fmt.Sprintf(format, args...)))
   317  	}
   318  
   319  	i := 0
   320  
   321  	// skipSpace consumes leading spaces, and reports whether there is more input.
   322  	skipSpace := func() bool {
   323  		for ; i < len(s); i++ {
   324  			b := s[i]
   325  			if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
   326  				return true
   327  			}
   328  		}
   329  		return false
   330  	}
   331  
   332  	// next consumes leading spaces and returns the first non-space.
   333  	// It panics if at EOF.
   334  	next := func() byte {
   335  		if skipSpace() {
   336  			return s[i]
   337  		}
   338  		fail("unexpected end of file")
   339  		panic("unreachable")
   340  	}
   341  
   342  	// parse returns the next JSON value from the input.
   343  	// It consumes leading but not trailing whitespace.
   344  	// It panics on error.
   345  	var parse func() starlark.Value
   346  	parse = func() starlark.Value {
   347  		b := next()
   348  		switch b {
   349  		case '"':
   350  			// string
   351  
   352  			// Find end of quotation.
   353  			// Also, record whether trivial unquoting is safe.
   354  			// Non-trivial unquoting is handled by Go's encoding/json.
   355  			safe := true
   356  			closed := false
   357  			j := i + 1
   358  			for ; j < len(s); j++ {
   359  				b := s[j]
   360  				if b == '\\' {
   361  					safe = false
   362  					j++ // skip x in \x
   363  				} else if b == '"' {
   364  					closed = true
   365  					j++ // skip '"'
   366  					break
   367  				} else if b >= utf8.RuneSelf {
   368  					safe = false
   369  				}
   370  			}
   371  			if !closed {
   372  				fail("unclosed string literal")
   373  			}
   374  
   375  			r := s[i:j]
   376  			i = j
   377  
   378  			// unquote
   379  			if safe {
   380  				r = r[1 : len(r)-1]
   381  			} else if err := json.Unmarshal([]byte(r), &r); err != nil {
   382  				fail("%s", err)
   383  			}
   384  			return starlark.String(r)
   385  
   386  		case 'n':
   387  			if strings.HasPrefix(s[i:], "null") {
   388  				i += len("null")
   389  				return starlark.None
   390  			}
   391  
   392  		case 't':
   393  			if strings.HasPrefix(s[i:], "true") {
   394  				i += len("true")
   395  				return starlark.True
   396  			}
   397  
   398  		case 'f':
   399  			if strings.HasPrefix(s[i:], "false") {
   400  				i += len("false")
   401  				return starlark.False
   402  			}
   403  
   404  		case '[':
   405  			// array
   406  			var elems []starlark.Value
   407  
   408  			i++ // '['
   409  			b = next()
   410  			if b != ']' {
   411  				for {
   412  					elem := parse()
   413  					elems = append(elems, elem)
   414  					b = next()
   415  					if b != ',' {
   416  						if b != ']' {
   417  							fail("got %q, want ',' or ']'", b)
   418  						}
   419  						break
   420  					}
   421  					i++ // ','
   422  				}
   423  			}
   424  			i++ // ']'
   425  			return starlark.NewList(elems)
   426  
   427  		case '{':
   428  			// object
   429  			dict := new(starlark.Dict)
   430  
   431  			i++ // '{'
   432  			b = next()
   433  			if b != '}' {
   434  				for {
   435  					key := parse()
   436  					if _, ok := key.(starlark.String); !ok {
   437  						fail("got %s for object key, want string", key.Type())
   438  					}
   439  					b = next()
   440  					if b != ':' {
   441  						fail("after object key, got %q, want ':' ", b)
   442  					}
   443  					i++ // ':'
   444  					value := parse()
   445  					dict.SetKey(key, value) // can't fail
   446  					b = next()
   447  					if b != ',' {
   448  						if b != '}' {
   449  							fail("in object, got %q, want ',' or '}'", b)
   450  						}
   451  						break
   452  					}
   453  					i++ // ','
   454  				}
   455  			}
   456  			i++ // '}'
   457  			return dict
   458  
   459  		default:
   460  			// number?
   461  			if isdigit(b) || b == '-' {
   462  				// scan literal. Allow [0-9+-eE.] for now.
   463  				float := false
   464  				var j int
   465  				for j = i + 1; j < len(s); j++ {
   466  					b = s[j]
   467  					if isdigit(b) {
   468  						// ok
   469  					} else if b == '.' ||
   470  						b == 'e' ||
   471  						b == 'E' ||
   472  						b == '+' ||
   473  						b == '-' {
   474  						float = true
   475  					} else {
   476  						break
   477  					}
   478  				}
   479  				num := s[i:j]
   480  				i = j
   481  
   482  				// Unlike most C-like languages,
   483  				// JSON disallows a leading zero before a digit.
   484  				digits := num
   485  				if num[0] == '-' {
   486  					digits = num[1:]
   487  				}
   488  				if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
   489  					fail("invalid number: %s", num)
   490  				}
   491  
   492  				// parse literal
   493  				if float {
   494  					x, err := strconv.ParseFloat(num, 64)
   495  					if err != nil {
   496  						fail("invalid number: %s", num)
   497  					}
   498  					return starlark.Float(x)
   499  				} else {
   500  					x, ok := new(big.Int).SetString(num, 10)
   501  					if !ok {
   502  						fail("invalid number: %s", num)
   503  					}
   504  					return starlark.MakeBigInt(x)
   505  				}
   506  			}
   507  		}
   508  		fail("unexpected character %q", b)
   509  		panic("unreachable")
   510  	}
   511  	defer func() {
   512  		x := recover()
   513  		switch x := x.(type) {
   514  		case failure:
   515  			if d != nil {
   516  				v = d
   517  			} else {
   518  				err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
   519  			}
   520  		case nil:
   521  			// nop
   522  		default:
   523  			panic(x) // unexpected panic
   524  		}
   525  	}()
   526  	v = parse()
   527  	if skipSpace() {
   528  		fail("unexpected character %q after value", s[i])
   529  	}
   530  	return v, nil
   531  }
   532  
   533  func isdigit(b byte) bool {
   534  	return b >= '0' && b <= '9'
   535  }
   536  

View as plain text