...

Source file src/go.starlark.net/internal/compile/serial.go

Documentation: go.starlark.net/internal/compile

     1  package compile
     2  
     3  // This file defines functions to read and write a compile.Program to a file.
     4  //
     5  // It is the client's responsibility to avoid version skew between the
     6  // compiler used to produce a file and the interpreter that consumes it.
     7  // The version number is provided as a constant.
     8  // Incompatible protocol changes should also increment the version number.
     9  //
    10  // Encoding
    11  //
    12  // Program:
    13  //	"sky!"		[4]byte		# magic number
    14  //	str		uint32le	# offset of <strings> section
    15  //	version		varint		# must match Version
    16  //	filename	string
    17  //	numloads	varint
    18  //	loads		[]Ident
    19  //	numnames	varint
    20  //	names		[]string
    21  //	numconsts	varint
    22  //	consts		[]Constant
    23  //	numglobals	varint
    24  //	globals		[]Ident
    25  //	toplevel	Funcode
    26  //	numfuncs	varint
    27  //	funcs		[]Funcode
    28  //	<strings>	[]byte		# concatenation of all referenced strings
    29  //	EOF
    30  //
    31  // Funcode:
    32  //	id		Ident
    33  //	code		[]byte
    34  //	pclinetablen	varint
    35  //	pclinetab	[]varint
    36  //	numlocals	varint
    37  //	locals		[]Ident
    38  //	numcells	varint
    39  //	cells		[]int
    40  //	numfreevars	varint
    41  //	freevar		[]Ident
    42  //	maxstack	varint
    43  //	numparams	varint
    44  //	numkwonlyparams	varint
    45  //	hasvarargs	varint (0 or 1)
    46  //	haskwargs	varint (0 or 1)
    47  //
    48  // Ident:
    49  //	filename	string
    50  //	line, col	varint
    51  //
    52  // Constant:                            # type      data
    53  //      type            varint          # 0=string  string
    54  //      data            ...             # 1=bytes   string
    55  //                                      # 2=int     varint
    56  //                                      # 3=float   varint (bits as uint64)
    57  //                                      # 4=bigint  string (decimal ASCII text)
    58  //
    59  // The encoding starts with a four-byte magic number.
    60  // The next four bytes are a little-endian uint32
    61  // that provides the offset of the string section
    62  // at the end of the file, which contains the ordered
    63  // concatenation of all strings referenced by the
    64  // program. This design permits the decoder to read
    65  // the first and second parts of the file into different
    66  // memory allocations: the first (the encoded program)
    67  // is transient, but the second (the strings) persists
    68  // for the life of the Program.
    69  //
    70  // Within the encoded program, all strings are referred
    71  // to by their length. As the encoder and decoder process
    72  // the entire file sequentially, they are in lock step,
    73  // so the start offset of each string is implicit.
    74  //
    75  // Program.Code is represented as a []byte slice to permit
    76  // modification when breakpoints are set. All other strings
    77  // are represented as strings. They all (unsafely) share the
    78  // same backing byte slice.
    79  //
    80  // Aside from the str field, all integers are encoded as varints.
    81  
    82  import (
    83  	"encoding/binary"
    84  	"fmt"
    85  	"math"
    86  	"math/big"
    87  	debugpkg "runtime/debug"
    88  	"unsafe"
    89  
    90  	"go.starlark.net/syntax"
    91  )
    92  
    93  const magic = "!sky"
    94  
    95  // Encode encodes a compiled Starlark program.
    96  func (prog *Program) Encode() []byte {
    97  	var e encoder
    98  	e.p = append(e.p, magic...)
    99  	e.p = append(e.p, "????"...) // string data offset; filled in later
   100  	e.int(Version)
   101  	e.string(prog.Toplevel.Pos.Filename())
   102  	e.bindings(prog.Loads)
   103  	e.int(len(prog.Names))
   104  	for _, name := range prog.Names {
   105  		e.string(name)
   106  	}
   107  	e.int(len(prog.Constants))
   108  	for _, c := range prog.Constants {
   109  		switch c := c.(type) {
   110  		case string:
   111  			e.int(0)
   112  			e.string(c)
   113  		case Bytes:
   114  			e.int(1)
   115  			e.string(string(c))
   116  		case int64:
   117  			e.int(2)
   118  			e.int64(c)
   119  		case float64:
   120  			e.int(3)
   121  			e.uint64(math.Float64bits(c))
   122  		case *big.Int:
   123  			e.int(4)
   124  			e.string(c.Text(10))
   125  		}
   126  	}
   127  	e.bindings(prog.Globals)
   128  	e.function(prog.Toplevel)
   129  	e.int(len(prog.Functions))
   130  	for _, fn := range prog.Functions {
   131  		e.function(fn)
   132  	}
   133  
   134  	// Patch in the offset of the string data section.
   135  	binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))
   136  
   137  	return append(e.p, e.s...)
   138  }
   139  
   140  type encoder struct {
   141  	p   []byte // encoded program
   142  	s   []byte // strings
   143  	tmp [binary.MaxVarintLen64]byte
   144  }
   145  
   146  func (e *encoder) int(x int) {
   147  	e.int64(int64(x))
   148  }
   149  
   150  func (e *encoder) int64(x int64) {
   151  	n := binary.PutVarint(e.tmp[:], x)
   152  	e.p = append(e.p, e.tmp[:n]...)
   153  }
   154  
   155  func (e *encoder) uint64(x uint64) {
   156  	n := binary.PutUvarint(e.tmp[:], x)
   157  	e.p = append(e.p, e.tmp[:n]...)
   158  }
   159  
   160  func (e *encoder) string(s string) {
   161  	e.int(len(s))
   162  	e.s = append(e.s, s...)
   163  }
   164  
   165  func (e *encoder) bytes(b []byte) {
   166  	e.int(len(b))
   167  	e.s = append(e.s, b...)
   168  }
   169  
   170  func (e *encoder) binding(bind Binding) {
   171  	e.string(bind.Name)
   172  	e.int(int(bind.Pos.Line))
   173  	e.int(int(bind.Pos.Col))
   174  }
   175  
   176  func (e *encoder) bindings(binds []Binding) {
   177  	e.int(len(binds))
   178  	for _, bind := range binds {
   179  		e.binding(bind)
   180  	}
   181  }
   182  
   183  func (e *encoder) function(fn *Funcode) {
   184  	e.binding(Binding{fn.Name, fn.Pos})
   185  	e.string(fn.Doc)
   186  	e.bytes(fn.Code)
   187  	e.int(len(fn.pclinetab))
   188  	for _, x := range fn.pclinetab {
   189  		e.int64(int64(x))
   190  	}
   191  	e.bindings(fn.Locals)
   192  	e.int(len(fn.Cells))
   193  	for _, index := range fn.Cells {
   194  		e.int(index)
   195  	}
   196  	e.bindings(fn.Freevars)
   197  	e.int(fn.MaxStack)
   198  	e.int(fn.NumParams)
   199  	e.int(fn.NumKwonlyParams)
   200  	e.int(b2i(fn.HasVarargs))
   201  	e.int(b2i(fn.HasKwargs))
   202  }
   203  
   204  func b2i(b bool) int {
   205  	if b {
   206  		return 1
   207  	} else {
   208  		return 0
   209  	}
   210  }
   211  
   212  // DecodeProgram decodes a compiled Starlark program from data.
   213  func DecodeProgram(data []byte) (_ *Program, err error) {
   214  	if len(data) < len(magic) {
   215  		return nil, fmt.Errorf("not a compiled module: no magic number")
   216  	}
   217  	if got := string(data[:4]); got != magic {
   218  		return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
   219  			got, magic)
   220  	}
   221  	defer func() {
   222  		if x := recover(); x != nil {
   223  			debugpkg.PrintStack()
   224  			err = fmt.Errorf("internal error while decoding program: %v", x)
   225  		}
   226  	}()
   227  
   228  	offset := binary.LittleEndian.Uint32(data[4:8])
   229  	d := decoder{
   230  		p: data[8:offset],
   231  		s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
   232  	}
   233  
   234  	if v := d.int(); v != Version {
   235  		return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
   236  	}
   237  
   238  	filename := d.string()
   239  	d.filename = &filename
   240  
   241  	loads := d.bindings()
   242  
   243  	names := make([]string, d.int())
   244  	for i := range names {
   245  		names[i] = d.string()
   246  	}
   247  
   248  	// constants
   249  	constants := make([]interface{}, d.int())
   250  	for i := range constants {
   251  		var c interface{}
   252  		switch d.int() {
   253  		case 0:
   254  			c = d.string()
   255  		case 1:
   256  			c = Bytes(d.string())
   257  		case 2:
   258  			c = d.int64()
   259  		case 3:
   260  			c = math.Float64frombits(d.uint64())
   261  		case 4:
   262  			c, _ = new(big.Int).SetString(d.string(), 10)
   263  		}
   264  		constants[i] = c
   265  	}
   266  
   267  	globals := d.bindings()
   268  	toplevel := d.function()
   269  	funcs := make([]*Funcode, d.int())
   270  	for i := range funcs {
   271  		funcs[i] = d.function()
   272  	}
   273  
   274  	prog := &Program{
   275  		Loads:     loads,
   276  		Names:     names,
   277  		Constants: constants,
   278  		Globals:   globals,
   279  		Functions: funcs,
   280  		Toplevel:  toplevel,
   281  	}
   282  	toplevel.Prog = prog
   283  	for _, f := range funcs {
   284  		f.Prog = prog
   285  	}
   286  
   287  	if len(d.p)+len(d.s) > 0 {
   288  		return nil, fmt.Errorf("internal error: unconsumed data during decoding")
   289  	}
   290  
   291  	return prog, nil
   292  }
   293  
   294  type decoder struct {
   295  	p        []byte  // encoded program
   296  	s        []byte  // strings
   297  	filename *string // (indirect to avoid keeping decoder live)
   298  }
   299  
   300  func (d *decoder) int() int {
   301  	return int(d.int64())
   302  }
   303  
   304  func (d *decoder) int64() int64 {
   305  	x, len := binary.Varint(d.p[:])
   306  	d.p = d.p[len:]
   307  	return x
   308  }
   309  
   310  func (d *decoder) uint64() uint64 {
   311  	x, len := binary.Uvarint(d.p[:])
   312  	d.p = d.p[len:]
   313  	return x
   314  }
   315  
   316  func (d *decoder) string() (s string) {
   317  	if slice := d.bytes(); len(slice) > 0 {
   318  		// Avoid a memory allocation for each string
   319  		// by unsafely aliasing slice.
   320  		type string struct {
   321  			data *byte
   322  			len  int
   323  		}
   324  		ptr := (*string)(unsafe.Pointer(&s))
   325  		ptr.data = &slice[0]
   326  		ptr.len = len(slice)
   327  	}
   328  	return s
   329  }
   330  
   331  func (d *decoder) bytes() []byte {
   332  	len := d.int()
   333  	r := d.s[:len:len]
   334  	d.s = d.s[len:]
   335  	return r
   336  }
   337  
   338  func (d *decoder) binding() Binding {
   339  	name := d.string()
   340  	line := int32(d.int())
   341  	col := int32(d.int())
   342  	return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)}
   343  }
   344  
   345  func (d *decoder) bindings() []Binding {
   346  	bindings := make([]Binding, d.int())
   347  	for i := range bindings {
   348  		bindings[i] = d.binding()
   349  	}
   350  	return bindings
   351  }
   352  
   353  func (d *decoder) ints() []int {
   354  	ints := make([]int, d.int())
   355  	for i := range ints {
   356  		ints[i] = d.int()
   357  	}
   358  	return ints
   359  }
   360  
   361  func (d *decoder) bool() bool { return d.int() != 0 }
   362  
   363  func (d *decoder) function() *Funcode {
   364  	id := d.binding()
   365  	doc := d.string()
   366  	code := d.bytes()
   367  	pclinetab := make([]uint16, d.int())
   368  	for i := range pclinetab {
   369  		pclinetab[i] = uint16(d.int())
   370  	}
   371  	locals := d.bindings()
   372  	cells := d.ints()
   373  	freevars := d.bindings()
   374  	maxStack := d.int()
   375  	numParams := d.int()
   376  	numKwonlyParams := d.int()
   377  	hasVarargs := d.int() != 0
   378  	hasKwargs := d.int() != 0
   379  	return &Funcode{
   380  		// Prog is filled in later.
   381  		Pos:             id.Pos,
   382  		Name:            id.Name,
   383  		Doc:             doc,
   384  		Code:            code,
   385  		pclinetab:       pclinetab,
   386  		Locals:          locals,
   387  		Cells:           cells,
   388  		Freevars:        freevars,
   389  		MaxStack:        maxStack,
   390  		NumParams:       numParams,
   391  		NumKwonlyParams: numKwonlyParams,
   392  		HasVarargs:      hasVarargs,
   393  		HasKwargs:       hasKwargs,
   394  	}
   395  }
   396  

View as plain text