rta.go

Documentation: golang.org/x/tools/go/callgraph/rta

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This package provides Rapid Type Analysis (RTA) for Go, a fast
     6  // algorithm for call graph construction and discovery of reachable code
     7  // (and hence dead code) and runtime types.  The algorithm was first
     8  // described in:
     9  //
    10  // David F. Bacon and Peter F. Sweeney. 1996.
    11  // Fast static analysis of C++ virtual function calls. (OOPSLA '96)
    12  // http://doi.acm.org/10.1145/236337.236371
    13  //
    14  // The algorithm uses dynamic programming to tabulate the cross-product
    15  // of the set of known "address-taken" functions with the set of known
    16  // dynamic calls of the same type.  As each new address-taken function
    17  // is discovered, call graph edges are added from each known callsite,
    18  // and as each new call site is discovered, call graph edges are added
    19  // from it to each known address-taken function.
    20  //
    21  // A similar approach is used for dynamic calls via interfaces: it
    22  // tabulates the cross-product of the set of known "runtime types",
    23  // i.e. types that may appear in an interface value, or may be derived from
    24  // one via reflection, with the set of known "invoke"-mode dynamic
    25  // calls.  As each new runtime type is discovered, call edges are
    26  // added from the known call sites, and as each new call site is
    27  // discovered, call graph edges are added to each compatible
    28  // method.
    29  //
    30  // In addition, we must consider as reachable all address-taken
    31  // functions and all exported methods of any runtime type, since they
    32  // may be called via reflection.
    33  //
    34  // Each time a newly added call edge causes a new function to become
    35  // reachable, the code of that function is analyzed for more call sites,
    36  // address-taken functions, and runtime types.  The process continues
    37  // until a fixed point is reached.
    38  package rta // import "golang.org/x/tools/go/callgraph/rta"
    39  
    40  import (
    41  	"fmt"
    42  	"go/types"
    43  	"hash/crc32"
    44  
    45  	"golang.org/x/tools/go/callgraph"
    46  	"golang.org/x/tools/go/ssa"
    47  	"golang.org/x/tools/go/types/typeutil"
    48  	"golang.org/x/tools/internal/aliases"
    49  )
    50  
    51  // A Result holds the results of Rapid Type Analysis, which includes the
    52  // set of reachable functions/methods, runtime types, and the call graph.
    53  type Result struct {
    54  	// CallGraph is the discovered callgraph.
    55  	// It does not include edges for calls made via reflection.
    56  	CallGraph *callgraph.Graph
    57  
    58  	// Reachable contains the set of reachable functions and methods.
    59  	// This includes exported methods of runtime types, since
    60  	// they may be accessed via reflection.
    61  	// The value indicates whether the function is address-taken.
    62  	//
    63  	// (We wrap the bool in a struct to avoid inadvertent use of
    64  	// "if Reachable[f] {" to test for set membership.)
    65  	Reachable map[*ssa.Function]struct{ AddrTaken bool }
    66  
    67  	// RuntimeTypes contains the set of types that are needed at
    68  	// runtime, for interfaces or reflection.
    69  	//
    70  	// The value indicates whether the type is inaccessible to reflection.
    71  	// Consider:
    72  	// 	type A struct{B}
    73  	// 	fmt.Println(new(A))
    74  	// Types *A, A and B are accessible to reflection, but the unnamed
    75  	// type struct{B} is not.
    76  	RuntimeTypes typeutil.Map
    77  }
    78  
    79  // Working state of the RTA algorithm.
    80  type rta struct {
    81  	result *Result
    82  
    83  	prog *ssa.Program
    84  
    85  	reflectValueCall *ssa.Function // (*reflect.Value).Call, iff part of prog
    86  
    87  	worklist []*ssa.Function // list of functions to visit
    88  
    89  	// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
    90  	// Keys are *types.Signature, values are map[*ssa.Function]bool sets.
    91  	addrTakenFuncsBySig typeutil.Map
    92  
    93  	// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
    94  	// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
    95  	dynCallSites typeutil.Map
    96  
    97  	// invokeSites contains all "invoke"-mode call sites, grouped by interface.
    98  	// Keys are *types.Interface (never *types.Named),
    99  	// Values are unordered []ssa.CallInstruction sets.
   100  	invokeSites typeutil.Map
   101  
   102  	// The following two maps together define the subset of the
   103  	// m:n "implements" relation needed by the algorithm.
   104  
   105  	// concreteTypes maps each concrete type to information about it.
   106  	// Keys are types.Type, values are *concreteTypeInfo.
   107  	// Only concrete types used as MakeInterface operands are included.
   108  	concreteTypes typeutil.Map
   109  
   110  	// interfaceTypes maps each interface type to information about it.
   111  	// Keys are *types.Interface, values are *interfaceTypeInfo.
   112  	// Only interfaces used in "invoke"-mode CallInstructions are included.
   113  	interfaceTypes typeutil.Map
   114  }
   115  
   116  type concreteTypeInfo struct {
   117  	C          types.Type
   118  	mset       *types.MethodSet
   119  	fprint     uint64             // fingerprint of method set
   120  	implements []*types.Interface // unordered set of implemented interfaces
   121  }
   122  
   123  type interfaceTypeInfo struct {
   124  	I               *types.Interface
   125  	mset            *types.MethodSet
   126  	fprint          uint64
   127  	implementations []types.Type // unordered set of concrete implementations
   128  }
   129  
   130  // addReachable marks a function as potentially callable at run-time,
   131  // and ensures that it gets processed.
   132  func (r *rta) addReachable(f *ssa.Function, addrTaken bool) {
   133  	reachable := r.result.Reachable
   134  	n := len(reachable)
   135  	v := reachable[f]
   136  	if addrTaken {
   137  		v.AddrTaken = true
   138  	}
   139  	reachable[f] = v
   140  	if len(reachable) > n {
   141  		// First time seeing f.  Add it to the worklist.
   142  		r.worklist = append(r.worklist, f)
   143  	}
   144  }
   145  
   146  // addEdge adds the specified call graph edge, and marks it reachable.
   147  // addrTaken indicates whether to mark the callee as "address-taken".
   148  // site is nil for calls made via reflection.
   149  func (r *rta) addEdge(caller *ssa.Function, site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) {
   150  	r.addReachable(callee, addrTaken)
   151  
   152  	if g := r.result.CallGraph; g != nil {
   153  		if caller == nil {
   154  			panic(site)
   155  		}
   156  		from := g.CreateNode(caller)
   157  		to := g.CreateNode(callee)
   158  		callgraph.AddEdge(from, site, to)
   159  	}
   160  }
   161  
   162  // ---------- addrTakenFuncs × dynCallSites ----------
   163  
   164  // visitAddrTakenFunc is called each time we encounter an address-taken function f.
   165  func (r *rta) visitAddrTakenFunc(f *ssa.Function) {
   166  	// Create two-level map (Signature -> Function -> bool).
   167  	S := f.Signature
   168  	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
   169  	if funcs == nil {
   170  		funcs = make(map[*ssa.Function]bool)
   171  		r.addrTakenFuncsBySig.Set(S, funcs)
   172  	}
   173  	if !funcs[f] {
   174  		// First time seeing f.
   175  		funcs[f] = true
   176  
   177  		// If we've seen any dyncalls of this type, mark it reachable,
   178  		// and add call graph edges.
   179  		sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
   180  		for _, site := range sites {
   181  			r.addEdge(site.Parent(), site, f, true)
   182  		}
   183  
   184  		// If the program includes (*reflect.Value).Call,
   185  		// add a dynamic call edge from it to any address-taken
   186  		// function, regardless of signature.
   187  		//
   188  		// This isn't perfect.
   189  		// - The actual call comes from an internal function
   190  		//   called reflect.call, but we can't rely on that here.
   191  		// - reflect.Value.CallSlice behaves similarly,
   192  		//   but we don't bother to create callgraph edges from
   193  		//   it as well as it wouldn't fundamentally change the
   194  		//   reachability but it would add a bunch more edges.
   195  		// - We assume that if reflect.Value.Call is among
   196  		//   the dependencies of the application, it is itself
   197  		//   reachable. (It would be more accurate to defer
   198  		//   all the addEdges below until r.V.Call itself
   199  		//   becomes reachable.)
   200  		// - Fake call graph edges are added from r.V.Call to
   201  		//   each address-taken function, but not to every
   202  		//   method reachable through a materialized rtype,
   203  		//   which is a little inconsistent. Still, the
   204  		//   reachable set includes both kinds, which is what
   205  		//   matters for e.g. deadcode detection.)
   206  		if r.reflectValueCall != nil {
   207  			var site ssa.CallInstruction = nil // can't find actual call site
   208  			r.addEdge(r.reflectValueCall, site, f, true)
   209  		}
   210  	}
   211  }
   212  
   213  // visitDynCall is called each time we encounter a dynamic "call"-mode call.
   214  func (r *rta) visitDynCall(site ssa.CallInstruction) {
   215  	S := site.Common().Signature()
   216  
   217  	// Record the call site.
   218  	sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
   219  	r.dynCallSites.Set(S, append(sites, site))
   220  
   221  	// For each function of signature S that we know is address-taken,
   222  	// add an edge and mark it reachable.
   223  	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
   224  	for g := range funcs {
   225  		r.addEdge(site.Parent(), site, g, true)
   226  	}
   227  }
   228  
   229  // ---------- concrete types × invoke sites ----------
   230  
   231  // addInvokeEdge is called for each new pair (site, C) in the matrix.
   232  func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) {
   233  	// Ascertain the concrete method of C to be called.
   234  	imethod := site.Common().Method
   235  	cmethod := r.prog.LookupMethod(C, imethod.Pkg(), imethod.Name())
   236  	r.addEdge(site.Parent(), site, cmethod, true)
   237  }
   238  
   239  // visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
   240  func (r *rta) visitInvoke(site ssa.CallInstruction) {
   241  	I := site.Common().Value.Type().Underlying().(*types.Interface)
   242  
   243  	// Record the invoke site.
   244  	sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
   245  	r.invokeSites.Set(I, append(sites, site))
   246  
   247  	// Add callgraph edge for each existing
   248  	// address-taken concrete type implementing I.
   249  	for _, C := range r.implementations(I) {
   250  		r.addInvokeEdge(site, C)
   251  	}
   252  }
   253  
   254  // ---------- main algorithm ----------
   255  
   256  // visitFunc processes function f.
   257  func (r *rta) visitFunc(f *ssa.Function) {
   258  	var space [32]*ssa.Value // preallocate space for common case
   259  
   260  	for _, b := range f.Blocks {
   261  		for _, instr := range b.Instrs {
   262  			rands := instr.Operands(space[:0])
   263  
   264  			switch instr := instr.(type) {
   265  			case ssa.CallInstruction:
   266  				call := instr.Common()
   267  				if call.IsInvoke() {
   268  					r.visitInvoke(instr)
   269  				} else if g := call.StaticCallee(); g != nil {
   270  					r.addEdge(f, instr, g, false)
   271  				} else if _, ok := call.Value.(*ssa.Builtin); !ok {
   272  					r.visitDynCall(instr)
   273  				}
   274  
   275  				// Ignore the call-position operand when
   276  				// looking for address-taken Functions.
   277  				// Hack: assume this is rands[0].
   278  				rands = rands[1:]
   279  
   280  			case *ssa.MakeInterface:
   281  				// Converting a value of type T to an
   282  				// interface materializes its runtime
   283  				// type, allowing any of its exported
   284  				// methods to be called though reflection.
   285  				r.addRuntimeType(instr.X.Type(), false)
   286  			}
   287  
   288  			// Process all address-taken functions.
   289  			for _, op := range rands {
   290  				if g, ok := (*op).(*ssa.Function); ok {
   291  					r.visitAddrTakenFunc(g)
   292  				}
   293  			}
   294  		}
   295  	}
   296  }
   297  
   298  // Analyze performs Rapid Type Analysis, starting at the specified root
   299  // functions.  It returns nil if no roots were specified.
   300  //
   301  // The root functions must be one or more entrypoints (main and init
   302  // functions) of a complete SSA program, with function bodies for all
   303  // dependencies, constructed with the [ssa.InstantiateGenerics] mode
   304  // flag.
   305  //
   306  // If buildCallGraph is true, Result.CallGraph will contain a call
   307  // graph; otherwise, only the other fields (reachable functions) are
   308  // populated.
   309  func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result {
   310  	if len(roots) == 0 {
   311  		return nil
   312  	}
   313  
   314  	r := &rta{
   315  		result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })},
   316  		prog:   roots[0].Prog,
   317  	}
   318  
   319  	if buildCallGraph {
   320  		// TODO(adonovan): change callgraph API to eliminate the
   321  		// notion of a distinguished root node.  Some callgraphs
   322  		// have many roots, or none.
   323  		r.result.CallGraph = callgraph.New(roots[0])
   324  	}
   325  
   326  	// Grab ssa.Function for (*reflect.Value).Call,
   327  	// if "reflect" is among the dependencies.
   328  	if reflectPkg := r.prog.ImportedPackage("reflect"); reflectPkg != nil {
   329  		reflectValue := reflectPkg.Members["Value"].(*ssa.Type)
   330  		r.reflectValueCall = r.prog.LookupMethod(reflectValue.Object().Type(), reflectPkg.Pkg, "Call")
   331  	}
   332  
   333  	hasher := typeutil.MakeHasher()
   334  	r.result.RuntimeTypes.SetHasher(hasher)
   335  	r.addrTakenFuncsBySig.SetHasher(hasher)
   336  	r.dynCallSites.SetHasher(hasher)
   337  	r.invokeSites.SetHasher(hasher)
   338  	r.concreteTypes.SetHasher(hasher)
   339  	r.interfaceTypes.SetHasher(hasher)
   340  
   341  	for _, root := range roots {
   342  		r.addReachable(root, false)
   343  	}
   344  
   345  	// Visit functions, processing their instructions, and adding
   346  	// new functions to the worklist, until a fixed point is
   347  	// reached.
   348  	var shadow []*ssa.Function // for efficiency, we double-buffer the worklist
   349  	for len(r.worklist) > 0 {
   350  		shadow, r.worklist = r.worklist, shadow[:0]
   351  		for _, f := range shadow {
   352  			r.visitFunc(f)
   353  		}
   354  	}
   355  	return r.result
   356  }
   357  
   358  // interfaces(C) returns all currently known interfaces implemented by C.
   359  func (r *rta) interfaces(C types.Type) []*types.Interface {
   360  	// Create an info for C the first time we see it.
   361  	var cinfo *concreteTypeInfo
   362  	if v := r.concreteTypes.At(C); v != nil {
   363  		cinfo = v.(*concreteTypeInfo)
   364  	} else {
   365  		mset := r.prog.MethodSets.MethodSet(C)
   366  		cinfo = &concreteTypeInfo{
   367  			C:      C,
   368  			mset:   mset,
   369  			fprint: fingerprint(mset),
   370  		}
   371  		r.concreteTypes.Set(C, cinfo)
   372  
   373  		// Ascertain set of interfaces C implements
   374  		// and update the 'implements' relation.
   375  		r.interfaceTypes.Iterate(func(I types.Type, v interface{}) {
   376  			iinfo := v.(*interfaceTypeInfo)
   377  			if I := aliases.Unalias(I).(*types.Interface); implements(cinfo, iinfo) {
   378  				iinfo.implementations = append(iinfo.implementations, C)
   379  				cinfo.implements = append(cinfo.implements, I)
   380  			}
   381  		})
   382  	}
   383  
   384  	return cinfo.implements
   385  }
   386  
   387  // implementations(I) returns all currently known concrete types that implement I.
   388  func (r *rta) implementations(I *types.Interface) []types.Type {
   389  	// Create an info for I the first time we see it.
   390  	var iinfo *interfaceTypeInfo
   391  	if v := r.interfaceTypes.At(I); v != nil {
   392  		iinfo = v.(*interfaceTypeInfo)
   393  	} else {
   394  		mset := r.prog.MethodSets.MethodSet(I)
   395  		iinfo = &interfaceTypeInfo{
   396  			I:      I,
   397  			mset:   mset,
   398  			fprint: fingerprint(mset),
   399  		}
   400  		r.interfaceTypes.Set(I, iinfo)
   401  
   402  		// Ascertain set of concrete types that implement I
   403  		// and update the 'implements' relation.
   404  		r.concreteTypes.Iterate(func(C types.Type, v interface{}) {
   405  			cinfo := v.(*concreteTypeInfo)
   406  			if implements(cinfo, iinfo) {
   407  				cinfo.implements = append(cinfo.implements, I)
   408  				iinfo.implementations = append(iinfo.implementations, C)
   409  			}
   410  		})
   411  	}
   412  	return iinfo.implementations
   413  }
   414  
   415  // addRuntimeType is called for each concrete type that can be the
   416  // dynamic type of some interface or reflect.Value.
   417  // Adapted from needMethods in go/ssa/builder.go
   418  func (r *rta) addRuntimeType(T types.Type, skip bool) {
   419  	// Never record aliases.
   420  	T = aliases.Unalias(T)
   421  
   422  	if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
   423  		if skip && !prev {
   424  			r.result.RuntimeTypes.Set(T, skip)
   425  		}
   426  		return
   427  	}
   428  	r.result.RuntimeTypes.Set(T, skip)
   429  
   430  	mset := r.prog.MethodSets.MethodSet(T)
   431  
   432  	if _, ok := T.Underlying().(*types.Interface); !ok {
   433  		// T is a new concrete type.
   434  		for i, n := 0, mset.Len(); i < n; i++ {
   435  			sel := mset.At(i)
   436  			m := sel.Obj()
   437  
   438  			if m.Exported() {
   439  				// Exported methods are always potentially callable via reflection.
   440  				r.addReachable(r.prog.MethodValue(sel), true)
   441  			}
   442  		}
   443  
   444  		// Add callgraph edge for each existing dynamic
   445  		// "invoke"-mode call via that interface.
   446  		for _, I := range r.interfaces(T) {
   447  			sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
   448  			for _, site := range sites {
   449  				r.addInvokeEdge(site, T)
   450  			}
   451  		}
   452  	}
   453  
   454  	// Precondition: T is not a method signature (*Signature with Recv()!=nil).
   455  	// Recursive case: skip => don't call makeMethods(T).
   456  	// Each package maintains its own set of types it has visited.
   457  
   458  	var n *types.Named
   459  	switch T := aliases.Unalias(T).(type) {
   460  	case *types.Named:
   461  		n = T
   462  	case *types.Pointer:
   463  		n, _ = aliases.Unalias(T.Elem()).(*types.Named)
   464  	}
   465  	if n != nil {
   466  		owner := n.Obj().Pkg()
   467  		if owner == nil {
   468  			return // built-in error type
   469  		}
   470  	}
   471  
   472  	// Recursion over signatures of each exported method.
   473  	for i := 0; i < mset.Len(); i++ {
   474  		if mset.At(i).Obj().Exported() {
   475  			sig := mset.At(i).Type().(*types.Signature)
   476  			r.addRuntimeType(sig.Params(), true)  // skip the Tuple itself
   477  			r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
   478  		}
   479  	}
   480  
   481  	switch t := T.(type) {
   482  	case *aliases.Alias:
   483  		panic("unreachable")
   484  
   485  	case *types.Basic:
   486  		// nop
   487  
   488  	case *types.Interface:
   489  		// nop---handled by recursion over method set.
   490  
   491  	case *types.Pointer:
   492  		r.addRuntimeType(t.Elem(), false)
   493  
   494  	case *types.Slice:
   495  		r.addRuntimeType(t.Elem(), false)
   496  
   497  	case *types.Chan:
   498  		r.addRuntimeType(t.Elem(), false)
   499  
   500  	case *types.Map:
   501  		r.addRuntimeType(t.Key(), false)
   502  		r.addRuntimeType(t.Elem(), false)
   503  
   504  	case *types.Signature:
   505  		if t.Recv() != nil {
   506  			panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
   507  		}
   508  		r.addRuntimeType(t.Params(), true)  // skip the Tuple itself
   509  		r.addRuntimeType(t.Results(), true) // skip the Tuple itself
   510  
   511  	case *types.Named:
   512  		// A pointer-to-named type can be derived from a named
   513  		// type via reflection.  It may have methods too.
   514  		r.addRuntimeType(types.NewPointer(T), false)
   515  
   516  		// Consider 'type T struct{S}' where S has methods.
   517  		// Reflection provides no way to get from T to struct{S},
   518  		// only to S, so the method set of struct{S} is unwanted,
   519  		// so set 'skip' flag during recursion.
   520  		r.addRuntimeType(t.Underlying(), true)
   521  
   522  	case *types.Array:
   523  		r.addRuntimeType(t.Elem(), false)
   524  
   525  	case *types.Struct:
   526  		for i, n := 0, t.NumFields(); i < n; i++ {
   527  			r.addRuntimeType(t.Field(i).Type(), false)
   528  		}
   529  
   530  	case *types.Tuple:
   531  		for i, n := 0, t.Len(); i < n; i++ {
   532  			r.addRuntimeType(t.At(i).Type(), false)
   533  		}
   534  
   535  	default:
   536  		panic(T)
   537  	}
   538  }
   539  
   540  // fingerprint returns a bitmask with one bit set per method id,
   541  // enabling 'implements' to quickly reject most candidates.
   542  func fingerprint(mset *types.MethodSet) uint64 {
   543  	var space [64]byte
   544  	var mask uint64
   545  	for i := 0; i < mset.Len(); i++ {
   546  		method := mset.At(i).Obj()
   547  		sig := method.Type().(*types.Signature)
   548  		sum := crc32.ChecksumIEEE(fmt.Appendf(space[:], "%s/%d/%d",
   549  			method.Id(),
   550  			sig.Params().Len(),
   551  			sig.Results().Len()))
   552  		mask |= 1 << (sum % 64)
   553  	}
   554  	return mask
   555  }
   556  
   557  // implements reports whether types.Implements(cinfo.C, iinfo.I),
   558  // but more efficiently.
   559  func implements(cinfo *concreteTypeInfo, iinfo *interfaceTypeInfo) (got bool) {
   560  	// The concrete type must have at least the methods
   561  	// (bits) of the interface type. Use a bitwise subset
   562  	// test to reject most candidates quickly.
   563  	return iinfo.fprint & ^cinfo.fprint == 0 && types.Implements(cinfo.C, iinfo.I)
   564  }
   565
View as plain text