shell.go

Documentation: github.com/letsencrypt/boulder/cmd

     1  // Package cmd provides utilities that underlie the specific commands.
     2  package cmd
     3  
     4  import (
     5  	"context"
     6  	"encoding/json"
     7  	"errors"
     8  	"expvar"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"log/syslog"
    13  	"net/http"
    14  	"net/http/pprof"
    15  	"os"
    16  	"os/signal"
    17  	"runtime"
    18  	"runtime/debug"
    19  	"strings"
    20  	"syscall"
    21  	"time"
    22  
    23  	"github.com/go-logr/stdr"
    24  	"github.com/go-sql-driver/mysql"
    25  	"github.com/prometheus/client_golang/prometheus"
    26  	"github.com/prometheus/client_golang/prometheus/collectors"
    27  	"github.com/prometheus/client_golang/prometheus/promhttp"
    28  	"github.com/redis/go-redis/v9"
    29  	"go.opentelemetry.io/otel"
    30  	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
    31  	"go.opentelemetry.io/otel/propagation"
    32  	"go.opentelemetry.io/otel/sdk/resource"
    33  	"go.opentelemetry.io/otel/sdk/trace"
    34  	semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
    35  	"google.golang.org/grpc/grpclog"
    36  
    37  	"github.com/letsencrypt/boulder/core"
    38  	blog "github.com/letsencrypt/boulder/log"
    39  	"github.com/letsencrypt/boulder/strictyaml"
    40  	"github.com/letsencrypt/validator/v10"
    41  )
    42  
    43  // Because we don't know when this init will be called with respect to
    44  // flag.Parse() and other flag definitions, we can't rely on the regular
    45  // flag mechanism. But this one is fine.
    46  func init() {
    47  	for _, v := range os.Args {
    48  		if v == "--version" || v == "-version" {
    49  			fmt.Println(VersionString())
    50  			os.Exit(0)
    51  		}
    52  	}
    53  }
    54  
    55  // mysqlLogger implements the mysql.Logger interface.
    56  type mysqlLogger struct {
    57  	blog.Logger
    58  }
    59  
    60  func (m mysqlLogger) Print(v ...interface{}) {
    61  	m.AuditErrf("[mysql] %s", fmt.Sprint(v...))
    62  }
    63  
    64  // grpcLogger implements the grpclog.LoggerV2 interface.
    65  type grpcLogger struct {
    66  	blog.Logger
    67  }
    68  
    69  // Ensure that fatal logs exit, because we use neither the gRPC default logger
    70  // nor the stdlib default logger, both of which would call os.Exit(1) for us.
    71  func (log grpcLogger) Fatal(args ...interface{}) {
    72  	log.Error(args...)
    73  	os.Exit(1)
    74  }
    75  func (log grpcLogger) Fatalf(format string, args ...interface{}) {
    76  	log.Errorf(format, args...)
    77  	os.Exit(1)
    78  }
    79  func (log grpcLogger) Fatalln(args ...interface{}) {
    80  	log.Errorln(args...)
    81  	os.Exit(1)
    82  }
    83  
    84  // Treat all gRPC error logs as potential audit events.
    85  func (log grpcLogger) Error(args ...interface{}) {
    86  	log.Logger.AuditErr(fmt.Sprint(args...))
    87  }
    88  func (log grpcLogger) Errorf(format string, args ...interface{}) {
    89  	log.Logger.AuditErrf(format, args...)
    90  }
    91  func (log grpcLogger) Errorln(args ...interface{}) {
    92  	log.Logger.AuditErr(fmt.Sprintln(args...))
    93  }
    94  
    95  // Pass through most Warnings, but filter out a few noisy ones.
    96  func (log grpcLogger) Warning(args ...interface{}) {
    97  	log.Logger.Warning(fmt.Sprint(args...))
    98  }
    99  func (log grpcLogger) Warningf(format string, args ...interface{}) {
   100  	log.Logger.Warningf(format, args...)
   101  }
   102  func (log grpcLogger) Warningln(args ...interface{}) {
   103  	msg := fmt.Sprintln(args...)
   104  	// See https://github.com/letsencrypt/boulder/issues/4628
   105  	if strings.Contains(msg, `ccResolverWrapper: error parsing service config: no JSON service config provided`) {
   106  		return
   107  	}
   108  	// See https://github.com/letsencrypt/boulder/issues/4379
   109  	if strings.Contains(msg, `Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"`) {
   110  		return
   111  	}
   112  	// Since we've already formatted the message, just pass through to .Warning()
   113  	log.Logger.Warning(msg)
   114  }
   115  
   116  // Don't log any INFO-level gRPC stuff. In practice this is all noise, like
   117  // failed TXT lookups for service discovery (we only use A records).
   118  func (log grpcLogger) Info(args ...interface{})                 {}
   119  func (log grpcLogger) Infof(format string, args ...interface{}) {}
   120  func (log grpcLogger) Infoln(args ...interface{})               {}
   121  
   122  // V returns true if the verbosity level l is less than the verbosity we want to
   123  // log at.
   124  func (log grpcLogger) V(l int) bool {
   125  	// We always return false. This causes gRPC to not log some things which are
   126  	// only logged conditionally if the logLevel is set below a certain value.
   127  	// TODO: Use the wrapped log.Logger.stdoutLevel and log.Logger.syslogLevel
   128  	// to determine a correct return value here.
   129  	return false
   130  }
   131  
   132  // promLogger implements the promhttp.Logger interface.
   133  type promLogger struct {
   134  	blog.Logger
   135  }
   136  
   137  func (log promLogger) Println(args ...interface{}) {
   138  	log.AuditErr(fmt.Sprint(args...))
   139  }
   140  
   141  type redisLogger struct {
   142  	blog.Logger
   143  }
   144  
   145  func (rl redisLogger) Printf(ctx context.Context, format string, v ...interface{}) {
   146  	rl.Infof(format, v...)
   147  }
   148  
   149  // logWriter implements the io.Writer interface.
   150  type logWriter struct {
   151  	blog.Logger
   152  }
   153  
   154  func (lw logWriter) Write(p []byte) (n int, err error) {
   155  	// Lines received by logWriter will always have a trailing newline.
   156  	lw.Logger.Info(strings.Trim(string(p), "\n"))
   157  	return
   158  }
   159  
   160  // logOutput implements the log.Logger interface's Output method for use with logr
   161  type logOutput struct {
   162  	blog.Logger
   163  }
   164  
   165  func (l logOutput) Output(calldepth int, logline string) error {
   166  	l.Logger.Info(logline)
   167  	return nil
   168  }
   169  
   170  // StatsAndLogging sets up an AuditLogger, Prometheus Registerer, and
   171  // OpenTelemetry tracing.  It returns the Registerer and AuditLogger, along
   172  // with a graceful shutdown function to be deferred.
   173  //
   174  // It spawns off an HTTP server on the provided port to report the stats and
   175  // provide pprof profiling handlers.
   176  //
   177  // The constructed AuditLogger as the default logger, and configures the mysql
   178  // and grpc packages to use our logger. This must be called before any gRPC code
   179  // is called, because gRPC's SetLogger doesn't use any locking.
   180  //
   181  // This function does not return an error, and will panic on problems.
   182  func StatsAndLogging(logConf SyslogConfig, otConf OpenTelemetryConfig, addr string) (prometheus.Registerer, blog.Logger, func(context.Context)) {
   183  	logger := NewLogger(logConf)
   184  
   185  	shutdown := NewOpenTelemetry(otConf, logger)
   186  
   187  	return newStatsRegistry(addr, logger), logger, shutdown
   188  }
   189  
   190  // NewLogger creates a logger object with the provided settings, sets it as
   191  // the global logger, and returns it.
   192  //
   193  // It also sets the logging systems for various packages we use to go through
   194  // the created logger, and sets up a periodic log event for the current timestamp.
   195  func NewLogger(logConf SyslogConfig) blog.Logger {
   196  	var logger blog.Logger
   197  	if logConf.SyslogLevel >= 0 {
   198  		syslogger, err := syslog.Dial(
   199  			"",
   200  			"",
   201  			syslog.LOG_INFO, // default, not actually used
   202  			core.Command())
   203  		FailOnError(err, "Could not connect to Syslog")
   204  		syslogLevel := int(syslog.LOG_INFO)
   205  		if logConf.SyslogLevel != 0 {
   206  			syslogLevel = logConf.SyslogLevel
   207  		}
   208  		logger, err = blog.New(syslogger, logConf.StdoutLevel, syslogLevel)
   209  		FailOnError(err, "Could not connect to Syslog")
   210  	} else {
   211  		logger = blog.StdoutLogger(logConf.StdoutLevel)
   212  	}
   213  
   214  	_ = blog.Set(logger)
   215  	_ = mysql.SetLogger(mysqlLogger{logger})
   216  	grpclog.SetLoggerV2(grpcLogger{logger})
   217  	log.SetOutput(logWriter{logger})
   218  	redis.SetLogger(redisLogger{logger})
   219  
   220  	// Periodically log the current timestamp, to ensure syslog timestamps match
   221  	// Boulder's conception of time.
   222  	go func() {
   223  		for {
   224  			time.Sleep(time.Minute)
   225  			logger.Info(fmt.Sprintf("time=%s", time.Now().Format(time.RFC3339Nano)))
   226  		}
   227  	}()
   228  	return logger
   229  }
   230  
   231  func newVersionCollector() prometheus.Collector {
   232  	buildTime := core.Unspecified
   233  	if core.GetBuildTime() != core.Unspecified {
   234  		// core.BuildTime is set by our Makefile using the shell command 'date
   235  		// -u' which outputs in a consistent format across all POSIX systems.
   236  		bt, err := time.Parse(time.UnixDate, core.BuildTime)
   237  		if err != nil {
   238  			// Should never happen unless the Makefile is changed.
   239  			buildTime = "Unparsable"
   240  		} else {
   241  			buildTime = bt.Format(time.RFC3339)
   242  		}
   243  	}
   244  	return prometheus.NewGaugeFunc(
   245  		prometheus.GaugeOpts{
   246  			Name: "version",
   247  			Help: fmt.Sprintf(
   248  				"A metric with a constant value of '1' labeled by the short commit-id (buildId), build timestamp in RFC3339 format (buildTime), and Go release tag like 'go1.3' (goVersion) from which %s was built.",
   249  				core.Command(),
   250  			),
   251  			ConstLabels: prometheus.Labels{
   252  				"buildId":   core.GetBuildID(),
   253  				"buildTime": buildTime,
   254  				"goVersion": runtime.Version(),
   255  			},
   256  		},
   257  		func() float64 { return 1 },
   258  	)
   259  }
   260  
   261  func newStatsRegistry(addr string, logger blog.Logger) prometheus.Registerer {
   262  	registry := prometheus.NewRegistry()
   263  	registry.MustRegister(collectors.NewGoCollector())
   264  	registry.MustRegister(collectors.NewProcessCollector(
   265  		collectors.ProcessCollectorOpts{}))
   266  	registry.MustRegister(newVersionCollector())
   267  
   268  	mux := http.NewServeMux()
   269  	// Register the available pprof handlers. These are all registered on
   270  	// DefaultServeMux just by importing pprof, but since we eschew
   271  	// DefaultServeMux, we need to explicitly register them on our own mux.
   272  	mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
   273  	mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
   274  	mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
   275  	mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
   276  	// These handlers are defined in runtime/pprof instead of net/http/pprof, and
   277  	// have to be accessed through net/http/pprof's Handler func.
   278  	mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
   279  	mux.Handle("/debug/pprof/block", pprof.Handler("block"))
   280  	mux.Handle("/debug/pprof/heap", pprof.Handler("heap"))
   281  	mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex"))
   282  	mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
   283  
   284  	mux.Handle("/debug/vars", expvar.Handler())
   285  	mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
   286  		ErrorLog: promLogger{logger},
   287  	}))
   288  
   289  	server := http.Server{
   290  		Addr:        addr,
   291  		Handler:     mux,
   292  		ReadTimeout: time.Minute,
   293  	}
   294  	go func() {
   295  		err := server.ListenAndServe()
   296  		if err != nil {
   297  			logger.Errf("unable to boot debug server on %s: %v", addr, err)
   298  			os.Exit(1)
   299  		}
   300  	}()
   301  	return registry
   302  }
   303  
   304  // NewOpenTelemetry sets up our OpenTelemetry tracing
   305  // It returns a graceful shutdown function to be deferred.
   306  func NewOpenTelemetry(config OpenTelemetryConfig, logger blog.Logger) func(ctx context.Context) {
   307  	otel.SetLogger(stdr.New(logOutput{logger}))
   308  	otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { logger.Errf("OpenTelemetry error: %v", err) }))
   309  
   310  	r, err := resource.Merge(
   311  		resource.Default(),
   312  		resource.NewWithAttributes(
   313  			semconv.SchemaURL,
   314  			semconv.ServiceNameKey.String(core.Command()),
   315  			semconv.ServiceVersionKey.String(core.GetBuildID()),
   316  		),
   317  	)
   318  	if err != nil {
   319  		FailOnError(err, "Could not create OpenTelemetry resource")
   320  	}
   321  
   322  	opts := []trace.TracerProviderOption{
   323  		trace.WithResource(r),
   324  		// Use a ParentBased sampler to respect the sample decisions on incoming
   325  		// traces, and TraceIDRatioBased to randomly sample new traces.
   326  		trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRatio))),
   327  	}
   328  
   329  	if config.Endpoint != "" {
   330  		exporter, err := otlptracegrpc.New(
   331  			context.Background(),
   332  			otlptracegrpc.WithInsecure(),
   333  			otlptracegrpc.WithEndpoint(config.Endpoint))
   334  		if err != nil {
   335  			FailOnError(err, "Could not create OpenTelemetry OTLP exporter")
   336  		}
   337  
   338  		opts = append(opts, trace.WithBatcher(exporter))
   339  	}
   340  
   341  	tracerProvider := trace.NewTracerProvider(opts...)
   342  	otel.SetTracerProvider(tracerProvider)
   343  	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{}))
   344  
   345  	return func(ctx context.Context) {
   346  		err := tracerProvider.Shutdown(ctx)
   347  		if err != nil {
   348  			logger.Errf("Error while shutting down OpenTelemetry: %v", err)
   349  		}
   350  	}
   351  }
   352  
   353  // AuditPanic catches and logs panics, then exits with exit code 1.
   354  // This method should be called in a defer statement as early as possible.
   355  func AuditPanic() {
   356  	err := recover()
   357  	// No panic, no problem
   358  	if err == nil {
   359  		return
   360  	}
   361  	// Get the global logger if it's initialized, or create a default one if not.
   362  	// We could wind up creating a default logger if we panic so early in a process'
   363  	// lifetime that we haven't yet parsed the config and created a logger.
   364  	log := blog.Get()
   365  	// For the special type `failure`, audit log the message and exit quietly
   366  	fail, ok := err.(failure)
   367  	if ok {
   368  		log.AuditErr(fail.msg)
   369  	} else {
   370  		// For all other values passed to `panic`, log them and a stack trace
   371  		log.AuditErrf("Panic caused by err: %s", err)
   372  
   373  		log.AuditErrf("Stack Trace (Current goroutine) %s", debug.Stack())
   374  	}
   375  	// Because this function is deferred as early as possible, there's no further defers to run after this one
   376  	// So it is safe to os.Exit to set the exit code and exit without losing any defers we haven't executed.
   377  	os.Exit(1)
   378  }
   379  
   380  // failure is a sentinel type that `Fail` passes to `panic` so `AuditPanic` can exit
   381  // quietly and print the msg.
   382  type failure struct {
   383  	msg string
   384  }
   385  
   386  func (f failure) String() string {
   387  	return f.msg
   388  }
   389  
   390  // Fail raises a panic with a special type that causes `AuditPanic` to audit log the provided message
   391  // and then exit nonzero (without printing a stack trace).
   392  func Fail(msg string) {
   393  	panic(failure{msg})
   394  }
   395  
   396  // FailOnError calls Fail if the provided error is non-nil.
   397  // This is useful for one-line error handling in top-level executables,
   398  // but should generally be avoided in libraries. The message argument is optional.
   399  func FailOnError(err error, msg string) {
   400  	if err == nil {
   401  		return
   402  	}
   403  	if msg == "" {
   404  		Fail(err.Error())
   405  	} else {
   406  		Fail(fmt.Sprintf("%s: %s", msg, err))
   407  	}
   408  }
   409  
   410  func decodeJSONStrict(in io.Reader, out interface{}) error {
   411  	decoder := json.NewDecoder(in)
   412  	decoder.DisallowUnknownFields()
   413  
   414  	return decoder.Decode(out)
   415  }
   416  
   417  // ReadConfigFile takes a file path as an argument and attempts to
   418  // unmarshal the content of the file into a struct containing a
   419  // configuration of a boulder component. Any config keys in the JSON
   420  // file which do not correspond to expected keys in the config struct
   421  // will result in errors.
   422  func ReadConfigFile(filename string, out interface{}) error {
   423  	file, err := os.Open(filename)
   424  	if err != nil {
   425  		return err
   426  	}
   427  	defer file.Close()
   428  
   429  	return decodeJSONStrict(file, out)
   430  }
   431  
   432  // ValidateJSONConfig takes a *ConfigValidator and an io.Reader containing a
   433  // JSON representation of a config. The JSON data is unmarshaled into the
   434  // *ConfigValidator's inner Config and then validated according to the
   435  // 'validate' tags for on each field. Callers can use cmd.LookupConfigValidator
   436  // to get a *ConfigValidator for a given Boulder component. This is exported for
   437  // use in SRE CI tooling.
   438  func ValidateJSONConfig(cv *ConfigValidator, in io.Reader) error {
   439  	if cv == nil {
   440  		return errors.New("config validator cannot be nil")
   441  	}
   442  
   443  	// Initialize the validator and load any custom tags.
   444  	validate := validator.New()
   445  	for tag, v := range cv.Validators {
   446  		err := validate.RegisterValidation(tag, v)
   447  		if err != nil {
   448  			return err
   449  		}
   450  	}
   451  
   452  	err := decodeJSONStrict(in, cv.Config)
   453  	if err != nil {
   454  		return err
   455  	}
   456  	err = validate.Struct(cv.Config)
   457  	if err != nil {
   458  		errs, ok := err.(validator.ValidationErrors)
   459  		if !ok {
   460  			// This should never happen.
   461  			return err
   462  		}
   463  		if len(errs) > 0 {
   464  			allErrs := []string{}
   465  			for _, e := range errs {
   466  				allErrs = append(allErrs, e.Error())
   467  			}
   468  			return errors.New(strings.Join(allErrs, ", "))
   469  		}
   470  	}
   471  	return nil
   472  }
   473  
   474  // ValidateYAMLConfig takes a *ConfigValidator and an io.Reader containing a
   475  // YAML representation of a config. The YAML data is unmarshaled into the
   476  // *ConfigValidator's inner Config and then validated according to the
   477  // 'validate' tags for on each field. Callers can use cmd.LookupConfigValidator
   478  // to get a *ConfigValidator for a given Boulder component. This is exported for
   479  // use in SRE CI tooling.
   480  func ValidateYAMLConfig(cv *ConfigValidator, in io.Reader) error {
   481  	if cv == nil {
   482  		return errors.New("config validator cannot be nil")
   483  	}
   484  
   485  	// Initialize the validator and load any custom tags.
   486  	validate := validator.New()
   487  	for tag, v := range cv.Validators {
   488  		err := validate.RegisterValidation(tag, v)
   489  		if err != nil {
   490  			return err
   491  		}
   492  	}
   493  
   494  	inBytes, err := io.ReadAll(in)
   495  	if err != nil {
   496  		return err
   497  	}
   498  	err = strictyaml.Unmarshal(inBytes, cv.Config)
   499  	if err != nil {
   500  		return err
   501  	}
   502  	err = validate.Struct(cv.Config)
   503  	if err != nil {
   504  		errs, ok := err.(validator.ValidationErrors)
   505  		if !ok {
   506  			// This should never happen.
   507  			return err
   508  		}
   509  		if len(errs) > 0 {
   510  			allErrs := []string{}
   511  			for _, e := range errs {
   512  				allErrs = append(allErrs, e.Error())
   513  			}
   514  			return errors.New(strings.Join(allErrs, ", "))
   515  		}
   516  	}
   517  	return nil
   518  }
   519  
   520  // VersionString produces a friendly Application version string.
   521  func VersionString() string {
   522  	return fmt.Sprintf("Versions: %s=(%s %s) Golang=(%s) BuildHost=(%s)", core.Command(), core.GetBuildID(), core.GetBuildTime(), runtime.Version(), core.GetBuildHost())
   523  }
   524  
   525  // CatchSignals blocks until a SIGTERM, SIGINT, or SIGHUP is received, then
   526  // executes the given callback. The callback should not block, it should simply
   527  // signal other goroutines (particularly the main goroutine) to clean themselves
   528  // up and exit. This function is intended to be called in its own goroutine,
   529  // while the main goroutine waits for an indication that the other goroutines
   530  // have exited cleanly.
   531  func CatchSignals(callback func()) {
   532  	WaitForSignal()
   533  	callback()
   534  }
   535  
   536  // WaitForSignal blocks until a SIGTERM, SIGINT, or SIGHUP is received. It then
   537  // returns, allowing execution to resume, generally allowing a main() function
   538  // to return and trigger and deferred cleanup functions. This function is
   539  // intended to be called directly from the main goroutine, while a gRPC or HTTP
   540  // server runs in a background goroutine.
   541  func WaitForSignal() {
   542  	sigChan := make(chan os.Signal, 1)
   543  	signal.Notify(sigChan, syscall.SIGTERM)
   544  	signal.Notify(sigChan, syscall.SIGINT)
   545  	signal.Notify(sigChan, syscall.SIGHUP)
   546  	<-sigChan
   547  }
   548
View as plain text