...

Source file src/github.com/letsencrypt/boulder/cmd/log-validator/main.go

Documentation: github.com/letsencrypt/boulder/cmd/log-validator

     1  package notmain
     2  
     3  import (
     4  	"context"
     5  	"encoding/base64"
     6  	"encoding/json"
     7  	"errors"
     8  	"flag"
     9  	"fmt"
    10  	"os"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/hpcloud/tail"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  
    17  	"github.com/letsencrypt/boulder/cmd"
    18  	blog "github.com/letsencrypt/boulder/log"
    19  )
    20  
    21  var errInvalidChecksum = errors.New("invalid checksum length")
    22  
    23  func lineValid(text string) error {
    24  	// Line format should match the following rsyslog omfile template:
    25  	//
    26  	//   template( name="LELogFormat" type="list" ) {
    27  	//  	property(name="timereported" dateFormat="rfc3339")
    28  	//  	constant(value=" ")
    29  	//  	property(name="hostname" field.delimiter="46" field.number="1")
    30  	//  	constant(value=" datacenter ")
    31  	//  	property(name="syslogseverity")
    32  	//  	constant(value=" ")
    33  	//  	property(name="syslogtag")
    34  	//  	property(name="msg" spifno1stsp="on" )
    35  	//  	property(name="msg" droplastlf="on" )
    36  	//  	constant(value="\n")
    37  	//   }
    38  	//
    39  	// This should result in a log line that looks like this:
    40  	//   timestamp hostname datacenter syslogseverity binary-name[pid]: checksum msg
    41  
    42  	fields := strings.Split(text, " ")
    43  	const errorPrefix = "log-validator:"
    44  	// Extract checksum from line
    45  	if len(fields) < 6 {
    46  		return fmt.Errorf("%s line doesn't match expected format", errorPrefix)
    47  	}
    48  	checksum := fields[5]
    49  	_, err := base64.RawURLEncoding.DecodeString(checksum)
    50  	if err != nil || len(checksum) != 7 {
    51  		return fmt.Errorf(
    52  			"%s expected a 7 character base64 raw URL decodable string, got %q: %w",
    53  			errorPrefix,
    54  			checksum,
    55  			errInvalidChecksum,
    56  		)
    57  	}
    58  
    59  	// Reconstruct just the message portion of the line
    60  	line := strings.Join(fields[6:], " ")
    61  
    62  	// If we are fed our own output, treat it as always valid. This
    63  	// prevents runaway scenarios where we generate ever-longer output.
    64  	if strings.Contains(text, errorPrefix) {
    65  		return nil
    66  	}
    67  	// Check the extracted checksum against the computed checksum
    68  	if computedChecksum := blog.LogLineChecksum(line); checksum != computedChecksum {
    69  		return fmt.Errorf("%s invalid checksum (expected %q, got %q)", errorPrefix, computedChecksum, checksum)
    70  	}
    71  	return nil
    72  }
    73  
    74  func validateFile(filename string) error {
    75  	file, err := os.ReadFile(filename)
    76  	if err != nil {
    77  		return err
    78  	}
    79  	badFile := false
    80  	for i, line := range strings.Split(string(file), "\n") {
    81  		if line == "" {
    82  			continue
    83  		}
    84  		err := lineValid(line)
    85  		if err != nil {
    86  			badFile = true
    87  			fmt.Fprintf(os.Stderr, "[line %d] %s: %s\n", i+1, err, line)
    88  		}
    89  	}
    90  
    91  	if badFile {
    92  		return errors.New("file contained invalid lines")
    93  	}
    94  	return nil
    95  }
    96  
    97  // tailLogger is an adapter to the hpcloud/tail module's logging interface.
    98  type tailLogger struct {
    99  	blog.Logger
   100  }
   101  
   102  func (tl tailLogger) Fatal(v ...interface{}) {
   103  	tl.AuditErr(fmt.Sprint(v...))
   104  }
   105  func (tl tailLogger) Fatalf(format string, v ...interface{}) {
   106  	tl.AuditErrf(format, v...)
   107  }
   108  func (tl tailLogger) Fatalln(v ...interface{}) {
   109  	tl.AuditErr(fmt.Sprint(v...) + "\n")
   110  }
   111  func (tl tailLogger) Panic(v ...interface{}) {
   112  	tl.AuditErr(fmt.Sprint(v...))
   113  }
   114  func (tl tailLogger) Panicf(format string, v ...interface{}) {
   115  	tl.AuditErrf(format, v...)
   116  }
   117  func (tl tailLogger) Panicln(v ...interface{}) {
   118  	tl.AuditErr(fmt.Sprint(v...) + "\n")
   119  }
   120  func (tl tailLogger) Print(v ...interface{}) {
   121  	tl.Info(fmt.Sprint(v...))
   122  }
   123  func (tl tailLogger) Printf(format string, v ...interface{}) {
   124  	tl.Infof(format, v...)
   125  }
   126  func (tl tailLogger) Println(v ...interface{}) {
   127  	tl.Info(fmt.Sprint(v...) + "\n")
   128  }
   129  
   130  type Config struct {
   131  	Files         []string `validate:"min=1,dive,required"`
   132  	DebugAddr     string   `validate:"required,hostname_port"`
   133  	Syslog        cmd.SyslogConfig
   134  	OpenTelemetry cmd.OpenTelemetryConfig
   135  }
   136  
   137  func main() {
   138  	configPath := flag.String("config", "", "File path to the configuration file for this service")
   139  	checkFile := flag.String("check-file", "", "File path to a file to directly validate, if this argument is provided the config will not be parsed and only this file will be inspected")
   140  	flag.Parse()
   141  
   142  	if *checkFile != "" {
   143  		err := validateFile(*checkFile)
   144  		cmd.FailOnError(err, "validation failed")
   145  		return
   146  	}
   147  
   148  	configBytes, err := os.ReadFile(*configPath)
   149  	cmd.FailOnError(err, "failed to read config file")
   150  	var config Config
   151  	err = json.Unmarshal(configBytes, &config)
   152  	cmd.FailOnError(err, "failed to parse config file")
   153  
   154  	stats, logger, oTelShutdown := cmd.StatsAndLogging(config.Syslog, config.OpenTelemetry, config.DebugAddr)
   155  	defer oTelShutdown(context.Background())
   156  	logger.Info(cmd.VersionString())
   157  	lineCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
   158  		Name: "log_lines",
   159  		Help: "A counter of log lines processed, with status",
   160  	}, []string{"filename", "status"})
   161  	stats.MustRegister(lineCounter)
   162  
   163  	// Emit no more than 1 error line per second. This prevents consuming large
   164  	// amounts of disk space in case there is problem that causes all log lines to
   165  	// be invalid.
   166  	outputLimiter := time.NewTicker(time.Second)
   167  
   168  	var tailers []*tail.Tail
   169  	for _, filename := range config.Files {
   170  		t, err := tail.TailFile(filename, tail.Config{
   171  			ReOpen:    true,
   172  			MustExist: false, // sometimes files won't exist, so we must tolerate that
   173  			Follow:    true,
   174  			Logger:    tailLogger{logger},
   175  		})
   176  		cmd.FailOnError(err, "failed to tail file")
   177  
   178  		go func() {
   179  			for line := range t.Lines {
   180  				if line.Err != nil {
   181  					logger.Errf("error while tailing %s: %s", t.Filename, line.Err)
   182  					continue
   183  				}
   184  				err := lineValid(line.Text)
   185  				if err != nil {
   186  					if errors.Is(err, errInvalidChecksum) {
   187  						lineCounter.WithLabelValues(t.Filename, "invalid checksum length").Inc()
   188  					} else {
   189  						lineCounter.WithLabelValues(t.Filename, "bad").Inc()
   190  					}
   191  					select {
   192  					case <-outputLimiter.C:
   193  						logger.Errf("%s: %s %q", t.Filename, err, line.Text)
   194  					default:
   195  					}
   196  				} else {
   197  					lineCounter.WithLabelValues(t.Filename, "ok").Inc()
   198  				}
   199  			}
   200  		}()
   201  
   202  		tailers = append(tailers, t)
   203  	}
   204  
   205  	defer func() {
   206  		for _, t := range tailers {
   207  			// The tail module seems to have a race condition that will generate
   208  			// errors like this on shutdown:
   209  			// failed to stop tailing file: <filename>: Failed to detect creation of
   210  			// <filename>: inotify watcher has been closed
   211  			// This is probably related to the module's shutdown logic triggering the
   212  			// "reopen" code path for files that are removed and then recreated.
   213  			// These errors are harmless so we ignore them to allow clean shutdown.
   214  			_ = t.Stop()
   215  			t.Cleanup()
   216  		}
   217  	}()
   218  
   219  	cmd.WaitForSignal()
   220  }
   221  
   222  func init() {
   223  	cmd.RegisterCommand("log-validator", main, &cmd.ConfigValidator{Config: &Config{}})
   224  }
   225  

View as plain text