text_parse.go

Documentation: github.com/prometheus/common/expfmt

     1  // Copyright 2014 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package expfmt
    15  
    16  import (
    17  	"bufio"
    18  	"bytes"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"math"
    23  	"strconv"
    24  	"strings"
    25  
    26  	dto "github.com/prometheus/client_model/go"
    27  
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"github.com/prometheus/common/model"
    31  )
    32  
// A stateFn is a function that represents a state in a state machine. By
// executing it, the state is progressed to the next state. The stateFn returns
// another stateFn, which represents the new state. The end state is represented
// by nil.
//
// The parser's states are methods of TextParser; TextToMetricFamilies keeps
// calling the returned stateFn until it receives nil.
type stateFn func() stateFn
    38  
    39  // ParseError signals errors while parsing the simple and flat text-based
    40  // exchange format.
    41  type ParseError struct {
    42  	Line int
    43  	Msg  string
    44  }
    45  
    46  // Error implements the error interface.
    47  func (e ParseError) Error() string {
    48  	return fmt.Sprintf("text format parsing error in line %d: %s", e.Line, e.Msg)
    49  }
    50  
// TextParser is used to parse the simple and flat text-based exchange format. Its
// zero value is ready to use.
//
// The fields below hold the state of the parse in progress; the state
// functions communicate exclusively through them.
type TextParser struct {
	metricFamiliesByName map[string]*dto.MetricFamily // Metric families created so far, keyed by name.
	buf                  *bufio.Reader                // Where the parsed input is read through.
	err                  error                        // Most recent error.
	lineCount            int                          // Tracks the line count for error messages.
	currentByte          byte                         // The most recent byte read.
	currentToken         bytes.Buffer                 // Re-used each time a token has to be gathered from multiple bytes.
	currentMF            *dto.MetricFamily            // Metric family the current line refers to.
	currentMetric        *dto.Metric                  // Metric the current sample line is parsed into.
	currentLabelPair     *dto.LabelPair               // Label pair whose name was read most recently.

	// The remaining member variables are only used for summaries/histograms.
	currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le'
	// Summary specific.
	summaries       map[uint64]*dto.Metric // Key is created with LabelsToSignature.
	currentQuantile float64                // Parsed 'quantile' label of the current line, NaN if none was seen.
	// Histogram specific.
	histograms    map[uint64]*dto.Metric // Key is created with LabelsToSignature.
	currentBucket float64                // Parsed 'le' label of the current line, NaN if none was seen.
	// These tell us if the currently processed line ends on '_count' or
	// '_sum' respectively and belong to a summary/histogram, representing the sample
	// count and sum of that summary/histogram.
	currentIsSummaryCount, currentIsSummarySum     bool
	currentIsHistogramCount, currentIsHistogramSum bool
}
    78  
    79  // TextToMetricFamilies reads 'in' as the simple and flat text-based exchange
    80  // format and creates MetricFamily proto messages. It returns the MetricFamily
    81  // proto messages in a map where the metric names are the keys, along with any
    82  // error encountered.
    83  //
    84  // If the input contains duplicate metrics (i.e. lines with the same metric name
    85  // and exactly the same label set), the resulting MetricFamily will contain
    86  // duplicate Metric proto messages. Similar is true for duplicate label
    87  // names. Checks for duplicates have to be performed separately, if required.
    88  // Also note that neither the metrics within each MetricFamily are sorted nor
    89  // the label pairs within each Metric. Sorting is not required for the most
    90  // frequent use of this method, which is sample ingestion in the Prometheus
    91  // server. However, for presentation purposes, you might want to sort the
    92  // metrics, and in some cases, you must sort the labels, e.g. for consumption by
    93  // the metric family injection hook of the Prometheus registry.
    94  //
    95  // Summaries and histograms are rather special beasts. You would probably not
    96  // use them in the simple text format anyway. This method can deal with
    97  // summaries and histograms if they are presented in exactly the way the
    98  // text.Create function creates them.
    99  //
   100  // This method must not be called concurrently. If you want to parse different
   101  // input concurrently, instantiate a separate Parser for each goroutine.
   102  func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) {
   103  	p.reset(in)
   104  	for nextState := p.startOfLine; nextState != nil; nextState = nextState() {
   105  		// Magic happens here...
   106  	}
   107  	// Get rid of empty metric families.
   108  	for k, mf := range p.metricFamiliesByName {
   109  		if len(mf.GetMetric()) == 0 {
   110  			delete(p.metricFamiliesByName, k)
   111  		}
   112  	}
   113  	// If p.err is io.EOF now, we have run into a premature end of the input
   114  	// stream. Turn this error into something nicer and more
   115  	// meaningful. (io.EOF is often used as a signal for the legitimate end
   116  	// of an input stream.)
   117  	if p.err != nil && errors.Is(p.err, io.EOF) {
   118  		p.parseError("unexpected end of input stream")
   119  	}
   120  	return p.metricFamiliesByName, p.err
   121  }
   122  
   123  func (p *TextParser) reset(in io.Reader) {
   124  	p.metricFamiliesByName = map[string]*dto.MetricFamily{}
   125  	if p.buf == nil {
   126  		p.buf = bufio.NewReader(in)
   127  	} else {
   128  		p.buf.Reset(in)
   129  	}
   130  	p.err = nil
   131  	p.lineCount = 0
   132  	if p.summaries == nil || len(p.summaries) > 0 {
   133  		p.summaries = map[uint64]*dto.Metric{}
   134  	}
   135  	if p.histograms == nil || len(p.histograms) > 0 {
   136  		p.histograms = map[uint64]*dto.Metric{}
   137  	}
   138  	p.currentQuantile = math.NaN()
   139  	p.currentBucket = math.NaN()
   140  }
   141  
   142  // startOfLine represents the state where the next byte read from p.buf is the
   143  // start of a line (or whitespace leading up to it).
   144  func (p *TextParser) startOfLine() stateFn {
   145  	p.lineCount++
   146  	if p.skipBlankTab(); p.err != nil {
   147  		// This is the only place that we expect to see io.EOF,
   148  		// which is not an error but the signal that we are done.
   149  		// Any other error that happens to align with the start of
   150  		// a line is still an error.
   151  		if errors.Is(p.err, io.EOF) {
   152  			p.err = nil
   153  		}
   154  		return nil
   155  	}
   156  	switch p.currentByte {
   157  	case '#':
   158  		return p.startComment
   159  	case '\n':
   160  		return p.startOfLine // Empty line, start the next one.
   161  	}
   162  	return p.readingMetricName
   163  }
   164  
   165  // startComment represents the state where the next byte read from p.buf is the
   166  // start of a comment (or whitespace leading up to it).
   167  func (p *TextParser) startComment() stateFn {
   168  	if p.skipBlankTab(); p.err != nil {
   169  		return nil // Unexpected end of input.
   170  	}
   171  	if p.currentByte == '\n' {
   172  		return p.startOfLine
   173  	}
   174  	if p.readTokenUntilWhitespace(); p.err != nil {
   175  		return nil // Unexpected end of input.
   176  	}
   177  	// If we have hit the end of line already, there is nothing left
   178  	// to do. This is not considered a syntax error.
   179  	if p.currentByte == '\n' {
   180  		return p.startOfLine
   181  	}
   182  	keyword := p.currentToken.String()
   183  	if keyword != "HELP" && keyword != "TYPE" {
   184  		// Generic comment, ignore by fast forwarding to end of line.
   185  		for p.currentByte != '\n' {
   186  			if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
   187  				return nil // Unexpected end of input.
   188  			}
   189  		}
   190  		return p.startOfLine
   191  	}
   192  	// There is something. Next has to be a metric name.
   193  	if p.skipBlankTab(); p.err != nil {
   194  		return nil // Unexpected end of input.
   195  	}
   196  	if p.readTokenAsMetricName(); p.err != nil {
   197  		return nil // Unexpected end of input.
   198  	}
   199  	if p.currentByte == '\n' {
   200  		// At the end of the line already.
   201  		// Again, this is not considered a syntax error.
   202  		return p.startOfLine
   203  	}
   204  	if !isBlankOrTab(p.currentByte) {
   205  		p.parseError("invalid metric name in comment")
   206  		return nil
   207  	}
   208  	p.setOrCreateCurrentMF()
   209  	if p.skipBlankTab(); p.err != nil {
   210  		return nil // Unexpected end of input.
   211  	}
   212  	if p.currentByte == '\n' {
   213  		// At the end of the line already.
   214  		// Again, this is not considered a syntax error.
   215  		return p.startOfLine
   216  	}
   217  	switch keyword {
   218  	case "HELP":
   219  		return p.readingHelp
   220  	case "TYPE":
   221  		return p.readingType
   222  	}
   223  	panic(fmt.Sprintf("code error: unexpected keyword %q", keyword))
   224  }
   225  
   226  // readingMetricName represents the state where the last byte read (now in
   227  // p.currentByte) is the first byte of a metric name.
   228  func (p *TextParser) readingMetricName() stateFn {
   229  	if p.readTokenAsMetricName(); p.err != nil {
   230  		return nil
   231  	}
   232  	if p.currentToken.Len() == 0 {
   233  		p.parseError("invalid metric name")
   234  		return nil
   235  	}
   236  	p.setOrCreateCurrentMF()
   237  	// Now is the time to fix the type if it hasn't happened yet.
   238  	if p.currentMF.Type == nil {
   239  		p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
   240  	}
   241  	p.currentMetric = &dto.Metric{}
   242  	// Do not append the newly created currentMetric to
   243  	// currentMF.Metric right now. First wait if this is a summary,
   244  	// and the metric exists already, which we can only know after
   245  	// having read all the labels.
   246  	if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
   247  		return nil // Unexpected end of input.
   248  	}
   249  	return p.readingLabels
   250  }
   251  
   252  // readingLabels represents the state where the last byte read (now in
   253  // p.currentByte) is either the first byte of the label set (i.e. a '{'), or the
   254  // first byte of the value (otherwise).
   255  func (p *TextParser) readingLabels() stateFn {
   256  	// Summaries/histograms are special. We have to reset the
   257  	// currentLabels map, currentQuantile and currentBucket before starting to
   258  	// read labels.
   259  	if p.currentMF.GetType() == dto.MetricType_SUMMARY || p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
   260  		p.currentLabels = map[string]string{}
   261  		p.currentLabels[string(model.MetricNameLabel)] = p.currentMF.GetName()
   262  		p.currentQuantile = math.NaN()
   263  		p.currentBucket = math.NaN()
   264  	}
   265  	if p.currentByte != '{' {
   266  		return p.readingValue
   267  	}
   268  	return p.startLabelName
   269  }
   270  
   271  // startLabelName represents the state where the next byte read from p.buf is
   272  // the start of a label name (or whitespace leading up to it).
   273  func (p *TextParser) startLabelName() stateFn {
   274  	if p.skipBlankTab(); p.err != nil {
   275  		return nil // Unexpected end of input.
   276  	}
   277  	if p.currentByte == '}' {
   278  		if p.skipBlankTab(); p.err != nil {
   279  			return nil // Unexpected end of input.
   280  		}
   281  		return p.readingValue
   282  	}
   283  	if p.readTokenAsLabelName(); p.err != nil {
   284  		return nil // Unexpected end of input.
   285  	}
   286  	if p.currentToken.Len() == 0 {
   287  		p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
   288  		return nil
   289  	}
   290  	p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
   291  	if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
   292  		p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
   293  		return nil
   294  	}
   295  	// Special summary/histogram treatment. Don't add 'quantile' and 'le'
   296  	// labels to 'real' labels.
   297  	if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) &&
   298  		!(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) {
   299  		p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair)
   300  	}
   301  	if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
   302  		return nil // Unexpected end of input.
   303  	}
   304  	if p.currentByte != '=' {
   305  		p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
   306  		return nil
   307  	}
   308  	// Check for duplicate label names.
   309  	labels := make(map[string]struct{})
   310  	for _, l := range p.currentMetric.Label {
   311  		lName := l.GetName()
   312  		if _, exists := labels[lName]; !exists {
   313  			labels[lName] = struct{}{}
   314  		} else {
   315  			p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
   316  			return nil
   317  		}
   318  	}
   319  	return p.startLabelValue
   320  }
   321  
   322  // startLabelValue represents the state where the next byte read from p.buf is
   323  // the start of a (quoted) label value (or whitespace leading up to it).
   324  func (p *TextParser) startLabelValue() stateFn {
   325  	if p.skipBlankTab(); p.err != nil {
   326  		return nil // Unexpected end of input.
   327  	}
   328  	if p.currentByte != '"' {
   329  		p.parseError(fmt.Sprintf("expected '\"' at start of label value, found %q", p.currentByte))
   330  		return nil
   331  	}
   332  	if p.readTokenAsLabelValue(); p.err != nil {
   333  		return nil
   334  	}
   335  	if !model.LabelValue(p.currentToken.String()).IsValid() {
   336  		p.parseError(fmt.Sprintf("invalid label value %q", p.currentToken.String()))
   337  		return nil
   338  	}
   339  	p.currentLabelPair.Value = proto.String(p.currentToken.String())
   340  	// Special treatment of summaries:
   341  	// - Quantile labels are special, will result in dto.Quantile later.
   342  	// - Other labels have to be added to currentLabels for signature calculation.
   343  	if p.currentMF.GetType() == dto.MetricType_SUMMARY {
   344  		if p.currentLabelPair.GetName() == model.QuantileLabel {
   345  			if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
   346  				// Create a more helpful error message.
   347  				p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
   348  				return nil
   349  			}
   350  		} else {
   351  			p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
   352  		}
   353  	}
   354  	// Similar special treatment of histograms.
   355  	if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
   356  		if p.currentLabelPair.GetName() == model.BucketLabel {
   357  			if p.currentBucket, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
   358  				// Create a more helpful error message.
   359  				p.parseError(fmt.Sprintf("expected float as value for 'le' label, got %q", p.currentLabelPair.GetValue()))
   360  				return nil
   361  			}
   362  		} else {
   363  			p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
   364  		}
   365  	}
   366  	if p.skipBlankTab(); p.err != nil {
   367  		return nil // Unexpected end of input.
   368  	}
   369  	switch p.currentByte {
   370  	case ',':
   371  		return p.startLabelName
   372  
   373  	case '}':
   374  		if p.skipBlankTab(); p.err != nil {
   375  			return nil // Unexpected end of input.
   376  		}
   377  		return p.readingValue
   378  	default:
   379  		p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
   380  		return nil
   381  	}
   382  }
   383  
   384  // readingValue represents the state where the last byte read (now in
   385  // p.currentByte) is the first byte of the sample value (i.e. a float).
   386  func (p *TextParser) readingValue() stateFn {
   387  	// When we are here, we have read all the labels, so for the
   388  	// special case of a summary/histogram, we can finally find out
   389  	// if the metric already exists.
   390  	if p.currentMF.GetType() == dto.MetricType_SUMMARY {
   391  		signature := model.LabelsToSignature(p.currentLabels)
   392  		if summary := p.summaries[signature]; summary != nil {
   393  			p.currentMetric = summary
   394  		} else {
   395  			p.summaries[signature] = p.currentMetric
   396  			p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
   397  		}
   398  	} else if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
   399  		signature := model.LabelsToSignature(p.currentLabels)
   400  		if histogram := p.histograms[signature]; histogram != nil {
   401  			p.currentMetric = histogram
   402  		} else {
   403  			p.histograms[signature] = p.currentMetric
   404  			p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
   405  		}
   406  	} else {
   407  		p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
   408  	}
   409  	if p.readTokenUntilWhitespace(); p.err != nil {
   410  		return nil // Unexpected end of input.
   411  	}
   412  	value, err := parseFloat(p.currentToken.String())
   413  	if err != nil {
   414  		// Create a more helpful error message.
   415  		p.parseError(fmt.Sprintf("expected float as value, got %q", p.currentToken.String()))
   416  		return nil
   417  	}
   418  	switch p.currentMF.GetType() {
   419  	case dto.MetricType_COUNTER:
   420  		p.currentMetric.Counter = &dto.Counter{Value: proto.Float64(value)}
   421  	case dto.MetricType_GAUGE:
   422  		p.currentMetric.Gauge = &dto.Gauge{Value: proto.Float64(value)}
   423  	case dto.MetricType_UNTYPED:
   424  		p.currentMetric.Untyped = &dto.Untyped{Value: proto.Float64(value)}
   425  	case dto.MetricType_SUMMARY:
   426  		// *sigh*
   427  		if p.currentMetric.Summary == nil {
   428  			p.currentMetric.Summary = &dto.Summary{}
   429  		}
   430  		switch {
   431  		case p.currentIsSummaryCount:
   432  			p.currentMetric.Summary.SampleCount = proto.Uint64(uint64(value))
   433  		case p.currentIsSummarySum:
   434  			p.currentMetric.Summary.SampleSum = proto.Float64(value)
   435  		case !math.IsNaN(p.currentQuantile):
   436  			p.currentMetric.Summary.Quantile = append(
   437  				p.currentMetric.Summary.Quantile,
   438  				&dto.Quantile{
   439  					Quantile: proto.Float64(p.currentQuantile),
   440  					Value:    proto.Float64(value),
   441  				},
   442  			)
   443  		}
   444  	case dto.MetricType_HISTOGRAM:
   445  		// *sigh*
   446  		if p.currentMetric.Histogram == nil {
   447  			p.currentMetric.Histogram = &dto.Histogram{}
   448  		}
   449  		switch {
   450  		case p.currentIsHistogramCount:
   451  			p.currentMetric.Histogram.SampleCount = proto.Uint64(uint64(value))
   452  		case p.currentIsHistogramSum:
   453  			p.currentMetric.Histogram.SampleSum = proto.Float64(value)
   454  		case !math.IsNaN(p.currentBucket):
   455  			p.currentMetric.Histogram.Bucket = append(
   456  				p.currentMetric.Histogram.Bucket,
   457  				&dto.Bucket{
   458  					UpperBound:      proto.Float64(p.currentBucket),
   459  					CumulativeCount: proto.Uint64(uint64(value)),
   460  				},
   461  			)
   462  		}
   463  	default:
   464  		p.err = fmt.Errorf("unexpected type for metric name %q", p.currentMF.GetName())
   465  	}
   466  	if p.currentByte == '\n' {
   467  		return p.startOfLine
   468  	}
   469  	return p.startTimestamp
   470  }
   471  
   472  // startTimestamp represents the state where the next byte read from p.buf is
   473  // the start of the timestamp (or whitespace leading up to it).
   474  func (p *TextParser) startTimestamp() stateFn {
   475  	if p.skipBlankTab(); p.err != nil {
   476  		return nil // Unexpected end of input.
   477  	}
   478  	if p.readTokenUntilWhitespace(); p.err != nil {
   479  		return nil // Unexpected end of input.
   480  	}
   481  	timestamp, err := strconv.ParseInt(p.currentToken.String(), 10, 64)
   482  	if err != nil {
   483  		// Create a more helpful error message.
   484  		p.parseError(fmt.Sprintf("expected integer as timestamp, got %q", p.currentToken.String()))
   485  		return nil
   486  	}
   487  	p.currentMetric.TimestampMs = proto.Int64(timestamp)
   488  	if p.readTokenUntilNewline(false); p.err != nil {
   489  		return nil // Unexpected end of input.
   490  	}
   491  	if p.currentToken.Len() > 0 {
   492  		p.parseError(fmt.Sprintf("spurious string after timestamp: %q", p.currentToken.String()))
   493  		return nil
   494  	}
   495  	return p.startOfLine
   496  }
   497  
   498  // readingHelp represents the state where the last byte read (now in
   499  // p.currentByte) is the first byte of the docstring after 'HELP'.
   500  func (p *TextParser) readingHelp() stateFn {
   501  	if p.currentMF.Help != nil {
   502  		p.parseError(fmt.Sprintf("second HELP line for metric name %q", p.currentMF.GetName()))
   503  		return nil
   504  	}
   505  	// Rest of line is the docstring.
   506  	if p.readTokenUntilNewline(true); p.err != nil {
   507  		return nil // Unexpected end of input.
   508  	}
   509  	p.currentMF.Help = proto.String(p.currentToken.String())
   510  	return p.startOfLine
   511  }
   512  
   513  // readingType represents the state where the last byte read (now in
   514  // p.currentByte) is the first byte of the type hint after 'HELP'.
   515  func (p *TextParser) readingType() stateFn {
   516  	if p.currentMF.Type != nil {
   517  		p.parseError(fmt.Sprintf("second TYPE line for metric name %q, or TYPE reported after samples", p.currentMF.GetName()))
   518  		return nil
   519  	}
   520  	// Rest of line is the type.
   521  	if p.readTokenUntilNewline(false); p.err != nil {
   522  		return nil // Unexpected end of input.
   523  	}
   524  	metricType, ok := dto.MetricType_value[strings.ToUpper(p.currentToken.String())]
   525  	if !ok {
   526  		p.parseError(fmt.Sprintf("unknown metric type %q", p.currentToken.String()))
   527  		return nil
   528  	}
   529  	p.currentMF.Type = dto.MetricType(metricType).Enum()
   530  	return p.startOfLine
   531  }
   532  
   533  // parseError sets p.err to a ParseError at the current line with the given
   534  // message.
   535  func (p *TextParser) parseError(msg string) {
   536  	p.err = ParseError{
   537  		Line: p.lineCount,
   538  		Msg:  msg,
   539  	}
   540  }
   541  
   542  // skipBlankTab reads (and discards) bytes from p.buf until it encounters a byte
   543  // that is neither ' ' nor '\t'. That byte is left in p.currentByte.
   544  func (p *TextParser) skipBlankTab() {
   545  	for {
   546  		if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil || !isBlankOrTab(p.currentByte) {
   547  			return
   548  		}
   549  	}
   550  }
   551  
   552  // skipBlankTabIfCurrentBlankTab works exactly as skipBlankTab but doesn't do
   553  // anything if p.currentByte is neither ' ' nor '\t'.
   554  func (p *TextParser) skipBlankTabIfCurrentBlankTab() {
   555  	if isBlankOrTab(p.currentByte) {
   556  		p.skipBlankTab()
   557  	}
   558  }
   559  
   560  // readTokenUntilWhitespace copies bytes from p.buf into p.currentToken.  The
   561  // first byte considered is the byte already read (now in p.currentByte).  The
   562  // first whitespace byte encountered is still copied into p.currentByte, but not
   563  // into p.currentToken.
   564  func (p *TextParser) readTokenUntilWhitespace() {
   565  	p.currentToken.Reset()
   566  	for p.err == nil && !isBlankOrTab(p.currentByte) && p.currentByte != '\n' {
   567  		p.currentToken.WriteByte(p.currentByte)
   568  		p.currentByte, p.err = p.buf.ReadByte()
   569  	}
   570  }
   571  
   572  // readTokenUntilNewline copies bytes from p.buf into p.currentToken.  The first
   573  // byte considered is the byte already read (now in p.currentByte).  The first
   574  // newline byte encountered is still copied into p.currentByte, but not into
   575  // p.currentToken. If recognizeEscapeSequence is true, two escape sequences are
   576  // recognized: '\\' translates into '\', and '\n' into a line-feed character.
   577  // All other escape sequences are invalid and cause an error.
   578  func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
   579  	p.currentToken.Reset()
   580  	escaped := false
   581  	for p.err == nil {
   582  		if recognizeEscapeSequence && escaped {
   583  			switch p.currentByte {
   584  			case '\\':
   585  				p.currentToken.WriteByte(p.currentByte)
   586  			case 'n':
   587  				p.currentToken.WriteByte('\n')
   588  			default:
   589  				p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
   590  				return
   591  			}
   592  			escaped = false
   593  		} else {
   594  			switch p.currentByte {
   595  			case '\n':
   596  				return
   597  			case '\\':
   598  				escaped = true
   599  			default:
   600  				p.currentToken.WriteByte(p.currentByte)
   601  			}
   602  		}
   603  		p.currentByte, p.err = p.buf.ReadByte()
   604  	}
   605  }
   606  
   607  // readTokenAsMetricName copies a metric name from p.buf into p.currentToken.
   608  // The first byte considered is the byte already read (now in p.currentByte).
   609  // The first byte not part of a metric name is still copied into p.currentByte,
   610  // but not into p.currentToken.
   611  func (p *TextParser) readTokenAsMetricName() {
   612  	p.currentToken.Reset()
   613  	if !isValidMetricNameStart(p.currentByte) {
   614  		return
   615  	}
   616  	for {
   617  		p.currentToken.WriteByte(p.currentByte)
   618  		p.currentByte, p.err = p.buf.ReadByte()
   619  		if p.err != nil || !isValidMetricNameContinuation(p.currentByte) {
   620  			return
   621  		}
   622  	}
   623  }
   624  
   625  // readTokenAsLabelName copies a label name from p.buf into p.currentToken.
   626  // The first byte considered is the byte already read (now in p.currentByte).
   627  // The first byte not part of a label name is still copied into p.currentByte,
   628  // but not into p.currentToken.
   629  func (p *TextParser) readTokenAsLabelName() {
   630  	p.currentToken.Reset()
   631  	if !isValidLabelNameStart(p.currentByte) {
   632  		return
   633  	}
   634  	for {
   635  		p.currentToken.WriteByte(p.currentByte)
   636  		p.currentByte, p.err = p.buf.ReadByte()
   637  		if p.err != nil || !isValidLabelNameContinuation(p.currentByte) {
   638  			return
   639  		}
   640  	}
   641  }
   642  
   643  // readTokenAsLabelValue copies a label value from p.buf into p.currentToken.
   644  // In contrast to the other 'readTokenAs...' functions, which start with the
   645  // last read byte in p.currentByte, this method ignores p.currentByte and starts
   646  // with reading a new byte from p.buf. The first byte not part of a label value
   647  // is still copied into p.currentByte, but not into p.currentToken.
   648  func (p *TextParser) readTokenAsLabelValue() {
   649  	p.currentToken.Reset()
   650  	escaped := false
   651  	for {
   652  		if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
   653  			return
   654  		}
   655  		if escaped {
   656  			switch p.currentByte {
   657  			case '"', '\\':
   658  				p.currentToken.WriteByte(p.currentByte)
   659  			case 'n':
   660  				p.currentToken.WriteByte('\n')
   661  			default:
   662  				p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
   663  				return
   664  			}
   665  			escaped = false
   666  			continue
   667  		}
   668  		switch p.currentByte {
   669  		case '"':
   670  			return
   671  		case '\n':
   672  			p.parseError(fmt.Sprintf("label value %q contains unescaped new-line", p.currentToken.String()))
   673  			return
   674  		case '\\':
   675  			escaped = true
   676  		default:
   677  			p.currentToken.WriteByte(p.currentByte)
   678  		}
   679  	}
   680  }
   681  
   682  func (p *TextParser) setOrCreateCurrentMF() {
   683  	p.currentIsSummaryCount = false
   684  	p.currentIsSummarySum = false
   685  	p.currentIsHistogramCount = false
   686  	p.currentIsHistogramSum = false
   687  	name := p.currentToken.String()
   688  	if p.currentMF = p.metricFamiliesByName[name]; p.currentMF != nil {
   689  		return
   690  	}
   691  	// Try out if this is a _sum or _count for a summary/histogram.
   692  	summaryName := summaryMetricName(name)
   693  	if p.currentMF = p.metricFamiliesByName[summaryName]; p.currentMF != nil {
   694  		if p.currentMF.GetType() == dto.MetricType_SUMMARY {
   695  			if isCount(name) {
   696  				p.currentIsSummaryCount = true
   697  			}
   698  			if isSum(name) {
   699  				p.currentIsSummarySum = true
   700  			}
   701  			return
   702  		}
   703  	}
   704  	histogramName := histogramMetricName(name)
   705  	if p.currentMF = p.metricFamiliesByName[histogramName]; p.currentMF != nil {
   706  		if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
   707  			if isCount(name) {
   708  				p.currentIsHistogramCount = true
   709  			}
   710  			if isSum(name) {
   711  				p.currentIsHistogramSum = true
   712  			}
   713  			return
   714  		}
   715  	}
   716  	p.currentMF = &dto.MetricFamily{Name: proto.String(name)}
   717  	p.metricFamiliesByName[name] = p.currentMF
   718  }
   719  
   720  func isValidLabelNameStart(b byte) bool {
   721  	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_'
   722  }
   723  
   724  func isValidLabelNameContinuation(b byte) bool {
   725  	return isValidLabelNameStart(b) || (b >= '0' && b <= '9')
   726  }
   727  
   728  func isValidMetricNameStart(b byte) bool {
   729  	return isValidLabelNameStart(b) || b == ':'
   730  }
   731  
   732  func isValidMetricNameContinuation(b byte) bool {
   733  	return isValidLabelNameContinuation(b) || b == ':'
   734  }
   735  
   736  func isBlankOrTab(b byte) bool {
   737  	return b == ' ' || b == '\t'
   738  }
   739  
   740  func isCount(name string) bool {
   741  	return len(name) > 6 && name[len(name)-6:] == "_count"
   742  }
   743  
   744  func isSum(name string) bool {
   745  	return len(name) > 4 && name[len(name)-4:] == "_sum"
   746  }
   747  
   748  func isBucket(name string) bool {
   749  	return len(name) > 7 && name[len(name)-7:] == "_bucket"
   750  }
   751  
   752  func summaryMetricName(name string) string {
   753  	switch {
   754  	case isCount(name):
   755  		return name[:len(name)-6]
   756  	case isSum(name):
   757  		return name[:len(name)-4]
   758  	default:
   759  		return name
   760  	}
   761  }
   762  
   763  func histogramMetricName(name string) string {
   764  	switch {
   765  	case isCount(name):
   766  		return name[:len(name)-6]
   767  	case isSum(name):
   768  		return name[:len(name)-4]
   769  	case isBucket(name):
   770  		return name[:len(name)-7]
   771  	default:
   772  		return name
   773  	}
   774  }
   775  
   776  func parseFloat(s string) (float64, error) {
   777  	if strings.ContainsAny(s, "pP_") {
   778  		return 0, fmt.Errorf("unsupported character in float")
   779  	}
   780  	return strconv.ParseFloat(s, 64)
   781  }
   782
View as plain text