1
2
3
4
5
6
7
8
9
10
11
12
13
14 package expfmt
15
16 import (
17 "bufio"
18 "bytes"
19 "errors"
20 "fmt"
21 "io"
22 "math"
23 "strconv"
24 "strings"
25
26 dto "github.com/prometheus/client_model/go"
27
28 "google.golang.org/protobuf/proto"
29
30 "github.com/prometheus/common/model"
31 )
32
33
34
35
36
// A stateFn is one state of the parser's state machine. Calling it performs
// the work of that state and yields the state to run next; a nil result stops
// the machine (see the driving loop in TextToMetricFamilies).
type stateFn func() stateFn
38
39
40
// ParseError describes a problem found while parsing the text format. Line is
// the number of the input line that was being processed when the problem was
// recorded, Msg is a human-readable description.
type ParseError struct {
	Line int
	Msg  string
}

// Error implements the error interface, rendering the line number followed by
// the message.
func (pe ParseError) Error() string {
	const layout = "text format parsing error in line %d: %s"
	return fmt.Sprintf(layout, pe.Line, pe.Msg)
}
50
51
52
// TextParser parses the text exposition format into metric families; the
// entry point is TextToMetricFamilies. The reader and the bookkeeping maps are
// created on demand by reset, so a zero-value TextParser needs no explicit
// initialization.
type TextParser struct {
	metricFamiliesByName map[string]*dto.MetricFamily // Families parsed so far, keyed by family name.
	buf                  *bufio.Reader                // Buffered reader over the input; reused between runs.
	err                  error                        // Most recent read or parse error.
	lineCount            int                          // Number of the line being parsed; reported in ParseError.
	currentByte          byte                         // Most recent byte read from buf.
	currentToken         bytes.Buffer                 // Scratch buffer filled by the readToken* methods.
	currentMF            *dto.MetricFamily
	currentMetric        *dto.Metric
	currentLabelPair     *dto.LabelPair

	// The fields below are only maintained while parsing summaries and
	// histograms. currentLabels holds the labels of the current sample,
	// including the metric name but excluding 'quantile'/'le'.
	currentLabels map[string]string
	// Summary metrics seen so far, keyed by the signature of currentLabels.
	summaries       map[uint64]*dto.Metric
	currentQuantile float64 // Parsed 'quantile' label value; NaN if the sample has none.
	// Histogram metrics seen so far, keyed by the signature of currentLabels.
	histograms    map[uint64]*dto.Metric
	currentBucket float64 // Parsed 'le' label value; NaN if the sample has none.
	// Set by setOrCreateCurrentMF when the name on the current line carries
	// a "_count" or "_sum" suffix and its stem names a known summary or
	// histogram family.
	currentIsSummaryCount, currentIsSummarySum     bool
	currentIsHistogramCount, currentIsHistogramSum bool
}
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102 func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) {
103 p.reset(in)
104 for nextState := p.startOfLine; nextState != nil; nextState = nextState() {
105
106 }
107
108 for k, mf := range p.metricFamiliesByName {
109 if len(mf.GetMetric()) == 0 {
110 delete(p.metricFamiliesByName, k)
111 }
112 }
113
114
115
116
117 if p.err != nil && errors.Is(p.err, io.EOF) {
118 p.parseError("unexpected end of input stream")
119 }
120 return p.metricFamiliesByName, p.err
121 }
122
123 func (p *TextParser) reset(in io.Reader) {
124 p.metricFamiliesByName = map[string]*dto.MetricFamily{}
125 if p.buf == nil {
126 p.buf = bufio.NewReader(in)
127 } else {
128 p.buf.Reset(in)
129 }
130 p.err = nil
131 p.lineCount = 0
132 if p.summaries == nil || len(p.summaries) > 0 {
133 p.summaries = map[uint64]*dto.Metric{}
134 }
135 if p.histograms == nil || len(p.histograms) > 0 {
136 p.histograms = map[uint64]*dto.Metric{}
137 }
138 p.currentQuantile = math.NaN()
139 p.currentBucket = math.NaN()
140 }
141
142
143
144 func (p *TextParser) startOfLine() stateFn {
145 p.lineCount++
146 if p.skipBlankTab(); p.err != nil {
147
148
149
150
151 if errors.Is(p.err, io.EOF) {
152 p.err = nil
153 }
154 return nil
155 }
156 switch p.currentByte {
157 case '#':
158 return p.startComment
159 case '\n':
160 return p.startOfLine
161 }
162 return p.readingMetricName
163 }
164
165
166
167 func (p *TextParser) startComment() stateFn {
168 if p.skipBlankTab(); p.err != nil {
169 return nil
170 }
171 if p.currentByte == '\n' {
172 return p.startOfLine
173 }
174 if p.readTokenUntilWhitespace(); p.err != nil {
175 return nil
176 }
177
178
179 if p.currentByte == '\n' {
180 return p.startOfLine
181 }
182 keyword := p.currentToken.String()
183 if keyword != "HELP" && keyword != "TYPE" {
184
185 for p.currentByte != '\n' {
186 if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
187 return nil
188 }
189 }
190 return p.startOfLine
191 }
192
193 if p.skipBlankTab(); p.err != nil {
194 return nil
195 }
196 if p.readTokenAsMetricName(); p.err != nil {
197 return nil
198 }
199 if p.currentByte == '\n' {
200
201
202 return p.startOfLine
203 }
204 if !isBlankOrTab(p.currentByte) {
205 p.parseError("invalid metric name in comment")
206 return nil
207 }
208 p.setOrCreateCurrentMF()
209 if p.skipBlankTab(); p.err != nil {
210 return nil
211 }
212 if p.currentByte == '\n' {
213
214
215 return p.startOfLine
216 }
217 switch keyword {
218 case "HELP":
219 return p.readingHelp
220 case "TYPE":
221 return p.readingType
222 }
223 panic(fmt.Sprintf("code error: unexpected keyword %q", keyword))
224 }
225
226
227
228 func (p *TextParser) readingMetricName() stateFn {
229 if p.readTokenAsMetricName(); p.err != nil {
230 return nil
231 }
232 if p.currentToken.Len() == 0 {
233 p.parseError("invalid metric name")
234 return nil
235 }
236 p.setOrCreateCurrentMF()
237
238 if p.currentMF.Type == nil {
239 p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
240 }
241 p.currentMetric = &dto.Metric{}
242
243
244
245
246 if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
247 return nil
248 }
249 return p.readingLabels
250 }
251
252
253
254
255 func (p *TextParser) readingLabels() stateFn {
256
257
258
259 if p.currentMF.GetType() == dto.MetricType_SUMMARY || p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
260 p.currentLabels = map[string]string{}
261 p.currentLabels[string(model.MetricNameLabel)] = p.currentMF.GetName()
262 p.currentQuantile = math.NaN()
263 p.currentBucket = math.NaN()
264 }
265 if p.currentByte != '{' {
266 return p.readingValue
267 }
268 return p.startLabelName
269 }
270
271
272
273 func (p *TextParser) startLabelName() stateFn {
274 if p.skipBlankTab(); p.err != nil {
275 return nil
276 }
277 if p.currentByte == '}' {
278 if p.skipBlankTab(); p.err != nil {
279 return nil
280 }
281 return p.readingValue
282 }
283 if p.readTokenAsLabelName(); p.err != nil {
284 return nil
285 }
286 if p.currentToken.Len() == 0 {
287 p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
288 return nil
289 }
290 p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
291 if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
292 p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
293 return nil
294 }
295
296
297 if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) &&
298 !(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) {
299 p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair)
300 }
301 if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
302 return nil
303 }
304 if p.currentByte != '=' {
305 p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
306 return nil
307 }
308
309 labels := make(map[string]struct{})
310 for _, l := range p.currentMetric.Label {
311 lName := l.GetName()
312 if _, exists := labels[lName]; !exists {
313 labels[lName] = struct{}{}
314 } else {
315 p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
316 return nil
317 }
318 }
319 return p.startLabelValue
320 }
321
322
323
324 func (p *TextParser) startLabelValue() stateFn {
325 if p.skipBlankTab(); p.err != nil {
326 return nil
327 }
328 if p.currentByte != '"' {
329 p.parseError(fmt.Sprintf("expected '\"' at start of label value, found %q", p.currentByte))
330 return nil
331 }
332 if p.readTokenAsLabelValue(); p.err != nil {
333 return nil
334 }
335 if !model.LabelValue(p.currentToken.String()).IsValid() {
336 p.parseError(fmt.Sprintf("invalid label value %q", p.currentToken.String()))
337 return nil
338 }
339 p.currentLabelPair.Value = proto.String(p.currentToken.String())
340
341
342
343 if p.currentMF.GetType() == dto.MetricType_SUMMARY {
344 if p.currentLabelPair.GetName() == model.QuantileLabel {
345 if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
346
347 p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
348 return nil
349 }
350 } else {
351 p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
352 }
353 }
354
355 if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
356 if p.currentLabelPair.GetName() == model.BucketLabel {
357 if p.currentBucket, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
358
359 p.parseError(fmt.Sprintf("expected float as value for 'le' label, got %q", p.currentLabelPair.GetValue()))
360 return nil
361 }
362 } else {
363 p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
364 }
365 }
366 if p.skipBlankTab(); p.err != nil {
367 return nil
368 }
369 switch p.currentByte {
370 case ',':
371 return p.startLabelName
372
373 case '}':
374 if p.skipBlankTab(); p.err != nil {
375 return nil
376 }
377 return p.readingValue
378 default:
379 p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
380 return nil
381 }
382 }
383
384
385
386 func (p *TextParser) readingValue() stateFn {
387
388
389
390 if p.currentMF.GetType() == dto.MetricType_SUMMARY {
391 signature := model.LabelsToSignature(p.currentLabels)
392 if summary := p.summaries[signature]; summary != nil {
393 p.currentMetric = summary
394 } else {
395 p.summaries[signature] = p.currentMetric
396 p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
397 }
398 } else if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
399 signature := model.LabelsToSignature(p.currentLabels)
400 if histogram := p.histograms[signature]; histogram != nil {
401 p.currentMetric = histogram
402 } else {
403 p.histograms[signature] = p.currentMetric
404 p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
405 }
406 } else {
407 p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
408 }
409 if p.readTokenUntilWhitespace(); p.err != nil {
410 return nil
411 }
412 value, err := parseFloat(p.currentToken.String())
413 if err != nil {
414
415 p.parseError(fmt.Sprintf("expected float as value, got %q", p.currentToken.String()))
416 return nil
417 }
418 switch p.currentMF.GetType() {
419 case dto.MetricType_COUNTER:
420 p.currentMetric.Counter = &dto.Counter{Value: proto.Float64(value)}
421 case dto.MetricType_GAUGE:
422 p.currentMetric.Gauge = &dto.Gauge{Value: proto.Float64(value)}
423 case dto.MetricType_UNTYPED:
424 p.currentMetric.Untyped = &dto.Untyped{Value: proto.Float64(value)}
425 case dto.MetricType_SUMMARY:
426
427 if p.currentMetric.Summary == nil {
428 p.currentMetric.Summary = &dto.Summary{}
429 }
430 switch {
431 case p.currentIsSummaryCount:
432 p.currentMetric.Summary.SampleCount = proto.Uint64(uint64(value))
433 case p.currentIsSummarySum:
434 p.currentMetric.Summary.SampleSum = proto.Float64(value)
435 case !math.IsNaN(p.currentQuantile):
436 p.currentMetric.Summary.Quantile = append(
437 p.currentMetric.Summary.Quantile,
438 &dto.Quantile{
439 Quantile: proto.Float64(p.currentQuantile),
440 Value: proto.Float64(value),
441 },
442 )
443 }
444 case dto.MetricType_HISTOGRAM:
445
446 if p.currentMetric.Histogram == nil {
447 p.currentMetric.Histogram = &dto.Histogram{}
448 }
449 switch {
450 case p.currentIsHistogramCount:
451 p.currentMetric.Histogram.SampleCount = proto.Uint64(uint64(value))
452 case p.currentIsHistogramSum:
453 p.currentMetric.Histogram.SampleSum = proto.Float64(value)
454 case !math.IsNaN(p.currentBucket):
455 p.currentMetric.Histogram.Bucket = append(
456 p.currentMetric.Histogram.Bucket,
457 &dto.Bucket{
458 UpperBound: proto.Float64(p.currentBucket),
459 CumulativeCount: proto.Uint64(uint64(value)),
460 },
461 )
462 }
463 default:
464 p.err = fmt.Errorf("unexpected type for metric name %q", p.currentMF.GetName())
465 }
466 if p.currentByte == '\n' {
467 return p.startOfLine
468 }
469 return p.startTimestamp
470 }
471
472
473
474 func (p *TextParser) startTimestamp() stateFn {
475 if p.skipBlankTab(); p.err != nil {
476 return nil
477 }
478 if p.readTokenUntilWhitespace(); p.err != nil {
479 return nil
480 }
481 timestamp, err := strconv.ParseInt(p.currentToken.String(), 10, 64)
482 if err != nil {
483
484 p.parseError(fmt.Sprintf("expected integer as timestamp, got %q", p.currentToken.String()))
485 return nil
486 }
487 p.currentMetric.TimestampMs = proto.Int64(timestamp)
488 if p.readTokenUntilNewline(false); p.err != nil {
489 return nil
490 }
491 if p.currentToken.Len() > 0 {
492 p.parseError(fmt.Sprintf("spurious string after timestamp: %q", p.currentToken.String()))
493 return nil
494 }
495 return p.startOfLine
496 }
497
498
499
500 func (p *TextParser) readingHelp() stateFn {
501 if p.currentMF.Help != nil {
502 p.parseError(fmt.Sprintf("second HELP line for metric name %q", p.currentMF.GetName()))
503 return nil
504 }
505
506 if p.readTokenUntilNewline(true); p.err != nil {
507 return nil
508 }
509 p.currentMF.Help = proto.String(p.currentToken.String())
510 return p.startOfLine
511 }
512
513
514
515 func (p *TextParser) readingType() stateFn {
516 if p.currentMF.Type != nil {
517 p.parseError(fmt.Sprintf("second TYPE line for metric name %q, or TYPE reported after samples", p.currentMF.GetName()))
518 return nil
519 }
520
521 if p.readTokenUntilNewline(false); p.err != nil {
522 return nil
523 }
524 metricType, ok := dto.MetricType_value[strings.ToUpper(p.currentToken.String())]
525 if !ok {
526 p.parseError(fmt.Sprintf("unknown metric type %q", p.currentToken.String()))
527 return nil
528 }
529 p.currentMF.Type = dto.MetricType(metricType).Enum()
530 return p.startOfLine
531 }
532
533
534
535 func (p *TextParser) parseError(msg string) {
536 p.err = ParseError{
537 Line: p.lineCount,
538 Msg: msg,
539 }
540 }
541
542
543
544 func (p *TextParser) skipBlankTab() {
545 for {
546 if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil || !isBlankOrTab(p.currentByte) {
547 return
548 }
549 }
550 }
551
552
553
554 func (p *TextParser) skipBlankTabIfCurrentBlankTab() {
555 if isBlankOrTab(p.currentByte) {
556 p.skipBlankTab()
557 }
558 }
559
560
561
562
563
564 func (p *TextParser) readTokenUntilWhitespace() {
565 p.currentToken.Reset()
566 for p.err == nil && !isBlankOrTab(p.currentByte) && p.currentByte != '\n' {
567 p.currentToken.WriteByte(p.currentByte)
568 p.currentByte, p.err = p.buf.ReadByte()
569 }
570 }
571
572
573
574
575
576
577
578 func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
579 p.currentToken.Reset()
580 escaped := false
581 for p.err == nil {
582 if recognizeEscapeSequence && escaped {
583 switch p.currentByte {
584 case '\\':
585 p.currentToken.WriteByte(p.currentByte)
586 case 'n':
587 p.currentToken.WriteByte('\n')
588 default:
589 p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
590 return
591 }
592 escaped = false
593 } else {
594 switch p.currentByte {
595 case '\n':
596 return
597 case '\\':
598 escaped = true
599 default:
600 p.currentToken.WriteByte(p.currentByte)
601 }
602 }
603 p.currentByte, p.err = p.buf.ReadByte()
604 }
605 }
606
607
608
609
610
611 func (p *TextParser) readTokenAsMetricName() {
612 p.currentToken.Reset()
613 if !isValidMetricNameStart(p.currentByte) {
614 return
615 }
616 for {
617 p.currentToken.WriteByte(p.currentByte)
618 p.currentByte, p.err = p.buf.ReadByte()
619 if p.err != nil || !isValidMetricNameContinuation(p.currentByte) {
620 return
621 }
622 }
623 }
624
625
626
627
628
629 func (p *TextParser) readTokenAsLabelName() {
630 p.currentToken.Reset()
631 if !isValidLabelNameStart(p.currentByte) {
632 return
633 }
634 for {
635 p.currentToken.WriteByte(p.currentByte)
636 p.currentByte, p.err = p.buf.ReadByte()
637 if p.err != nil || !isValidLabelNameContinuation(p.currentByte) {
638 return
639 }
640 }
641 }
642
643
644
645
646
647
648 func (p *TextParser) readTokenAsLabelValue() {
649 p.currentToken.Reset()
650 escaped := false
651 for {
652 if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
653 return
654 }
655 if escaped {
656 switch p.currentByte {
657 case '"', '\\':
658 p.currentToken.WriteByte(p.currentByte)
659 case 'n':
660 p.currentToken.WriteByte('\n')
661 default:
662 p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
663 return
664 }
665 escaped = false
666 continue
667 }
668 switch p.currentByte {
669 case '"':
670 return
671 case '\n':
672 p.parseError(fmt.Sprintf("label value %q contains unescaped new-line", p.currentToken.String()))
673 return
674 case '\\':
675 escaped = true
676 default:
677 p.currentToken.WriteByte(p.currentByte)
678 }
679 }
680 }
681
682 func (p *TextParser) setOrCreateCurrentMF() {
683 p.currentIsSummaryCount = false
684 p.currentIsSummarySum = false
685 p.currentIsHistogramCount = false
686 p.currentIsHistogramSum = false
687 name := p.currentToken.String()
688 if p.currentMF = p.metricFamiliesByName[name]; p.currentMF != nil {
689 return
690 }
691
692 summaryName := summaryMetricName(name)
693 if p.currentMF = p.metricFamiliesByName[summaryName]; p.currentMF != nil {
694 if p.currentMF.GetType() == dto.MetricType_SUMMARY {
695 if isCount(name) {
696 p.currentIsSummaryCount = true
697 }
698 if isSum(name) {
699 p.currentIsSummarySum = true
700 }
701 return
702 }
703 }
704 histogramName := histogramMetricName(name)
705 if p.currentMF = p.metricFamiliesByName[histogramName]; p.currentMF != nil {
706 if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
707 if isCount(name) {
708 p.currentIsHistogramCount = true
709 }
710 if isSum(name) {
711 p.currentIsHistogramSum = true
712 }
713 return
714 }
715 }
716 p.currentMF = &dto.MetricFamily{Name: proto.String(name)}
717 p.metricFamiliesByName[name] = p.currentMF
718 }
719
// isValidLabelNameStart reports whether b may be the first byte of a label
// name: an ASCII letter or an underscore.
func isValidLabelNameStart(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', b == '_':
		return true
	}
	return false
}
723
724 func isValidLabelNameContinuation(b byte) bool {
725 return isValidLabelNameStart(b) || (b >= '0' && b <= '9')
726 }
727
728 func isValidMetricNameStart(b byte) bool {
729 return isValidLabelNameStart(b) || b == ':'
730 }
731
732 func isValidMetricNameContinuation(b byte) bool {
733 return isValidLabelNameContinuation(b) || b == ':'
734 }
735
// isBlankOrTab reports whether b is a space or a horizontal tab.
func isBlankOrTab(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
739
// isCount reports whether name ends in "_count" and has at least one byte in
// front of that suffix.
func isCount(name string) bool {
	const suffix = "_count"
	if len(name) <= len(suffix) {
		return false
	}
	return name[len(name)-len(suffix):] == suffix
}
743
// isSum reports whether name ends in "_sum" and has at least one byte in
// front of that suffix.
func isSum(name string) bool {
	const suffix = "_sum"
	if len(name) <= len(suffix) {
		return false
	}
	return name[len(name)-len(suffix):] == suffix
}
747
// isBucket reports whether name ends in "_bucket" and has at least one byte
// in front of that suffix.
func isBucket(name string) bool {
	const suffix = "_bucket"
	if len(name) <= len(suffix) {
		return false
	}
	return name[len(name)-len(suffix):] == suffix
}
751
752 func summaryMetricName(name string) string {
753 switch {
754 case isCount(name):
755 return name[:len(name)-6]
756 case isSum(name):
757 return name[:len(name)-4]
758 default:
759 return name
760 }
761 }
762
763 func histogramMetricName(name string) string {
764 switch {
765 case isCount(name):
766 return name[:len(name)-6]
767 case isSum(name):
768 return name[:len(name)-4]
769 case isBucket(name):
770 return name[:len(name)-7]
771 default:
772 return name
773 }
774 }
775
// parseFloat converts s to a float64 using strconv.ParseFloat, but first
// rejects any input containing 'p', 'P' or '_'. That rules out hexadecimal
// floating-point literals and digit separators.
func parseFloat(s string) (float64, error) {
	if strings.IndexAny(s, "pP_") >= 0 {
		return 0, fmt.Errorf("unsupported character in float")
	}
	return strconv.ParseFloat(s, 64)
}
782
View as plain text