// Copyright 2018 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package labels

import (
	"regexp"
	"strings"
	"unicode/utf8"

	"github.com/pkg/errors"
)

var (
	// re splits a single matcher into three capture groups: (1) the label
	// name, (2) the operator, and (3) the raw, still escaped and possibly
	// quoted value. Whitespace around each of the three is matched outside
	// the groups and thereby discarded. The '(?s)' flag lets '.' match line
	// feeds, so the value may span several lines.
	//
	// '=~' has to come before '=' because otherwise only the '='
	// will be consumed, and the '~' will be part of the 3rd token.
	re = regexp.MustCompile(`^\s*([a-zA-Z_:][a-zA-Z0-9_:]*)\s*(=~|=|!=|!~)\s*((?s).*?)\s*$`)
	// typeMap translates the operator captured by re into its MatchType.
	// Its keys are exactly the operator alternatives accepted by re.
	typeMap = map[string]MatchType{
		"=":  MatchEqual,
		"!=": MatchNotEqual,
		"=~": MatchRegexp,
		"!~": MatchNotRegexp,
	}
)

// ParseMatchers parses a comma-separated list of Matchers. A leading '{' and/or
// a trailing '}' is optional and will be trimmed before further
// parsing. Individual Matchers are separated by commas outside of quoted parts
// of the input string. Those commas may be surrounded by whitespace. Parts of the
// string inside unescaped double quotes ('"…"') are considered quoted (and
// commas don't act as separators there). If double quotes are escaped with a
// single backslash ('\"'), they are ignored for the purpose of identifying
// quoted parts of the input string. If the input string, after trimming the
// optional trailing '}', ends with a comma, followed by optional whitespace,
// this comma and whitespace will be trimmed.
46 // 47 // Examples for valid input strings: 48 // 49 // {foo = "bar", dings != "bums", } 50 // foo=bar,dings!=bums 51 // foo=bar, dings!=bums 52 // {quote="She said: \"Hi, ladies! That's gender-neutral…\""} 53 // statuscode=~"5.." 54 // 55 // See ParseMatcher for details on how an individual Matcher is parsed. 56 func ParseMatchers(s string) ([]*Matcher, error) { 57 matchers := []*Matcher{} 58 s = strings.TrimPrefix(s, "{") 59 s = strings.TrimSuffix(s, "}") 60 61 var ( 62 insideQuotes bool 63 escaped bool 64 token strings.Builder 65 tokens []string 66 ) 67 for _, r := range s { 68 switch r { 69 case ',': 70 if !insideQuotes { 71 tokens = append(tokens, token.String()) 72 token.Reset() 73 continue 74 } 75 case '"': 76 if !escaped { 77 insideQuotes = !insideQuotes 78 } else { 79 escaped = false 80 } 81 case '\\': 82 escaped = !escaped 83 default: 84 escaped = false 85 } 86 token.WriteRune(r) 87 } 88 if s := strings.TrimSpace(token.String()); s != "" { 89 tokens = append(tokens, s) 90 } 91 for _, token := range tokens { 92 m, err := ParseMatcher(token) 93 if err != nil { 94 return nil, err 95 } 96 matchers = append(matchers, m) 97 } 98 99 return matchers, nil 100 } 101 102 // ParseMatcher parses a matcher with a syntax inspired by PromQL and 103 // OpenMetrics. This syntax is convenient to describe filters and selectors in 104 // UIs and config files. To support the interactive nature of the use cases, the 105 // parser is in various aspects fairly tolerant. 106 // 107 // The syntax of a matcher consists of three tokens: (1) A valid Prometheus 108 // label name. (2) One of '=', '!=', '=~', or '!~', with the same meaning as 109 // known from PromQL selectors. (3) A UTF-8 string, which may be enclosed in 110 // double quotes. Before or after each token, there may be any amount of 111 // whitespace, which will be discarded. The 3rd token may be the empty 112 // string. 
Within the 3rd token, OpenMetrics escaping rules apply: '\"' for a 113 // double-quote, '\n' for a line feed, '\\' for a literal backslash. Unescaped 114 // '"' must not occur inside the 3rd token (only as the 1st or last 115 // character). However, literal line feed characters are tolerated, as are 116 // single '\' characters not followed by '\', 'n', or '"'. They act as a literal 117 // backslash in that case. 118 func ParseMatcher(s string) (_ *Matcher, err error) { 119 ms := re.FindStringSubmatch(s) 120 if len(ms) == 0 { 121 return nil, errors.Errorf("bad matcher format: %s", s) 122 } 123 124 var ( 125 rawValue = ms[3] 126 value strings.Builder 127 escaped bool 128 expectTrailingQuote bool 129 ) 130 131 if strings.HasPrefix(rawValue, "\"") { 132 rawValue = strings.TrimPrefix(rawValue, "\"") 133 expectTrailingQuote = true 134 } 135 136 if !utf8.ValidString(rawValue) { 137 return nil, errors.Errorf("matcher value not valid UTF-8: %s", ms[3]) 138 } 139 140 // Unescape the rawValue: 141 for i, r := range rawValue { 142 if escaped { 143 escaped = false 144 switch r { 145 case 'n': 146 value.WriteByte('\n') 147 case '"', '\\': 148 value.WriteRune(r) 149 default: 150 // This was a spurious escape, so treat the '\' as literal. 151 value.WriteByte('\\') 152 value.WriteRune(r) 153 } 154 continue 155 } 156 switch r { 157 case '\\': 158 if i < len(rawValue)-1 { 159 escaped = true 160 continue 161 } 162 // '\' encountered as last byte. Treat it as literal. 163 value.WriteByte('\\') 164 case '"': 165 if !expectTrailingQuote || i < len(rawValue)-1 { 166 return nil, errors.Errorf("matcher value contains unescaped double quote: %s", ms[3]) 167 } 168 expectTrailingQuote = false 169 default: 170 value.WriteRune(r) 171 } 172 } 173 174 if expectTrailingQuote { 175 return nil, errors.Errorf("matcher value contains unescaped double quote: %s", ms[3]) 176 } 177 178 return NewMatcher(typeMap[ms[2]], ms[1], value.String()) 179 } 180