1syntax = "proto3";
2option go_package = "./generated";
3
4import "google/protobuf/timestamp.proto";
5
message Trace {
  // Cache policy for a whole trace (`Trace.cache_policy`) or for a single
  // resolver (`Node.cache_policy`).
  message CachePolicy {
    enum Scope {
      UNKNOWN = 0;
      PUBLIC = 1;
      PRIVATE = 2;
    }

    Scope scope = 1;
    // Max age in nanoseconds, stored with a shifted encoding because a proto3
    // scalar cannot distinguish "unset" from 0: 0 means absent and -1 means an
    // actual max age of 0.
    int64 max_age_ns = 2; // use 0 for absent, -1 for 0
  }

  // Per-operation details reported alongside the trace (`Trace.details`).
  // NOTE(review): field numbers 1 and 2 are unused here and not reserved.
  // Comments elsewhere in this file still mention `details.raw_query`, so
  // these look like removed fields -- confirm and add `reserved 1, 2;`.
  message Details {
    // The variables associated with this query (unless the reporting agent is
    // configured to keep them all private). Values are JSON: i.e., strings are
    // enclosed in double quotes, etc. The value of a private variable is
    // the empty string.
    map<string, string> variables_json = 4;

    // Deprecated; only used for legacy applications. Don't include this in
    // traces inside a FullTracesReport (now `Report`); the operation name for
    // these traces comes from the key of the traces_per_query map.
    // NOTE(review): the field is described as deprecated but does not carry
    // `[deprecated = true]` -- confirm whether that is intentional.
    string operation_name = 3;
  }

  // An error attached to a Node of the trace tree (see `Node.error`).
  message Error {
    string message = 1; // required
    // NOTE(review): presumably the positions in the operation document that
    // the error refers to -- confirm.
    repeated Location location = 2;
    // Nanoseconds, per the `_ns` suffix. NOTE(review): the reference point is
    // not stated in this file (presumably the trace's start_time) -- confirm.
    uint64 time_ns = 3;
    // NOTE(review): presumably the serialized JSON form of the error --
    // confirm.
    string json = 4;
  }

  // HTTP request/response metadata for the traced operation (`Trace.http`).
  // Field number 7 is skipped and not reserved in this message.
  message HTTP {
    // Wrapper for the list of values of one header, needed because a map
    // value cannot itself be `repeated`.
    message Values {
      repeated string value = 1;
    }

    enum Method {
      UNKNOWN = 0;
      OPTIONS = 1;
      GET = 2;
      HEAD = 3;
      POST = 4;
      PUT = 5;
      DELETE = 6;
      TRACE = 7;
      CONNECT = 8;
      PATCH = 9;
    }
    Method method = 1;
    string host = 2;
    string path = 3;

    // Headers, keyed by header name. Should exclude the manual blacklist
    // ("Auth" by default).
    map<string, Values> request_headers = 4;
    map<string, Values> response_headers = 5;

    uint32 status_code = 6;

    bool secure = 8; // TLS was used
    string protocol = 9; // by convention "HTTP/1.0", "HTTP/1.1", "HTTP/2" or "h2"
  }

  // A line/column position, used by `Error.location`.
  message Location {
    uint32 line = 1;
    uint32 column = 2;
  }

  // We store information on each resolver execution as a Node on a tree.
  // The structure of the tree corresponds to the structure of the GraphQL
  // response; it does not indicate the order in which resolvers were
  // invoked. Note that nodes representing indexes (and the root node)
  // don't contain all Node fields (e.g. types and times).
  message Node {
    // The name of the field (for Nodes representing a resolver call) or the
    // index in a list (for intermediate Nodes representing elements of a list).
    // response_name is the name of the field as it appears in the GraphQL
    // response: i.e., it may be an alias. (In that case, the
    // original_field_name field holds the actual field name from the schema.)
    // In any context where we're building up a path, we use the response_name
    // rather than the original_field_name.
    oneof id {
      string response_name = 1;
      uint32 index = 2;
    }

    // The field name from the schema when response_name is an alias (see the
    // comment on `id` above).
    string original_field_name = 14;

    // The field's return type; e.g. "String!" for User.email:String!
    string type = 3;

    // The field's parent type; e.g. "User" for User.email:String!
    string parent_type = 13;

    CachePolicy cache_policy = 5;

    // relative to the trace's start_time, in ns
    uint64 start_time = 8;
    // relative to the trace's start_time, in ns
    uint64 end_time = 9;

    // Errors attached to this node.
    repeated Error error = 11;
    // Child nodes; the tree mirrors the shape of the GraphQL response.
    repeated Node child = 12;

    reserved 4;
  }

  // Represents a node in the query plan, under which there is a trace tree for
  // that service fetch. In particular, each fetch node represents a call to an
  // implementing service, and calls to implementing services may not be
  // unique. See https://github.com/apollographql/apollo-server/blob/main/packages/apollo-gateway/src/QueryPlan.ts
  // for more information and details.
  message QueryPlanNode {
    // This represents a set of nodes to be executed sequentially by the Gateway executor
    message SequenceNode {
      repeated QueryPlanNode nodes = 1;
    }
    // This represents a set of nodes to be executed in parallel by the Gateway executor
    message ParallelNode {
      repeated QueryPlanNode nodes = 1;
    }
    // This represents a node to send an operation to an implementing service
    message FetchNode {
      // XXX When we want to include more details about the sub-operation that was
      // executed against this service, we should include that here in each fetch node.
      // This might include an operation signature, requires directive, reference resolutions, etc.
      string service_name = 1;

      bool trace_parsing_failed = 2;

      // This Trace only contains start_time, end_time, duration_ns, and root;
      // all timings were calculated **on the federated service**, and clock skew
      // will be handled by the ingress server.
      Trace trace = 3;

      // relative to the outer trace's start_time, in ns, measured in the gateway.
      uint64 sent_time_offset = 4;

      // Wallclock times measured in the gateway for when this operation was
      // sent and received.
      google.protobuf.Timestamp sent_time = 5;
      google.protobuf.Timestamp received_time = 6;
    }

    // This node represents a way to reach into the response path and attach related entities.
    // XXX Flatten is really not the right name and this node may be renamed in the query planner.
    message FlattenNode {
      repeated ResponsePathElement response_path = 1;
      QueryPlanNode node = 2;
    }
    // One element of `FlattenNode.response_path`: either a field name or a
    // list index.
    message ResponsePathElement {
      oneof id {
        string field_name = 1;
        uint32 index = 2;
      }
    }
    // Exactly one of these is set, selecting the kind of plan node.
    oneof node {
      SequenceNode sequence = 1;
      ParallelNode parallel = 2;
      FetchNode fetch = 3;
      FlattenNode flatten = 4;
    }
  }

  // Wallclock time when the trace began.
  google.protobuf.Timestamp start_time = 4; // required
  // Wallclock time when the trace ended.
  google.protobuf.Timestamp end_time = 3; // required
  // High precision duration of the trace; may not equal end_time-start_time
  // (e.g., if your machine's clock changed during the trace).
  uint64 duration_ns = 11; // required
  // A tree containing information about all resolvers run directly by this
  // service, including errors.
  Node root = 14;

  // -------------------------------------------------------------------------
  // Fields below this line are *not* included in federated traces (the traces
  // sent from federated services to the gateway).

  // In addition to details.raw_query, we include a "signature" of the query,
  // which can be normalized: for example, you may want to discard aliases, drop
  // unused operations and fragments, sort fields, etc. The most important thing
  // here is that the signature match the signature in StatsReports. In
  // StatsReports signatures show up as the key in the per_query map (with the
  // operation name prepended). The signature should be a valid GraphQL query.
  // All traces must have a signature; if this Trace is in a FullTracesReport
  // that signature is in the key of traces_per_query rather than in this field.
  // Engineproxy provides the signature in legacy_signature_needs_resigning
  // instead.
  // NOTE(review): neither `details.raw_query` nor
  // `legacy_signature_needs_resigning` is declared in this message any longer;
  // this comment looks stale -- confirm and update.
  string signature = 19;

  // Optional: when GraphQL parsing or validation against the GraphQL schema fails, these fields
  // can include reference to the operation being sent for users to dig into the set of operations
  // that are failing validation.
  // NOTE(review): these two names are camelCase, unlike every other field in
  // this file. Renaming would change generated accessors and text format, so
  // they are left as-is.
  string unexecutedOperationBody = 27;
  string unexecutedOperationName = 28;

  Details details = 6;

  string client_name = 7;
  string client_version = 8;

  HTTP http = 10;

  CachePolicy cache_policy = 18;

  // If this Trace was created by a gateway, this is the query plan, including
  // sub-Traces for federated services. Note that the 'root' tree on the
  // top-level Trace won't contain any resolvers (though it could contain errors
  // that occurred in the gateway itself).
  QueryPlanNode query_plan = 26;

  // Was this response served from a full query response cache? (In that case
  // the node tree will have no resolvers.)
  bool full_query_cache_hit = 20;

  // Was this query specified successfully as a persisted query hash?
  bool persisted_query_hit = 21;
  // Did this query contain both a full query string and a persisted query hash?
  // (This typically means that a previous request was rejected as an unknown
  // persisted query.)
  bool persisted_query_register = 22;

  // Was this operation registered and a part of the safelist?
  bool registered_operation = 24;

  // Was this operation forbidden due to lack of safelisting?
  bool forbidden_operation = 25;

  // Some servers don't do field-level instrumentation for every request and
  // assign a "weight" to each request that they do instrument. When this
  // trace is aggregated into field usage stats, it should count as this value
  // towards the estimated_execution_count rather than just 1. This value should
  // typically be at least 1.
  //
  // 0 is treated as 1 for backwards compatibility.
  double field_execution_weight = 31;

  // Removed fields (their numbers are reserved below):
  //   Node parse = 12; Node validate = 13;
  //   Id128 server_id = 1; Id128 client_id = 2;
  //   String client_reference_id = 23; String client_address = 9;
  reserved 1, 2, 9, 12, 13, 23;
}
251
252// The `service` value embedded within the header key is not guaranteed to contain an actual service,
253// and, in most cases, the service information is trusted to come from upstream processing. If the
254// service _is_ specified in this header, then it is checked to match the context that is reporting it.
255// Otherwise, the service information is deduced from the token context of the reporter and then sent
// along via other mechanisms (in Kafka, the `ReportKafkaKey`). The other information (hostname,
257// agent_version, etc.) is sent by the Apollo Engine Reporting agent, but we do not currently save that
258// information to any of our persistent storage.
message ReportHeader {
  // e.g. "mygraph@myvariant"
  string graph_ref = 12;

  // e.g. "host-01.example.com"
  string hostname = 5;

  // e.g. "engineproxy 0.1.0"
  string agent_version = 6; // required
  // e.g. "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`)
  string service_version = 7;
  // e.g. "node v4.6.0"
  string runtime_version = 8;
  // e.g. "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux"
  string uname = 9;
  // An id that is used to represent the schema to Apollo Graph Manager.
  // Use this in place of what used to be schema_hash, since that is no longer
  // attached to a schema in the backend.
  string executable_schema_id = 11;

  // NOTE(review): only 3 is reserved; 1, 2, 4 and 10 are also unused. If any
  // of them belonged to removed fields (e.g. the schema_hash mentioned above),
  // they should be reserved too -- confirm against the file history.
  reserved 3; // removed string service = 3;
}
281
// A recursive tree of error counters: each node carries its own counts plus a
// map of child nodes. Used as `QueryLatencyStats.root_error_stats`.
// NOTE(review): the map keys are presumably response-path segments, and
// numbers 2 and 3 are unused but not reserved -- confirm both.
message PathErrorStats {
  map<string, PathErrorStats> children = 1;
  uint64 errors_count = 4;
  uint64 requests_with_errors_count = 5;
}
287
// Aggregated request-level counters and histograms. Embedded in
// ContextualizedStats and ContextualizedQueryLatencyStats.
// NOTE(review): the `repeated sint64` fields (13-16) appear to replace the
// reserved int64 histograms listed at the bottom; the bucket layout is not
// described in this file -- confirm against the ingress documentation.
message QueryLatencyStats {
  repeated sint64 latency_count = 13;
  uint64 request_count = 2;
  uint64 cache_hits = 3;
  uint64 persisted_query_hits = 4;
  uint64 persisted_query_misses = 5;
  repeated sint64 cache_latency_count = 14;
  // Root of the per-path error counter tree.
  PathErrorStats root_error_stats = 7;
  uint64 requests_with_errors_count = 8;
  repeated sint64 public_cache_ttl_count = 15;
  repeated sint64 private_cache_ttl_count = 16;
  uint64 registered_operation_count = 11;
  uint64 forbidden_operation_count = 12;
  // The number of requests that were executed without field-level
  // instrumentation (and thus do not contribute to `observed_execution_count`
  // fields on this message's cousin-twice-removed FieldStats).
  uint64 requests_without_field_instrumentation = 17;
  // 1, 6, 9, and 10 were old int64 histograms
  reserved 1, 6, 9, 10;
}
308
// The client name/version pair that a set of statistics applies to. Used as
// the `context` field of the Contextualized* messages.
message StatsContext {
  // Removed field: string client_reference_id = 1;
  reserved 1;
  string client_name = 2;
  string client_version = 3;
}
315
// Pairs a QueryLatencyStats with the StatsContext it applies to.
// NOTE(review): no other message in this file, as shown, references this type,
// and ContextualizedStats carries the same two fields (with different field
// numbers) -- possibly legacy; confirm before relying on it.
message ContextualizedQueryLatencyStats {
  QueryLatencyStats query_latency_stats = 1;
  StatsContext context = 2;
}
320
// Pairs per-type field statistics with the StatsContext they apply to.
// NOTE(review): no other message in this file, as shown, references this type,
// and ContextualizedStats also carries a `per_type_stat` map -- possibly
// legacy; confirm before relying on it.
message ContextualizedTypeStats {
  StatsContext context = 1;
  // NOTE(review): presumably keyed by type name, as in
  // ContextualizedStats.per_type_stat -- confirm.
  map<string, TypeStat> per_type_stat = 2;
}
325
// Statistics for a single field of a type; stored as the values of
// `TypeStat.per_field_stat`.
message FieldStat {
  string return_type = 3; // required; e.g. "String!" for User.email:String!
  // Number of errors whose path is this field. Note that we assume that error
  // tracking does *not* require field-level instrumentation so this *will*
  // include errors from requests that don't contribute to the
  // `observed_execution_count` field (and does not need to be scaled by
  // field_execution_weight).
  uint64 errors_count = 4;
  // Number of times that the resolver for this field is directly observed being
  // executed.
  uint64 observed_execution_count = 5;
  // Same as `observed_execution_count` but potentially scaled upwards if the
  // server was only performing field-level instrumentation on a sampling of
  // operations. For example, if the server randomly instruments 1% of requests
  // for this operation, this number will be 100 times greater than
  // `observed_execution_count`. (When aggregating a Trace into FieldStats,
  // this number goes up by the trace's `field_execution_weight` for each
  // observed field execution, while `observed_execution_count` above goes
  // up by 1.)
  uint64 estimated_execution_count = 10;
  // Number of times the resolver for this field is executed that resulted in
  // at least one error. "Request" is a misnomer here as this corresponds to
  // resolver calls, not overall operations. Like `errors_count` above, this
  // includes all requests rather than just requests with field-level
  // instrumentation.
  uint64 requests_with_errors_count = 6;
  // Duration histogram for the latency of this field. Note that it is scaled in
  // the same way as estimated_execution_count so its "total count" might be
  // greater than `observed_execution_count` and may not exactly equal
  // `estimated_execution_count` due to rounding.
  repeated sint64 latency_count = 9;
  // Numbers of removed fields; do not reuse.
  reserved 1, 2, 7, 8;
}
359
// Per-field statistics for one type; stored as the values of the
// `per_type_stat` maps.
message TypeStat {
  // Key is (e.g.) "email" for User.email:String!
  map<string, FieldStat> per_field_stat = 3;
  // Numbers of removed fields; do not reuse.
  reserved 1, 2;
}
365
// The fields of one type that an operation references; stored as the values of
// `TracesAndStats.referenced_fields_by_type`.
message ReferencedFieldsForType {
  // Contains (e.g.) "email" for User.email:String!
  repeated string field_names = 1;
  // True if this type is an interface.
  bool is_interface = 2;
}
372
373
374
// This is the top-level message used by the new traces ingress. This
// is designed for the apollo-engine-reporting TypeScript agent and will
// eventually be documented as a public ingress API. This message consists
// solely of traces; the equivalent of the StatsReport is automatically
// generated server-side from this message. Agents should either send a trace
// or include it in the stats for every request in this report. Generally,
// buffering up until a large size has been reached (say, 4MB) or 5-10 seconds
// have passed is appropriate.
// This message used to be known as FullTracesReport, but was renamed since it
// isn't just for traces anymore.
message Report {
  ReportHeader header = 1;

  // Key is statsReportKey (# operationName\nsignature). Note that the nested
  // traces will *not* have a signature or details.operationName (because the
  // key is adequate).
  //
  // We also assume that traces don't have
  // legacy_per_query_implicit_operation_name, and we don't require them to have
  // details.raw_query (which would consume a lot of space and has privacy/data
  // access issues, and isn't currently exposed by our app anyway).
  map<string, TracesAndStats> traces_per_query = 5;

  // This is the time that the requests in this trace are considered to have
  // taken place. If this field is not present, the max of the end_time of each
  // trace will be used instead. If there are no traces and no end_time present,
  // the report will not be able to be processed.
  // Note: This will override the end_time from traces.
  google.protobuf.Timestamp end_time = 2; // required if no traces in this message

  // Total number of operations processed during this period.
  uint64 operation_count = 6;

  // NOTE(review): field numbers 3 and 4 are unused and not reserved; if they
  // belonged to removed fields, add `reserved 3, 4;` -- confirm against the
  // file history.
}
405
// Request-level and field-level statistics for one client context; stored in
// `TracesAndStats.stats_with_context`.
message ContextualizedStats {
  StatsContext context = 1;
  QueryLatencyStats query_latency_stats = 2;
  // Key is type name. This structure provides data for the count and latency of individual
  // field executions and thus only reflects operations for which field-level tracing occurred.
  map<string, TypeStat> per_type_stat = 3;
}
414
415// A sequence of traces and stats. An individual operation should either be described as a trace
416// or as part of stats, but not both.
message TracesAndStats {
  // Operations reported as individual traces.
  repeated Trace trace = 1;
  // Operations reported as aggregated statistics, grouped by client context.
  repeated ContextualizedStats stats_with_context = 2;
  // This describes the fields referenced in the operation. Note that this may
  // include fields that don't show up in FieldStats (due to being interface fields,
  // being nested under null fields or empty lists or non-matching fragments or
  // `@include` or `@skip`, etc). It also may be missing fields that show up in FieldStats
  // (as FieldStats will include the concrete object type for fields referenced
  // via an interface type).
  // NOTE(review): the key is presumably the type name -- confirm.
  map<string, ReferencedFieldsForType> referenced_fields_by_type = 4;
  // This field is used to validate that the algorithm used to construct `stats_with_context`
  // matches similar algorithms in Apollo's servers. It is otherwise ignored and should not
  // be included in reports.
  repeated Trace internal_traces_contributing_to_stats = 3;
}
View as plain text