Initialisation

Added the packages and files for the backend server
This commit is contained in:
jackbeeby
2024-12-15 17:48:45 +11:00
parent 25066e1ee8
commit b412dfe2ca
2732 changed files with 330572 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
# `apollo-reporting-protobuf`
> **Note:** The Apollo usage reporting API is subject to change. We strongly
> encourage developers to contact Apollo support at `support@apollographql.com`
> to discuss their use case prior to building their own reporting agent using
> this module.
This module provides JavaScript/TypeScript
[Protocol buffer](https://developers.google.com/protocol-buffers/) definitions
for the Apollo usage reporting API. These definitions are generated for
consumption from the `reports.proto` file which is defined internally within
Apollo.
## Development
> **Note:** Due to a dependency on Unix tools (e.g. `bash`, `grep`, etc.), the
> development of this module requires a Unix system. There is no reason why
> this can't be avoided, the time just hasn't been taken to make those changes.
> We'd happily accept a PR which makes the appropriate changes!
Currently, this package generates a majority of its code with
`@apollo/protobufjs` (a fork of
[`protobufjs`](https://www.npmjs.com/package/protobufjs) that we maintain
specifically for this package) based on the `reports.proto` file. The output is
generated with the `generate` npm script.
The root of the repository provides some `devDependencies` necessary to build
these definitions; these will be installed by running `npm install` at the root
of this workspace. When making changes to this module, run scripts via `npm run
SCRIPTNAME -w @apollo/usage-reporting-protobuf` in the **root** of this monorepo in
order to update the definitions in _this_ module. The `-w` flag is shorthand for
`--workspace`; this monorepo leverages NPM workspaces to manage its packages.
To update `reports.proto` to the current version recognized by the Studio usage
reporting ingress, run `npm run update-proto -w
@apollo/usage-reporting-protobuf`. To then regenerate the JS and TS files, run
`npm run generate -w @apollo/usage-reporting-protobuf`. We check in the
generated code and only regenerate it manually, partially to make builds faster
(no need to run pbjs on every `npm install`) and partially so that we don't have
to make sure that `pbjs` runs on every Node version that we support.

View File

@@ -0,0 +1 @@
{"type":"commonjs"}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
{"type":"module"}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,52 @@
{
"name": "@apollo/usage-reporting-protobuf",
"version": "4.1.1",
"description": "Protobuf format for Apollo usage reporting",
"type": "module",
"exports": {
".": {
"types": {
"require": "./generated/cjs/protobuf.d.ts",
"default": "./generated/esm/protobuf.d.ts"
},
"import": "./generated/esm/protobuf.js",
"require": "./generated/cjs/protobuf.js"
}
},
"main": "generated/cjs/protobuf.js",
"module": "generated/esm/protobuf.js",
"types": "generated/esm/protobuf.d.ts",
"scripts": {
"generate": "rm -rf generated && mkdir -p generated/{esm,cjs} && npm run pbjs-cjs && npm run pbjs-esm && npm run pbts",
"pbjs-cjs": "apollo-pbjs --target static-module --out generated/cjs/protobuf.cjs --wrap commonjs --force-number --no-from-object src/reports.proto",
"pbjs-esm": "apollo-pbjs --target static-module --out generated/esm/protobuf.mjs --es6 --force-number --no-from-object src/reports.proto",
"pbts-cjs": "mv generated/cjs/protobuf.{c,}js && apollo-pbts -o generated/cjs/protobuf.d.ts generated/cjs/protobuf.js",
"pbts-esm": "mv generated/esm/protobuf.{m,}js && apollo-pbts -o generated/esm/protobuf.d.ts generated/esm/protobuf.js",
"pbts": "npm run pbts-cjs && npm run pbts-esm",
"update-proto": "curl -sSfo src/reports.proto https://usage-reporting.api.apollographql.com/proto/reports.proto"
},
"repository": {
"type": "git",
"url": "https://github.com/apollographql/apollo-server",
"directory": "packages/usage-reporting-protobuf"
},
"keywords": [
"GraphQL",
"Apollo",
"Server",
"Javascript"
],
"author": "Apollo <packages@apollographql.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/apollographql/apollo-server/issues"
},
"homepage": "https://github.com/apollographql/apollo-server#readme",
"//": "Don't caret this, we want to be explicit about the version of our fork of protobufjs and update it intentionally if we need to.",
"dependencies": {
"@apollo/protobufjs": "1.2.7"
},
"volta": {
"extends": "../../package.json"
}
}

View File

@@ -0,0 +1,12 @@
# reports.proto is copied from an internal Apollo repository which applies these
# editorconfig standards.
root = true
[reports.proto]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = tab
insert_final_newline = true
trim_trailing_whitespace = true

View File

@@ -0,0 +1,476 @@
syntax = "proto3";
import "google/protobuf/timestamp.proto";
message Trace {
message CachePolicy {
enum Scope {
UNKNOWN = 0;
PUBLIC = 1;
PRIVATE = 2;
}
Scope scope = 1;
int64 max_age_ns = 2; // use 0 for absent, -1 for 0
}
message Details {
// The variables associated with this query (unless the reporting agent is
// configured to keep them all private). Values are JSON: ie, strings are
// enclosed in double quotes, etc. The value of a private variable is
// the empty string.
map<string, string> variables_json = 4;
// This is deprecated and only used for legacy applications
// don't include this in traces inside a FullTracesReport; the operation
// name for these traces comes from the key of the traces_per_query map.
string operation_name = 3;
}
message Error {
string message = 1; // required
repeated Location location = 2;
uint64 time_ns = 3;
string json = 4;
}
message HTTP {
message Values {
repeated string value = 1;
}
enum Method {
UNKNOWN = 0;
OPTIONS = 1;
GET = 2;
HEAD = 3;
POST = 4;
PUT = 5;
DELETE = 6;
TRACE = 7;
CONNECT = 8;
PATCH = 9;
}
Method method = 1;
// Should exclude manual blacklist ("Auth" by default)
map<string, Values> request_headers = 4;
map<string, Values> response_headers = 5;
uint32 status_code = 6;
reserved 2, 3, 8, 9;
}
message Location {
uint32 line = 1;
uint32 column = 2;
}
// We store information on each resolver execution as a Node on a tree.
// The structure of the tree corresponds to the structure of the GraphQL
// response; it does not indicate the order in which resolvers were
// invoked. Note that nodes representing indexes (and the root node)
// don't contain all Node fields (eg types and times).
message Node {
// The name of the field (for Nodes representing a resolver call) or the
// index in a list (for intermediate Nodes representing elements of a list).
// field_name is the name of the field as it appears in the GraphQL
// response: ie, it may be an alias. (In that case, the original_field_name
// field holds the actual field name from the schema.) In any context where
// we're building up a path, we use the response_name rather than the
// original_field_name.
oneof id {
string response_name = 1;
uint32 index = 2;
}
string original_field_name = 14;
// The field's return type; e.g. "String!" for User.email:String!
string type = 3;
// The field's parent type; e.g. "User" for User.email:String!
string parent_type = 13;
CachePolicy cache_policy = 5;
// relative to the trace's start_time, in ns
uint64 start_time = 8;
// relative to the trace's start_time, in ns
uint64 end_time = 9;
repeated Error error = 11;
repeated Node child = 12;
reserved 4;
}
// represents a node in the query plan, under which there is a trace tree for that service fetch.
// In particular, each fetch node represents a call to an implementing service, and calls to implementing
// services may not be unique. See https://github.com/apollographql/federation/blob/main/query-planner-js/src/QueryPlan.ts
// for more information and details.
message QueryPlanNode {
// This represents a set of nodes to be executed sequentially by the Router/Gateway executor
message SequenceNode {
repeated QueryPlanNode nodes = 1;
}
// This represents a set of nodes to be executed in parallel by the Router/Gateway executor
message ParallelNode {
repeated QueryPlanNode nodes = 1;
}
// This represents a node to send an operation to an implementing service
message FetchNode {
// XXX When we want to include more details about the sub-operation that was
// executed against this service, we should include that here in each fetch node.
// This might include an operation signature, requires directive, reference resolutions, etc.
string service_name = 1;
bool trace_parsing_failed = 2;
// This Trace only contains start_time, end_time, duration_ns, and root;
// all timings were calculated **on the subgraph**, and clock skew
// will be handled by the ingress server.
Trace trace = 3;
// relative to the outer trace's start_time, in ns, measured in the Router/Gateway.
uint64 sent_time_offset = 4;
// Wallclock times measured in the Router/Gateway for when this operation was
// sent and received.
google.protobuf.Timestamp sent_time = 5;
google.protobuf.Timestamp received_time = 6;
}
// This node represents a way to reach into the response path and attach related entities.
// XXX Flatten is really not the right name and this node may be renamed in the query planner.
message FlattenNode {
repeated ResponsePathElement response_path = 1;
QueryPlanNode node = 2;
}
// A `DeferNode` corresponds to one or more @defer at the same level of "nestedness" in the planned query.
message DeferNode {
DeferNodePrimary primary = 1;
repeated DeferredNode deferred = 2;
}
message ConditionNode {
string condition = 1;
QueryPlanNode if_clause = 2;
QueryPlanNode else_clause = 3;
}
message DeferNodePrimary {
QueryPlanNode node = 1;
}
message DeferredNode {
repeated DeferredNodeDepends depends = 1;
string label = 2;
repeated ResponsePathElement path = 3;
QueryPlanNode node = 4;
}
message DeferredNodeDepends {
string id = 1;
string defer_label = 2;
}
message ResponsePathElement {
oneof id {
string field_name = 1;
uint32 index = 2;
}
}
oneof node {
SequenceNode sequence = 1;
ParallelNode parallel = 2;
FetchNode fetch = 3;
FlattenNode flatten = 4;
DeferNode defer = 5;
ConditionNode condition = 6;
}
}
// Wallclock time when the trace began.
google.protobuf.Timestamp start_time = 4; // required
// Wallclock time when the trace ended.
google.protobuf.Timestamp end_time = 3; // required
// High precision duration of the trace; may not equal end_time-start_time
// (eg, if your machine's clock changed during the trace).
uint64 duration_ns = 11; // required
// A tree containing information about all resolvers run directly by this
// service, including errors.
Node root = 14;
// If this is true, the trace is potentially missing some nodes that were
// present on the query plan. This can happen if the trace span buffer used
// in the Router fills up and some spans have to be dropped. In these cases
// the overall trace timing will still be correct, but the trace data could
// be missing some referenced or executed fields, and some nodes may be
// missing. If this is true we should display a warning to the user when they
// view the trace in Explorer.
bool is_incomplete = 33;
// -------------------------------------------------------------------------
// Fields below this line are *not* included in inline traces (the traces
// sent from subgraphs to the Router/Gateway).
// In addition to details.raw_query, we include a "signature" of the query,
// which can be normalized: for example, you may want to discard aliases, drop
// unused operations and fragments, sort fields, etc. The most important thing
// here is that the signature match the signature in StatsReports. In
// StatsReports signatures show up as the key in the per_query map (with the
// operation name prepended). The signature should be a valid GraphQL query.
// All traces must have a signature; if this Trace is in a FullTracesReport
// that signature is in the key of traces_per_query rather than in this field.
// Engineproxy provides the signature in legacy_signature_needs_resigning
// instead.
string signature = 19;
// Optional: when GraphQL parsing or validation against the GraphQL schema fails, these fields
// can include reference to the operation being sent for users to dig into the set of operations
// that are failing validation.
string unexecutedOperationBody = 27;
string unexecutedOperationName = 28;
Details details = 6;
string client_name = 7;
string client_version = 8;
HTTP http = 10;
CachePolicy cache_policy = 18;
// If this Trace was created by a Router/Gateway, this is the query plan, including
// sub-Traces for subgraphs. Note that the 'root' tree on the
// top-level Trace won't contain any resolvers (though it could contain errors
// that occurred in the Router/Gateway itself).
QueryPlanNode query_plan = 26;
// Was this response served from a full query response cache? (In that case
// the node tree will have no resolvers.)
bool full_query_cache_hit = 20;
// Was this query specified successfully as a persisted query hash?
bool persisted_query_hit = 21;
// Did this query contain both a full query string and a persisted query hash?
// (This typically means that a previous request was rejected as an unknown
// persisted query.)
bool persisted_query_register = 22;
// Was this operation registered and a part of the safelist?
bool registered_operation = 24;
// Was this operation forbidden due to lack of safelisting?
bool forbidden_operation = 25;
// Some servers don't do field-level instrumentation for every request and assign
// each request a "weight" for each request that they do instrument. When this
// trace is aggregated into field usage stats, it should count as this value
// towards the estimated_execution_count rather than just 1. This value should
// typically be at least 1.
//
// 0 is treated as 1 for backwards compatibility.
double field_execution_weight = 31;
// removed: Node parse = 12; Node validate = 13;
// Id128 server_id = 1; Id128 client_id = 2;
// String client_reference_id = 23; String client_address = 9;
reserved 1, 2, 9, 12, 13, 23;
}
// The `service` value embedded within the header key is not guaranteed to contain an actual service,
// and, in most cases, the service information is trusted to come from upstream processing. If the
// service _is_ specified in this header, then it is checked to match the context that is reporting it.
// Otherwise, the service information is deduced from the token context of the reporter and then sent
// along via other mechanisms (in Kafka, the `ReportKafkaKey). The other information (hostname,
// agent_version, etc.) is sent by the Apollo Engine Reporting agent, but we do not currently save that
// information to any of our persistent storage.
message ReportHeader {
// eg "mygraph@myvariant"
string graph_ref = 12;
// eg "host-01.example.com"
string hostname = 5;
// eg "engineproxy 0.1.0"
string agent_version = 6; // required
// eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`)
string service_version = 7;
// eg "node v4.6.0"
string runtime_version = 8;
// eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux"
string uname = 9;
// An id that is used to represent the schema to Apollo Graph Manager
// Using this in place of what used to be schema_hash, since that is no longer
// attached to a schema in the backend.
string executable_schema_id = 11;
reserved 3; // removed string service = 3;
}
message PathErrorStats {
map<string, PathErrorStats> children = 1;
uint64 errors_count = 4;
uint64 requests_with_errors_count = 5;
}
message QueryLatencyStats {
repeated sint64 latency_count = 13 [(js_use_toArray)=true];
uint64 request_count = 2;
uint64 cache_hits = 3;
uint64 persisted_query_hits = 4;
uint64 persisted_query_misses = 5;
repeated sint64 cache_latency_count = 14 [(js_use_toArray)=true];
PathErrorStats root_error_stats = 7;
uint64 requests_with_errors_count = 8;
repeated sint64 public_cache_ttl_count = 15 [(js_use_toArray)=true];
repeated sint64 private_cache_ttl_count = 16 [(js_use_toArray)=true];
uint64 registered_operation_count = 11;
uint64 forbidden_operation_count = 12;
// The number of requests that were executed without field-level
// instrumentation (and thus do not contribute to `observed_execution_count`
// fields on this message's cousin-twice-removed FieldStats).
uint64 requests_without_field_instrumentation = 17;
// 1, 6, 9, and 10 were old int64 histograms
reserved 1, 6, 9, 10;
}
message StatsContext {
// string client_reference_id = 1;
reserved 1;
string client_name = 2;
string client_version = 3;
}
message ContextualizedQueryLatencyStats {
QueryLatencyStats query_latency_stats = 1;
StatsContext context = 2;
}
message ContextualizedTypeStats {
StatsContext context = 1;
map<string, TypeStat> per_type_stat = 2;
}
message FieldStat {
string return_type = 3; // required; eg "String!" for User.email:String!
// Number of errors whose path is this field. Note that we assume that error
// tracking does *not* require field-level instrumentation so this *will*
// include errors from requests that don't contribute to the
// `observed_execution_count` field (and does not need to be scaled by
// field_execution_weight).
uint64 errors_count = 4;
// Number of times that the resolver for this field is directly observed being
// executed.
uint64 observed_execution_count = 5;
// Same as `count` but potentially scaled upwards if the server was only
// performing field-level instrumentation on a sampling of operations. For
// example, if the server randomly instruments 1% of requests for this
// operation, this number will be 100 times greater than
// `observed_execution_count`. (When aggregating a Trace into FieldStats,
// this number goes up by the trace's `field_execution_weight` for each
// observed field execution, while `observed_execution_count` above goes
// up by 1.)
uint64 estimated_execution_count = 10;
// Number of times the resolver for this field is executed that resulted in
// at least one error. "Request" is a misnomer here as this corresponds to
// resolver calls, not overall operations. Like `errors_count` above, this
// includes all requests rather than just requests with field-level
// instrumentation.
uint64 requests_with_errors_count = 6;
// Duration histogram for the latency of this field. Note that it is scaled in
// the same way as estimated_execution_count so its "total count" might be
// greater than `observed_execution_count` and may not exactly equal
// `estimated_execution_count` due to rounding.
repeated sint64 latency_count = 9 [(js_use_toArray)=true];
reserved 1, 2, 7, 8;
}
message TypeStat {
// Key is (eg) "email" for User.email:String!
map<string, FieldStat> per_field_stat = 3;
reserved 1, 2;
}
message ReferencedFieldsForType {
// Contains (eg) "email" for User.email:String!
repeated string field_names = 1;
// True if this type is an interface.
bool is_interface = 2;
}
// This is the top-level message used by the new traces ingress. This
// is designed for the apollo-engine-reporting TypeScript agent and will
// eventually be documented as a public ingress API. This message consists
// solely of traces; the equivalent of the StatsReport is automatically
// generated server-side from this message. Agent should either send a trace or include it in the stats
// for every request in this report. Generally, buffering up until a large
// size has been reached (say, 4MB) or 5-10 seconds has passed is appropriate.
// This message used to be know as FullTracesReport, but got renamed since it isn't just for traces anymore
message Report {
ReportHeader header = 1;
// key is statsReportKey (# operationName\nsignature) Note that the nested
// traces will *not* have a signature or details.operationName (because the
// key is adequate).
//
// We also assume that traces don't have
// legacy_per_query_implicit_operation_name, and we don't require them to have
// details.raw_query (which would consume a lot of space and has privacy/data
// access issues, and isn't currently exposed by our app anyway).
map<string, TracesAndStats> traces_per_query = 5;
// This is the time that the requests in this trace are considered to have taken place
// If this field is not present the max of the end_time of each trace will be used instead.
// If there are no traces and no end_time present the report will not be able to be processed.
// Note: This will override the end_time from traces.
google.protobuf.Timestamp end_time = 2; // required if no traces in this message
// Total number of operations processed during this period.
uint64 operation_count = 6;
// If this is set to true, the stats in TracesWithStats.stats_with_context
// represent all of the operations described from this report, and the
// traces in TracesWithStats.trace are a sampling of some of the same
// operations. If this is false, each operation is described in precisely
// one of those two fields.
bool traces_pre_aggregated = 7;
}
message ContextualizedStats {
StatsContext context = 1;
QueryLatencyStats query_latency_stats = 2;
// Key is type name. This structure provides data for the count and latency of individual
// field executions and thus only reflects operations for which field-level tracing occurred.
map<string, TypeStat> per_type_stat = 3;
}
// A sequence of traces and stats. If Report.traces_pre_aggregated (at the top
// level of the report) is false, an individual operation should either be
// described as a trace or as part of stats, but not both. If that flag
// is true, then all operations are described as stats and some are also
// described as traces.
message TracesAndStats {
repeated Trace trace = 1 [(js_preEncoded)=true];
repeated ContextualizedStats stats_with_context = 2 [(js_use_toArray)=true];
// This describes the fields referenced in the operation. Note that this may
// include fields that don't show up in FieldStats (due to being interface fields,
// being nested under null fields or empty lists or non-matching fragments or
// `@include` or `@skip`, etc). It also may be missing fields that show up in FieldStats
// (as FieldStats will include the concrete object type for fields referenced
// via an interface type).
map<string, ReferencedFieldsForType> referenced_fields_by_type = 4;
// This field is used to validate that the algorithm used to construct `stats_with_context`
// matches similar algorithms in Apollo's servers. It is otherwise ignored and should not
// be included in reports.
repeated Trace internal_traces_contributing_to_stats = 3 [(js_preEncoded)=true];
}