SaFi Bank Space : OpenTelemetry Flutter

OpenTelemetry is a collection of tools, APIs, and SDKs. Use it to instrument, generate, collect, and export telemetry data (metrics, logs, and traces) to help you analyze your software’s performance and behavior.

- https://opentelemetry.io/

SDK Setup

Right now, there is only one package for OpenTelemetry on Flutter: https://pub.dev/packages/opentelemetry. This package is not null-safe, so we need to add --no-sound-null-safety whenever we want to run our app.

To set up this SDK in our app, these items are required:

  1. Exporter: this is the component that sends our traces to our OTLP Receiver (CollectorExporter) or to StdOut (ConsoleExporter). In this case, we are using CollectorExporter.

  2. SpanProcessor: It will handle how our traces will be sent. It requires one Exporter. This package provides us with two types:

    1. SimpleSpanProcessor: send the trace whenever a new trace is emitted.

    2. BatchSpanProcessor: collects the traces emitted within the configured delay, then sends them together.

  3. TracerProvider: Registers span processors, and is responsible for managing any tracers. It requires one or more SpanProcessor.

To simplify the setup and isolate the non-null-safe code, we create a wrapper class called BaseOtel.

Base OTel

Our wrapper class isolates and simplifies the OTel trace usages.

BaseOtel.dart
import 'dart:async';
import 'dart:convert';
import 'dart:io';

import 'package:http/http.dart';
import 'package:http/io_client.dart';
// ignore: import_of_legacy_library_into_null_safe
import 'package:opentelemetry/api.dart';
// ignore: import_of_legacy_library_into_null_safe
import 'package:opentelemetry/sdk.dart' as otel_sdk;

/// Static facade over the legacy (non-null-safe) `opentelemetry` package.
///
/// Groups the factories for exporters, span processors and tracer providers
/// together with the [withSpan] / [withSpanSync] helpers, which record a span
/// around a callback.
abstract class BaseOtel {
  /// Attributes merged into every span started by [withSpan] and
  /// [withSpanSync]. On a key clash these win over the per-call attributes.
  static final Map<String, dynamic> _spanAttributes = {};

  /// Attribute key under which the customer id is recorded.
  static const _customerIdAttributeKey = 'cuid';

  /// Creates a [otel_sdk.CollectorExporter] that sends spans to [host].
  static otel_sdk.CollectorExporter getCollectorExporter({
    required Uri host,
  }) {
    return otel_sdk.CollectorExporter(host);
  }

  /// Creates a [otel_sdk.CollectorExporter] for [host] whose HTTP client is a
  /// [NewRelicExporterClient] built from [apiKey].
  static otel_sdk.CollectorExporter getNewRelicExporter({
    required Uri host,
    required String apiKey,
  }) {
    return otel_sdk.CollectorExporter(
      host,
      httpClient: NewRelicExporterClient(apiKey),
    );
  }

  /// Creates a [otel_sdk.ConsoleExporter].
  static otel_sdk.ConsoleExporter getConsoleExporter() {
    return otel_sdk.ConsoleExporter();
  }

  /// The current OpenTelemetry [Context].
  static Context get context => Context.current;

  /// Runs the (possibly asynchronous) [fn] inside a span named [spanName] and
  /// completes with its result.
  ///
  /// The tracer is looked up by [instrumentationName]. When the current
  /// context carries no span, a wrapping span named `start|<spanName>` is
  /// created first and ended when [fn] settles; a span that was already
  /// present in the context is left open because this call does not own it.
  ///
  /// [attributes] are merged with the common attributes (for example the
  /// customer id) and attached to the span. [isInternal] selects
  /// `SpanKind.internal` (the default) or `SpanKind.server`.
  ///
  /// An error thrown by [fn] is recorded on the span, the span status is set
  /// to error, and the returned future completes with that error. An
  /// unsupported attribute value also completes the returned future with the
  /// resulting [UnsupportedError], before any span is started.
  ///
  /// NOTE(review): the default for [attributes] carries a placeholder
  /// `'key': 'value'` entry that ends up on every span when the caller omits
  /// the argument. It is kept for backward compatibility - confirm whether it
  /// is intended.
  static Future<T> withSpan<T>({
    required String instrumentationName,
    required String spanName,
    required FutureOr Function() fn,
    Map<String, dynamic> attributes = const {
      'key': 'value',
    },
    bool isInternal = true,
  }) {
    // Validate the attributes before any span is started: a failure here must
    // reach the caller through the returned future instead of leaving it
    // pending forever.
    final List<Attribute> spanAttributes;
    try {
      spanAttributes = _buildSpanAttributes(attributes);
    } catch (e, st) {
      return Future<T>.error(e, st);
    }

    final Completer<T> completer = Completer();
    final ctx = context;
    final tracer = getGlobalTracer(instrumentationName);
    final inheritedSpan = ctx.span;
    final cSpan = inheritedSpan ??
        tracer.startSpan(
          'start|$spanName',
          context: ctx,
        );
    ctx.withSpan(cSpan).execute(() async {
      final span = tracer.startSpan(
        spanName,
        attributes: spanAttributes,
        context: ctx,
        kind: isInternal ? SpanKind.internal : SpanKind.server,
      );
      try {
        final result = await fn();
        completer.complete(result);
        span.setStatus(StatusCode.ok);
      } catch (e, st) {
        span
          ..recordException(
            e,
            stackTrace: st,
          )
          ..setStatus(StatusCode.error);
        completer.completeError(e, st);
      } finally {
        span.end();
        // Only end the wrapping span when this call created it; an inherited
        // span is ended by whoever started it.
        if (inheritedSpan == null) {
          cSpan.end();
        }
      }
    });
    return completer.future;
  }

  /// Synchronous counterpart of [withSpan]: runs [fn] inside a span named
  /// [spanName] and returns its result.
  ///
  /// An error thrown by [fn] is recorded on the span, the span status is set
  /// to error, and the error is rethrown to the caller. Throws an
  /// [UnsupportedError] before any span is started when [attributes] contains
  /// a value of an unsupported type.
  ///
  /// NOTE(review): the default for [attributes] carries the same placeholder
  /// `'key': 'value'` entry as [withSpan] - confirm whether it is intended.
  static T withSpanSync<T>({
    required String instrumentationName,
    required String spanName,
    required T Function() fn,
    Map<String, dynamic> attributes = const {
      'key': 'value',
    },
    bool isInternal = true,
  }) {
    // Validate first so that a bad attribute does not leave an un-ended span.
    final spanAttributes = _buildSpanAttributes(attributes);

    final ctx = context;
    final tracer = getGlobalTracer(instrumentationName);
    final inheritedSpan = ctx.span;
    final cSpan = inheritedSpan ??
        tracer.startSpan(
          'start|$spanName',
          context: ctx,
        );
    late final T result;
    ctx.withSpan(cSpan).execute(() {
      final span = tracer.startSpan(
        spanName,
        attributes: spanAttributes,
        context: ctx,
        kind: isInternal ? SpanKind.internal : SpanKind.server,
      );
      try {
        result = fn();
        span.setStatus(StatusCode.ok);
      } catch (e, st) {
        span
          ..recordException(
            e,
            stackTrace: st,
          )
          ..setStatus(StatusCode.error);
        // Surface the original failure; otherwise the caller would only see
        // a LateInitializationError from reading the unassigned `result`.
        rethrow;
      } finally {
        span.end();
        // Only end the wrapping span when this call created it.
        if (inheritedSpan == null) {
          cSpan.end();
        }
      }
    });
    return result;
  }

  /// Creates a [otel_sdk.SimpleSpanProcessor] that uses [exporter].
  static otel_sdk.SimpleSpanProcessor getSimpleSpanProcessor(
      SpanExporter exporter) {
    return otel_sdk.SimpleSpanProcessor(exporter);
  }

  /// Creates a [otel_sdk.BatchSpanProcessor] that uses [exporter].
  ///
  /// [maxExportBatchSize] and [scheduledDelayMillis] are forwarded to the SDK
  /// as given, including when they are `null`.
  static otel_sdk.BatchSpanProcessor getBatchSpanProcessor(
    SpanExporter exporter, {
    int? maxExportBatchSize,
    int? scheduledDelayMillis,
  }) {
    return otel_sdk.BatchSpanProcessor(
      exporter,
      maxExportBatchSize: maxExportBatchSize,
      scheduledDelayMillis: scheduledDelayMillis,
    );
  }

  /// Creates a [TracerProvider] backed by the given span [processors].
  static TracerProvider getProvider(List<SpanProcessor> processors) {
    return otel_sdk.TracerProviderBase(
      processors: processors,
    );
  }

  /// Returns the tracer for [instumentationName] from the global tracer
  /// provider.
  static Tracer getGlobalTracer(String instumentationName) {
    return globalTracerProvider.getTracer(instumentationName);
  }

  /// Registers [tracer] as the global tracer provider.
  static set globalTracer(TracerProvider? tracer) {
    registerGlobalTracerProvider(tracer);
  }

  /// Sets or clears the customer id attached to every subsequent span.
  ///
  /// A non-null value is also set on the span of the current context, if
  /// there is one. Passing `null` only removes the common attribute.
  static set customerId(String? customerId) {
    if (customerId == null) {
      _spanAttributes.remove(_customerIdAttributeKey);
      return;
    }
    _spanAttributes[_customerIdAttributeKey] = customerId;
    context.span?.setAttribute(
        Attribute.fromString(_customerIdAttributeKey, customerId));
  }

  /// Merges [attributes] with the common span attributes and converts the
  /// result to SDK [Attribute]s, dropping entries whose value is `null`.
  ///
  /// Throws an [UnsupportedError] for a value of an unsupported type.
  static List<Attribute> _buildSpanAttributes(
    Map<String, dynamic> attributes,
  ) {
    final combined = <String, dynamic>{
      ...attributes,
      ..._spanAttributes,
    };
    return combined.entries
        .map(_spanAttributeMapping)
        .whereType<Attribute>()
        .toList();
  }

  /// Converts one map entry to an SDK [Attribute].
  ///
  /// Returns `null` for a `null` value. Throws an [UnsupportedError] when the
  /// value is not a `String`, `bool`, `int`, `double`, or a `List` typed with
  /// one of those element types.
  static Attribute? _spanAttributeMapping(MapEntry<String, dynamic> attr) {
    final key = attr.key;
    final value = attr.value;

    if (value == null) {
      return null;
    } else if (value is String) {
      return Attribute.fromString(key, value);
    } else if (value is List<String>) {
      return Attribute.fromStringList(key, value);
    } else if (value is bool) {
      return Attribute.fromBoolean(key, value);
    } else if (value is List<bool>) {
      return Attribute.fromBooleanList(key, value);
    } else if (value is int) {
      return Attribute.fromInt(key, value);
    } else if (value is List<int>) {
      return Attribute.fromIntList(key, value);
    } else if (value is double) {
      return Attribute.fromDouble(key, value);
    } else if (value is List<double>) {
      return Attribute.fromDoubleList(key, value);
    }
    throw UnsupportedError(
        'Otel Span Attribute does not support ${value.runtimeType}, key: $key');
  }
}

/// An [IOClient] that adds an `api-key` header to every POST request.
class NewRelicExporterClient extends IOClient {
  /// Value sent in the `api-key` header.
  final String apiKey;

  /// Creates a client for [apiKey], optionally wrapping the [inner]
  /// [HttpClient].
  NewRelicExporterClient(this.apiKey, {HttpClient? inner}) : super(inner);

  /// Sends the POST request with the caller's [headers] plus `api-key`.
  @override
  Future<Response> post(
    Uri url, {
    Map<String, String>? headers,
    Object? body,
    Encoding? encoding,
  }) {
    // `api-key` is written last so it replaces any caller-supplied value.
    final requestHeaders = <String, String>{
      if (headers != null) ...headers,
    };
    requestHeaders['api-key'] = apiKey;

    return super.post(
      url,
      headers: requestHeaders,
      body: body,
      encoding: encoding,
    );
  }
}

NewRelicExporterClient

An HTTP client that adds the API key to each POST request so we can send our traces to NewRelic. It is used as the client of CollectorExporter, and it serves as an example of the client to write when our OTLP Receiver requires authentication.

BaseOtel

The wrapper class exposes the non-null-safe SDK through a more null-safe API.

  • getCollectorExporter: create a CollectorExporter by providing the Server URI.

  • getNewRelicExporter: same as getCollectorExporter, but it uses NewRelicExporterClient as the HTTP client.

  • getConsoleExporter: create a ConsoleExporter.

  • context (getter): return current OTel context.

  • withSpan: record a trace that runs an async process or function.

  • withSpanSync: same as withSpan, but it runs a sync process or function.

  • getSimpleSpanProcessor: create a SimpleSpanProcessor, it requires a SpanExporter.

  • getBatchSpanProcessor: create a BatchSpanProcessor, it requires a SpanExporter.

  • getProvider: create a TracerProvider by providing the SpanProcessor(s).

  • getGlobalTracer: return the tracer for the given instrumentation name from the current global TracerProvider.

  • globalTracer (setter): set a TracerProvider to be global.

  • customerId (setter): set/unset customerId as a common span attribute and set it to the current span (if exists).

  • _spanAttributeMapping: a mapping function that converts MapEntry<String, dynamic> to supported SpanAttribute. Returns null if value null, throws an UnsupportedError if the value is not String, List<String>, bool, List<bool>, int, List<int>, double, List<double>.

Collecting Span

The SDK provides two ways to collect a span: passing the span explicitly, or using the current span. We chose to use the current span because it is the simplest way to collect the span. In general, to collect a span we only need to create it from a tracer.

// Look up a tracer from the globally registered tracer provider.
final tracer = globalTracerProvider.getTracer('instrumentationName');
// Start the span before the work it should cover.
final span = tracer.startSpan('doingWork');
// Our Code
// End the span once the work is done.
span.end();

But with our wrapper class, we can use withSpan or withSpanSync.

  • withSpan

    /// Loads every account from the repository, recorded as a `getAll` span.
    ///
    /// [forceRemote] is forwarded unchanged to the repository.
    Future<List<AccountEntity>> getAll({bool forceRemote = false}) =>
        BaseOtel.withSpan(
          instrumentationName: _instrumentationName,
          spanName: 'getAll',
          fn: () async =>
              await accountRepository.getAll(forceRemote: forceRemote),
        );

    We need to wrap our Bloc, Usecase, Repository Implementation, and Datasource functions this way so that their spans are recorded.

  • withSpanSync

    /// Runs a synchronous task inside a `doWork` span and returns its result.
    bool doWork() {
      return BaseOtel.withSpanSync(
        instrumentationName: _instrumentationName,
        // Name the span after this function, not after the `getAll` example.
        spanName: 'doWork',
        fn: () {
          // sync process
          // The callback must return the value that doWork() hands back.
          return true;
        },
      );
    }

Result:

ConsoleExporter
I/flutter ( 7351): {traceId: e7ae380b2c0a11482ca007768efe1729, parentId: 38159de1106c5e53, name: getPocketUsage, id: 4497c44191dd0846, timestamp: 1670219663168559000, duration: 1462468000, flags: 01, state: , status: StatusCode.ok}
I/flutter ( 7351): {traceId: e7ae380b2c0a11482ca007768efe1729, parentId: , name: start|getPocketUsage, id: 38159de1106c5e53, timestamp: 1670219663153294000, duration: 10876000, flags: 01, state: , status: StatusCode.unset}
I/flutter ( 7351): {traceId: e7ae380b2c0a11482ca007768efe1729, parentId: 38159de1106c5e53, name: getPocketUsage, id: 027dbfbbdf6d24bc, timestamp: 1670219663167886000, duration: 1467437000, flags: 01, state: , status: StatusCode.ok}
I/flutter ( 7351): {traceId: e7ae380b2c0a11482ca007768efe1729, parentId: , name: start|getPocketUsage, id: 38159de1106c5e53, timestamp: 1670219663153294000, duration: 10876000, flags: 01, state: , status: StatusCode.unset}

CollectorExporter

Time Series

Table Data

Trace

Summary:

Pros

Cons

Export/send traces processes are relatively fast (using protobuf).

Not null-safe. (Hopefully they migrate it soon: https://github.com/Workiva/opentelemetry-dart/issues/79 )

Since the BE already uses OTel, it will centralize the way we trace App Behavior.

No auto-instrumentation. Need to wrap all our functions to be recorded.

Provides us with the waterfall graph and the execution time.

Can trace the BE call to App Behavior

Can trace widget render time (Need to figure out how to get the flutter render event first)