001package io.prometheus.metrics.core.exemplars;
002
003import io.prometheus.metrics.tracer.common.SpanContext;
004import io.prometheus.metrics.model.snapshots.Exemplar;
005import io.prometheus.metrics.model.snapshots.Exemplars;
006import io.prometheus.metrics.model.snapshots.Labels;
007import io.prometheus.metrics.core.util.Scheduler;
008
009import java.util.ArrayList;
010import java.util.List;
011import java.util.concurrent.TimeUnit;
012import java.util.concurrent.atomic.AtomicBoolean;
013import java.util.function.LongSupplier;
014
015/**
016 * The ExemplarSampler selects Spans as exemplars.
017 * <p>
018 * There are two types of Exemplars: Regular exemplars are sampled implicitly if a supported tracing
019 * library is detected. Custom exemplars are provided explicitly in code, for example if a developer
020 * wants to make sure an Exemplar is created for a specific code path.
021 * <p>
022 * Spans will be marked as being an Exemplar by calling {@link SpanContext#markCurrentSpanAsExemplar()}.
023 * The tracer implementation should set a Span attribute to mark the current Span as an Exemplar.
024 * This attribute can be used by a trace sampling algorithm to make sure traces with Exemplars are sampled.
025 * <p>
 * The ExemplarSampler is rate-limited, so only a small fraction of Spans will be marked as Exemplars in
027 * an application with a large number of requests.
028 * <p>
029 * See {@link ExemplarSamplerConfig} for configuration options.
030 */
031public class ExemplarSampler {
032
033    private final ExemplarSamplerConfig config;
034    private final Exemplar[] exemplars;
035    private final Exemplar[] customExemplars; // Separate from exemplars, because we don't want custom exemplars
036    // to be overwritten by automatic exemplar sampling. exemplars.lengt == customExemplars.length
037    private final AtomicBoolean acceptingNewExemplars = new AtomicBoolean(true);
038    private final AtomicBoolean acceptingNewCustomExemplars = new AtomicBoolean(true);
039    private final SpanContext spanContext; // may be null, in that case SpanContextSupplier.getSpanContext() is used.
040
041    public ExemplarSampler(ExemplarSamplerConfig config) {
042        this(config, null);
043    }
044
045    /**
046     * Constructor with an additional {code spanContext} argument.
047     * This is useful for testing, but may also be useful in some production scenarios.
048     * If {@code spanContext != null} that spanContext is used and
049     * {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier SpanContextSupplier} is not used.
050     * If {@code spanContext == null}
051     * {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier#getSpanContext() SpanContextSupplier.getSpanContext()}
052     * is called to find a span context.
053     */
054    public ExemplarSampler(ExemplarSamplerConfig config, SpanContext spanContext) {
055        this.config = config;
056        this.exemplars = new Exemplar[config.getNumberOfExemplars()];
057        this.customExemplars = new Exemplar[exemplars.length];
058        this.spanContext = spanContext;
059    }
060
061    public Exemplars collect() {
062        // this may run in parallel with observe()
063        long now = System.currentTimeMillis();
064        List<Exemplar> result = new ArrayList<>(exemplars.length);
065        for (int i = 0; i < customExemplars.length; i++) {
066            Exemplar exemplar = customExemplars[i];
067            if (exemplar != null) {
068                if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
069                    customExemplars[i] = null;
070                } else {
071                    result.add(exemplar);
072                }
073            }
074        }
075        for (int i = 0; i < exemplars.length && result.size() < exemplars.length; i++) {
076            Exemplar exemplar = exemplars[i];
077            if (exemplar != null) {
078                if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
079                    exemplars[i] = null;
080                } else {
081                    result.add(exemplar);
082                }
083            }
084        }
085        return Exemplars.of(result);
086    }
087
088    public void reset() {
089        for (int i = 0; i < exemplars.length; i++) {
090            exemplars[i] = null;
091            customExemplars[i] = null;
092        }
093    }
094
095    public void observe(double value) {
096        if (!acceptingNewExemplars.get()) {
097            return; // This is the hot path in a high-throughput application and should be as efficient as possible.
098        }
099        rateLimitedObserve(acceptingNewExemplars, value, exemplars, () -> doObserve(value));
100    }
101
102    public void observeWithExemplar(double value, Labels labels) {
103        if (!acceptingNewCustomExemplars.get()) {
104            return; // This is the hot path in a high-throughput application and should be as efficient as possible.
105        }
106        rateLimitedObserve(acceptingNewCustomExemplars, value, customExemplars, () -> doObserveWithExemplar(value, labels));
107    }
108
109    private long doObserve(double value) {
110        if (exemplars.length == 1) {
111            return doObserveSingleExemplar(value);
112        } else if (config.getHistogramClassicUpperBounds() != null) {
113            return doObserveWithUpperBounds(value);
114        } else {
115            return doObserveWithoutUpperBounds(value);
116        }
117    }
118
119    private long doObserveSingleExemplar(double value) {
120        long now = System.currentTimeMillis();
121        Exemplar current = exemplars[0];
122        if (current == null || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
123            return updateExemplar(0, value, now);
124        }
125        return 0;
126    }
127
128    private long doObserveWithUpperBounds(double value) {
129        long now = System.currentTimeMillis();
130        double[] upperBounds = config.getHistogramClassicUpperBounds();
131        for (int i = 0; i < upperBounds.length; i++) {
132            if (value <= upperBounds[i]) {
133                Exemplar previous = exemplars[i];
134                if (previous == null || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
135                    return updateExemplar(i, value, now);
136                } else {
137                    return 0;
138                }
139            }
140        }
141        return 0; // will never happen, as upperBounds contains +Inf
142    }
143
144    private long doObserveWithoutUpperBounds(double value) {
145        final long now = System.currentTimeMillis();
146        Exemplar smallest = null;
147        int smallestIndex = -1;
148        Exemplar largest = null;
149        int largestIndex = -1;
150        int nullIndex = -1;
151        for (int i = exemplars.length - 1; i >= 0; i--) {
152            Exemplar exemplar = exemplars[i];
153            if (exemplar == null) {
154                nullIndex = i;
155            } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
156                exemplars[i] = null;
157                nullIndex = i;
158            } else {
159                if (smallest == null || exemplar.getValue() < smallest.getValue()) {
160                    smallest = exemplar;
161                    smallestIndex = i;
162                }
163                if (largest == null || exemplar.getValue() > largest.getValue()) {
164                    largest = exemplar;
165                    largestIndex = i;
166                }
167            }
168        }
169        if (nullIndex >= 0) {
170            return updateExemplar(nullIndex, value, now);
171        }
172        if (now - smallest.getTimestampMillis() > config.getMinRetentionPeriodMillis() && value < smallest.getValue()) {
173            return updateExemplar(smallestIndex, value, now);
174        }
175        if (now - largest.getTimestampMillis() > config.getMinRetentionPeriodMillis() && value > largest.getValue()) {
176            return updateExemplar(largestIndex, value, now);
177        }
178        long oldestTimestamp = 0;
179        int oldestIndex = -1;
180        for (int i = 0; i < exemplars.length; i++) {
181            Exemplar exemplar = exemplars[i];
182            if (exemplar != null && exemplar != smallest && exemplar != largest) {
183                if (oldestTimestamp == 0 || exemplar.getTimestampMillis() < oldestTimestamp) {
184                    oldestTimestamp = exemplar.getTimestampMillis();
185                    oldestIndex = i;
186                }
187            }
188        }
189        if (oldestIndex != -1 && now - oldestTimestamp > config.getMinRetentionPeriodMillis()) {
190            return updateExemplar(oldestIndex, value, now);
191        }
192        return 0;
193    }
194
195    // Returns the timestamp of the newly added Exemplar (which is System.currentTimeMillis())
196    // or 0 if no Exemplar was added.
197    private long doObserveWithExemplar(double amount, Labels labels) {
198        if (customExemplars.length == 1) {
199            return doObserveSingleExemplar(amount, labels);
200        } else if (config.getHistogramClassicUpperBounds() != null) {
201            return doObserveWithExemplarWithUpperBounds(amount, labels);
202        } else {
203            return doObserveWithExemplarWithoutUpperBounds(amount, labels);
204        }
205    }
206
207    private long doObserveSingleExemplar(double amount, Labels labels) {
208        long now = System.currentTimeMillis();
209        Exemplar current = customExemplars[0];
210        if (current == null || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
211            return updateCustomExemplar(0, amount, labels, now);
212        }
213        return 0;
214    }
215
216    private long doObserveWithExemplarWithUpperBounds(double value, Labels labels) {
217        long now = System.currentTimeMillis();
218        double[] upperBounds = config.getHistogramClassicUpperBounds();
219        for (int i = 0; i < upperBounds.length; i++) {
220            if (value <= upperBounds[i]) {
221                Exemplar previous = customExemplars[i];
222                if (previous == null || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
223                    return updateCustomExemplar(i, value, labels, now);
224                } else {
225                    return 0;
226                }
227            }
228        }
229        return 0; // will never happen, as upperBounds contains +Inf
230    }
231
232    private long doObserveWithExemplarWithoutUpperBounds(double amount, Labels labels) {
233        final long now = System.currentTimeMillis();
234        int nullPos = -1;
235        int oldestPos = -1;
236        Exemplar oldest = null;
237        for (int i = customExemplars.length - 1; i >= 0; i--) {
238            Exemplar exemplar = customExemplars[i];
239            if (exemplar == null) {
240                nullPos = i;
241            } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
242                customExemplars[i] = null;
243                nullPos = i;
244            } else {
245                if (oldest == null || exemplar.getTimestampMillis() < oldest.getTimestampMillis()) {
246                    oldest = exemplar;
247                    oldestPos = i;
248                }
249            }
250        }
251        if (nullPos != -1) {
252            return updateCustomExemplar(nullPos, amount, labels, now);
253        } else if (now - oldest.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
254            return updateCustomExemplar(oldestPos, amount, labels, now);
255        } else {
256            return 0;
257        }
258    }
259
260    /**
261     * Observing requires a system call to {@link System#currentTimeMillis()},
262     * and it requires iterating over the existing exemplars to check if one of the existing
263     * exemplars can be replaced.
264     * <p>
265     * To avoid performance issues, we rate limit observing exemplars to
266     * {@link ExemplarSamplerConfig#getSampleIntervalMillis()} milliseconds.
267     */
268    private void rateLimitedObserve(AtomicBoolean accepting, double value, Exemplar[] exemplars, LongSupplier observeFunc) {
269        if (Double.isNaN(value)) {
270            return;
271        }
272        if (!accepting.compareAndSet(true, false)) {
273            return;
274        }
275        // observeFunc returns the current timestamp or 0 if no Exemplar was added.
276        long now = observeFunc.getAsLong();
277        long sleepTime = now == 0 ? config.getSampleIntervalMillis() : durationUntilNextExemplarExpires(now);
278        Scheduler.schedule(() -> accepting.compareAndSet(false, true), sleepTime, TimeUnit.MILLISECONDS);
279    }
280
281    private long durationUntilNextExemplarExpires(long now) {
282        long oldestTimestamp = now;
283        for (Exemplar exemplar : exemplars) {
284            if (exemplar == null) {
285                return config.getSampleIntervalMillis();
286            } else if (exemplar.getTimestampMillis() < oldestTimestamp) {
287                oldestTimestamp = exemplar.getTimestampMillis();
288            }
289        }
290        long oldestAge = now - oldestTimestamp;
291        if (oldestAge < config.getMinRetentionPeriodMillis()) {
292            return config.getMinRetentionPeriodMillis() - oldestAge;
293        }
294        return config.getSampleIntervalMillis();
295    }
296
297    private long updateCustomExemplar(int index, double value, Labels labels, long now) {
298        if (!labels.contains(Exemplar.TRACE_ID) && !labels.contains(Exemplar.SPAN_ID)) {
299            labels = labels.merge(doSampleExemplar());
300        }
301        customExemplars[index] = Exemplar.builder()
302                .value(value)
303                .labels(labels)
304                .timestampMillis(now)
305                .build();
306        return now;
307    }
308
309    private long updateExemplar(int index, double value, long now) {
310        Labels traceLabels = doSampleExemplar();
311        if (!traceLabels.isEmpty()) {
312            exemplars[index] = Exemplar.builder()
313                    .value(value)
314                    .labels(traceLabels)
315                    .timestampMillis(now)
316                    .build();
317            return now;
318        } else {
319            return 0;
320        }
321    }
322
323    private Labels doSampleExemplar() {
324        // Using the qualified name so that Micrometer can exclude the dependency on prometheus-metrics-tracer-initializer
325        // as they provide their own implementation of SpanContextSupplier.
326        // If we had an import statement for SpanContextSupplier the dependency would be needed in any case.
327        SpanContext spanContext = this.spanContext != null ? this.spanContext : io.prometheus.metrics.tracer.initializer.SpanContextSupplier.getSpanContext();
328        try {
329            if (spanContext != null) {
330                if (spanContext.isCurrentSpanSampled()) {
331                    String spanId = spanContext.getCurrentSpanId();
332                    String traceId = spanContext.getCurrentTraceId();
333                    if (spanId != null && traceId != null) {
334                        spanContext.markCurrentSpanAsExemplar();
335                        return Labels.of(Exemplar.TRACE_ID, traceId, Exemplar.SPAN_ID, spanId);
336                    }
337                }
338            }
339        } catch (NoClassDefFoundError ignored) {
340        }
341        return Labels.EMPTY;
342    }
343}