Skip to content

Commit

Permalink
Support tracing trace query(SkyWalking and Zipkin) for debugging. (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
wankai123 committed Jun 27, 2024
1 parent b04fd3c commit 05373d6
Show file tree
Hide file tree
Showing 49 changed files with 1,260 additions and 619 deletions.
5 changes: 3 additions & 2 deletions docs/en/api/query-protocol.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,12 @@ full log text fuzzy queries, while others do not due to considerations related t

### Trace
```graphql
# Param: if debug is true, query tracing is enabled and the DebuggingTrace is returned in the result.
extend type Query {
# Search segment list with given conditions
queryBasicTraces(condition: TraceQueryCondition): TraceBrief
queryBasicTraces(condition: TraceQueryCondition, debug: Boolean): TraceBrief
# Read the specific trace ID with given trace ID
queryTrace(traceId: ID!): Trace
queryTrace(traceId: ID!, debug: Boolean): Trace
# Read the list of searchable keys
queryTraceTagAutocompleteKeys(duration: Duration!):[String!]
# Search the available value options of the given key.
Expand Down
1 change: 1 addition & 0 deletions docs/en/changes/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* Make more proper histogram buckets for the `persistence_timer_bulk_prepare_latency`,
`persistence_timer_bulk_execute_latency` and `persistence_timer_bulk_all_latency` metrics in PersistenceTimer.
* [Break Change] Update Nacos version to 2.3.2. Nacos 1.x server can't serve as cluster coordinator and configuration server.
* Support tracing the trace queries (SkyWalking and Zipkin) for debugging.

#### UI

Expand Down
317 changes: 205 additions & 112 deletions docs/en/debugging/query-tracing.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext;
import org.apache.skywalking.oap.server.library.util.StringUtil;

import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace.TRACE_CONTEXT;
import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext.TRACE_CONTEXT;

@Slf4j
public abstract class MQEVisitorBase extends MQEParserBaseVisitor<ExpressionResult> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
import org.apache.skywalking.oap.server.core.storage.annotation.ValueColumnMetadata;
import org.apache.skywalking.oap.server.library.util.StringUtil;

import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace.TRACE_CONTEXT;
import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext.TRACE_CONTEXT;

@Data
@ToString
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace.TRACE_CONTEXT;
import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext.TRACE_CONTEXT;

/**
* RunningRule represents each rule in running status. Based on the {@link AlarmRule} definition,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingSpan;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext;
import org.apache.skywalking.oap.server.library.util.StringUtil;
Expand Down Expand Up @@ -70,7 +69,7 @@ private List<SelectedRecord> invokeSortMetrics(TopNCondition condition, Duration
final String serviceId = IDManager.ServiceID.buildId(condition.getParentService(), condition.getNormal());
additionalConditions.add(new KeyValue(InstanceTraffic.SERVICE_ID, serviceId));
}
final List<SelectedRecord> selectedRecords = getAggregationQueryDAO().sortMetrics(
final List<SelectedRecord> selectedRecords = getAggregationQueryDAO().sortMetricsDebuggable(
condition, valueCName, duration, additionalConditions);
selectedRecords.forEach(selectedRecord -> {
switch (condition.getScope()) {
Expand Down Expand Up @@ -115,7 +114,7 @@ private List<SelectedRecord> invokeSortMetrics(TopNCondition condition, Duration

public List<SelectedRecord> sortMetrics(TopNCondition condition,
Duration duration) throws IOException {
DebuggingTraceContext traceContext = DebuggingTrace.TRACE_CONTEXT.get();
DebuggingTraceContext traceContext = DebuggingTraceContext.TRACE_CONTEXT.get();
DebuggingSpan span = null;
try {
if (traceContext != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import org.apache.skywalking.oap.server.library.module.ModuleManager;
import org.apache.skywalking.oap.server.library.module.Service;

import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace.TRACE_CONTEXT;
import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext.TRACE_CONTEXT;

@Slf4j
public class MetricsQueryService implements Service {
Expand Down Expand Up @@ -80,7 +80,7 @@ private MetricsValues invokeReadMetricsValues(MetricsCondition condition, Durati
if (!condition.senseScope() || !condition.getEntity().isValid()) {
return new MetricsValues();
}
return getMetricQueryDAO().readMetricsValues(
return getMetricQueryDAO().readMetricsValuesDebuggable(
condition, ValueColumnMetadata.INSTANCE.getValueCName(condition.getName()), duration);
}

Expand Down Expand Up @@ -109,7 +109,7 @@ private List<MetricsValues> invokeReadLabeledMetricsValues(MetricsCondition cond
if (!condition.senseScope() || !condition.getEntity().isValid()) {
return Collections.emptyList();
}
return getMetricQueryDAO().readLabeledMetricsValues(
return getMetricQueryDAO().readLabeledMetricsValuesDebuggable(
condition, ValueColumnMetadata.INSTANCE.getValueCName(condition.getName()), labels, duration);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import java.io.IOException;
import java.util.List;

import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace.TRACE_CONTEXT;
import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext.TRACE_CONTEXT;

public class RecordQueryService implements Service {
private final ModuleManager moduleManager;
Expand All @@ -56,7 +56,7 @@ private List<Record> invokeReadRecords(RecordCondition condition, Duration durat
if (!condition.senseScope() || !condition.getParentEntity().isValid()) {
return Collections.emptyList();
}
return getRecordsQueryDAO().readRecords(
return getRecordsQueryDAO().readRecordsDebuggable(
condition, ValueColumnMetadata.INSTANCE.getValueCName(condition.getName()), duration);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
import org.apache.skywalking.oap.server.core.query.type.Trace;
import org.apache.skywalking.oap.server.core.query.type.TraceBrief;
import org.apache.skywalking.oap.server.core.query.type.TraceState;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingSpan;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext;
import org.apache.skywalking.oap.server.core.storage.StorageModule;
import org.apache.skywalking.oap.server.core.storage.query.ISpanAttachedEventQueryDAO;
import org.apache.skywalking.oap.server.core.storage.query.ITraceQueryDAO;
Expand All @@ -61,6 +63,7 @@
import org.apache.skywalking.oap.server.library.util.CollectionUtils;

import static java.util.Objects.nonNull;
import static org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext.TRACE_CONTEXT;

public class TraceQueryService implements Service {

Expand Down Expand Up @@ -107,18 +110,59 @@ public TraceBrief queryBasicTraces(final String serviceId,
final Pagination paging,
final Duration duration,
final List<Tag> tags) throws IOException {
PaginationUtils.Page page = PaginationUtils.INSTANCE.exchange(paging);

return getTraceQueryDAO().queryBasicTraces(
duration, minTraceDuration, maxTraceDuration, serviceId, serviceInstanceId, endpointId,
traceId, page.getLimit(), page.getFrom(), traceState, queryOrder, tags
);
DebuggingTraceContext traceContext = TRACE_CONTEXT.get();
DebuggingSpan span = null;
try {
if (traceContext != null) {
StringBuilder msg = new StringBuilder();
span = traceContext.createSpan("Query Service: queryBasicTraces");
msg.append("Condition: ServiceId: ").append(serviceId)
.append(", ServiceInstanceId: ").append(serviceInstanceId)
.append(", EndpointId: ").append(endpointId)
.append(", TraceId: ").append(traceId)
.append(", MinTraceDuration: ").append(minTraceDuration)
.append(", MaxTraceDuration: ").append(maxTraceDuration)
.append(", TraceState: ").append(traceState)
.append(", QueryOrder: ").append(queryOrder)
.append(", Pagination: ").append(paging)
.append(", Duration: ").append(duration)
.append(", Tags: ").append(tags);
span.setMsg(msg.toString());
}
PaginationUtils.Page page = PaginationUtils.INSTANCE.exchange(paging);

return getTraceQueryDAO().queryBasicTracesDebuggable(
duration, minTraceDuration, maxTraceDuration, serviceId, serviceInstanceId, endpointId,
traceId, page.getLimit(), page.getFrom(), traceState, queryOrder, tags
);
} finally {
if (traceContext != null && span != null) {
traceContext.stopSpan(span);
}
}
}

/**
 * Queries a trace by its ID and, when a debugging context is bound to the current thread,
 * wraps the query in a debugging span named "Query Service: queryTrace".
 *
 * @param traceId the ID of the trace to read
 * @return the result of {@code invokeQueryTrace(traceId)}
 * @throws IOException if the underlying query fails
 */
public Trace queryTrace(final String traceId) throws IOException {
    DebuggingTraceContext traceContext = TRACE_CONTEXT.get();
    DebuggingSpan span = null;
    try {
        if (traceContext != null) {
            span = traceContext.createSpan("Query Service: queryTrace");
            // Record the query condition on the span. The message used to be built but
            // never attached, so the span carried no condition, unlike queryBasicTraces.
            span.setMsg("Condition: TraceId: " + traceId);
        }
        return invokeQueryTrace(traceId);
    } finally {
        // Stop the span even when the query throws; skip when debugging is off or
        // the span was never created.
        if (traceContext != null && span != null) {
            traceContext.stopSpan(span);
        }
    }
}

private Trace invokeQueryTrace(final String traceId) throws IOException {
Trace trace = new Trace();

List<SegmentRecord> segmentRecords = getTraceQueryDAO().queryByTraceId(traceId);
List<SegmentRecord> segmentRecords = getTraceQueryDAO().queryByTraceIdDebuggable(traceId);
if (segmentRecords.isEmpty()) {
trace.getSpans().addAll(getTraceQueryDAO().doFlexibleTraceQuery(traceId));
} else {
Expand Down Expand Up @@ -147,8 +191,8 @@ public Trace queryTrace(final String traceId) throws IOException {

if (CollectionUtils.isNotEmpty(sortedSpans)) {
final List<SpanAttachedEventRecord> spanAttachedEvents = getSpanAttachedEventQueryDAO().
querySpanAttachedEvents(SpanAttachedEventTraceType.SKYWALKING, Arrays.asList(traceId));
appendAttachedEventsToSpan(sortedSpans, spanAttachedEvents);
querySpanAttachedEventsDebuggable(SpanAttachedEventTraceType.SKYWALKING, Arrays.asList(traceId));
appendAttachedEventsToSpanDebuggable(sortedSpans, spanAttachedEvents);
}

trace.getSpans().clear();
Expand Down Expand Up @@ -273,6 +317,22 @@ private void findChildren(List<Span> spans, Span parentSpan, List<Span> children
});
}

/**
 * Delegates to {@code appendAttachedEventsToSpan}, recording the call as a debugging span
 * when a debugging context is bound to the current thread.
 *
 * @param spans  the spans passed through to {@code appendAttachedEventsToSpan}
 * @param events the attached event records passed through to {@code appendAttachedEventsToSpan}
 * @throws InvalidProtocolBufferException propagated from {@code appendAttachedEventsToSpan}
 */
private void appendAttachedEventsToSpanDebuggable(List<Span> spans, List<SpanAttachedEventRecord> events) throws InvalidProtocolBufferException {
    final DebuggingTraceContext debugContext = DebuggingTraceContext.TRACE_CONTEXT.get();
    if (debugContext == null) {
        // No debugging context on this thread: plain delegation, no span bookkeeping.
        appendAttachedEventsToSpan(spans, events);
        return;
    }

    final DebuggingSpan opened = debugContext.createSpan("Query Service : appendAttachedEventsToSpan");
    try {
        appendAttachedEventsToSpan(spans, events);
    } finally {
        // Close the span whether or not the delegate threw.
        if (opened != null) {
            debugContext.stopSpan(opened);
        }
    }
}

private void appendAttachedEventsToSpan(List<Span> spans, List<SpanAttachedEventRecord> events) throws InvalidProtocolBufferException {
if (CollectionUtils.isEmpty(events)) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import org.apache.skywalking.oap.server.core.analysis.manual.searchtag.Tag;
import org.apache.skywalking.oap.server.core.query.type.Pagination;
import org.apache.skywalking.oap.server.core.query.type.QueryOrder;
import org.apache.skywalking.oap.server.core.query.type.TraceState;

@Getter
@Setter
@ToString
public class TraceQueryCondition {
private String serviceId;
private String serviceInstanceId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;

@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@ToString
public class Pagination {
private int pageNum;
private int pageSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace;

@Getter
public class Trace {
private final List<Span> spans;
//For OAP internal query debugging
@Setter
private DebuggingTrace debuggingTrace;

public Trace() {
this.spans = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,15 @@

import java.util.ArrayList;
import java.util.List;
import lombok.Setter;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTrace;

@Getter
public class TraceBrief {
private final List<BasicTrace> traces;
//For OAP internal query debugging
@Setter
private DebuggingTrace debuggingTrace;

public TraceBrief() {
this.traces = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

@Getter
public class DebuggingTrace {
public final static ThreadLocal<DebuggingTraceContext> TRACE_CONTEXT = new ThreadLocal<>();
private final String traceId;
private final String condition;
private final long startTime;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

@Getter
public class DebuggingTraceContext {
public final static ThreadLocal<DebuggingTraceContext> TRACE_CONTEXT = new ThreadLocal<>();
private final DebuggingTrace execTrace;
private final Stack<DebuggingSpan> spanStack = new Stack<>();
private int spanIdGenerator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import org.apache.skywalking.oap.server.core.query.input.TopNCondition;
import org.apache.skywalking.oap.server.core.query.type.KeyValue;
import org.apache.skywalking.oap.server.core.query.type.SelectedRecord;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingSpan;
import org.apache.skywalking.oap.server.core.query.type.debugging.DebuggingTraceContext;
import org.apache.skywalking.oap.server.core.storage.DAO;

/**
Expand All @@ -32,6 +34,25 @@
* @since 8.0.0
*/
public interface IAggregationQueryDAO extends DAO {
/**
 * Runs {@link #sortMetrics} and, when a debugging context is bound to the current thread,
 * records the call as a debugging span named "Query Dao: sortMetrics" whose message lists
 * the query arguments.
 *
 * @param condition            passed through to {@code sortMetrics}
 * @param valueColumnName      passed through to {@code sortMetrics}
 * @param duration             passed through to {@code sortMetrics}
 * @param additionalConditions passed through to {@code sortMetrics}
 * @return whatever {@code sortMetrics} returns for the same arguments
 * @throws IOException propagated from {@code sortMetrics}
 */
default List<SelectedRecord> sortMetricsDebuggable(final TopNCondition condition,
                                                   final String valueColumnName,
                                                   final Duration duration,
                                                   final List<KeyValue> additionalConditions) throws IOException {
    final DebuggingTraceContext debugContext = DebuggingTraceContext.TRACE_CONTEXT.get();
    if (debugContext == null) {
        // No debugging context on this thread: go straight to the real query.
        return sortMetrics(condition, valueColumnName, duration, additionalConditions);
    }

    DebuggingSpan daoSpan = null;
    try {
        daoSpan = debugContext.createSpan("Query Dao: sortMetrics");
        final String conditionMsg = "Condition: TopNCondition: " + condition
            + ", ValueColumnName: " + valueColumnName
            + ", Duration: " + duration
            + ", AdditionalConditions: " + additionalConditions;
        daoSpan.setMsg(conditionMsg);
        return sortMetrics(condition, valueColumnName, duration, additionalConditions);
    } finally {
        // Stop the span on both the normal and the exceptional path.
        if (daoSpan != null) {
            debugContext.stopSpan(daoSpan);
        }
    }
}

List<SelectedRecord> sortMetrics(TopNCondition condition,
String valueColumnName,
Duration duration,
Expand Down
Loading

0 comments on commit 05373d6

Please sign in to comment.