Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public static class Counter {
public static final String TXS = "tron:txs";
public static final String MINER = "tron:miner";
public static final String BLOCK_FORK = "tron:block_fork";
public static final String SR_SET_CHANGE = "tron:sr_set_change";
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name tron:sr_set_change can be misleading — it sounds like it tracks changes to the entire SR set, when it actually monitors rotation of the top 27 block-producing SRs (Active Witnesses).

Suggested alternatives for better clarity:

  • tron:active_sr_change

  • tron:active_witness_change

  • tron:top27_sr_change

This way, operators can immediately understand what the metric represents when viewing Prometheus dashboards.

public static final String P2P_ERROR = "tron:p2p_error";
public static final String P2P_DISCONNECT = "tron:p2p_disconnect";
public static final String INTERNAL_SERVICE_FAIL = "tron:internal_service_fail";
Expand Down Expand Up @@ -62,6 +63,7 @@ public static class Histogram {
public static final String MESSAGE_PROCESS_LATENCY = "tron:message_process_latency_seconds";
public static final String BLOCK_FETCH_LATENCY = "tron:block_fetch_latency_seconds";
public static final String BLOCK_RECEIVE_DELAY = "tron:block_receive_delay_seconds";
public static final String BLOCK_TRANSACTION_COUNT = "tron:block_transaction_count";

private Histogram() {
throw new IllegalStateException("Histogram");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ public static class Counter {
public static final String TXS_FAIL_SIG = "sig";
public static final String TXS_FAIL_TAPOS = "tapos";
public static final String TXS_FAIL_DUP = "dup";
public static final String SR_ADD = "add";
public static final String SR_REMOVE = "remove";

private Counter() {
throw new IllegalStateException("Counter");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class MetricsCounter {
init(MetricKeys.Counter.TXS, "tron txs info .", "type", "detail");
init(MetricKeys.Counter.MINER, "tron miner info .", "miner", "type");
init(MetricKeys.Counter.BLOCK_FORK, "tron block fork info .", "type");
init(MetricKeys.Counter.SR_SET_CHANGE, "tron sr set change .", "action", "witness");
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The witness label uses addresses. Although the Active SR pool is relatively limited (27 slots), as SRs rotate in and out over time, label values will accumulate monotonically — they only grow, never shrink. This is a known Prometheus anti-pattern.

Practical impact assessment:

  • TRON's SR candidate pool is not infinite, and rotation frequency is low, so it won't explode in the short term

  • However, as a long-running node metric, months or years of operation could still accumulate a significant number of time series

Possible approaches:

  • If the address dimension is truly needed, keep the current design, but document this characteristic

  • Or remove the witness label entirely, keeping only action (add/remove), and output address details via logs instead. Follow the principle that Prometheus should be used for alerting and awareness, while logs are for precise investigation and root-cause analysis.

init(MetricKeys.Counter.P2P_ERROR, "tron p2p error info .", "type");
init(MetricKeys.Counter.P2P_DISCONNECT, "tron p2p disconnect .", "type");
init(MetricKeys.Counter.INTERNAL_SERVICE_FAIL, "internal Service fail.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
public class MetricsHistogram {

private static final Map<String, Histogram> container = new ConcurrentHashMap<>();
private static final String MINER_LABEL = "miner";

static {
init(MetricKeys.Histogram.INTERNAL_SERVICE_LATENCY, "Internal Service latency.",
Expand All @@ -20,7 +21,7 @@ public class MetricsHistogram {
init(MetricKeys.Histogram.JSONRPC_SERVICE_LATENCY, "JsonRpc Service latency.",
"method");
init(MetricKeys.Histogram.MINER_LATENCY, "miner latency.",
"miner");
MINER_LABEL);
init(MetricKeys.Histogram.PING_PONG_LATENCY, "node ping pong latency.");
init(MetricKeys.Histogram.VERIFY_SIGN_LATENCY, "verify sign latency for trx , block.",
"type");
Expand All @@ -36,7 +37,7 @@ public class MetricsHistogram {
init(MetricKeys.Histogram.PROCESS_TRANSACTION_LATENCY, "process transaction latency.",
"type", "contract");
init(MetricKeys.Histogram.MINER_DELAY, "miner delay time, actualTime - planTime.",
"miner");
MINER_LABEL);
init(MetricKeys.Histogram.UDP_BYTES, "udp_bytes traffic.",
"type");
init(MetricKeys.Histogram.TCP_BYTES, "tcp_bytes traffic.",
Expand All @@ -48,6 +49,11 @@ public class MetricsHistogram {
init(MetricKeys.Histogram.BLOCK_FETCH_LATENCY, "fetch block latency.");
init(MetricKeys.Histogram.BLOCK_RECEIVE_DELAY,
"receive block delay time, receiveTime - blockTime.");

init(MetricKeys.Histogram.BLOCK_TRANSACTION_COUNT,
"Distribution of transaction counts per block.",
new double[]{0, 10, 50, 100, 200, 500, 1000, 2000, 5000, 10000},
MINER_LABEL);
}

private MetricsHistogram() {
Expand All @@ -62,6 +68,17 @@ private static void init(String name, String help, String... labels) {
.register());
}

/**
 * Registers a histogram metric with explicit bucket boundaries and caches it
 * in the container keyed by its metric name.
 *
 * @param name    metric name, used both for Prometheus registration and as the
 *                container key
 * @param help    human-readable description exposed on the metrics endpoint
 * @param buckets upper bounds for the histogram buckets; when {@code null} or
 *                empty, the Prometheus client's default buckets are kept
 * @param labels  label names attached to every sample of this metric
 */
private static void init(String name, String help, double[] buckets, String... labels) {
  Histogram.Builder histogramBuilder =
      Histogram.build().name(name).help(help).labelNames(labels);
  boolean hasCustomBuckets = buckets != null && buckets.length > 0;
  if (hasCustomBuckets) {
    // Builder is fluent; reassignment keeps the same instance.
    histogramBuilder = histogramBuilder.buckets(buckets);
  }
  container.put(name, histogramBuilder.register());
}

static Histogram.Timer startTimer(String key, String... labels) {
if (Metrics.enabled()) {
Histogram histogram = container.get(key);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import com.codahale.metrics.Counter;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import lombok.Getter;
import lombok.Setter;
import org.bouncycastle.util.encoders.Hex;
Expand Down Expand Up @@ -42,6 +45,9 @@ public class BlockChainMetricManager {
private long failProcessBlockNum = 0;
@Setter
private String failProcessBlockReason = "";
private final Set<String> lastActiveWitnesses = ConcurrentHashMap.newKeySet();
// To control SR set change metric update logic, -1 means not initialized
private long lastNextMaintenanceTime = -1;

public BlockChainInfo getBlockChainInfo() {
BlockChainInfo blockChainInfo = new BlockChainInfo();
Expand Down Expand Up @@ -169,6 +175,46 @@ public void applyBlock(BlockCapsule block) {
Metrics.counterInc(MetricKeys.Counter.TXS, block.getTransactions().size(),
MetricLabels.Counter.TXS_SUCCESS, MetricLabels.Counter.TXS_SUCCESS);
}
// Record transaction count distribution for all blocks (including empty blocks)
int txCount = block.getTransactions().size();
Metrics.histogramObserve(MetricKeys.Histogram.BLOCK_TRANSACTION_COUNT, txCount,
StringUtil.encode58Check(address));

// SR set change detection
long nextMaintenanceTime = dbManager.getDynamicPropertiesStore().getNextMaintenanceTime();
if (lastNextMaintenanceTime == -1) {
lastNextMaintenanceTime = nextMaintenanceTime;
lastActiveWitnesses.addAll(chainBaseManager.getWitnessScheduleStore().getActiveWitnesses()
.stream().map(w -> Hex.toHexString(w.toByteArray())).collect(Collectors.toSet()));
} else if (nextMaintenanceTime != lastNextMaintenanceTime) {
Set<String> currentWitnesses = chainBaseManager.getWitnessScheduleStore().getActiveWitnesses()
.stream()
.map(w -> Hex.toHexString(w.toByteArray()))
.collect(Collectors.toSet());
recordSrSetChange(currentWitnesses);
lastNextMaintenanceTime = nextMaintenanceTime;
}
}

/**
 * Diffs the current active witness set against the last observed snapshot and
 * emits one SR_SET_CHANGE counter increment per witness that joined or left
 * the active set, labeled with the action and the Base58Check address.
 *
 * <p>Addresses in both sets are hex-encoded strings; they are decoded and
 * re-encoded as Base58Check for the metric label. The snapshot
 * {@code lastActiveWitnesses} is refreshed to {@code currentWitnesses}
 * whenever any difference is observed.
 *
 * @param currentWitnesses hex-encoded addresses of the current active witnesses
 */
private void recordSrSetChange(Set<String> currentWitnesses) {
  Set<String> joined = new HashSet<>(currentWitnesses);
  joined.removeAll(lastActiveWitnesses);

  Set<String> departed = new HashSet<>(lastActiveWitnesses);
  departed.removeAll(currentWitnesses);

  joined.forEach(hexAddress ->
      Metrics.counterInc(MetricKeys.Counter.SR_SET_CHANGE, 1,
          MetricLabels.Counter.SR_ADD,
          StringUtil.encode58Check(Hex.decode(hexAddress))));
  departed.forEach(hexAddress ->
      Metrics.counterInc(MetricKeys.Counter.SR_SET_CHANGE, 1,
          MetricLabels.Counter.SR_REMOVE,
          StringUtil.encode58Check(Hex.decode(hexAddress))));

  boolean setChanged = !joined.isEmpty() || !departed.isEmpty();
  if (setChanged) {
    // When both diff sets are empty the snapshot already equals
    // currentWitnesses, so the refresh can be skipped.
    lastActiveWitnesses.clear();
    lastActiveWitnesses.addAll(currentWitnesses);
  }
}

private List<WitnessInfo> getSrList() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
Expand All @@ -25,6 +26,7 @@
import org.tron.common.utils.ByteArray;
import org.tron.common.utils.PublicMethod;
import org.tron.common.utils.Sha256Hash;
import org.tron.common.utils.StringUtil;
import org.tron.common.utils.Utils;
import org.tron.consensus.dpos.DposSlot;
import org.tron.core.ChainBaseManager;
Expand All @@ -38,6 +40,9 @@

@Slf4j(topic = "metric")
public class PrometheusApiServiceTest extends BaseTest {

private static final String MINER_LABEL = "miner";

static LocalDateTime localDateTime = LocalDateTime.now();
@Resource
private DposSlot dposSlot;
Expand Down Expand Up @@ -65,7 +70,7 @@ protected static void initParameter(CommonParameter parameter) {
parameter.setMetricsPrometheusEnable(true);
}

protected void check() throws Exception {
protected void check(byte[] address, Map<ByteString, String> witnessAndAccount) throws Exception {
Double memoryBytes = CollectorRegistry.defaultRegistry.getSampleValue(
"system_total_physical_memory_bytes");
Assert.assertNotNull(memoryBytes);
Expand All @@ -80,6 +85,56 @@ protected void check() throws Exception {
new String[] {"sync"}, new String[] {"false"});
Assert.assertNotNull(pushBlock);
Assert.assertEquals(pushBlock.intValue(), blocks + 1);

String minerBase58 = StringUtil.encode58Check(address);
// Query histogram bucket le="0.0" for empty blocks
Double emptyBlock = CollectorRegistry.defaultRegistry.getSampleValue(
"tron:block_transaction_count_bucket",
new String[] {MINER_LABEL, "le"}, new String[] {minerBase58, "0.0"});

Assert.assertNotNull("Empty block bucket should exist for miner: " + minerBase58, emptyBlock);
Assert.assertEquals("Should have 1 empty block", 1, emptyBlock.intValue());

// Check SR_REMOVE for initial address (removed when addTestWitnessAndAccount() is called)
Double srRemoveCount = CollectorRegistry.defaultRegistry.getSampleValue(
"tron:sr_set_change_total",
new String[] {"action", "witness"},
new String[] {MetricLabels.Counter.SR_REMOVE, minerBase58}
);
Assert.assertNotNull(srRemoveCount);
Assert.assertEquals(1, srRemoveCount.intValue());

// Check SR_ADD and empty blocks for each new witness in witnessAndAccount
// (excluding initial address)
ByteString addressByteString = ByteString.copyFrom(address);
double totalNewWitnessEmptyBlocks = 0;
for (ByteString witnessAddress : witnessAndAccount.keySet()) {
if (witnessAddress.equals(addressByteString)) {
continue; // Skip initial address
}
String witnessBase58 = StringUtil.encode58Check(witnessAddress.toByteArray());

// Check SR_ADD
Double srAddCount = CollectorRegistry.defaultRegistry.getSampleValue(
"tron:sr_set_change_total",
new String[] {"action", "witness"},
new String[] {MetricLabels.Counter.SR_ADD, witnessBase58}
);
Assert.assertNotNull("SR_ADD should be recorded for witness: " + witnessBase58,
srAddCount);
Assert.assertEquals("Each new witness should have 1 SR_ADD record", 1,
srAddCount.intValue());

// Collect empty blocks count from histogram bucket
Double witnessEmptyBlock = CollectorRegistry.defaultRegistry.getSampleValue(
"tron:block_transaction_count_bucket",
new String[] {MINER_LABEL, "le"}, new String[] {witnessBase58, "0.0"});
Assert.assertNotNull("Empty block bucket should exist for witness: " + witnessBase58,
witnessEmptyBlock);
totalNewWitnessEmptyBlocks += witnessEmptyBlock;
}
Assert.assertEquals(blocks, (int)totalNewWitnessEmptyBlocks);

Double errorLogs = CollectorRegistry.defaultRegistry.getSampleValue(
"tron:error_info_total", new String[] {"net"}, new String[] {MetricLabels.UNDEFINED});
Assert.assertNull(errorLogs);
Expand Down Expand Up @@ -130,10 +185,23 @@ public void testMetric() throws Exception {

Map<ByteString, String> witnessAndAccount = addTestWitnessAndAccount();
witnessAndAccount.put(ByteString.copyFrom(address), key);

// Explicitly update WitnessScheduleStore to remove initial address,
// triggering SR_REMOVE metric
List<ByteString> newActiveWitnesses = new ArrayList<>(witnessAndAccount.keySet());
newActiveWitnesses.remove(ByteString.copyFrom(address));
chainBaseManager.getWitnessScheduleStore().saveActiveWitnesses(newActiveWitnesses);

// Update nextMaintenanceTime to trigger SR set change detection
long nextMaintenanceTime =
chainBaseManager.getDynamicPropertiesStore().getNextMaintenanceTime();
chainBaseManager.getDynamicPropertiesStore().updateNextMaintenanceTime(
nextMaintenanceTime + 3600_000L);

for (int i = 0; i < blocks; i++) {
generateBlock(witnessAndAccount);
}
check();
check(address, witnessAndAccount);
}

private Map<ByteString, String> addTestWitnessAndAccount() {
Expand Down
Loading