%% You should probably cite draft-calabria-bmwg-ai-fabric-training-bench-02 instead of this revision.
%%
%% Revision -00 of the Internet-Draft, recorded as a technical report.
%% Only the acronym in the title is brace-protected so that bibliography
%% styles remain free to apply their own title casing.
%% NOTE(review): `day`, `pagetotal` and `publisher` are kept from the original
%% export; styles that do not know these fields simply skip them.
@techreport{calabria-bmwg-ai-fabric-training-bench-00,
  author      = {Calabria, Fernando and Pignataro, Carlos and Wu, Qin and Fioccola, Giuseppe},
  title       = {Benchmarking Methodology for {AI} Training Network Fabrics},
  type        = {Internet-Draft},
  number      = {draft-calabria-bmwg-ai-fabric-training-bench-00},
  institution = {Internet Engineering Task Force},
  publisher   = {Internet Engineering Task Force},
  year        = 2026,
  month       = feb,
  day         = 26,
  pagetotal   = 38,
  note        = {Work in Progress},
  url         = {https://datatracker.ietf.org/doc/draft-calabria-bmwg-ai-fabric-training-bench/00/},
  abstract    = {This document defines benchmarking terminology, methodologies, and Key Performance Indicators (KPIs) for evaluating Ethernet-based AI training network fabrics. As large-scale distributed AI/ML training clusters grow to tens of thousands of accelerators (GPUs/XPUs), the backend network fabric becomes the critical bottleneck determining job completion time (JCT), training throughput, and accelerator utilization. This document establishes vendor-independent, reproducible test procedures for benchmarking fabric-level performance under realistic AI training workloads, covering RDMA/RoCEv2 transport, the Ultra Ethernet Transport (UET) protocol defined by the UEC Specification 1.0 {[}UEC-1.0{]}, congestion management (PFC, ECN, DCQCN, CBFC), load balancing strategies (ECMP, DLB, packet spraying), collective communication patterns (AllReduce, AlltoAll, AllGather), and scale/soak testing. The methodology enables apples-to-apples comparison across different switch ASICs, vendor implementations, NIC transport stacks (RoCEv2 vs. UET), and fabric architectures (2-tier Clos, 3-tier Clos, rail-optimized).},
}