%% You should probably cite draft-calabria-bmwg-ai-fabric-training-bench-02 instead of this revision.
%% Internet-Draft, revision -00 of draft-calabria-bmwg-ai-fabric-training-bench.
%% Authors use the unambiguous "Last, First" form. Only the acronym "AI" is
%% brace-protected in the title so that the bibliography style controls casing.
%% publisher, day and pagetotal are not standard techreport fields; they are
%% retained for tools that read them and are ignored by styles that do not.
@techreport{calabria-bmwg-ai-fabric-training-bench-00,
    author =    {Calabria, Fernando and Pignataro, Carlos and Wu, Qin and Fioccola, Giuseppe},
    title =     {Benchmarking Methodology for {AI} Training Network Fabrics},
    type =      {Internet-Draft},
    number =    {draft-calabria-bmwg-ai-fabric-training-bench-00},
    institution =   {Internet Engineering Task Force},
    publisher = {Internet Engineering Task Force},
    year =      2026,
    month =     feb,
    day =       26,
    pagetotal = 38,
    url =       {https://datatracker.ietf.org/doc/draft-calabria-bmwg-ai-fabric-training-bench/00/},
    note =      {Work in Progress},
    abstract =  {This document defines benchmarking terminology, methodologies, and Key Performance Indicators (KPIs) for evaluating Ethernet-based AI training network fabrics. As large-scale distributed AI/ML training clusters grow to tens of thousands of accelerators (GPUs/XPUs), the backend network fabric becomes the critical bottleneck determining job completion time (JCT), training throughput, and accelerator utilization. This document establishes vendor-independent, reproducible test procedures for benchmarking fabric-level performance under realistic AI training workloads, covering RDMA/RoCEv2 transport, the Ultra Ethernet Transport (UET) protocol defined by the UEC Specification 1.0 {[}UEC-1.0{]}, congestion management (PFC, ECN, DCQCN, CBFC), load balancing strategies (ECMP, DLB, packet spraying), collective communication patterns (AllReduce, AlltoAll, AllGather), and scale/soak testing. The methodology enables apples-to-apples comparison across different switch ASICs, vendor implementations, NIC transport stacks (RoCEv2 vs. UET), and fabric architectures (2-tier Clos, 3-tier Clos, rail-optimized).},
}