% IETF Internet-Draft, revision 00 (Work in Progress): terminology for
% benchmarking LLM inference serving. The title is left unprotected so the
% bibliography style controls its casing.
@techreport{gaikwad-llm-benchmarking-terminology-00,
  author      = {Gaikwad, Madhava},
  title       = {Benchmarking Terminology for Large Language Model Serving},
  type        = {Internet-Draft},
  number      = {draft-gaikwad-llm-benchmarking-terminology-00},
  institution = {Internet Engineering Task Force},
  publisher   = {Internet Engineering Task Force},
  year        = 2026,
  month       = jan,
  day         = 20,
  pagetotal   = 54,
  url         = {https://datatracker.ietf.org/doc/draft-gaikwad-llm-benchmarking-terminology/00/},
  note        = {Work in Progress},
  abstract    = {This document defines terminology for benchmarking the performance of Large Language Model (LLM) inference serving systems. It establishes a shared vocabulary for latency, throughput, resource utilization, and quality metrics applicable to inference engines, application gateways, and compound agentic systems. This document defines terminology only and does not prescribe benchmark methodologies or acceptance thresholds.},
}