% IETF Internet-Draft, revision 00 (work in progress); see the url field for the datatracker page.
% Only the acronyms in the title are brace-protected so bibliography styles can still apply their own casing.
@techreport{chen-nmrg-multi-provider-inference-api-00,
  author      = {Chen, Huamin and Jalil, Luay and Cocker, Nabeel},
  title       = {Multi-Provider Extensions for Agentic {AI} Inference {APIs}},
  type        = {Internet-Draft},
  number      = {draft-chen-nmrg-multi-provider-inference-api-00},
  institution = {Internet Engineering Task Force},
  publisher   = {Internet Engineering Task Force},
  year        = 2025,
  month       = oct,
  day         = 19,
  pagetotal   = 44,
  url         = {https://datatracker.ietf.org/doc/draft-chen-nmrg-multi-provider-inference-api/00/},
  note        = {Work in Progress},
  abstract    = {This document specifies extensions for multi-provider distributed AI inference using the widely-adopted OpenAI Responses API as the reference interface standard. These extensions enable provider diversity, load balancing, failover, and capability negotiation in distributed inference environments while maintaining full backward compatibility with existing implementations. The extensions do not require changes to standard API usage patterns or existing client applications. By treating the OpenAI Responses API as a de facto standard interface (similar to how HTTP serves as a standard protocol), these extensions provide an optional enhancement layer for multi-provider orchestration, intelligent routing, and distributed inference capabilities. The approach preserves the familiar API interface that developers already know and use, while enabling seamless integration across multiple AI inference providers without vendor lock-in.},
}