%% You should probably cite draft-zhang-rtgwg-llmmoe-multicast-02 instead of this revision.
%% NOTE(review): the export left year/month/day with no value, which is a BibTeX
%% syntax error, so those three fields were dropped. The publication date is not
%% recoverable from this record -- TODO: add `year` (and `month`, as an unquoted
%% three-letter macro) from the datatracker page for revision 00.
@techreport{zhang-rtgwg-llmmoe-multicast-00,
  author      = {Zhang, Zheng and Duan, Wei},
  title       = {Multicast usage in {LLM} {MoE}},
  type        = {Internet-Draft},
  number      = {draft-zhang-rtgwg-llmmoe-multicast-00},
  institution = {Internet Engineering Task Force},
  publisher   = {Internet Engineering Task Force},
  note        = {Work in Progress},
  url         = {https://datatracker.ietf.org/doc/draft-zhang-rtgwg-llmmoe-multicast/00/},
  pagetotal   = {6},
  abstract    = {Large Language Models (LLMs) have been widely used in recent years. The Mixture of Experts (MoE) architecture is one of the features of LLMs that enables efficient inference and cost-effective training. With the MoE architecture, there are potential multicast use cases such as tokens dispatching. This draft attempts to analyze these use cases.},
}