%% You should probably cite draft-kim-nmrg-rl-05 instead of this revision.
%% Internet-Draft draft-kim-nmrg-rl, revision 02 (17 pages).
%% NOTE(review): the exported record had blank year/month/day fields, which are
%% a syntax error in BibTeX/Biber. They were removed rather than guessed.
%% TODO: add `year` (required for @techreport) and `month` from the datatracker
%% page listed in `url` before citing this entry.
@techreport{kim-nmrg-rl-02,
    author =    {Kim, Min-Suk and Hong, Yong-Geun and Ahn, Tae-Jin and Kim, Kwi-Hoon and Han, Youn-Hee},
    title =     {Intelligent Management using Collaborative Reinforcement Multi-agent System},
    type =      {Internet-Draft},
    number =    {draft-kim-nmrg-rl-02},
    institution =   {Internet Engineering Task Force},
    publisher = {Internet Engineering Task Force},
    note =      {Work in Progress},
    url =       {https://datatracker.ietf.org/doc/draft-kim-nmrg-rl/02/},
    pagetotal = {17},
    abstract =  {This document describes intelligent network management system to autonomously manage and monitor using machine learning techniques. Reinforcement learning is one of the machine learning techniques that can provide autonomously management with multi-agent path-planning over a communication network. According to intelligent distributed multi-agent system, the main centralized node called by the global environment should not only manage all agents workflow in a hybrid peer-to-peer networking architecture and, but transfer and share information in distributed nodes. All agents in distributed nodes are able to be provided with a cumulative reward for each action that a given agent takes with respect to an optimized knowledge based on a to-be-learned policy over the learning process. The optimized and trained knowledge would be involved with a large state information by the control action over a network. A reward from the global environment is reflected to the next optimized control action autonomously for network management in distributed networking nodes. The Reinforcement Learning Process (RLP) have developed and expanded to Deep Reinforcement Learning (DRL) with model-driven or data-driven technical approaches for learning process. The trendy technique has been widely to attempt and apply to networking fields since DRL can be used in practical networking areas beyond dynamics and heterogeneous environment disturbances, so that in the technique can be intelligently learned in the effective strategy.},
}