from gymnasium.spaces import Box, Dict, Discrete
from ray.rllib.env.multi_agent_env import MultiAgentEnv

class CapacityEnv(MultiAgentEnv):
    def __init__(self):
        super().__init__()
        self.action_space = Discrete(2)  # 0 = do not transmit, 1 = transmit
        self.observation_space = Dict({
            "1": Box(low=0, high=101, dtype=int),  # maximum capacity is 100 per agent
            "2": Box(low=0, high=101, dtype=int)
        })
        self.node_capacity = {"1": 100, "2": 100}

    def step(self, action_dict):
        node_choice_1 = action_dict["1"]
        node_choice_2 = action_dict["2"]
        rewards = {"1": 0, "2": 0}
        if node_choice_1 == node_choice_2:
            # Collision (both transmit) or idle slot (neither transmits).
            rewards = {"1": -10, "2": -10}
        elif node_choice_1 == 0 and node_choice_2 == 1:
            # Only node 2 transmits.
            if self.node_capacity["2"] >= self.node_capacity["1"]:
                rewards = {"1": 10, "2": 10}
            else:
                rewards = {"1": -10, "2": -10}
            self.node_capacity["2"] = self.node_capacity["2"] - 5
        elif node_choice_1 == 1 and node_choice_2 == 0:
            # Only node 1 transmits.
            if self.node_capacity["1"] >= self.node_capacity["2"]:
                rewards = {"1": 10, "2": 10}
            else:
                rewards = {"1": -10, "2": -10}
            self.node_capacity["1"] = self.node_capacity["1"] - 5
        print(self.node_capacity)
        observations = self.node_capacity
        if self.node_capacity["1"] == 0 or self.node_capacity["2"] == 0:
            done = True
        else:
            done = False
        return observations, rewards, done, False, {}

    def reset(self, *, seed=None, options=None):
        self.node_capacity = {"1": 100, "2": 100}
        print(self.node_capacity)
        observations = self.node_capacity
        return observations, {}
However, when I train my agents with RLlib:
from ray.rllib.algorithms.dqn import DQNConfig
from ray.tune.logger.logger import pretty_print

config = DQNConfig().environment(CapacityEnv).training(gamma=0.9, lr=0.001, train_batch_size=512)
agent = config.build()
for i in range(2):
    result = agent.train()
    print(pretty_print(result))
I get the following error:
ValueError Traceback (most recent call last)
<ipython-input-69-7e2ec9487891> in <cell line: 2>()
1 from ray.tune.logger.logger import pretty_print
2 for i in range(2):
----> 3 result = agent.train()
4 print(pretty_print(result))
20 frames
/usr/local/lib/python3.10/dist-packages/tree/__init__.py in assert_same_structure(a, b, check_types)
286 str1 = str(map_structure(lambda _: _DOT, a))
287 str2 = str(map_structure(lambda _: _DOT, b))
--> 288 raise type(e)("%s\n"
289 "Entire first structure:\n%s\n"
290 "Entire second structure:\n%s"
ValueError: The two structures don't have the same nested structure.
First structure: type=int str=100
Second structure: type=OrderedDict str=OrderedDict([('1', 55), ('2', 94)])
More specifically: Substructure "type=OrderedDict str=OrderedDict([('1', 55), ('2', 94)])" is a sequence, while substructure "type=int str=100" is not
Entire first structure:
.
Entire second structure:
OrderedDict([('1', .), ('2', .)])
I suspect the problem is related to the observation space. I have been trying to resolve it but cannot fix it, especially in the context of RLlib and MultiAgentEnv. Any guidance or insight on how to solve this would be greatly appreciated. Thank you!
The error comes from the observation space. In an RLlib MultiAgentEnv, observation_space describes the observation of a single agent; RLlib applies it per agent ID. So it compares each value in your observations dict (a plain Python int such as 100) against a sample of your Dict space (an OrderedDict), and the nested structures do not match, which is exactly what the ValueError reports. Declare a per-agent space and list the agent IDs explicitly:

    self.observation_space = Box(low=0, high=101, dtype=int)
    self._agent_ids = ["1", "2"]

Then make sure every observation you return is a valid sample of that space, i.e. a NumPy array of shape (1,) (the default shape for a Box with scalar bounds), not a bare int. For example, in reset:

    self.node_capacity["1"] = self.observation_space.sample()
    self.node_capacity["2"] = self.observation_space.sample()
    observations = self.node_capacity

(sample() just illustrates the expected format; to keep your original semantics, wrap the fixed starting values instead, e.g. np.array([100]).)
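For completeness, here is a minimal sketch of the whole environment with these changes applied. It also returns the per-agent terminateds/truncateds dicts (including the "__all__" key) that RLlib's current multi-agent API expects, which the original bool-valued return would trip over next. The _obs helper and the np.array wrapping are illustrative choices of mine, not the only way to do it:

    import numpy as np
    from gymnasium.spaces import Box, Discrete
    from ray.rllib.env.multi_agent_env import MultiAgentEnv

    class CapacityEnv(MultiAgentEnv):
        def __init__(self):
            super().__init__()
            self._agent_ids = ["1", "2"]
            self.action_space = Discrete(2)  # 0 = do not transmit, 1 = transmit
            # Space of ONE agent's observation; shape defaults to (1,).
            self.observation_space = Box(low=0, high=101, dtype=int)
            self.node_capacity = {"1": 100, "2": 100}

        def _obs(self):
            # Every value must be a valid sample of observation_space:
            # a NumPy array of shape (1,), not a plain Python int.
            return {aid: np.array([self.node_capacity[aid]]) for aid in self._agent_ids}

        def step(self, action_dict):
            choice_1, choice_2 = action_dict["1"], action_dict["2"]
            rewards = {"1": 0, "2": 0}
            if choice_1 == choice_2:
                # Collision (both transmit) or idle slot (neither transmits).
                rewards = {"1": -10, "2": -10}
            elif choice_1 == 0 and choice_2 == 1:
                ok = self.node_capacity["2"] >= self.node_capacity["1"]
                rewards = {"1": 10, "2": 10} if ok else {"1": -10, "2": -10}
                self.node_capacity["2"] -= 5
            elif choice_1 == 1 and choice_2 == 0:
                ok = self.node_capacity["1"] >= self.node_capacity["2"]
                rewards = {"1": 10, "2": 10} if ok else {"1": -10, "2": -10}
                self.node_capacity["1"] -= 5
            done = self.node_capacity["1"] == 0 or self.node_capacity["2"] == 0
            # RLlib's multi-agent API expects per-agent dicts with an "__all__" key.
            terminateds = {"1": done, "2": done, "__all__": done}
            truncateds = {"1": False, "2": False, "__all__": False}
            return self._obs(), rewards, terminateds, truncateds, {}

        def reset(self, *, seed=None, options=None):
            self.node_capacity = {"1": 100, "2": 100}
            return self._obs(), {}

A quick sanity check before training:

    env = CapacityEnv()
    obs, infos = env.reset()
    obs, rewards, terminateds, truncateds, infos = env.step({"1": 1, "2": 0})

With these changes, the observation structure matches what RLlib samples from the space, and the DQNConfig training loop from the question should run.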
Hope this helps.