episodes = generate_training_episodes(num_episodes)
for episode in episodes:
for state, action, reward, next_state in episode:
# Get the available actions for the next state
next_available_actions = get_available_actions(next_state)
# Calculate the maximum Q-value for the next state
next_q_values = [Q_table[(next_state, next_action)] for next_action in next_available_actions]
max_next_q_value = max(next_q_values)
# Update the Q-value for the current state-action pair
q_value = Q_table[(state, action)]
Q_table[(state, action)] = q_value + ALPHA * (reward + GAMMA * max_next_q_value - q_value)
上面的代码给出了一个值错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-7df231fe06a5> in <cell line: 1>()
----> 1 episodes = generate_training_episodes(num_episodes)
2 for episode in episodes:
3 for state, action, reward, next_state in episode:
4 # Get the available actions for the next state
5 next_available_actions = get_available_actions(next_state)
1 frames
<ipython-input-15-84c03145c31c> in transition_function(state, action)
54
55 elif action.source.startswith("W"):
---> 56 source_index = int(source_id) - 1
57 new_wholesaler_inventories[source_index] -= action.quantity
58 if destination_prefix == "LM":
ValueError: invalid literal for int() with base 10: ''
这是转换函数的问题,我将其定义为
def transition_function(state, action):
new_farmer_inventories = state.farmer_inventories.copy()
new_local_trader_inventories = state.local_trader_inventories.copy()
new_ppc_inventories = state.ppc_inventories.copy()
new_wholesaler_inventories = state.wholesaler_inventories.copy()
new_ripening_storage_inventories = state.ripening_storage_inventories.copy()
new_local_market_demands = state.local_market_demands.copy()
new_farmers_market_demands = state.farmers_market_demands.copy()
new_retailer_demands = state.retailer_demands.copy()
source_id = action.source[1:] if action.source.startswith("F") else \
action.source[2:] if action.source.startswith(("LT", "PPC", "W", "RS")) else ""
destination_prefix = action.destination[:2] if action.destination.startswith(("LM", "FM")) else \
action.destination[0] if action.destination.startswith(("F", "R")) else ""
destination_id = action.destination[2:] if action.destination.startswith(("PPC", "LT", "W")) else \
action.destination[1:] if action.destination.startswith(("F", "R")) else \
action.destination[3:] if action.destination.startswith(("LM", "FM")) else ""
# Update inventory levels
if action.source.startswith("F"):
source_index = int(source_id) - 1
new_farmer_inventories[source_index] -= action.quantity
if action.destination.startswith("LT"):
destination_index = int(destination_id) - 1
new_local_trader_inventories[destination_index] += action.quantity
elif action.destination.startswith("PPC"):
destination_index = int(destination_id) - 1
new_ppc_inventories[destination_index] += action.quantity
elif action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
elif action.source.startswith("LT"):
source_index = int(source_id) - 1
new_local_trader_inventories[source_index] -= action.quantity
if action.destination.startswith("PPC"):
destination_index = int(destination_id) - 1
new_ppc_inventories[destination_index] += action.quantity
elif action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
elif action.source.startswith("PPC"):
source_index = int(source_id) - 1
new_ppc_inventories[source_index] -= action.quantity
if action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
elif action.destination.startswith("RS"):
destination_index = int(destination_id) - 1
new_ripening_storage_inventories[destination_index] += action.quantity
elif action.source.startswith("W"):
source_index = int(source_id) - 1
new_wholesaler_inventories[source_index] -= action.quantity
if destination_prefix == "LM":
destination_index = int(destination_id) - 1
new_local_market_demands[destination_index] = max(new_local_market_demands[destination_index] - action.quantity, 0)
elif destination_prefix == "FM":
destination_index = int(destination_id) - 1
new_farmers_market_demands[destination_index] = max(new_farmers_market_demands[destination_index] - action.quantity, 0)
elif destination_prefix == "R":
destination_index = int(destination_id) - 1
new_retailer_demands[destination_index] = max(new_retailer_demands[destination_index] - action.quantity, 0)
elif action.source.startswith("RS"):
source_index = int(source_id) - 1
new_ripening_storage_inventories[source_index] -= action.quantity
if action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
new_state = State(
new_farmer_inventories,
new_local_trader_inventories,
new_ppc_inventories,
new_wholesaler_inventories,
new_ripening_storage_inventories,
new_local_market_demands,
new_farmers_market_demands,
new_retailer_demands,
state.transportation_costs,
state.handling_costs,
)
return new_state
所以我已经尝试了上述方法,实际上我正在尝试使用 Q 学习创建马尔可夫决策过程,但是当我尝试训练剧集时我陷入了困境。
我能做什么?
这是我的协作链接:https://colab.research.google.com/drive/1gmIqq_GQOtlj8IQJQZHEymw9DE9n1EAi?usp=sharing
在排队中
source_id = action.source[1:] if action.source.startswith("F") else \
action.source[2:] if action.source.startswith(("LT", "PPC", "W", "RS")) else ""
source_id
可以是""
。稍后,您将 source_id
转换为整数,但 ""
无法转换为整数。最好的解决方案实际上取决于您想要实现的目标。