ValueError:Q-learning 中以 10 为基数的 int() 的文字无效:''

问题描述 投票:0回答:1
episodes = generate_training_episodes(num_episodes)
for episode in episodes:
    for state, action, reward, next_state in episode:
        # Get the available actions for the next state
        next_available_actions = get_available_actions(next_state)

        # Calculate the maximum Q-value for the next state
        next_q_values = [Q_table[(next_state, next_action)] for next_action in next_available_actions]
        max_next_q_value = max(next_q_values)

        # Update the Q-value for the current state-action pair
        q_value = Q_table[(state, action)]
        Q_table[(state, action)] = q_value + ALPHA * (reward + GAMMA * max_next_q_value - q_value)

上面的代码给出了一个值错误

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-7df231fe06a5> in <cell line: 1>()
----> 1 episodes = generate_training_episodes(num_episodes)
      2 for episode in episodes:
      3     for state, action, reward, next_state in episode:
      4         # Get the available actions for the next state
      5         next_available_actions = get_available_actions(next_state)

1 frames
<ipython-input-15-84c03145c31c> in transition_function(state, action)
     54 
     55     elif action.source.startswith("W"):
---> 56         source_index = int(source_id) - 1
     57         new_wholesaler_inventories[source_index] -= action.quantity
     58         if destination_prefix == "LM":

ValueError: invalid literal for int() with base 10: ''

这是转换函数的问题,我将其定义为

def transition_function(state, action):
    new_farmer_inventories = state.farmer_inventories.copy()
    new_local_trader_inventories = state.local_trader_inventories.copy()
    new_ppc_inventories = state.ppc_inventories.copy()
    new_wholesaler_inventories = state.wholesaler_inventories.copy()
    new_ripening_storage_inventories = state.ripening_storage_inventories.copy()
    new_local_market_demands = state.local_market_demands.copy()
    new_farmers_market_demands = state.farmers_market_demands.copy()
    new_retailer_demands = state.retailer_demands.copy()

    source_id = action.source[1:] if action.source.startswith("F") else \
                action.source[2:] if action.source.startswith(("LT", "PPC", "W", "RS")) else ""

    destination_prefix = action.destination[:2] if action.destination.startswith(("LM", "FM")) else \
                         action.destination[0] if action.destination.startswith(("F", "R")) else ""

    destination_id = action.destination[2:] if action.destination.startswith(("PPC", "LT", "W")) else \
                     action.destination[1:] if action.destination.startswith(("F", "R")) else \
                     action.destination[3:] if action.destination.startswith(("LM", "FM")) else ""

    # Update inventory levels
    if action.source.startswith("F"):
        source_index = int(source_id) - 1
        new_farmer_inventories[source_index] -= action.quantity
        if action.destination.startswith("LT"):
            destination_index = int(destination_id) - 1
            new_local_trader_inventories[destination_index] += action.quantity
        elif action.destination.startswith("PPC"):
            destination_index = int(destination_id) - 1
            new_ppc_inventories[destination_index] += action.quantity
        elif action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity

    elif action.source.startswith("LT"):
        source_index = int(source_id) - 1
        new_local_trader_inventories[source_index] -= action.quantity
        if action.destination.startswith("PPC"):
            destination_index = int(destination_id) - 1
            new_ppc_inventories[destination_index] += action.quantity
        elif action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity

    elif action.source.startswith("PPC"):
        source_index = int(source_id) - 1
        new_ppc_inventories[source_index] -= action.quantity
        if action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity
        elif action.destination.startswith("RS"):
            destination_index = int(destination_id) - 1
            new_ripening_storage_inventories[destination_index] += action.quantity

    elif action.source.startswith("W"):
        source_index = int(source_id) - 1
        new_wholesaler_inventories[source_index] -= action.quantity
        if destination_prefix == "LM":
            destination_index = int(destination_id) - 1
            new_local_market_demands[destination_index] = max(new_local_market_demands[destination_index] - action.quantity, 0)
        elif destination_prefix == "FM":
            destination_index = int(destination_id) - 1
            new_farmers_market_demands[destination_index] = max(new_farmers_market_demands[destination_index] - action.quantity, 0)
        elif destination_prefix == "R":
            destination_index = int(destination_id) - 1
            new_retailer_demands[destination_index] = max(new_retailer_demands[destination_index] - action.quantity, 0)

    elif action.source.startswith("RS"):
        source_index = int(source_id) - 1
        new_ripening_storage_inventories[source_index] -= action.quantity
        if action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity

    new_state = State(
        new_farmer_inventories,
        new_local_trader_inventories,
        new_ppc_inventories,
        new_wholesaler_inventories,
        new_ripening_storage_inventories,
        new_local_market_demands,
        new_farmers_market_demands,
        new_retailer_demands,
        state.transportation_costs,
        state.handling_costs,
    )

    return new_state

所以我已经尝试了上述方法,实际上我正在尝试使用 Q 学习创建马尔可夫决策过程,但是当我尝试训练剧集时我陷入了困境。

我能做什么?

这是我的协作链接:https://colab.research.google.com/drive/1gmIqq_GQOtlj8IQJQZHEymw9DE9n1EAi?usp=sharing

python machine-learning q-learning markov-decision-process
1个回答
0
投票

在排队中

source_id = action.source[1:] if action.source.startswith("F") else \
            action.source[2:] if action.source.startswith(("LT", "PPC", "W", "RS")) else ""

source_id
可以是
""
。稍后,您将
source_id
转换为整数,但
""
无法转换为整数。最好的解决方案实际上取决于您想要实现的目标。

© www.soinside.com 2019 - 2024. All rights reserved.