在以下强化学习代码中,当我在 google colabs 上运行它时,我没有得到错误,但在 Jupiter Notebooks 上我得到:
IndexError:只有整数、切片(
:
)、省略号(...
)、numpy.newaxis(None
)和整数或布尔数组是有效的索引。
应该改变什么?
import numpy as np
import gym
# Define the environment
env = gym.make("Taxi-v3").env
# Initialize the q-table with zero values
q_table = np.zeros([env.observation_space.n, env.action_space.n])
# Hyperparameters
alpha = 0.1 # learning-rate
gamma = 0.7 # discount-factor
epsilon = 0.1 # explor vs exploit
# Random generator
rng =np.random.default_rng()
# Perform 10,000 episodes
for i in range(10_000):
# Reset the environment and initialize total_reward
state = env.reset()
done = False
total_reward = 0
# Loop as long as the game is not over, i.e. done is not True
while not done:
if rng.random() < epsilon:
action = env.action_space.sample() # Explore the action space
else:
action = np.argmax(q_table[state]) # Exploit learned values
# Apply the action and see what happens
next_state, reward, done, info = env.step(action)
current_value = q_table[state, action] # current Q-value for the state/action couple
next_max = np.max(q_table[next_state]) # next best Q-value
# Compute the new Q-value with the Bellman equation
q_table[state, action] = (1 - alpha) * current_value + alpha * (reward + gamma * next_max)
# Update the current state and total_reward
state = next_state
total_reward += reward
# Print the total reward earned in this episode
print(f"Episode {i+1}: Total reward = {total_reward}")
代替 state = env.reset() 使用 state = env.reset()[0] 这将解决您的问题。