Getting a video/GIF from a Gym environment


I hope you are doing well. I am currently writing Python code that uses reinforcement learning to play Breakout in an Atari environment. The environment I'm using is Gym, and my code is below.

I have put considerable effort into capturing the output of each episode as a video, for example to see how my AI performs in episode 12. However, no matter how much I search, I cannot figure out how to do it.

I would appreciate it if you could show me how to capture a video or GIF from a Gym environment.

My code:

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import gymnasium
from collections import namedtuple
from itertools import count
from torch.distributions import Categorical
import matplotlib.pyplot as plt
from gymnasium.wrappers import RecordVideo  # gymnasium, not the legacy gym package
from PIL import Image
import PIL.ImageDraw as ImageDraw
# Define the architecture of the neural network
class Policy(nn.Module):
    def __init__(self, input_channels=3):
        super(Policy, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(self._conv_output_size((input_channels, 210, 160)), 512)
        self.fc2 = nn.Linear(512, 4)

    def _conv_output_size(self, shape):
        dummy_input = torch.zeros(1, *shape)
        x = self._conv_forward(dummy_input)
        return x.view(1, -1).size(1)

    def _conv_forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        return x

    def forward(self, x):
        x = self._conv_forward(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

# Define the experience tuple
Experience = namedtuple('Experience', ('state', 'action', 'reward'))

# Function to preprocess the input state
def preprocess(observation):
    if isinstance(observation, tuple):
        observation = observation[0]  # Extract the first element from the tuple

    state = np.array(observation)
    
    # Check if the observation is grayscale (H x W) or RGB (H x W x C)
    if len(state.shape) == 3 and state.shape[2] == 3:
        state = state.transpose(2, 0, 1)  # Change the order of dimensions (HWC to CHW)
    elif len(state.shape) == 2:
        state = np.expand_dims(state, axis=0)  # Add a channel dimension
    
    state = state.astype(np.float32)
    state = state / 255.0
    state = torch.from_numpy(state)
    state = state.unsqueeze(0)
    return state



# Function to calculate discounted rewards
def calculate_discounted_rewards(rewards, gamma=0.99):
    discounted_rewards = np.zeros_like(rewards, dtype=np.float32)
    running_add = 0
    for t in reversed(range(len(rewards))):
        running_add = running_add * gamma + rewards[t]
        discounted_rewards[t] = running_add
    return discounted_rewards

# Training function
def train(policy, optimizer, experiences):
    states = torch.cat([exp.state for exp in experiences])
    actions = torch.tensor([exp.action for exp in experiences], dtype=torch.long)
    rewards = torch.tensor(calculate_discounted_rewards([exp.reward for exp in experiences]), dtype=torch.float32)

    optimizer.zero_grad()
    action_probs = policy(states)
    selected_action_probs = action_probs.gather(1, actions.unsqueeze(1))
    loss = -torch.sum(torch.log(selected_action_probs) * rewards)
    loss.backward()
    optimizer.step()

# Main training loop
def main():
    env = gymnasium.make('ALE/Breakout-v5')

    policy = Policy()
    optimizer = optim.Adam(policy.parameters(), lr=1e-4)

    episode_rewards = []
    episode_durations = []  # New: to store the duration of each episode
    frames = []

    for episode in range(20):  # You may need more episodes
        state = preprocess(env.reset())
        episode_experiences = []

        for t in count():
            action_probs = policy(state)
            action_distribution = Categorical(action_probs)
            action = action_distribution.sample().item()

            step_result = env.step(action)
            observation, reward, terminated, truncated = step_result[:4]
            done = terminated or truncated  # gymnasium ends an episode on termination or truncation

            next_state = preprocess(observation)

            episode_experiences.append(Experience(state, action, reward))

            if done:
                episode_rewards.append(sum(ep.reward for ep in episode_experiences))
                episode_durations.append(t + 1)  # New: store the duration
                break

            state = next_state

        train(policy, optimizer, episode_experiences)

        if episode % 10 == 0:
            print(f'Episode {episode}, Average Reward: {np.mean(episode_rewards[-10:])}')
        env.render()

    env.close()

    # Plotting
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(episode_rewards)
    plt.title('Episode Rewards')
    plt.xlabel('Episode')
    plt.ylabel('Reward')

    plt.subplot(1, 2, 2)
    plt.plot(episode_durations)
    plt.title('Episode Durations')
    plt.xlabel('Episode')
    plt.ylabel('Duration')

    plt.savefig('training_results.png')  # Save the plot as an image file

if __name__ == "__main__":
    main()

I tried to capture video from the Gym environment to evaluate the AI's performance, but I couldn't manage it. I used several libraries and ran into various errors. In general, I'm not familiar with which library to use for this task. My Python version is 3.11.

1 Answer

You can use the RecordVideo wrapper for this: https://gymnasium.farama.org/api/wrappers/misc_wrappers/#gymnasium.wrappers.RecordVideo

For example, this will record every 1000th episode:

from gymnasium.wrappers import RecordVideo
trigger = lambda t: t % 1000 == 0
base_env = gymnasium.make('ALE/Breakout-v5', render_mode="rgb_array")  # RecordVideo needs rgb_array frames
env = RecordVideo(base_env, video_folder="./videos", episode_trigger=trigger, disable_logger=True)
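If you want a GIF rather than the MP4 files that RecordVideo writes, one option is to collect the frames returned by env.render() and save them yourself. Below is a minimal sketch, assuming the imageio package is installed; the random actions and the file name episode.gif are just placeholders for illustration:

import gymnasium
import imageio

# Minimal sketch: run one episode with random actions and save it as a GIF.
env = gymnasium.make('ALE/Breakout-v5', render_mode="rgb_array")
frames = []
observation, info = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # replace with the action sampled from your policy
    observation, reward, terminated, truncated, info = env.step(action)
    frames.append(env.render())  # with render_mode="rgb_array", render() returns an H x W x 3 array
    done = terminated or truncated
env.close()

imageio.mimsave("episode.gif", frames)  # write the collected frames as a GIF

The same idea works inside your training loop: append env.render() to a list at each step, and when the episode index matches the one you want to inspect (e.g. episode 12), pass that list to imageio.mimsave.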