Stage-1 commit: Agent trained for 3500 episodes

Browse files

Files changed (6) hide show

README.md +9 -4
atari_breakout_v0-episode-0.mp4 +0 -0
main.py +167 -0
model.py +40 -0
atari_breakout_v0.pt → models/atari_breakout_v0.pt +0 -0
utils.py +40 -0

README.md CHANGED Viewed

@@ -1,8 +1,13 @@
 ---
 license: mit
 language:
-- en
 tags:
-- reinforcement learning
-- games
----

 ---
 license: mit
 language:
+  - en
 tags:
+  - reinforcement learning
+  - games
+---
+# Deep Q-Learning based Agent for Atari Breakout
+The agent showcased in this space was trained using the Deep Q-Learning (DQN) algorithm.
+The agent was trained for $3500$ episodes with a learning rate of $0.00001$ and an epsilon value that decreased linearly over time.

atari_breakout_v0-episode-0.mp4 ADDED Viewed

Binary file (79.2 kB). View file

main.py ADDED Viewed

	@@ -0,0 +1,167 @@

+"""
+Main script to run the Atari Breakout-v0 game.
+The DQN algorithm was used to train the agent.
+@author: bvk1ng (Adityam Ghosh)
+Date: 12/28/2023
+"""
+from typing import List, Dict, Any, Callable, Tuple, Union
+import numpy as np
+import gymnasium as gym
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import albumentations as A
+import cv2
+import os
+import argparse
+from model import CNNModel
+from utils import play_atari_game, gym
+from gymnasium.wrappers.record_video import RecordVideo
+K = 4
+IM_SIZE = 84
+class ImageTransform:
+    def __init__(self):
+        self.compose = A.Compose(
+            [
+                A.Crop(x_min=0, y_min=34, x_max=160, y_max=200, always_apply=True),
+                A.Resize(
+                    height=IM_SIZE,
+                    width=IM_SIZE,
+                    interpolation=cv2.INTER_NEAREST,
+                    always_apply=True,
+                ),
+            ]
+        )
+    def transform(self, img: np.ndarray) -> np.ndarray:
+        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+        img_tf = self.compose(image=gray_img)
+        return img_tf["image"]
+class DQN:
+    def __init__(
+        self,
+        K: int,
+        cnn_params: List,
+        fully_connected_params: List,
+        device: str = "cuda",
+        load_path: str = None,
+    ):
+        self.K = K
+        self.cnn_model = CNNModel(
+            K=K,
+            cnn_params=cnn_params,
+            fully_connected_params=fully_connected_params,
+        ).to(device=device)
+        self.device = device
+        self.load(load_path)
+    def predict(self, states: np.ndarray) -> torch.Tensor:
+        states = np.transpose(states, (0, 3, 1, 2))  # (N, T, H, W)
+        states = torch.from_numpy(states).float().to(device=self.device)
+        states /= 255.0
+        return self.cnn_model(states).detach().cpu()
+    def load(self, path: str):
+        if path is not None:
+            self.cnn_model.load_state_dict(torch.load(path))
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_folder",
+        "-mF",
+        type=str,
+        required=False,
+        default="./models",
+        help="the folder to store the models.",
+    )
+    parser.add_argument(
+        "--model_name",
+        "-mf",
+        type=str,
+        required=False,
+        default="atari_breakout_v0.pt",
+        help="the name of the model to save.",
+    )
+    parser.add_argument(
+        "--save_video",
+        "-s",
+        type=int,
+        required=False,
+        default=0,
+        help="whether to save a video of the gameplay or not.",
+    )
+    parser.add_argument(
+        "--video_folder",
+        "-V",
+        type=str,
+        required=False,
+        default="./videos",
+        help="where to save the video.",
+    )
+    parser.add_argument(
+        "--video_name",
+        "-v",
+        type=str,
+        required=False,
+        default="atari_breakout_v0",
+        help="the name of the video file.",
+    )
+    args = parser.parse_args()
+    model_folder = args.model_folder
+    model_name = args.model_name
+    save_video = args.save_video
+    video_folder = args.video_folder
+    video_name = args.video_name
+    cnn_params = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]
+    fully_connected_params = [512]
+    load_path = None
+    if os.path.exists(os.path.join(model_folder, model_name)):
+        load_path = os.path.join(model_folder, model_name)
+    model = DQN(
+        K=K,
+        cnn_params=cnn_params,
+        fully_connected_params=fully_connected_params,
+        device="cuda",
+        lr=1e-5,
+        load_path=load_path,
+    )
+    img_transformer = ImageTransform()
+    if save_video:
+        env = gym.make("Breakout-v0", render_mode="rgb_array")
+        env = RecordVideo(env=env, video_folder=video_folder, name_prefix=video_name)
+        env.reset()
+        env.start_video_recorder()
+    else:
+        env = gym.make("Breakout-v0", render_mode="human")
+    play_atari_game(env=env, model=model, img_transform=img_transformer)

model.py ADDED Viewed

	@@ -0,0 +1,40 @@

+"""
+@author: bvk1ng (Adityam Ghosh)
+Date: 12/28/2023
+"""
+from typing import Any, List, Tuple, Dict, Union, Callable
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class CNNModel(nn.Module):
+    def __init__(self, K: int, cnn_params: List, fully_connected_params: List):
+        super().__init__()
+        self.network = nn.Sequential()
+        for idx, (out_channels, kernel_size, stride) in enumerate(cnn_params):
+            self.network.add_module(
+                f"conv2d_{idx}",
+                nn.LazyConv2d(
+                    out_channels=out_channels,
+                    kernel_size=kernel_size,
+                    stride=stride,
+                ),
+            )
+            self.network.add_module(f"activation_{idx}", nn.ReLU())
+        self.network.add_module("flatten", nn.Flatten())
+        for idx, out_feats in enumerate(fully_connected_params):
+            self.network.add_module(f"fc_{idx}", nn.LazyLinear(out_features=out_feats))
+            self.network.add_module(f"fc_activation_{idx}", nn.ReLU())
+        self.network.add_module("final_layer", nn.LazyLinear(out_features=K))
+    def forward(self, X: torch.Tensor) -> torch.Tensor:
+        return self.network(X)

atari_breakout_v0.pt → models/atari_breakout_v0.pt RENAMED Viewed

File without changes

utils.py ADDED Viewed

	@@ -0,0 +1,40 @@

+"""
+@author: bvk1ng (Adityam Ghosh)
+Date: 12/28/2023
+"""
+from typing import Callable, List, Tuple, Any, Dict, Union
+import numpy as np
+import gymnasium as gym
+def update_state(state: np.ndarray, obs_small: np.ndarray) -> np.ndarray:
+    """Function to append the recent state into the state variable and remove the oldest using FIFO."""
+    return np.append(state[:, :, 1:], np.expand_dims(obs_small, axis=2), axis=2)
+def play_atari_game(env: gym.Env, model: Callable, img_transform: Callable):
+    """Function to play the atari game."""
+    obs, info = env.reset()
+    obs_small = img_transform.transform(obs)
+    state = np.stack([obs_small] * 4, axis=2)
+    done, truncated = False, False
+    episode_reward = 0
+    while not (done or truncated):
+        action = model.predict(np.expand_dims(state, axis=0)).numpy()
+        action = np.argmax(action, axis=1)[0]
+        obs, reward, done, truncated, info = env.step(action)
+        obs_small = img_transform.transform(obs)
+        episode_reward += reward
+        next_state = update_state(state=state, obs_small=obs_small)
+        state = next_state
+    print(f"Total reward earned: {episode_reward}")