In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Download the dataset through kagglehub and keep whatever the call returns
# (presumably the local directory of the downloaded dataset — confirm against
# the kagglehub documentation).
datajediai_hack49_alzheimer_dataset_path = kagglehub.dataset_download('datajediai/hack49-alzheimer-dataset')

print('Data source import complete.')


In [None]:
pip install boto3

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import pandas as pd
import torch
import torchaudio
import torch.nn as nn
import torch.optim as optim
# Functional API (F.pad) used by the custom collate function for the DataLoader
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
# Root folder of the dataset. The Kaggle mount point stays the default; when it
# does not exist (i.e. the notebook runs outside Kaggle) and the kagglehub
# download cell has been run, use the downloaded copy instead.
# NOTE(review): assumes the downloaded dataset keeps the same top-level
# 'Hack49-Alzheimer-Dataset' folder — confirm.
_KAGGLE_DATASET_DIR = '/kaggle/input/hack49-alzheimer-dataset/Hack49-Alzheimer-Dataset'
_kagglehub_root = globals().get('datajediai_hack49_alzheimer_dataset_path')
if not os.path.isdir(_KAGGLE_DATASET_DIR) and _kagglehub_root:
    dataset_dir = os.path.join(_kagglehub_root, 'Hack49-Alzheimer-Dataset')
else:
    dataset_dir = _KAGGLE_DATASET_DIR
# Custom Dataset class
class HealthAudioDataset(Dataset):
    """Binary audio dataset backed by two folders of ``.wav`` files.

    ``root_dir`` must contain the sub-directories ``Healthy`` (label 0) and
    ``NotHealthy`` (label 1). Every file ending in ``.wav`` directly inside
    them becomes one sample.

    Args:
        root_dir: Directory containing the ``Healthy`` / ``NotHealthy`` folders.
        device: Device each waveform is moved to when it is loaded.
        target_sample_rate: Sample rate (Hz) every waveform is resampled to.
            The default of 16000 is meant to match the wav2vec2 bundle used by
            this notebook (NOTE(review): confirm against ``bundle.sample_rate``).
            Having it as a parameter removes the dependency on a global
            ``bundle`` that is only defined in a later cell.
    """

    def __init__(self, root_dir, device='cpu', target_sample_rate=16000):
        self.root_dir = root_dir
        self.device = device
        self.target_sample_rate = target_sample_rate
        self.file_list = []
        self.labels = []
        for label, subdir in enumerate(['Healthy', 'NotHealthy']):
            subdir_path = os.path.join(root_dir, subdir)
            # os.listdir returns entries in arbitrary order; sorting makes the
            # index -> file mapping (and any seeded split) reproducible.
            for wav_file in sorted(os.listdir(subdir_path)):
                if wav_file.endswith('.wav'):
                    self.file_list.append(os.path.join(subdir_path, wav_file))
                    self.labels.append(label)
        self.dataframe = pd.DataFrame({'file_path': self.file_list, 'label': self.labels})

    def __len__(self):
        """Return the number of ``.wav`` files found."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(waveform, label)``.

        The waveform is whatever ``torchaudio.load`` returns for the file,
        moved to ``self.device`` and resampled to ``self.target_sample_rate``
        when the file's own rate differs.
        """
        wav_path = self.file_list[idx]
        label = self.labels[idx]
        waveform, sample_rate = torchaudio.load(wav_path)
        waveform = waveform.to(self.device)
        if sample_rate != self.target_sample_rate:
            waveform = torchaudio.functional.resample(waveform, sample_rate, self.target_sample_rate)
        return waveform, label

    def get_dataframe(self):
        """Return a DataFrame with one row per sample (``file_path``, ``label``)."""
        return self.dataframe


class SelfAttention(nn.Module):
    """Multi-head self-attention followed by a residual add and LayerNorm."""

    def __init__(self, input_dim, num_heads):
        super().__init__()
        # Sub-module names are part of the saved state_dict keys; keep them.
        self.multihead_attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=num_heads)
        self.layer_norm = nn.LayerNorm(input_dim)

    def forward(self, x):
        """Attend ``x`` to itself and return ``LayerNorm(x + attention(x))``.

        ``x`` serves as query, key and value; the output has the same shape
        as the input.
        """
        attended, _weights = self.multihead_attn(x, x, x)
        return self.layer_norm(x + attended)

class TimeSeriesClassifier(nn.Module):
    """Self-attention over a sequence, mean pooling, then a two-layer sigmoid head."""

    def __init__(self, input_dim, num_heads, hidden_dim, output_dim):
        super().__init__()
        self.self_attention = SelfAttention(input_dim, num_heads)
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, input_dim] to sigmoid scores.

        Returns a tensor of shape [batch_size, output_dim].
        """
        # The attention block works on [seq_len, batch_size, input_dim].
        seq_first = x.permute(1, 0, 2)
        attended = self.self_attention(seq_first)
        batch_first = attended.permute(1, 0, 2)
        # Global average pooling over the time dimension.
        pooled = batch_first.mean(dim=1)
        hidden = torch.relu(self.fc1(pooled))
        # Sigmoid for binary classification.
        return torch.sigmoid(self.fc2(hidden))


# Define the combined Encoder-Decoder model
class EncoderDecoder(nn.Module):
    """Chains an encoder with a decoder into a single module."""

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x):
        """Encode ``x`` and decode the encoder's first output.

        The encoder must return a 2-tuple; its second element is discarded.
        """
        emission, _discarded = self.encoder(x)
        return self.decoder(emission)




def collate_fn(batch):
    """Collate ``(waveform, label)`` samples into one zero-padded batch.

    Every waveform is right-padded with zeros along dim 1 up to the longest
    waveform in the batch, then all are stacked along a new leading dim.

    Returns:
        ``(padded_waveforms, labels)`` where ``labels`` is a tensor built from
        the per-sample labels.
    """
    waveforms = []
    targets = []
    for sample in batch:
        waveforms.append(sample[0])
        targets.append(sample[1])

    # Length of the longest waveform decides the padded size.
    longest = max(w.size(1) for w in waveforms)

    padded = torch.stack([F.pad(w, (0, longest - w.size(1))) for w in waveforms])
    return padded, torch.tensor(targets)






In [None]:
# Initialize everything
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Pretrained wav2vec 2.0 pipeline; its model is used as the encoder.
bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
encoder = bundle.get_model().to(device)

# Smoke test of the decoder on random data
batch_size = 32
seq_len = 10  # Number of time steps
# Feature size of the decoder input.
# NOTE(review): must equal the last dimension of the encoder output — confirm.
input_dim = 29
hidden_dim = 15
# Creating a dummy input tensor with shape [batch_size, seq_len, input_dim]
dummy_input = torch.randn(batch_size, seq_len, input_dim)

# num_heads == input_dim gives one attention head per feature (head size 1).
decoder = TimeSeriesClassifier(input_dim = input_dim,
                               num_heads = input_dim,
                               hidden_dim = hidden_dim,
                               output_dim = 1)
output = decoder(dummy_input)
print(output)
decoder = decoder.to(device)
model = EncoderDecoder(encoder, decoder).to(device)

# Freeze the encoder parameters so only the decoder is trained
for param in model.encoder.parameters():
    param.requires_grad = False

# Dataset and DataLoaders
dataset = HealthAudioDataset(dataset_dir, device)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# Seed the split so train/test membership is identical on every run;
# without it the reported losses are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(dataset, [train_size, test_size],
                                           generator=split_generator)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
# Evaluation order does not matter, so the test loader is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, collate_fn=collate_fn)

# Print the contents of the train_loader

tensor([[0.5726],
        [0.5791],
        [0.5756],
        [0.5718],
        [0.6160],
        [0.5886],
        [0.5634],
        [0.5896],
        [0.5754],
        [0.6070],
        [0.5597],
        [0.5673],
        [0.5942],
        [0.5967],
        [0.5815],
        [0.5582],
        [0.5873],
        [0.6225],
        [0.5830],
        [0.5879],
        [0.5958],
        [0.5540],
        [0.5812],
        [0.6039],
        [0.5825],
        [0.5635],
        [0.6145],
        [0.5483],
        [0.5770],
        [0.5952],
        [0.6086],
        [0.5806]], grad_fn=<SigmoidBackward0>)


In [None]:
# Loss and optimizer
# BCELoss operates on probabilities, which matches the sigmoid output of the
# decoder.
criterion = nn.BCELoss()
# Every parameter of the model is handed to Adam, including the encoder
# parameters whose requires_grad was set to False earlier in the notebook.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

def verification(model):
    """Evaluate ``model`` on ``test_loader`` and return the mean per-sample loss.

    The model is switched to eval mode for the duration of the evaluation and
    every sub-module's previous train/eval mode is restored afterwards. Each
    sample of a batch is passed through the model on its own, and the loss is
    averaged over the number of samples (not the number of batches), so the
    result does not depend on the batch size.

    Reads the module-level ``test_loader``, ``criterion`` and ``device``.

    Returns:
        The mean loss as a float; it is also printed as ``Test Loss``.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    # Remember each sub-module's mode so the caller's setup is restored exactly.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    test_loss = 0.0
    num_samples = 0
    try:
        with torch.no_grad():
            for inputs_batch, labels_batch in test_loader:
                for batch_idx in range(inputs_batch.size(0)):
                    inputs = inputs_batch[batch_idx].to(device)
                    # Convert to float and add a batch dimension
                    labels = labels_batch[batch_idx].to(device).float().unsqueeze(0)
                    outputs = model(inputs)
                    labels = labels.view(outputs.shape)  # Ensure labels match the shape of outputs
                    test_loss += criterion(outputs, labels).item()
                    num_samples += 1
    finally:
        for module, mode in previous_modes:
            module.training = mode
    if num_samples == 0:
        raise ValueError('test_loader yielded no samples; cannot compute a test loss')
    ret = test_loss / num_samples
    print(f'Test Loss: {ret:.4f}')
    return ret
verification(model)
# save the model


Test Loss: 1.6243


1.6242794593175252

In [None]:
print(f"Training started... Using {device}")
# Upper bound on the number of epochs, so the loop terminates even when the
# target loss is never reached.
num_epochs = 1000
# Training stops as soon as verification() reports a loss at or below this.
target_loss = 0.10
# NOTE(review): the stopping criterion is evaluated on the test split, so the
# final "Test Loss" is an optimistic estimate; a separate validation split
# would be cleaner.
epoch = 0  # completed epochs; initialised here so the cell runs on a fresh kernel
while epoch < num_epochs and verification(model) > target_loss:
    model.train()
    # The encoder is frozen: keep it in eval mode so that any train-only
    # layers inside it (e.g. dropout) do not perturb the decoder's inputs.
    model.encoder.eval()
    running_loss = 0.0
    samples_seen = 0
    for i, (inputs_batch, labels_batch) in enumerate(train_loader):
        # One optimizer step per sample of the batch.
        for batch_idx in range(inputs_batch.size(0)):
            inputs, labels = inputs_batch[batch_idx], labels_batch[batch_idx]
            inputs = inputs.to(device)
            labels = labels.to(device).float().unsqueeze(0)  # Convert to float and add batch dimension

            optimizer.zero_grad()
            outputs = model(inputs)
            labels = labels.view(outputs.shape)  # Ensure labels match the shape of outputs
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            samples_seen += 1

    epoch += 1
    # Report the mean loss per training sample seen in this epoch.
    print(f'Epoch [{epoch}/{num_epochs}], Batch [{len(train_loader)}], Loss: {running_loss / max(samples_seen, 1):.4f}')

print("Training completed.")
# now generate the testing script



Training started... Using cuda
Test Loss: 0.1040
Epoch [283/0], Batch [12], Loss: 0.0445
Test Loss: 0.1708
Epoch [284/0], Batch [12], Loss: 0.0302
Test Loss: 0.1152
Epoch [285/0], Batch [12], Loss: 0.0298
Test Loss: 0.1269
Epoch [286/0], Batch [12], Loss: 0.0319
Test Loss: 0.1675
Epoch [287/0], Batch [12], Loss: 0.0276
Test Loss: 0.1064
Epoch [288/0], Batch [12], Loss: 0.0250
Test Loss: 0.1328
Epoch [289/0], Batch [12], Loss: 0.0220
Test Loss: 0.1303
Epoch [290/0], Batch [12], Loss: 0.0219
Test Loss: 0.1164
Epoch [291/0], Batch [12], Loss: 0.0200
Test Loss: 0.1139
Epoch [292/0], Batch [12], Loss: 0.0342
Test Loss: 0.1391
Epoch [293/0], Batch [12], Loss: 0.0213
Test Loss: 0.1166
Epoch [294/0], Batch [12], Loss: 0.0271
Test Loss: 0.0904
Training completed.


In [None]:
model

In [None]:
torch.save(model.state_dict(), 'hack49_encoder_decoder_model.pth')
import boto3

def upload_to_s3(file_path, bucket_name, object_name, access_key=None, secret_key=None):
    """Upload a local file to an S3 bucket in region ``us-east-2``.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Name of the destination bucket.
        object_name: Key the file is stored under in the bucket.
        access_key: Optional AWS access key id. When omitted, no explicit key
            is passed and boto3 resolves credentials on its own.
        secret_key: Optional AWS secret access key, handled like ``access_key``.

    Upload errors are caught and printed rather than raised.
    """
    client_kwargs = {'region_name': 'us-east-2'}
    # Only forward credentials that were actually supplied, so callers are
    # not forced to embed secrets in the notebook.
    if access_key is not None:
        client_kwargs['aws_access_key_id'] = access_key
    if secret_key is not None:
        client_kwargs['aws_secret_access_key'] = secret_key
    s3_client = boto3.client('s3', **client_kwargs)

    try:
        # Uploads the given file using a managed uploader
        s3_client.upload_file(file_path, bucket_name, object_name)
        print(f'Successfully uploaded {file_path} to {bucket_name}/{object_name}')
    except Exception as e:
        print(f'Error uploading file: {e}')

# Example usage
file_path = 'hack49_encoder_decoder_model.pth'
bucket_name = 'my-ai-models-darcy'
# Put a timestamp in the object key so uploads made at different times get
# different keys.
import datetime
now = datetime.datetime.now()
object_name = f'hack49_encoder_decoder_model_{now.strftime("%Y-%m-%d_%H-%M-%S")}.pth'
# Never hard-code credentials in a notebook: read them from the environment.
# When a variable is unset the value is None and boto3 falls back to its own
# credential lookup.
access_key = os.environ.get('AWS_ACCESS_KEY_ID')
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

upload_to_s3(file_path, bucket_name, object_name, access_key, secret_key)


Successfully uploaded hack49_encoder_decoder_model.pth to my-ai-models-darcy/hack49_encoder_decoder_model_2024-10-21_05-40-05.pth
