# speechVSnoise / finetune.py
# Uploaded by: datasetsANDmodels
# Last commit: "Rename fine_ast.py to finetune.py" (9f71ee0, verified)
from datasets import load_dataset
# Build the dataset from the local "data" directory using the generic
# "audiofolder" builder of the `datasets` library.
# NOTE(review): presumably class labels come from sub-directory names
# (the audiofolder convention) -- verify against the actual data layout.
dataset = load_dataset(path="audiofolder", data_dir="data")
# A random 90/10 train/test split is currently disabled; the original call was:
# dataset = dataset["train"].train_test_split(seed=42, shuffle=True, test_size=0.1)
from transformers import ASTForAudioClassification
from transformers import ASTFeatureExtractor
from transformers import TrainingArguments
import numpy as np
from transformers import Trainer
import evaluate
# --- Optimisation hyper-parameters -----------------------------------------
batch_size = 8  # used as the per-device batch size for both train and eval
gradient_accumulation_steps = 1
num_train_epochs = 10

# --- Task definition --------------------------------------------------------
labels = ["noise", "speech"]  # position in this list is the integer class id
num_labels = len(labels)  # derived so it can never drift from `labels`
max_duration = 5  # seconds; multiplied by the sampling rate in preprocessing

# --- Checkpoints ------------------------------------------------------------
model_id = "bookbot/distil-ast-audioset"  # pretrained checkpoint to start from
model_name = "speechVSnoise"  # used as the training output directory

# Label <-> id lookup tables handed to the model config. Ids are kept as
# integers: the config documents label2id as str -> int and id2label as
# int -> str, and integer keys let `id2label[predicted_id]` work directly
# with argmax outputs.
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}
# Load the pretrained AST checkpoint and request a classification head sized
# for this task's labels.
# `ignore_mismatched_sizes=True` is documented to let loading proceed when a
# checkpoint weight (e.g. the old classification head) has a different shape
# from the one requested here, instead of raising.
# The relative order of num_labels / label2id / id2label is kept as in the
# original call.
model = ASTForAudioClassification.from_pretrained(
    pretrained_model_name_or_path=model_id,
    ignore_mismatched_sizes=True,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
)
# Feature extractor loaded from the same checkpoint as the model, with
# normalisation switched on and attention masks switched off.
feature_extractor = ASTFeatureExtractor.from_pretrained(
    model_id,
    return_attention_mask=False,
    do_normalize=True,
)
def preprocess_function(examples):
    """Turn a batch of decoded audio examples into model inputs.

    Args:
        examples: A batch whose ``"audio"`` entry is a sequence of items,
            each exposing its raw waveform under the ``"array"`` key.

    Returns:
        The object returned by ``feature_extractor`` for the batch.
    """
    target_rate = feature_extractor.sampling_rate
    waveforms = [clip["array"] for clip in examples["audio"]]
    # NOTE(review): the waveforms are declared to be at `target_rate`, but
    # nothing in this function checks or resamples them -- confirm the
    # dataset really is at that rate.
    # NOTE(review): confirm that ASTFeatureExtractor honours call-time
    # `max_length` / `truncation`; it may only use the max_length it was
    # constructed with, in which case `max_duration` has no effect.
    return feature_extractor(
        waveforms,
        sampling_rate=target_rate,
        max_length=int(target_rate * max_duration),
        truncation=True,
    )
# Make sure every clip is decoded at the rate the feature extractor expects.
# preprocess_function tells the extractor that the waveforms are at
# `feature_extractor.sampling_rate`; without this cast, files recorded at any
# other rate would be silently treated as if they were at that rate.
# Casting is a no-op for files already at the target rate.
from datasets import Audio

resampled_dataset = dataset.cast_column(
    "audio", Audio(sampling_rate=feature_extractor.sampling_rate)
)

# Run feature extraction over every split.
# NOTE(review): batch_size=1674 is presumably the number of clips, so that the
# whole dataset is processed in a single batch -- TODO confirm; a smaller
# value would lower peak memory.
dataset_encoded = resampled_dataset.map(
    preprocess_function,
    batched=True,
    batch_size=1674,
    num_proc=1,
)
# Accuracy metric from the `evaluate` library, loaded once at module level
# and reused by every compute_metrics call.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores, reduced
            here with an argmax over axis 1) and ``label_ids`` (the reference
            class ids).

    Returns:
        The result of ``metric.compute`` for the predicted vs. reference ids.
    """
    # NOTE(review): verify this function is actually passed to the Trainer if
    # accuracy should be reported during evaluation.
    predicted_ids = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(
        references=eval_pred.label_ids,
        predictions=predicted_ids,
    )
# Training configuration: evaluate and checkpoint once per epoch, then reload
# the best checkpoint when training finishes.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` -- confirm which spelling the installed version expects.
training_args = TrainingArguments(
    output_dir=model_name,
    # Schedule
    num_train_epochs=num_train_epochs,
    learning_rate=5e-5,
    warmup_ratio=0.1,
    # Batching
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    # Evaluation / checkpointing (the two strategies are set to the same
    # value, which load_best_model_at_end relies on)
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Logging
    logging_steps=5,
    # Disabled options, kept for reference:
    # metric_for_best_model="accuracy",
    # push_to_hub=True,
)
from transformers import Trainer
# Pick the split used for per-epoch evaluation and best-checkpoint selection.
# A held-out split ("validation", then "test") is preferred when the dataset
# provides one, so that the model is not scored on the examples it was trained
# on. When only "train" exists this falls back to the previous behaviour of
# evaluating on the training split.
eval_split = next(
    (name for name in ("validation", "test") if name in dataset_encoded),
    "train",
)

trainer = Trainer(
    model,
    training_args,
    train_dataset=dataset_encoded["train"],
    eval_dataset=dataset_encoded[eval_split],
    # The feature extractor is passed through the `tokenizer` slot.
    # NOTE(review): newer transformers releases prefer `processing_class`
    # for this -- confirm against the installed version.
    tokenizer=feature_extractor,
    # Accuracy reporting is currently disabled:
    # compute_metrics=compute_metrics,
)

# Run the fine-tuning loop.
trainer.train()