---
license: openrail
datasets:
- allenai/soda
language:
- en
pipeline_tag: conversational
---
# What is Cadet-Medium?
Inspired by Allen AI's **Cosmo-XL**, **Cadet-Medium** is a relatively small conversational model trained on the **SODA** dataset. **Cadet-Medium** is intended for inference at the edge (on something as small as a 2GB RAM Raspberry Pi).
**Cadet-Medium** is fine-tuned from Google's pretrained **t5-base** model.
If you have any questions or comments on improvements, please contact me at: **[email protected]**
# Google Colab Link
Here is the link to the Google Colab file, where I walk through the process of training the model and using the SODA public dataset from AI2.
https://colab.research.google.com/drive/1uekZ0gO3GqjPwno16tV1A4Gitrl7p3ur?usp=sharing
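If you want to inspect the training data yourself, SODA is also available on the Hugging Face Hub as `allenai/soda` (the dataset listed in this card's metadata). A minimal sketch, assuming the `datasets` library is installed:
```python
from datasets import load_dataset

# Download AI2's SODA dataset from the Hugging Face Hub.
soda = load_dataset("allenai/soda")

# Peek at one training example to see the dialogue fields.
print(soda["train"][0])
```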
# Get Started With Cadet-Medium
Use the code snippet below to get started with Cadet-Medium!
```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import colorful as cf

cf.use_true_colors()
cf.use_style('monokai')


class CadetMedAgent:
    def __init__(self):
        print(cf.bold | cf.purple("Waking up Cadet-Medium..."))
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained("t5-base", model_max_length=512)
        self.model = AutoModelForSeq2SeqLM.from_pretrained("ToddGoldfarb/Cadet-Medium", low_cpu_mem_usage=True).to(self.device)

        self.conversation_history = ""

    def observe(self, observation):
        self.conversation_history = self.conversation_history + observation
        # The number 400 below is just a truncation safety net: once the history
        # grows past 400 characters, the oldest 112 characters are dropped.
        if len(self.conversation_history) > 400:
            self.conversation_history = self.conversation_history[112:]

    def set_input(self, situation_narrative="", role_instruction=""):
        input_text = "dialog: "

        if situation_narrative != "":
            input_text = input_text + situation_narrative

        if role_instruction != "":
            input_text = input_text + " <SEP> " + role_instruction

        input_text = input_text + " <TURN> " + self.conversation_history

        # Uncomment the line below to see what is fed to the model.
        # print(input_text)

        return input_text

    def generate(self, situation_narrative, role_instruction, user_response):
        user_response = user_response + " <TURN> "
        self.observe(user_response)

        input_text = self.set_input(situation_narrative, role_instruction)
        inputs = self.tokenizer([input_text], return_tensors="pt").to(self.device)

        # I encourage you to change the hyperparameters of the model! Start by trying to modify the temperature.
        outputs = self.model.generate(inputs["input_ids"], max_new_tokens=512, temperature=1, top_p=.95,
                                      do_sample=True)
        cadet_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)

        added_turn = cadet_response + " <TURN> "
        self.observe(added_turn)

        return cadet_response

    def reset_history(self):
        # The history is a string, so reset it to an empty string (not a list).
        self.conversation_history = ""

    def run(self):
        def get_valid_input(prompt, default):
            while True:
                user_input = input(prompt)
                if user_input in ["Y", "N", "y", "n"]:
                    return user_input
                if user_input == "":
                    return default

        while True:
            continue_chat = ""

            # MODIFY THESE STRINGS TO YOUR LIKING :)
            situation_narrative = "Imagine you are Cadet-Medium talking to ???."
            role_instruction = "You are Cadet-Medium, and you are talking to ???."

            self.chat(situation_narrative, role_instruction)

            continue_chat = get_valid_input(cf.purple("Start a new conversation with new setup? [Y/N]:"), "Y")
            if continue_chat in ["N", "n"]:
                break

        print(cf.blue("CM: See you!"))

    def chat(self, situation_narrative, role_instruction):
        print(cf.green(
            "Cadet-Medium is running! Input [RESET] to reset the conversation history and [END] to end the conversation."))
        while True:
            user_input = input("You: ")
            if user_input == "[RESET]":
                self.reset_history()
                print(cf.green("[Conversation history cleared. Chat with Cadet-Medium!]"))
                continue
            if user_input == "[END]":
                break
            response = self.generate(situation_narrative, role_instruction, user_input)
            print(cf.blue("CM: " + response))


def main():
    print(cf.bold | cf.blue("LOADING MODEL"))
    CadetMed = CadetMedAgent()
    CadetMed.run()


if __name__ == '__main__':
    main()
```
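If you only need a single response rather than the interactive loop above, the sketch below builds the same `dialog: ... <SEP> ... <TURN> ...` input that `set_input` constructs. The situation, role, and history strings here are placeholder assumptions; adapt them to your use case:
```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("t5-base", model_max_length=512)
model = AutoModelForSeq2SeqLM.from_pretrained("ToddGoldfarb/Cadet-Medium")

# Same prompt layout the agent builds in set_input():
# "dialog: <situation> <SEP> <role instruction> <TURN> <conversation history>"
situation = "Imagine you are Cadet-Medium talking to a new user."
role = "You are Cadet-Medium, and you are talking to a new user."
history = "Hi! How are you today? <TURN> "

input_text = "dialog: " + situation + " <SEP> " + role + " <TURN> " + history
inputs = tokenizer([input_text], return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_new_tokens=512, top_p=.95, do_sample=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```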
# Citations and Special Thanks
Special thanks to Hyunwoo Kim for discussing with me the best way to use the SODA dataset. If you haven't looked into their work with SODA, Prosocial-Dialog, or COSMO, I recommend you do so! And be sure to read the SODA paper, cited below.
```
@article{kim2022soda,
title={SODA: Million-scale Dialogue Distillation with Social Commonsense Contextualization},
author={Hyunwoo Kim and Jack Hessel and Liwei Jiang and Peter West and Ximing Lu and Youngjae Yu and Pei Zhou and Ronan Le Bras and Malihe Alikhani and Gunhee Kim and Maarten Sap and Yejin Choi},
journal={ArXiv},
year={2022},
volume={abs/2212.10465}
}
``` |