|
|
--- |
|
|
license: mit |
|
|
language: |
|
|
- en |
|
|
tags: |
|
|
- text-2-text |
|
|
- natural-language |
|
|
- nlp |
|
|
- classification |
|
|
- call center |
|
|
- IT |
|
|
- summarization |
|
|
- text-generation |
|
|
--- |
|
|
# SITCC-T5-Classifier Model Card |
|
|
|
|
|
## Model Description |
|
|
The SITCC-T5-Classifier model is a fine-tuned version of the google/flan-t5-base model. It has been specifically trained to process IT ticket descriptions and extract the request/issue and the software/system that the ticket is about. The model was fine-tuned on 5716 synthetic input/output pairs generated with OpenAI GPT-4 Turbo. |
|
|
|
|
|
## Model Details |
|
|
- Base Model: google/flan-t5-base |
|
|
- Fine-tuning Data: 5716 synthetic IT ticket description pairs generated by OpenAI GPT-4 Turbo |
|
|
|
|
|
## Intended Use |
|
|
The SITCC-T5-Classifier model is designed to be used for IT ticket classification and information extraction tasks. It can be used to automatically identify the request/issue and the software/system mentioned in an IT ticket description. |
|
|
|
|
|
## Limitations and Known Issues |
|
|
- The model's performance may vary depending on the quality and diversity of the input IT ticket descriptions. |
|
|
- The model may struggle with understanding complex or ambiguous ticket descriptions. |
|
|
- The model may not perform well on ticket descriptions that are significantly different from the training data. |
|
|
|
|
|
## Example Usage |
|
|
This example runs on the CPU. |
|
|
``` python |
|
|
import re |
|
|
import pandas as pd |
|
|
from transformers import T5Tokenizer, T5ForConditionalGeneration |
|
|
|
|
|
from time import perf_counter |
|
|
|
|
|
class SITCC_T5_Classifier:
    """
    A class for classifying IT ticket text using the SITCC T5 model.

    The model generates text of the form
    ``Software/System: <name> Issue/Request: <description></s>``, which
    ``process_response`` turns into a dictionary.

    Attributes:
        tokenizer (T5Tokenizer): The tokenizer for the T5 model.
        model (T5ForConditionalGeneration): The T5 model for classification.
    """

    # Complete output: both fields followed by the end-of-sequence marker.
    _COMPLETE_PATTERN = re.compile(
        r'Software/System: (.*) Issue/Request: (.*)</s>', re.DOTALL
    )
    # Output cut off by ``max_new_tokens`` never reaches ``</s>``; accept it
    # anyway so a long answer is returned instead of raising.
    _TRUNCATED_PATTERN = re.compile(
        r'Software/System: (.*) Issue/Request: (.*)', re.DOTALL
    )

    def __init__(
        self,
        model_name: str = "KameronB/sitcc-t5-classifier",
        device: str = "cpu",
    ) -> None:
        """
        Load the tokenizer and model.

        Args:
            model_name (str): Hub id or local directory of the fine-tuned
                model. Defaults to the published SITCC checkpoint.
            device (str): Value passed as ``device_map`` when loading the
                model. Defaults to ``"cpu"``, matching the original example.
        """
        # Load the tokenizer and model from the fine-tuned model directory
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(
            model_name, device_map=device
        )

    def process_response(self, response: str) -> dict:
        """
        Process the response and extract the software/system and issue/request.

        Args:
            response (str): The decoded model output, special tokens included.

        Returns:
            dict: A dictionary with the keys ``"Software/System"`` and
            ``"Issue/Request"``. Both values are ``None`` when the response
            does not contain the expected labels.
        """
        # Try the strict pattern first so well-formed output parses exactly
        # as before; fall back to the pattern without ``</s>`` otherwise.
        for pattern in (self._COMPLETE_PATTERN, self._TRUNCATED_PATTERN):
            matches = pattern.search(response)
            if matches is not None:
                return {
                    "Software/System": matches.group(1),
                    "Issue/Request": matches.group(2),
                }

        # Unparseable output: keep the result shape stable rather than
        # failing with AttributeError on a missing match object.
        return {"Software/System": None, "Issue/Request": None}

    def classify_entry(self, entry: str, max_new_tokens=60) -> dict:
        """
        Classify the input text and return the classification results.

        Args:
            entry (str): The input text to be classified.
            max_new_tokens (int): The maximum number of tokens to generate.

        Returns:
            dict: The classification results, as produced by
            ``process_response``.
        """
        # Tokenize the input text and place it on the same device as the model
        encoded = self.tokenizer(entry, return_tensors="pt")
        input_ids = encoded.input_ids.to(self.model.device)

        # Generate the output text
        outputs = self.model.generate(input_ids, max_new_tokens=max_new_tokens)

        # Decode with special tokens kept: process_response looks for ``</s>``
        return self.process_response(self.tokenizer.decode(outputs[0]))
|
|
|
|
|
# Instantiate the wrapper; this loads the fine-tuned tokenizer and model
sitcc_t5 = SITCC_T5_Classifier()

# Sample IT ticket descriptions to run through the classifier
input_text = [
    "The customer is getting the following error when using rSATS:\nERROR: 'Failed to connect'. \nI have tried restarting the application and the computer, but the issue persists. \nEscalating to Team",
    "The customer is experiencing issues with their network connectivity, which is causing slow internet speeds and frequent disconnections.",
    "The customer is unable to access the shared drive on the network. They receive an error message stating 'Network path not found'. \nEscalating to Network Team",
    "The customer is unable to print from their computer. They have checked the printer connections and restarted the printer, but the issue persists. \nEscalating to Printer Support Team",
]

# Time the whole batch: start the clock just before the first ticket
start = perf_counter()
for ticket in input_text:
    # Print the extracted software/system and issue/request for this ticket
    result = sitcc_t5.classify_entry(ticket)
    print(result)

# Stop the clock and report the total elapsed wall time
end = perf_counter()
print(f"Time taken: {end - start} seconds")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
``` |