Haiku Generator Model and Huggingface Spaces
A couple of friends and I trained a haiku generator model. The full project is available at this link, and you can also try the model yourself. The project consisted of two main parts:
- Fine-tune a pretrained model (we used GPT-2 because it was the easiest to use).
- Develop a Hugging Face Space so that everyone can use and test our model through an easy-to-use interface.
Code
Here is how we trained the model.
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
from datasets import load_dataset
# Load the tokenizer and model
model_name = "gpt2-large"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# Register '[PAD]' as the padding token; the tokenization step below pads
# every haiku to a fixed length and needs a pad token to do so.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model = GPT2LMHeadModel.from_pretrained(model_name)
# The tokenizer gained a token, so resize the embedding matrix to match it
model.resize_token_embeddings(len(tokenizer))
# Pick MPS (Metal Performance Shaders) only when it is usable at runtime.
# torch.has_mps is deprecated and only reports how PyTorch was built, so it
# can be true on a machine where the MPS device cannot actually be used.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)
# Load the haiku dataset
dataset = load_dataset("davanstrien/haiku_kto")
# Print the first training record to inspect the dataset's fields
print(dataset['train'][0])
# Split the training data into train and validation sets (90% train, 10% validation)
train_val_split = dataset['train'].train_test_split(test_size=0.1)
train_data = train_val_split['train']
val_data = train_val_split['test']
# Extract haiku texts and tokenize them
def extract_and_tokenize_function(batch):
    """Turn a batch of conversations into padded GPT-2 training tensors.

    For every entry of ``batch['messages']`` the content of the first message
    whose role is ``'assistant'`` is taken as the haiku text. Entries without
    such a message, or whose content is empty, are skipped, so the output may
    hold fewer rows than the input batch.

    Returns a dict with ``input_ids``, ``attention_mask`` and ``labels``;
    ``labels`` mirrors ``input_ids`` with every padding position set to -100.
    """
    assistant_texts = []
    for conversation in batch['messages']:
        reply = None
        for message in conversation:
            if message['role'] == 'assistant':
                reply = message['content']
                break
        if reply:
            assistant_texts.append(reply)

    # Pad/truncate every haiku to exactly 50 tokens
    encoded = tokenizer(
        assistant_texts,
        truncation=True,
        padding='max_length',
        max_length=50,
    )
    ids = torch.tensor(encoded['input_ids'])
    mask = torch.tensor(encoded['attention_mask'])
    # Labels are a copy of the ids with padding positions replaced by -100
    targets = ids.masked_fill(ids == tokenizer.pad_token_id, -100)
    return {'input_ids': ids, 'attention_mask': mask, 'labels': targets}
# Tokenize both splits in batches, dropping the original columns
train_data = train_data.map(
    extract_and_tokenize_function,
    batched=True,
    remove_columns=train_data.column_names,
)
val_data = val_data.map(
    extract_and_tokenize_function,
    batched=True,
    remove_columns=val_data.column_names,
)
# Expose the three model inputs as torch tensors on both splits
for _split in (train_data, val_data):
    _split.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Training arguments
_training_config = {
    "output_dir": "./results",
    "overwrite_output_dir": True,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 2,
    "warmup_steps": 500,
    "weight_decay": 0.01,
    "logging_dir": './logs',
    "logging_steps": 10,
    # Checkpoint and evaluate once per epoch, keeping at most two checkpoints
    "save_strategy": "epoch",
    "evaluation_strategy": "epoch",
    "save_total_limit": 2,
    "load_best_model_at_end": True,
}
training_args = TrainingArguments(**_training_config)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model and its tokenizer side by side
for _artifact in (model, tokenizer):
    _artifact.save_pretrained("./fine-tuned-haiku-model")
We created an app.py with the following code:
import streamlit as st
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
# Load the fine-tuned model and tokenizer
model_name_or_path = "./fine-tuned-haiku-model"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Streamlit re-executes this script on every widget interaction. Caching the
# loader keeps one tokenizer/model pair alive across reruns instead of
# reloading the weights from disk on each button click.
# show_spinner=False stops the cached call from rendering a spinner, which
# would otherwise be drawn before st.set_page_config() is reached below.
@st.cache_resource(show_spinner=False)
def _load_tokenizer_and_model(path):
    """Load the tokenizer and model saved at *path*; the model is moved to ``device``."""
    loaded_tokenizer = GPT2Tokenizer.from_pretrained(path)
    loaded_model = GPT2LMHeadModel.from_pretrained(path).to(device)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_tokenizer_and_model(model_name_or_path)
# Function to generate haiku
def generate_haiku(prompt, model, tokenizer, max_length=30):
    """Generate text continuing *prompt* and return it as a string.

    Args:
        prompt: Text used as the start of the generation.
        model: Model exposing ``generate(...)`` and a ``device`` attribute.
        tokenizer: Tokenizer used both to encode *prompt* and to decode the
            generated ids.
        max_length: Forwarded unchanged as ``max_length`` to ``model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Place the inputs on the device of the model that was passed in, so the
    # function does not depend on a module-level ``device`` variable.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=max_length,
        num_return_sequences=1,
        # Forbid any repeated 2-gram in the output
        no_repeat_ngram_size=2,
        # NOTE(review): per the transformers docs this flag applies to
        # beam-based generation; no num_beams is set here, so it may be a no-op.
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Streamlit UI
# Static page copy, kept together so the layout code below stays short
INSTRUCTIONS_MD = """
**Instructions:**
- Enter a single line as a prompt for the haiku.
- Click on "Generate Haiku" to see the result.
"""
ABOUT_MODEL_MD = """
**About the Model:**
This haiku generator is powered by a fine-tuned GPT-2 model, specifically trained on haiku poems to create coherent and artistic haikus based on your input.
**Credits:**
- Model fine-tuning: [Santiago Villasenor](https://github.com/SantiVillaRam7)
- Model fine-tuning: [ElCachorroHumano](https://github.com/elcachorrohumano)
- Model fine-tuning: [Oscar Martinez](https://github.com/Omarti34)
- Haiku Dataset: [Haiku KTO](https://huggingface.co/datasets/davanstrien/haiku_kto)
**Feedback:**
If you have any suggestions or feedback, please feel free to [contact us](mailto:pablo.alazraki@itam.mx).
"""
SIDEBAR_INFO_MD = """
This app generates haikus using a GPT-2 model fine-tuned on a haiku dataset.
Explore the beauty of haiku poetry and get inspired by the creativity of AI.
**GitHub Repository:**
[FineTune-Haiku](https://github.com/SantiVillaRam7/FineTune-Haiku)
**Powered by:**
- [Streamlit](https://streamlit.io/)
- [Hugging Face](https://huggingface.co/)
"""

st.set_page_config(
    page_title="Haiku Generator",
    page_icon="🍃",
    layout="centered",
    initial_sidebar_state="auto",
)

# Header: title, welcome text and banner image
st.title("🍃 Haiku Generator 🍃")
st.markdown("Welcome to the Haiku Generator! Enter the first line of your haiku and let our fine-tuned model create a poem for you.")
st.image(
    "https://i.etsystatic.com/15034562/r/il/90d004/1644188658/il_794xN.1644188658_5tia.jpg",
    use_column_width=True,
)
st.markdown(INSTRUCTIONS_MD)

# Prompt input and generation
prompt = st.text_area("Enter the first line of your haiku:", placeholder="The white and scilent office...")
if st.button("Generate Haiku"):
    if not prompt.strip():
        # Nothing but whitespace was entered
        st.warning("Please enter the first line of your haiku.")
    else:
        with st.spinner("Generating your haiku..."):
            haiku = generate_haiku(prompt, model, tokenizer)
        st.subheader("Your Generated Haiku:")
        st.write(f'"{haiku}"')

# Footer and sidebar
st.markdown(ABOUT_MODEL_MD)
st.sidebar.title("About")
st.sidebar.info(SIDEBAR_INFO_MD)