| # train_grpo.py | |
| # | |
| # See https://github.com/willccbb/verifiers for ongoing developments | |
| # | |
| """ | |
| citation: | |
| @misc{brown2025grpodemo, | |
| title={Granular Format Rewards for Eliciting Mathematical Reasoning Capabilities in Small Language Models}, | |
| author={Brown, William}, |
Workshop at ODSC London 2024
Dr. Yves J. Hilpisch | The Python Quants | CPF Program
London, 06. September 2024
(short link to this Gist: http://bit.ly/odsc_ldn_2024)
Fine-tuning llama 2 7B to analyze financial reports and write “funny” tweets
Sharing some insights from a recent weekend fun project where I tried to analyze and summarize financial reports using a fine-tuned LLM.
My initial goal was to train a model to summarize the annual/quarterly financial reports of public companies (aka 10-K / 10-Q). But, realizing that straightforward financial summaries are boring, I thought of tuning an LLM to generate sarcastic summaries of these reports. Something short I could post on Twitter.
Data exploration and dataset prep
Working with financial reports ain’t easy. You download them in html format, they’re pretty dense with ~100 pages filled with tables that can be tough to parse, many legal disclaimers and various useless info. I knew I wanted to get 3-5 funny tweets as an output from a report. But I spent quite some time figuring out what data to actually input to get the result - a page, a section, a table?
| from datetime import datetime, timedelta | |
| import concurrent.futures | |
| import csv | |
| import html | |
| import os | |
| import time | |
| from bs4 import BeautifulSoup | |
| from dotenv import load_dotenv | |
| import nltk |
This was a full fine-tune of llama-2-13b-hf using dataset https://huggingface.co/datasets/jondurbin/airoboros-gpt4-2.0
Convert the JSONL (newline-delimited JSON strings) into the conversational format that FastChat expects:
import re| # coding=utf-8 | |
| # Copyright 2023 The HuggingFace Inc. team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from peft import PeftModel | |
| import torch | |
| import os | |
| import argparse | |
| def get_args(): | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument("--base_model_name_or_path", type=str) |