These instructions are based on DreamBooth usage with the https://github.com/ShivamShrirao/diffusers repo.
Below are two files: "convertToCkpt.py" and "toCkpt.sh". Create these files inside the `examples/dreambooth` folder using the code provided.
| # Make sure you are using the latest `bitsandbytes` (at least 0.46.0) and PyTorch nightlies (at least 2.8). | |
| # Put together by sayakpaul and anijain2305 | |
| from diffusers.quantizers import PipelineQuantizationConfig | |
| from diffusers import FluxPipeline | |
| import argparse | |
| import json | |
| import torch | |
| import time | |
| from functools import partial |
| # train_grpo.py | |
| # | |
| # See https://github.com/willccbb/verifiers for ongoing developments | |
| # | |
| """ | |
| citation: | |
| @misc{brown2025grpodemo, | |
| title={Granular Format Rewards for Eliciting Mathematical Reasoning Capabilities in Small Language Models}, | |
| author={Brown, William}, |
| #VERBOSE=0 torchrun --nproc_per_node 3 self_contained_pp_LOC.py | |
| import os, random, numpy as np, torch, torch.nn as nn, torch.distributed as dist, torch.nn.functional as F | |
| from torch.optim import AdamW | |
| from torch.utils.data import DataLoader, DistributedSampler | |
| from datasets import load_dataset | |
| from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer | |
# Global step counter plus process topology read from the launcher's environment.
# LOCAL_RANK / WORLD_SIZE are injected by `torchrun` (see the launch command in the
# header comment); VERBOSE defaults to off unless the env var is exactly "1".
# NOTE(review): raises KeyError if run outside torchrun — presumably intentional; confirm.
STEP, local_rank, world_size, verbose = 0, int(os.environ["LOCAL_RANK"]), int(os.environ["WORLD_SIZE"]), os.environ.get("VERBOSE", "0") == "1"
| def set_all_seed(seed): |
| import torch | |
| from huggingface_hub import hf_hub_download | |
| from diffusers import FluxTransformer2DModel, DiffusionPipeline | |
# Target precision and device for the eventual real weights.
dtype, device = torch.bfloat16, "cuda"
# Hugging Face Hub repo id for the FLUX.1-schnell checkpoint.
ckpt_id = "black-forest-labs/FLUX.1-schnell"
# Build the transformer on the meta device: only the config is fetched here, and
# from_config allocates shape/dtype metadata without materializing any weight
# storage — real tensors are presumably loaded later (not shown in this chunk).
with torch.device("meta"):
    config = FluxTransformer2DModel.load_config(ckpt_id, subfolder="transformer")
    model = FluxTransformer2DModel.from_config(config).to(dtype)
| from diffusers import FluxPipeline, AutoencoderKL | |
| from diffusers.image_processor import VaeImageProcessor | |
| from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel | |
| import torch | |
| import gc | |
def flush():
    """Reclaim host and GPU memory between pipeline stages.

    Runs Python garbage collection first so tensors that just went out of
    reference are actually freed, then returns the CUDA caching allocator's
    unused blocks to the driver.
    """
    for release in (gc.collect, torch.cuda.empty_cache):
        release()
| import torch | |
| from typing import Optional | |
| from flash_attn import flash_attn_func | |
| from diffusers.models.attention import Attention | |
| class FlashAttnProcessor: | |
| r""" | |
| Processor for implementing memory efficient attention using flash_attn. | |
| """ |
| from torch import FloatTensor | |
# Spatial downsampling factor of the VAE: latent H/W are image H/W divided by 8.
vae_scale_factor = 8
# Self-attention key length (latent tokens) at the model's native 512x512 resolution.
typical_self_attn_key_length = (512/vae_scale_factor) * (512/vae_scale_factor)
# Key length at the resolution we actually want to generate (768x768).
desired_self_attn_key_length = (768/vae_scale_factor) * (768/vae_scale_factor)
# Ratio used to rescale attention for the longer sequence; cross-attention is left
# untouched (factor 1.0).
# NOTE(review): `is_self_attn` is not defined in this fragment — presumably set by the
# enclosing attention-processor code; verify before reuse.
key_length_factor=desired_self_attn_key_length/typical_self_attn_key_length if is_self_attn else 1.
| def softmax(x: FloatTensor, dim=-1) -> FloatTensor: | |
| maxes = x.max(dim, keepdim=True).values |
| from torch import FloatTensor | |
def softmax(x: FloatTensor, dim=-1) -> FloatTensor:
    """Numerically stable softmax of *x* along dimension *dim*.

    Subtracts the per-slice maximum before exponentiating so that ``exp``
    never overflows, then normalizes so each slice sums to 1.
    """
    # Shift by the max along `dim` (keepdim so broadcasting lines up).
    shifted = x - x.max(dim, keepdim=True).values
    weights = shifted.exp()
    return weights / weights.sum(dim, keepdim=True)
These instructions are based on DreamBooth usage with the https://github.com/ShivamShrirao/diffusers repo.
Below are two files: "convertToCkpt.py" and "toCkpt.sh". Create these files inside the `examples/dreambooth` folder using the code provided.
| import torch | |
| import numpy as np | |
| import k_diffusion as K | |
| from PIL import Image | |
| from torch import autocast | |
| from einops import rearrange, repeat | |
| def pil_img_to_torch(pil_img, half=False): | |
| image = np.array(pil_img).astype(np.float32) / 255.0 |