{ "cells": [ { "cell_type": "markdown", "source": [ "# Training GPT-Neo models with custom dataset\n", "You can either run this notebook on JupyterLab, or run commands below in a terminal\n", "\n", "Required packages:\n", "```\n", "conda create --name myenv python=3.6\n", "conda activate myenv\n", "pip install torch==1.9.0\n", "pip install datasets==1.10.2\n", "git clone https://github.com/huggingface/transformers.git\n", "cd transformers\n", "git checkout tags/v4.9.1\n", "pip install -e .\n", "cd ..\n", "```\n", "\n", "Additional commands if running on JupyterLab:\n", "```\n", "conda install jupyter\n", "conda install nb_conda\n", "conda install ipykernel\n", "python -m ipykernel install --user --name myenv\n", "```\n", "\n", "Recommended CPU instance: c5.12xlarge\n", "\n", "**Note:** This does not work on a GPU instance!" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "### Create or upload your custom dataset\n", "**Note:** when creating the training data file, put every data sample (a sequence of words) as a separate row and have only **one** column with the column name \"text\"" ], "metadata": {} }, { "cell_type": "code", "execution_count": 1, "source": [ "%%writefile train_data.csv\n", "text\n", "a\n", "b\n", "c" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing train_data.csv\n" ] } ], "metadata": {} }, { "cell_type": "markdown", "source": [ "### Continue to pre-train!" 
], "metadata": {} }, { "cell_type": "code", "execution_count": 3, "source": [ "model = \"EleutherAI/gpt-neo-1.3B\" #@param [\"EleutherAI/gpt-neo-1.3B\", \"gpt2\"]\n", "num_train_epochs = 3 #@param {type:\"integer\"}" ], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": 4, "source": [ "!python transformers/examples/pytorch/language-modeling/run_clm.py \\\n", " --model_name_or_path {model} \\\n", " --train_file train_data.csv \\\n", " --do_train --per_device_train_batch_size 1 \\\n", " --output_dir tmp/test-clm \\\n", " --overwrite_output_dir \\\n", " --num_train_epochs {num_train_epochs}" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "07/30/2021 20:47:37 - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False\n", "07/30/2021 20:47:37 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", "_n_gpu=0,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_find_unused_parameters=None,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=False,\n", "do_predict=False,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_steps=None,\n", "evaluation_strategy=IntervalStrategy.NO,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "gradient_accumulation_steps=1,\n", "greater_is_better=None,\n", "group_by_length=False,\n", "ignore_data_skip=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=5e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=False,\n", "local_rank=-1,\n", "log_level=-1,\n", "log_level_replica=-1,\n", "log_on_each_node=True,\n", "logging_dir=tmp/test-clm/runs/Jul30_20-47-37_ip-172-31-54-251,\n", "logging_first_step=False,\n", "logging_steps=500,\n", 
"logging_strategy=IntervalStrategy.STEPS,\n", "lr_scheduler_type=SchedulerType.LINEAR,\n", "max_grad_norm=1.0,\n", "max_steps=-1,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=3.0,\n", "output_dir=tmp/test-clm,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=8,\n", "per_device_train_batch_size=1,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=test-clm,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=None,\n", "remove_unused_columns=True,\n", "report_to=[],\n", "resume_from_checkpoint=None,\n", "run_name=tmp/test-clm,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=IntervalStrategy.STEPS,\n", "save_total_limit=None,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_legacy_prediction_loop=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", ")\n", "07/30/2021 20:47:37 - WARNING - datasets.builder - Using custom data configuration default-822c539f89fcc9d9\n", "07/30/2021 20:47:37 - INFO - datasets.builder - Generating dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)\n", "Downloading and preparing dataset csv/default (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff...\n", "100%|██████████████████████████████████████████| 1/1 [00:00<00:00, 11096.04it/s]\n", "07/30/2021 20:47:37 - INFO - datasets.utils.download_manager - Downloading took 0.0 min\n", "07/30/2021 20:47:37 - INFO - datasets.utils.download_manager - Checksum Computation took 0.0 min\n", "100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 
2157.56it/s]\n", "07/30/2021 20:47:37 - INFO - datasets.utils.info_utils - Unable to verify checksums.\n", "07/30/2021 20:47:37 - INFO - datasets.builder - Generating split train\n", "07/30/2021 20:47:37 - INFO - datasets.utils.info_utils - Unable to verify splits sizes.\n", "Dataset csv downloaded and prepared to /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff. Subsequent calls will reuse this data.\n", "100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 1100.29it/s]\n", "07/30/2021 20:47:37 - WARNING - datasets.builder - Using custom data configuration default-822c539f89fcc9d9\n", "07/30/2021 20:47:37 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", "07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", "07/30/2021 20:47:37 - WARNING - datasets.builder - Reusing dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)\n", "07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", "07/30/2021 20:47:37 - WARNING - datasets.builder - Using custom data configuration default-822c539f89fcc9d9\n", "07/30/2021 20:47:37 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", "07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", "07/30/2021 20:47:37 - WARNING - datasets.builder - Reusing dataset csv 
(/home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)\n", "07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", "[INFO|configuration_utils.py:545] 2021-07-30 20:47:37,750 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", "[INFO|configuration_utils.py:581] 2021-07-30 20:47:37,751 >> Model config GPTNeoConfig {\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPTNeoForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0,\n", " \"attention_layers\": [\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\"\n", " ],\n", " \"attention_types\": [\n", " [\n", " [\n", " \"global\",\n", " \"local\"\n", " ],\n", " 12\n", " ]\n", " ],\n", " \"bos_token_id\": 50256,\n", " \"embed_dropout\": 0,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_size\": 2048,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": null,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"max_position_embeddings\": 2048,\n", " \"model_type\": \"gpt_neo\",\n", " \"num_heads\": 16,\n", " \"num_layers\": 24,\n", " \"resid_dropout\": 0,\n", " \"summary_activation\": null,\n", " 
\"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50,\n", " \"temperature\": 0.9\n", " }\n", " },\n", " \"tokenizer_class\": \"GPT2Tokenizer\",\n", " \"transformers_version\": \"4.9.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257,\n", " \"window_size\": 256\n", "}\n", "\n", "[INFO|configuration_utils.py:545] 2021-07-30 20:47:37,789 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", "[INFO|configuration_utils.py:581] 2021-07-30 20:47:37,789 >> Model config GPTNeoConfig {\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPTNeoForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0,\n", " \"attention_layers\": [\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\"\n", " ],\n", " \"attention_types\": [\n", " [\n", " [\n", " \"global\",\n", " \"local\"\n", " ],\n", " 12\n", " ]\n", " ],\n", " \"bos_token_id\": 50256,\n", " \"embed_dropout\": 0,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_size\": 2048,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": null,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"max_position_embeddings\": 2048,\n", " \"model_type\": \"gpt_neo\",\n", " 
\"num_heads\": 16,\n", " \"num_layers\": 24,\n", " \"resid_dropout\": 0,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50,\n", " \"temperature\": 0.9\n", " }\n", " },\n", " \"tokenizer_class\": \"GPT2Tokenizer\",\n", " \"transformers_version\": \"4.9.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257,\n", " \"window_size\": 256\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/vocab.json from cache at /home/ubuntu/.cache/huggingface/transformers/6111bc9bbed617156dc5c0b9fa9d6793147619aad08053f03b3697f1a5027973.a1b97b074a5ac71fad0544c8abc1b3581803d73832476184bde6cff06a67b6bb\n", "[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/merges.txt from cache at /home/ubuntu/.cache/huggingface/transformers/ec80888cdc98108f625f7ec7a29ec449eb361ae1325aa1e7e63006ce962c071c.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435\n", "[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/tokenizer.json from cache at None\n", "[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/special_tokens_map.json from cache at 
/home/ubuntu/.cache/huggingface/transformers/1ae5a53fe395100a9213705940d92cc94554a2269777c062d951d1b710c39bb8.3ae9ae72462581d20e36bc528e9c47bb30cd671bb21add40ca0b24a0be9fac22\n", "[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/transformers/5fe35a59019a6fb05bfa29a31b59d407cd81ae59da93e6953772a783b740b4c0.c31b6b7d3225be0c43bc0f8e5d84d03a8b49fdb6b9f6009bbfff1f9cc5ec18bc\n", "[INFO|configuration_utils.py:545] 2021-07-30 20:47:37,941 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", "[INFO|configuration_utils.py:581] 2021-07-30 20:47:37,941 >> Model config GPTNeoConfig {\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPTNeoForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0,\n", " \"attention_layers\": [\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\"\n", " ],\n", " \"attention_types\": [\n", " [\n", " [\n", " \"global\",\n", " \"local\"\n", " ],\n", " 12\n", " ]\n", " ],\n", " \"bos_token_id\": 50256,\n", " \"embed_dropout\": 0,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_size\": 2048,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": null,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"max_position_embeddings\": 
2048,\n", " \"model_type\": \"gpt_neo\",\n", " \"num_heads\": 16,\n", " \"num_layers\": 24,\n", " \"resid_dropout\": 0,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50,\n", " \"temperature\": 0.9\n", " }\n", " },\n", " \"tokenizer_class\": \"GPT2Tokenizer\",\n", " \"transformers_version\": \"4.9.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257,\n", " \"window_size\": 256\n", "}\n", "\n", "[INFO|configuration_utils.py:545] 2021-07-30 20:47:38,074 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", "[INFO|configuration_utils.py:581] 2021-07-30 20:47:38,075 >> Model config GPTNeoConfig {\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPTNeoForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0,\n", " \"attention_layers\": [\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\",\n", " \"global\",\n", " \"local\"\n", " ],\n", " \"attention_types\": [\n", " [\n", " [\n", " \"global\",\n", " \"local\"\n", " ],\n", " 12\n", " ]\n", " ],\n", " \"bos_token_id\": 50256,\n", " \"embed_dropout\": 0,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_size\": 2048,\n", " 
\"initializer_range\": 0.02,\n", " \"intermediate_size\": null,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"max_position_embeddings\": 2048,\n", " \"model_type\": \"gpt_neo\",\n", " \"num_heads\": 16,\n", " \"num_layers\": 24,\n", " \"resid_dropout\": 0,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50,\n", " \"temperature\": 0.9\n", " }\n", " },\n", " \"tokenizer_class\": \"GPT2Tokenizer\",\n", " \"transformers_version\": \"4.9.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257,\n", " \"window_size\": 256\n", "}\n", "\n", "[INFO|modeling_utils.py:1271] 2021-07-30 20:47:38,143 >> loading weights file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/transformers/7c5fac9d60b015cbc7c007ab8fe6d0512787fbaef81968922959898c49468d73.4c6a483fbfb5a25ac384bfcd71a1ff15245f06583a00c4ab4c44ed0f761f0b08\n", "[INFO|modeling_utils.py:1510] 2021-07-30 20:48:13,532 >> All model checkpoint weights were used when initializing GPTNeoForCausalLM.\n", "\n", "[INFO|modeling_utils.py:1519] 2021-07-30 20:48:13,532 >> All the weights of GPTNeoForCausalLM were initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPTNeoForCausalLM for predictions without further training.\n", "Running tokenizer on dataset: 0%| | 0/1 [00:00> ***** Running training *****\n", "[INFO|trainer.py:1165] 2021-07-30 20:48:13,598 >> Num examples = 1\n", "[INFO|trainer.py:1166] 2021-07-30 20:48:13,598 >> Num Epochs = 3\n", "[INFO|trainer.py:1167] 2021-07-30 20:48:13,598 >> Instantaneous batch size per device = 1\n", "[INFO|trainer.py:1168] 2021-07-30 20:48:13,598 >> Total 
train batch size (w. parallel, distributed & accumulation) = 1\n", "[INFO|trainer.py:1169] 2021-07-30 20:48:13,598 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1170] 2021-07-30 20:48:13,598 >> Total optimization steps = 3\n", "100%|█████████████████████████████████████████████| 3/3 [00:09<00:00, 3.09s/it][INFO|trainer.py:1360] 2021-07-30 20:48:23,151 >> \n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 9.5525, 'train_samples_per_second': 0.314, 'train_steps_per_second': 0.314, 'train_loss': 4.490866978963216, 'epoch': 3.0}\n", "100%|█████████████████████████████████████████████| 3/3 [00:09<00:00, 3.18s/it]\n", "[INFO|trainer.py:1919] 2021-07-30 20:48:23,151 >> Saving model checkpoint to tmp/test-clm\n", "[INFO|configuration_utils.py:379] 2021-07-30 20:48:23,152 >> Configuration saved in tmp/test-clm/config.json\n", "[INFO|modeling_utils.py:997] 2021-07-30 20:48:36,143 >> Model weights saved in tmp/test-clm/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2006] 2021-07-30 20:48:36,143 >> tokenizer config file saved in tmp/test-clm/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2012] 2021-07-30 20:48:36,143 >> Special tokens file saved in tmp/test-clm/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 3.0\n", " train_loss = 4.4909\n", " train_runtime = 0:00:09.55\n", " train_samples = 1\n", " train_samples_per_second = 0.314\n", " train_steps_per_second = 0.314\n" ] } ], "metadata": {} }, { "cell_type": "markdown", "source": [ "### Use the trained model for generation" ], "metadata": {} }, { "cell_type": "code", "execution_count": 6, "source": [ "!git clone https://gist.github.com/zilunpeng/63358af14fefd035285ce7d09e6b5638" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into '63358af14fefd035285ce7d09e6b5638'...\n", "remote: Enumerating objects: 3, done.\u001b[K\n", "remote: Total 3 (delta 0), reused 0 
(delta 0), pack-reused 3\u001b[K\n", "Unpacking objects: 100% (3/3), done.\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 5, "source": [ "%%writefile testing_prompt.txt\n", "hi" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing testing_prompt.txt\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 11, "source": [ "!python 63358af14fefd035285ce7d09e6b5638/run_generation.py \\\n", " --model_type gpt-neo \\\n", " --model_name_or_path tmp/test-clm \\\n", " --seed 32 --prompt_path testing_prompt.txt \\\n", " --length 30" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "07/30/2021 21:02:03 - WARNING - __main__ - device: cpu, n_gpu: 0, 16-bits training: False\n", "07/30/2021 21:02:15 - INFO - __main__ - Namespace(device=device(type='cpu'), fp16=False, k=0, length=30, model_name_or_path='tmp/test-clm', model_type='gpt-neo', n_gpu=0, no_cuda=False, num_return_sequences=1, p=0.9, padding_text='', prefix='', prompt='', prompt_path='testing_prompt.txt', repetition_penalty=1.0, seed=32, stop_token=None, temperature=1.0, xlm_language='')\n", "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n", "=== GENERATED SEQUENCE 1 ===\n", "hibcol\n", "This happened at an event when some of you folks were also there. It is a\n", "great way to learn about it. If\n" ] } ], "metadata": {} } ], "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3.6.13 64-bit ('train_gpt_neo_fresh': conda)" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" }, "interpreter": { "hash": "3941a2565f46efd9744bf9b8a7fccfd29a4b2a0ba5ab610e281a799a536bf67d" } }, "nbformat": 4, "nbformat_minor": 5 }