text-gen-bot/train.py

21 lines
631 B
Python

from aitextgen.TokenDataset import TokenDataset
from aitextgen.tokenizers import train_tokenizer
from aitextgen.utils import GPT2ConfigCPU
from aitextgen import aitextgen
import json
# Training settings. These are the values the script has always used; they
# are named here so they can be located and tuned in one place.
CONFIG_PATH = 'config.json'
TOKENIZER_FILE = "aitextgen.tokenizer.json"
BLOCK_SIZE = 64
BATCH_SIZE = 16
NUM_STEPS = 1000000
GENERATE_EVERY = 10000
SAVE_EVERY = 5000


def load_training_file(config_path=CONFIG_PATH):
    """Return the training-text path stored in a JSON config file.

    Args:
        config_path: Path of the JSON configuration file to read.

    Returns:
        The value stored under the top-level ``'file'`` key.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the config has no ``'file'`` entry.
    """
    # JSON is UTF-8 by specification; relying on the locale's default
    # encoding would corrupt non-ASCII paths on some platforms.
    with open(config_path, 'r', encoding='utf-8') as file:
        json_object = json.load(file)
    return json_object['file']


def main():
    """Train a tokenizer and a CPU-sized GPT-2 model on the configured file."""
    file_name = load_training_file()

    # The tokenizer is trained first; it is expected to write TOKENIZER_FILE
    # into the working directory, which the model and dataset then load.
    train_tokenizer(file_name)

    config = GPT2ConfigCPU()
    ai = aitextgen(tokenizer_file=TOKENIZER_FILE, config=config)
    data = TokenDataset(
        file_name, tokenizer_file=TOKENIZER_FILE, block_size=BLOCK_SIZE)
    ai.train(data, batch_size=BATCH_SIZE, num_steps=NUM_STEPS,
             generate_every=GENERATE_EVERY, save_every=SAVE_EVERY)


# Only start the (very long) training run when executed as a script, so the
# helpers above can be imported without side effects.
if __name__ == "__main__":
    main()