# Training script: fine-tunes a small CPU GPT-2 model with aitextgen
# on the text file named in config.json.
"""Train a small GPT-2 model on CPU with aitextgen.

Reads the training-corpus filename from ``config.json`` (key ``"file"``),
trains a tokenizer on that corpus, then trains a compact GPT-2 model,
generating a sample and saving a checkpoint every 5000 of 50000 steps.
"""
from aitextgen.TokenDataset import TokenDataset
from aitextgen.tokenizers import train_tokenizer
from aitextgen.utils import GPT2ConfigCPU
from aitextgen import aitextgen
import json

# Load the path of the training text file from the JSON config.
# Explicit encoding avoids depending on the platform's locale default.
with open('config.json', 'r', encoding='utf-8') as file:
    json_object = json.load(file)

file_name = json_object['file']

# Train a tokenizer on the corpus; presumably it is written to
# "aitextgen.tokenizer.json" in the working directory, since that
# filename is referenced below — confirm against aitextgen docs.
train_tokenizer(file_name)
tokenizer_file = "aitextgen.tokenizer.json"

# GPT2ConfigCPU is a small GPT-2 configuration intended for CPU training.
config = GPT2ConfigCPU()

# Build a fresh (untrained) model around the tokenizer and config.
ai = aitextgen(tokenizer_file=tokenizer_file, config=config)

# Encode the corpus into fixed-length blocks of 64 tokens for training.
data = TokenDataset(
    file_name, tokenizer_file=tokenizer_file, block_size=64)

# Train for 50000 steps with batch size 8, emitting a generated sample
# and saving a checkpoint every 5000 steps.
ai.train(data, batch_size=8, num_steps=50000,
         generate_every=5000, save_every=5000)