# Provenance: contents of textgen.py as introduced by commit
# 2fcda33cf8c8831eb71dc467af1694befb65c8fe
# (array-in-a-matrix, Tue, 16 Aug 2022 01:25:38 -0400,
# "[PATCH] created 2 functions to gen text and train").
"""Train an aitextgen model and generate text with it.

The module reads ``config.json`` from the current working directory when it
is imported and exposes two helpers: ``train_ai`` to train a tokenizer and a
model, and ``generate_message`` to sample text from the trained model.
"""

import json

from aitextgen import aitextgen
from aitextgen.TokenDataset import TokenDataset
from aitextgen.tokenizers import train_tokenizer
from aitextgen.utils import GPT2ConfigCPU

# Tokenizer file shared by training and generation.
# NOTE(review): train_ai assumes train_tokenizer() writes this file into the
# current working directory -- confirm against the aitextgen documentation.
TOKENIZER_FILE = "aitextgen.tokenizer.json"

# Folder the trained model is loaded from in generate_message().
# NOTE(review): train_ai does not pass an output directory to ai.train(), so
# this relies on aitextgen's default output folder having the same name --
# confirm.
MODEL_FOLDER = "trained_model"

# JSON text is UTF-8; be explicit rather than depend on the platform default.
with open('config.json', 'r', encoding="utf-8") as file:
    json_object = json.load(file)

# Value of the "file" key; passed to the tokenizer trainer and the dataset
# loader (presumably the path of the training text -- verify against the
# config schema).
file_name = json_object['file']


def generate_message():
    """Generate one text sample from the trained model and return it.

    Loads the model from ``MODEL_FOLDER`` with the tokenizer in
    ``TOKENIZER_FILE`` on every call.

    Returns:
        str: The generated text.
    """
    ai = aitextgen(model_folder=MODEL_FOLDER,
                   tokenizer_file=TOKENIZER_FILE)
    # Without return_as_list=True, generate() prints the text and returns
    # None, so callers (including the print at the bottom of this module)
    # would only ever receive None.
    return ai.generate(n=1, return_as_list=True)[0]


def train_ai():
    """Train a tokenizer and a CPU-sized GPT-2 model on ``file_name``.

    Trains the tokenizer first, then builds a model from ``GPT2ConfigCPU``
    and trains it on a ``TokenDataset`` built from the same file. Prints a
    confirmation message when training finishes.
    """
    train_tokenizer(file_name)
    config = GPT2ConfigCPU()
    ai = aitextgen(tokenizer_file=TOKENIZER_FILE, config=config)
    data = TokenDataset(file_name, tokenizer_file=TOKENIZER_FILE,
                        block_size=64)
    ai.train(data, batch_size=8, num_steps=50000,
             generate_every=5000, save_every=5000)
    print("AI has been trained!")


# NOTE(review): this runs on import as well as when executed as a script;
# kept at module level to preserve existing behavior. Consider moving it
# under an `if __name__ == "__main__":` guard.
print(generate_message())