check arg passed

specify text generation in py process
added retrain var
2022-08-16 03:46:05 -04:00 · 2022-08-16 03:45:11 -04:00 · 2022-08-16 03:43:59 -04:00 · 2022-08-16 03:43:01 -04:00
4 changed files with 33 additions and 17 deletions
--- a/README.md
+++ b/README.md
@@ -46,11 +46,13 @@ Before a bot can be used the fields in the `config.json` file must be populated

 ► user*         ⇢     Account's User ID.

-► file         ⇢     Path of file used for training the AI (.txt file only).
+► file          ⇢     Path of file used for training the AI (.txt file only).

-► prefix       ⇢     Bot listens to commands that start with this prefix.
+► prefix        ⇢     Bot listens to commands that start with this prefix.

-► frequency    ⇢     How often the bot sends a message (keep low to prevent spam).
+► frequency     ⇢     How often the bot sends a message (keep low to prevent spam).

-► size         ⇢     Bot starts generating messages when the number of lines in the training file is equal to this. The greater the size, the longer bot waits before messaging but might increase message quality.
+► size          ⇢     Bot starts generating messages when the number of lines in the training file is equal to this. The greater the size, the longer bot waits before messaging but might increase message quality.
+
+► retrain       ⇢     The bot retrains itself after this many extra lines of messages are recorded in the text file.
 ```
--- a/example.config.json
+++ b/example.config.json
@@ -5,5 +5,6 @@
    "file": "training-matrix.txt",
    "prefix": "!",
    "frequency": "25",
-    "size": "5000"
+    "size": "5000",
+    "retrain": "10000"
 }
--- a/index.js
+++ b/index.js
@@ -21,21 +21,21 @@ client.on("room.message", (roomId, event) => {
    });

    if (lineCount(config.file) < config.size) return; // ? don't start generating messages until a big enough dataset is present
-
-    // TODO: train AI every Nth message?
    // ? send message every N messages using the training data
    if (!(messageCounter % config.frequency)) {
        console.log("Generating message...");

-        const python = spawn('python', ["textgen.py"]);
+        const python = spawn('python', ["textgen.py", "generate"]);

        python.stdout.on('data', function (message) {
            message = message.toString();
-            console.log("bot:\t" + message);
            client.sendText(roomId, message);
        });
        python.on('close'); // ? close python process when finished
    };
+
+    // TODO: train AI every Nth message?
+
 });

 function lineCount(text) {
--- a/textgen.py
+++ b/textgen.py
@@ -2,7 +2,8 @@ from aitextgen.TokenDataset import TokenDataset
 from aitextgen.tokenizers import train_tokenizer
 from aitextgen.utils import GPT2ConfigCPU
 from aitextgen import aitextgen
-import json, sys
+import json
+import sys

 with open('config.json', 'r') as file:
    json_object = json.load(file)
@@ -10,21 +11,33 @@ with open('config.json', 'r') as file:
 file_name = json_object['file']

 # ? generate message using trained model
+
+
 def generate_message():
    ai = aitextgen(model_folder="trained_model",
-                tokenizer_file="aitextgen.tokenizer.json")
+                   tokenizer_file="aitextgen.tokenizer.json")
    ai.generate()

 # ? train model using text file
+
+
 def train_ai():
    train_tokenizer(file_name)
    tokenizer_file = "aitextgen.tokenizer.json"
    config = GPT2ConfigCPU()
    ai = aitextgen(tokenizer_file=tokenizer_file, config=config)
-    data = TokenDataset(file_name, tokenizer_file=tokenizer_file, block_size=64)
-    ai.train(data, batch_size=8, num_steps=50000, generate_every=5000, save_every=5000)
-    print("AI has been trained!")
+    data = TokenDataset(
+        file_name, tokenizer_file=tokenizer_file, block_size=64)
+    ai.train(data, batch_size=8, num_steps=50000,
+             generate_every=5000, save_every=5000)

-# ? send message to parent JS process
-print(generate_message()) 
-sys.stdout.flush()
+
+match sys.argv[1]:
+    case "generate":
+        # ? send message to parent JS process
+        print(generate_message())
+        sys.stdout.flush()
+    case "train":
+        train_ai()
+    case _:
+        raise NameError("Argument not provided.")
Author	SHA1	Message	Date
array-in-a-matrix	1b4b769b7a	check arg passed	2022-08-16 03:46:05 -04:00
array-in-a-matrix	8c6ad6d5a3	specify text generation in py process	2022-08-16 03:45:11 -04:00
array-in-a-matrix	5075827551	added retrain var	2022-08-16 03:43:59 -04:00
array-in-a-matrix	651a2b0729	spaces	2022-08-16 03:43:01 -04:00