aboutsummaryrefslogtreecommitdiff
path: root/cmd/tokenize/main.go
blob: 4e0db402814453e313aee091a00971290c1d2c70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"os"

	gpt3encoder "github.com/samber/go-gpt-3-encoder"
	"within.website/x/internal"
)

// decode is the -decode command-line flag. It defaults to false; when set,
// the program decodes instead of encoding.
var decode = flag.Bool("decode", false, "if true, decode instead of encode")

// main converts between text and token IDs using stdin and stdout.
//
// By default, all of stdin is read as text, encoded, and the resulting tokens
// are written to stdout as JSON. With -decode, stdin is parsed as a JSON array
// of integers and the decoded result is printed to stdout followed by a
// newline. Every failure is reported through log.Fatal, which terminates the
// program with a non-zero exit status.
func main() {
	// NOTE(review): presumably this parses the command-line flags, since
	// *decode is read below without a visible flag.Parse call — confirm.
	internal.HandleStartup()

	enc, err := gpt3encoder.NewEncoder()
	if err != nil {
		log.Fatal(err)
	}

	if *decode {
		var tokens []int
		if err := json.NewDecoder(os.Stdin).Decode(&tokens); err != nil {
			log.Fatal(err)
		}

		// Check the write error so a failed write to stdout is reported
		// instead of exiting successfully with truncated or missing output;
		// this mirrors the error handling of the encode path below.
		if _, err := fmt.Fprintln(os.Stdout, enc.Decode(tokens)); err != nil {
			log.Fatal(err)
		}
		return
	}

	// Encoding operates on the whole input at once, so read stdin fully.
	data, err := io.ReadAll(os.Stdin)
	if err != nil {
		log.Fatal(err)
	}

	tokens, err := enc.Encode(string(data))
	if err != nil {
		log.Fatal(err)
	}

	if err := json.NewEncoder(os.Stdout).Encode(&tokens); err != nil {
		log.Fatal(err)
	}
}