diff options
| author | Xe Iaso <me@xeiaso.net> | 2023-04-18 19:11:56 -0400 |
|---|---|---|
| committer | Xe Iaso <me@xeiaso.net> | 2023-04-18 19:12:08 -0400 |
| commit | 7f8a0be340b0c6a54dcac51cbdf5ebf7ce0d8293 (patch) | |
| tree | 7d9bb013d69bdf9522ad88edbf162e00924bb28f /cmd | |
| parent | 4c15f8fa4ce26a8e43ff1a86c316d5f25f899a1c (diff) | |
| download | x-7f8a0be340b0c6a54dcac51cbdf5ebf7ce0d8293.tar.xz x-7f8a0be340b0c6a54dcac51cbdf5ebf7ce0d8293.zip | |
tokenize
Signed-off-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'cmd')
| -rw-r--r-- | cmd/tokenize/main.go | 50 |
1 file changed, 50 insertions, 0 deletions
diff --git a/cmd/tokenize/main.go b/cmd/tokenize/main.go new file mode 100644 index 0000000..4e0db40 --- /dev/null +++ b/cmd/tokenize/main.go @@ -0,0 +1,50 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "log" + "os" + + gpt3encoder "github.com/samber/go-gpt-3-encoder" + "within.website/x/internal" +) + +var ( + decode = flag.Bool("decode", false, "if true, decode instead of encode") +) + +func main() { + internal.HandleStartup() + + enc, err := gpt3encoder.NewEncoder() + if err != nil { + log.Fatal(err) + } + + if *decode { + var tokens []int + if err := json.NewDecoder(os.Stdin).Decode(&tokens); err != nil { + log.Fatal(err) + } + + fmt.Fprintln(os.Stdout, enc.Decode(tokens)) + return + } + + data, err := io.ReadAll(os.Stdin) + if err != nil { + log.Fatal(err) + } + + tokens, err := enc.Encode(string(data)) + if err != nil { + log.Fatal(err) + } + + if err := json.NewEncoder(os.Stdout).Encode(&tokens); err != nil { + log.Fatal(err) + } +} |
