From 7f8a0be340b0c6a54dcac51cbdf5ebf7ce0d8293 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Tue, 18 Apr 2023 19:11:56 -0400 Subject: tokenize Signed-off-by: Xe Iaso --- cmd/tokenize/main.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 cmd/tokenize/main.go (limited to 'cmd') diff --git a/cmd/tokenize/main.go b/cmd/tokenize/main.go new file mode 100644 index 0000000..4e0db40 --- /dev/null +++ b/cmd/tokenize/main.go @@ -0,0 +1,50 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "log" + "os" + + gpt3encoder "github.com/samber/go-gpt-3-encoder" + "within.website/x/internal" +) + +var ( + decode = flag.Bool("decode", false, "if true, decode instead of encode") +) + +func main() { + internal.HandleStartup() + + enc, err := gpt3encoder.NewEncoder() + if err != nil { + log.Fatal(err) + } + + if *decode { + var tokens []int + if err := json.NewDecoder(os.Stdin).Decode(&tokens); err != nil { + log.Fatal(err) + } + + fmt.Fprintln(os.Stdout, enc.Decode(tokens)) + return + } + + data, err := io.ReadAll(os.Stdin) + if err != nil { + log.Fatal(err) + } + + tokens, err := enc.Encode(string(data)) + if err != nil { + log.Fatal(err) + } + + if err := json.NewEncoder(os.Stdout).Encode(&tokens); err != nil { + log.Fatal(err) + } +} -- cgit v1.2.3