aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristine Dodrill <me@christine.website>2018-09-24 19:29:33 -0700
committerChristine Dodrill <me@christine.website>2018-09-24 19:31:51 -0700
commitdc0c9aa8ae31948ecdcddcb1ead0a6041005299d (patch)
tree71a54597b124dda33dac736cd3dc1770d0e29797
parent198b135a250c10ddbd4734c5fcaa70cb3586610b (diff)
downloadx-dc0c9aa8ae31948ecdcddcb1ead0a6041005299d.tar.xz
x-dc0c9aa8ae31948ecdcddcb1ead0a6041005299d.zip
discord/ilo-kesi: move toki pona tokenizing code into its own folder, add markov code (sitelen pakala)
-rw-r--r--discord/ilo-kesi/.gitignore3
-rw-r--r--discord/ilo-kesi/context.go54
-rw-r--r--discord/ilo-kesi/main.go43
-rw-r--r--discord/ilo-kesi/markov.go137
-rw-r--r--discord/ilo-kesi/toki_pona.go74
-rw-r--r--web/tokiponatokens/doc.go5
-rw-r--r--web/tokiponatokens/toki_pona.go111
-rw-r--r--web/tokiponatokens/toki_pona_test.go (renamed from discord/ilo-kesi/toki_pona_test.go)6
8 files changed, 334 insertions, 99 deletions
diff --git a/discord/ilo-kesi/.gitignore b/discord/ilo-kesi/.gitignore
index 1e74762..c8f15cb 100644
--- a/discord/ilo-kesi/.gitignore
+++ b/discord/ilo-kesi/.gitignore
@@ -1 +1,2 @@
-ilo-kesi \ No newline at end of file
+ilo-kesi
+cadey.gob \ No newline at end of file
diff --git a/discord/ilo-kesi/context.go b/discord/ilo-kesi/context.go
index c46471d..a8bee96 100644
--- a/discord/ilo-kesi/context.go
+++ b/discord/ilo-kesi/context.go
@@ -2,12 +2,17 @@ package main
import (
"errors"
+ "log"
"strings"
+ "time"
+
+ "github.com/Xe/x/web/tokiponatokens"
)
const (
- actionFront = "lawa,insa"
- actionWhat = "seme"
+ actionFront = "lawa,insa"
+ actionMarkov = "sitelen"
+ actionWhat = "seme"
)
var (
@@ -15,20 +20,28 @@ var (
)
type Request struct {
- Address []*part
+ Address []string
Action string
- Subject string // if null, user is asking for the info
+ Subject string
Punct string
}
-func parseRequest(inp Sentence) (*Request, error) {
+func parseRequest(inp tokiponatokens.Sentence) (*Request, error) {
var result Request
for _, part := range inp {
- switch part.Part {
- case partAddress:
- result.Address = part.Parts
- case partSubject:
+ log.Printf("%s", part)
+ switch part.Type {
+ case tokiponatokens.PartAddress:
+ for i, pt := range part.Parts {
+ if i == 0 {
+ result.Address = append(result.Address, pt.Tokens[0])
+ continue
+ }
+
+ result.Address = append(result.Address, strings.Title(strings.Join(pt.Tokens, "")))
+ }
+ case tokiponatokens.PartSubject:
if len(part.Tokens) == 0 {
sub := strings.Title(strings.Join(part.Parts[1].Tokens, ""))
result.Subject = sub
@@ -36,20 +49,37 @@ func parseRequest(inp Sentence) (*Request, error) {
sub := strings.Join(part.Tokens, " ")
result.Subject = sub
}
- case partObjectMarker:
+ case tokiponatokens.PartObjectMarker:
act := strings.Join(part.Tokens, ",")
switch act {
- case actionFront, actionWhat:
+ case actionFront, actionWhat, actionMarkov:
default:
return nil, ErrUnknownAction
}
result.Action = act
- case partPunctuation:
+ case tokiponatokens.PartPunctuation:
result.Punct = part.Tokens[0]
}
}
return &result, nil
}
+
+func TimeToQualifier(t time.Time) string {
+ const (
+ nowRange = 15 * time.Minute
+ )
+
+ s := time.Since(t)
+ if s > 0 {
+ return "tenpo kama"
+ }
+
+ if s < nowRange {
+ return "tenpo ni"
+ }
+
+ return "tenpo pini"
+}
diff --git a/discord/ilo-kesi/main.go b/discord/ilo-kesi/main.go
index 0d4f325..84350fc 100644
--- a/discord/ilo-kesi/main.go
+++ b/discord/ilo-kesi/main.go
@@ -4,10 +4,12 @@ import (
"context"
"fmt"
"log"
+ "math/rand"
"net/http"
"time"
"github.com/Xe/x/web/switchcounter"
+ "github.com/Xe/x/web/tokiponatokens"
"github.com/joeshaw/envdecode"
_ "github.com/joho/godotenv/autoload"
"github.com/peterh/liner"
@@ -16,9 +18,13 @@ import (
// lipuSona is the configuration.
type lipuSona struct {
//DiscordToken string `env:"DISCORD_TOKEN,required"` // lipu pi lukin ala
- TokiPonaTokenizerAPIURL string `env:"TOKI_PONA_TOKENIZER_API_URL,default=https://us-central1-golden-cove-408.cloudfunctions.net/function-1"`
- SwitchCounterWebhook string `env:"SWITCH_COUNTER_WEBHOOK,required"`
- IloNimi []string `env:"IJO_NIMI,default=ke;si"`
+ TokiPonaTokenizerAPIURL string `env:"TOKI_PONA_TOKENIZER_API_URL,default=https://us-central1-golden-cove-408.cloudfunctions.net/function-1"`
+ SwitchCounterWebhook string `env:"SWITCH_COUNTER_WEBHOOK,required"`
+ IloNimi string `env:"ILO_NIMI,default=Kesi"`
+}
+
+func init() {
+ rand.Seed(time.Now().UnixNano())
}
func main() {
@@ -35,6 +41,12 @@ func main() {
line := liner.NewLiner()
defer line.Close()
+ chain := NewChain(3)
+ err = chain.Load("cadey.gob")
+ if err != nil {
+ log.Fatal(err)
+ }
+
line.SetCtrlCAborts(true)
for {
@@ -45,13 +57,11 @@ func main() {
line.AppendHistory(inp)
- parts, err := TokenizeTokiPona(cfg.TokiPonaTokenizerAPIURL, inp)
+ parts, err := tokiponatokens.Tokenize(cfg.TokiPonaTokenizerAPIURL, inp)
if err != nil {
log.Printf("Can't parse: %v", err)
}
- //pretty.Println(parts)
-
for _, sent := range parts {
req, err := parseRequest(sent)
if err != nil {
@@ -59,11 +69,21 @@ func main() {
continue
}
- if req.Address == nil {
+ if len(req.Address) != 2 {
log.Println("ilo Kesi was not addressed")
continue
}
+ if req.Address[0] != "ilo" {
+ log.Println("Addressed non-ilo")
+ continue
+ }
+
+ if req.Address[1] != cfg.IloNimi {
+ log.Printf("ilo %s was addressed, not ilo %s", req.Address[1], cfg.IloNimi)
+ continue
+ }
+
switch req.Action {
case actionFront:
if req.Subject == actionWhat {
@@ -87,13 +107,20 @@ func main() {
continue
}
- fmt.Printf("ijo Kesi\\ tenpo ni la jan %s li lawa insa.\n", req.Subject)
+ fmt.Printf("ilo Kesi\\ tenpo ni la jan %s li lawa insa.\n", req.Subject)
case actionWhat:
switch req.Subject {
case "tenpo ni":
fmt.Printf("ilo Kesi\\ ni li tenpo %s\n", time.Now().Format(time.Kitchen))
+ continue
}
}
+
+ switch req.Subject {
+ case "sitelen pakala":
+ fmt.Printf("ilo Kesi\\ %s\n", chain.Generate(20))
+ continue
+ }
}
} else if err == liner.ErrPromptAborted {
log.Print("Aborted")
diff --git a/discord/ilo-kesi/markov.go b/discord/ilo-kesi/markov.go
new file mode 100644
index 0000000..f31c133
--- /dev/null
+++ b/discord/ilo-kesi/markov.go
@@ -0,0 +1,137 @@
+package main
+
+// This Markov chain code is taken from the "Generating arbitrary text"
+// codewalk: http://golang.org/doc/codewalk/markov/
+//
+// Minor modifications have been made to make it easier to integrate
+// with a webserver and to save/load state
+
+import (
+ "encoding/gob"
+ "fmt"
+ "math/rand"
+ "os"
+ "strings"
+ "sync"
+)
+
+// Prefix is a Markov chain prefix of one or more words.
+type Prefix []string
+
+// String returns the Prefix as a string (for use as a map key).
+func (p Prefix) String() string {
+ return strings.Join(p, " ")
+}
+
+// Shift removes the first word from the Prefix and appends the given word.
+func (p Prefix) Shift(word string) {
+ copy(p, p[1:])
+ p[len(p)-1] = word
+}
+
+// Chain contains a map ("chain") of prefixes to a list of suffixes.
+// A prefix is a string of prefixLen words joined with spaces.
+// A suffix is a single word. A prefix can have multiple suffixes.
+type Chain struct {
+ Chain map[string][]string
+ prefixLen int
+ mu sync.Mutex
+}
+
+// NewChain returns a new Chain with prefixes of prefixLen words.
+func NewChain(prefixLen int) *Chain {
+ return &Chain{
+ Chain: make(map[string][]string),
+ prefixLen: prefixLen,
+ }
+}
+
+// Write parses the bytes into prefixes and suffixes that are stored in Chain.
+func (c *Chain) Write(in string) (int, error) {
+ sr := strings.NewReader(in)
+ p := make(Prefix, c.prefixLen)
+ for {
+ var s string
+ if _, err := fmt.Fscan(sr, &s); err != nil {
+ break
+ }
+ key := p.String()
+ c.mu.Lock()
+ c.Chain[key] = append(c.Chain[key], s)
+ c.mu.Unlock()
+ p.Shift(s)
+ }
+ return len(in), nil
+}
+
+// Generate returns a string of at most n words generated from Chain.
+func (c *Chain) Generate(n int) string {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ p := make(Prefix, c.prefixLen)
+ var words []string
+ for i := 0; i < n; i++ {
+ choices := c.Chain[p.String()]
+ if len(choices) == 0 {
+ break
+ }
+ next := choices[rand.Intn(len(choices))]
+ words = append(words, next)
+ p.Shift(next)
+ }
+ return strings.Join(words, " ")
+}
+
+// Save the chain to a file
+func (c *Chain) Save(fileName string) error {
+ // Open the file for writing
+ fo, err := os.Create(fileName)
+ if err != nil {
+ return err
+ }
+ // close fo on exit and check for its returned error
+ defer func() {
+ if err := fo.Close(); err != nil {
+ panic(err)
+ }
+ }()
+
+ // Create an encoder and dump to it
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ enc := gob.NewEncoder(fo)
+ err = enc.Encode(c)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// Load the chain from a file
+func (c *Chain) Load(fileName string) error {
+ // Open the file for reading
+ fi, err := os.Open(fileName)
+ if err != nil {
+ return err
+ }
+ // close fi on exit and check for its returned error
+ defer func() {
+ if err := fi.Close(); err != nil {
+ panic(err)
+ }
+ }()
+
+ // Create a decoder and read from it
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ dec := gob.NewDecoder(fi)
+ err = dec.Decode(c)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/discord/ilo-kesi/toki_pona.go b/discord/ilo-kesi/toki_pona.go
deleted file mode 100644
index d1ade93..0000000
--- a/discord/ilo-kesi/toki_pona.go
+++ /dev/null
@@ -1,74 +0,0 @@
-package main
-
-import (
- "bytes"
- "encoding/json"
- "net/http"
- "time"
-)
-
-type part struct {
- Part string `json:"part"`
- Sep *string `json:"sep"`
- Tokens []string `json:"tokens"`
- Parts []*part `json:"parts"`
-}
-
-const (
- partAddress = `address`
- partSubject = `subject`
- partObjectMarker = `objectMarker`
- partPrepPhrase = `prepPhrase`
- partInterjection = `interjection`
- partCartouche = `cartouche`
- partPunctuation = `punctuation`
-
- punctPeriod = `period`
- punctQuestion = `question`
- punctExclamation = `exclamation`
-)
-
-// A sentence is a series of sentence parts.
-type Sentence []part
-
-// TokenizeTokiPona returns a series of toki pona tokens.
-func TokenizeTokiPona(aurl, text string) ([]Sentence, error) {
- buf := bytes.NewBuffer([]byte(text))
- req, err := http.NewRequest(http.MethodPost, aurl, buf)
- if err != nil {
- return nil, err
- }
-
- req.Header.Add("Content-Type", "text/plain")
-
- resp, err := http.DefaultClient.Do(req)
- if err != nil {
- return nil, err
- }
- defer resp.Body.Close()
-
- var result []Sentence
- err = json.NewDecoder(resp.Body).Decode(&result)
- if err != nil {
- return nil, err
- }
-
- return result, nil
-}
-
-func TimeToQualifier(t time.Time) string {
- const (
- nowRange = 15 * time.Minute
- )
-
- s := time.Since(t)
- if s > 0 {
- return "tenpo kama"
- }
-
- if s < nowRange {
- return "tenpo ni"
- }
-
- return "tenpo pini"
-}
diff --git a/web/tokiponatokens/doc.go b/web/tokiponatokens/doc.go
new file mode 100644
index 0000000..f5c2e9c
--- /dev/null
+++ b/web/tokiponatokens/doc.go
@@ -0,0 +1,5 @@
+/*
+Package tokiponatokens is a wrapper to a Toki Pona tokenizer. I have an instance set up here:
+https://us-central1-golden-cove-408.cloudfunctions.net/function-1
+*/
+package tokiponatokens
diff --git a/web/tokiponatokens/toki_pona.go b/web/tokiponatokens/toki_pona.go
new file mode 100644
index 0000000..6313f21
--- /dev/null
+++ b/web/tokiponatokens/toki_pona.go
@@ -0,0 +1,111 @@
+package tokiponatokens
+
+import (
+ "bytes"
+ "encoding/json"
+ "net/http"
+ "strings"
+)
+
+// Part is an individual part of a sentence.
+type Part struct {
+ Type string `json:"part"`
+ Sep *string `json:"sep"`
+ Tokens []string `json:"tokens"`
+ Parts []*Part `json:"parts"`
+}
+
+func (p Part) String() string {
+ switch p.Type {
+ case PartPunctuation:
+ switch p.Tokens[0] {
+ case PunctExclamation:
+ return "!"
+ case PunctPeriod:
+ return "."
+ case PunctQuestion:
+ return "?"
+ case PunctComma:
+ return ","
+ }
+
+ panic("unknown punctuation " + p.Tokens[0])
+ case PartAddress:
+ if p.Parts == nil {
+ if p.Sep == nil {
+ return strings.Join(p.Tokens, " ")
+ }
+
+ return strings.Title(strings.Join(p.Tokens, ""))
+ }
+ }
+
+ var sb strings.Builder
+
+ for _, pt := range p.Parts {
+ sb.WriteString(pt.String())
+ sb.WriteRune(' ')
+ }
+
+ if p.Sep != nil {
+ sb.WriteString(*p.Sep)
+ sb.WriteRune(' ')
+ }
+
+ if len(p.Tokens) != 0 {
+ sb.WriteString(strings.Join(p.Tokens, " "))
+ sb.WriteRune(' ')
+ }
+
+ return sb.String()
+}
+
+// Individual part type values.
+const (
+ // Who/what the sentence is addressed to in Parts.
+ PartAddress = `address`
+ PartSubject = `subject`
+ PartObjectMarker = `objectMarker`
+ PartPrepPhrase = `prepPhrase`
+ PartInterjection = `interjection`
+ // A foreign name.
+ PartCartouche = `cartouche`
+ // Most sentences will end in this.
+ PartPunctuation = `punctuation`
+)
+
+// Punctuation constants.
+const (
+ PunctPeriod = `period`
+ PunctQuestion = `question`
+ PunctExclamation = `exclamation`
+ PunctComma = `comma`
+)
+
+// Sentence is a series of sentence parts. This correlates to one Toki Pona sentence.
+type Sentence []Part
+
+// Tokenize returns a series of toki pona tokens.
+func Tokenize(aurl, text string) ([]Sentence, error) {
+ buf := bytes.NewBuffer([]byte(text))
+ req, err := http.NewRequest(http.MethodPost, aurl, buf)
+ if err != nil {
+ return nil, err
+ }
+
+ req.Header.Add("Content-Type", "text/plain")
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ var result []Sentence
+ err = json.NewDecoder(resp.Body).Decode(&result)
+ if err != nil {
+ return nil, err
+ }
+
+ return result, nil
+}
diff --git a/discord/ilo-kesi/toki_pona_test.go b/web/tokiponatokens/toki_pona_test.go
index 619a57f..e31eb8b 100644
--- a/discord/ilo-kesi/toki_pona_test.go
+++ b/web/tokiponatokens/toki_pona_test.go
@@ -1,8 +1,6 @@
-package main
+package tokiponatokens
-import (
- "testing"
-)
+import "testing"
func TestTokenizeTokiPona(t *testing.T) {
_, err := TokenizeTokiPona("https://us-central1-golden-cove-408.cloudfunctions.net/function-1", "mi olin e sina.")