diff options
| author | Christine Dodrill <me@christine.website> | 2018-09-24 19:29:33 -0700 |
|---|---|---|
| committer | Christine Dodrill <me@christine.website> | 2018-09-24 19:31:51 -0700 |
| commit | dc0c9aa8ae31948ecdcddcb1ead0a6041005299d (patch) | |
| tree | 71a54597b124dda33dac736cd3dc1770d0e29797 | |
| parent | 198b135a250c10ddbd4734c5fcaa70cb3586610b (diff) | |
| download | x-dc0c9aa8ae31948ecdcddcb1ead0a6041005299d.tar.xz x-dc0c9aa8ae31948ecdcddcb1ead0a6041005299d.zip | |
discord/ilo-kesi: move toki pona tokenizing code into its own folder, add markov code (sitelen pakala)
| -rw-r--r-- | discord/ilo-kesi/.gitignore | 3 | ||||
| -rw-r--r-- | discord/ilo-kesi/context.go | 54 | ||||
| -rw-r--r-- | discord/ilo-kesi/main.go | 43 | ||||
| -rw-r--r-- | discord/ilo-kesi/markov.go | 137 | ||||
| -rw-r--r-- | discord/ilo-kesi/toki_pona.go | 74 | ||||
| -rw-r--r-- | web/tokiponatokens/doc.go | 5 | ||||
| -rw-r--r-- | web/tokiponatokens/toki_pona.go | 111 | ||||
| -rw-r--r-- | web/tokiponatokens/toki_pona_test.go (renamed from discord/ilo-kesi/toki_pona_test.go) | 6 |
8 files changed, 334 insertions, 99 deletions
diff --git a/discord/ilo-kesi/.gitignore b/discord/ilo-kesi/.gitignore index 1e74762..c8f15cb 100644 --- a/discord/ilo-kesi/.gitignore +++ b/discord/ilo-kesi/.gitignore @@ -1 +1,2 @@ -ilo-kesi
\ No newline at end of file +ilo-kesi +cadey.gob
\ No newline at end of file diff --git a/discord/ilo-kesi/context.go b/discord/ilo-kesi/context.go index c46471d..a8bee96 100644 --- a/discord/ilo-kesi/context.go +++ b/discord/ilo-kesi/context.go @@ -2,12 +2,17 @@ package main import ( "errors" + "log" "strings" + "time" + + "github.com/Xe/x/web/tokiponatokens" ) const ( - actionFront = "lawa,insa" - actionWhat = "seme" + actionFront = "lawa,insa" + actionMarkov = "sitelen" + actionWhat = "seme" ) var ( @@ -15,20 +20,28 @@ var ( ) type Request struct { - Address []*part + Address []string Action string - Subject string // if null, user is asking for the info + Subject string Punct string } -func parseRequest(inp Sentence) (*Request, error) { +func parseRequest(inp tokiponatokens.Sentence) (*Request, error) { var result Request for _, part := range inp { - switch part.Part { - case partAddress: - result.Address = part.Parts - case partSubject: + log.Printf("%s", part) + switch part.Type { + case tokiponatokens.PartAddress: + for i, pt := range part.Parts { + if i == 0 { + result.Address = append(result.Address, pt.Tokens[0]) + continue + } + + result.Address = append(result.Address, strings.Title(strings.Join(pt.Tokens, ""))) + } + case tokiponatokens.PartSubject: if len(part.Tokens) == 0 { sub := strings.Title(strings.Join(part.Parts[1].Tokens, "")) result.Subject = sub @@ -36,20 +49,37 @@ func parseRequest(inp Sentence) (*Request, error) { sub := strings.Join(part.Tokens, " ") result.Subject = sub } - case partObjectMarker: + case tokiponatokens.PartObjectMarker: act := strings.Join(part.Tokens, ",") switch act { - case actionFront, actionWhat: + case actionFront, actionWhat, actionMarkov: default: return nil, ErrUnknownAction } result.Action = act - case partPunctuation: + case tokiponatokens.PartPunctuation: result.Punct = part.Tokens[0] } } return &result, nil } + +func TimeToQualifier(t time.Time) string { + const ( + nowRange = 15 * time.Minute + ) + + s := time.Since(t) + if s > 0 { + return "tenpo kama" + } + + if s < nowRange { + return "tenpo ni" + } + + return "tenpo pini" +} diff --git a/discord/ilo-kesi/main.go b/discord/ilo-kesi/main.go index 0d4f325..84350fc 100644 --- a/discord/ilo-kesi/main.go +++ b/discord/ilo-kesi/main.go @@ -4,10 +4,12 @@ import ( "context" "fmt" "log" + "math/rand" "net/http" "time" "github.com/Xe/x/web/switchcounter" + "github.com/Xe/x/web/tokiponatokens" "github.com/joeshaw/envdecode" _ "github.com/joho/godotenv/autoload" "github.com/peterh/liner" @@ -16,9 +18,13 @@ import ( // lipuSona is the configuration. type lipuSona struct { //DiscordToken string `env:"DISCORD_TOKEN,required"` // lipu pi lukin ala - TokiPonaTokenizerAPIURL string `env:"TOKI_PONA_TOKENIZER_API_URL,default=https://us-central1-golden-cove-408.cloudfunctions.net/function-1"` - SwitchCounterWebhook string `env:"SWITCH_COUNTER_WEBHOOK,required"` - IloNimi []string `env:"IJO_NIMI,default=ke;si"` + TokiPonaTokenizerAPIURL string `env:"TOKI_PONA_TOKENIZER_API_URL,default=https://us-central1-golden-cove-408.cloudfunctions.net/function-1"` + SwitchCounterWebhook string `env:"SWITCH_COUNTER_WEBHOOK,required"` + IloNimi string `env:"ILO_NIMI,default=Kesi"` +} + +func init() { + rand.Seed(time.Now().UnixNano()) } func main() { @@ -35,6 +41,12 @@ func main() { line := liner.NewLiner() defer line.Close() + chain := NewChain(3) + err = chain.Load("cadey.gob") + if err != nil { + log.Fatal(err) + } + line.SetCtrlCAborts(true) for { @@ -45,13 +57,11 @@ func main() { line.AppendHistory(inp) - parts, err := TokenizeTokiPona(cfg.TokiPonaTokenizerAPIURL, inp) + parts, err := tokiponatokens.Tokenize(cfg.TokiPonaTokenizerAPIURL, inp) if err != nil { log.Printf("Can't parse: %v", err) } - //pretty.Println(parts) - for _, sent := range parts { req, err := parseRequest(sent) if err != nil { @@ -59,11 +69,21 @@ func main() { continue } - if req.Address == nil { + if len(req.Address) != 2 { log.Println("ilo Kesi was not addressed") continue } + if req.Address[0] != "ilo" { + log.Println("Addressed non-ilo") + continue + } + + if req.Address[1] != cfg.IloNimi { + log.Printf("ilo %s was addressed, not ilo %s", req.Address[1], cfg.IloNimi) + continue + } + switch req.Action { case actionFront: if req.Subject == actionWhat { @@ -87,13 +107,20 @@ func main() { continue } - fmt.Printf("ijo Kesi\\ tenpo ni la jan %s li lawa insa.\n", req.Subject) + fmt.Printf("ilo Kesi\\ tenpo ni la jan %s li lawa insa.\n", req.Subject) case actionWhat: switch req.Subject { case "tenpo ni": fmt.Printf("ilo Kesi\\ ni li tenpo %s\n", time.Now().Format(time.Kitchen)) + continue } } + + switch req.Subject { + case "sitelen pakala": + fmt.Printf("ilo Kesi\\ %s\n", chain.Generate(20)) + continue + } } } else if err == liner.ErrPromptAborted { log.Print("Aborted") diff --git a/discord/ilo-kesi/markov.go b/discord/ilo-kesi/markov.go new file mode 100644 index 0000000..f31c133 --- /dev/null +++ b/discord/ilo-kesi/markov.go @@ -0,0 +1,137 @@ +package main + +// This Markov chain code is taken from the "Generating arbitrary text" +// codewalk: http://golang.org/doc/codewalk/markov/ +// +// Minor modifications have been made to make it easier to integrate +// with a webserver and to save/load state + +import ( + "encoding/gob" + "fmt" + "math/rand" + "os" + "strings" + "sync" +) + +// Prefix is a Markov chain prefix of one or more words. +type Prefix []string + +// String returns the Prefix as a string (for use as a map key). +func (p Prefix) String() string { + return strings.Join(p, " ") +} + +// Shift removes the first word from the Prefix and appends the given word. +func (p Prefix) Shift(word string) { + copy(p, p[1:]) + p[len(p)-1] = word +} + +// Chain contains a map ("chain") of prefixes to a list of suffixes. +// A prefix is a string of prefixLen words joined with spaces. +// A suffix is a single word. A prefix can have multiple suffixes. +type Chain struct { + Chain map[string][]string + prefixLen int + mu sync.Mutex +} + +// NewChain returns a new Chain with prefixes of prefixLen words. +func NewChain(prefixLen int) *Chain { + return &Chain{ + Chain: make(map[string][]string), + prefixLen: prefixLen, + } +} + +// Write parses the bytes into prefixes and suffixes that are stored in Chain. +func (c *Chain) Write(in string) (int, error) { + sr := strings.NewReader(in) + p := make(Prefix, c.prefixLen) + for { + var s string + if _, err := fmt.Fscan(sr, &s); err != nil { + break + } + key := p.String() + c.mu.Lock() + c.Chain[key] = append(c.Chain[key], s) + c.mu.Unlock() + p.Shift(s) + } + return len(in), nil +} + +// Generate returns a string of at most n words generated from Chain. +func (c *Chain) Generate(n int) string { + c.mu.Lock() + defer c.mu.Unlock() + p := make(Prefix, c.prefixLen) + var words []string + for i := 0; i < n; i++ { + choices := c.Chain[p.String()] + if len(choices) == 0 { + break + } + next := choices[rand.Intn(len(choices))] + words = append(words, next) + p.Shift(next) + } + return strings.Join(words, " ") +} + +// Save the chain to a file +func (c *Chain) Save(fileName string) error { + // Open the file for writing + fo, err := os.Create(fileName) + if err != nil { + return err + } + // close fo on exit and check for its returned error + defer func() { + if err := fo.Close(); err != nil { + panic(err) + } + }() + + // Create an encoder and dump to it + c.mu.Lock() + defer c.mu.Unlock() + + enc := gob.NewEncoder(fo) + err = enc.Encode(c) + if err != nil { + return err + } + + return nil +} + +// Load the chain from a file +func (c *Chain) Load(fileName string) error { + // Open the file for reading + fi, err := os.Open(fileName) + if err != nil { + return err + } + // close fi on exit and check for its returned error + defer func() { + if err := fi.Close(); err != nil { + panic(err) + } + }() + + // Create a decoder and read from it + c.mu.Lock() + defer c.mu.Unlock() + + dec := gob.NewDecoder(fi) + err = dec.Decode(c) + if err != nil { + return err + } + + return nil +} diff --git a/discord/ilo-kesi/toki_pona.go b/discord/ilo-kesi/toki_pona.go deleted file mode 100644 index d1ade93..0000000 --- a/discord/ilo-kesi/toki_pona.go +++ /dev/null @@ -1,74 +0,0 @@ -package main - -import ( - "bytes" - "encoding/json" - "net/http" - "time" -) - -type part struct { - Part string `json:"part"` - Sep *string `json:"sep"` - Tokens []string `json:"tokens"` - Parts []*part `json:"parts"` -} - -const ( - partAddress = `address` - partSubject = `subject` - partObjectMarker = `objectMarker` - partPrepPhrase = `prepPhrase` - partInterjection = `interjection` - partCartouche = `cartouche` - partPunctuation = `punctuation` - - punctPeriod = `period` - punctQuestion = `question` - punctExclamation = `exclamation` -) - -// A sentence is a series of sentence parts. -type Sentence []part - -// TokenizeTokiPona returns a series of toki pona tokens. -func TokenizeTokiPona(aurl, text string) ([]Sentence, error) { - buf := bytes.NewBuffer([]byte(text)) - req, err := http.NewRequest(http.MethodPost, aurl, buf) - if err != nil { - return nil, err - } - - req.Header.Add("Content-Type", "text/plain") - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - var result []Sentence - err = json.NewDecoder(resp.Body).Decode(&result) - if err != nil { - return nil, err - } - - return result, nil -} - -func TimeToQualifier(t time.Time) string { - const ( - nowRange = 15 * time.Minute - ) - - s := time.Since(t) - if s > 0 { - return "tenpo kama" - } - - if s < nowRange { - return "tenpo ni" - } - - return "tenpo pini" -} diff --git a/web/tokiponatokens/doc.go b/web/tokiponatokens/doc.go new file mode 100644 index 0000000..f5c2e9c --- /dev/null +++ b/web/tokiponatokens/doc.go @@ -0,0 +1,5 @@ +/* +Package tokiponatokens is a wrapper to a Toki Poka tokenizer. I have an instance set up here: +https://us-central1-golden-cove-408.cloudfunctions.net/function-1 +*/ +package tokiponatokens diff --git a/web/tokiponatokens/toki_pona.go b/web/tokiponatokens/toki_pona.go new file mode 100644 index 0000000..6313f21 --- /dev/null +++ b/web/tokiponatokens/toki_pona.go @@ -0,0 +1,111 @@ +package tokiponatokens + +import ( + "bytes" + "encoding/json" + "net/http" + "strings" +) + +// Part is an individual part of a sentence. +type Part struct { + Type string `json:"part"` + Sep *string `json:"sep"` + Tokens []string `json:"tokens"` + Parts []*Part `json:"parts"` +} + +func (p Part) String() string { + switch p.Type { + case PartPunctuation: + switch p.Tokens[0] { + case PunctExclamation: + return "!" + case PunctPeriod: + return "." + case PunctQuestion: + return "?" + case PunctComma: + return "," + } + + panic("unknown punctuation " + p.Tokens[0]) + case PartAddress: + if p.Parts == nil { + if p.Sep == nil { + return strings.Join(p.Tokens, " ") + } + + return strings.Title(strings.Join(p.Tokens, "")) + } + } + + var sb strings.Builder + + for _, pt := range p.Parts { + sb.WriteString(pt.String()) + sb.WriteRune(' ') + } + + if p.Sep != nil { + sb.WriteString(*p.Sep) + sb.WriteRune(' ') + } + + if len(p.Tokens) != 0 { + sb.WriteString(strings.Join(p.Tokens, " ")) + sb.WriteRune(' ') + } + + return sb.String() +} + +// Individual part type values. +const ( + // Who/what the sentence is addressed to in Parts. + PartAddress = `address` + PartSubject = `subject` + PartObjectMarker = `objectMarker` + PartPrepPhrase = `prepPhrase` + PartInterjection = `interjection` + // A foreign name. + PartCartouche = `cartouche` + // Most sentences will end in this. + PartPunctuation = `punctuation` +) + +// Punctuation constants. +const ( + PunctPeriod = `period` + PunctQuestion = `question` + PunctExclamation = `exclamation` + PunctComma = `comma` +) + +// Sentence is a series of sentence parts. This correlates to one Toki Pona sentence. +type Sentence []Part + +// Tokenize returns a series of toki pona tokens. +func Tokenize(aurl, text string) ([]Sentence, error) { + buf := bytes.NewBuffer([]byte(text)) + req, err := http.NewRequest(http.MethodPost, aurl, buf) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", "text/plain") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var result []Sentence + err = json.NewDecoder(resp.Body).Decode(&result) + if err != nil { + return nil, err + } + + return result, nil +} diff --git a/discord/ilo-kesi/toki_pona_test.go b/web/tokiponatokens/toki_pona_test.go index 619a57f..e31eb8b 100644 --- a/discord/ilo-kesi/toki_pona_test.go +++ b/web/tokiponatokens/toki_pona_test.go @@ -1,8 +1,6 @@ -package main +package tokiponatokens -import ( - "testing" -) +import "testing" func TestTokenizeTokiPona(t *testing.T) { _, err := TokenizeTokiPona("https://us-central1-golden-cove-408.cloudfunctions.net/function-1", "mi olin e sina.") |
