author    Christine Dodrill <me@christine.website>  2018-09-24 19:29:33 -0700
committer Christine Dodrill <me@christine.website>  2018-09-24 19:31:51 -0700
commit    dc0c9aa8ae31948ecdcddcb1ead0a6041005299d (patch)
tree      71a54597b124dda33dac736cd3dc1770d0e29797 /web
parent    198b135a250c10ddbd4734c5fcaa70cb3586610b (diff)
download  x-dc0c9aa8ae31948ecdcddcb1ead0a6041005299d.tar.xz
          x-dc0c9aa8ae31948ecdcddcb1ead0a6041005299d.zip
discord/ilo-kesi: move toki pona tokenizing code into its own folder, add markov code (sitelen pakala)
Diffstat (limited to 'web')
-rw-r--r--  web/tokiponatokens/doc.go              5
-rw-r--r--  web/tokiponatokens/toki_pona.go      111
-rw-r--r--  web/tokiponatokens/toki_pona_test.go  10
3 files changed, 126 insertions(+), 0 deletions(-)
diff --git a/web/tokiponatokens/doc.go b/web/tokiponatokens/doc.go
new file mode 100644
index 0000000..f5c2e9c
--- /dev/null
+++ b/web/tokiponatokens/doc.go
@@ -0,0 +1,5 @@
+/*
+Package tokiponatokens is a wrapper for a Toki Pona tokenizer. I have an instance set up here:
+https://us-central1-golden-cove-408.cloudfunctions.net/function-1
+*/
+package tokiponatokens
diff --git a/web/tokiponatokens/toki_pona.go b/web/tokiponatokens/toki_pona.go
new file mode 100644
index 0000000..6313f21
--- /dev/null
+++ b/web/tokiponatokens/toki_pona.go
@@ -0,0 +1,111 @@
+package tokiponatokens
+
+import (
+ "bytes"
+ "encoding/json"
+ "net/http"
+ "strings"
+)
+
+// Part is an individual part of a sentence.
+type Part struct {
+ // Type is one of the Part* constants defined below.
+ Type string `json:"part"`
+ // Sep is the separator token for this part, if any.
+ Sep *string `json:"sep"`
+ // Tokens are the words that make up this part.
+ Tokens []string `json:"tokens"`
+ // Parts are any nested sub-parts.
+ Parts []*Part `json:"parts"`
+}
+
+// String renders the part, and any nested parts, as human-readable text.
+func (p Part) String() string {
+ switch p.Type {
+ case PartPunctuation:
+ switch p.Tokens[0] {
+ case PunctExclamation:
+ return "!"
+ case PunctPeriod:
+ return "."
+ case PunctQuestion:
+ return "?"
+ case PunctComma:
+ return ","
+ }
+
+ panic("unknown punctuation " + p.Tokens[0])
+ case PartAddress:
+ if p.Parts == nil {
+ if p.Sep == nil {
+ return strings.Join(p.Tokens, " ")
+ }
+
+ return strings.Title(strings.Join(p.Tokens, ""))
+ }
+ }
+
+ var sb strings.Builder
+
+ for _, pt := range p.Parts {
+ sb.WriteString(pt.String())
+ sb.WriteRune(' ')
+ }
+
+ if p.Sep != nil {
+ sb.WriteString(*p.Sep)
+ sb.WriteRune(' ')
+ }
+
+ if len(p.Tokens) != 0 {
+ sb.WriteString(strings.Join(p.Tokens, " "))
+ sb.WriteRune(' ')
+ }
+
+ return sb.String()
+}
+
+// Individual part type values.
+const (
+ // Who/what the sentence is addressed to in Parts.
+ PartAddress = `address`
+ PartSubject = `subject`
+ PartObjectMarker = `objectMarker`
+ PartPrepPhrase = `prepPhrase`
+ PartInterjection = `interjection`
+ // A foreign name.
+ PartCartouche = `cartouche`
+ // Most sentences will end in this.
+ PartPunctuation = `punctuation`
+)
+
+// Punctuation constants.
+const (
+ PunctPeriod = `period`
+ PunctQuestion = `question`
+ PunctExclamation = `exclamation`
+ PunctComma = `comma`
+)
+
+// Sentence is a series of sentence parts. This correlates to one Toki Pona sentence.
+type Sentence []Part
+
+// Tokenize posts text to the tokenizer service at aurl and returns the
+// resulting Toki Pona sentences.
+func Tokenize(aurl, text string) ([]Sentence, error) {
+ buf := bytes.NewBuffer([]byte(text))
+ req, err := http.NewRequest(http.MethodPost, aurl, buf)
+ if err != nil {
+ return nil, err
+ }
+
+ req.Header.Add("Content-Type", "text/plain")
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+
+ var result []Sentence
+ err = json.NewDecoder(resp.Body).Decode(&result)
+ if err != nil {
+ return nil, err
+ }
+
+ return result, nil
+}
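
A quick usage sketch (not part of this commit) for the API added above. The import path is assumed from the repository layout, and the endpoint URL is the instance mentioned in doc.go:

package main

import (
	"fmt"
	"log"

	// Assumed import path; adjust to wherever this package lives.
	"github.com/Xe/x/web/tokiponatokens"
)

func main() {
	const tokenizerURL = "https://us-central1-golden-cove-408.cloudfunctions.net/function-1"

	// Tokenize one Toki Pona sentence and print each part.
	sentences, err := tokiponatokens.Tokenize(tokenizerURL, "mi olin e sina.")
	if err != nil {
		log.Fatal(err)
	}

	for _, sentence := range sentences {
		for _, part := range sentence {
			fmt.Printf("%s: %s\n", part.Type, part.String())
		}
	}
}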
diff --git a/web/tokiponatokens/toki_pona_test.go b/web/tokiponatokens/toki_pona_test.go
new file mode 100644
index 0000000..e31eb8b
--- /dev/null
+++ b/web/tokiponatokens/toki_pona_test.go
@@ -0,0 +1,10 @@
+package tokiponatokens
+
+import "testing"
+
+func TestTokenizeTokiPona(t *testing.T) {
+ _, err := Tokenize("https://us-central1-golden-cove-408.cloudfunctions.net/function-1", "mi olin e sina.")
+ if err != nil {
+ t.Fatal(err)
+ }
+}
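
The test above hits the live Cloud Functions endpoint, so it needs network access to pass. A sketch of an offline variant (not in this commit) could stub the service with httptest; the canned JSON below is illustrative only, shaped after the struct tags in toki_pona.go:

package tokiponatokens

import (
	"net/http"
	"net/http/httptest"
	"testing"
)

func TestTokenizeOffline(t *testing.T) {
	// Canned response shaped after Part's JSON tags; illustrative only.
	const body = `[[{"part":"subject","sep":null,"tokens":["mi"],"parts":null},
	               {"part":"punctuation","sep":null,"tokens":["period"],"parts":null}]]`

	// Stand up a local server that answers like the tokenizer service.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.Write([]byte(body))
	}))
	defer srv.Close()

	sentences, err := Tokenize(srv.URL, "mi.")
	if err != nil {
		t.Fatal(err)
	}

	if len(sentences) != 1 || len(sentences[0]) != 2 {
		t.Fatalf("unexpected shape: %#v", sentences)
	}
}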