diff options
| author | Christine Dodrill <me@christine.website> | 2019-01-11 17:02:37 -0800 |
|---|---|---|
| committer | Christine Dodrill <me@christine.website> | 2019-01-11 17:02:37 -0800 |
| commit | 87dd824357569aed0b2c2de8f864649ffbaf2bd0 (patch) | |
| tree | 9661b5cb1c0e5cc890149da5efc270db313afaf5 | |
| parent | 572aebebbc4f28359eacc805c52f15dd905beaa9 (diff) | |
| download | x-87dd824357569aed0b2c2de8f864649ffbaf2bd0.tar.xz x-87dd824357569aed0b2c2de8f864649ffbaf2bd0.zip | |
la-baujmi: particle pi #2
| -rw-r--r-- | la-baujmi/corpus.txt | 5 | ||||
| -rw-r--r-- | la-baujmi/fact.go | 57 | ||||
| -rw-r--r-- | la-baujmi/fact_test.go | 22 |
3 files changed, 84 insertions, 0 deletions
diff --git a/la-baujmi/corpus.txt b/la-baujmi/corpus.txt new file mode 100644 index 0000000..6d3ddda --- /dev/null +++ b/la-baujmi/corpus.txt @@ -0,0 +1,5 @@ +sina li lukin e ni la sina pilin pona. sina li lukin e ni la pilin pona ni li suli suwi. sina li lukin e ni la sina li pona ale. sina li lukin e ni la sina li pali e ijo mute mute. + +sina li pona ale la sina pali ijo mute mute. + +sina li jan pi pali ijo mute mute. sina ken pali ijo mute mute. diff --git a/la-baujmi/fact.go b/la-baujmi/fact.go index 549c524..3caf53b 100644 --- a/la-baujmi/fact.go +++ b/la-baujmi/fact.go @@ -64,12 +64,69 @@ func SentenceToSelbris(s tokiponatokens.Sentence) ([]Selbri, error) { continue } + if len(pt.Parts) != 0 { + var sb strings.Builder + sb.WriteString("subject(") + + for i, sp := range pt.Parts { + if i != 0 { + sb.WriteString(", ") + } + if sp.Sep != nil && *sp.Sep == "pi" { + sb.WriteString("pi(") + for j, tk := range sp.Tokens { + if j != 0 { + sb.WriteString(", ") + } + + sb.WriteString(tk) + } + sb.WriteString(")") + } else { + sb.WriteString(strings.Join(sp.Tokens, "_")) + } + } + + sb.WriteString(")") + + subjects = append(objects, sb.String()) + continue + } + subjects = append(subjects, strings.Join(pt.Tokens, "_")) case tokiponatokens.PartVerbMarker: verbs = append(verbs, strings.Join(pt.Tokens, "_")) case tokiponatokens.PartObjectMarker: + if len(pt.Parts) != 0 { + var sb strings.Builder + sb.WriteString("object(") + + for i, sp := range pt.Parts { + if i != 0 { + sb.WriteString(", ") + } + if sp.Sep != nil && *sp.Sep == "pi" { + sb.WriteString("pi(") + for j, tk := range sp.Tokens { + if j != 0 { + sb.WriteString(", ") + } + + sb.WriteString(tk) + } + sb.WriteString(")") + } else { + sb.WriteString(strings.Join(sp.Tokens, "_")) + } + } + + sb.WriteString(")") + + objects = append(objects, sb.String()) + continue + } objects = append(objects, strings.Join(pt.Tokens, "_")) case tokiponatokens.PartPunctuation: diff --git a/la-baujmi/fact_test.go b/la-baujmi/fact_test.go index 5199300..3e7c481 100644 --- a/la-baujmi/fact_test.go +++ b/la-baujmi/fact_test.go @@ -92,6 +92,28 @@ func TestSentenceToSelbris(t *testing.T) { wantFacts: []string{"ala(tenpo_ni, A)."}, }, { + name: "pi_subject", + json: []byte(`[{"part":"subject","parts":[{"part":"subject","tokens":["ilo","mi"]},{"part":"subject","sep":"pi","tokens":["kasi","nasa"]}]},{"part":"verbMarker","sep":"li","tokens":["pona","ale"]}]`), + want: []Selbri{ + { + Predicate: "pona_ale", + Arguments: []string{"subject(ilo_mi, pi(kasi, nasa))"}, + }, + }, + wantFacts: []string{"pona_ale(subject(ilo_mi, pi(kasi, nasa)))."}, + }, + { + name: "pi_object", + json: []byte(`[{"part":"subject","tokens":["mi"]},{"part":"verbMarker","sep":"li","tokens":["esun"]},{"part":"objectMarker","sep":"e","parts":[{"part":"objectMarker","tokens":["ilo"]},{"part":"objectMarker","sep":"pi","tokens":["kalama","musi"]}]},{"part":"punctuation","tokens":["period"]}]`), + want: []Selbri{ + { + Predicate: "esun", + Arguments: []string{"mi", "object(ilo, pi(kalama, musi))"}, + }, + }, + wantFacts: []string{"esun(mi, object(ilo, pi(kalama, musi)))."}, + }, + { name: "multiple verbs", json: []byte(`[{"part":"subject","tokens":["ona"]},{"part":"verbMarker","sep":"li","tokens":["sona"]},{"part":"verbMarker","sep":"li","tokens":["pona"]},{"part":"objectMarker","sep":"e","tokens":["mute"]},{"part":"punctuation","tokens":["period"]}]`), want: []Selbri{ |
