diff options
| author | Xe Iaso <me@xeiaso.net> | 2023-03-05 21:24:06 -0500 |
|---|---|---|
| committer | Xe Iaso <me@xeiaso.net> | 2023-03-05 21:24:06 -0500 |
| commit | c6c01e513b6129af7c4c0ba1f95d02508edeed22 (patch) | |
| tree | 72320abb2a759853ca67f970bd5d58d370b1b905 /discord/ilo-kesi/function | |
| parent | dcbadadc86eef30bf6673d8446ebdb8fe7b45177 (diff) | |
| download | x-1.4.0.tar.xz x-1.4.0.zip | |
delete some code lolv1.4.0
Signed-off-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'discord/ilo-kesi/function')
| -rw-r--r-- | discord/ilo-kesi/function/index.js | 459 | ||||
| -rw-r--r-- | discord/ilo-kesi/function/package.json | 6 |
2 files changed, 0 insertions, 465 deletions
diff --git a/discord/ilo-kesi/function/index.js b/discord/ilo-kesi/function/index.js deleted file mode 100644 index 692df13..0000000 --- a/discord/ilo-kesi/function/index.js +++ /dev/null @@ -1,459 +0,0 @@ -"use strict"; -/** - * Parsing toki pona texts into sentences and sentence parts, and then into structured sentences that reflect the - * structure of sitelen sitelen blocks. - * - * @type {{parse}} - */ -var sitelenParser = function () { - 'use strict'; - - /** - * Core parser into sitelen sitelen structure. - * @param parseable a sentence to parse - * @returns {*[]} a structured sentence array - */ - function getSimpleStructuredSentence(parseable) { - var tokens = parseable.split(' '), - prepositions = ['tawa', 'tan', 'lon', 'kepeken', 'sama', 'poka'], - objectMarker = ['li', 'e'], - part = {part: 'subject', tokens: []}, - sentence = [part]; - - tokens.forEach(function (token, index) { - if (objectMarker.indexOf(token) > -1 && - index < tokens.length - 1) { - sentence.push({part: 'objectMarker', sep: token, tokens: []}); - part = sentence[sentence.length - 1]; - return; - } else if (prepositions.indexOf(token) > -1 && objectMarker.indexOf(tokens[index - 1]) === -1 && - index < tokens.length - 1 && objectMarker.indexOf(tokens[index + 1]) === -1) { - sentence.push({part: 'prepPhrase', sep: token, tokens: []}); - part = sentence[sentence.length - 1]; - return; - } else if (token === 'o' && part.tokens.length > 0) { - // the o token should be in a container when it is used to address something, not in commands - part.part = 'address'; - part.sep = 'o'; - sentence.push({part: 'subject', tokens: []}); - part = sentence[sentence.length - 1]; - return; - } else if (token === 'a' && part.tokens.length > 0 && part.sep) { - // the a token should never be in a container - sentence.push({part: 'interjection', sep: null, tokens: [token]}); - part = sentence[sentence.length - 1]; - return; - } - - part.tokens.push(token); - - if (allowedWords.indexOf(token) === -1){ - throw {type: 'illegal token', message: 'illegal token: ' + token}; - } - }); - - // filter out empty parts - sentence = sentence.filter(function (part) { - return part.tokens.length > 0; - }); - return sentence; - - } - - /** - * Preformats a given text, so that it splits it on punctuation marks. - * @param text text to preformat - * @returns {{parsable: Array, raw: Array}} parsable array of raw text and punctuation - */ - function preformat(text) { - var result = text.match(/[^\.!\?#]+[\.!\?#]+/g), - punctuation = ['.', ':', '?', '!', ',']; - - var parsableParts = [], rawParts = []; - if (!result) { // allow sentence fractions without any punctuation - result = [text + (punctuation.indexOf(text) === -1 ? '|' : '')]; - // console.log('WARNING: sentence fraction without punctuation'); - } - result.forEach(function (sentence) { - sentence = sentence.trim(); - - var parsableSentence = []; - parsableParts.push(parsableSentence); - rawParts.push(sentence); - - var body = sentence.substr(0, sentence.length - 1); - - // remove the comma before the la-clause and before a repeating li clause - body = body.replace(', la ', ' la '); - body = body.replace(', li ', ' li '); - - // split on context separators comma and colon - var laparts = body.split(/ la /); - laparts.forEach(function (lapart, index) { - var colonparts = lapart.split(/:/); - colonparts.forEach(function (colonpart, index) { - var commaparts = colonpart.split(/,/); - commaparts.forEach(function (commapart, index) { - commapart = commapart.trim(); - - parsableSentence.push({content: commapart}); - if (index < commaparts.length - 1) { - parsableSentence.push({punctuation: ['comma']}); - } - }); - - if (index < colonparts.length - 1) { - parsableSentence.push({punctuation: ['colon']}); - } - }); - if (laparts.length === 2 && index === 0) { - parsableSentence.push({punctuation: ['la']}); - } - }); - - var terminator = sentence.substr(-1); - switch (terminator) { - case '.': - parsableSentence.push({punctuation: ['period']}); - break; - case ':': - parsableSentence.push({punctuation: ['colon']}); - break; - case '!': - parsableSentence.push({punctuation: ['exclamation']}); - break; - case '?': - parsableSentence.push({punctuation: ['question']}); - break; - case '#': - parsableSentence.push({punctuation: ['banner']}); - break; - default: - break; - } - - }); - return {parsable: parsableParts, raw: rawParts}; - } - - /** - * Split proper names into Toki Pona syllables. It is assumed that the proper name follows standard Toki Pona rules. - * @param properName the proper name string to split into syllables - */ - function splitProperIntoSyllables(properName) { - if (properName.length === 0) { - return []; - } - - var vowels = ['o', 'u', 'i', 'a', 'e'], - allowed = ['o', 'u', 'i', 'a', 'e', 'mo', 'mu', 'mi', 'ma', 'me', 'no', 'nu', 'ni', 'na', 'ne', 'po', 'pu', 'pi', 'pa', 'pe', 'to', 'tu', 'ta', 'te', 'ko', 'ku', 'ki', 'ka', 'ke', 'so', 'su', 'si', 'sa', 'se', 'wi', 'wa', 'we', 'lo', 'lu', 'li', 'la', 'le', 'jo', 'ju', 'ja', 'je', 'on', 'un', 'in', 'an', 'en', 'mon', 'mun', 'min', 'man', 'men', 'non', 'nun', 'nin', 'nan', 'nen', 'pon', 'pun', 'pin', 'pan', 'pen', 'ton', 'tun', 'tan', 'ten', 'kon', 'kun', 'kin', 'kan', 'ken', 'son', 'sun', 'sin', 'san', 'sen', 'win', 'wan', 'wen', 'lon', 'lun', 'lin', 'lan', 'len', 'jon', 'jun', 'jan', 'jen'], - syllables = [], - first = properName.substr(0, 1), - third = properName.substr(2, 1), - fourth = properName.substr(3, 1); - - // ponoman, monsi, akesi - - if (vowels.indexOf(first) === -1) { - if (third === 'n' && vowels.indexOf(fourth) === -1) { - syllables.push(properName.substr(0, 3)); - syllables = syllables.concat(splitProperIntoSyllables(properName.substr(3))); - } else { - syllables.push(properName.substr(0, 2)); - syllables = syllables.concat(splitProperIntoSyllables(properName.substr(2))); - } - } else { - if (properName.length === 2) { - return [properName]; - } else { - syllables.push(first); - syllables = syllables.concat(splitProperIntoSyllables(properName.substr(1))); - } - } - - syllables.forEach(function(syllable){ - if (allowed.indexOf(syllable) === -1){ - throw {type: 'illegal syllable' ,message: 'following syllabe not allowed: ' + syllable}; - } - }); - - return syllables; - } - - /** - * Postprocessing for the simple parses that splits the structured sentence into more structure, such as prepositional - * phrases, proper names and the pi-construct. - * - * @param sentence the structured sentence - * @returns {*} a processed structured sentence - */ - function postprocessing(sentence) { - var prepositionContainers = ['lon', 'tan', 'kepeken', 'tawa', 'sama', 'poka', 'pi'], - prepositionSplitIndex; - - // split prepositional phrases inside containers (such as the verb li-container) - sentence.forEach(function (part, index) { - prepositionSplitIndex = -1; - part.tokens.forEach(function (token, tokenIndex) { - if (prepositionContainers.indexOf(token) > -1 && tokenIndex < part.tokens.length - 1) { - prepositionSplitIndex = tokenIndex; - } - }); - - if (prepositionSplitIndex > -1) { - var newParts = []; - if (prepositionSplitIndex > 0) { - newParts.push({part: part.part, tokens: part.tokens.slice(0, prepositionSplitIndex)}); - } - newParts.push({ - part: part.part, - sep: part.tokens[prepositionSplitIndex], - tokens: part.tokens.slice(prepositionSplitIndex + 1) - }); - sentence[index] = {part: part.part, sep: part.sep, parts: newParts}; - } - }); - - // split proper names inside containers - sentence.forEach(function (part, index) { - var parts = [part]; - if (!part.tokens) { - if (part.parts) { - parts = part.parts; - } else { - return; - } - } - parts.forEach(function (part) { - var nameSplitIndex = []; - part.tokens.forEach(function (token, tokenIndex) { - if (token.substr(0, 1).toUpperCase() === token.substr(0, 1)) { - nameSplitIndex.push(tokenIndex); - } - }); - var last = -1; - var newParts = []; - nameSplitIndex.forEach(function (idx) { - if (idx > last + 1) { - newParts.push({part: part.part, tokens: part.tokens.slice(last + 1, idx)}); - } - newParts.push({ - part: part.part, - sep: 'cartouche', - tokens: splitProperIntoSyllables(part.tokens[idx].toLowerCase()) - }); - last = idx; - }); - if (nameSplitIndex.length > 0 && nameSplitIndex[nameSplitIndex.length - 1] < part.tokens.length - 1) { - newParts.push({ - part: part.part, - tokens: part.tokens.slice(nameSplitIndex[nameSplitIndex.length - 1] + 1) - }); - } - if (nameSplitIndex.length > 0) { - sentence[index] = {part: part.part, sep: part.sep, parts: newParts}; - } - }); - }); - return sentence; - } - - /** - * Main parser that processes a sentence. - * - * @param sentence the input sentence - * @returns {Array} the structured sentence - */ - function parseSentence(sentence) { - var structuredSentence = []; - - sentence.forEach(function (part) { - if (part.content) { - // find proper names - var properNames = []; - part.content = part.content.replace(/([A-Z][\w-]*)/g, function (item) { - properNames.push(item); - return '\'Name\''; - }); - - var value = getSimpleStructuredSentence(part.content); - - value.forEach(function (part) { - part.tokens.forEach(function (token, index) { - if (token === '\'Name\'') { - part.tokens[index] = properNames.shift(); - } - }); - }); - structuredSentence.push.apply(structuredSentence, value); - } else if (part.punctuation) { - structuredSentence.push({part: 'punctuation', tokens: part.punctuation}); - } - }); - - structuredSentence = postprocessing(structuredSentence); - return structuredSentence; - } - - /** - * Parser wrapper that splits a text into sentences that are parsed. - * @param text a full text - * @returns {Array} an array of structured sentences - */ - function parse(text) { - return preformat(text.replace(/\s\s+/g, ' ')).parsable.map(function (sentence) { - return parseSentence(sentence); - }); - } - - return { - parse: parse - }; -}(); - -var tokiPonaDictionary = [ - {name: 'a', gloss: 'ah', grammar: ['interj']}, - {name: 'akesi', category: 'animal', gloss: 'reptile', grammar: ['n']}, - {name: 'ala', gloss: 'no', grammar: ['mod', 'n', 'interj']}, - {name: 'ali', gloss: 'all', grammar: ['n', 'mod']}, - {name: 'anpa', gloss: 'under', grammar: ['n', 'mod']}, - {name: 'ante', gloss: 'different', grammar: ['n', 'mod', 'conj', 'vt']}, - {name: 'anu', category: 'separator', gloss: 'or', grammar: ['conj']}, - {name: 'awen', gloss: 'remain', grammar: ['vi', 'vt', 'mod']}, - {name: 'e', category: 'separator', gloss: 'object marker', grammar: ['sep']}, - {name: 'en', category: 'separator', gloss: 'and', grammar: ['conj']}, - {name: 'esun', gloss: 'shop', grammar: ['n']}, - {name: 'ijo', gloss: 'thing', grammar: ['n', 'mod', 'vt']}, - {name: 'ike', gloss: 'evil', grammar: ['mod', 'interj', 'n', 'vt', 'vi']}, - {name: 'ilo', gloss: 'tool', grammar: ['n']}, - {name: 'insa', gloss: 'inside', grammar: ['n', 'mod']}, - {name: 'jaki', gloss: 'dirty', grammar: ['mod', 'n', 'vt', 'interj']}, - {name: 'jan', category: 'animal', gloss: 'person', grammar: ['n', 'mod', 'vt']}, - {name: 'jelo', category: 'color', gloss: 'yellow', grammar: ['mod']}, - {name: 'jo', gloss: 'have', grammar: ['vt', 'n']}, - {name: 'kala', category: 'animal', gloss: 'fish', grammar: ['n']}, - {name: 'kalama', gloss: 'sound', grammar: ['n', 'vi', 'vt']}, - {name: 'kama', gloss: 'come', grammar: ['vi', 'n', 'mod', 'vt']}, - {name: 'kasi', gloss: 'plant', grammar: ['n']}, - {name: 'ken', gloss: 'possible', grammar: ['vi', 'n', 'vt']}, - {name: 'kepeken', gloss: 'use', grammar: ['vt', 'prep']}, - {name: 'kili', gloss: 'fruit', grammar: ['n']}, - {name: 'kin', gloss: 'also', grammar: ['mod']}, - {name: 'kiwen', gloss: 'rock', grammar: ['mod', 'n']}, - {name: 'ko', gloss: 'squishy', grammar: ['n']}, - {name: 'kon', gloss: 'soul', grammar: ['n', 'mod']}, - {name: 'kule', gloss: 'color', grammar: ['n', 'mod', 'vt']}, - {name: 'kulupu', gloss: 'group', grammar: ['n', 'mod']}, - {name: 'kute', gloss: 'listen', grammar: ['vt', 'mod']}, - {name: 'la', category: 'separator', gloss: 'in context', grammar: ['sep']}, - {name: 'lape', gloss: 'rest', grammar: ['n', 'vi', 'mod']}, - {name: 'laso', category: 'color', gloss: 'blue/green', grammar: ['mod']}, - {name: 'lawa', gloss: 'head', grammar: ['n', 'mod', 'vt']}, - {name: 'len', gloss: 'cloth', grammar: ['n']}, - {name: 'lete', gloss: 'cold', grammar: ['n', 'mod', 'vt']}, - {name: 'li', category: 'separator', gloss: 'is', grammar: ['sep']}, - {name: 'lili', gloss: 'small', grammar: ['mod', 'vt']}, - {name: 'linja', gloss: 'string', grammar: ['n']}, - {name: 'lipu', gloss: 'paper', grammar: ['n']}, - {name: 'loje', category: 'color', gloss: 'red', grammar: ['mod']}, - {name: 'lon', gloss: 'located', grammar: ['prep', 'vi']}, - {name: 'luka', gloss: 'hand', grammar: ['n']}, - {name: 'lukin', gloss: 'see', grammar: ['vt', 'vi', 'mod']}, - {name: 'lupa', gloss: 'hole', grammar: ['n']}, - {name: 'ma', gloss: 'land', grammar: ['n']}, - {name: 'mama', category: 'animal', gloss: 'parent', grammar: ['n', 'mod']}, - {name: 'mani', gloss: 'money', grammar: ['n']}, - {name: 'meli', category: 'animal', gloss: 'female', grammar: ['n', 'mod']}, - {name: 'mi', gloss: 'I/we', grammar: ['n', 'mod']}, - {name: 'mije', category: 'animal', gloss: 'male', grammar: ['n', 'mod']}, - {name: 'moku', gloss: 'food', grammar: ['n', 'vt']}, - {name: 'moli', gloss: 'death', grammar: ['n', 'vi', 'vt', 'mod']}, - {name: 'monsi', gloss: 'back', grammar: ['n', 'mod']}, - {name: 'mu', gloss: 'moo!', grammar: ['interj']}, - {name: 'mun', gloss: 'moon', grammar: ['n', 'mod']}, - {name: 'musi', gloss: 'play', grammar: ['n', 'mod', 'vi', 'vt']}, - {name: 'mute', gloss: 'many', grammar: ['mod', 'n', 'vt']}, - {name: 'namako', gloss: 'extra', grammar: ['n', 'mod']}, - {name: 'nanpa', gloss: 'number', grammar: ['n']}, - {name: 'nasa', gloss: 'crazy', grammar: ['mod', 'vt']}, - {name: 'nasin', gloss: 'manner', grammar: ['n']}, - {name: 'nena', gloss: 'bump', grammar: ['n']}, - {name: 'ni', gloss: 'this', grammar: ['mod']}, - {name: 'nimi', gloss: 'name', grammar: ['n']}, - {name: 'noka', gloss: 'leg', grammar: ['n']}, - {name: 'o', gloss: 'imperative', grammar: ['sep', 'interj']}, - {name: 'oko', gloss: 'eye', grammar: ['n']}, - {name: 'olin', gloss: 'love', grammar: ['n', 'mod', 'vt']}, - {name: 'ona', gloss: 'he/she/it', grammar: ['n', 'mod']}, - {name: 'open', gloss: 'open', grammar: ['vt']}, - {name: 'pakala', gloss: 'destroy', grammar: ['n', 'vt', 'vi', 'interj']}, - {name: 'pali', gloss: 'make', grammar: ['n', 'mod', 'vt', 'vi']}, - {name: 'palisa', gloss: 'rod', grammar: ['n']}, - {name: 'pan', gloss: 'grain', grammar: ['n']}, - {name: 'pana', gloss: 'give', grammar: ['vt', 'n']}, - {name: 'pi', category: 'separator', gloss: 'of', grammar: ['sep']}, - {name: 'pilin', gloss: 'feel', grammar: ['n', 'vi', 'vt']}, - {name: 'pimeja', category: 'color', gloss: 'black', grammar: ['mod', 'n', 'vt']}, - {name: 'pini', gloss: 'end', grammar: ['n', 'mod', 'vt']}, - {name: 'pipi', category: 'animal', gloss: 'insect', grammar: ['n']}, - {name: 'poka', gloss: 'side', grammar: ['n', 'mod', 'prep']}, - {name: 'poki', gloss: 'box', grammar: ['n']}, - {name: 'pona', gloss: 'good', grammar: ['n', 'mod', 'interj', 'vt']}, - {name: 'pu', gloss: 'toki ponist', grammar: ['n', 'mod', 'vi']}, - {name: 'sama', gloss: 'similar', grammar: ['mod', 'prep']}, - {name: 'seli', gloss: 'warm', grammar: ['n', 'mod', 'vt']}, - {name: 'selo', gloss: 'surface', grammar: ['n']}, - {name: 'seme', gloss: 'what', grammar: ['n', 'mod', 'vi']}, - {name: 'sewi', gloss: 'superior', grammar: ['n', 'mod']}, - {name: 'sijelo', gloss: 'body', grammar: ['n']}, - {name: 'sike', gloss: 'circle', grammar: ['n', 'mod']}, - {name: 'sin', gloss: 'new', grammar: ['mod', 'vt']}, - {name: 'sina', gloss: 'you', grammar: ['n', 'mod']}, - {name: 'sinpin', gloss: 'front', grammar: ['n']}, - {name: 'sitelen', gloss: 'draw', grammar: ['n', 'vt']}, - {name: 'sona', gloss: 'wisdom', grammar: ['n', 'vt', 'vi']}, - {name: 'soweli', category: 'animal', gloss: 'mammal', grammar: ['n']}, - {name: 'suli', gloss: 'big', grammar: ['mod', 'vt', 'n']}, - {name: 'suno', gloss: 'light', grammar: ['n']}, - {name: 'supa', gloss: 'table', grammar: ['n']}, - {name: 'suwi', gloss: 'sweet', grammar: ['n', 'mod', 'vt']}, - {name: 'tan', gloss: 'because', grammar: ['prep', 'n']}, - {name: 'taso', gloss: 'only', grammar: ['mod', 'conj']}, - {name: 'tawa', gloss: 'move', grammar: ['prep', 'vi', 'n', 'mod', 'vt']}, - {name: 'telo', gloss: 'liquid', grammar: ['n', 'vt']}, - {name: 'tenpo', gloss: 'time', grammar: ['n']}, - {name: 'toki', gloss: 'talking', grammar: ['n', 'mod', 'vt', 'vi', 'interj']}, - {name: 'tomo', gloss: 'house', grammar: ['n', 'mod']}, - {name: 'tu', gloss: 'two', grammar: ['mod', 'n']}, - {name: 'unpa', gloss: 'sex', grammar: ['n', 'mod', 'vt', 'vi']}, - {name: 'uta', gloss: 'mouth', grammar: ['n', 'mod']}, - {name: 'utala', gloss: 'attack', grammar: ['n', 'vt']}, - {name: 'walo', category: 'color', gloss: 'white', grammar: ['mod', 'n']}, - {name: 'wan', gloss: 'one', grammar: ['mod', 'n', 'vt']}, - {name: 'waso', category: 'animal', gloss: 'bird', grammar: ['n']}, - {name: 'wawa', gloss: 'power', grammar: ['n', 'mod', 'vt']}, - {name: 'weka', gloss: 'away', grammar: ['mod', 'n', 'vt']}, - {name: 'wile', gloss: 'need', grammar: ['vt', 'n', 'mod']}, - {name: '.', category: 'separator', gloss: 'period', type: 'punctuation', grammar: ['punct']}, - {name: '?', category: 'separator', gloss: 'question', type: 'punctuation', grammar: ['punct']}, - {name: '!', category: 'separator', gloss: 'exclamation', type: 'punctuation', grammar: ['punct']}, - {name: ':', category: 'separator', gloss: 'colon', type: 'punctuation', grammar: ['punct']}, - {name: ',', category: 'separator', gloss: 'comma', type: 'punctuation', grammar: ['punct']} -]; - -var allowedWords = tokiPonaDictionary.map(function(item){ - return item.name; -}); -allowedWords.push('ale'); -allowedWords.push("'Name'"); - -exports.httpResp = (req, res) => { - switch (req.get("content-type")) { - case "text/plain": - break; - default: - res.status(400).send("Content-Type: text/plain"); - } - - res.status(200).send(sitelenParser.parse(req.body)); -} diff --git a/discord/ilo-kesi/function/package.json b/discord/ilo-kesi/function/package.json deleted file mode 100644 index e52d0f8..0000000 --- a/discord/ilo-kesi/function/package.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "sample-http", - "version": "0.0.1" -} - - |
