Misty Programming Language:
The Misty Parse Demo
Put Misty source text in the box and push the button.
Source text
misty module tokenizer

# Tokenizer for Misty source text.
#
# The module returns a single function, tokenize(source_text). It walks the
# source one character at a time. Each character selects a "tokenator"
# function from the tokenators record; that function consumes the characters
# of one token and fills in the fields of the current token record
# (kind, text, and for text literals, quote).
#
# On success tokenize returns the array of tokens. On the first error it
# returns the offending token, whose kind is "error" and which carries
# where and reason fields.

# Scanner state shared by all tokenators.

var at: null            # index into source of the current character
var character: null     # source[at]; null once at runs past the end
var column_nr: null     # column number of the current character
var line_nr: null       # line number of the current character
var source: null        # the text being tokenized
var token: null         # the token record currently being filled in
var tokenator: null     # tokenators[character] for the current character
var tokenators: null    # record mapping a first character to its tokenator
var tokens: null        # array of the tokens produced so far

# Turn the current token into an error token. The text covers the token's
# start through the current character. Always returns null so tokenators
# can write `return error(...)`.

def error: ƒ (reason) {
    set token.kind: "error"
    set token.text: text(source, token.at, at + 1)
    set token.where: at
    set token.reason: reason
    return null
}

# Advance to the next character and refresh character and tokenator.

def next: ƒ () {
    set at: at + 1
    set column_nr: column_nr + 1
    set character: source[at]
    set tokenator: tokenators[character]
    return null
}

# A single character token whose kind and text are the character itself.

def special: ƒ () {
    set token.kind: character
    set token.text: character
    return next()
}

# A line feed. column_nr is set to -1 so that next() leaves it at 0.

def newline: ƒ () {
    set token.kind: "newline"
    set token.text: "\n"
    set column_nr: -1
    set line_nr: line_nr + 1
    return next()
}

# A carriage return, optionally followed by a line feed. Both forms produce
# one "newline" token; a lone carriage return is reported with text "\n".

def carriage_return: ƒ carriage_return() {
    set token.kind: "newline"
    set line_nr: line_nr + 1
    call next()
    if character = "\n"
        set token.text: "\r\n"
        set column_nr: -1
        return next()
    else
        set token.text: "\n"
        set column_nr: 0
        return null
    fi
}

# A comment runs from '#' up to, but not including, the line feed or the
# end of the source.

def comment: ƒ () {
    do
        call next()
        if character = "\n" \/ character = null
            break
        fi
    od
    set token.kind: "comment"
    set token.text: text(source, token.at, at)
    return null
}

# A run of one or more space characters.

def space: ƒ () {
    do
        call next()
        if character <> " "
            break
        fi
    od
    set token.kind: "space"
    set token.text: text(source, token.at, at)
    return null
}

# '/' alone, or '/' followed by a backslash as a two character token.

def slash: ƒ () {
    call next()
    if character <> "\b"
        set token.kind: "/"
        set token.text: "/"
        return null
    fi
    set token.kind: "/\b"
    set token.text: "/\b"
    return next()
}

# A backslash alone, or a backslash followed by '/' as a two character token.

def backslash: ƒ () {
    call next()
    if character <> "/"
        set token.kind: "\b"
        set token.text: "\b"
        return null
    fi
    set token.kind: "\b/"
    set token.text: "\b/"
    return next()
}

# '<', '<=' or '<>'.

def less: ƒ () {
    call next()
    if character = "="
        set token.kind: "<="
        set token.text: "<="
        return next()
    fi
    if character = ">"
        set token.kind: "<>"
        set token.text: "<>"
        return next()
    fi
    set token.kind: "<"
    set token.text: "<"
    return null
}

# '>' or '>='.

def greater: ƒ () {
    call next()
    if character = "="
        set token.kind: ">="
        set token.text: ">="
        return next()
    fi
    set token.kind: ">"
    set token.text: ">"
    return null
}

# A number: digits, at most one '.' followed by digits, and at most one 'e'
# exponent with an optional '-' followed by digits. Seeing 'e' also sets
# period_seen, so a '.' is rejected inside the exponent.
#
# The token text is sliced from token.at, so when this is called from minus
# the leading '-' is already part of the text.

def digit: ƒ () {
    var e_seen: false
    var period_seen: false
    do loop
        call next()
        if tokenator <> digit
            if character = "."
                if period_seen
                    return error("Unexpected '.' in number")
                fi
                set period_seen: true
                call next()
                if tokenator <> digit
                    return error("Expected digits after '.'")
                fi
            else if character = "e"
                if e_seen
                    return error("Unexpected 'e' in number")
                fi
                set e_seen: true
                set period_seen: true
                call next()
                if character = "-"
                    call next()
                fi
                if tokenator <> digit
                    return error("Expected digits after 'e'")
                fi
            else
                break loop
            fi
        fi
    od loop
    set token.kind: "number"
    set token.text: text(source, token.at, at)
    return null
}

# '-' alone, or the sign of a negative number literal when a digit follows.
#
# digit() slices its text from token.at, which is the position of the '-',
# so the sign is already included in the number's text (and in the text of
# an error token). Nothing is prepended here.

def minus: ƒ () {
    call next()
    if tokenator <> digit
        set token.kind: "-"
        set token.text: "-"
    else
        call digit()
    fi
    return null
}

# A name: a letter followed by letters and digits. A '_' or '$' separator
# is allowed only when it is immediately followed by a letter or digit.
# A single trailing '?' is accepted as part of the name.

def letter: ƒ () {

# middle consumes the characters after the first letter. It returns through
# error() when a separator is misplaced.

    def middle: ƒ () {
        call next()
        if tokenator = letter \/ tokenator = digit
            return middle()
        fi
        if character = "_" \/ character = "$"
            call next()
            if tokenator <> letter /\ tokenator <> digit
                return error("Misplaced separator")
            fi
            return middle()
        fi
        return null
    }

    call middle()
    if token.kind <> "error"
        if character = "?"
            call next()
        fi
        set token.kind: "name"
        set token.text: text(source, token.at, at)
    fi
    return null
}

# Single character escapes recognized after a backslash in a quoted text
# literal, and the text each one stands for.

def escape: {
    b: "\b"     # backslash
    d: "»"      # droite
    g: "«"      # gauche
    n: "\n"     # newline
    q: "\q"     # double quote
    r: "\r"     # carriage return
    t: "\t"     # tab
}

# The characters accepted as hexadecimal digits in a \u{...} escape.
# Only upper case letters are listed.

def hex: {
    "0": true
    "1": true
    "2": true
    "3": true
    "4": true
    "5": true
    "6": true
    "7": true
    "8": true
    "9": true
    "A": true
    "B": true
    "C": true
    "D": true
    "E": true
    "F": true
}

# A double quoted text literal. The literal must close on the same line.
# Escapes are decoded, so token.text is the value of the literal rather
# than its source text. token.quote records the opening delimiter.

def quote: ƒ () {
    var codepoint: null
    var escapee: null
    var value: ""
    do
        call next()
        if character = "\q"
            break
        fi
        if character = "\n" \/ character = null
            return error("Unclosed text literal")
        fi
        if character = "\b"
            call next()
            set escapee: escape[character]
            if text?(escapee)
                set value: value ~ escapee
            else if character = "u"

# \u{HEX}: collect one or more hex digits between the braces.

                call next()
                if character <> "{"
                    return error("Missing '{'")
                fi
                call next()
                if not(hex[character])
                    return error("Missing hex codepoint")
                fi
                set escapee: character
                do
                    call next()
                    if not(hex[character])
                        break
                    fi
                    set escapee: escapee ~ character
                od
                if character <> "}"
                    return error("Missing '}'")
                fi
                set codepoint: number(escapee, 16)
                if codepoint >= 4294967296 \/ codepoint = null
                    return error("Bad codepoint")
                fi

# NOTE(review): `character` is also the module variable that holds the
# current character, so this call may not reach the intrinsic character
# function. Confirm against the language's scoping rules.

                set value: value ~ character(codepoint)
            else
                return error("Bad escapement")
            fi
        else
            set value: value ~ character
        fi
    od
    set token.kind: "text"
    set token.text: value
    set token.quote: "\q"
    return next()
}

# A chevron text literal, « ... ». Chevrons nest, and the literal may span
# lines. token.text is the raw source between the outermost chevrons.

def chevron: ƒ () {
    var nesting: 0
    do
        call next()
        if character = "»"
            if nesting = 0
                break
            fi
            set nesting: nesting - 1
        else if character = "«"
            set nesting: nesting + 1

# next() yields null once at runs past the end of the source. That is the
# same end of input test used by comment, quote and tokenize, so an unclosed
# literal is reported here instead of the loop running on.

        else if character = null
            return error("Unclosed text literal")
        fi
    od
    call next()
    set token.kind: "text"
    set token.text: text(source, token.at + 1, at - 1)
    set token.quote: "«"
    return null
}

# Dispatch table: the first character of a token selects its tokenator.
# A character with no entry yields a "Bad token" error in tokenize.

set tokenators: {
    "\n": newline
    "\r": carriage_return
    " ": space
    "#": comment
    ".": special
    ",": special
    ":": special
    "/": slash
    "\b": backslash
    "|": special
    "ƒ": special
    "¶": special
    "!": special
    "\q": quote
    "«": chevron
    "-": minus
    "+": special
    "*": special
    "@": special
    "(": special
    ")": special
    "[": special
    "]": special
    "{": special
    "}": special
    "=": special
    "<": less
    ">": greater
    "≠": special
    "≤": special
    "≥": special
    "÷": special
    "~": special
    "≈": special
    "%": special
    "^": special
    "&": special
    "'": special
    "`": special
    "$": special
    "_": special
    "?": special
    "0": digit
    "1": digit
    "2": digit
    "3": digit
    "4": digit
    "5": digit
    "6": digit
    "7": digit
    "8": digit
    "9": digit
    "A": letter
    "B": letter
    "C": letter
    "D": letter
    "E": letter
    "F": letter
    "G": letter
    "H": letter
    "I": letter
    "J": letter
    "K": letter
    "L": letter
    "M": letter
    "N": letter
    "O": letter
    "P": letter
    "Q": letter
    "R": letter
    "S": letter
    "T": letter
    "U": letter
    "V": letter
    "W": letter
    "X": letter
    "Y": letter
    "Z": letter
    "a": letter
    "b": letter
    "c": letter
    "d": letter
    "e": letter
    "f": letter
    "g": letter
    "h": letter
    "i": letter
    "j": letter
    "k": letter
    "l": letter
    "m": letter
    "n": letter
    "o": letter
    "p": letter
    "q": letter
    "r": letter
    "s": letter
    "t": letter
    "u": letter
    "v": letter
    "w": letter
    "x": letter
    "y": letter
    "z": letter
}

# tokenize(source_text)
#     source_text: the Misty source to split into tokens.
#     Returns the array of token records, or, on the first error, the single
#     error token (kind "error", with text, where and reason fields).
#
# Every token record starts with the at, column_nr and line_nr at which the
# token begins. The tokenator then adds kind and text.

return ƒ tokenize(source_text) {
    set source: source_text
    set at: 0
    set line_nr: 0
    set column_nr: 0
    set tokens: []
    set character: source[0]
    set tokenator: tokenators[character]
    do
        if character = null
            break
        fi
        set token: { at column_nr line_nr }
        if function?(tokenator)
            call tokenator()
        else
            call error("Bad token")
        fi
        set tokens[]: token
        if token.kind = "error"
            return token
        fi
    od
    return tokens
}

end tokenizer
Syntax tree (in JSON)