""" pygments.lexers.mojo ~~~~~~~~~~~~~~~~~~~~ Lexers for Mojo and related languages. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import keyword from pygments import unistring as uni from pygments.lexer import ( RegexLexer, bygroups, combined, default, include, this, using, words, ) from pygments.token import ( Comment, # Error, Keyword, Name, Number, Operator, Punctuation, String, Text, Whitespace, ) from pygments.util import shebang_matches __all__ = ["MojoLexer"] class MojoLexer(RegexLexer): """ For Mojo source code (version 24.2.1). """ name = "Mojo" url = "https://docs.modular.com/mojo/" aliases = ["mojo", "🔥"] filenames = [ "*.mojo", "*.🔥", ] mimetypes = [ "text/x-mojo", "application/x-mojo", ] version_added = "2.18" uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*" def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (still valid in Py3) ( r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?" "[hlL]?[E-GXc-giorsaux%]", String.Interpol, ), # the new style '{}'.format(...) string formatting ( r"\{" r"((\w+)((\.\w+)|(\[[^\]]+\]))*)?" # field name r"(\![sra])?" # conversion r"(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?" r"\}", String.Interpol, ), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r"%|(\{{1,2})", ttype), # newlines are an error (use "nl" state) ] def fstring_rules(ttype): return [ # Assuming that a '}' is the closing brace after format specifier. # Sadly, this means that we won't detect syntax error. But it's # more important to parse correct syntax correctly, than to # highlight invalid syntax. (r"\}", String.Interpol), (r"\{", String.Interpol, "expr-inside-fstring"), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"{}\n]+', ttype), (r'[\'"\\]', ttype), # newlines are an error (use "nl" state) ] tokens = { "root": [ (r"\s+", Whitespace), ( r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Whitespace, String.Affix, String.Doc), ), ( r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Whitespace, String.Affix, String.Doc), ), (r"\A#!.+$", Comment.Hashbang), (r"#.*$", Comment.Single), (r"\\\n", Whitespace), (r"\\", Whitespace), include("keywords"), include("soft-keywords"), # In the original PR, all the below here used ((?:\s|\\\s)+) to # designate whitespace, but I can't find any example of this being # needed in the example file, so we're replacing it with `\s+`. ( r"(alias)(\s+)", bygroups(Keyword, Whitespace), "varname", # TODO varname the right fit? ), (r"(var)(\s+)", bygroups(Keyword, Whitespace), "varname"), (r"(def)(\s+)", bygroups(Keyword, Whitespace), "funcname"), (r"(fn)(\s+)", bygroups(Keyword, Whitespace), "funcname"), ( r"(class)(\s+)", bygroups(Keyword, Whitespace), "classname", ), # not implemented yet (r"(struct)(\s+)", bygroups(Keyword, Whitespace), "structname"), (r"(trait)(\s+)", bygroups(Keyword, Whitespace), "structname"), (r"(from)(\s+)", bygroups(Keyword.Namespace, Whitespace), "fromimport"), (r"(import)(\s+)", bygroups(Keyword.Namespace, Whitespace), "import"), include("expr"), ], "expr": [ # raw f-strings ( '(?i)(rf|fr)(""")', bygroups(String.Affix, String.Double), combined("rfstringescape", "tdqf"), ), ( "(?i)(rf|fr)(''')", bygroups(String.Affix, String.Single), combined("rfstringescape", "tsqf"), ), ( '(?i)(rf|fr)(")', bygroups(String.Affix, String.Double), combined("rfstringescape", "dqf"), ), ( "(?i)(rf|fr)(')", bygroups(String.Affix, String.Single), combined("rfstringescape", "sqf"), ), # non-raw f-strings ( '([fF])(""")', bygroups(String.Affix, String.Double), combined("fstringescape", "tdqf"), ), ( "([fF])(''')", bygroups(String.Affix, String.Single), combined("fstringescape", "tsqf"), ), ( '([fF])(")', bygroups(String.Affix, String.Double), combined("fstringescape", "dqf"), ), ( "([fF])(')", bygroups(String.Affix, String.Single), combined("fstringescape", "sqf"), ), # raw bytes and strings ('(?i)(rb|br|r)(""")', bygroups(String.Affix, String.Double), "tdqs"), ("(?i)(rb|br|r)(''')", bygroups(String.Affix, String.Single), "tsqs"), ('(?i)(rb|br|r)(")', bygroups(String.Affix, String.Double), "dqs"), ("(?i)(rb|br|r)(')", bygroups(String.Affix, String.Single), "sqs"), # non-raw strings ( '([uU]?)(""")', bygroups(String.Affix, String.Double), combined("stringescape", "tdqs"), ), ( "([uU]?)(''')", bygroups(String.Affix, String.Single), combined("stringescape", "tsqs"), ), ( '([uU]?)(")', bygroups(String.Affix, String.Double), combined("stringescape", "dqs"), ), ( "([uU]?)(')", bygroups(String.Affix, String.Single), combined("stringescape", "sqs"), ), # non-raw bytes ( '([bB])(""")', bygroups(String.Affix, String.Double), combined("bytesescape", "tdqs"), ), ( "([bB])(''')", bygroups(String.Affix, String.Single), combined("bytesescape", "tsqs"), ), ( '([bB])(")', bygroups(String.Affix, String.Double), combined("bytesescape", "dqs"), ), ( "([bB])(')", bygroups(String.Affix, String.Single), combined("bytesescape", "sqs"), ), (r"[^\S\n]+", Text), include("numbers"), (r"!=|==|<<|>>|:=|[-~+/*%=<>&^|.]", Operator), (r"([]{}:\(\),;[])+", Punctuation), (r"(in|is|and|or|not)\b", Operator.Word), include("expr-keywords"), include("builtins"), include("magicfuncs"), include("magicvars"), include("name"), ], "expr-inside-fstring": [ (r"[{([]", Punctuation, "expr-inside-fstring-inner"), # without format specifier ( r"(=\s*)?" # debug (https://bugs.python.org/issue36817) r"(\![sraf])?" # conversion r"\}", String.Interpol, "#pop", ), # with format specifier # we'll catch the remaining '}' in the outer scope ( r"(=\s*)?" # debug (https://bugs.python.org/issue36817) r"(\![sraf])?" # conversion r":", String.Interpol, "#pop", ), (r"\s+", Whitespace), # allow new lines include("expr"), ], "expr-inside-fstring-inner": [ (r"[{([]", Punctuation, "expr-inside-fstring-inner"), (r"[])}]", Punctuation, "#pop"), (r"\s+", Whitespace), # allow new lines include("expr"), ], "expr-keywords": [ # Based on https://docs.python.org/3/reference/expressions.html ( words( ( "async for", # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with "async with", # TODO https://docs.modular.com/mojo/roadmap#no-async-for-or-async-with "await", "else", "for", "if", "lambda", "yield", "yield from", ), suffix=r"\b", ), Keyword, ), (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant), ], "keywords": [ ( words( ( "assert", "async", "await", "borrowed", "break", "continue", "del", "elif", "else", "except", "finally", "for", "global", "if", "lambda", "pass", "raise", "nonlocal", "return", "try", "while", "yield", "yield from", "as", "with", ), suffix=r"\b", ), Keyword, ), (words(("True", "False", "None"), suffix=r"\b"), Keyword.Constant), ], "soft-keywords": [ # `match`, `case` and `_` soft keywords ( r"(^[ \t]*)" # at beginning of line + possible indentation r"(match|case)\b" # a possible keyword r"(?![ \t]*(?:" # not followed by... r"[:,;=^&|@~)\]}]|(?:" + # characters and keywords that mean this isn't # pattern matching (but None/True/False is ok) r"|".join(k for k in keyword.kwlist if k[0].islower()) + r")\b))", bygroups(Whitespace, Keyword), "soft-keywords-inner", ), ], "soft-keywords-inner": [ # optional `_` keyword (r"(\s+)([^\n_]*)(_\b)", bygroups(Whitespace, using(this), Keyword)), default("#pop"), ], "builtins": [ ( words( ( "__import__", "abs", "aiter", "all", "any", "bin", "bool", "bytearray", "breakpoint", "bytes", "callable", "chr", "classmethod", "compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter", "float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "hex", "id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map", "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print", "property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted", "staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", # Mojo builtin types: https://docs.modular.com/mojo/stdlib/builtin/ "AnyType", "Coroutine", "DType", "Error", "Int", "List", "ListLiteral", "Scalar", "Int8", "UInt8", "Int16", "UInt16", "Int32", "UInt32", "Int64", "UInt64", "BFloat16", "Float16", "Float32", "Float64", "SIMD", "String", "Tensor", "Tuple", "Movable", "Copyable", "CollectionElement", ), prefix=r"(?