grammar.txt

Documentation: go.starlark.net/syntax

     1
     2Grammar of Starlark
     3==================
     4
     5File = {Statement | newline} eof .
     6
     7Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .
     8
     9DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
    10
    11Parameters = Parameter {',' Parameter}.
    12
    13Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .
    14
    15IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
    16
    17ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
    18
    19WhileStmt = 'while' Test ':' Suite .
    20
    21Suite = [newline indent {Statement} outdent] | SimpleStmt .
    22
    23SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
    24# NOTE: '\n' optional at EOF
    25
    26SmallStmt = ReturnStmt
    27          | BreakStmt | ContinueStmt | PassStmt
    28          | AssignStmt
    29          | ExprStmt
    30          | LoadStmt
    31          .
    32
    33ReturnStmt   = 'return' [Expression] .
    34BreakStmt    = 'break' .
    35ContinueStmt = 'continue' .
    36PassStmt     = 'pass' .
    37AssignStmt   = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
    38ExprStmt     = Expression .
    39
    40LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
    41
    42Test = LambdaExpr
    43     | IfExpr
    44     | PrimaryExpr
    45     | UnaryExpr
    46     | BinaryExpr
    47     .
    48
    49LambdaExpr = 'lambda' [Parameters] ':' Test .
    50
    51IfExpr = Test 'if' Test 'else' Test .
    52
    53PrimaryExpr = Operand
    54            | PrimaryExpr DotSuffix
    55            | PrimaryExpr CallSuffix
    56            | PrimaryExpr SliceSuffix
    57            .
    58
    59Operand = identifier
    60        | int | float | string
    61        | ListExpr | ListComp
    62        | DictExpr | DictComp
    63        | '(' [Expression [',']] ')'
    64        | ('-' | '+') PrimaryExpr
    65        .
    66
    67DotSuffix   = '.' identifier .
    68CallSuffix  = '(' [Arguments [',']] ')' .
    69SliceSuffix = '[' [Expression] [':' [Test] [':' [Test]]] ']' .
    70
    71Arguments = Argument {',' Argument} .
    72Argument  = Test | identifier '=' Test | '*' Test | '**' Test .
    73
    74ListExpr = '[' [Expression [',']] ']' .
    75ListComp = '[' Test {CompClause} ']'.
    76
    77DictExpr = '{' [Entries [',']] '}' .
    78DictComp = '{' Entry {CompClause} '}' .
    79Entries  = Entry {',' Entry} .
    80Entry    = Test ':' Test .
    81
    82CompClause = 'for' LoopVariables 'in' Test | 'if' Test .
    83
    84UnaryExpr = 'not' Test .
    85
    86BinaryExpr = Test {Binop Test} .
    87
    88Binop = 'or'
    89      | 'and'
    90      | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
    91      | '|'
    92      | '^'
    93      | '&'
            | '<<' | '>>'
    94      | '-' | '+'
    95      | '*' | '%' | '/' | '//'
    96      .
    97
    98Expression = Test {',' Test} .
    99# NOTE: trailing comma permitted only when within [...] or (...).
   100
   101LoopVariables = PrimaryExpr {',' PrimaryExpr} .
   102
   103
   104# Notation (similar to Go spec):
   105- lowercase and 'quoted' items are lexical tokens.
   106- Capitalized names denote grammar productions.
   107- (...) implies grouping
   108- x | y means either x or y.
   109- [x] means x is optional
   110- {x} means x is repeated zero or more times
   111- The end of each declaration is marked with a period.
   112
   113# Tokens
   114- spaces: newline, eof, indent, outdent.
   115- identifier.
   116- literals: string, int, float.
   117- plus all quoted tokens such as '+=', 'return'.
   118
   119# Notes:
   120- Ambiguity is resolved using operator precedence.
   121- The grammar does not enforce the legal order of params and args,
   122  nor that the first CompClause must be a 'for'.
   123
   124TODO:
   125- explain how the lexer generates indent, outdent, and newline tokens.
   126- why is unary NOT separated from unary - and +?
   127- the grammar is (mostly) in LL(1) style so, for example,
   128  dot expressions are formed from suffixes, not complete expressions,
   129  which makes the spec harder to read.  Reorganize into non-LL(1) form?
View as plain text