sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
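
# --- Editorial example (not part of the upstream module) ---------------------
# A minimal sketch of how one of these builders behaves: `build_like` receives
# the function-style argument order LIKE(pattern, value), as in SQLite's
# like(X, Y), and swaps it into the binary exp.Like node, where `this` is the
# value being matched. A hypothetical session:
#
#     >>> from sqlglot import exp
#     >>> node = build_like([exp.Literal.string("a%"), exp.column("x")])
#     >>> isinstance(node, exp.Like), node.this.name
#     (True, 'x')
# ------------------------------------------------------------------------------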


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
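
# --- Editorial example (not part of the upstream module) ---------------------
# The metaclass above precomputes tries over the space-separated keys of
# SHOW_PARSERS and SET_PARSERS so that multi-word phrases such as "SHOW TABLES"
# can be matched one token at a time. A minimal sketch of the trie helpers
# imported at the top of this module:
#
#     >>> from sqlglot.trie import TrieResult, in_trie, new_trie
#     >>> trie = new_trie(key.split(" ") for key in ("SHOW TABLES",))
#     >>> in_trie(trie, ["SHOW"])[0] is TrieResult.PREFIX
#     True
# ------------------------------------------------------------------------------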


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
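
    # --- Editorial note (not part of the upstream module) --------------------
    # Dialects customize parsing by subclassing Parser and extending these
    # class-level tables. A hypothetical dialect parser that registers one
    # extra function builder could look like this:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #         }
    # --------------------------------------------------------------------------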

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}
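
    # --- Editorial example (not part of the upstream module) -----------------
    # SIGNED_TO_UNSIGNED_TYPE_TOKEN backs syntax such as MySQL's INT UNSIGNED,
    # remapping a signed type token to its unsigned counterpart:
    #
    #     >>> Parser.SIGNED_TO_UNSIGNED_TYPE_TOKEN[TokenType.INT] is TokenType.UINT
    #     True
    # --------------------------------------------------------------------------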

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
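
    # --- Editorial example (not part of the upstream module) -----------------
    # TABLE_ALIAS_TOKENS removes join-related keywords from ID_VAR_TOKENS so
    # that in `FROM t LEFT JOIN u`, LEFT starts a join instead of being taken
    # as an alias for t. A hypothetical session (output may vary by version):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT * FROM t LEFT JOIN u ON t.a = u.a").find(exp.Join).side
    #     'LEFT'
    # --------------------------------------------------------------------------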

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
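
    # --- Editorial example (not part of the upstream module) -----------------
    # EQUALITY, COMPARISON, BITWISE, TERM and FACTOR map operator tokens to
    # node types for the precedence-climbing binary parser, so `1 + 2 * 3`
    # groups the multiplication first. A hypothetical session:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT 1 + 2 * 3").find(exp.Add).expression.sql()
    #     '2 * 3'
    # --------------------------------------------------------------------------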

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
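
    # --- Editorial example (not part of the upstream module) -----------------
    # COLUMN_OPERATORS wires postfix operators such as `::` into expressions;
    # with STRICT_CAST enabled, the double colon yields an exp.Cast node. A
    # hypothetical session (output may vary by version):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x::INT").selects[0].sql()
    #     'CAST(x AS INT)'
    # --------------------------------------------------------------------------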

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
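
    # --- Editorial example (not part of the upstream module) -----------------
    # RANGE_PARSERS handles predicates that follow an operand, e.g. BETWEEN
    # and IN; binary_range_parser (defined at module level above) produces the
    # LIKE-family entries. A hypothetical session:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 2").find(exp.Between).sql()
    #     'x BETWEEN 1 AND 2'
    # --------------------------------------------------------------------------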

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
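
    # --- Editorial example (not part of the upstream module) -----------------
    # PROPERTY_PARSERS is keyed by the keyword that introduces a DDL property,
    # e.g. ENGINE in MySQL-flavored CREATE TABLE statements. A hypothetical
    # session (exact rendering may vary by version):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     >>> ast.find(exp.EngineProperty) is not None
    #     True
    # --------------------------------------------------------------------------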

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
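
    # --- Editorial example (not part of the upstream module) -----------------
    # CONSTRAINT_PARSERS drives column-level constraints inside a schema; for
    # instance PRIMARY KEY becomes a column constraint node:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY)")
    #     >>> ast.find(exp.PrimaryKeyColumnConstraint) is not None
    #     True
    # --------------------------------------------------------------------------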

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }
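
    # --- Editorial example (not part of the upstream module) -----------------
    # NO_PAREN_FUNCTION_PARSERS covers keyword-introduced constructs that are
    # parsed like functions but take no parenthesized argument list:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT CASE WHEN x > 0 THEN 1 ELSE 0 END").find(exp.Case) is not None
    #     True
    # --------------------------------------------------------------------------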

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }
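
    # --- Editorial example (not part of the upstream module) -----------------
    # FUNCTION_PARSERS handles call-like constructs whose interiors are not a
    # plain argument list, e.g. EXTRACT(unit FROM expr):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").find(exp.Extract) is not None
    #     True
    # --------------------------------------------------------------------------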

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }
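
    # --- Editorial example (not part of the upstream module) -----------------
    # ODBC_DATETIME_LITERALS maps the marker in ODBC literals such as
    # {d '2024-01-01'} to the expression type used to represent them:
    #
    #     >>> Parser.ODBC_DATETIME_LITERALS["d"] is exp.Date
    #     True
    # --------------------------------------------------------------------------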

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
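
    # --- Editorial example (not part of the upstream module) -----------------
    # A sketch of driving the parser directly; in normal use sqlglot.parse or
    # sqlglot.parse_one constructs the tokenizer and parser for you (repr
    # abbreviated):
    #
    #     >>> from sqlglot.dialects import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> tokens = dialect.tokenize("SELECT 1")
    #     >>> dialect.parser().parse(tokens, "SELECT 1")
    #     [Select(...)]
    # --------------------------------------------------------------------------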

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
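
    # --- Editorial example (not part of the upstream module) -----------------
    # _parse splits the token stream into one chunk per semicolon-delimited
    # statement, which is why the public API returns one tree per statement:
    #
    #     >>> import sqlglot
    #     >>> len(sqlglot.parse("SELECT 1; SELECT 2"))
    #     2
    # --------------------------------------------------------------------------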

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
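
    # --- Editorial sketch (not part of the upstream module) ------------------
    # raise_error either raises immediately or accumulates, depending on
    # error_level; a hypothetical sketch of deferred error collection:
    #
    #     from sqlglot.errors import ErrorLevel, ParseError
    #
    #     parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
    #     try:
    #         parser.parse(tokens)  # `tokens` produced by a Tokenizer
    #     except ParseError as e:
    #         details = e.errors  # structured info recorded by raise_error
    # --------------------------------------------------------------------------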

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
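
    # --- Editorial example (not part of the upstream module) -----------------
    # _parse_statement dispatches on the leading token via STATEMENT_PARSERS,
    # so a DROP statement lands in _parse_drop below:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("DROP TABLE t")).__name__
    #     'Drop'
    # --------------------------------------------------------------------------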
1705 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1706 start = self._prev 1707 temporary = self._match(TokenType.TEMPORARY) 1708 materialized = self._match_text_seq("MATERIALIZED") 1709 1710 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1711 if not kind: 1712 return self._parse_as_command(start) 1713 1714 concurrently = self._match_text_seq("CONCURRENTLY") 1715 if_exists = exists or self._parse_exists() 1716 table = self._parse_table_parts( 1717 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1718 ) 1719 1720 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1721 1722 if self._match(TokenType.L_PAREN, advance=False): 1723 expressions = self._parse_wrapped_csv(self._parse_types) 1724 else: 1725 expressions = None 1726 1727 return self.expression( 1728 exp.Drop, 1729 comments=start.comments, 1730 exists=if_exists, 1731 this=table, 1732 expressions=expressions, 1733 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1734 temporary=temporary, 1735 materialized=materialized, 1736 cascade=self._match_text_seq("CASCADE"), 1737 constraints=self._match_text_seq("CONSTRAINTS"), 1738 purge=self._match_text_seq("PURGE"), 1739 cluster=cluster, 1740 concurrently=concurrently, 1741 ) 1742 1743 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1744 return ( 1745 self._match_text_seq("IF") 1746 and (not not_ or self._match(TokenType.NOT)) 1747 and self._match(TokenType.EXISTS) 1748 ) 1749 1750 def _parse_create(self) -> exp.Create | exp.Command: 1751 # Note: this can't be None because we've matched a statement parser 1752 start = self._prev 1753 comments = self._prev_comments 1754 1755 replace = ( 1756 start.token_type == TokenType.REPLACE 1757 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1758 or self._match_pair(TokenType.OR, TokenType.ALTER) 1759 ) 1760 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1761 1762 unique = self._match(TokenType.UNIQUE) 1763 1764 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1765 clustered = True 1766 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1767 "COLUMNSTORE" 1768 ): 1769 clustered = False 1770 else: 1771 clustered = None 1772 1773 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1774 self._advance() 1775 1776 properties = None 1777 create_token = self._match_set(self.CREATABLES) and self._prev 1778 1779 if not create_token: 1780 # exp.Properties.Location.POST_CREATE 1781 properties = self._parse_properties() 1782 create_token = self._match_set(self.CREATABLES) and self._prev 1783 1784 if not properties or not create_token: 1785 return self._parse_as_command(start) 1786 1787 concurrently = self._match_text_seq("CONCURRENTLY") 1788 exists = self._parse_exists(not_=True) 1789 this = None 1790 expression: t.Optional[exp.Expression] = None 1791 indexes = None 1792 no_schema_binding = None 1793 begin = None 1794 end = None 1795 clone = None 1796 1797 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1798 nonlocal properties 1799 if properties and temp_props: 1800 properties.expressions.extend(temp_props.expressions) 1801 elif temp_props: 1802 properties = temp_props 1803 1804 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1805 this = self._parse_user_defined_function(kind=create_token.token_type) 1806 1807 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1808 extend_props(self._parse_properties()) 1809 1810 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
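# NOTE (editor's comment, hedged): properties may appear on either side of the
# function body. Postgres, for instance, accepts LANGUAGE after the AS clause,
# which is why another property pass runs right after the heredoc body.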
1811 extend_props(self._parse_properties()) 1812 1813 if not expression: 1814 if self._match(TokenType.COMMAND): 1815 expression = self._parse_as_command(self._prev) 1816 else: 1817 begin = self._match(TokenType.BEGIN) 1818 return_ = self._match_text_seq("RETURN") 1819 1820 if self._match(TokenType.STRING, advance=False): 1821 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1822 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1823 expression = self._parse_string() 1824 extend_props(self._parse_properties()) 1825 else: 1826 expression = self._parse_statement() 1827 1828 end = self._match_text_seq("END") 1829 1830 if return_: 1831 expression = self.expression(exp.Return, this=expression) 1832 elif create_token.token_type == TokenType.INDEX: 1833 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1834 if not self._match(TokenType.ON): 1835 index = self._parse_id_var() 1836 anonymous = False 1837 else: 1838 index = None 1839 anonymous = True 1840 1841 this = self._parse_index(index=index, anonymous=anonymous) 1842 elif create_token.token_type in self.DB_CREATABLES: 1843 table_parts = self._parse_table_parts( 1844 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1845 ) 1846 1847 # exp.Properties.Location.POST_NAME 1848 self._match(TokenType.COMMA) 1849 extend_props(self._parse_properties(before=True)) 1850 1851 this = self._parse_schema(this=table_parts) 1852 1853 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1854 extend_props(self._parse_properties()) 1855 1856 self._match(TokenType.ALIAS) 1857 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1858 # exp.Properties.Location.POST_ALIAS 1859 extend_props(self._parse_properties()) 1860 1861 if create_token.token_type == TokenType.SEQUENCE: 1862 expression = self._parse_types() 1863 extend_props(self._parse_properties()) 1864 else: 1865 expression = self._parse_ddl_select() 1866 1867 if create_token.token_type == TokenType.TABLE: 1868 # exp.Properties.Location.POST_EXPRESSION 1869 extend_props(self._parse_properties()) 1870 1871 indexes = [] 1872 while True: 1873 index = self._parse_index() 1874 1875 # exp.Properties.Location.POST_INDEX 1876 extend_props(self._parse_properties()) 1877 if not index: 1878 break 1879 else: 1880 self._match(TokenType.COMMA) 1881 indexes.append(index) 1882 elif create_token.token_type == TokenType.VIEW: 1883 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1884 no_schema_binding = True 1885 1886 shallow = self._match_text_seq("SHALLOW") 1887 1888 if self._match_texts(self.CLONE_KEYWORDS): 1889 copy = self._prev.text.lower() == "copy" 1890 clone = self.expression( 1891 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1892 ) 1893 1894 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1895 return self._parse_as_command(start) 1896 1897 create_kind_text = create_token.text.upper() 1898 return self.expression( 1899 exp.Create, 1900 comments=comments, 1901 this=this, 1902 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1903 replace=replace, 1904 refresh=refresh, 1905 unique=unique, 1906 expression=expression, 1907 exists=exists, 1908 properties=properties, 1909 indexes=indexes, 1910 no_schema_binding=no_schema_binding, 1911 begin=begin, 1912 end=end, 1913 clone=clone, 1914 concurrently=concurrently, 1915 clustered=clustered, 1916 ) 1917
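# NOTE (editor's sketch, not part of the original source): a hedged round-trip
# for the CREATE path above, exercising the replace flag and kind handling:
#
#     import sqlglot
#     ast = sqlglot.parse_one("CREATE OR REPLACE TABLE t (c INT)", read="duckdb")
#     assert ast.args.get("replace") is True
#     assert ast.args.get("kind") == "TABLE"
1918 def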
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1919 seq = exp.SequenceProperties() 1920 1921 options = [] 1922 index = self._index 1923 1924 while self._curr: 1925 self._match(TokenType.COMMA) 1926 if self._match_text_seq("INCREMENT"): 1927 self._match_text_seq("BY") 1928 self._match_text_seq("=") 1929 seq.set("increment", self._parse_term()) 1930 elif self._match_text_seq("MINVALUE"): 1931 seq.set("minvalue", self._parse_term()) 1932 elif self._match_text_seq("MAXVALUE"): 1933 seq.set("maxvalue", self._parse_term()) 1934 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1935 self._match_text_seq("=") 1936 seq.set("start", self._parse_term()) 1937 elif self._match_text_seq("CACHE"): 1938 # T-SQL allows empty CACHE which is initialized dynamically 1939 seq.set("cache", self._parse_number() or True) 1940 elif self._match_text_seq("OWNED", "BY"): 1941 # "OWNED BY NONE" is the default 1942 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1943 else: 1944 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1945 if opt: 1946 options.append(opt) 1947 else: 1948 break 1949 1950 seq.set("options", options if options else None) 1951 return None if self._index == index else seq 1952 1953 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1954 # only used for teradata currently 1955 self._match(TokenType.COMMA) 1956 1957 kwargs = { 1958 "no": self._match_text_seq("NO"), 1959 "dual": self._match_text_seq("DUAL"), 1960 "before": self._match_text_seq("BEFORE"), 1961 "default": self._match_text_seq("DEFAULT"), 1962 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1963 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1964 "after": self._match_text_seq("AFTER"), 1965 "minimum": self._match_texts(("MIN", "MINIMUM")), 1966 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1967 } 1968 1969 if self._match_texts(self.PROPERTY_PARSERS): 1970 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1971 try: 1972 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1973 except TypeError: 1974 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1975 1976 return None 1977 1978 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1979 return self._parse_wrapped_csv(self._parse_property) 1980 1981 def _parse_property(self) -> t.Optional[exp.Expression]: 1982 if self._match_texts(self.PROPERTY_PARSERS): 1983 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1984 1985 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1986 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1987 1988 if self._match_text_seq("COMPOUND", "SORTKEY"): 1989 return self._parse_sortkey(compound=True) 1990 1991 if self._match_text_seq("SQL", "SECURITY"): 1992 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1993 1994 index = self._index 1995 key = self._parse_column() 1996 1997 if not self._match(TokenType.EQ): 1998 self._retreat(index) 1999 return self._parse_sequence_properties() 2000 2001 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2002 if isinstance(key, exp.Column): 2003 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2004 2005 value = self._parse_bitwise() or self._parse_var(any_token=True) 2006 2007 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2008 if 
isinstance(value, exp.Column): 2009 value = exp.var(value.name) 2010 2011 return self.expression(exp.Property, this=key, value=value) 2012 2013 def _parse_stored(self) -> exp.FileFormatProperty: 2014 self._match(TokenType.ALIAS) 2015 2016 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2017 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2018 2019 return self.expression( 2020 exp.FileFormatProperty, 2021 this=( 2022 self.expression( 2023 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2024 ) 2025 if input_format or output_format 2026 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2027 ), 2028 ) 2029 2030 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2031 field = self._parse_field() 2032 if isinstance(field, exp.Identifier) and not field.quoted: 2033 field = exp.var(field) 2034 2035 return field 2036 2037 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2038 self._match(TokenType.EQ) 2039 self._match(TokenType.ALIAS) 2040 2041 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2042 2043 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2044 properties = [] 2045 while True: 2046 if before: 2047 prop = self._parse_property_before() 2048 else: 2049 prop = self._parse_property() 2050 if not prop: 2051 break 2052 for p in ensure_list(prop): 2053 properties.append(p) 2054 2055 if properties: 2056 return self.expression(exp.Properties, expressions=properties) 2057 2058 return None 2059 2060 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2061 return self.expression( 2062 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2063 ) 2064 2065 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2066 if self._match_texts(("DEFINER", "INVOKER")): 2067 security_specifier = self._prev.text.upper() 2068 return self.expression(exp.SecurityProperty, this=security_specifier) 2069 return None 2070 2071 def _parse_settings_property(self) -> exp.SettingsProperty: 2072 return self.expression( 2073 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2074 ) 2075 2076 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2077 if self._index >= 2: 2078 pre_volatile_token = self._tokens[self._index - 2] 2079 else: 2080 pre_volatile_token = None 2081 2082 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2083 return exp.VolatileProperty() 2084 2085 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2086 2087 def _parse_retention_period(self) -> exp.Var: 2088 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2089 number = self._parse_number() 2090 number_str = f"{number} " if number else "" 2091 unit = self._parse_var(any_token=True) 2092 return exp.var(f"{number_str}{unit}") 2093 2094 def _parse_system_versioning_property( 2095 self, with_: bool = False 2096 ) -> exp.WithSystemVersioningProperty: 2097 self._match(TokenType.EQ) 2098 prop = self.expression( 2099 exp.WithSystemVersioningProperty, 2100 **{ # type: ignore 2101 "on": True, 2102 "with": with_, 2103 }, 2104 ) 2105 2106 if self._match_text_seq("OFF"): 2107 prop.set("on", False) 2108 return prop 2109 2110 self._match(TokenType.ON) 2111 if self._match(TokenType.L_PAREN): 2112 while self._curr and not 
self._match(TokenType.R_PAREN): 2113 if self._match_text_seq("HISTORY_TABLE", "="): 2114 prop.set("this", self._parse_table_parts()) 2115 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2116 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2117 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2118 prop.set("retention_period", self._parse_retention_period()) 2119 2120 self._match(TokenType.COMMA) 2121 2122 return prop 2123 2124 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2125 self._match(TokenType.EQ) 2126 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2127 prop = self.expression(exp.DataDeletionProperty, on=on) 2128 2129 if self._match(TokenType.L_PAREN): 2130 while self._curr and not self._match(TokenType.R_PAREN): 2131 if self._match_text_seq("FILTER_COLUMN", "="): 2132 prop.set("filter_column", self._parse_column()) 2133 elif self._match_text_seq("RETENTION_PERIOD", "="): 2134 prop.set("retention_period", self._parse_retention_period()) 2135 2136 self._match(TokenType.COMMA) 2137 2138 return prop 2139 2140 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2141 kind = "HASH" 2142 expressions: t.Optional[t.List[exp.Expression]] = None 2143 if self._match_text_seq("BY", "HASH"): 2144 expressions = self._parse_wrapped_csv(self._parse_id_var) 2145 elif self._match_text_seq("BY", "RANDOM"): 2146 kind = "RANDOM" 2147 2148 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2149 buckets: t.Optional[exp.Expression] = None 2150 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2151 buckets = self._parse_number() 2152 2153 return self.expression( 2154 exp.DistributedByProperty, 2155 expressions=expressions, 2156 kind=kind, 2157 buckets=buckets, 2158 order=self._parse_order(), 2159 ) 2160 2161 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2162 self._match_text_seq("KEY") 2163 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2164 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2165 2166 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2167 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2168 prop = self._parse_system_versioning_property(with_=True) 2169 self._match_r_paren() 2170 return prop 2171 2172 if self._match(TokenType.L_PAREN, advance=False): 2173 return self._parse_wrapped_properties() 2174 2175 if self._match_text_seq("JOURNAL"): 2176 return self._parse_withjournaltable() 2177 2178 if self._match_texts(self.VIEW_ATTRIBUTES): 2179 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2180 2181 if self._match_text_seq("DATA"): 2182 return self._parse_withdata(no=False) 2183 elif self._match_text_seq("NO", "DATA"): 2184 return self._parse_withdata(no=True) 2185 2186 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2187 return self._parse_serde_properties(with_=True) 2188 2189 if self._match(TokenType.SCHEMA): 2190 return self.expression( 2191 exp.WithSchemaBindingProperty, 2192 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2193 ) 2194 2195 if not self._next: 2196 return None 2197 2198 return self._parse_withisolatedloading() 2199 2200 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2201 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2202 self._match(TokenType.EQ) 2203 2204 user = self._parse_id_var() 2205 self._match(TokenType.PARAMETER) 2206 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2207 2208 if not user or not host: 2209 return None 2210 2211 return exp.DefinerProperty(this=f"{user}@{host}") 2212 2213 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2214 self._match(TokenType.TABLE) 2215 self._match(TokenType.EQ) 2216 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2217 2218 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2219 return self.expression(exp.LogProperty, no=no) 2220 2221 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2222 return self.expression(exp.JournalProperty, **kwargs) 2223 2224 def _parse_checksum(self) -> exp.ChecksumProperty: 2225 self._match(TokenType.EQ) 2226 2227 on = None 2228 if self._match(TokenType.ON): 2229 on = True 2230 elif self._match_text_seq("OFF"): 2231 on = False 2232 2233 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2234 2235 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2236 return self.expression( 2237 exp.Cluster, 2238 expressions=( 2239 self._parse_wrapped_csv(self._parse_ordered) 2240 if wrapped 2241 else self._parse_csv(self._parse_ordered) 2242 ), 2243 ) 2244 2245 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2246 self._match_text_seq("BY") 2247 2248 self._match_l_paren() 2249 expressions = self._parse_csv(self._parse_column) 2250 self._match_r_paren() 2251 2252 if self._match_text_seq("SORTED", "BY"): 2253 self._match_l_paren() 2254 sorted_by = self._parse_csv(self._parse_ordered) 2255 self._match_r_paren() 2256 else: 2257 sorted_by = None 2258 2259 self._match(TokenType.INTO) 2260 buckets = self._parse_number() 2261 self._match_text_seq("BUCKETS") 2262 2263 return self.expression( 2264 exp.ClusteredByProperty, 2265 expressions=expressions, 2266 sorted_by=sorted_by, 2267 buckets=buckets, 2268 ) 2269 2270 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2271 if not self._match_text_seq("GRANTS"): 2272 self._retreat(self._index - 1) 2273 return None 2274 2275 return self.expression(exp.CopyGrantsProperty) 2276 2277 def _parse_freespace(self) -> exp.FreespaceProperty: 2278 self._match(TokenType.EQ) 2279 return self.expression( 2280 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2281 ) 2282 2283 def _parse_mergeblockratio( 2284 self, no: bool = False, default: bool = False 2285 ) -> exp.MergeBlockRatioProperty: 2286 if self._match(TokenType.EQ): 2287 return self.expression( 2288 exp.MergeBlockRatioProperty, 2289 this=self._parse_number(), 2290 percent=self._match(TokenType.PERCENT), 2291 ) 2292 2293 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2294 2295 def _parse_datablocksize( 2296 self, 2297 default: t.Optional[bool] = None, 2298 minimum: t.Optional[bool] = None, 2299 maximum: t.Optional[bool] = None, 2300 ) -> exp.DataBlocksizeProperty: 2301 self._match(TokenType.EQ) 2302 size = self._parse_number() 2303 2304 units = None 2305 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2306 units = self._prev.text 2307 2308 return self.expression( 2309 exp.DataBlocksizeProperty, 2310 size=size, 2311 units=units, 2312 default=default, 2313 minimum=minimum, 2314 maximum=maximum, 2315 ) 2316 2317 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2318 self._match(TokenType.EQ) 2319 always = self._match_text_seq("ALWAYS") 2320 manual = self._match_text_seq("MANUAL") 2321 never = 
self._match_text_seq("NEVER") 2322 default = self._match_text_seq("DEFAULT") 2323 2324 autotemp = None 2325 if self._match_text_seq("AUTOTEMP"): 2326 autotemp = self._parse_schema() 2327 2328 return self.expression( 2329 exp.BlockCompressionProperty, 2330 always=always, 2331 manual=manual, 2332 never=never, 2333 default=default, 2334 autotemp=autotemp, 2335 ) 2336 2337 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2338 index = self._index 2339 no = self._match_text_seq("NO") 2340 concurrent = self._match_text_seq("CONCURRENT") 2341 2342 if not self._match_text_seq("ISOLATED", "LOADING"): 2343 self._retreat(index) 2344 return None 2345 2346 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2347 return self.expression( 2348 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2349 ) 2350 2351 def _parse_locking(self) -> exp.LockingProperty: 2352 if self._match(TokenType.TABLE): 2353 kind = "TABLE" 2354 elif self._match(TokenType.VIEW): 2355 kind = "VIEW" 2356 elif self._match(TokenType.ROW): 2357 kind = "ROW" 2358 elif self._match_text_seq("DATABASE"): 2359 kind = "DATABASE" 2360 else: 2361 kind = None 2362 2363 if kind in ("DATABASE", "TABLE", "VIEW"): 2364 this = self._parse_table_parts() 2365 else: 2366 this = None 2367 2368 if self._match(TokenType.FOR): 2369 for_or_in = "FOR" 2370 elif self._match(TokenType.IN): 2371 for_or_in = "IN" 2372 else: 2373 for_or_in = None 2374 2375 if self._match_text_seq("ACCESS"): 2376 lock_type = "ACCESS" 2377 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2378 lock_type = "EXCLUSIVE" 2379 elif self._match_text_seq("SHARE"): 2380 lock_type = "SHARE" 2381 elif self._match_text_seq("READ"): 2382 lock_type = "READ" 2383 elif self._match_text_seq("WRITE"): 2384 lock_type = "WRITE" 2385 elif self._match_text_seq("CHECKSUM"): 2386 lock_type = "CHECKSUM" 2387 else: 2388 lock_type = None 2389 2390 override = self._match_text_seq("OVERRIDE") 2391 2392 return self.expression( 2393 exp.LockingProperty, 2394 this=this, 2395 kind=kind, 2396 for_or_in=for_or_in, 2397 lock_type=lock_type, 2398 override=override, 2399 ) 2400 2401 def _parse_partition_by(self) -> t.List[exp.Expression]: 2402 if self._match(TokenType.PARTITION_BY): 2403 return self._parse_csv(self._parse_assignment) 2404 return [] 2405 2406 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2407 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2408 if self._match_text_seq("MINVALUE"): 2409 return exp.var("MINVALUE") 2410 if self._match_text_seq("MAXVALUE"): 2411 return exp.var("MAXVALUE") 2412 return self._parse_bitwise() 2413 2414 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2415 expression = None 2416 from_expressions = None 2417 to_expressions = None 2418 2419 if self._match(TokenType.IN): 2420 this = self._parse_wrapped_csv(self._parse_bitwise) 2421 elif self._match(TokenType.FROM): 2422 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2423 self._match_text_seq("TO") 2424 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2425 elif self._match_text_seq("WITH", "(", "MODULUS"): 2426 this = self._parse_number() 2427 self._match_text_seq(",", "REMAINDER") 2428 expression = self._parse_number() 2429 self._match_r_paren() 2430 else: 2431 self.raise_error("Failed to parse partition bound spec.") 2432 2433 return self.expression( 2434 exp.PartitionBoundSpec, 2435 this=this, 2436 expression=expression, 2437 
from_expressions=from_expressions, 2438 to_expressions=to_expressions, 2439 ) 2440 2441 # https://www.postgresql.org/docs/current/sql-createtable.html 2442 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2443 if not self._match_text_seq("OF"): 2444 self._retreat(self._index - 1) 2445 return None 2446 2447 this = self._parse_table(schema=True) 2448 2449 if self._match(TokenType.DEFAULT): 2450 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2451 elif self._match_text_seq("FOR", "VALUES"): 2452 expression = self._parse_partition_bound_spec() 2453 else: 2454 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2455 2456 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2457 2458 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2459 self._match(TokenType.EQ) 2460 return self.expression( 2461 exp.PartitionedByProperty, 2462 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2463 ) 2464 2465 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2466 if self._match_text_seq("AND", "STATISTICS"): 2467 statistics = True 2468 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2469 statistics = False 2470 else: 2471 statistics = None 2472 2473 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2474 2475 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2476 if self._match_text_seq("SQL"): 2477 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2478 return None 2479 2480 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2481 if self._match_text_seq("SQL", "DATA"): 2482 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2483 return None 2484 2485 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2486 if self._match_text_seq("PRIMARY", "INDEX"): 2487 return exp.NoPrimaryIndexProperty() 2488 if self._match_text_seq("SQL"): 2489 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2490 return None 2491 2492 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2493 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2494 return exp.OnCommitProperty() 2495 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2496 return exp.OnCommitProperty(delete=True) 2497 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2498 2499 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2500 if self._match_text_seq("SQL", "DATA"): 2501 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2502 return None 2503 2504 def _parse_distkey(self) -> exp.DistKeyProperty: 2505 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2506 2507 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2508 table = self._parse_table(schema=True) 2509 2510 options = [] 2511 while self._match_texts(("INCLUDING", "EXCLUDING")): 2512 this = self._prev.text.upper() 2513 2514 id_var = self._parse_id_var() 2515 if not id_var: 2516 return None 2517 2518 options.append( 2519 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2520 ) 2521 2522 return self.expression(exp.LikeProperty, this=table, expressions=options) 2523 2524 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2525 return self.expression( 2526 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2527 ) 2528 2529 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2530 self._match(TokenType.EQ) 2531 return self.expression( 2532 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2533 ) 2534 2535 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2536 self._match_text_seq("WITH", "CONNECTION") 2537 return self.expression( 2538 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2539 ) 2540 2541 def _parse_returns(self) -> exp.ReturnsProperty: 2542 value: t.Optional[exp.Expression] 2543 null = None 2544 is_table = self._match(TokenType.TABLE) 2545 2546 if is_table: 2547 if self._match(TokenType.LT): 2548 value = self.expression( 2549 exp.Schema, 2550 this="TABLE", 2551 expressions=self._parse_csv(self._parse_struct_types), 2552 ) 2553 if not self._match(TokenType.GT): 2554 self.raise_error("Expecting >") 2555 else: 2556 value = self._parse_schema(exp.var("TABLE")) 2557 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2558 null = True 2559 value = None 2560 else: 2561 value = self._parse_types() 2562 2563 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2564 2565 def _parse_describe(self) -> exp.Describe: 2566 kind = self._match_set(self.CREATABLES) and self._prev.text 2567 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2568 if self._match(TokenType.DOT): 2569 style = None 2570 self._retreat(self._index - 2) 2571 this = self._parse_table(schema=True) 2572 properties = self._parse_properties() 2573 expressions = properties.expressions if properties else None 2574 partition = self._parse_partition() 2575 return self.expression( 2576 exp.Describe, 2577 this=this, 2578 style=style, 2579 kind=kind, 2580 expressions=expressions, 2581 partition=partition, 2582 ) 2583 2584 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2585 kind = self._prev.text.upper() 2586 expressions = [] 2587 2588 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2589 if self._match(TokenType.WHEN): 2590 expression = self._parse_disjunction() 2591 self._match(TokenType.THEN) 2592 else: 2593 expression = None 2594 2595 else_ = self._match(TokenType.ELSE) 2596 2597 if not self._match(TokenType.INTO): 2598 return None 2599 2600 return self.expression( 2601 exp.ConditionalInsert, 2602 this=self.expression( 2603 exp.Insert, 2604 this=self._parse_table(schema=True), 2605 expression=self._parse_derived_table_values(), 2606 ), 2607 expression=expression, 2608 else_=else_, 2609 ) 2610 2611 expression = parse_conditional_insert() 2612 while expression is not None: 2613 expressions.append(expression) 2614 expression = parse_conditional_insert() 2615 2616 return self.expression( 2617 exp.MultitableInserts, 2618 kind=kind, 2619 comments=comments, 2620 expressions=expressions, 2621 source=self._parse_table(), 2622 ) 2623 2624 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2625 comments = ensure_list(self._prev_comments) 2626 hint = self._parse_hint() 2627 overwrite = self._match(TokenType.OVERWRITE) 2628 ignore = self._match(TokenType.IGNORE) 2629 local = self._match_text_seq("LOCAL") 2630 alternative = None 2631 is_function = None 2632 2633 if self._match_text_seq("DIRECTORY"): 2634 this: t.Optional[exp.Expression] = self.expression( 2635 exp.Directory, 2636 this=self._parse_var_or_string(), 2637 local=local, 2638 row_format=self._parse_row_format(match_row=True), 2639 ) 
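# NOTE (editor's comment, hedged): the branch above covers Hive/Spark-style
# INSERT OVERWRITE [LOCAL] DIRECTORY targets; the else-branch below handles
# ordinary INSERT targets, including Oracle multitable inserts (INSERT
# FIRST/ALL), which are delegated to _parse_multitable_inserts().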
2640 else: 2641 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2642 comments += ensure_list(self._prev_comments) 2643 return self._parse_multitable_inserts(comments) 2644 2645 if self._match(TokenType.OR): 2646 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2647 2648 self._match(TokenType.INTO) 2649 comments += ensure_list(self._prev_comments) 2650 self._match(TokenType.TABLE) 2651 is_function = self._match(TokenType.FUNCTION) 2652 2653 this = ( 2654 self._parse_table(schema=True, parse_partition=True) 2655 if not is_function 2656 else self._parse_function() 2657 ) 2658 2659 returning = self._parse_returning() 2660 2661 return self.expression( 2662 exp.Insert, 2663 comments=comments, 2664 hint=hint, 2665 is_function=is_function, 2666 this=this, 2667 stored=self._match_text_seq("STORED") and self._parse_stored(), 2668 by_name=self._match_text_seq("BY", "NAME"), 2669 exists=self._parse_exists(), 2670 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2671 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2672 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2673 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2674 conflict=self._parse_on_conflict(), 2675 returning=returning or self._parse_returning(), 2676 overwrite=overwrite, 2677 alternative=alternative, 2678 ignore=ignore, 2679 source=self._match(TokenType.TABLE) and self._parse_table(), 2680 ) 2681 2682 def _parse_kill(self) -> exp.Kill: 2683 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2684 2685 return self.expression( 2686 exp.Kill, 2687 this=self._parse_primary(), 2688 kind=kind, 2689 ) 2690 2691 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2692 conflict = self._match_text_seq("ON", "CONFLICT") 2693 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2694 2695 if not conflict and not duplicate: 2696 return None 2697 2698 conflict_keys = None 2699 constraint = None 2700 2701 if conflict: 2702 if self._match_text_seq("ON", "CONSTRAINT"): 2703 constraint = self._parse_id_var() 2704 elif self._match(TokenType.L_PAREN): 2705 conflict_keys = self._parse_csv(self._parse_id_var) 2706 self._match_r_paren() 2707 2708 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2709 if self._prev.token_type == TokenType.UPDATE: 2710 self._match(TokenType.SET) 2711 expressions = self._parse_csv(self._parse_equality) 2712 else: 2713 expressions = None 2714 2715 return self.expression( 2716 exp.OnConflict, 2717 duplicate=duplicate, 2718 expressions=expressions, 2719 action=action, 2720 conflict_keys=conflict_keys, 2721 constraint=constraint, 2722 ) 2723 2724 def _parse_returning(self) -> t.Optional[exp.Returning]: 2725 if not self._match(TokenType.RETURNING): 2726 return None 2727 return self.expression( 2728 exp.Returning, 2729 expressions=self._parse_csv(self._parse_expression), 2730 into=self._match(TokenType.INTO) and self._parse_table_part(), 2731 ) 2732 2733 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2734 if not self._match(TokenType.FORMAT): 2735 return None 2736 return self._parse_row_format() 2737 2738 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2739 index = self._index 2740 with_ = with_ or self._match_text_seq("WITH") 2741 2742 if not self._match(TokenType.SERDE_PROPERTIES): 2743 self._retreat(index) 2744 return 
None 2745 return self.expression( 2746 exp.SerdeProperties, 2747 **{ # type: ignore 2748 "expressions": self._parse_wrapped_properties(), 2749 "with": with_, 2750 }, 2751 ) 2752 2753 def _parse_row_format( 2754 self, match_row: bool = False 2755 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2756 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2757 return None 2758 2759 if self._match_text_seq("SERDE"): 2760 this = self._parse_string() 2761 2762 serde_properties = self._parse_serde_properties() 2763 2764 return self.expression( 2765 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2766 ) 2767 2768 self._match_text_seq("DELIMITED") 2769 2770 kwargs = {} 2771 2772 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2773 kwargs["fields"] = self._parse_string() 2774 if self._match_text_seq("ESCAPED", "BY"): 2775 kwargs["escaped"] = self._parse_string() 2776 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2777 kwargs["collection_items"] = self._parse_string() 2778 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2779 kwargs["map_keys"] = self._parse_string() 2780 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2781 kwargs["lines"] = self._parse_string() 2782 if self._match_text_seq("NULL", "DEFINED", "AS"): 2783 kwargs["null"] = self._parse_string() 2784 2785 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2786 2787 def _parse_load(self) -> exp.LoadData | exp.Command: 2788 if self._match_text_seq("DATA"): 2789 local = self._match_text_seq("LOCAL") 2790 self._match_text_seq("INPATH") 2791 inpath = self._parse_string() 2792 overwrite = self._match(TokenType.OVERWRITE) 2793 self._match_pair(TokenType.INTO, TokenType.TABLE) 2794 2795 return self.expression( 2796 exp.LoadData, 2797 this=self._parse_table(schema=True), 2798 local=local, 2799 overwrite=overwrite, 2800 inpath=inpath, 2801 partition=self._parse_partition(), 2802 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2803 serde=self._match_text_seq("SERDE") and self._parse_string(), 2804 ) 2805 return self._parse_as_command(self._prev) 2806 2807 def _parse_delete(self) -> exp.Delete: 2808 # This handles MySQL's "Multiple-Table Syntax" 2809 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2810 tables = None 2811 comments = self._prev_comments 2812 if not self._match(TokenType.FROM, advance=False): 2813 tables = self._parse_csv(self._parse_table) or None 2814 2815 returning = self._parse_returning() 2816 2817 return self.expression( 2818 exp.Delete, 2819 comments=comments, 2820 tables=tables, 2821 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2822 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2823 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2824 where=self._parse_where(), 2825 returning=returning or self._parse_returning(), 2826 limit=self._parse_limit(), 2827 ) 2828 2829 def _parse_update(self) -> exp.Update: 2830 comments = self._prev_comments 2831 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2832 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2833 returning = self._parse_returning() 2834 return self.expression( 2835 exp.Update, 2836 comments=comments, 2837 **{ # type: ignore 2838 "this": this, 2839 "expressions": expressions, 2840 "from": self._parse_from(joins=True), 2841 "where": self._parse_where(), 2842 "returning": returning or 
self._parse_returning(), 2843 "order": self._parse_order(), 2844 "limit": self._parse_limit(), 2845 }, 2846 ) 2847 2848 def _parse_uncache(self) -> exp.Uncache: 2849 if not self._match(TokenType.TABLE): 2850 self.raise_error("Expecting TABLE after UNCACHE") 2851 2852 return self.expression( 2853 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2854 ) 2855 2856 def _parse_cache(self) -> exp.Cache: 2857 lazy = self._match_text_seq("LAZY") 2858 self._match(TokenType.TABLE) 2859 table = self._parse_table(schema=True) 2860 2861 options = [] 2862 if self._match_text_seq("OPTIONS"): 2863 self._match_l_paren() 2864 k = self._parse_string() 2865 self._match(TokenType.EQ) 2866 v = self._parse_string() 2867 options = [k, v] 2868 self._match_r_paren() 2869 2870 self._match(TokenType.ALIAS) 2871 return self.expression( 2872 exp.Cache, 2873 this=table, 2874 lazy=lazy, 2875 options=options, 2876 expression=self._parse_select(nested=True), 2877 ) 2878 2879 def _parse_partition(self) -> t.Optional[exp.Partition]: 2880 if not self._match(TokenType.PARTITION): 2881 return None 2882 2883 return self.expression( 2884 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2885 ) 2886 2887 def _parse_value(self) -> t.Optional[exp.Tuple]: 2888 if self._match(TokenType.L_PAREN): 2889 expressions = self._parse_csv(self._parse_expression) 2890 self._match_r_paren() 2891 return self.expression(exp.Tuple, expressions=expressions) 2892 2893 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2894 expression = self._parse_expression() 2895 if expression: 2896 return self.expression(exp.Tuple, expressions=[expression]) 2897 return None 2898 2899 def _parse_projections(self) -> t.List[exp.Expression]: 2900 return self._parse_expressions() 2901 2902 def _parse_select( 2903 self, 2904 nested: bool = False, 2905 table: bool = False, 2906 parse_subquery_alias: bool = True, 2907 parse_set_operation: bool = True, 2908 ) -> t.Optional[exp.Expression]: 2909 cte = self._parse_with() 2910 2911 if cte: 2912 this = self._parse_statement() 2913 2914 if not this: 2915 self.raise_error("Failed to parse any statement following CTE") 2916 return cte 2917 2918 if "with" in this.arg_types: 2919 this.set("with", cte) 2920 else: 2921 self.raise_error(f"{this.key} does not support CTE") 2922 this = cte 2923 2924 return this 2925 2926 # duckdb supports leading with FROM x 2927 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2928 2929 if self._match(TokenType.SELECT): 2930 comments = self._prev_comments 2931 2932 hint = self._parse_hint() 2933 2934 if self._next and not self._next.token_type == TokenType.DOT: 2935 all_ = self._match(TokenType.ALL) 2936 distinct = self._match_set(self.DISTINCT_TOKENS) 2937 else: 2938 all_, distinct = None, None 2939 2940 kind = ( 2941 self._match(TokenType.ALIAS) 2942 and self._match_texts(("STRUCT", "VALUE")) 2943 and self._prev.text.upper() 2944 ) 2945 2946 if distinct: 2947 distinct = self.expression( 2948 exp.Distinct, 2949 on=self._parse_value() if self._match(TokenType.ON) else None, 2950 ) 2951 2952 if all_ and distinct: 2953 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2954 2955 limit = self._parse_limit(top=True) 2956 projections = self._parse_projections() 2957 2958 this = self.expression( 2959 exp.Select, 2960 kind=kind, 2961 hint=hint, 2962 distinct=distinct, 2963 expressions=projections, 2964 limit=limit, 2965 ) 2966 this.comments = comments 2967 2968 into = 
self._parse_into() 2969 if into: 2970 this.set("into", into) 2971 2972 if not from_: 2973 from_ = self._parse_from() 2974 2975 if from_: 2976 this.set("from", from_) 2977 2978 this = self._parse_query_modifiers(this) 2979 elif (table or nested) and self._match(TokenType.L_PAREN): 2980 if self._match(TokenType.PIVOT): 2981 this = self._parse_simplified_pivot() 2982 elif self._match(TokenType.FROM): 2983 this = exp.select("*").from_( 2984 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2985 ) 2986 else: 2987 this = ( 2988 self._parse_table() 2989 if table 2990 else self._parse_select(nested=True, parse_set_operation=False) 2991 ) 2992 2993 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 2994 # in case a modifier (e.g. join) is following 2995 if table and isinstance(this, exp.Values) and this.alias: 2996 alias = this.args["alias"].pop() 2997 this = exp.Table(this=this, alias=alias) 2998 2999 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3000 3001 self._match_r_paren() 3002 3003 # We return early here so that the UNION isn't attached to the subquery by the 3004 # following call to _parse_set_operations, but instead becomes the parent node 3005 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3006 elif self._match(TokenType.VALUES, advance=False): 3007 this = self._parse_derived_table_values() 3008 elif from_: 3009 this = exp.select("*").from_(from_.this, copy=False) 3010 elif self._match(TokenType.SUMMARIZE): 3011 table = self._match(TokenType.TABLE) 3012 this = self._parse_select() or self._parse_string() or self._parse_table() 3013 return self.expression(exp.Summarize, this=this, table=table) 3014 elif self._match(TokenType.DESCRIBE): 3015 this = self._parse_describe() 3016 elif self._match_text_seq("STREAM"): 3017 this = self.expression(exp.Stream, this=self._parse_function()) 3018 else: 3019 this = None 3020 3021 return self._parse_set_operations(this) if parse_set_operation else this 3022 3023 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3024 if not skip_with_token and not self._match(TokenType.WITH): 3025 return None 3026 3027 comments = self._prev_comments 3028 recursive = self._match(TokenType.RECURSIVE) 3029 3030 expressions = [] 3031 while True: 3032 expressions.append(self._parse_cte()) 3033 3034 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3035 break 3036 else: 3037 self._match(TokenType.WITH) 3038 3039 return self.expression( 3040 exp.With, comments=comments, expressions=expressions, recursive=recursive 3041 ) 3042 3043 def _parse_cte(self) -> exp.CTE: 3044 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3045 if not alias or not alias.this: 3046 self.raise_error("Expected CTE to have alias") 3047 3048 self._match(TokenType.ALIAS) 3049 comments = self._prev_comments 3050 3051 if self._match_text_seq("NOT", "MATERIALIZED"): 3052 materialized = False 3053 elif self._match_text_seq("MATERIALIZED"): 3054 materialized = True 3055 else: 3056 materialized = None 3057 3058 return self.expression( 3059 exp.CTE, 3060 this=self._parse_wrapped(self._parse_statement), 3061 alias=alias, 3062 materialized=materialized, 3063 comments=comments, 3064 ) 3065 3066 def _parse_table_alias( 3067 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3068 ) -> t.Optional[exp.TableAlias]: 3069 any_token = self._match(TokenType.ALIAS) 3070 alias = ( 3071 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3072 or 
self._parse_string_as_identifier() 3073 ) 3074 3075 index = self._index 3076 if self._match(TokenType.L_PAREN): 3077 columns = self._parse_csv(self._parse_function_parameter) 3078 self._match_r_paren() if columns else self._retreat(index) 3079 else: 3080 columns = None 3081 3082 if not alias and not columns: 3083 return None 3084 3085 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3086 3087 # We bubble up comments from the Identifier to the TableAlias 3088 if isinstance(alias, exp.Identifier): 3089 table_alias.add_comments(alias.pop_comments()) 3090 3091 return table_alias 3092 3093 def _parse_subquery( 3094 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3095 ) -> t.Optional[exp.Subquery]: 3096 if not this: 3097 return None 3098 3099 return self.expression( 3100 exp.Subquery, 3101 this=this, 3102 pivots=self._parse_pivots(), 3103 alias=self._parse_table_alias() if parse_alias else None, 3104 sample=self._parse_table_sample(), 3105 ) 3106 3107 def _implicit_unnests_to_explicit(self, this: E) -> E: 3108 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3109 3110 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3111 for i, join in enumerate(this.args.get("joins") or []): 3112 table = join.this 3113 normalized_table = table.copy() 3114 normalized_table.meta["maybe_column"] = True 3115 normalized_table = _norm(normalized_table, dialect=self.dialect) 3116 3117 if isinstance(table, exp.Table) and not join.args.get("on"): 3118 if normalized_table.parts[0].name in refs: 3119 table_as_column = table.to_column() 3120 unnest = exp.Unnest(expressions=[table_as_column]) 3121 3122 # Table.to_column creates a parent Alias node that we want to convert to 3123 # a TableAlias and attach to the Unnest, so it matches the parser's output 3124 if isinstance(table.args.get("alias"), exp.TableAlias): 3125 table_as_column.replace(table_as_column.this) 3126 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3127 3128 table.replace(unnest) 3129 3130 refs.add(normalized_table.alias_or_name) 3131 3132 return this 3133 3134 def _parse_query_modifiers( 3135 self, this: t.Optional[exp.Expression] 3136 ) -> t.Optional[exp.Expression]: 3137 if isinstance(this, (exp.Query, exp.Table)): 3138 for join in self._parse_joins(): 3139 this.append("joins", join) 3140 for lateral in iter(self._parse_lateral, None): 3141 this.append("laterals", lateral) 3142 3143 while True: 3144 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3145 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3146 key, expression = parser(self) 3147 3148 if expression: 3149 this.set(key, expression) 3150 if key == "limit": 3151 offset = expression.args.pop("offset", None) 3152 3153 if offset: 3154 offset = exp.Offset(expression=offset) 3155 this.set("offset", offset) 3156 3157 limit_by_expressions = expression.expressions 3158 expression.set("expressions", None) 3159 offset.set("expressions", limit_by_expressions) 3160 continue 3161 break 3162 3163 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3164 this = self._implicit_unnests_to_explicit(this) 3165 3166 return this 3167 3168 def _parse_hint(self) -> t.Optional[exp.Hint]: 3169 if self._match(TokenType.HINT): 3170 hints = [] 3171 for hint in iter( 3172 lambda: self._parse_csv( 3173 lambda: self._parse_function() or self._parse_var(upper=True) 3174 ), 3175 [], 3176 ): 3177 hints.extend(hint) 3178 3179 if not 
self._match_pair(TokenType.STAR, TokenType.SLASH): 3180 self.raise_error("Expected */ after HINT") 3181 3182 return self.expression(exp.Hint, expressions=hints) 3183 3184 return None 3185 3186 def _parse_into(self) -> t.Optional[exp.Into]: 3187 if not self._match(TokenType.INTO): 3188 return None 3189 3190 temp = self._match(TokenType.TEMPORARY) 3191 unlogged = self._match_text_seq("UNLOGGED") 3192 self._match(TokenType.TABLE) 3193 3194 return self.expression( 3195 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3196 ) 3197 3198 def _parse_from( 3199 self, joins: bool = False, skip_from_token: bool = False 3200 ) -> t.Optional[exp.From]: 3201 if not skip_from_token and not self._match(TokenType.FROM): 3202 return None 3203 3204 return self.expression( 3205 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3206 ) 3207 3208 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3209 return self.expression( 3210 exp.MatchRecognizeMeasure, 3211 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3212 this=self._parse_expression(), 3213 ) 3214 3215 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3216 if not self._match(TokenType.MATCH_RECOGNIZE): 3217 return None 3218 3219 self._match_l_paren() 3220 3221 partition = self._parse_partition_by() 3222 order = self._parse_order() 3223 3224 measures = ( 3225 self._parse_csv(self._parse_match_recognize_measure) 3226 if self._match_text_seq("MEASURES") 3227 else None 3228 ) 3229 3230 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3231 rows = exp.var("ONE ROW PER MATCH") 3232 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3233 text = "ALL ROWS PER MATCH" 3234 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3235 text += " SHOW EMPTY MATCHES" 3236 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3237 text += " OMIT EMPTY MATCHES" 3238 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3239 text += " WITH UNMATCHED ROWS" 3240 rows = exp.var(text) 3241 else: 3242 rows = None 3243 3244 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3245 text = "AFTER MATCH SKIP" 3246 if self._match_text_seq("PAST", "LAST", "ROW"): 3247 text += " PAST LAST ROW" 3248 elif self._match_text_seq("TO", "NEXT", "ROW"): 3249 text += " TO NEXT ROW" 3250 elif self._match_text_seq("TO", "FIRST"): 3251 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3252 elif self._match_text_seq("TO", "LAST"): 3253 text += f" TO LAST {self._advance_any().text}" # type: ignore 3254 after = exp.var(text) 3255 else: 3256 after = None 3257 3258 if self._match_text_seq("PATTERN"): 3259 self._match_l_paren() 3260 3261 if not self._curr: 3262 self.raise_error("Expecting )", self._curr) 3263 3264 paren = 1 3265 start = self._curr 3266 3267 while self._curr and paren > 0: 3268 if self._curr.token_type == TokenType.L_PAREN: 3269 paren += 1 3270 if self._curr.token_type == TokenType.R_PAREN: 3271 paren -= 1 3272 3273 end = self._prev 3274 self._advance() 3275 3276 if paren > 0: 3277 self.raise_error("Expecting )", self._curr) 3278 3279 pattern = exp.var(self._find_sql(start, end)) 3280 else: 3281 pattern = None 3282 3283 define = ( 3284 self._parse_csv(self._parse_name_as_expression) 3285 if self._match_text_seq("DEFINE") 3286 else None 3287 ) 3288 3289 self._match_r_paren() 3290 3291 return self.expression( 3292 exp.MatchRecognize, 3293 partition_by=partition, 3294 order=order, 3295 measures=measures, 3296 rows=rows, 3297 after=after, 
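# NOTE (editor's comment, hedged): the PATTERN body scanned above is stored
# verbatim as an exp.Var (the scan only balances parentheses), while DEFINE
# entries are parsed as name-to-expression pairs.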
3298 pattern=pattern, 3299 define=define, 3300 alias=self._parse_table_alias(), 3301 ) 3302 3303 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3304 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3305 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3306 cross_apply = False 3307 3308 if cross_apply is not None: 3309 this = self._parse_select(table=True) 3310 view = None 3311 outer = None 3312 elif self._match(TokenType.LATERAL): 3313 this = self._parse_select(table=True) 3314 view = self._match(TokenType.VIEW) 3315 outer = self._match(TokenType.OUTER) 3316 else: 3317 return None 3318 3319 if not this: 3320 this = ( 3321 self._parse_unnest() 3322 or self._parse_function() 3323 or self._parse_id_var(any_token=False) 3324 ) 3325 3326 while self._match(TokenType.DOT): 3327 this = exp.Dot( 3328 this=this, 3329 expression=self._parse_function() or self._parse_id_var(any_token=False), 3330 ) 3331 3332 if view: 3333 table = self._parse_id_var(any_token=False) 3334 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3335 table_alias: t.Optional[exp.TableAlias] = self.expression( 3336 exp.TableAlias, this=table, columns=columns 3337 ) 3338 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3339 # We move the alias from the lateral's child node to the lateral itself 3340 table_alias = this.args["alias"].pop() 3341 else: 3342 table_alias = self._parse_table_alias() 3343 3344 return self.expression( 3345 exp.Lateral, 3346 this=this, 3347 view=view, 3348 outer=outer, 3349 alias=table_alias, 3350 cross_apply=cross_apply, 3351 ) 3352 3353 def _parse_join_parts( 3354 self, 3355 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3356 return ( 3357 self._match_set(self.JOIN_METHODS) and self._prev, 3358 self._match_set(self.JOIN_SIDES) and self._prev, 3359 self._match_set(self.JOIN_KINDS) and self._prev, 3360 ) 3361 3362 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3363 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3364 this = self._parse_column() 3365 if isinstance(this, exp.Column): 3366 return this.this 3367 return this 3368 3369 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3370 3371 def _parse_join( 3372 self, skip_join_token: bool = False, parse_bracket: bool = False 3373 ) -> t.Optional[exp.Join]: 3374 if self._match(TokenType.COMMA): 3375 return self.expression(exp.Join, this=self._parse_table()) 3376 3377 index = self._index 3378 method, side, kind = self._parse_join_parts() 3379 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3380 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3381 3382 if not skip_join_token and not join: 3383 self._retreat(index) 3384 kind = None 3385 method = None 3386 side = None 3387 3388 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3389 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3390 3391 if not skip_join_token and not join and not outer_apply and not cross_apply: 3392 return None 3393 3394 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3395 3396 if method: 3397 kwargs["method"] = method.text 3398 if side: 3399 kwargs["side"] = side.text 3400 if kind: 3401 kwargs["kind"] = kind.text 3402 if hint: 3403 kwargs["hint"] = hint 3404 3405 if self._match(TokenType.MATCH_CONDITION): 3406 kwargs["match_condition"] = 
self._parse_wrapped(self._parse_comparison) 3407 3408 if self._match(TokenType.ON): 3409 kwargs["on"] = self._parse_assignment() 3410 elif self._match(TokenType.USING): 3411 kwargs["using"] = self._parse_using_identifiers() 3412 elif ( 3413 not (outer_apply or cross_apply) 3414 and not isinstance(kwargs["this"], exp.Unnest) 3415 and not (kind and kind.token_type == TokenType.CROSS) 3416 ): 3417 index = self._index 3418 joins: t.Optional[list] = list(self._parse_joins()) 3419 3420 if joins and self._match(TokenType.ON): 3421 kwargs["on"] = self._parse_assignment() 3422 elif joins and self._match(TokenType.USING): 3423 kwargs["using"] = self._parse_using_identifiers() 3424 else: 3425 joins = None 3426 self._retreat(index) 3427 3428 kwargs["this"].set("joins", joins if joins else None) 3429 3430 comments = [c for token in (method, side, kind) if token for c in token.comments] 3431 return self.expression(exp.Join, comments=comments, **kwargs) 3432 3433 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3434 this = self._parse_assignment() 3435 3436 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3437 return this 3438 3439 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3440 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3441 3442 return this 3443 3444 def _parse_index_params(self) -> exp.IndexParameters: 3445 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3446 3447 if self._match(TokenType.L_PAREN, advance=False): 3448 columns = self._parse_wrapped_csv(self._parse_with_operator) 3449 else: 3450 columns = None 3451 3452 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3453 partition_by = self._parse_partition_by() 3454 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3455 tablespace = ( 3456 self._parse_var(any_token=True) 3457 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3458 else None 3459 ) 3460 where = self._parse_where() 3461 3462 on = self._parse_field() if self._match(TokenType.ON) else None 3463 3464 return self.expression( 3465 exp.IndexParameters, 3466 using=using, 3467 columns=columns, 3468 include=include, 3469 partition_by=partition_by, 3470 where=where, 3471 with_storage=with_storage, 3472 tablespace=tablespace, 3473 on=on, 3474 ) 3475 3476 def _parse_index( 3477 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3478 ) -> t.Optional[exp.Index]: 3479 if index or anonymous: 3480 unique = None 3481 primary = None 3482 amp = None 3483 3484 self._match(TokenType.ON) 3485 self._match(TokenType.TABLE) # hive 3486 table = self._parse_table_parts(schema=True) 3487 else: 3488 unique = self._match(TokenType.UNIQUE) 3489 primary = self._match_text_seq("PRIMARY") 3490 amp = self._match_text_seq("AMP") 3491 3492 if not self._match(TokenType.INDEX): 3493 return None 3494 3495 index = self._parse_id_var() 3496 table = None 3497 3498 params = self._parse_index_params() 3499 3500 return self.expression( 3501 exp.Index, 3502 this=index, 3503 table=table, 3504 unique=unique, 3505 primary=primary, 3506 amp=amp, 3507 params=params, 3508 ) 3509 3510 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3511 hints: t.List[exp.Expression] = [] 3512 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3513 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3514 hints.append( 3515 self.expression( 3516 exp.WithTableHint, 3517 
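# NOTE (editor's comment, hedged): each T-SQL table hint may be a bare keyword
# (e.g. NOLOCK) or function-shaped (e.g. INDEX(ix)), hence the
# function-or-variable parsing below.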
expressions=self._parse_csv( 3518 lambda: self._parse_function() or self._parse_var(any_token=True) 3519 ), 3520 ) 3521 ) 3522 self._match_r_paren() 3523 else: 3524 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3525 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3526 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3527 3528 self._match_set((TokenType.INDEX, TokenType.KEY)) 3529 if self._match(TokenType.FOR): 3530 hint.set("target", self._advance_any() and self._prev.text.upper()) 3531 3532 hint.set("expressions", self._parse_wrapped_id_vars()) 3533 hints.append(hint) 3534 3535 return hints or None 3536 3537 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3538 return ( 3539 (not schema and self._parse_function(optional_parens=False)) 3540 or self._parse_id_var(any_token=False) 3541 or self._parse_string_as_identifier() 3542 or self._parse_placeholder() 3543 ) 3544 3545 def _parse_table_parts( 3546 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3547 ) -> exp.Table: 3548 catalog = None 3549 db = None 3550 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3551 3552 while self._match(TokenType.DOT): 3553 if catalog: 3554 # This allows nesting the table in arbitrarily many dot expressions if needed 3555 table = self.expression( 3556 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3557 ) 3558 else: 3559 catalog = db 3560 db = table 3561 # "" used for tsql FROM a..b case 3562 table = self._parse_table_part(schema=schema) or "" 3563 3564 if ( 3565 wildcard 3566 and self._is_connected() 3567 and (isinstance(table, exp.Identifier) or not table) 3568 and self._match(TokenType.STAR) 3569 ): 3570 if isinstance(table, exp.Identifier): 3571 table.args["this"] += "*" 3572 else: 3573 table = exp.Identifier(this="*") 3574 3575 # We bubble up comments from the Identifier to the Table 3576 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3577 3578 if is_db_reference: 3579 catalog = db 3580 db = table 3581 table = None 3582 3583 if not table and not is_db_reference: 3584 self.raise_error(f"Expected table name but got {self._curr}") 3585 if not db and is_db_reference: 3586 self.raise_error(f"Expected database name but got {self._curr}") 3587 3588 table = self.expression( 3589 exp.Table, 3590 comments=comments, 3591 this=table, 3592 db=db, 3593 catalog=catalog, 3594 ) 3595 3596 changes = self._parse_changes() 3597 if changes: 3598 table.set("changes", changes) 3599 3600 at_before = self._parse_historical_data() 3601 if at_before: 3602 table.set("when", at_before) 3603 3604 pivots = self._parse_pivots() 3605 if pivots: 3606 table.set("pivots", pivots) 3607 3608 return table 3609 3610 def _parse_table( 3611 self, 3612 schema: bool = False, 3613 joins: bool = False, 3614 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3615 parse_bracket: bool = False, 3616 is_db_reference: bool = False, 3617 parse_partition: bool = False, 3618 ) -> t.Optional[exp.Expression]: 3619 lateral = self._parse_lateral() 3620 if lateral: 3621 return lateral 3622 3623 unnest = self._parse_unnest() 3624 if unnest: 3625 return unnest 3626 3627 values = self._parse_derived_table_values() 3628 if values: 3629 return values 3630 3631 subquery = self._parse_select(table=True) 3632 if subquery: 3633 if not subquery.args.get("pivots"): 3634 subquery.set("pivots", self._parse_pivots()) 3635 return subquery 3636 3637 bracket = parse_bracket and 
self._parse_bracket(None) 3638 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3639 3640 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3641 self._parse_table 3642 ) 3643 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3644 3645 only = self._match(TokenType.ONLY) 3646 3647 this = t.cast( 3648 exp.Expression, 3649 bracket 3650 or rows_from 3651 or self._parse_bracket( 3652 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3653 ), 3654 ) 3655 3656 if only: 3657 this.set("only", only) 3658 3659 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3660 self._match_text_seq("*") 3661 3662 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3663 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3664 this.set("partition", self._parse_partition()) 3665 3666 if schema: 3667 return self._parse_schema(this=this) 3668 3669 version = self._parse_version() 3670 3671 if version: 3672 this.set("version", version) 3673 3674 if self.dialect.ALIAS_POST_TABLESAMPLE: 3675 this.set("sample", self._parse_table_sample()) 3676 3677 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3678 if alias: 3679 this.set("alias", alias) 3680 3681 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3682 return self.expression( 3683 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3684 ) 3685 3686 this.set("hints", self._parse_table_hints()) 3687 3688 if not this.args.get("pivots"): 3689 this.set("pivots", self._parse_pivots()) 3690 3691 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3692 this.set("sample", self._parse_table_sample()) 3693 3694 if joins: 3695 for join in self._parse_joins(): 3696 this.append("joins", join) 3697 3698 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3699 this.set("ordinality", True) 3700 this.set("alias", self._parse_table_alias()) 3701 3702 return this 3703 3704 def _parse_version(self) -> t.Optional[exp.Version]: 3705 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3706 this = "TIMESTAMP" 3707 elif self._match(TokenType.VERSION_SNAPSHOT): 3708 this = "VERSION" 3709 else: 3710 return None 3711 3712 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3713 kind = self._prev.text.upper() 3714 start = self._parse_bitwise() 3715 self._match_texts(("TO", "AND")) 3716 end = self._parse_bitwise() 3717 expression: t.Optional[exp.Expression] = self.expression( 3718 exp.Tuple, expressions=[start, end] 3719 ) 3720 elif self._match_text_seq("CONTAINED", "IN"): 3721 kind = "CONTAINED IN" 3722 expression = self.expression( 3723 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3724 ) 3725 elif self._match(TokenType.ALL): 3726 kind = "ALL" 3727 expression = None 3728 else: 3729 self._match_text_seq("AS", "OF") 3730 kind = "AS OF" 3731 expression = self._parse_type() 3732 3733 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3734 3735 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3736 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3737 index = self._index 3738 historical_data = None 3739 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3740 this = self._prev.text.upper() 3741 kind = ( 3742 self._match(TokenType.L_PAREN) 3743 and self._match_texts(self.HISTORICAL_DATA_KIND) 3744 and self._prev.text.upper() 3745 ) 3746 expression = 
self._match(TokenType.FARROW) and self._parse_bitwise()
3747
3748             if expression:
3749                 self._match_r_paren()
3750                 historical_data = self.expression(
3751                     exp.HistoricalData, this=this, kind=kind, expression=expression
3752                 )
3753             else:
3754                 self._retreat(index)
3755
3756         return historical_data
3757
3758     def _parse_changes(self) -> t.Optional[exp.Changes]:
3759         if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
3760             return None
3761
3762         information = self._parse_var(any_token=True)
3763         self._match_r_paren()
3764
3765         return self.expression(
3766             exp.Changes,
3767             information=information,
3768             at_before=self._parse_historical_data(),
3769             end=self._parse_historical_data(),
3770         )
3771
3772     def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
3773         if not self._match(TokenType.UNNEST):
3774             return None
3775
3776         expressions = self._parse_wrapped_csv(self._parse_equality)
3777         offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
3778
3779         alias = self._parse_table_alias() if with_alias else None
3780
3781         if alias:
3782             if self.dialect.UNNEST_COLUMN_ONLY:
3783                 if alias.args.get("columns"):
3784                     self.raise_error("Unexpected extra column alias in unnest.")
3785
3786                 alias.set("columns", [alias.this])
3787                 alias.set("this", None)
3788
3789             columns = alias.args.get("columns") or []
3790             if offset and len(expressions) < len(columns):
3791                 offset = columns.pop()
3792
3793         if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
3794             self._match(TokenType.ALIAS)
3795             offset = self._parse_id_var(
3796                 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
3797             ) or exp.to_identifier("offset")
3798
3799         return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
3800
3801     def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
3802         is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
3803         if not is_derived and not (
3804             # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
3805             self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
3806         ):
3807             return None
3808
3809         expressions = self._parse_csv(self._parse_value)
3810         alias = self._parse_table_alias()
3811
3812         if is_derived:
3813             self._match_r_paren()
3814
3815         return self.expression(
3816             exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
3817         )
3818
3819     def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
3820         if not self._match(TokenType.TABLE_SAMPLE) and not (
3821             as_modifier and self._match_text_seq("USING", "SAMPLE")
3822         ):
3823             return None
3824
3825         bucket_numerator = None
3826         bucket_denominator = None
3827         bucket_field = None
3828         percent = None
3829         size = None
3830         seed = None
3831
3832         method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
3833         matched_l_paren = self._match(TokenType.L_PAREN)
3834
3835         if self.TABLESAMPLE_CSV:
3836             num = None
3837             expressions = self._parse_csv(self._parse_primary)
3838         else:
3839             expressions = None
3840             num = (
3841                 self._parse_factor()
3842                 if self._match(TokenType.NUMBER, advance=False)
3843                 else self._parse_primary() or self._parse_placeholder()
3844             )
3845
3846         if self._match_text_seq("BUCKET"):
3847             bucket_numerator = self._parse_number()
3848             self._match_text_seq("OUT", "OF")
3849             bucket_denominator = self._parse_number()
3850             self._match(TokenType.ON)
3851             bucket_field = self._parse_field()
3852         elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3853             percent = num
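        # Illustrative sketch (assumed round-trip via the public API, not part
        # of this module): depending on the branch taken in this if-chain, the
        # parsed numerator lands in either "percent" or "size" of the
        # exp.TableSample node built at the end of this method, e.g.
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> node = sqlglot.parse_one(
        #   ...     "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
        #   ... ).find(exp.TableSample)
        #   >>> node.args.get("percent")  # expected: the parsed literal 10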
3854 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3855 size = num 3856 else: 3857 percent = num 3858 3859 if matched_l_paren: 3860 self._match_r_paren() 3861 3862 if self._match(TokenType.L_PAREN): 3863 method = self._parse_var(upper=True) 3864 seed = self._match(TokenType.COMMA) and self._parse_number() 3865 self._match_r_paren() 3866 elif self._match_texts(("SEED", "REPEATABLE")): 3867 seed = self._parse_wrapped(self._parse_number) 3868 3869 if not method and self.DEFAULT_SAMPLING_METHOD: 3870 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3871 3872 return self.expression( 3873 exp.TableSample, 3874 expressions=expressions, 3875 method=method, 3876 bucket_numerator=bucket_numerator, 3877 bucket_denominator=bucket_denominator, 3878 bucket_field=bucket_field, 3879 percent=percent, 3880 size=size, 3881 seed=seed, 3882 ) 3883 3884 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3885 return list(iter(self._parse_pivot, None)) or None 3886 3887 def _parse_joins(self) -> t.Iterator[exp.Join]: 3888 return iter(self._parse_join, None) 3889 3890 # https://duckdb.org/docs/sql/statements/pivot 3891 def _parse_simplified_pivot(self) -> exp.Pivot: 3892 def _parse_on() -> t.Optional[exp.Expression]: 3893 this = self._parse_bitwise() 3894 return self._parse_in(this) if self._match(TokenType.IN) else this 3895 3896 this = self._parse_table() 3897 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3898 using = self._match(TokenType.USING) and self._parse_csv( 3899 lambda: self._parse_alias(self._parse_function()) 3900 ) 3901 group = self._parse_group() 3902 return self.expression( 3903 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3904 ) 3905 3906 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3907 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3908 this = self._parse_select_or_expression() 3909 3910 self._match(TokenType.ALIAS) 3911 alias = self._parse_bitwise() 3912 if alias: 3913 if isinstance(alias, exp.Column) and not alias.db: 3914 alias = alias.this 3915 return self.expression(exp.PivotAlias, this=this, alias=alias) 3916 3917 return this 3918 3919 value = self._parse_column() 3920 3921 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3922 self.raise_error("Expecting IN (") 3923 3924 if self._match(TokenType.ANY): 3925 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3926 else: 3927 exprs = self._parse_csv(_parse_aliased_expression) 3928 3929 self._match_r_paren() 3930 return self.expression(exp.In, this=value, expressions=exprs) 3931 3932 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3933 index = self._index 3934 include_nulls = None 3935 3936 if self._match(TokenType.PIVOT): 3937 unpivot = False 3938 elif self._match(TokenType.UNPIVOT): 3939 unpivot = True 3940 3941 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3942 if self._match_text_seq("INCLUDE", "NULLS"): 3943 include_nulls = True 3944 elif self._match_text_seq("EXCLUDE", "NULLS"): 3945 include_nulls = False 3946 else: 3947 return None 3948 3949 expressions = [] 3950 3951 if not self._match(TokenType.L_PAREN): 3952 self._retreat(index) 3953 return None 3954 3955 if unpivot: 3956 expressions = self._parse_csv(self._parse_column) 3957 else: 3958 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3959 3960 if not expressions: 3961 self.raise_error("Failed to parse PIVOT's aggregation list") 3962 3963 if 
not self._match(TokenType.FOR): 3964 self.raise_error("Expecting FOR") 3965 3966 field = self._parse_pivot_in() 3967 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3968 self._parse_bitwise 3969 ) 3970 3971 self._match_r_paren() 3972 3973 pivot = self.expression( 3974 exp.Pivot, 3975 expressions=expressions, 3976 field=field, 3977 unpivot=unpivot, 3978 include_nulls=include_nulls, 3979 default_on_null=default_on_null, 3980 ) 3981 3982 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3983 pivot.set("alias", self._parse_table_alias()) 3984 3985 if not unpivot: 3986 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3987 3988 columns: t.List[exp.Expression] = [] 3989 for fld in pivot.args["field"].expressions: 3990 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3991 for name in names: 3992 if self.PREFIXED_PIVOT_COLUMNS: 3993 name = f"{name}_{field_name}" if name else field_name 3994 else: 3995 name = f"{field_name}_{name}" if name else field_name 3996 3997 columns.append(exp.to_identifier(name)) 3998 3999 pivot.set("columns", columns) 4000 4001 return pivot 4002 4003 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4004 return [agg.alias for agg in aggregations] 4005 4006 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4007 if not skip_where_token and not self._match(TokenType.PREWHERE): 4008 return None 4009 4010 return self.expression( 4011 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4012 ) 4013 4014 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4015 if not skip_where_token and not self._match(TokenType.WHERE): 4016 return None 4017 4018 return self.expression( 4019 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4020 ) 4021 4022 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4023 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4024 return None 4025 4026 elements: t.Dict[str, t.Any] = defaultdict(list) 4027 4028 if self._match(TokenType.ALL): 4029 elements["all"] = True 4030 elif self._match(TokenType.DISTINCT): 4031 elements["all"] = False 4032 4033 while True: 4034 index = self._index 4035 4036 elements["expressions"].extend( 4037 self._parse_csv( 4038 lambda: None 4039 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4040 else self._parse_assignment() 4041 ) 4042 ) 4043 4044 before_with_index = self._index 4045 with_prefix = self._match(TokenType.WITH) 4046 4047 if self._match(TokenType.ROLLUP): 4048 elements["rollup"].append( 4049 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4050 ) 4051 elif self._match(TokenType.CUBE): 4052 elements["cube"].append( 4053 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4054 ) 4055 elif self._match(TokenType.GROUPING_SETS): 4056 elements["grouping_sets"].append( 4057 self.expression( 4058 exp.GroupingSets, 4059 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4060 ) 4061 ) 4062 elif self._match_text_seq("TOTALS"): 4063 elements["totals"] = True # type: ignore 4064 4065 if before_with_index <= self._index <= before_with_index + 1: 4066 self._retreat(before_with_index) 4067 break 4068 4069 if index == self._index: 4070 break 4071 4072 return self.expression(exp.Group, **elements) # type: ignore 4073 4074 def _parse_cube_or_rollup(self, kind: 
t.Type[E], with_prefix: bool = False) -> E: 4075 return self.expression( 4076 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4077 ) 4078 4079 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4080 if self._match(TokenType.L_PAREN): 4081 grouping_set = self._parse_csv(self._parse_column) 4082 self._match_r_paren() 4083 return self.expression(exp.Tuple, expressions=grouping_set) 4084 4085 return self._parse_column() 4086 4087 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4088 if not skip_having_token and not self._match(TokenType.HAVING): 4089 return None 4090 return self.expression(exp.Having, this=self._parse_assignment()) 4091 4092 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4093 if not self._match(TokenType.QUALIFY): 4094 return None 4095 return self.expression(exp.Qualify, this=self._parse_assignment()) 4096 4097 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4098 if skip_start_token: 4099 start = None 4100 elif self._match(TokenType.START_WITH): 4101 start = self._parse_assignment() 4102 else: 4103 return None 4104 4105 self._match(TokenType.CONNECT_BY) 4106 nocycle = self._match_text_seq("NOCYCLE") 4107 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4108 exp.Prior, this=self._parse_bitwise() 4109 ) 4110 connect = self._parse_assignment() 4111 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4112 4113 if not start and self._match(TokenType.START_WITH): 4114 start = self._parse_assignment() 4115 4116 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4117 4118 def _parse_name_as_expression(self) -> exp.Alias: 4119 return self.expression( 4120 exp.Alias, 4121 alias=self._parse_id_var(any_token=True), 4122 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4123 ) 4124 4125 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4126 if self._match_text_seq("INTERPOLATE"): 4127 return self._parse_wrapped_csv(self._parse_name_as_expression) 4128 return None 4129 4130 def _parse_order( 4131 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4132 ) -> t.Optional[exp.Expression]: 4133 siblings = None 4134 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4135 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4136 return this 4137 4138 siblings = True 4139 4140 return self.expression( 4141 exp.Order, 4142 this=this, 4143 expressions=self._parse_csv(self._parse_ordered), 4144 siblings=siblings, 4145 ) 4146 4147 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4148 if not self._match(token): 4149 return None 4150 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4151 4152 def _parse_ordered( 4153 self, parse_method: t.Optional[t.Callable] = None 4154 ) -> t.Optional[exp.Ordered]: 4155 this = parse_method() if parse_method else self._parse_assignment() 4156 if not this: 4157 return None 4158 4159 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4160 this = exp.var("ALL") 4161 4162 asc = self._match(TokenType.ASC) 4163 desc = self._match(TokenType.DESC) or (asc and False) 4164 4165 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4166 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4167 4168 nulls_first = is_nulls_first or False 4169 explicitly_null_ordered = is_nulls_first or is_nulls_last 4170 4171 if ( 4172 not explicitly_null_ordered 4173 and ( 4174 
(not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4175 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4176 ) 4177 and self.dialect.NULL_ORDERING != "nulls_are_last" 4178 ): 4179 nulls_first = True 4180 4181 if self._match_text_seq("WITH", "FILL"): 4182 with_fill = self.expression( 4183 exp.WithFill, 4184 **{ # type: ignore 4185 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4186 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4187 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4188 "interpolate": self._parse_interpolate(), 4189 }, 4190 ) 4191 else: 4192 with_fill = None 4193 4194 return self.expression( 4195 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4196 ) 4197 4198 def _parse_limit( 4199 self, 4200 this: t.Optional[exp.Expression] = None, 4201 top: bool = False, 4202 skip_limit_token: bool = False, 4203 ) -> t.Optional[exp.Expression]: 4204 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4205 comments = self._prev_comments 4206 if top: 4207 limit_paren = self._match(TokenType.L_PAREN) 4208 expression = self._parse_term() if limit_paren else self._parse_number() 4209 4210 if limit_paren: 4211 self._match_r_paren() 4212 else: 4213 expression = self._parse_term() 4214 4215 if self._match(TokenType.COMMA): 4216 offset = expression 4217 expression = self._parse_term() 4218 else: 4219 offset = None 4220 4221 limit_exp = self.expression( 4222 exp.Limit, 4223 this=this, 4224 expression=expression, 4225 offset=offset, 4226 comments=comments, 4227 expressions=self._parse_limit_by(), 4228 ) 4229 4230 return limit_exp 4231 4232 if self._match(TokenType.FETCH): 4233 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4234 direction = self._prev.text.upper() if direction else "FIRST" 4235 4236 count = self._parse_field(tokens=self.FETCH_TOKENS) 4237 percent = self._match(TokenType.PERCENT) 4238 4239 self._match_set((TokenType.ROW, TokenType.ROWS)) 4240 4241 only = self._match_text_seq("ONLY") 4242 with_ties = self._match_text_seq("WITH", "TIES") 4243 4244 if only and with_ties: 4245 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4246 4247 return self.expression( 4248 exp.Fetch, 4249 direction=direction, 4250 count=count, 4251 percent=percent, 4252 with_ties=with_ties, 4253 ) 4254 4255 return this 4256 4257 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4258 if not self._match(TokenType.OFFSET): 4259 return this 4260 4261 count = self._parse_term() 4262 self._match_set((TokenType.ROW, TokenType.ROWS)) 4263 4264 return self.expression( 4265 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4266 ) 4267 4268 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4269 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4270 4271 def _parse_locks(self) -> t.List[exp.Lock]: 4272 locks = [] 4273 while True: 4274 if self._match_text_seq("FOR", "UPDATE"): 4275 update = True 4276 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4277 "LOCK", "IN", "SHARE", "MODE" 4278 ): 4279 update = False 4280 else: 4281 break 4282 4283 expressions = None 4284 if self._match_text_seq("OF"): 4285 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4286 4287 wait: t.Optional[bool | exp.Expression] = None 4288 if self._match_text_seq("NOWAIT"): 4289 wait = True 4290 elif self._match_text_seq("WAIT"): 4291 wait = 
self._parse_primary() 4292 elif self._match_text_seq("SKIP", "LOCKED"): 4293 wait = False 4294 4295 locks.append( 4296 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4297 ) 4298 4299 return locks 4300 4301 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4302 while this and self._match_set(self.SET_OPERATIONS): 4303 token_type = self._prev.token_type 4304 4305 if token_type == TokenType.UNION: 4306 operation: t.Type[exp.SetOperation] = exp.Union 4307 elif token_type == TokenType.EXCEPT: 4308 operation = exp.Except 4309 else: 4310 operation = exp.Intersect 4311 4312 comments = self._prev.comments 4313 4314 if self._match(TokenType.DISTINCT): 4315 distinct: t.Optional[bool] = True 4316 elif self._match(TokenType.ALL): 4317 distinct = False 4318 else: 4319 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4320 if distinct is None: 4321 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4322 4323 by_name = self._match_text_seq("BY", "NAME") 4324 expression = self._parse_select(nested=True, parse_set_operation=False) 4325 4326 this = self.expression( 4327 operation, 4328 comments=comments, 4329 this=this, 4330 distinct=distinct, 4331 by_name=by_name, 4332 expression=expression, 4333 ) 4334 4335 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4336 expression = this.expression 4337 4338 if expression: 4339 for arg in self.SET_OP_MODIFIERS: 4340 expr = expression.args.get(arg) 4341 if expr: 4342 this.set(arg, expr.pop()) 4343 4344 return this 4345 4346 def _parse_expression(self) -> t.Optional[exp.Expression]: 4347 return self._parse_alias(self._parse_assignment()) 4348 4349 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4350 this = self._parse_disjunction() 4351 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4352 # This allows us to parse <non-identifier token> := <expr> 4353 this = exp.column( 4354 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4355 ) 4356 4357 while self._match_set(self.ASSIGNMENT): 4358 if isinstance(this, exp.Column) and len(this.parts) == 1: 4359 this = this.this 4360 4361 this = self.expression( 4362 self.ASSIGNMENT[self._prev.token_type], 4363 this=this, 4364 comments=self._prev_comments, 4365 expression=self._parse_assignment(), 4366 ) 4367 4368 return this 4369 4370 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4371 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4372 4373 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4374 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4375 4376 def _parse_equality(self) -> t.Optional[exp.Expression]: 4377 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4378 4379 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4380 return self._parse_tokens(self._parse_range, self.COMPARISON) 4381 4382 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4383 this = this or self._parse_bitwise() 4384 negate = self._match(TokenType.NOT) 4385 4386 if self._match_set(self.RANGE_PARSERS): 4387 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4388 if not expression: 4389 return this 4390 4391 this = expression 4392 elif self._match(TokenType.ISNULL): 4393 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4394 4395 # Postgres supports ISNULL and NOTNULL for conditions. 
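        # Illustrative sketch (assumed behavior, using only the public API):
        # both spellings normalize to the canonical IS NULL forms, e.g.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT * FROM t WHERE x NOTNULL", read="postgres").sql()
        #   # expected: 'SELECT * FROM t WHERE NOT x IS NULL'
        #
        # Reference for the Postgres quirk: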
4396 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4397 if self._match(TokenType.NOTNULL): 4398 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4399 this = self.expression(exp.Not, this=this) 4400 4401 if negate: 4402 this = self._negate_range(this) 4403 4404 if self._match(TokenType.IS): 4405 this = self._parse_is(this) 4406 4407 return this 4408 4409 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4410 if not this: 4411 return this 4412 4413 return self.expression(exp.Not, this=this) 4414 4415 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4416 index = self._index - 1 4417 negate = self._match(TokenType.NOT) 4418 4419 if self._match_text_seq("DISTINCT", "FROM"): 4420 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4421 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4422 4423 if self._match(TokenType.JSON): 4424 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4425 4426 if self._match_text_seq("WITH"): 4427 _with = True 4428 elif self._match_text_seq("WITHOUT"): 4429 _with = False 4430 else: 4431 _with = None 4432 4433 unique = self._match(TokenType.UNIQUE) 4434 self._match_text_seq("KEYS") 4435 expression: t.Optional[exp.Expression] = self.expression( 4436 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4437 ) 4438 else: 4439 expression = self._parse_primary() or self._parse_null() 4440 if not expression: 4441 self._retreat(index) 4442 return None 4443 4444 this = self.expression(exp.Is, this=this, expression=expression) 4445 return self.expression(exp.Not, this=this) if negate else this 4446 4447 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4448 unnest = self._parse_unnest(with_alias=False) 4449 if unnest: 4450 this = self.expression(exp.In, this=this, unnest=unnest) 4451 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4452 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4453 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4454 4455 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4456 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4457 else: 4458 this = self.expression(exp.In, this=this, expressions=expressions) 4459 4460 if matched_l_paren: 4461 self._match_r_paren(this) 4462 elif not self._match(TokenType.R_BRACKET, expression=this): 4463 self.raise_error("Expecting ]") 4464 else: 4465 this = self.expression(exp.In, this=this, field=self._parse_field()) 4466 4467 return this 4468 4469 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4470 low = self._parse_bitwise() 4471 self._match(TokenType.AND) 4472 high = self._parse_bitwise() 4473 return self.expression(exp.Between, this=this, low=low, high=high) 4474 4475 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4476 if not self._match(TokenType.ESCAPE): 4477 return this 4478 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4479 4480 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4481 index = self._index 4482 4483 if not self._match(TokenType.INTERVAL) and match_interval: 4484 return None 4485 4486 if self._match(TokenType.STRING, advance=False): 4487 this = self._parse_primary() 4488 else: 4489 this = self._parse_term() 4490 4491 if not this 
or ( 4492 isinstance(this, exp.Column) 4493 and not this.table 4494 and not this.this.quoted 4495 and this.name.upper() == "IS" 4496 ): 4497 self._retreat(index) 4498 return None 4499 4500 unit = self._parse_function() or ( 4501 not self._match(TokenType.ALIAS, advance=False) 4502 and self._parse_var(any_token=True, upper=True) 4503 ) 4504 4505 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4506 # each INTERVAL expression into this canonical form so it's easy to transpile 4507 if this and this.is_number: 4508 this = exp.Literal.string(this.to_py()) 4509 elif this and this.is_string: 4510 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4511 if len(parts) == 1: 4512 if unit: 4513 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4514 self._retreat(self._index - 1) 4515 4516 this = exp.Literal.string(parts[0][0]) 4517 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4518 4519 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4520 unit = self.expression( 4521 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4522 ) 4523 4524 interval = self.expression(exp.Interval, this=this, unit=unit) 4525 4526 index = self._index 4527 self._match(TokenType.PLUS) 4528 4529 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4530 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4531 return self.expression( 4532 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4533 ) 4534 4535 self._retreat(index) 4536 return interval 4537 4538 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4539 this = self._parse_term() 4540 4541 while True: 4542 if self._match_set(self.BITWISE): 4543 this = self.expression( 4544 self.BITWISE[self._prev.token_type], 4545 this=this, 4546 expression=self._parse_term(), 4547 ) 4548 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4549 this = self.expression( 4550 exp.DPipe, 4551 this=this, 4552 expression=self._parse_term(), 4553 safe=not self.dialect.STRICT_STRING_CONCAT, 4554 ) 4555 elif self._match(TokenType.DQMARK): 4556 this = self.expression( 4557 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4558 ) 4559 elif self._match_pair(TokenType.LT, TokenType.LT): 4560 this = self.expression( 4561 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4562 ) 4563 elif self._match_pair(TokenType.GT, TokenType.GT): 4564 this = self.expression( 4565 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4566 ) 4567 else: 4568 break 4569 4570 return this 4571 4572 def _parse_term(self) -> t.Optional[exp.Expression]: 4573 this = self._parse_factor() 4574 4575 while self._match_set(self.TERM): 4576 klass = self.TERM[self._prev.token_type] 4577 comments = self._prev_comments 4578 expression = self._parse_factor() 4579 4580 this = self.expression(klass, this=this, comments=comments, expression=expression) 4581 4582 if isinstance(this, exp.Collate): 4583 expr = this.expression 4584 4585 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4586 # fallback to Identifier / Var 4587 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4588 ident = expr.this 4589 if isinstance(ident, exp.Identifier): 4590 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4591 4592 return this 4593 4594 def _parse_factor(self) -> t.Optional[exp.Expression]: 4595 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary
4596         this = parse_method()
4597
4598         while self._match_set(self.FACTOR):
4599             klass = self.FACTOR[self._prev.token_type]
4600             comments = self._prev_comments
4601             expression = parse_method()
4602
4603             if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4604                 self._retreat(self._index - 1)
4605                 return this
4606
4607             this = self.expression(klass, this=this, comments=comments, expression=expression)
4608
4609         if isinstance(this, exp.Div):
4610             this.args["typed"] = self.dialect.TYPED_DIVISION
4611             this.args["safe"] = self.dialect.SAFE_DIVISION
4612
4613         return this
4614
4615     def _parse_exponent(self) -> t.Optional[exp.Expression]:
4616         return self._parse_tokens(self._parse_unary, self.EXPONENT)
4617
4618     def _parse_unary(self) -> t.Optional[exp.Expression]:
4619         if self._match_set(self.UNARY_PARSERS):
4620             return self.UNARY_PARSERS[self._prev.token_type](self)
4621         return self._parse_at_time_zone(self._parse_type())
4622
4623     def _parse_type(
4624         self, parse_interval: bool = True, fallback_to_identifier: bool = False
4625     ) -> t.Optional[exp.Expression]:
4626         interval = parse_interval and self._parse_interval()
4627         if interval:
4628             return interval
4629
4630         index = self._index
4631         data_type = self._parse_types(check_func=True, allow_identifiers=False)
4632
4633         # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4634         # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4635         if isinstance(data_type, exp.Cast):
4636             # This constructor can contain ops directly after it, for instance struct unnesting:
4637             # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4638             return self._parse_column_ops(data_type)
4639
4640         if data_type:
4641             index2 = self._index
4642             this = self._parse_primary()
4643
4644             if isinstance(this, exp.Literal):
4645                 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4646                 if parser:
4647                     return parser(self, this, data_type)
4648
4649                 return self.expression(exp.Cast, this=this, to=data_type)
4650
4651             # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4652             # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4653             #
4654             # If the index difference here is greater than 1, that means the parser itself must have
4655             # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4656             #
4657             # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4658             # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4659             # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4660             # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4661             #
4662             # In these cases, we don't really want to return the converted type, but instead retreat
4663             # and try to parse a Column or Identifier in the section below.
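            # Illustrative sketch of the conversion mentioned above (an assumed
            # round-trip via the public API, not part of this module): with a
            # dialect that defines TYPE_CONVERTERS, such as Snowflake, a bare
            # DECIMAL annotation is expanded to its default precision/scale:
            #
            #   >>> import sqlglot
            #   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL)", read="snowflake").sql("snowflake")
            #   # expected: 'SELECT CAST(x AS DECIMAL(38, 0))'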
4664 if data_type.expressions and index2 - index > 1: 4665 self._retreat(index2) 4666 return self._parse_column_ops(data_type) 4667 4668 self._retreat(index) 4669 4670 if fallback_to_identifier: 4671 return self._parse_id_var() 4672 4673 this = self._parse_column() 4674 return this and self._parse_column_ops(this) 4675 4676 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4677 this = self._parse_type() 4678 if not this: 4679 return None 4680 4681 if isinstance(this, exp.Column) and not this.table: 4682 this = exp.var(this.name.upper()) 4683 4684 return self.expression( 4685 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4686 ) 4687 4688 def _parse_types( 4689 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4690 ) -> t.Optional[exp.Expression]: 4691 index = self._index 4692 4693 this: t.Optional[exp.Expression] = None 4694 prefix = self._match_text_seq("SYSUDTLIB", ".") 4695 4696 if not self._match_set(self.TYPE_TOKENS): 4697 identifier = allow_identifiers and self._parse_id_var( 4698 any_token=False, tokens=(TokenType.VAR,) 4699 ) 4700 if isinstance(identifier, exp.Identifier): 4701 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4702 4703 if len(tokens) != 1: 4704 self.raise_error("Unexpected identifier", self._prev) 4705 4706 if tokens[0].token_type in self.TYPE_TOKENS: 4707 self._prev = tokens[0] 4708 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4709 type_name = identifier.name 4710 4711 while self._match(TokenType.DOT): 4712 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4713 4714 this = exp.DataType.build(type_name, udt=True) 4715 else: 4716 self._retreat(self._index - 1) 4717 return None 4718 else: 4719 return None 4720 4721 type_token = self._prev.token_type 4722 4723 if type_token == TokenType.PSEUDO_TYPE: 4724 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4725 4726 if type_token == TokenType.OBJECT_IDENTIFIER: 4727 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4728 4729 # https://materialize.com/docs/sql/types/map/ 4730 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4731 key_type = self._parse_types( 4732 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4733 ) 4734 if not self._match(TokenType.FARROW): 4735 self._retreat(index) 4736 return None 4737 4738 value_type = self._parse_types( 4739 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4740 ) 4741 if not self._match(TokenType.R_BRACKET): 4742 self._retreat(index) 4743 return None 4744 4745 return exp.DataType( 4746 this=exp.DataType.Type.MAP, 4747 expressions=[key_type, value_type], 4748 nested=True, 4749 prefix=prefix, 4750 ) 4751 4752 nested = type_token in self.NESTED_TYPE_TOKENS 4753 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4754 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4755 expressions = None 4756 maybe_func = False 4757 4758 if self._match(TokenType.L_PAREN): 4759 if is_struct: 4760 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4761 elif nested: 4762 expressions = self._parse_csv( 4763 lambda: self._parse_types( 4764 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4765 ) 4766 ) 4767 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4768 this = expressions[0] 4769 this.set("nullable", True) 4770 self._match_r_paren() 4771 return this 4772 elif type_token in self.ENUM_TYPE_TOKENS: 4773 
expressions = self._parse_csv(self._parse_equality) 4774 elif is_aggregate: 4775 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4776 any_token=False, tokens=(TokenType.VAR,) 4777 ) 4778 if not func_or_ident or not self._match(TokenType.COMMA): 4779 return None 4780 expressions = self._parse_csv( 4781 lambda: self._parse_types( 4782 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4783 ) 4784 ) 4785 expressions.insert(0, func_or_ident) 4786 else: 4787 expressions = self._parse_csv(self._parse_type_size) 4788 4789 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4790 if type_token == TokenType.VECTOR and len(expressions) == 2: 4791 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4792 4793 if not expressions or not self._match(TokenType.R_PAREN): 4794 self._retreat(index) 4795 return None 4796 4797 maybe_func = True 4798 4799 values: t.Optional[t.List[exp.Expression]] = None 4800 4801 if nested and self._match(TokenType.LT): 4802 if is_struct: 4803 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4804 else: 4805 expressions = self._parse_csv( 4806 lambda: self._parse_types( 4807 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4808 ) 4809 ) 4810 4811 if not self._match(TokenType.GT): 4812 self.raise_error("Expecting >") 4813 4814 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4815 values = self._parse_csv(self._parse_assignment) 4816 if not values and is_struct: 4817 values = None 4818 self._retreat(self._index - 1) 4819 else: 4820 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4821 4822 if type_token in self.TIMESTAMPS: 4823 if self._match_text_seq("WITH", "TIME", "ZONE"): 4824 maybe_func = False 4825 tz_type = ( 4826 exp.DataType.Type.TIMETZ 4827 if type_token in self.TIMES 4828 else exp.DataType.Type.TIMESTAMPTZ 4829 ) 4830 this = exp.DataType(this=tz_type, expressions=expressions) 4831 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4832 maybe_func = False 4833 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4834 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4835 maybe_func = False 4836 elif type_token == TokenType.INTERVAL: 4837 unit = self._parse_var(upper=True) 4838 if unit: 4839 if self._match_text_seq("TO"): 4840 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4841 4842 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4843 else: 4844 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4845 4846 if maybe_func and check_func: 4847 index2 = self._index 4848 peek = self._parse_string() 4849 4850 if not peek: 4851 self._retreat(index) 4852 return None 4853 4854 self._retreat(index2) 4855 4856 if not this: 4857 if self._match_text_seq("UNSIGNED"): 4858 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4859 if not unsigned_type_token: 4860 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4861 4862 type_token = unsigned_type_token or type_token 4863 4864 this = exp.DataType( 4865 this=exp.DataType.Type[type_token.value], 4866 expressions=expressions, 4867 nested=nested, 4868 prefix=prefix, 4869 ) 4870 4871 # Empty arrays/structs are allowed 4872 if values is not None: 4873 cls = exp.Struct if is_struct else exp.Array 4874 this = exp.cast(cls(expressions=values), this, copy=False) 4875 4876 elif expressions: 4877 this.set("expressions", 
expressions) 4878 4879 # https://materialize.com/docs/sql/types/list/#type-name 4880 while self._match(TokenType.LIST): 4881 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4882 4883 index = self._index 4884 4885 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4886 matched_array = self._match(TokenType.ARRAY) 4887 4888 while self._curr: 4889 datatype_token = self._prev.token_type 4890 matched_l_bracket = self._match(TokenType.L_BRACKET) 4891 if not matched_l_bracket and not matched_array: 4892 break 4893 4894 matched_array = False 4895 values = self._parse_csv(self._parse_assignment) or None 4896 if ( 4897 values 4898 and not schema 4899 and ( 4900 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4901 ) 4902 ): 4903 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4904 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4905 self._retreat(index) 4906 break 4907 4908 this = exp.DataType( 4909 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4910 ) 4911 self._match(TokenType.R_BRACKET) 4912 4913 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4914 converter = self.TYPE_CONVERTERS.get(this.this) 4915 if converter: 4916 this = converter(t.cast(exp.DataType, this)) 4917 4918 return this 4919 4920 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4921 index = self._index 4922 4923 if ( 4924 self._curr 4925 and self._next 4926 and self._curr.token_type in self.TYPE_TOKENS 4927 and self._next.token_type in self.TYPE_TOKENS 4928 ): 4929 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4930 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4931 this = self._parse_id_var() 4932 else: 4933 this = ( 4934 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4935 or self._parse_id_var() 4936 ) 4937 4938 self._match(TokenType.COLON) 4939 4940 if ( 4941 type_required 4942 and not isinstance(this, exp.DataType) 4943 and not self._match_set(self.TYPE_TOKENS, advance=False) 4944 ): 4945 self._retreat(index) 4946 return self._parse_types() 4947 4948 return self._parse_column_def(this) 4949 4950 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4951 if not self._match_text_seq("AT", "TIME", "ZONE"): 4952 return this 4953 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4954 4955 def _parse_column(self) -> t.Optional[exp.Expression]: 4956 this = self._parse_column_reference() 4957 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4958 4959 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4960 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4961 4962 return column 4963 4964 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4965 this = self._parse_field() 4966 if ( 4967 not this 4968 and self._match(TokenType.VALUES, advance=False) 4969 and self.VALUES_FOLLOWED_BY_PAREN 4970 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4971 ): 4972 this = self._parse_id_var() 4973 4974 if isinstance(this, exp.Identifier): 4975 # We bubble up comments from the Identifier to the Column 4976 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4977 4978 return this 4979 4980 def _parse_colon_as_variant_extract( 4981 self, this: t.Optional[exp.Expression] 4982 ) -> t.Optional[exp.Expression]: 4983 casts = [] 4984 json_path = [] 4985 escape = None 4986 4987 while self._match(TokenType.COLON): 4988 start_index = self._index 4989 4990 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4991 path = self._parse_column_ops( 4992 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4993 ) 4994 4995 # The cast :: operator has a lower precedence than the extraction operator :, so 4996 # we rearrange the AST appropriately to avoid casting the JSON path 4997 while isinstance(path, exp.Cast): 4998 casts.append(path.to) 4999 path = path.this 5000 5001 if casts: 5002 dcolon_offset = next( 5003 i 5004 for i, t in enumerate(self._tokens[start_index:]) 5005 if t.token_type == TokenType.DCOLON 5006 ) 5007 end_token = self._tokens[start_index + dcolon_offset - 1] 5008 else: 5009 end_token = self._prev 5010 5011 if path: 5012 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5013 # it'll roundtrip to a string literal in GET_PATH 5014 if isinstance(path, exp.Identifier) and path.quoted: 5015 escape = True 5016 5017 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5018 5019 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5020 # Databricks transforms it back to the colon/dot notation 5021 if json_path: 5022 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5023 5024 if json_path_expr: 5025 json_path_expr.set("escape", escape) 5026 5027 this = self.expression( 5028 exp.JSONExtract, 5029 this=this, 5030 expression=json_path_expr, 5031 variant_extract=True, 5032 ) 5033 5034 while casts: 5035 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5036 5037 return this 5038 5039 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5040 return self._parse_types() 5041 5042 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5043 this = self._parse_bracket(this) 5044 5045 while self._match_set(self.COLUMN_OPERATORS): 5046 op_token = self._prev.token_type 5047 op = self.COLUMN_OPERATORS.get(op_token) 5048 5049 if op_token == TokenType.DCOLON: 5050 field = self._parse_dcolon() 5051 if not field: 5052 self.raise_error("Expected type") 5053 elif op and self._curr: 5054 field = self._parse_column_reference() or self._parse_bracket() 5055 else: 5056 field = self._parse_field(any_token=True, anonymous_func=True) 5057 5058 if isinstance(field, exp.Func) and this: 5059 # bigquery allows function calls like x.y.count(...) 5060 # SAFE.SUBSTR(...) 5061 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5062 this = exp.replace_tree( 5063 this, 5064 lambda n: ( 5065 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5066 if n.table 5067 else n.this 5068 ) 5069 if isinstance(n, exp.Column) 5070 else n, 5071 ) 5072 5073 if op: 5074 this = op(self, this, field) 5075 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5076 this = self.expression( 5077 exp.Column, 5078 this=field, 5079 table=this.this, 5080 db=this.args.get("table"), 5081 catalog=this.args.get("db"), 5082 ) 5083 else: 5084 this = self.expression(exp.Dot, this=this, expression=field) 5085 5086 this = self._parse_bracket(this) 5087 5088 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5089 5090 def _parse_primary(self) -> t.Optional[exp.Expression]: 5091 if self._match_set(self.PRIMARY_PARSERS): 5092 token_type = self._prev.token_type 5093 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5094 5095 if token_type == TokenType.STRING: 5096 expressions = [primary] 5097 while self._match(TokenType.STRING): 5098 expressions.append(exp.Literal.string(self._prev.text)) 5099 5100 if len(expressions) > 1: 5101 return self.expression(exp.Concat, expressions=expressions) 5102 5103 return primary 5104 5105 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5106 return exp.Literal.number(f"0.{self._prev.text}") 5107 5108 if self._match(TokenType.L_PAREN): 5109 comments = self._prev_comments 5110 query = self._parse_select() 5111 5112 if query: 5113 expressions = [query] 5114 else: 5115 expressions = self._parse_expressions() 5116 5117 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5118 5119 if not this and self._match(TokenType.R_PAREN, advance=False): 5120 this = self.expression(exp.Tuple) 
5121 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5122 this = self._parse_subquery(this=this, parse_alias=False) 5123 elif isinstance(this, exp.Subquery): 5124 this = self._parse_subquery( 5125 this=self._parse_set_operations(this), parse_alias=False 5126 ) 5127 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5128 this = self.expression(exp.Tuple, expressions=expressions) 5129 else: 5130 this = self.expression(exp.Paren, this=this) 5131 5132 if this: 5133 this.add_comments(comments) 5134 5135 self._match_r_paren(expression=this) 5136 return this 5137 5138 return None 5139 5140 def _parse_field( 5141 self, 5142 any_token: bool = False, 5143 tokens: t.Optional[t.Collection[TokenType]] = None, 5144 anonymous_func: bool = False, 5145 ) -> t.Optional[exp.Expression]: 5146 if anonymous_func: 5147 field = ( 5148 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5149 or self._parse_primary() 5150 ) 5151 else: 5152 field = self._parse_primary() or self._parse_function( 5153 anonymous=anonymous_func, any_token=any_token 5154 ) 5155 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5156 5157 def _parse_function( 5158 self, 5159 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5160 anonymous: bool = False, 5161 optional_parens: bool = True, 5162 any_token: bool = False, 5163 ) -> t.Optional[exp.Expression]: 5164 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5165 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5166 fn_syntax = False 5167 if ( 5168 self._match(TokenType.L_BRACE, advance=False) 5169 and self._next 5170 and self._next.text.upper() == "FN" 5171 ): 5172 self._advance(2) 5173 fn_syntax = True 5174 5175 func = self._parse_function_call( 5176 functions=functions, 5177 anonymous=anonymous, 5178 optional_parens=optional_parens, 5179 any_token=any_token, 5180 ) 5181 5182 if fn_syntax: 5183 self._match(TokenType.R_BRACE) 5184 5185 return func 5186 5187 def _parse_function_call( 5188 self, 5189 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5190 anonymous: bool = False, 5191 optional_parens: bool = True, 5192 any_token: bool = False, 5193 ) -> t.Optional[exp.Expression]: 5194 if not self._curr: 5195 return None 5196 5197 comments = self._curr.comments 5198 token_type = self._curr.token_type 5199 this = self._curr.text 5200 upper = this.upper() 5201 5202 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5203 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5204 self._advance() 5205 return self._parse_window(parser(self)) 5206 5207 if not self._next or self._next.token_type != TokenType.L_PAREN: 5208 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5209 self._advance() 5210 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5211 5212 return None 5213 5214 if any_token: 5215 if token_type in self.RESERVED_TOKENS: 5216 return None 5217 elif token_type not in self.FUNC_TOKENS: 5218 return None 5219 5220 self._advance(2) 5221 5222 parser = self.FUNCTION_PARSERS.get(upper) 5223 if parser and not anonymous: 5224 this = parser(self) 5225 else: 5226 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5227 5228 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5229 this = self.expression(subquery_predicate, this=self._parse_select()) 5230 self._match_r_paren() 5231 return this 5232 5233 if functions is None: 5234 functions = self.FUNCTIONS 5235 5236 function = 
functions.get(upper) 5237 5238 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5239 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5240 5241 if alias: 5242 args = self._kv_to_prop_eq(args) 5243 5244 if function and not anonymous: 5245 if "dialect" in function.__code__.co_varnames: 5246 func = function(args, dialect=self.dialect) 5247 else: 5248 func = function(args) 5249 5250 func = self.validate_expression(func, args) 5251 if not self.dialect.NORMALIZE_FUNCTIONS: 5252 func.meta["name"] = this 5253 5254 this = func 5255 else: 5256 if token_type == TokenType.IDENTIFIER: 5257 this = exp.Identifier(this=this, quoted=True) 5258 this = self.expression(exp.Anonymous, this=this, expressions=args) 5259 5260 if isinstance(this, exp.Expression): 5261 this.add_comments(comments) 5262 5263 self._match_r_paren(this) 5264 return self._parse_window(this) 5265 5266 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5267 return expression 5268 5269 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5270 transformed = [] 5271 5272 for index, e in enumerate(expressions): 5273 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5274 if isinstance(e, exp.Alias): 5275 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5276 5277 if not isinstance(e, exp.PropertyEQ): 5278 e = self.expression( 5279 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5280 ) 5281 5282 if isinstance(e.this, exp.Column): 5283 e.this.replace(e.this.this) 5284 else: 5285 e = self._to_prop_eq(e, index) 5286 5287 transformed.append(e) 5288 5289 return transformed 5290 5291 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5292 return self._parse_column_def(self._parse_id_var()) 5293 5294 def _parse_user_defined_function( 5295 self, kind: t.Optional[TokenType] = None 5296 ) -> t.Optional[exp.Expression]: 5297 this = self._parse_id_var() 5298 5299 while self._match(TokenType.DOT): 5300 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5301 5302 if not self._match(TokenType.L_PAREN): 5303 return this 5304 5305 expressions = self._parse_csv(self._parse_function_parameter) 5306 self._match_r_paren() 5307 return self.expression( 5308 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5309 ) 5310 5311 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5312 literal = self._parse_primary() 5313 if literal: 5314 return self.expression(exp.Introducer, this=token.text, expression=literal) 5315 5316 return self.expression(exp.Identifier, this=token.text) 5317 5318 def _parse_session_parameter(self) -> exp.SessionParameter: 5319 kind = None 5320 this = self._parse_id_var() or self._parse_primary() 5321 5322 if this and self._match(TokenType.DOT): 5323 kind = this.name 5324 this = self._parse_var() or self._parse_primary() 5325 5326 return self.expression(exp.SessionParameter, this=this, kind=kind) 5327 5328 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5329 return self._parse_id_var() 5330 5331 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5332 index = self._index 5333 5334 if self._match(TokenType.L_PAREN): 5335 expressions = t.cast( 5336 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5337 ) 5338 5339 if not self._match(TokenType.R_PAREN): 5340 self._retreat(index) 5341 else: 5342 expressions = [self._parse_lambda_arg()] 5343 5344 if 
self._match_set(self.LAMBDAS): 5345 return self.LAMBDAS[self._prev.token_type](self, expressions) 5346 5347 self._retreat(index) 5348 5349 this: t.Optional[exp.Expression] 5350 5351 if self._match(TokenType.DISTINCT): 5352 this = self.expression( 5353 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5354 ) 5355 else: 5356 this = self._parse_select_or_expression(alias=alias) 5357 5358 return self._parse_limit( 5359 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5360 ) 5361 5362 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5363 index = self._index 5364 if not self._match(TokenType.L_PAREN): 5365 return this 5366 5367 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5368 # expr can be of both types 5369 if self._match_set(self.SELECT_START_TOKENS): 5370 self._retreat(index) 5371 return this 5372 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5373 self._match_r_paren() 5374 return self.expression(exp.Schema, this=this, expressions=args) 5375 5376 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5377 return self._parse_column_def(self._parse_field(any_token=True)) 5378 5379 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5380 # column defs are not really columns, they're identifiers 5381 if isinstance(this, exp.Column): 5382 this = this.this 5383 5384 kind = self._parse_types(schema=True) 5385 5386 if self._match_text_seq("FOR", "ORDINALITY"): 5387 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5388 5389 constraints: t.List[exp.Expression] = [] 5390 5391 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5392 ("ALIAS", "MATERIALIZED") 5393 ): 5394 persisted = self._prev.text.upper() == "MATERIALIZED" 5395 constraint_kind = exp.ComputedColumnConstraint( 5396 this=self._parse_assignment(), 5397 persisted=persisted or self._match_text_seq("PERSISTED"), 5398 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5399 ) 5400 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5401 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5402 self._match(TokenType.ALIAS) 5403 constraints.append( 5404 self.expression( 5405 exp.ColumnConstraint, 5406 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5407 ) 5408 ) 5409 5410 while True: 5411 constraint = self._parse_column_constraint() 5412 if not constraint: 5413 break 5414 constraints.append(constraint) 5415 5416 if not kind and not constraints: 5417 return this 5418 5419 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5420 5421 def _parse_auto_increment( 5422 self, 5423 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5424 start = None 5425 increment = None 5426 5427 if self._match(TokenType.L_PAREN, advance=False): 5428 args = self._parse_wrapped_csv(self._parse_bitwise) 5429 start = seq_get(args, 0) 5430 increment = seq_get(args, 1) 5431 elif self._match_text_seq("START"): 5432 start = self._parse_bitwise() 5433 self._match_text_seq("INCREMENT") 5434 increment = self._parse_bitwise() 5435 5436 if start and increment: 5437 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5438 5439 return exp.AutoIncrementColumnConstraint() 5440 5441 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5442 if not 
self._match_text_seq("REFRESH"): 5443 self._retreat(self._index - 1) 5444 return None 5445 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5446 5447 def _parse_compress(self) -> exp.CompressColumnConstraint: 5448 if self._match(TokenType.L_PAREN, advance=False): 5449 return self.expression( 5450 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5451 ) 5452 5453 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5454 5455 def _parse_generated_as_identity( 5456 self, 5457 ) -> ( 5458 exp.GeneratedAsIdentityColumnConstraint 5459 | exp.ComputedColumnConstraint 5460 | exp.GeneratedAsRowColumnConstraint 5461 ): 5462 if self._match_text_seq("BY", "DEFAULT"): 5463 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5464 this = self.expression( 5465 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5466 ) 5467 else: 5468 self._match_text_seq("ALWAYS") 5469 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5470 5471 self._match(TokenType.ALIAS) 5472 5473 if self._match_text_seq("ROW"): 5474 start = self._match_text_seq("START") 5475 if not start: 5476 self._match(TokenType.END) 5477 hidden = self._match_text_seq("HIDDEN") 5478 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5479 5480 identity = self._match_text_seq("IDENTITY") 5481 5482 if self._match(TokenType.L_PAREN): 5483 if self._match(TokenType.START_WITH): 5484 this.set("start", self._parse_bitwise()) 5485 if self._match_text_seq("INCREMENT", "BY"): 5486 this.set("increment", self._parse_bitwise()) 5487 if self._match_text_seq("MINVALUE"): 5488 this.set("minvalue", self._parse_bitwise()) 5489 if self._match_text_seq("MAXVALUE"): 5490 this.set("maxvalue", self._parse_bitwise()) 5491 5492 if self._match_text_seq("CYCLE"): 5493 this.set("cycle", True) 5494 elif self._match_text_seq("NO", "CYCLE"): 5495 this.set("cycle", False) 5496 5497 if not identity: 5498 this.set("expression", self._parse_range()) 5499 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5500 args = self._parse_csv(self._parse_bitwise) 5501 this.set("start", seq_get(args, 0)) 5502 this.set("increment", seq_get(args, 1)) 5503 5504 self._match_r_paren() 5505 5506 return this 5507 5508 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5509 self._match_text_seq("LENGTH") 5510 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5511 5512 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5513 if self._match_text_seq("NULL"): 5514 return self.expression(exp.NotNullColumnConstraint) 5515 if self._match_text_seq("CASESPECIFIC"): 5516 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5517 if self._match_text_seq("FOR", "REPLICATION"): 5518 return self.expression(exp.NotForReplicationColumnConstraint) 5519 5520 # Unconsume the `NOT` token 5521 self._retreat(self._index - 1) 5522 return None 5523 5524 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5525 if self._match(TokenType.CONSTRAINT): 5526 this = self._parse_id_var() 5527 else: 5528 this = None 5529 5530 if self._match_texts(self.CONSTRAINT_PARSERS): 5531 return self.expression( 5532 exp.ColumnConstraint, 5533 this=this, 5534 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5535 ) 5536 5537 return this 5538 5539 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5540 if not self._match(TokenType.CONSTRAINT): 5541 
return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5542 5543 return self.expression( 5544 exp.Constraint, 5545 this=self._parse_id_var(), 5546 expressions=self._parse_unnamed_constraints(), 5547 ) 5548 5549 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5550 constraints = [] 5551 while True: 5552 constraint = self._parse_unnamed_constraint() or self._parse_function() 5553 if not constraint: 5554 break 5555 constraints.append(constraint) 5556 5557 return constraints 5558 5559 def _parse_unnamed_constraint( 5560 self, constraints: t.Optional[t.Collection[str]] = None 5561 ) -> t.Optional[exp.Expression]: 5562 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5563 constraints or self.CONSTRAINT_PARSERS 5564 ): 5565 return None 5566 5567 constraint = self._prev.text.upper() 5568 if constraint not in self.CONSTRAINT_PARSERS: 5569 self.raise_error(f"No parser found for schema constraint {constraint}.") 5570 5571 return self.CONSTRAINT_PARSERS[constraint](self) 5572 5573 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5574 return self._parse_id_var(any_token=False) 5575 5576 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5577 self._match_text_seq("KEY") 5578 return self.expression( 5579 exp.UniqueColumnConstraint, 5580 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5581 this=self._parse_schema(self._parse_unique_key()), 5582 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5583 on_conflict=self._parse_on_conflict(), 5584 ) 5585 5586 def _parse_key_constraint_options(self) -> t.List[str]: 5587 options = [] 5588 while True: 5589 if not self._curr: 5590 break 5591 5592 if self._match(TokenType.ON): 5593 action = None 5594 on = self._advance_any() and self._prev.text 5595 5596 if self._match_text_seq("NO", "ACTION"): 5597 action = "NO ACTION" 5598 elif self._match_text_seq("CASCADE"): 5599 action = "CASCADE" 5600 elif self._match_text_seq("RESTRICT"): 5601 action = "RESTRICT" 5602 elif self._match_pair(TokenType.SET, TokenType.NULL): 5603 action = "SET NULL" 5604 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5605 action = "SET DEFAULT" 5606 else: 5607 self.raise_error("Invalid key constraint") 5608 5609 options.append(f"ON {on} {action}") 5610 else: 5611 var = self._parse_var_from_options( 5612 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5613 ) 5614 if not var: 5615 break 5616 options.append(var.name) 5617 5618 return options 5619 5620 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5621 if match and not self._match(TokenType.REFERENCES): 5622 return None 5623 5624 expressions = None 5625 this = self._parse_table(schema=True) 5626 options = self._parse_key_constraint_options() 5627 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5628 5629 def _parse_foreign_key(self) -> exp.ForeignKey: 5630 expressions = self._parse_wrapped_id_vars() 5631 reference = self._parse_references() 5632 options = {} 5633 5634 while self._match(TokenType.ON): 5635 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5636 self.raise_error("Expected DELETE or UPDATE") 5637 5638 kind = self._prev.text.lower() 5639 5640 if self._match_text_seq("NO", "ACTION"): 5641 action = "NO ACTION" 5642 elif self._match(TokenType.SET): 5643 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5644 action = "SET " + self._prev.text.upper() 5645 else: 5646 self._advance() 5647 action = 
self._prev.text.upper()
5648
5649            options[kind] = action
5650
5651        return self.expression(
5652            exp.ForeignKey,
5653            expressions=expressions,
5654            reference=reference,
5655            **options,  # type: ignore
5656        )
5657
5658    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
5659        return self._parse_field()
5660
5661    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
5662        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
5663            self._retreat(self._index - 1)
5664            return None
5665
5666        id_vars = self._parse_wrapped_id_vars()
5667        return self.expression(
5668            exp.PeriodForSystemTimeConstraint,
5669            this=seq_get(id_vars, 0),
5670            expression=seq_get(id_vars, 1),
5671        )
5672
5673    def _parse_primary_key(
5674        self, wrapped_optional: bool = False, in_props: bool = False
5675    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
5676        desc = (
5677            self._match_set((TokenType.ASC, TokenType.DESC))
5678            and self._prev.token_type == TokenType.DESC
5679        )
5680
5681        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
5682            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
5683
5684        expressions = self._parse_wrapped_csv(
5685            self._parse_primary_key_part, optional=wrapped_optional
5686        )
5687        options = self._parse_key_constraint_options()
5688        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
5689
5690    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
5691        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
5692
5693    def _parse_odbc_datetime_literal(self) -> exp.Expression:
5694        """
5695        Parses a datetime literal in ODBC escape format into the corresponding expression
5696        type; for example, `{d'yyyy-mm-dd'}` is parsed into a `Date` node, exactly as if
5697        `DATE('yyyy-mm-dd')` had been written.
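        The other ODBC escape forms, such as `{t'...'}` and `{ts'...'}`, are resolved
        the same way via the ODBC_DATETIME_LITERALS mapping.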
5698 5699 Reference: 5700 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5701 """ 5702 self._match(TokenType.VAR) 5703 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5704 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5705 if not self._match(TokenType.R_BRACE): 5706 self.raise_error("Expected }") 5707 return expression 5708 5709 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5710 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5711 return this 5712 5713 bracket_kind = self._prev.token_type 5714 if ( 5715 bracket_kind == TokenType.L_BRACE 5716 and self._curr 5717 and self._curr.token_type == TokenType.VAR 5718 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5719 ): 5720 return self._parse_odbc_datetime_literal() 5721 5722 expressions = self._parse_csv( 5723 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5724 ) 5725 5726 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5727 self.raise_error("Expected ]") 5728 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5729 self.raise_error("Expected }") 5730 5731 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5732 if bracket_kind == TokenType.L_BRACE: 5733 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5734 elif not this: 5735 this = build_array_constructor( 5736 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5737 ) 5738 else: 5739 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5740 if constructor_type: 5741 return build_array_constructor( 5742 constructor_type, 5743 args=expressions, 5744 bracket_kind=bracket_kind, 5745 dialect=self.dialect, 5746 ) 5747 5748 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5749 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5750 5751 self._add_comments(this) 5752 return self._parse_bracket(this) 5753 5754 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5755 if self._match(TokenType.COLON): 5756 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5757 return this 5758 5759 def _parse_case(self) -> t.Optional[exp.Expression]: 5760 ifs = [] 5761 default = None 5762 5763 comments = self._prev_comments 5764 expression = self._parse_assignment() 5765 5766 while self._match(TokenType.WHEN): 5767 this = self._parse_assignment() 5768 self._match(TokenType.THEN) 5769 then = self._parse_assignment() 5770 ifs.append(self.expression(exp.If, this=this, true=then)) 5771 5772 if self._match(TokenType.ELSE): 5773 default = self._parse_assignment() 5774 5775 if not self._match(TokenType.END): 5776 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5777 default = exp.column("interval") 5778 else: 5779 self.raise_error("Expected END after CASE", self._prev) 5780 5781 return self.expression( 5782 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5783 ) 5784 5785 def _parse_if(self) -> t.Optional[exp.Expression]: 5786 if self._match(TokenType.L_PAREN): 5787 args = self._parse_csv(self._parse_assignment) 5788 this = self.validate_expression(exp.If.from_arg_list(args), args) 5789 self._match_r_paren() 5790 else: 5791 index = self._index - 1 5792 5793 if self.NO_PAREN_IF_COMMANDS and index == 0: 5794 
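                # i.e. in dialects with NO_PAREN_IF_COMMANDS, a statement-level IF with no
                # parentheses is preserved as a raw exp.Command instead of being parsed
                # into an exp.If expression.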
return self._parse_as_command(self._prev) 5795 5796 condition = self._parse_assignment() 5797 5798 if not condition: 5799 self._retreat(index) 5800 return None 5801 5802 self._match(TokenType.THEN) 5803 true = self._parse_assignment() 5804 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5805 self._match(TokenType.END) 5806 this = self.expression(exp.If, this=condition, true=true, false=false) 5807 5808 return this 5809 5810 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5811 if not self._match_text_seq("VALUE", "FOR"): 5812 self._retreat(self._index - 1) 5813 return None 5814 5815 return self.expression( 5816 exp.NextValueFor, 5817 this=self._parse_column(), 5818 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5819 ) 5820 5821 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5822 this = self._parse_function() or self._parse_var_or_string(upper=True) 5823 5824 if self._match(TokenType.FROM): 5825 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5826 5827 if not self._match(TokenType.COMMA): 5828 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5829 5830 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5831 5832 def _parse_gap_fill(self) -> exp.GapFill: 5833 self._match(TokenType.TABLE) 5834 this = self._parse_table() 5835 5836 self._match(TokenType.COMMA) 5837 args = [this, *self._parse_csv(self._parse_lambda)] 5838 5839 gap_fill = exp.GapFill.from_arg_list(args) 5840 return self.validate_expression(gap_fill, args) 5841 5842 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5843 this = self._parse_assignment() 5844 5845 if not self._match(TokenType.ALIAS): 5846 if self._match(TokenType.COMMA): 5847 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5848 5849 self.raise_error("Expected AS after CAST") 5850 5851 fmt = None 5852 to = self._parse_types() 5853 5854 if self._match(TokenType.FORMAT): 5855 fmt_string = self._parse_string() 5856 fmt = self._parse_at_time_zone(fmt_string) 5857 5858 if not to: 5859 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5860 if to.this in exp.DataType.TEMPORAL_TYPES: 5861 this = self.expression( 5862 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5863 this=this, 5864 format=exp.Literal.string( 5865 format_time( 5866 fmt_string.this if fmt_string else "", 5867 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5868 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5869 ) 5870 ), 5871 safe=safe, 5872 ) 5873 5874 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5875 this.set("zone", fmt.args["zone"]) 5876 return this 5877 elif not to: 5878 self.raise_error("Expected TYPE after CAST") 5879 elif isinstance(to, exp.Identifier): 5880 to = exp.DataType.build(to.name, udt=True) 5881 elif to.this == exp.DataType.Type.CHAR: 5882 if self._match(TokenType.CHARACTER_SET): 5883 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5884 5885 return self.expression( 5886 exp.Cast if strict else exp.TryCast, 5887 this=this, 5888 to=to, 5889 format=fmt, 5890 safe=safe, 5891 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5892 ) 5893 5894 def _parse_string_agg(self) -> exp.Expression: 5895 if self._match(TokenType.DISTINCT): 5896 args: t.List[t.Optional[exp.Expression]] = [ 5897 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5898 ] 5899 if self._match(TokenType.COMMA): 5900 args.extend(self._parse_csv(self._parse_assignment)) 5901 else: 5902 args = self._parse_csv(self._parse_assignment) # type: ignore 5903 5904 index = self._index 5905 if not self._match(TokenType.R_PAREN) and args: 5906 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5907 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5908 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5909 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5910 5911 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5912 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5913 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5914 if not self._match_text_seq("WITHIN", "GROUP"): 5915 self._retreat(index) 5916 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5917 5918 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5919 order = self._parse_order(this=seq_get(args, 0)) 5920 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5921 5922 def _parse_convert( 5923 self, strict: bool, safe: t.Optional[bool] = None 5924 ) -> t.Optional[exp.Expression]: 5925 this = self._parse_bitwise() 5926 5927 if self._match(TokenType.USING): 5928 to: t.Optional[exp.Expression] = self.expression( 5929 exp.CharacterSet, this=self._parse_var() 5930 ) 5931 elif self._match(TokenType.COMMA): 5932 to = self._parse_types() 5933 else: 5934 to = None 5935 5936 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5937 5938 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5939 """ 5940 There are generally two variants of the DECODE function: 5941 5942 - DECODE(bin, charset) 5943 - DECODE(expression, search, result [, search, result] ... [, default]) 5944 5945 The second variant will always be parsed into a CASE expression. Note that NULL 5946 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5947 instead of relying on pattern matching. 
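        For example, `DECODE(x, 1, 'a', 2, 'b', 'c')` roughly becomes
        `CASE WHEN x = 1 THEN 'a' WHEN x = 2 THEN 'b' ELSE 'c' END`, with the
        NULL-safe comparison described above where a search value may be NULL.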
5948 """ 5949 args = self._parse_csv(self._parse_assignment) 5950 5951 if len(args) < 3: 5952 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5953 5954 expression, *expressions = args 5955 if not expression: 5956 return None 5957 5958 ifs = [] 5959 for search, result in zip(expressions[::2], expressions[1::2]): 5960 if not search or not result: 5961 return None 5962 5963 if isinstance(search, exp.Literal): 5964 ifs.append( 5965 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5966 ) 5967 elif isinstance(search, exp.Null): 5968 ifs.append( 5969 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5970 ) 5971 else: 5972 cond = exp.or_( 5973 exp.EQ(this=expression.copy(), expression=search), 5974 exp.and_( 5975 exp.Is(this=expression.copy(), expression=exp.Null()), 5976 exp.Is(this=search.copy(), expression=exp.Null()), 5977 copy=False, 5978 ), 5979 copy=False, 5980 ) 5981 ifs.append(exp.If(this=cond, true=result)) 5982 5983 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5984 5985 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5986 self._match_text_seq("KEY") 5987 key = self._parse_column() 5988 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5989 self._match_text_seq("VALUE") 5990 value = self._parse_bitwise() 5991 5992 if not key and not value: 5993 return None 5994 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5995 5996 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5997 if not this or not self._match_text_seq("FORMAT", "JSON"): 5998 return this 5999 6000 return self.expression(exp.FormatJson, this=this) 6001 6002 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6003 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6004 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6005 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6006 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6007 else: 6008 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6009 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6010 6011 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6012 6013 if not empty and not error and not null: 6014 return None 6015 6016 return self.expression( 6017 exp.OnCondition, 6018 empty=empty, 6019 error=error, 6020 null=null, 6021 ) 6022 6023 def _parse_on_handling( 6024 self, on: str, *values: str 6025 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6026 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6027 for value in values: 6028 if self._match_text_seq(value, "ON", on): 6029 return f"{value} ON {on}" 6030 6031 index = self._index 6032 if self._match(TokenType.DEFAULT): 6033 default_value = self._parse_bitwise() 6034 if self._match_text_seq("ON", on): 6035 return default_value 6036 6037 self._retreat(index) 6038 6039 return None 6040 6041 @t.overload 6042 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6043 6044 @t.overload 6045 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6046 6047 def _parse_json_object(self, agg=False): 6048 star = self._parse_star() 6049 expressions = ( 6050 [star] 6051 if star 6052 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6053 ) 6054 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6055 6056 unique_keys = None 6057 if self._match_text_seq("WITH", "UNIQUE"): 6058 unique_keys = True 6059 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6060 unique_keys = False 6061 6062 self._match_text_seq("KEYS") 6063 6064 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6065 self._parse_type() 6066 ) 6067 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6068 6069 return self.expression( 6070 exp.JSONObjectAgg if agg else exp.JSONObject, 6071 expressions=expressions, 6072 null_handling=null_handling, 6073 unique_keys=unique_keys, 6074 return_type=return_type, 6075 encoding=encoding, 6076 ) 6077 6078 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6079 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6080 if not self._match_text_seq("NESTED"): 6081 this = self._parse_id_var() 6082 kind = self._parse_types(allow_identifiers=False) 6083 nested = None 6084 else: 6085 this = None 6086 kind = None 6087 nested = True 6088 6089 path = self._match_text_seq("PATH") and self._parse_string() 6090 nested_schema = nested and self._parse_json_schema() 6091 6092 return self.expression( 6093 exp.JSONColumnDef, 6094 this=this, 6095 kind=kind, 6096 path=path, 6097 nested_schema=nested_schema, 6098 ) 6099 6100 def _parse_json_schema(self) -> exp.JSONSchema: 6101 self._match_text_seq("COLUMNS") 6102 return self.expression( 6103 exp.JSONSchema, 6104 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6105 ) 6106 6107 def _parse_json_table(self) -> exp.JSONTable: 6108 this = self._parse_format_json(self._parse_bitwise()) 6109 path = self._match(TokenType.COMMA) and self._parse_string() 6110 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6111 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6112 schema = self._parse_json_schema() 6113 6114 return exp.JSONTable( 6115 this=this, 6116 schema=schema, 6117 path=path, 6118 error_handling=error_handling, 6119 empty_handling=empty_handling, 6120 ) 6121 6122 def _parse_match_against(self) -> exp.MatchAgainst: 6123 expressions = self._parse_csv(self._parse_column) 6124 6125 self._match_text_seq(")", "AGAINST", "(") 6126 6127 this = self._parse_string() 6128 6129 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6130 modifier = "IN NATURAL LANGUAGE MODE" 6131 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6132 modifier = f"{modifier} WITH QUERY EXPANSION" 6133 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6134 modifier = "IN BOOLEAN MODE" 6135 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6136 modifier = "WITH QUERY EXPANSION" 6137 else: 6138 modifier = None 6139 6140 return self.expression( 6141 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6142 ) 6143 6144 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6145 def _parse_open_json(self) -> exp.OpenJSON: 6146 this = self._parse_bitwise() 6147 path = self._match(TokenType.COMMA) and self._parse_string() 6148 6149 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6150 this = self._parse_field(any_token=True) 6151 kind = self._parse_types() 6152 path = 
self._parse_string() 6153 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6154 6155 return self.expression( 6156 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6157 ) 6158 6159 expressions = None 6160 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6161 self._match_l_paren() 6162 expressions = self._parse_csv(_parse_open_json_column_def) 6163 6164 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6165 6166 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6167 args = self._parse_csv(self._parse_bitwise) 6168 6169 if self._match(TokenType.IN): 6170 return self.expression( 6171 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6172 ) 6173 6174 if haystack_first: 6175 haystack = seq_get(args, 0) 6176 needle = seq_get(args, 1) 6177 else: 6178 needle = seq_get(args, 0) 6179 haystack = seq_get(args, 1) 6180 6181 return self.expression( 6182 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6183 ) 6184 6185 def _parse_predict(self) -> exp.Predict: 6186 self._match_text_seq("MODEL") 6187 this = self._parse_table() 6188 6189 self._match(TokenType.COMMA) 6190 self._match_text_seq("TABLE") 6191 6192 return self.expression( 6193 exp.Predict, 6194 this=this, 6195 expression=self._parse_table(), 6196 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6197 ) 6198 6199 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6200 args = self._parse_csv(self._parse_table) 6201 return exp.JoinHint(this=func_name.upper(), expressions=args) 6202 6203 def _parse_substring(self) -> exp.Substring: 6204 # Postgres supports the form: substring(string [from int] [for int]) 6205 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6206 6207 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6208 6209 if self._match(TokenType.FROM): 6210 args.append(self._parse_bitwise()) 6211 if self._match(TokenType.FOR): 6212 if len(args) == 1: 6213 args.append(exp.Literal.number(1)) 6214 args.append(self._parse_bitwise()) 6215 6216 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6217 6218 def _parse_trim(self) -> exp.Trim: 6219 # https://www.w3resource.com/sql/character-functions/trim.php 6220 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6221 6222 position = None 6223 collation = None 6224 expression = None 6225 6226 if self._match_texts(self.TRIM_TYPES): 6227 position = self._prev.text.upper() 6228 6229 this = self._parse_bitwise() 6230 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6231 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6232 expression = self._parse_bitwise() 6233 6234 if invert_order: 6235 this, expression = expression, this 6236 6237 if self._match(TokenType.COLLATE): 6238 collation = self._parse_bitwise() 6239 6240 return self.expression( 6241 exp.Trim, this=this, position=position, expression=expression, collation=collation 6242 ) 6243 6244 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6245 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6246 6247 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6248 return self._parse_window(self._parse_id_var(), alias=True) 6249 6250 def _parse_respect_or_ignore_nulls( 6251 self, this: t.Optional[exp.Expression] 6252 ) -> t.Optional[exp.Expression]: 6253 if self._match_text_seq("IGNORE", "NULLS"): 
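            # e.g. FIRST_VALUE(x IGNORE NULLS): the parsed argument is wrapped in
            # exp.IgnoreNulls here (and RESPECT NULLS in exp.RespectNulls below).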
6254            return self.expression(exp.IgnoreNulls, this=this)
6255        if self._match_text_seq("RESPECT", "NULLS"):
6256            return self.expression(exp.RespectNulls, this=this)
6257        return this
6258
6259    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
6260        if self._match(TokenType.HAVING):
6261            self._match_texts(("MAX", "MIN"))
6262            max = self._prev.text.upper() != "MIN"
6263            return self.expression(
6264                exp.HavingMax, this=this, expression=self._parse_column(), max=max
6265            )
6266
6267        return this
6268
6269    def _parse_window(
6270        self, this: t.Optional[exp.Expression], alias: bool = False
6271    ) -> t.Optional[exp.Expression]:
6272        func = this
6273        comments = func.comments if isinstance(func, exp.Expression) else None
6274
6275        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
6276            self._match(TokenType.WHERE)
6277            this = self.expression(
6278                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
6279            )
6280            self._match_r_paren()
6281
6282        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
6283        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
6284        if self._match_text_seq("WITHIN", "GROUP"):
6285            order = self._parse_wrapped(self._parse_order)
6286            this = self.expression(exp.WithinGroup, this=this, expression=order)
6287
6288        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
6289        # Some dialects choose to implement it and some do not.
6290        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
6291
6292        # There is some code above in _parse_lambda that handles
6293        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
6294
6295        # The code below handles
6296        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
6297
6298        # Oracle allows both formats
6299        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
6300        # and Snowflake chose to do the same for familiarity
6301        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
6302        if isinstance(this, exp.AggFunc):
6303            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)
6304
6305            if ignore_respect and ignore_respect is not this:
6306                ignore_respect.replace(ignore_respect.this)
6307                this = self.expression(ignore_respect.__class__, this=this)
6308
6309        this = self._parse_respect_or_ignore_nulls(this)
6310
6311        # bigquery select from window x AS (partition by ...)
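        # e.g. SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y):
        # _parse_named_window calls this method with alias=True, so the AS token is
        # consumed here and no OVER keyword is expected.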
6312 if alias: 6313 over = None 6314 self._match(TokenType.ALIAS) 6315 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6316 return this 6317 else: 6318 over = self._prev.text.upper() 6319 6320 if comments and isinstance(func, exp.Expression): 6321 func.pop_comments() 6322 6323 if not self._match(TokenType.L_PAREN): 6324 return self.expression( 6325 exp.Window, 6326 comments=comments, 6327 this=this, 6328 alias=self._parse_id_var(False), 6329 over=over, 6330 ) 6331 6332 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6333 6334 first = self._match(TokenType.FIRST) 6335 if self._match_text_seq("LAST"): 6336 first = False 6337 6338 partition, order = self._parse_partition_and_order() 6339 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6340 6341 if kind: 6342 self._match(TokenType.BETWEEN) 6343 start = self._parse_window_spec() 6344 self._match(TokenType.AND) 6345 end = self._parse_window_spec() 6346 6347 spec = self.expression( 6348 exp.WindowSpec, 6349 kind=kind, 6350 start=start["value"], 6351 start_side=start["side"], 6352 end=end["value"], 6353 end_side=end["side"], 6354 ) 6355 else: 6356 spec = None 6357 6358 self._match_r_paren() 6359 6360 window = self.expression( 6361 exp.Window, 6362 comments=comments, 6363 this=this, 6364 partition_by=partition, 6365 order=order, 6366 spec=spec, 6367 alias=window_alias, 6368 over=over, 6369 first=first, 6370 ) 6371 6372 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6373 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6374 return self._parse_window(window, alias=alias) 6375 6376 return window 6377 6378 def _parse_partition_and_order( 6379 self, 6380 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6381 return self._parse_partition_by(), self._parse_order() 6382 6383 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6384 self._match(TokenType.BETWEEN) 6385 6386 return { 6387 "value": ( 6388 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6389 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6390 or self._parse_bitwise() 6391 ), 6392 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6393 } 6394 6395 def _parse_alias( 6396 self, this: t.Optional[exp.Expression], explicit: bool = False 6397 ) -> t.Optional[exp.Expression]: 6398 any_token = self._match(TokenType.ALIAS) 6399 comments = self._prev_comments or [] 6400 6401 if explicit and not any_token: 6402 return this 6403 6404 if self._match(TokenType.L_PAREN): 6405 aliases = self.expression( 6406 exp.Aliases, 6407 comments=comments, 6408 this=this, 6409 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6410 ) 6411 self._match_r_paren(aliases) 6412 return aliases 6413 6414 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6415 self.STRING_ALIASES and self._parse_string_as_identifier() 6416 ) 6417 6418 if alias: 6419 comments.extend(alias.pop_comments()) 6420 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6421 column = this.this 6422 6423 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6424 if not this.comments and column and column.comments: 6425 this.comments = column.pop_comments() 6426 6427 return this 6428 6429 def _parse_id_var( 6430 self, 6431 any_token: bool = True, 6432 tokens: t.Optional[t.Collection[TokenType]] = None, 6433 ) -> t.Optional[exp.Expression]: 6434 expression = self._parse_identifier() 6435 if 
not expression and ( 6436 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6437 ): 6438 quoted = self._prev.token_type == TokenType.STRING 6439 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6440 6441 return expression 6442 6443 def _parse_string(self) -> t.Optional[exp.Expression]: 6444 if self._match_set(self.STRING_PARSERS): 6445 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6446 return self._parse_placeholder() 6447 6448 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6449 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6450 6451 def _parse_number(self) -> t.Optional[exp.Expression]: 6452 if self._match_set(self.NUMERIC_PARSERS): 6453 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6454 return self._parse_placeholder() 6455 6456 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6457 if self._match(TokenType.IDENTIFIER): 6458 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6459 return self._parse_placeholder() 6460 6461 def _parse_var( 6462 self, 6463 any_token: bool = False, 6464 tokens: t.Optional[t.Collection[TokenType]] = None, 6465 upper: bool = False, 6466 ) -> t.Optional[exp.Expression]: 6467 if ( 6468 (any_token and self._advance_any()) 6469 or self._match(TokenType.VAR) 6470 or (self._match_set(tokens) if tokens else False) 6471 ): 6472 return self.expression( 6473 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6474 ) 6475 return self._parse_placeholder() 6476 6477 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6478 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6479 self._advance() 6480 return self._prev 6481 return None 6482 6483 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6484 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6485 6486 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6487 return self._parse_primary() or self._parse_var(any_token=True) 6488 6489 def _parse_null(self) -> t.Optional[exp.Expression]: 6490 if self._match_set(self.NULL_TOKENS): 6491 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6492 return self._parse_placeholder() 6493 6494 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6495 if self._match(TokenType.TRUE): 6496 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6497 if self._match(TokenType.FALSE): 6498 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6499 return self._parse_placeholder() 6500 6501 def _parse_star(self) -> t.Optional[exp.Expression]: 6502 if self._match(TokenType.STAR): 6503 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6504 return self._parse_placeholder() 6505 6506 def _parse_parameter(self) -> exp.Parameter: 6507 this = self._parse_identifier() or self._parse_primary_or_var() 6508 return self.expression(exp.Parameter, this=this) 6509 6510 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6511 if self._match_set(self.PLACEHOLDER_PARSERS): 6512 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6513 if placeholder: 6514 return placeholder 6515 self._advance(-1) 6516 return None 6517 6518 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6519 if not self._match_texts(keywords): 6520 return None 6521 if self._match(TokenType.L_PAREN, 
advance=False): 6522 return self._parse_wrapped_csv(self._parse_expression) 6523 6524 expression = self._parse_expression() 6525 return [expression] if expression else None 6526 6527 def _parse_csv( 6528 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6529 ) -> t.List[exp.Expression]: 6530 parse_result = parse_method() 6531 items = [parse_result] if parse_result is not None else [] 6532 6533 while self._match(sep): 6534 self._add_comments(parse_result) 6535 parse_result = parse_method() 6536 if parse_result is not None: 6537 items.append(parse_result) 6538 6539 return items 6540 6541 def _parse_tokens( 6542 self, parse_method: t.Callable, expressions: t.Dict 6543 ) -> t.Optional[exp.Expression]: 6544 this = parse_method() 6545 6546 while self._match_set(expressions): 6547 this = self.expression( 6548 expressions[self._prev.token_type], 6549 this=this, 6550 comments=self._prev_comments, 6551 expression=parse_method(), 6552 ) 6553 6554 return this 6555 6556 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6557 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6558 6559 def _parse_wrapped_csv( 6560 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6561 ) -> t.List[exp.Expression]: 6562 return self._parse_wrapped( 6563 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6564 ) 6565 6566 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6567 wrapped = self._match(TokenType.L_PAREN) 6568 if not wrapped and not optional: 6569 self.raise_error("Expecting (") 6570 parse_result = parse_method() 6571 if wrapped: 6572 self._match_r_paren() 6573 return parse_result 6574 6575 def _parse_expressions(self) -> t.List[exp.Expression]: 6576 return self._parse_csv(self._parse_expression) 6577 6578 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6579 return self._parse_select() or self._parse_set_operations( 6580 self._parse_expression() if alias else self._parse_assignment() 6581 ) 6582 6583 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6584 return self._parse_query_modifiers( 6585 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6586 ) 6587 6588 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6589 this = None 6590 if self._match_texts(self.TRANSACTION_KIND): 6591 this = self._prev.text 6592 6593 self._match_texts(("TRANSACTION", "WORK")) 6594 6595 modes = [] 6596 while True: 6597 mode = [] 6598 while self._match(TokenType.VAR): 6599 mode.append(self._prev.text) 6600 6601 if mode: 6602 modes.append(" ".join(mode)) 6603 if not self._match(TokenType.COMMA): 6604 break 6605 6606 return self.expression(exp.Transaction, this=this, modes=modes) 6607 6608 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6609 chain = None 6610 savepoint = None 6611 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6612 6613 self._match_texts(("TRANSACTION", "WORK")) 6614 6615 if self._match_text_seq("TO"): 6616 self._match_text_seq("SAVEPOINT") 6617 savepoint = self._parse_id_var() 6618 6619 if self._match(TokenType.AND): 6620 chain = not self._match_text_seq("NO") 6621 self._match_text_seq("CHAIN") 6622 6623 if is_rollback: 6624 return self.expression(exp.Rollback, savepoint=savepoint) 6625 6626 return self.expression(exp.Commit, chain=chain) 6627 6628 def _parse_refresh(self) -> exp.Refresh: 6629 self._match(TokenType.TABLE) 6630 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6631 6632 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6633 if not self._match_text_seq("ADD"): 6634 return None 6635 6636 self._match(TokenType.COLUMN) 6637 exists_column = self._parse_exists(not_=True) 6638 expression = self._parse_field_def() 6639 6640 if expression: 6641 expression.set("exists", exists_column) 6642 6643 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6644 if self._match_texts(("FIRST", "AFTER")): 6645 position = self._prev.text 6646 column_position = self.expression( 6647 exp.ColumnPosition, this=self._parse_column(), position=position 6648 ) 6649 expression.set("position", column_position) 6650 6651 return expression 6652 6653 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6654 drop = self._match(TokenType.DROP) and self._parse_drop() 6655 if drop and not isinstance(drop, exp.Command): 6656 drop.set("kind", drop.args.get("kind", "COLUMN")) 6657 return drop 6658 6659 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6660 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6661 return self.expression( 6662 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6663 ) 6664 6665 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6666 index = self._index - 1 6667 6668 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6669 return self._parse_csv( 6670 lambda: self.expression( 6671 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6672 ) 6673 ) 6674 6675 self._retreat(index) 6676 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6677 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6678 6679 if self._match_text_seq("ADD", "COLUMNS"): 6680 schema = self._parse_schema() 6681 if schema: 6682 return [schema] 6683 return [] 6684 6685 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6686 6687 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6688 if self._match_texts(self.ALTER_ALTER_PARSERS): 6689 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6690 6691 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6692 # keyword after ALTER we default to parsing this statement 6693 self._match(TokenType.COLUMN) 6694 column = self._parse_field(any_token=True) 6695 6696 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6697 return self.expression(exp.AlterColumn, this=column, drop=True) 6698 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6699 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6700 if self._match(TokenType.COMMENT): 6701 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6702 if self._match_text_seq("DROP", "NOT", "NULL"): 6703 return self.expression( 6704 exp.AlterColumn, 6705 this=column, 6706 drop=True, 6707 allow_null=True, 6708 ) 6709 if self._match_text_seq("SET", "NOT", "NULL"): 6710 return self.expression( 6711 exp.AlterColumn, 6712 this=column, 6713 allow_null=False, 6714 ) 6715 self._match_text_seq("SET", "DATA") 6716 self._match_text_seq("TYPE") 6717 return self.expression( 6718 exp.AlterColumn, 6719 this=column, 6720 dtype=self._parse_types(), 6721 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6722 using=self._match(TokenType.USING) and 
            self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately with _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege), ON
        # (end of privilege list) or L_PAREN (start of column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
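
The Parser class below is configured almost entirely through class-level tables that dialect subclasses override or extend. For instance, FUNCTIONS maps uppercased function names to builder callables that turn parsed argument lists into typed expressions. A hedged sketch of the usual extension pattern; the REVERSE_MOD name is hypothetical and used only for illustration:

    from sqlglot import exp, parser

    class CustomParser(parser.Parser):
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # Hypothetical function: MOD with its two arguments swapped.
            "REVERSE_MOD": lambda args: exp.Mod(this=args[1], expression=args[0]),
        }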
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 TokenType.UNION, 265 } 266 267 NESTED_TYPE_TOKENS = { 268 TokenType.ARRAY, 269 TokenType.LIST, 270 TokenType.LOWCARDINALITY, 271 TokenType.MAP, 272 TokenType.NULLABLE, 273 TokenType.RANGE, 274 *STRUCT_TYPE_TOKENS, 275 } 276 277 ENUM_TYPE_TOKENS = { 278 TokenType.ENUM, 279 TokenType.ENUM8, 280 TokenType.ENUM16, 281 } 282 283 AGGREGATE_TYPE_TOKENS = { 284 TokenType.AGGREGATEFUNCTION, 285 TokenType.SIMPLEAGGREGATEFUNCTION, 286 } 287 288 TYPE_TOKENS = { 289 TokenType.BIT, 290 TokenType.BOOLEAN, 291 TokenType.TINYINT, 292 TokenType.UTINYINT, 293 TokenType.SMALLINT, 294 TokenType.USMALLINT, 295 TokenType.INT, 296 TokenType.UINT, 297 TokenType.BIGINT, 298 TokenType.UBIGINT, 299 TokenType.INT128, 300 TokenType.UINT128, 301 TokenType.INT256, 302 TokenType.UINT256, 303 TokenType.MEDIUMINT, 304 TokenType.UMEDIUMINT, 305 TokenType.FIXEDSTRING, 306 TokenType.FLOAT, 307 TokenType.DOUBLE, 308 TokenType.CHAR, 309 TokenType.NCHAR, 310 TokenType.VARCHAR, 311 TokenType.NVARCHAR, 312 TokenType.BPCHAR, 313 TokenType.TEXT, 314 TokenType.MEDIUMTEXT, 315 TokenType.LONGTEXT, 316 TokenType.MEDIUMBLOB, 317 TokenType.LONGBLOB, 318 TokenType.BINARY, 319 TokenType.VARBINARY, 320 TokenType.JSON, 321 TokenType.JSONB, 322 TokenType.INTERVAL, 323 TokenType.TINYBLOB, 324 TokenType.TINYTEXT, 325 TokenType.TIME, 326 TokenType.TIMETZ, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMP_S, 329 TokenType.TIMESTAMP_MS, 330 TokenType.TIMESTAMP_NS, 331 TokenType.TIMESTAMPTZ, 332 TokenType.TIMESTAMPLTZ, 333 TokenType.TIMESTAMPNTZ, 334 TokenType.DATETIME, 335 TokenType.DATETIME64, 336 TokenType.DATE, 337 TokenType.DATE32, 338 TokenType.INT4RANGE, 339 TokenType.INT4MULTIRANGE, 340 TokenType.INT8RANGE, 341 TokenType.INT8MULTIRANGE, 342 TokenType.NUMRANGE, 343 TokenType.NUMMULTIRANGE, 344 TokenType.TSRANGE, 345 TokenType.TSMULTIRANGE, 346 TokenType.TSTZRANGE, 347 TokenType.TSTZMULTIRANGE, 348 TokenType.DATERANGE, 349 TokenType.DATEMULTIRANGE, 350 TokenType.DECIMAL, 351 TokenType.DECIMAL32, 352 TokenType.DECIMAL64, 353 TokenType.DECIMAL128, 354 TokenType.UDECIMAL, 355 TokenType.BIGDECIMAL, 356 TokenType.UUID, 357 TokenType.GEOGRAPHY, 358 TokenType.GEOMETRY, 359 TokenType.HLLSKETCH, 360 TokenType.HSTORE, 361 TokenType.PSEUDO_TYPE, 362 TokenType.SUPER, 363 TokenType.SERIAL, 364 TokenType.SMALLSERIAL, 365 TokenType.BIGSERIAL, 366 TokenType.XML, 367 TokenType.YEAR, 368 TokenType.UNIQUEIDENTIFIER, 369 TokenType.USERDEFINED, 370 TokenType.MONEY, 371 TokenType.SMALLMONEY, 372 TokenType.ROWVERSION, 373 TokenType.IMAGE, 374 TokenType.VARIANT, 375 TokenType.VECTOR, 376 TokenType.OBJECT, 377 TokenType.OBJECT_IDENTIFIER, 378 TokenType.INET, 379 TokenType.IPADDRESS, 380 TokenType.IPPREFIX, 381 TokenType.IPV4, 382 TokenType.IPV6, 383 TokenType.UNKNOWN, 384 TokenType.NULL, 385 TokenType.NAME, 386 TokenType.TDIGEST, 387 *ENUM_TYPE_TOKENS, 388 *NESTED_TYPE_TOKENS, 389 *AGGREGATE_TYPE_TOKENS, 390 } 391 392 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 393 TokenType.BIGINT: TokenType.UBIGINT, 394 TokenType.INT: TokenType.UINT, 395 TokenType.MEDIUMINT: 
TokenType.UMEDIUMINT, 396 TokenType.SMALLINT: TokenType.USMALLINT, 397 TokenType.TINYINT: TokenType.UTINYINT, 398 TokenType.DECIMAL: TokenType.UDECIMAL, 399 } 400 401 SUBQUERY_PREDICATES = { 402 TokenType.ANY: exp.Any, 403 TokenType.ALL: exp.All, 404 TokenType.EXISTS: exp.Exists, 405 TokenType.SOME: exp.Any, 406 } 407 408 RESERVED_TOKENS = { 409 *Tokenizer.SINGLE_TOKENS.values(), 410 TokenType.SELECT, 411 } - {TokenType.IDENTIFIER} 412 413 DB_CREATABLES = { 414 TokenType.DATABASE, 415 TokenType.DICTIONARY, 416 TokenType.MODEL, 417 TokenType.SCHEMA, 418 TokenType.SEQUENCE, 419 TokenType.STORAGE_INTEGRATION, 420 TokenType.TABLE, 421 TokenType.TAG, 422 TokenType.VIEW, 423 TokenType.WAREHOUSE, 424 TokenType.STREAMLIT, 425 } 426 427 CREATABLES = { 428 TokenType.COLUMN, 429 TokenType.CONSTRAINT, 430 TokenType.FOREIGN_KEY, 431 TokenType.FUNCTION, 432 TokenType.INDEX, 433 TokenType.PROCEDURE, 434 *DB_CREATABLES, 435 } 436 437 ALTERABLES = { 438 TokenType.INDEX, 439 TokenType.TABLE, 440 TokenType.VIEW, 441 } 442 443 # Tokens that can represent identifiers 444 ID_VAR_TOKENS = { 445 TokenType.ALL, 446 TokenType.VAR, 447 TokenType.ANTI, 448 TokenType.APPLY, 449 TokenType.ASC, 450 TokenType.ASOF, 451 TokenType.AUTO_INCREMENT, 452 TokenType.BEGIN, 453 TokenType.BPCHAR, 454 TokenType.CACHE, 455 TokenType.CASE, 456 TokenType.COLLATE, 457 TokenType.COMMAND, 458 TokenType.COMMENT, 459 TokenType.COMMIT, 460 TokenType.CONSTRAINT, 461 TokenType.COPY, 462 TokenType.CUBE, 463 TokenType.DEFAULT, 464 TokenType.DELETE, 465 TokenType.DESC, 466 TokenType.DESCRIBE, 467 TokenType.DICTIONARY, 468 TokenType.DIV, 469 TokenType.END, 470 TokenType.EXECUTE, 471 TokenType.ESCAPE, 472 TokenType.FALSE, 473 TokenType.FIRST, 474 TokenType.FILTER, 475 TokenType.FINAL, 476 TokenType.FORMAT, 477 TokenType.FULL, 478 TokenType.IDENTIFIER, 479 TokenType.IS, 480 TokenType.ISNULL, 481 TokenType.INTERVAL, 482 TokenType.KEEP, 483 TokenType.KILL, 484 TokenType.LEFT, 485 TokenType.LOAD, 486 TokenType.MERGE, 487 TokenType.NATURAL, 488 TokenType.NEXT, 489 TokenType.OFFSET, 490 TokenType.OPERATOR, 491 TokenType.ORDINALITY, 492 TokenType.OVERLAPS, 493 TokenType.OVERWRITE, 494 TokenType.PARTITION, 495 TokenType.PERCENT, 496 TokenType.PIVOT, 497 TokenType.PRAGMA, 498 TokenType.RANGE, 499 TokenType.RECURSIVE, 500 TokenType.REFERENCES, 501 TokenType.REFRESH, 502 TokenType.RENAME, 503 TokenType.REPLACE, 504 TokenType.RIGHT, 505 TokenType.ROLLUP, 506 TokenType.ROW, 507 TokenType.ROWS, 508 TokenType.SEMI, 509 TokenType.SET, 510 TokenType.SETTINGS, 511 TokenType.SHOW, 512 TokenType.TEMPORARY, 513 TokenType.TOP, 514 TokenType.TRUE, 515 TokenType.TRUNCATE, 516 TokenType.UNIQUE, 517 TokenType.UNNEST, 518 TokenType.UNPIVOT, 519 TokenType.UPDATE, 520 TokenType.USE, 521 TokenType.VOLATILE, 522 TokenType.WINDOW, 523 *CREATABLES, 524 *SUBQUERY_PREDICATES, 525 *TYPE_TOKENS, 526 *NO_PAREN_FUNCTIONS, 527 } 528 ID_VAR_TOKENS.remove(TokenType.UNION) 529 530 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 531 532 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 533 TokenType.ANTI, 534 TokenType.APPLY, 535 TokenType.ASOF, 536 TokenType.FULL, 537 TokenType.LEFT, 538 TokenType.LOCK, 539 TokenType.NATURAL, 540 TokenType.OFFSET, 541 TokenType.RIGHT, 542 TokenType.SEMI, 543 TokenType.WINDOW, 544 } 545 546 ALIAS_TOKENS = ID_VAR_TOKENS 547 548 ARRAY_CONSTRUCTORS = { 549 "ARRAY": exp.Array, 550 "LIST": exp.List, 551 } 552 553 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 554 555 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 556 557 TRIM_TYPES = 
{"LEADING", "TRAILING", "BOTH"} 558 559 FUNC_TOKENS = { 560 TokenType.COLLATE, 561 TokenType.COMMAND, 562 TokenType.CURRENT_DATE, 563 TokenType.CURRENT_DATETIME, 564 TokenType.CURRENT_TIMESTAMP, 565 TokenType.CURRENT_TIME, 566 TokenType.CURRENT_USER, 567 TokenType.FILTER, 568 TokenType.FIRST, 569 TokenType.FORMAT, 570 TokenType.GLOB, 571 TokenType.IDENTIFIER, 572 TokenType.INDEX, 573 TokenType.ISNULL, 574 TokenType.ILIKE, 575 TokenType.INSERT, 576 TokenType.LIKE, 577 TokenType.MERGE, 578 TokenType.OFFSET, 579 TokenType.PRIMARY_KEY, 580 TokenType.RANGE, 581 TokenType.REPLACE, 582 TokenType.RLIKE, 583 TokenType.ROW, 584 TokenType.UNNEST, 585 TokenType.VAR, 586 TokenType.LEFT, 587 TokenType.RIGHT, 588 TokenType.SEQUENCE, 589 TokenType.DATE, 590 TokenType.DATETIME, 591 TokenType.TABLE, 592 TokenType.TIMESTAMP, 593 TokenType.TIMESTAMPTZ, 594 TokenType.TRUNCATE, 595 TokenType.WINDOW, 596 TokenType.XOR, 597 *TYPE_TOKENS, 598 *SUBQUERY_PREDICATES, 599 } 600 601 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 602 TokenType.AND: exp.And, 603 } 604 605 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 606 TokenType.COLON_EQ: exp.PropertyEQ, 607 } 608 609 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 610 TokenType.OR: exp.Or, 611 } 612 613 EQUALITY = { 614 TokenType.EQ: exp.EQ, 615 TokenType.NEQ: exp.NEQ, 616 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 617 } 618 619 COMPARISON = { 620 TokenType.GT: exp.GT, 621 TokenType.GTE: exp.GTE, 622 TokenType.LT: exp.LT, 623 TokenType.LTE: exp.LTE, 624 } 625 626 BITWISE = { 627 TokenType.AMP: exp.BitwiseAnd, 628 TokenType.CARET: exp.BitwiseXor, 629 TokenType.PIPE: exp.BitwiseOr, 630 } 631 632 TERM = { 633 TokenType.DASH: exp.Sub, 634 TokenType.PLUS: exp.Add, 635 TokenType.MOD: exp.Mod, 636 TokenType.COLLATE: exp.Collate, 637 } 638 639 FACTOR = { 640 TokenType.DIV: exp.IntDiv, 641 TokenType.LR_ARROW: exp.Distance, 642 TokenType.SLASH: exp.Div, 643 TokenType.STAR: exp.Mul, 644 } 645 646 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 647 648 TIMES = { 649 TokenType.TIME, 650 TokenType.TIMETZ, 651 } 652 653 TIMESTAMPS = { 654 TokenType.TIMESTAMP, 655 TokenType.TIMESTAMPTZ, 656 TokenType.TIMESTAMPLTZ, 657 *TIMES, 658 } 659 660 SET_OPERATIONS = { 661 TokenType.UNION, 662 TokenType.INTERSECT, 663 TokenType.EXCEPT, 664 } 665 666 JOIN_METHODS = { 667 TokenType.ASOF, 668 TokenType.NATURAL, 669 TokenType.POSITIONAL, 670 } 671 672 JOIN_SIDES = { 673 TokenType.LEFT, 674 TokenType.RIGHT, 675 TokenType.FULL, 676 } 677 678 JOIN_KINDS = { 679 TokenType.ANTI, 680 TokenType.CROSS, 681 TokenType.INNER, 682 TokenType.OUTER, 683 TokenType.SEMI, 684 TokenType.STRAIGHT_JOIN, 685 } 686 687 JOIN_HINTS: t.Set[str] = set() 688 689 LAMBDAS = { 690 TokenType.ARROW: lambda self, expressions: self.expression( 691 exp.Lambda, 692 this=self._replace_lambda( 693 self._parse_assignment(), 694 expressions, 695 ), 696 expressions=expressions, 697 ), 698 TokenType.FARROW: lambda self, expressions: self.expression( 699 exp.Kwarg, 700 this=exp.var(expressions[0].name), 701 expression=self._parse_assignment(), 702 ), 703 } 704 705 COLUMN_OPERATORS = { 706 TokenType.DOT: None, 707 TokenType.DCOLON: lambda self, this, to: self.expression( 708 exp.Cast if self.STRICT_CAST else exp.TryCast, 709 this=this, 710 to=to, 711 ), 712 TokenType.ARROW: lambda self, this, path: self.expression( 713 exp.JSONExtract, 714 this=this, 715 expression=self.dialect.to_json_path(path), 716 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 717 ), 718 TokenType.DARROW: lambda self, this, 
path: self.expression( 719 exp.JSONExtractScalar, 720 this=this, 721 expression=self.dialect.to_json_path(path), 722 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 723 ), 724 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 725 exp.JSONBExtract, 726 this=this, 727 expression=path, 728 ), 729 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 730 exp.JSONBExtractScalar, 731 this=this, 732 expression=path, 733 ), 734 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 735 exp.JSONBContains, 736 this=this, 737 expression=key, 738 ), 739 } 740 741 EXPRESSION_PARSERS = { 742 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 743 exp.Column: lambda self: self._parse_column(), 744 exp.Condition: lambda self: self._parse_assignment(), 745 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 746 exp.Expression: lambda self: self._parse_expression(), 747 exp.From: lambda self: self._parse_from(joins=True), 748 exp.Group: lambda self: self._parse_group(), 749 exp.Having: lambda self: self._parse_having(), 750 exp.Identifier: lambda self: self._parse_id_var(), 751 exp.Join: lambda self: self._parse_join(), 752 exp.Lambda: lambda self: self._parse_lambda(), 753 exp.Lateral: lambda self: self._parse_lateral(), 754 exp.Limit: lambda self: self._parse_limit(), 755 exp.Offset: lambda self: self._parse_offset(), 756 exp.Order: lambda self: self._parse_order(), 757 exp.Ordered: lambda self: self._parse_ordered(), 758 exp.Properties: lambda self: self._parse_properties(), 759 exp.Qualify: lambda self: self._parse_qualify(), 760 exp.Returning: lambda self: self._parse_returning(), 761 exp.Select: lambda self: self._parse_select(), 762 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 763 exp.Table: lambda self: self._parse_table_parts(), 764 exp.TableAlias: lambda self: self._parse_table_alias(), 765 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 766 exp.Where: lambda self: self._parse_where(), 767 exp.Window: lambda self: self._parse_named_window(), 768 exp.With: lambda self: self._parse_with(), 769 "JOIN_TYPE": lambda self: self._parse_join_parts(), 770 } 771 772 STATEMENT_PARSERS = { 773 TokenType.ALTER: lambda self: self._parse_alter(), 774 TokenType.BEGIN: lambda self: self._parse_transaction(), 775 TokenType.CACHE: lambda self: self._parse_cache(), 776 TokenType.COMMENT: lambda self: self._parse_comment(), 777 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 778 TokenType.COPY: lambda self: self._parse_copy(), 779 TokenType.CREATE: lambda self: self._parse_create(), 780 TokenType.DELETE: lambda self: self._parse_delete(), 781 TokenType.DESC: lambda self: self._parse_describe(), 782 TokenType.DESCRIBE: lambda self: self._parse_describe(), 783 TokenType.DROP: lambda self: self._parse_drop(), 784 TokenType.GRANT: lambda self: self._parse_grant(), 785 TokenType.INSERT: lambda self: self._parse_insert(), 786 TokenType.KILL: lambda self: self._parse_kill(), 787 TokenType.LOAD: lambda self: self._parse_load(), 788 TokenType.MERGE: lambda self: self._parse_merge(), 789 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 790 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 791 TokenType.REFRESH: lambda self: self._parse_refresh(), 792 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 793 TokenType.SET: lambda self: self._parse_set(), 794 TokenType.TRUNCATE: lambda self: 
self._parse_truncate_table(), 795 TokenType.UNCACHE: lambda self: self._parse_uncache(), 796 TokenType.UPDATE: lambda self: self._parse_update(), 797 TokenType.USE: lambda self: self.expression( 798 exp.Use, 799 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 800 this=self._parse_table(schema=False), 801 ), 802 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 803 } 804 805 UNARY_PARSERS = { 806 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 807 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 808 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 809 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 810 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 811 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 812 } 813 814 STRING_PARSERS = { 815 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 816 exp.RawString, this=token.text 817 ), 818 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 819 exp.National, this=token.text 820 ), 821 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 822 TokenType.STRING: lambda self, token: self.expression( 823 exp.Literal, this=token.text, is_string=True 824 ), 825 TokenType.UNICODE_STRING: lambda self, token: self.expression( 826 exp.UnicodeString, 827 this=token.text, 828 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 829 ), 830 } 831 832 NUMERIC_PARSERS = { 833 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 834 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 835 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 836 TokenType.NUMBER: lambda self, token: self.expression( 837 exp.Literal, this=token.text, is_string=False 838 ), 839 } 840 841 PRIMARY_PARSERS = { 842 **STRING_PARSERS, 843 **NUMERIC_PARSERS, 844 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 845 TokenType.NULL: lambda self, _: self.expression(exp.Null), 846 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 847 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 848 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 849 TokenType.STAR: lambda self, _: self._parse_star_ops(), 850 } 851 852 PLACEHOLDER_PARSERS = { 853 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 854 TokenType.PARAMETER: lambda self: self._parse_parameter(), 855 TokenType.COLON: lambda self: ( 856 self.expression(exp.Placeholder, this=self._prev.text) 857 if self._match_set(self.ID_VAR_TOKENS) 858 else None 859 ), 860 } 861 862 RANGE_PARSERS = { 863 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 864 TokenType.GLOB: binary_range_parser(exp.Glob), 865 TokenType.ILIKE: binary_range_parser(exp.ILike), 866 TokenType.IN: lambda self, this: self._parse_in(this), 867 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 868 TokenType.IS: lambda self, this: self._parse_is(this), 869 TokenType.LIKE: binary_range_parser(exp.Like), 870 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 871 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 872 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 873 TokenType.FOR: lambda self, 
this: self._parse_comprehension(this), 874 } 875 876 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 877 "ALLOWED_VALUES": lambda self: self.expression( 878 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 879 ), 880 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 881 "AUTO": lambda self: self._parse_auto_property(), 882 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 883 "BACKUP": lambda self: self.expression( 884 exp.BackupProperty, this=self._parse_var(any_token=True) 885 ), 886 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 887 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 888 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 889 "CHECKSUM": lambda self: self._parse_checksum(), 890 "CLUSTER BY": lambda self: self._parse_cluster(), 891 "CLUSTERED": lambda self: self._parse_clustered_by(), 892 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 893 exp.CollateProperty, **kwargs 894 ), 895 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 896 "CONTAINS": lambda self: self._parse_contains_property(), 897 "COPY": lambda self: self._parse_copy_property(), 898 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 899 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 900 "DEFINER": lambda self: self._parse_definer(), 901 "DETERMINISTIC": lambda self: self.expression( 902 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 903 ), 904 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 905 "DUPLICATE": lambda self: self._parse_duplicate(), 906 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 907 "DISTKEY": lambda self: self._parse_distkey(), 908 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 909 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 910 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 911 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 912 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 913 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 914 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 915 "FREESPACE": lambda self: self._parse_freespace(), 916 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 917 "HEAP": lambda self: self.expression(exp.HeapProperty), 918 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 919 "IMMUTABLE": lambda self: self.expression( 920 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 921 ), 922 "INHERITS": lambda self: self.expression( 923 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 924 ), 925 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 926 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 927 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 928 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 929 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 930 "LIKE": lambda self: self._parse_create_like(), 931 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 932 "LOCK": lambda self: self._parse_locking(), 933 "LOCKING": lambda self: self._parse_locking(), 934 "LOG": lambda self, **kwargs: 
self._parse_log(**kwargs), 935 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 936 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 937 "MODIFIES": lambda self: self._parse_modifies_property(), 938 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 939 "NO": lambda self: self._parse_no_property(), 940 "ON": lambda self: self._parse_on_property(), 941 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 942 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 943 "PARTITION": lambda self: self._parse_partitioned_of(), 944 "PARTITION BY": lambda self: self._parse_partitioned_by(), 945 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 946 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 947 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 948 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 949 "READS": lambda self: self._parse_reads_property(), 950 "REMOTE": lambda self: self._parse_remote_with_connection(), 951 "RETURNS": lambda self: self._parse_returns(), 952 "STRICT": lambda self: self.expression(exp.StrictProperty), 953 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 954 "ROW": lambda self: self._parse_row(), 955 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 956 "SAMPLE": lambda self: self.expression( 957 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 958 ), 959 "SECURE": lambda self: self.expression(exp.SecureProperty), 960 "SECURITY": lambda self: self._parse_security(), 961 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 962 "SETTINGS": lambda self: self._parse_settings_property(), 963 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 964 "SORTKEY": lambda self: self._parse_sortkey(), 965 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 966 "STABLE": lambda self: self.expression( 967 exp.StabilityProperty, this=exp.Literal.string("STABLE") 968 ), 969 "STORED": lambda self: self._parse_stored(), 970 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 971 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 972 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 973 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 974 "TO": lambda self: self._parse_to_table(), 975 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 976 "TRANSFORM": lambda self: self.expression( 977 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 978 ), 979 "TTL": lambda self: self._parse_ttl(), 980 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 981 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 982 "VOLATILE": lambda self: self._parse_volatile_property(), 983 "WITH": lambda self: self._parse_with_property(), 984 } 985 986 CONSTRAINT_PARSERS = { 987 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 988 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 989 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 990 "CHARACTER SET": lambda self: self.expression( 991 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 992 ), 993 "CHECK": lambda self: self.expression( 994 exp.CheckColumnConstraint, 995 this=self._parse_wrapped(self._parse_assignment), 996 
enforced=self._match_text_seq("ENFORCED"), 997 ), 998 "COLLATE": lambda self: self.expression( 999 exp.CollateColumnConstraint, 1000 this=self._parse_identifier() or self._parse_column(), 1001 ), 1002 "COMMENT": lambda self: self.expression( 1003 exp.CommentColumnConstraint, this=self._parse_string() 1004 ), 1005 "COMPRESS": lambda self: self._parse_compress(), 1006 "CLUSTERED": lambda self: self.expression( 1007 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1008 ), 1009 "NONCLUSTERED": lambda self: self.expression( 1010 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "DEFAULT": lambda self: self.expression( 1013 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1014 ), 1015 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1016 "EPHEMERAL": lambda self: self.expression( 1017 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1018 ), 1019 "EXCLUDE": lambda self: self.expression( 1020 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1021 ), 1022 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1023 "FORMAT": lambda self: self.expression( 1024 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1025 ), 1026 "GENERATED": lambda self: self._parse_generated_as_identity(), 1027 "IDENTITY": lambda self: self._parse_auto_increment(), 1028 "INLINE": lambda self: self._parse_inline(), 1029 "LIKE": lambda self: self._parse_create_like(), 1030 "NOT": lambda self: self._parse_not_constraint(), 1031 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1032 "ON": lambda self: ( 1033 self._match(TokenType.UPDATE) 1034 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1035 ) 1036 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1037 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1038 "PERIOD": lambda self: self._parse_period_for_system_time(), 1039 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1040 "REFERENCES": lambda self: self._parse_references(match=False), 1041 "TITLE": lambda self: self.expression( 1042 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1043 ), 1044 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1045 "UNIQUE": lambda self: self._parse_unique(), 1046 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1047 "WITH": lambda self: self.expression( 1048 exp.Properties, expressions=self._parse_wrapped_properties() 1049 ), 1050 } 1051 1052 ALTER_PARSERS = { 1053 "ADD": lambda self: self._parse_alter_table_add(), 1054 "ALTER": lambda self: self._parse_alter_table_alter(), 1055 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1056 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1057 "DROP": lambda self: self._parse_alter_table_drop(), 1058 "RENAME": lambda self: self._parse_alter_table_rename(), 1059 "SET": lambda self: self._parse_alter_table_set(), 1060 "AS": lambda self: self._parse_select(), 1061 } 1062 1063 ALTER_ALTER_PARSERS = { 1064 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1065 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1066 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1067 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1068 } 1069 1070 SCHEMA_UNNAMED_CONSTRAINTS = { 1071 "CHECK", 1072 "EXCLUDE", 1073 "FOREIGN KEY", 1074 
"LIKE", 1075 "PERIOD", 1076 "PRIMARY KEY", 1077 "UNIQUE", 1078 } 1079 1080 NO_PAREN_FUNCTION_PARSERS = { 1081 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1082 "CASE": lambda self: self._parse_case(), 1083 "CONNECT_BY_ROOT": lambda self: self.expression( 1084 exp.ConnectByRoot, this=self._parse_column() 1085 ), 1086 "IF": lambda self: self._parse_if(), 1087 "NEXT": lambda self: self._parse_next_value_for(), 1088 } 1089 1090 INVALID_FUNC_NAME_TOKENS = { 1091 TokenType.IDENTIFIER, 1092 TokenType.STRING, 1093 } 1094 1095 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1096 1097 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1098 1099 FUNCTION_PARSERS = { 1100 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1101 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1102 "DECODE": lambda self: self._parse_decode(), 1103 "EXTRACT": lambda self: self._parse_extract(), 1104 "GAP_FILL": lambda self: self._parse_gap_fill(), 1105 "JSON_OBJECT": lambda self: self._parse_json_object(), 1106 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1107 "JSON_TABLE": lambda self: self._parse_json_table(), 1108 "MATCH": lambda self: self._parse_match_against(), 1109 "NORMALIZE": lambda self: self._parse_normalize(), 1110 "OPENJSON": lambda self: self._parse_open_json(), 1111 "OVERLAY": lambda self: self._parse_overlay(), 1112 "POSITION": lambda self: self._parse_position(), 1113 "PREDICT": lambda self: self._parse_predict(), 1114 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1115 "STRING_AGG": lambda self: self._parse_string_agg(), 1116 "SUBSTRING": lambda self: self._parse_substring(), 1117 "TRIM": lambda self: self._parse_trim(), 1118 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1119 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1120 } 1121 1122 QUERY_MODIFIER_PARSERS = { 1123 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1124 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1125 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1126 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1127 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1128 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1129 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1130 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1131 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1132 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1133 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1134 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1135 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1136 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1137 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1138 TokenType.CLUSTER_BY: lambda self: ( 1139 "cluster", 1140 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1141 ), 1142 TokenType.DISTRIBUTE_BY: lambda self: ( 1143 "distribute", 1144 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1145 ), 1146 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1147 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1148 TokenType.START_WITH: lambda self: ("connect", 
self._parse_connect()), 1149 } 1150 1151 SET_PARSERS = { 1152 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1153 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1154 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1155 "TRANSACTION": lambda self: self._parse_set_transaction(), 1156 } 1157 1158 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1159 1160 TYPE_LITERAL_PARSERS = { 1161 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1162 } 1163 1164 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1165 1166 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1167 1168 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1169 1170 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1171 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1172 "ISOLATION": ( 1173 ("LEVEL", "REPEATABLE", "READ"), 1174 ("LEVEL", "READ", "COMMITTED"), 1175 ("LEVEL", "READ", "UNCOMITTED"), 1176 ("LEVEL", "SERIALIZABLE"), 1177 ), 1178 "READ": ("WRITE", "ONLY"), 1179 } 1180 1181 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1182 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1183 ) 1184 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1185 1186 CREATE_SEQUENCE: OPTIONS_TYPE = { 1187 "SCALE": ("EXTEND", "NOEXTEND"), 1188 "SHARD": ("EXTEND", "NOEXTEND"), 1189 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1190 **dict.fromkeys( 1191 ( 1192 "SESSION", 1193 "GLOBAL", 1194 "KEEP", 1195 "NOKEEP", 1196 "ORDER", 1197 "NOORDER", 1198 "NOCACHE", 1199 "CYCLE", 1200 "NOCYCLE", 1201 "NOMINVALUE", 1202 "NOMAXVALUE", 1203 "NOSCALE", 1204 "NOSHARD", 1205 ), 1206 tuple(), 1207 ), 1208 } 1209 1210 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1211 1212 USABLES: OPTIONS_TYPE = dict.fromkeys( 1213 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1214 ) 1215 1216 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1217 1218 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1219 "TYPE": ("EVOLUTION",), 1220 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1221 } 1222 1223 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1224 "NOT": ("ENFORCED",), 1225 "MATCH": ( 1226 "FULL", 1227 "PARTIAL", 1228 "SIMPLE", 1229 ), 1230 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1231 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1232 } 1233 1234 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1235 1236 CLONE_KEYWORDS = {"CLONE", "COPY"} 1237 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1238 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1239 1240 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1241 1242 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1243 1244 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1245 1246 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1247 1248 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1249 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1250 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1251 1252 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1253 1254 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1255 1256 ADD_CONSTRAINT_TOKENS = { 1257 TokenType.CONSTRAINT, 1258 TokenType.FOREIGN_KEY, 1259 TokenType.INDEX, 1260 TokenType.KEY, 1261 TokenType.PRIMARY_KEY, 
1262 TokenType.UNIQUE, 1263 } 1264 1265 DISTINCT_TOKENS = {TokenType.DISTINCT} 1266 1267 NULL_TOKENS = {TokenType.NULL} 1268 1269 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1270 1271 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1272 1273 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1274 1275 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1276 1277 ODBC_DATETIME_LITERALS = { 1278 "d": exp.Date, 1279 "t": exp.Time, 1280 "ts": exp.Timestamp, 1281 } 1282 1283 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1284 1285 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1286 1287 STRICT_CAST = True 1288 1289 PREFIXED_PIVOT_COLUMNS = False 1290 IDENTIFY_PIVOT_STRINGS = False 1291 1292 LOG_DEFAULTS_TO_LN = False 1293 1294 # Whether ADD is present for each column added by ALTER TABLE 1295 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1296 1297 # Whether the table sample clause expects CSV syntax 1298 TABLESAMPLE_CSV = False 1299 1300 # The default method used for table sampling 1301 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1302 1303 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1304 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1305 1306 # Whether the TRIM function expects the characters to trim as its first argument 1307 TRIM_PATTERN_FIRST = False 1308 1309 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1310 STRING_ALIASES = False 1311 1312 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1313 MODIFIERS_ATTACHED_TO_SET_OP = True 1314 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1315 1316 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1317 NO_PAREN_IF_COMMANDS = True 1318 1319 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1320 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1321 1322 # Whether the `:` operator is used to extract a value from a VARIANT column 1323 COLON_IS_VARIANT_EXTRACT = False 1324 1325 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1326 # If this is True and '(' is not found, the keyword will be treated as an identifier 1327 VALUES_FOLLOWED_BY_PAREN = True 1328 1329 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1330 SUPPORTS_IMPLICIT_UNNEST = False 1331 1332 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1333 INTERVAL_SPANS = True 1334 1335 # Whether a PARTITION clause can follow a table reference 1336 SUPPORTS_PARTITION_SELECTION = False 1337 1338 __slots__ = ( 1339 "error_level", 1340 "error_message_context", 1341 "max_errors", 1342 "dialect", 1343 "sql", 1344 "errors", 1345 "_tokens", 1346 "_index", 1347 "_curr", 1348 "_next", 1349 "_prev", 1350 "_prev_comments", 1351 ) 1352 1353 # Autofilled 1354 SHOW_TRIE: t.Dict = {} 1355 SET_TRIE: t.Dict = {} 1356 1357 def __init__( 1358 self, 1359 error_level: t.Optional[ErrorLevel] = None, 1360 error_message_context: int = 100, 1361 max_errors: int = 3, 1362 dialect: DialectType = None, 1363 ): 1364 from sqlglot.dialects import Dialect 1365 1366 self.error_level = error_level or ErrorLevel.IMMEDIATE 1367 self.error_message_context = error_message_context 1368 self.max_errors = max_errors 1369 self.dialect = Dialect.get_or_raise(dialect) 1370 self.reset() 1371 1372 def reset(self): 1373 self.sql = "" 1374 self.errors = [] 1375 self._tokens = [] 1376 self._index = 0 1377 self._curr = None 1378 self._next = None 1379 self._prev = None 1380 self._prev_comments = None 1381 1382 def parse( 1383 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1384 ) -> t.List[t.Optional[exp.Expression]]: 1385 """ 1386 Parses a list of tokens and returns a list of syntax trees, one tree 1387 per parsed SQL statement. 1388 1389 Args: 1390 raw_tokens: The list of tokens. 1391 sql: The original SQL string, used to produce helpful debug messages. 1392 1393 Returns: 1394 The list of the produced syntax trees. 1395 """ 1396 return self._parse( 1397 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1398 ) 1399 1400 def parse_into( 1401 self, 1402 expression_types: exp.IntoType, 1403 raw_tokens: t.List[Token], 1404 sql: t.Optional[str] = None, 1405 ) -> t.List[t.Optional[exp.Expression]]: 1406 """ 1407 Parses a list of tokens into a given Expression type. If a collection of Expression 1408 types is given instead, this method will try to parse the token list into each one 1409 of them, stopping at the first for which the parsing succeeds. 1410 1411 Args: 1412 expression_types: The expression type(s) to try and parse the token list into. 1413 raw_tokens: The list of tokens. 1414 sql: The original SQL string, used to produce helpful debug messages. 1415 1416 Returns: 1417 The target Expression. 
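        Example:
            A minimal sketch, assuming the default dialect and the Tokenizer
            imported at the top of this module (the SQL is illustrative):

                tokens = Tokenizer().tokenize("SELECT a FROM t")
                select = Parser().parse_into(exp.Select, tokens, "SELECT a FROM t")[0]
                assert isinstance(select, exp.Select)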
1418 """ 1419 errors = [] 1420 for expression_type in ensure_list(expression_types): 1421 parser = self.EXPRESSION_PARSERS.get(expression_type) 1422 if not parser: 1423 raise TypeError(f"No parser registered for {expression_type}") 1424 1425 try: 1426 return self._parse(parser, raw_tokens, sql) 1427 except ParseError as e: 1428 e.errors[0]["into_expression"] = expression_type 1429 errors.append(e) 1430 1431 raise ParseError( 1432 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1433 errors=merge_errors(errors), 1434 ) from errors[-1] 1435 1436 def _parse( 1437 self, 1438 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1439 raw_tokens: t.List[Token], 1440 sql: t.Optional[str] = None, 1441 ) -> t.List[t.Optional[exp.Expression]]: 1442 self.reset() 1443 self.sql = sql or "" 1444 1445 total = len(raw_tokens) 1446 chunks: t.List[t.List[Token]] = [[]] 1447 1448 for i, token in enumerate(raw_tokens): 1449 if token.token_type == TokenType.SEMICOLON: 1450 if token.comments: 1451 chunks.append([token]) 1452 1453 if i < total - 1: 1454 chunks.append([]) 1455 else: 1456 chunks[-1].append(token) 1457 1458 expressions = [] 1459 1460 for tokens in chunks: 1461 self._index = -1 1462 self._tokens = tokens 1463 self._advance() 1464 1465 expressions.append(parse_method(self)) 1466 1467 if self._index < len(self._tokens): 1468 self.raise_error("Invalid expression / Unexpected token") 1469 1470 self.check_errors() 1471 1472 return expressions 1473 1474 def check_errors(self) -> None: 1475 """Logs or raises any found errors, depending on the chosen error level setting.""" 1476 if self.error_level == ErrorLevel.WARN: 1477 for error in self.errors: 1478 logger.error(str(error)) 1479 elif self.error_level == ErrorLevel.RAISE and self.errors: 1480 raise ParseError( 1481 concat_messages(self.errors, self.max_errors), 1482 errors=merge_errors(self.errors), 1483 ) 1484 1485 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1486 """ 1487 Appends an error in the list of recorded errors or raises it, depending on the chosen 1488 error level setting. 1489 """ 1490 token = token or self._curr or self._prev or Token.string("") 1491 start = token.start 1492 end = token.end + 1 1493 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1494 highlight = self.sql[start:end] 1495 end_context = self.sql[end : end + self.error_message_context] 1496 1497 error = ParseError.new( 1498 f"{message}. Line {token.line}, Col: {token.col}.\n" 1499 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1500 description=message, 1501 line=token.line, 1502 col=token.col, 1503 start_context=start_context, 1504 highlight=highlight, 1505 end_context=end_context, 1506 ) 1507 1508 if self.error_level == ErrorLevel.IMMEDIATE: 1509 raise error 1510 1511 self.errors.append(error) 1512 1513 def expression( 1514 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1515 ) -> E: 1516 """ 1517 Creates a new, validated Expression. 1518 1519 Args: 1520 exp_class: The expression class to instantiate. 1521 comments: An optional list of comments to attach to the expression. 1522 kwargs: The arguments to set for the expression along with their respective values. 1523 1524 Returns: 1525 The target expression. 
1526 """ 1527 instance = exp_class(**kwargs) 1528 instance.add_comments(comments) if comments else self._add_comments(instance) 1529 return self.validate_expression(instance) 1530 1531 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1532 if expression and self._prev_comments: 1533 expression.add_comments(self._prev_comments) 1534 self._prev_comments = None 1535 1536 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1537 """ 1538 Validates an Expression, making sure that all its mandatory arguments are set. 1539 1540 Args: 1541 expression: The expression to validate. 1542 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1543 1544 Returns: 1545 The validated expression. 1546 """ 1547 if self.error_level != ErrorLevel.IGNORE: 1548 for error_message in expression.error_messages(args): 1549 self.raise_error(error_message) 1550 1551 return expression 1552 1553 def _find_sql(self, start: Token, end: Token) -> str: 1554 return self.sql[start.start : end.end + 1] 1555 1556 def _is_connected(self) -> bool: 1557 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1558 1559 def _advance(self, times: int = 1) -> None: 1560 self._index += times 1561 self._curr = seq_get(self._tokens, self._index) 1562 self._next = seq_get(self._tokens, self._index + 1) 1563 1564 if self._index > 0: 1565 self._prev = self._tokens[self._index - 1] 1566 self._prev_comments = self._prev.comments 1567 else: 1568 self._prev = None 1569 self._prev_comments = None 1570 1571 def _retreat(self, index: int) -> None: 1572 if index != self._index: 1573 self._advance(index - self._index) 1574 1575 def _warn_unsupported(self) -> None: 1576 if len(self._tokens) <= 1: 1577 return 1578 1579 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1580 # interested in emitting a warning for the one being currently processed. 1581 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1582 1583 logger.warning( 1584 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1585 ) 1586 1587 def _parse_command(self) -> exp.Command: 1588 self._warn_unsupported() 1589 return self.expression( 1590 exp.Command, 1591 comments=self._prev_comments, 1592 this=self._prev.text.upper(), 1593 expression=self._parse_string(), 1594 ) 1595 1596 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1597 """ 1598 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1596     def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1597         """
1598         Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1599         This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1600         solve this by setting & resetting the parser state accordingly.
1601         """
1602         index = self._index
1603         error_level = self.error_level
1604 
1605         self.error_level = ErrorLevel.IMMEDIATE
1606         try:
1607             this = parse_method()
1608         except ParseError:
1609             this = None
1610         finally:
1611             if not this or retreat:
1612                 self._retreat(index)
1613             self.error_level = error_level
1614 
1615         return this
1616 
1617     def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1618         start = self._prev
1619         exists = self._parse_exists() if allow_exists else None
1620 
1621         self._match(TokenType.ON)
1622 
1623         materialized = self._match_text_seq("MATERIALIZED")
1624         kind = self._match_set(self.CREATABLES) and self._prev
1625         if not kind:
1626             return self._parse_as_command(start)
1627 
1628         if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1629             this = self._parse_user_defined_function(kind=kind.token_type)
1630         elif kind.token_type == TokenType.TABLE:
1631             this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1632         elif kind.token_type == TokenType.COLUMN:
1633             this = self._parse_column()
1634         else:
1635             this = self._parse_id_var()
1636 
1637         self._match(TokenType.IS)
1638 
1639         return self.expression(
1640             exp.Comment,
1641             this=this,
1642             kind=kind.text,
1643             expression=self._parse_string(),
1644             exists=exists,
1645             materialized=materialized,
1646         )
1647 
1648     def _parse_to_table(
1649         self,
1650     ) -> exp.ToTableProperty:
1651         table = self._parse_table_parts(schema=True)
1652         return self.expression(exp.ToTableProperty, this=table)
1653 
1654     # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1655     def _parse_ttl(self) -> exp.Expression:
1656         def _parse_ttl_action() -> t.Optional[exp.Expression]:
1657             this = self._parse_bitwise()
1658 
1659             if self._match_text_seq("DELETE"):
1660                 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1661             if self._match_text_seq("RECOMPRESS"):
1662                 return self.expression(
1663                     exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1664                 )
1665             if self._match_text_seq("TO", "DISK"):
1666                 return self.expression(
1667                     exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1668                 )
1669             if self._match_text_seq("TO", "VOLUME"):
1670                 return self.expression(
1671                     exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1672                 )
1673 
1674             return this
1675 
1676         expressions = self._parse_csv(_parse_ttl_action)
1677         where = self._parse_where()
1678         group = self._parse_group()
1679 
1680         aggregates = None
1681         if group and self._match(TokenType.SET):
1682             aggregates = self._parse_csv(self._parse_set_item)
1683 
1684         return self.expression(
1685             exp.MergeTreeTTL,
1686             expressions=expressions,
1687             where=where,
1688             group=group,
1689             aggregates=aggregates,
1690         )
1691 
1692     def _parse_statement(self) -> t.Optional[exp.Expression]:
1693         if self._curr is None:
1694             return None
1695 
1696         if self._match_set(self.STATEMENT_PARSERS):
1697             return self.STATEMENT_PARSERS[self._prev.token_type](self)
1698 
1699         if self._match_set(self.dialect.tokenizer.COMMANDS):
1700             return self._parse_command()
1701 
1702         expression = self._parse_expression()
1703         expression = self._parse_set_operations(expression) if expression else self._parse_select()
1704         return self._parse_query_modifiers(expression)
1705 
1706     def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1707         start = 
self._prev 1708 temporary = self._match(TokenType.TEMPORARY) 1709 materialized = self._match_text_seq("MATERIALIZED") 1710 1711 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1712 if not kind: 1713 return self._parse_as_command(start) 1714 1715 concurrently = self._match_text_seq("CONCURRENTLY") 1716 if_exists = exists or self._parse_exists() 1717 table = self._parse_table_parts( 1718 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1719 ) 1720 1721 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1722 1723 if self._match(TokenType.L_PAREN, advance=False): 1724 expressions = self._parse_wrapped_csv(self._parse_types) 1725 else: 1726 expressions = None 1727 1728 return self.expression( 1729 exp.Drop, 1730 comments=start.comments, 1731 exists=if_exists, 1732 this=table, 1733 expressions=expressions, 1734 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1735 temporary=temporary, 1736 materialized=materialized, 1737 cascade=self._match_text_seq("CASCADE"), 1738 constraints=self._match_text_seq("CONSTRAINTS"), 1739 purge=self._match_text_seq("PURGE"), 1740 cluster=cluster, 1741 concurrently=concurrently, 1742 ) 1743 1744 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1745 return ( 1746 self._match_text_seq("IF") 1747 and (not not_ or self._match(TokenType.NOT)) 1748 and self._match(TokenType.EXISTS) 1749 ) 1750 1751 def _parse_create(self) -> exp.Create | exp.Command: 1752 # Note: this can't be None because we've matched a statement parser 1753 start = self._prev 1754 comments = self._prev_comments 1755 1756 replace = ( 1757 start.token_type == TokenType.REPLACE 1758 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1759 or self._match_pair(TokenType.OR, TokenType.ALTER) 1760 ) 1761 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1762 1763 unique = self._match(TokenType.UNIQUE) 1764 1765 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1766 clustered = True 1767 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1768 "COLUMNSTORE" 1769 ): 1770 clustered = False 1771 else: 1772 clustered = None 1773 1774 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1775 self._advance() 1776 1777 properties = None 1778 create_token = self._match_set(self.CREATABLES) and self._prev 1779 1780 if not create_token: 1781 # exp.Properties.Location.POST_CREATE 1782 properties = self._parse_properties() 1783 create_token = self._match_set(self.CREATABLES) and self._prev 1784 1785 if not properties or not create_token: 1786 return self._parse_as_command(start) 1787 1788 concurrently = self._match_text_seq("CONCURRENTLY") 1789 exists = self._parse_exists(not_=True) 1790 this = None 1791 expression: t.Optional[exp.Expression] = None 1792 indexes = None 1793 no_schema_binding = None 1794 begin = None 1795 end = None 1796 clone = None 1797 1798 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1799 nonlocal properties 1800 if properties and temp_props: 1801 properties.expressions.extend(temp_props.expressions) 1802 elif temp_props: 1803 properties = temp_props 1804 1805 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1806 this = self._parse_user_defined_function(kind=create_token.token_type) 1807 1808 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1809 extend_props(self._parse_properties()) 1810 1811 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1812 
extend_props(self._parse_properties()) 1813 1814 if not expression: 1815 if self._match(TokenType.COMMAND): 1816 expression = self._parse_as_command(self._prev) 1817 else: 1818 begin = self._match(TokenType.BEGIN) 1819 return_ = self._match_text_seq("RETURN") 1820 1821 if self._match(TokenType.STRING, advance=False): 1822 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1823 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1824 expression = self._parse_string() 1825 extend_props(self._parse_properties()) 1826 else: 1827 expression = self._parse_statement() 1828 1829 end = self._match_text_seq("END") 1830 1831 if return_: 1832 expression = self.expression(exp.Return, this=expression) 1833 elif create_token.token_type == TokenType.INDEX: 1834 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1835 if not self._match(TokenType.ON): 1836 index = self._parse_id_var() 1837 anonymous = False 1838 else: 1839 index = None 1840 anonymous = True 1841 1842 this = self._parse_index(index=index, anonymous=anonymous) 1843 elif create_token.token_type in self.DB_CREATABLES: 1844 table_parts = self._parse_table_parts( 1845 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1846 ) 1847 1848 # exp.Properties.Location.POST_NAME 1849 self._match(TokenType.COMMA) 1850 extend_props(self._parse_properties(before=True)) 1851 1852 this = self._parse_schema(this=table_parts) 1853 1854 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1855 extend_props(self._parse_properties()) 1856 1857 self._match(TokenType.ALIAS) 1858 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1859 # exp.Properties.Location.POST_ALIAS 1860 extend_props(self._parse_properties()) 1861 1862 if create_token.token_type == TokenType.SEQUENCE: 1863 expression = self._parse_types() 1864 extend_props(self._parse_properties()) 1865 else: 1866 expression = self._parse_ddl_select() 1867 1868 if create_token.token_type == TokenType.TABLE: 1869 # exp.Properties.Location.POST_EXPRESSION 1870 extend_props(self._parse_properties()) 1871 1872 indexes = [] 1873 while True: 1874 index = self._parse_index() 1875 1876 # exp.Properties.Location.POST_INDEX 1877 extend_props(self._parse_properties()) 1878 if not index: 1879 break 1880 else: 1881 self._match(TokenType.COMMA) 1882 indexes.append(index) 1883 elif create_token.token_type == TokenType.VIEW: 1884 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1885 no_schema_binding = True 1886 1887 shallow = self._match_text_seq("SHALLOW") 1888 1889 if self._match_texts(self.CLONE_KEYWORDS): 1890 copy = self._prev.text.lower() == "copy" 1891 clone = self.expression( 1892 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1893 ) 1894 1895 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1896 return self._parse_as_command(start) 1897 1898 create_kind_text = create_token.text.upper() 1899 return self.expression( 1900 exp.Create, 1901 comments=comments, 1902 this=this, 1903 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1904 replace=replace, 1905 refresh=refresh, 1906 unique=unique, 1907 expression=expression, 1908 exists=exists, 1909 properties=properties, 1910 indexes=indexes, 1911 no_schema_binding=no_schema_binding, 1912 begin=begin, 1913 end=end, 1914 clone=clone, 1915 concurrently=concurrently, 1916 clustered=clustered, 1917 ) 1918 1919 def 
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1920 seq = exp.SequenceProperties() 1921 1922 options = [] 1923 index = self._index 1924 1925 while self._curr: 1926 self._match(TokenType.COMMA) 1927 if self._match_text_seq("INCREMENT"): 1928 self._match_text_seq("BY") 1929 self._match_text_seq("=") 1930 seq.set("increment", self._parse_term()) 1931 elif self._match_text_seq("MINVALUE"): 1932 seq.set("minvalue", self._parse_term()) 1933 elif self._match_text_seq("MAXVALUE"): 1934 seq.set("maxvalue", self._parse_term()) 1935 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1936 self._match_text_seq("=") 1937 seq.set("start", self._parse_term()) 1938 elif self._match_text_seq("CACHE"): 1939 # T-SQL allows empty CACHE which is initialized dynamically 1940 seq.set("cache", self._parse_number() or True) 1941 elif self._match_text_seq("OWNED", "BY"): 1942 # "OWNED BY NONE" is the default 1943 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1944 else: 1945 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1946 if opt: 1947 options.append(opt) 1948 else: 1949 break 1950 1951 seq.set("options", options if options else None) 1952 return None if self._index == index else seq 1953 1954 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1955 # only used for teradata currently 1956 self._match(TokenType.COMMA) 1957 1958 kwargs = { 1959 "no": self._match_text_seq("NO"), 1960 "dual": self._match_text_seq("DUAL"), 1961 "before": self._match_text_seq("BEFORE"), 1962 "default": self._match_text_seq("DEFAULT"), 1963 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1964 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1965 "after": self._match_text_seq("AFTER"), 1966 "minimum": self._match_texts(("MIN", "MINIMUM")), 1967 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1968 } 1969 1970 if self._match_texts(self.PROPERTY_PARSERS): 1971 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1972 try: 1973 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1974 except TypeError: 1975 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1976 1977 return None 1978 1979 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1980 return self._parse_wrapped_csv(self._parse_property) 1981 1982 def _parse_property(self) -> t.Optional[exp.Expression]: 1983 if self._match_texts(self.PROPERTY_PARSERS): 1984 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1985 1986 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1987 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1988 1989 if self._match_text_seq("COMPOUND", "SORTKEY"): 1990 return self._parse_sortkey(compound=True) 1991 1992 if self._match_text_seq("SQL", "SECURITY"): 1993 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1994 1995 index = self._index 1996 key = self._parse_column() 1997 1998 if not self._match(TokenType.EQ): 1999 self._retreat(index) 2000 return self._parse_sequence_properties() 2001 2002 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2003 if isinstance(key, exp.Column): 2004 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2005 2006 value = self._parse_bitwise() or self._parse_var(any_token=True) 2007 2008 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2009 if 
isinstance(value, exp.Column): 2010 value = exp.var(value.name) 2011 2012 return self.expression(exp.Property, this=key, value=value) 2013 2014 def _parse_stored(self) -> exp.FileFormatProperty: 2015 self._match(TokenType.ALIAS) 2016 2017 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2018 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2019 2020 return self.expression( 2021 exp.FileFormatProperty, 2022 this=( 2023 self.expression( 2024 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2025 ) 2026 if input_format or output_format 2027 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2028 ), 2029 ) 2030 2031 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2032 field = self._parse_field() 2033 if isinstance(field, exp.Identifier) and not field.quoted: 2034 field = exp.var(field) 2035 2036 return field 2037 2038 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2039 self._match(TokenType.EQ) 2040 self._match(TokenType.ALIAS) 2041 2042 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2043 2044 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2045 properties = [] 2046 while True: 2047 if before: 2048 prop = self._parse_property_before() 2049 else: 2050 prop = self._parse_property() 2051 if not prop: 2052 break 2053 for p in ensure_list(prop): 2054 properties.append(p) 2055 2056 if properties: 2057 return self.expression(exp.Properties, expressions=properties) 2058 2059 return None 2060 2061 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2062 return self.expression( 2063 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2064 ) 2065 2066 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2067 if self._match_texts(("DEFINER", "INVOKER")): 2068 security_specifier = self._prev.text.upper() 2069 return self.expression(exp.SecurityProperty, this=security_specifier) 2070 return None 2071 2072 def _parse_settings_property(self) -> exp.SettingsProperty: 2073 return self.expression( 2074 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2075 ) 2076 2077 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2078 if self._index >= 2: 2079 pre_volatile_token = self._tokens[self._index - 2] 2080 else: 2081 pre_volatile_token = None 2082 2083 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2084 return exp.VolatileProperty() 2085 2086 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2087 2088 def _parse_retention_period(self) -> exp.Var: 2089 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2090 number = self._parse_number() 2091 number_str = f"{number} " if number else "" 2092 unit = self._parse_var(any_token=True) 2093 return exp.var(f"{number_str}{unit}") 2094 2095 def _parse_system_versioning_property( 2096 self, with_: bool = False 2097 ) -> exp.WithSystemVersioningProperty: 2098 self._match(TokenType.EQ) 2099 prop = self.expression( 2100 exp.WithSystemVersioningProperty, 2101 **{ # type: ignore 2102 "on": True, 2103 "with": with_, 2104 }, 2105 ) 2106 2107 if self._match_text_seq("OFF"): 2108 prop.set("on", False) 2109 return prop 2110 2111 self._match(TokenType.ON) 2112 if self._match(TokenType.L_PAREN): 2113 while self._curr and not 
self._match(TokenType.R_PAREN): 2114 if self._match_text_seq("HISTORY_TABLE", "="): 2115 prop.set("this", self._parse_table_parts()) 2116 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2117 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2118 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2119 prop.set("retention_period", self._parse_retention_period()) 2120 2121 self._match(TokenType.COMMA) 2122 2123 return prop 2124 2125 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2126 self._match(TokenType.EQ) 2127 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2128 prop = self.expression(exp.DataDeletionProperty, on=on) 2129 2130 if self._match(TokenType.L_PAREN): 2131 while self._curr and not self._match(TokenType.R_PAREN): 2132 if self._match_text_seq("FILTER_COLUMN", "="): 2133 prop.set("filter_column", self._parse_column()) 2134 elif self._match_text_seq("RETENTION_PERIOD", "="): 2135 prop.set("retention_period", self._parse_retention_period()) 2136 2137 self._match(TokenType.COMMA) 2138 2139 return prop 2140 2141 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2142 kind = "HASH" 2143 expressions: t.Optional[t.List[exp.Expression]] = None 2144 if self._match_text_seq("BY", "HASH"): 2145 expressions = self._parse_wrapped_csv(self._parse_id_var) 2146 elif self._match_text_seq("BY", "RANDOM"): 2147 kind = "RANDOM" 2148 2149 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2150 buckets: t.Optional[exp.Expression] = None 2151 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2152 buckets = self._parse_number() 2153 2154 return self.expression( 2155 exp.DistributedByProperty, 2156 expressions=expressions, 2157 kind=kind, 2158 buckets=buckets, 2159 order=self._parse_order(), 2160 ) 2161 2162 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2163 self._match_text_seq("KEY") 2164 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2165 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2166 2167 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2168 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2169 prop = self._parse_system_versioning_property(with_=True) 2170 self._match_r_paren() 2171 return prop 2172 2173 if self._match(TokenType.L_PAREN, advance=False): 2174 return self._parse_wrapped_properties() 2175 2176 if self._match_text_seq("JOURNAL"): 2177 return self._parse_withjournaltable() 2178 2179 if self._match_texts(self.VIEW_ATTRIBUTES): 2180 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2181 2182 if self._match_text_seq("DATA"): 2183 return self._parse_withdata(no=False) 2184 elif self._match_text_seq("NO", "DATA"): 2185 return self._parse_withdata(no=True) 2186 2187 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2188 return self._parse_serde_properties(with_=True) 2189 2190 if self._match(TokenType.SCHEMA): 2191 return self.expression( 2192 exp.WithSchemaBindingProperty, 2193 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2194 ) 2195 2196 if not self._next: 2197 return None 2198 2199 return self._parse_withisolatedloading() 2200 2201 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2202 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2203 self._match(TokenType.EQ) 2204 2205 user = self._parse_id_var() 2206 self._match(TokenType.PARAMETER) 2207 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2208 2209 if not user or not host: 2210 return None 2211 2212 return exp.DefinerProperty(this=f"{user}@{host}") 2213 2214 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2215 self._match(TokenType.TABLE) 2216 self._match(TokenType.EQ) 2217 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2218 2219 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2220 return self.expression(exp.LogProperty, no=no) 2221 2222 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2223 return self.expression(exp.JournalProperty, **kwargs) 2224 2225 def _parse_checksum(self) -> exp.ChecksumProperty: 2226 self._match(TokenType.EQ) 2227 2228 on = None 2229 if self._match(TokenType.ON): 2230 on = True 2231 elif self._match_text_seq("OFF"): 2232 on = False 2233 2234 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2235 2236 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2237 return self.expression( 2238 exp.Cluster, 2239 expressions=( 2240 self._parse_wrapped_csv(self._parse_ordered) 2241 if wrapped 2242 else self._parse_csv(self._parse_ordered) 2243 ), 2244 ) 2245 2246 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2247 self._match_text_seq("BY") 2248 2249 self._match_l_paren() 2250 expressions = self._parse_csv(self._parse_column) 2251 self._match_r_paren() 2252 2253 if self._match_text_seq("SORTED", "BY"): 2254 self._match_l_paren() 2255 sorted_by = self._parse_csv(self._parse_ordered) 2256 self._match_r_paren() 2257 else: 2258 sorted_by = None 2259 2260 self._match(TokenType.INTO) 2261 buckets = self._parse_number() 2262 self._match_text_seq("BUCKETS") 2263 2264 return self.expression( 2265 exp.ClusteredByProperty, 2266 expressions=expressions, 2267 sorted_by=sorted_by, 2268 buckets=buckets, 2269 ) 2270 2271 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2272 if not self._match_text_seq("GRANTS"): 2273 self._retreat(self._index - 1) 2274 return None 2275 2276 return self.expression(exp.CopyGrantsProperty) 2277 2278 def _parse_freespace(self) -> exp.FreespaceProperty: 2279 self._match(TokenType.EQ) 2280 return self.expression( 2281 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2282 ) 2283 2284 def _parse_mergeblockratio( 2285 self, no: bool = False, default: bool = False 2286 ) -> exp.MergeBlockRatioProperty: 2287 if self._match(TokenType.EQ): 2288 return self.expression( 2289 exp.MergeBlockRatioProperty, 2290 this=self._parse_number(), 2291 percent=self._match(TokenType.PERCENT), 2292 ) 2293 2294 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2295 2296 def _parse_datablocksize( 2297 self, 2298 default: t.Optional[bool] = None, 2299 minimum: t.Optional[bool] = None, 2300 maximum: t.Optional[bool] = None, 2301 ) -> exp.DataBlocksizeProperty: 2302 self._match(TokenType.EQ) 2303 size = self._parse_number() 2304 2305 units = None 2306 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2307 units = self._prev.text 2308 2309 return self.expression( 2310 exp.DataBlocksizeProperty, 2311 size=size, 2312 units=units, 2313 default=default, 2314 minimum=minimum, 2315 maximum=maximum, 2316 ) 2317 2318 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2319 self._match(TokenType.EQ) 2320 always = self._match_text_seq("ALWAYS") 2321 manual = self._match_text_seq("MANUAL") 2322 never = 
self._match_text_seq("NEVER") 2323 default = self._match_text_seq("DEFAULT") 2324 2325 autotemp = None 2326 if self._match_text_seq("AUTOTEMP"): 2327 autotemp = self._parse_schema() 2328 2329 return self.expression( 2330 exp.BlockCompressionProperty, 2331 always=always, 2332 manual=manual, 2333 never=never, 2334 default=default, 2335 autotemp=autotemp, 2336 ) 2337 2338 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2339 index = self._index 2340 no = self._match_text_seq("NO") 2341 concurrent = self._match_text_seq("CONCURRENT") 2342 2343 if not self._match_text_seq("ISOLATED", "LOADING"): 2344 self._retreat(index) 2345 return None 2346 2347 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2348 return self.expression( 2349 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2350 ) 2351 2352 def _parse_locking(self) -> exp.LockingProperty: 2353 if self._match(TokenType.TABLE): 2354 kind = "TABLE" 2355 elif self._match(TokenType.VIEW): 2356 kind = "VIEW" 2357 elif self._match(TokenType.ROW): 2358 kind = "ROW" 2359 elif self._match_text_seq("DATABASE"): 2360 kind = "DATABASE" 2361 else: 2362 kind = None 2363 2364 if kind in ("DATABASE", "TABLE", "VIEW"): 2365 this = self._parse_table_parts() 2366 else: 2367 this = None 2368 2369 if self._match(TokenType.FOR): 2370 for_or_in = "FOR" 2371 elif self._match(TokenType.IN): 2372 for_or_in = "IN" 2373 else: 2374 for_or_in = None 2375 2376 if self._match_text_seq("ACCESS"): 2377 lock_type = "ACCESS" 2378 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2379 lock_type = "EXCLUSIVE" 2380 elif self._match_text_seq("SHARE"): 2381 lock_type = "SHARE" 2382 elif self._match_text_seq("READ"): 2383 lock_type = "READ" 2384 elif self._match_text_seq("WRITE"): 2385 lock_type = "WRITE" 2386 elif self._match_text_seq("CHECKSUM"): 2387 lock_type = "CHECKSUM" 2388 else: 2389 lock_type = None 2390 2391 override = self._match_text_seq("OVERRIDE") 2392 2393 return self.expression( 2394 exp.LockingProperty, 2395 this=this, 2396 kind=kind, 2397 for_or_in=for_or_in, 2398 lock_type=lock_type, 2399 override=override, 2400 ) 2401 2402 def _parse_partition_by(self) -> t.List[exp.Expression]: 2403 if self._match(TokenType.PARTITION_BY): 2404 return self._parse_csv(self._parse_assignment) 2405 return [] 2406 2407 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2408 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2409 if self._match_text_seq("MINVALUE"): 2410 return exp.var("MINVALUE") 2411 if self._match_text_seq("MAXVALUE"): 2412 return exp.var("MAXVALUE") 2413 return self._parse_bitwise() 2414 2415 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2416 expression = None 2417 from_expressions = None 2418 to_expressions = None 2419 2420 if self._match(TokenType.IN): 2421 this = self._parse_wrapped_csv(self._parse_bitwise) 2422 elif self._match(TokenType.FROM): 2423 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2424 self._match_text_seq("TO") 2425 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2426 elif self._match_text_seq("WITH", "(", "MODULUS"): 2427 this = self._parse_number() 2428 self._match_text_seq(",", "REMAINDER") 2429 expression = self._parse_number() 2430 self._match_r_paren() 2431 else: 2432 self.raise_error("Failed to parse partition bound spec.") 2433 2434 return self.expression( 2435 exp.PartitionBoundSpec, 2436 this=this, 2437 expression=expression, 2438 
from_expressions=from_expressions, 2439 to_expressions=to_expressions, 2440 ) 2441 2442 # https://www.postgresql.org/docs/current/sql-createtable.html 2443 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2444 if not self._match_text_seq("OF"): 2445 self._retreat(self._index - 1) 2446 return None 2447 2448 this = self._parse_table(schema=True) 2449 2450 if self._match(TokenType.DEFAULT): 2451 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2452 elif self._match_text_seq("FOR", "VALUES"): 2453 expression = self._parse_partition_bound_spec() 2454 else: 2455 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2456 2457 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2458 2459 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2460 self._match(TokenType.EQ) 2461 return self.expression( 2462 exp.PartitionedByProperty, 2463 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2464 ) 2465 2466 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2467 if self._match_text_seq("AND", "STATISTICS"): 2468 statistics = True 2469 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2470 statistics = False 2471 else: 2472 statistics = None 2473 2474 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2475 2476 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2477 if self._match_text_seq("SQL"): 2478 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2479 return None 2480 2481 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2482 if self._match_text_seq("SQL", "DATA"): 2483 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2484 return None 2485 2486 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2487 if self._match_text_seq("PRIMARY", "INDEX"): 2488 return exp.NoPrimaryIndexProperty() 2489 if self._match_text_seq("SQL"): 2490 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2491 return None 2492 2493 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2494 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2495 return exp.OnCommitProperty() 2496 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2497 return exp.OnCommitProperty(delete=True) 2498 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2499 2500 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2501 if self._match_text_seq("SQL", "DATA"): 2502 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2503 return None 2504 2505 def _parse_distkey(self) -> exp.DistKeyProperty: 2506 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2507 2508 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2509 table = self._parse_table(schema=True) 2510 2511 options = [] 2512 while self._match_texts(("INCLUDING", "EXCLUDING")): 2513 this = self._prev.text.upper() 2514 2515 id_var = self._parse_id_var() 2516 if not id_var: 2517 return None 2518 2519 options.append( 2520 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2521 ) 2522 2523 return self.expression(exp.LikeProperty, this=table, expressions=options) 2524 2525 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2526 return self.expression( 2527 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2528 ) 2529 2530 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2531 self._match(TokenType.EQ) 2532 return self.expression( 2533 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2534 ) 2535 2536 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2537 self._match_text_seq("WITH", "CONNECTION") 2538 return self.expression( 2539 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2540 ) 2541 2542 def _parse_returns(self) -> exp.ReturnsProperty: 2543 value: t.Optional[exp.Expression] 2544 null = None 2545 is_table = self._match(TokenType.TABLE) 2546 2547 if is_table: 2548 if self._match(TokenType.LT): 2549 value = self.expression( 2550 exp.Schema, 2551 this="TABLE", 2552 expressions=self._parse_csv(self._parse_struct_types), 2553 ) 2554 if not self._match(TokenType.GT): 2555 self.raise_error("Expecting >") 2556 else: 2557 value = self._parse_schema(exp.var("TABLE")) 2558 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2559 null = True 2560 value = None 2561 else: 2562 value = self._parse_types() 2563 2564 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2565 2566 def _parse_describe(self) -> exp.Describe: 2567 kind = self._match_set(self.CREATABLES) and self._prev.text 2568 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2569 if self._match(TokenType.DOT): 2570 style = None 2571 self._retreat(self._index - 2) 2572 this = self._parse_table(schema=True) 2573 properties = self._parse_properties() 2574 expressions = properties.expressions if properties else None 2575 partition = self._parse_partition() 2576 return self.expression( 2577 exp.Describe, 2578 this=this, 2579 style=style, 2580 kind=kind, 2581 expressions=expressions, 2582 partition=partition, 2583 ) 2584 2585 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2586 kind = self._prev.text.upper() 2587 expressions = [] 2588 2589 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2590 if self._match(TokenType.WHEN): 2591 expression = self._parse_disjunction() 2592 self._match(TokenType.THEN) 2593 else: 2594 expression = None 2595 2596 else_ = self._match(TokenType.ELSE) 2597 2598 if not self._match(TokenType.INTO): 2599 return None 2600 2601 return self.expression( 2602 exp.ConditionalInsert, 2603 this=self.expression( 2604 exp.Insert, 2605 this=self._parse_table(schema=True), 2606 expression=self._parse_derived_table_values(), 2607 ), 2608 expression=expression, 2609 else_=else_, 2610 ) 2611 2612 expression = parse_conditional_insert() 2613 while expression is not None: 2614 expressions.append(expression) 2615 expression = parse_conditional_insert() 2616 2617 return self.expression( 2618 exp.MultitableInserts, 2619 kind=kind, 2620 comments=comments, 2621 expressions=expressions, 2622 source=self._parse_table(), 2623 ) 2624 2625 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2626 comments = ensure_list(self._prev_comments) 2627 hint = self._parse_hint() 2628 overwrite = self._match(TokenType.OVERWRITE) 2629 ignore = self._match(TokenType.IGNORE) 2630 local = self._match_text_seq("LOCAL") 2631 alternative = None 2632 is_function = None 2633 2634 if self._match_text_seq("DIRECTORY"): 2635 this: t.Optional[exp.Expression] = self.expression( 2636 exp.Directory, 2637 this=self._parse_var_or_string(), 2638 local=local, 2639 row_format=self._parse_row_format(match_row=True), 2640 ) 
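# The else-branch below handles the ordinary INSERT forms, including the
# multi-table INSERT FIRST/ALL variant routed to _parse_multitable_inserts.
# A sketch, assuming the top-level API (table names are illustrative):
#
#     import sqlglot
#     sql = "INSERT ALL INTO t1 VALUES (1) INTO t2 VALUES (2) SELECT * FROM src"
#     isinstance(sqlglot.parse_one(sql, read="oracle"), sqlglot.exp.MultitableInserts)
#     # True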
2641 else: 2642 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2643 comments += ensure_list(self._prev_comments) 2644 return self._parse_multitable_inserts(comments) 2645 2646 if self._match(TokenType.OR): 2647 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2648 2649 self._match(TokenType.INTO) 2650 comments += ensure_list(self._prev_comments) 2651 self._match(TokenType.TABLE) 2652 is_function = self._match(TokenType.FUNCTION) 2653 2654 this = ( 2655 self._parse_table(schema=True, parse_partition=True) 2656 if not is_function 2657 else self._parse_function() 2658 ) 2659 2660 returning = self._parse_returning() 2661 2662 return self.expression( 2663 exp.Insert, 2664 comments=comments, 2665 hint=hint, 2666 is_function=is_function, 2667 this=this, 2668 stored=self._match_text_seq("STORED") and self._parse_stored(), 2669 by_name=self._match_text_seq("BY", "NAME"), 2670 exists=self._parse_exists(), 2671 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2672 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2673 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2674 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2675 conflict=self._parse_on_conflict(), 2676 returning=returning or self._parse_returning(), 2677 overwrite=overwrite, 2678 alternative=alternative, 2679 ignore=ignore, 2680 source=self._match(TokenType.TABLE) and self._parse_table(), 2681 ) 2682 2683 def _parse_kill(self) -> exp.Kill: 2684 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2685 2686 return self.expression( 2687 exp.Kill, 2688 this=self._parse_primary(), 2689 kind=kind, 2690 ) 2691 2692 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2693 conflict = self._match_text_seq("ON", "CONFLICT") 2694 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2695 2696 if not conflict and not duplicate: 2697 return None 2698 2699 conflict_keys = None 2700 constraint = None 2701 2702 if conflict: 2703 if self._match_text_seq("ON", "CONSTRAINT"): 2704 constraint = self._parse_id_var() 2705 elif self._match(TokenType.L_PAREN): 2706 conflict_keys = self._parse_csv(self._parse_id_var) 2707 self._match_r_paren() 2708 2709 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2710 if self._prev.token_type == TokenType.UPDATE: 2711 self._match(TokenType.SET) 2712 expressions = self._parse_csv(self._parse_equality) 2713 else: 2714 expressions = None 2715 2716 return self.expression( 2717 exp.OnConflict, 2718 duplicate=duplicate, 2719 expressions=expressions, 2720 action=action, 2721 conflict_keys=conflict_keys, 2722 constraint=constraint, 2723 ) 2724 2725 def _parse_returning(self) -> t.Optional[exp.Returning]: 2726 if not self._match(TokenType.RETURNING): 2727 return None 2728 return self.expression( 2729 exp.Returning, 2730 expressions=self._parse_csv(self._parse_expression), 2731 into=self._match(TokenType.INTO) and self._parse_table_part(), 2732 ) 2733 2734 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2735 if not self._match(TokenType.FORMAT): 2736 return None 2737 return self._parse_row_format() 2738 2739 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2740 index = self._index 2741 with_ = with_ or self._match_text_seq("WITH") 2742 2743 if not self._match(TokenType.SERDE_PROPERTIES): 2744 self._retreat(index) 2745 return 
None 2746 return self.expression( 2747 exp.SerdeProperties, 2748 **{ # type: ignore 2749 "expressions": self._parse_wrapped_properties(), 2750 "with": with_, 2751 }, 2752 ) 2753 2754 def _parse_row_format( 2755 self, match_row: bool = False 2756 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2757 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2758 return None 2759 2760 if self._match_text_seq("SERDE"): 2761 this = self._parse_string() 2762 2763 serde_properties = self._parse_serde_properties() 2764 2765 return self.expression( 2766 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2767 ) 2768 2769 self._match_text_seq("DELIMITED") 2770 2771 kwargs = {} 2772 2773 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2774 kwargs["fields"] = self._parse_string() 2775 if self._match_text_seq("ESCAPED", "BY"): 2776 kwargs["escaped"] = self._parse_string() 2777 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2778 kwargs["collection_items"] = self._parse_string() 2779 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2780 kwargs["map_keys"] = self._parse_string() 2781 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2782 kwargs["lines"] = self._parse_string() 2783 if self._match_text_seq("NULL", "DEFINED", "AS"): 2784 kwargs["null"] = self._parse_string() 2785 2786 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2787 2788 def _parse_load(self) -> exp.LoadData | exp.Command: 2789 if self._match_text_seq("DATA"): 2790 local = self._match_text_seq("LOCAL") 2791 self._match_text_seq("INPATH") 2792 inpath = self._parse_string() 2793 overwrite = self._match(TokenType.OVERWRITE) 2794 self._match_pair(TokenType.INTO, TokenType.TABLE) 2795 2796 return self.expression( 2797 exp.LoadData, 2798 this=self._parse_table(schema=True), 2799 local=local, 2800 overwrite=overwrite, 2801 inpath=inpath, 2802 partition=self._parse_partition(), 2803 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2804 serde=self._match_text_seq("SERDE") and self._parse_string(), 2805 ) 2806 return self._parse_as_command(self._prev) 2807 2808 def _parse_delete(self) -> exp.Delete: 2809 # This handles MySQL's "Multiple-Table Syntax" 2810 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2811 tables = None 2812 comments = self._prev_comments 2813 if not self._match(TokenType.FROM, advance=False): 2814 tables = self._parse_csv(self._parse_table) or None 2815 2816 returning = self._parse_returning() 2817 2818 return self.expression( 2819 exp.Delete, 2820 comments=comments, 2821 tables=tables, 2822 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2823 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2824 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2825 where=self._parse_where(), 2826 returning=returning or self._parse_returning(), 2827 limit=self._parse_limit(), 2828 ) 2829 2830 def _parse_update(self) -> exp.Update: 2831 comments = self._prev_comments 2832 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2833 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2834 returning = self._parse_returning() 2835 return self.expression( 2836 exp.Update, 2837 comments=comments, 2838 **{ # type: ignore 2839 "this": this, 2840 "expressions": expressions, 2841 "from": self._parse_from(joins=True), 2842 "where": self._parse_where(), 2843 "returning": returning or 
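# The "order" and "limit" slots below cover MySQL-style UPDATE ... ORDER BY ... LIMIT.
# A sketch, assuming the top-level API:
#
#     import sqlglot
#     upd = sqlglot.parse_one("UPDATE t SET a = 1 ORDER BY b LIMIT 10", read="mysql")
#     bool(upd.args.get("order")) and bool(upd.args.get("limit"))
#     # True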
self._parse_returning(), 2844 "order": self._parse_order(), 2845 "limit": self._parse_limit(), 2846 }, 2847 ) 2848 2849 def _parse_uncache(self) -> exp.Uncache: 2850 if not self._match(TokenType.TABLE): 2851 self.raise_error("Expecting TABLE after UNCACHE") 2852 2853 return self.expression( 2854 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2855 ) 2856 2857 def _parse_cache(self) -> exp.Cache: 2858 lazy = self._match_text_seq("LAZY") 2859 self._match(TokenType.TABLE) 2860 table = self._parse_table(schema=True) 2861 2862 options = [] 2863 if self._match_text_seq("OPTIONS"): 2864 self._match_l_paren() 2865 k = self._parse_string() 2866 self._match(TokenType.EQ) 2867 v = self._parse_string() 2868 options = [k, v] 2869 self._match_r_paren() 2870 2871 self._match(TokenType.ALIAS) 2872 return self.expression( 2873 exp.Cache, 2874 this=table, 2875 lazy=lazy, 2876 options=options, 2877 expression=self._parse_select(nested=True), 2878 ) 2879 2880 def _parse_partition(self) -> t.Optional[exp.Partition]: 2881 if not self._match(TokenType.PARTITION): 2882 return None 2883 2884 return self.expression( 2885 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2886 ) 2887 2888 def _parse_value(self) -> t.Optional[exp.Tuple]: 2889 if self._match(TokenType.L_PAREN): 2890 expressions = self._parse_csv(self._parse_expression) 2891 self._match_r_paren() 2892 return self.expression(exp.Tuple, expressions=expressions) 2893 2894 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2895 expression = self._parse_expression() 2896 if expression: 2897 return self.expression(exp.Tuple, expressions=[expression]) 2898 return None 2899 2900 def _parse_projections(self) -> t.List[exp.Expression]: 2901 return self._parse_expressions() 2902 2903 def _parse_select( 2904 self, 2905 nested: bool = False, 2906 table: bool = False, 2907 parse_subquery_alias: bool = True, 2908 parse_set_operation: bool = True, 2909 ) -> t.Optional[exp.Expression]: 2910 cte = self._parse_with() 2911 2912 if cte: 2913 this = self._parse_statement() 2914 2915 if not this: 2916 self.raise_error("Failed to parse any statement following CTE") 2917 return cte 2918 2919 if "with" in this.arg_types: 2920 this.set("with", cte) 2921 else: 2922 self.raise_error(f"{this.key} does not support CTE") 2923 this = cte 2924 2925 return this 2926 2927 # duckdb supports leading with FROM x 2928 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2929 2930 if self._match(TokenType.SELECT): 2931 comments = self._prev_comments 2932 2933 hint = self._parse_hint() 2934 2935 if self._next and not self._next.token_type == TokenType.DOT: 2936 all_ = self._match(TokenType.ALL) 2937 distinct = self._match_set(self.DISTINCT_TOKENS) 2938 else: 2939 all_, distinct = None, None 2940 2941 kind = ( 2942 self._match(TokenType.ALIAS) 2943 and self._match_texts(("STRUCT", "VALUE")) 2944 and self._prev.text.upper() 2945 ) 2946 2947 if distinct: 2948 distinct = self.expression( 2949 exp.Distinct, 2950 on=self._parse_value() if self._match(TokenType.ON) else None, 2951 ) 2952 2953 if all_ and distinct: 2954 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2955 2956 limit = self._parse_limit(top=True) 2957 projections = self._parse_projections() 2958 2959 this = self.expression( 2960 exp.Select, 2961 kind=kind, 2962 hint=hint, 2963 distinct=distinct, 2964 expressions=projections, 2965 limit=limit, 2966 ) 2967 this.comments = comments 2968 2969 into = 
self._parse_into() 2970 if into: 2971 this.set("into", into) 2972 2973 if not from_: 2974 from_ = self._parse_from() 2975 2976 if from_: 2977 this.set("from", from_) 2978 2979 this = self._parse_query_modifiers(this) 2980 elif (table or nested) and self._match(TokenType.L_PAREN): 2981 if self._match(TokenType.PIVOT): 2982 this = self._parse_simplified_pivot() 2983 elif self._match(TokenType.FROM): 2984 this = exp.select("*").from_( 2985 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2986 ) 2987 else: 2988 this = ( 2989 self._parse_table() 2990 if table 2991 else self._parse_select(nested=True, parse_set_operation=False) 2992 ) 2993 2994 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 2995 # in case a modifier (e.g. join) is following 2996 if table and isinstance(this, exp.Values) and this.alias: 2997 alias = this.args["alias"].pop() 2998 this = exp.Table(this=this, alias=alias) 2999 3000 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3001 3002 self._match_r_paren() 3003 3004 # We return early here so that the UNION isn't attached to the subquery by the 3005 # following call to _parse_set_operations, but instead becomes the parent node 3006 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3007 elif self._match(TokenType.VALUES, advance=False): 3008 this = self._parse_derived_table_values() 3009 elif from_: 3010 this = exp.select("*").from_(from_.this, copy=False) 3011 elif self._match(TokenType.SUMMARIZE): 3012 table = self._match(TokenType.TABLE) 3013 this = self._parse_select() or self._parse_string() or self._parse_table() 3014 return self.expression(exp.Summarize, this=this, table=table) 3015 elif self._match(TokenType.DESCRIBE): 3016 this = self._parse_describe() 3017 elif self._match_text_seq("STREAM"): 3018 this = self.expression(exp.Stream, this=self._parse_function()) 3019 else: 3020 this = None 3021 3022 return self._parse_set_operations(this) if parse_set_operation else this 3023 3024 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3025 if not skip_with_token and not self._match(TokenType.WITH): 3026 return None 3027 3028 comments = self._prev_comments 3029 recursive = self._match(TokenType.RECURSIVE) 3030 3031 expressions = [] 3032 while True: 3033 expressions.append(self._parse_cte()) 3034 3035 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3036 break 3037 else: 3038 self._match(TokenType.WITH) 3039 3040 return self.expression( 3041 exp.With, comments=comments, expressions=expressions, recursive=recursive 3042 ) 3043 3044 def _parse_cte(self) -> exp.CTE: 3045 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3046 if not alias or not alias.this: 3047 self.raise_error("Expected CTE to have alias") 3048 3049 self._match(TokenType.ALIAS) 3050 comments = self._prev_comments 3051 3052 if self._match_text_seq("NOT", "MATERIALIZED"): 3053 materialized = False 3054 elif self._match_text_seq("MATERIALIZED"): 3055 materialized = True 3056 else: 3057 materialized = None 3058 3059 return self.expression( 3060 exp.CTE, 3061 this=self._parse_wrapped(self._parse_statement), 3062 alias=alias, 3063 materialized=materialized, 3064 comments=comments, 3065 ) 3066 3067 def _parse_table_alias( 3068 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3069 ) -> t.Optional[exp.TableAlias]: 3070 any_token = self._match(TokenType.ALIAS) 3071 alias = ( 3072 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3073 or 
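# Usage sketch (illustrative comment, not part of the original source):
# _parse_cte recognizes Postgres-style [NOT] MATERIALIZED hints. Assuming
# parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "WITH t AS MATERIALIZED (SELECT 1) SELECT * FROM t", read="postgres"
#   ... )
# The resulting exp.With should contain one CTE with materialized=True.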
self._parse_string_as_identifier() 3074 ) 3075 3076 index = self._index 3077 if self._match(TokenType.L_PAREN): 3078 columns = self._parse_csv(self._parse_function_parameter) 3079 self._match_r_paren() if columns else self._retreat(index) 3080 else: 3081 columns = None 3082 3083 if not alias and not columns: 3084 return None 3085 3086 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3087 3088 # We bubble up comments from the Identifier to the TableAlias 3089 if isinstance(alias, exp.Identifier): 3090 table_alias.add_comments(alias.pop_comments()) 3091 3092 return table_alias 3093 3094 def _parse_subquery( 3095 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3096 ) -> t.Optional[exp.Subquery]: 3097 if not this: 3098 return None 3099 3100 return self.expression( 3101 exp.Subquery, 3102 this=this, 3103 pivots=self._parse_pivots(), 3104 alias=self._parse_table_alias() if parse_alias else None, 3105 sample=self._parse_table_sample(), 3106 ) 3107 3108 def _implicit_unnests_to_explicit(self, this: E) -> E: 3109 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3110 3111 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3112 for i, join in enumerate(this.args.get("joins") or []): 3113 table = join.this 3114 normalized_table = table.copy() 3115 normalized_table.meta["maybe_column"] = True 3116 normalized_table = _norm(normalized_table, dialect=self.dialect) 3117 3118 if isinstance(table, exp.Table) and not join.args.get("on"): 3119 if normalized_table.parts[0].name in refs: 3120 table_as_column = table.to_column() 3121 unnest = exp.Unnest(expressions=[table_as_column]) 3122 3123 # Table.to_column creates a parent Alias node that we want to convert to 3124 # a TableAlias and attach to the Unnest, so it matches the parser's output 3125 if isinstance(table.args.get("alias"), exp.TableAlias): 3126 table_as_column.replace(table_as_column.this) 3127 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3128 3129 table.replace(unnest) 3130 3131 refs.add(normalized_table.alias_or_name) 3132 3133 return this 3134 3135 def _parse_query_modifiers( 3136 self, this: t.Optional[exp.Expression] 3137 ) -> t.Optional[exp.Expression]: 3138 if isinstance(this, (exp.Query, exp.Table)): 3139 for join in self._parse_joins(): 3140 this.append("joins", join) 3141 for lateral in iter(self._parse_lateral, None): 3142 this.append("laterals", lateral) 3143 3144 while True: 3145 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3146 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3147 key, expression = parser(self) 3148 3149 if expression: 3150 this.set(key, expression) 3151 if key == "limit": 3152 offset = expression.args.pop("offset", None) 3153 3154 if offset: 3155 offset = exp.Offset(expression=offset) 3156 this.set("offset", offset) 3157 3158 limit_by_expressions = expression.expressions 3159 expression.set("expressions", None) 3160 offset.set("expressions", limit_by_expressions) 3161 continue 3162 break 3163 3164 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3165 this = self._implicit_unnests_to_explicit(this) 3166 3167 return this 3168 3169 def _parse_hint(self) -> t.Optional[exp.Hint]: 3170 if self._match(TokenType.HINT): 3171 hints = [] 3172 for hint in iter( 3173 lambda: self._parse_csv( 3174 lambda: self._parse_function() or self._parse_var(upper=True) 3175 ), 3176 [], 3177 ): 3178 hints.extend(hint) 3179 3180 if not 
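# Usage sketch (illustrative comment, not part of the original source):
# _implicit_unnests_to_explicit rewrites BigQuery-style comma joins against a
# correlated column into explicit UNNESTs. Assuming the public transpile helper:
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT * FROM t, t.events", read="bigquery", write="bigquery")
# The comma-joined column should come back wrapped in UNNEST(...), e.g.
# roughly 'SELECT * FROM t, UNNEST(t.events)' or an explicit CROSS JOIN,
# depending on generator rules.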
self._match_pair(TokenType.STAR, TokenType.SLASH): 3181 self.raise_error("Expected */ after HINT") 3182 3183 return self.expression(exp.Hint, expressions=hints) 3184 3185 return None 3186 3187 def _parse_into(self) -> t.Optional[exp.Into]: 3188 if not self._match(TokenType.INTO): 3189 return None 3190 3191 temp = self._match(TokenType.TEMPORARY) 3192 unlogged = self._match_text_seq("UNLOGGED") 3193 self._match(TokenType.TABLE) 3194 3195 return self.expression( 3196 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3197 ) 3198 3199 def _parse_from( 3200 self, joins: bool = False, skip_from_token: bool = False 3201 ) -> t.Optional[exp.From]: 3202 if not skip_from_token and not self._match(TokenType.FROM): 3203 return None 3204 3205 return self.expression( 3206 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3207 ) 3208 3209 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3210 return self.expression( 3211 exp.MatchRecognizeMeasure, 3212 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3213 this=self._parse_expression(), 3214 ) 3215 3216 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3217 if not self._match(TokenType.MATCH_RECOGNIZE): 3218 return None 3219 3220 self._match_l_paren() 3221 3222 partition = self._parse_partition_by() 3223 order = self._parse_order() 3224 3225 measures = ( 3226 self._parse_csv(self._parse_match_recognize_measure) 3227 if self._match_text_seq("MEASURES") 3228 else None 3229 ) 3230 3231 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3232 rows = exp.var("ONE ROW PER MATCH") 3233 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3234 text = "ALL ROWS PER MATCH" 3235 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3236 text += " SHOW EMPTY MATCHES" 3237 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3238 text += " OMIT EMPTY MATCHES" 3239 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3240 text += " WITH UNMATCHED ROWS" 3241 rows = exp.var(text) 3242 else: 3243 rows = None 3244 3245 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3246 text = "AFTER MATCH SKIP" 3247 if self._match_text_seq("PAST", "LAST", "ROW"): 3248 text += " PAST LAST ROW" 3249 elif self._match_text_seq("TO", "NEXT", "ROW"): 3250 text += " TO NEXT ROW" 3251 elif self._match_text_seq("TO", "FIRST"): 3252 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3253 elif self._match_text_seq("TO", "LAST"): 3254 text += f" TO LAST {self._advance_any().text}" # type: ignore 3255 after = exp.var(text) 3256 else: 3257 after = None 3258 3259 if self._match_text_seq("PATTERN"): 3260 self._match_l_paren() 3261 3262 if not self._curr: 3263 self.raise_error("Expecting )", self._curr) 3264 3265 paren = 1 3266 start = self._curr 3267 3268 while self._curr and paren > 0: 3269 if self._curr.token_type == TokenType.L_PAREN: 3270 paren += 1 3271 if self._curr.token_type == TokenType.R_PAREN: 3272 paren -= 1 3273 3274 end = self._prev 3275 self._advance() 3276 3277 if paren > 0: 3278 self.raise_error("Expecting )", self._curr) 3279 3280 pattern = exp.var(self._find_sql(start, end)) 3281 else: 3282 pattern = None 3283 3284 define = ( 3285 self._parse_csv(self._parse_name_as_expression) 3286 if self._match_text_seq("DEFINE") 3287 else None 3288 ) 3289 3290 self._match_r_paren() 3291 3292 return self.expression( 3293 exp.MatchRecognize, 3294 partition_by=partition, 3295 order=order, 3296 measures=measures, 3297 rows=rows, 3298 after=after, 
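# Usage sketch (illustrative comment, not part of the original source): the
# MatchRecognize node assembled here collects PARTITION BY, ORDER BY, MEASURES,
# the rows/after options, the verbatim PATTERN text, and DEFINE. Assuming
# parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY id ORDER BY ts "
#   ...     "PATTERN (a b+) DEFINE b AS b.v > a.v)",
#   ...     read="snowflake",
#   ... )
# Note how PATTERN is captured as raw SQL via _find_sql above.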
3299 pattern=pattern, 3300 define=define, 3301 alias=self._parse_table_alias(), 3302 ) 3303 3304 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3305 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3306 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3307 cross_apply = False 3308 3309 if cross_apply is not None: 3310 this = self._parse_select(table=True) 3311 view = None 3312 outer = None 3313 elif self._match(TokenType.LATERAL): 3314 this = self._parse_select(table=True) 3315 view = self._match(TokenType.VIEW) 3316 outer = self._match(TokenType.OUTER) 3317 else: 3318 return None 3319 3320 if not this: 3321 this = ( 3322 self._parse_unnest() 3323 or self._parse_function() 3324 or self._parse_id_var(any_token=False) 3325 ) 3326 3327 while self._match(TokenType.DOT): 3328 this = exp.Dot( 3329 this=this, 3330 expression=self._parse_function() or self._parse_id_var(any_token=False), 3331 ) 3332 3333 if view: 3334 table = self._parse_id_var(any_token=False) 3335 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3336 table_alias: t.Optional[exp.TableAlias] = self.expression( 3337 exp.TableAlias, this=table, columns=columns 3338 ) 3339 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3340 # We move the alias from the lateral's child node to the lateral itself 3341 table_alias = this.args["alias"].pop() 3342 else: 3343 table_alias = self._parse_table_alias() 3344 3345 return self.expression( 3346 exp.Lateral, 3347 this=this, 3348 view=view, 3349 outer=outer, 3350 alias=table_alias, 3351 cross_apply=cross_apply, 3352 ) 3353 3354 def _parse_join_parts( 3355 self, 3356 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3357 return ( 3358 self._match_set(self.JOIN_METHODS) and self._prev, 3359 self._match_set(self.JOIN_SIDES) and self._prev, 3360 self._match_set(self.JOIN_KINDS) and self._prev, 3361 ) 3362 3363 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3364 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3365 this = self._parse_column() 3366 if isinstance(this, exp.Column): 3367 return this.this 3368 return this 3369 3370 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3371 3372 def _parse_join( 3373 self, skip_join_token: bool = False, parse_bracket: bool = False 3374 ) -> t.Optional[exp.Join]: 3375 if self._match(TokenType.COMMA): 3376 return self.expression(exp.Join, this=self._parse_table()) 3377 3378 index = self._index 3379 method, side, kind = self._parse_join_parts() 3380 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3381 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3382 3383 if not skip_join_token and not join: 3384 self._retreat(index) 3385 kind = None 3386 method = None 3387 side = None 3388 3389 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3390 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3391 3392 if not skip_join_token and not join and not outer_apply and not cross_apply: 3393 return None 3394 3395 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3396 3397 if method: 3398 kwargs["method"] = method.text 3399 if side: 3400 kwargs["side"] = side.text 3401 if kind: 3402 kwargs["kind"] = kind.text 3403 if hint: 3404 kwargs["hint"] = hint 3405 3406 if self._match(TokenType.MATCH_CONDITION): 3407 kwargs["match_condition"] = 
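# Usage sketch (illustrative comment, not part of the original source):
# _parse_lateral above covers Hive's LATERAL VIEW as well as CROSS/OUTER APPLY.
# Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "SELECT col FROM t LATERAL VIEW EXPLODE(arr) x AS col", read="hive"
#   ... )
# This should produce an exp.Lateral with view=True and a TableAlias for x(col).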
self._parse_wrapped(self._parse_comparison) 3408 3409 if self._match(TokenType.ON): 3410 kwargs["on"] = self._parse_assignment() 3411 elif self._match(TokenType.USING): 3412 kwargs["using"] = self._parse_using_identifiers() 3413 elif ( 3414 not (outer_apply or cross_apply) 3415 and not isinstance(kwargs["this"], exp.Unnest) 3416 and not (kind and kind.token_type == TokenType.CROSS) 3417 ): 3418 index = self._index 3419 joins: t.Optional[list] = list(self._parse_joins()) 3420 3421 if joins and self._match(TokenType.ON): 3422 kwargs["on"] = self._parse_assignment() 3423 elif joins and self._match(TokenType.USING): 3424 kwargs["using"] = self._parse_using_identifiers() 3425 else: 3426 joins = None 3427 self._retreat(index) 3428 3429 kwargs["this"].set("joins", joins if joins else None) 3430 3431 comments = [c for token in (method, side, kind) if token for c in token.comments] 3432 return self.expression(exp.Join, comments=comments, **kwargs) 3433 3434 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3435 this = self._parse_assignment() 3436 3437 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3438 return this 3439 3440 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3441 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3442 3443 return this 3444 3445 def _parse_index_params(self) -> exp.IndexParameters: 3446 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3447 3448 if self._match(TokenType.L_PAREN, advance=False): 3449 columns = self._parse_wrapped_csv(self._parse_with_operator) 3450 else: 3451 columns = None 3452 3453 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3454 partition_by = self._parse_partition_by() 3455 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3456 tablespace = ( 3457 self._parse_var(any_token=True) 3458 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3459 else None 3460 ) 3461 where = self._parse_where() 3462 3463 on = self._parse_field() if self._match(TokenType.ON) else None 3464 3465 return self.expression( 3466 exp.IndexParameters, 3467 using=using, 3468 columns=columns, 3469 include=include, 3470 partition_by=partition_by, 3471 where=where, 3472 with_storage=with_storage, 3473 tablespace=tablespace, 3474 on=on, 3475 ) 3476 3477 def _parse_index( 3478 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3479 ) -> t.Optional[exp.Index]: 3480 if index or anonymous: 3481 unique = None 3482 primary = None 3483 amp = None 3484 3485 self._match(TokenType.ON) 3486 self._match(TokenType.TABLE) # hive 3487 table = self._parse_table_parts(schema=True) 3488 else: 3489 unique = self._match(TokenType.UNIQUE) 3490 primary = self._match_text_seq("PRIMARY") 3491 amp = self._match_text_seq("AMP") 3492 3493 if not self._match(TokenType.INDEX): 3494 return None 3495 3496 index = self._parse_id_var() 3497 table = None 3498 3499 params = self._parse_index_params() 3500 3501 return self.expression( 3502 exp.Index, 3503 this=index, 3504 table=table, 3505 unique=unique, 3506 primary=primary, 3507 amp=amp, 3508 params=params, 3509 ) 3510 3511 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3512 hints: t.List[exp.Expression] = [] 3513 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3514 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3515 hints.append( 3516 self.expression( 3517 exp.WithTableHint, 3518 
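# Usage sketch (illustrative comment, not part of the original source): this
# branch parses T-SQL WITH (...) table hints, while the loop below handles
# MySQL's USE/FORCE/IGNORE INDEX forms. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql")
# The parsed table should carry an exp.WithTableHint in its "hints" arg.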
expressions=self._parse_csv( 3519 lambda: self._parse_function() or self._parse_var(any_token=True) 3520 ), 3521 ) 3522 ) 3523 self._match_r_paren() 3524 else: 3525 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3526 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3527 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3528 3529 self._match_set((TokenType.INDEX, TokenType.KEY)) 3530 if self._match(TokenType.FOR): 3531 hint.set("target", self._advance_any() and self._prev.text.upper()) 3532 3533 hint.set("expressions", self._parse_wrapped_id_vars()) 3534 hints.append(hint) 3535 3536 return hints or None 3537 3538 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3539 return ( 3540 (not schema and self._parse_function(optional_parens=False)) 3541 or self._parse_id_var(any_token=False) 3542 or self._parse_string_as_identifier() 3543 or self._parse_placeholder() 3544 ) 3545 3546 def _parse_table_parts( 3547 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3548 ) -> exp.Table: 3549 catalog = None 3550 db = None 3551 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3552 3553 while self._match(TokenType.DOT): 3554 if catalog: 3555 # This allows nesting the table in arbitrarily many dot expressions if needed 3556 table = self.expression( 3557 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3558 ) 3559 else: 3560 catalog = db 3561 db = table 3562 # "" used for tsql FROM a..b case 3563 table = self._parse_table_part(schema=schema) or "" 3564 3565 if ( 3566 wildcard 3567 and self._is_connected() 3568 and (isinstance(table, exp.Identifier) or not table) 3569 and self._match(TokenType.STAR) 3570 ): 3571 if isinstance(table, exp.Identifier): 3572 table.args["this"] += "*" 3573 else: 3574 table = exp.Identifier(this="*") 3575 3576 # We bubble up comments from the Identifier to the Table 3577 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3578 3579 if is_db_reference: 3580 catalog = db 3581 db = table 3582 table = None 3583 3584 if not table and not is_db_reference: 3585 self.raise_error(f"Expected table name but got {self._curr}") 3586 if not db and is_db_reference: 3587 self.raise_error(f"Expected database name but got {self._curr}") 3588 3589 table = self.expression( 3590 exp.Table, 3591 comments=comments, 3592 this=table, 3593 db=db, 3594 catalog=catalog, 3595 ) 3596 3597 changes = self._parse_changes() 3598 if changes: 3599 table.set("changes", changes) 3600 3601 at_before = self._parse_historical_data() 3602 if at_before: 3603 table.set("when", at_before) 3604 3605 pivots = self._parse_pivots() 3606 if pivots: 3607 table.set("pivots", pivots) 3608 3609 return table 3610 3611 def _parse_table( 3612 self, 3613 schema: bool = False, 3614 joins: bool = False, 3615 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3616 parse_bracket: bool = False, 3617 is_db_reference: bool = False, 3618 parse_partition: bool = False, 3619 ) -> t.Optional[exp.Expression]: 3620 lateral = self._parse_lateral() 3621 if lateral: 3622 return lateral 3623 3624 unnest = self._parse_unnest() 3625 if unnest: 3626 return unnest 3627 3628 values = self._parse_derived_table_values() 3629 if values: 3630 return values 3631 3632 subquery = self._parse_select(table=True) 3633 if subquery: 3634 if not subquery.args.get("pivots"): 3635 subquery.set("pivots", self._parse_pivots()) 3636 return subquery 3637 3638 bracket = parse_bracket and 
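# Usage sketch (illustrative comment, not part of the original source): as the
# "" comment above notes, _parse_table_parts tolerates T-SQL's a..b shorthand.
# Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM a..b", read="tsql")
# The exp.Table should end up with catalog a, an empty db part, and name b
# (hedged: verify against the version at hand).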
self._parse_bracket(None) 3639 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3640 3641 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3642 self._parse_table 3643 ) 3644 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3645 3646 only = self._match(TokenType.ONLY) 3647 3648 this = t.cast( 3649 exp.Expression, 3650 bracket 3651 or rows_from 3652 or self._parse_bracket( 3653 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3654 ), 3655 ) 3656 3657 if only: 3658 this.set("only", only) 3659 3660 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3661 self._match_text_seq("*") 3662 3663 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3664 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3665 this.set("partition", self._parse_partition()) 3666 3667 if schema: 3668 return self._parse_schema(this=this) 3669 3670 version = self._parse_version() 3671 3672 if version: 3673 this.set("version", version) 3674 3675 if self.dialect.ALIAS_POST_TABLESAMPLE: 3676 this.set("sample", self._parse_table_sample()) 3677 3678 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3679 if alias: 3680 this.set("alias", alias) 3681 3682 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3683 return self.expression( 3684 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3685 ) 3686 3687 this.set("hints", self._parse_table_hints()) 3688 3689 if not this.args.get("pivots"): 3690 this.set("pivots", self._parse_pivots()) 3691 3692 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3693 this.set("sample", self._parse_table_sample()) 3694 3695 if joins: 3696 for join in self._parse_joins(): 3697 this.append("joins", join) 3698 3699 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3700 this.set("ordinality", True) 3701 this.set("alias", self._parse_table_alias()) 3702 3703 return this 3704 3705 def _parse_version(self) -> t.Optional[exp.Version]: 3706 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3707 this = "TIMESTAMP" 3708 elif self._match(TokenType.VERSION_SNAPSHOT): 3709 this = "VERSION" 3710 else: 3711 return None 3712 3713 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3714 kind = self._prev.text.upper() 3715 start = self._parse_bitwise() 3716 self._match_texts(("TO", "AND")) 3717 end = self._parse_bitwise() 3718 expression: t.Optional[exp.Expression] = self.expression( 3719 exp.Tuple, expressions=[start, end] 3720 ) 3721 elif self._match_text_seq("CONTAINED", "IN"): 3722 kind = "CONTAINED IN" 3723 expression = self.expression( 3724 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3725 ) 3726 elif self._match(TokenType.ALL): 3727 kind = "ALL" 3728 expression = None 3729 else: 3730 self._match_text_seq("AS", "OF") 3731 kind = "AS OF" 3732 expression = self._parse_type() 3733 3734 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3735 3736 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3737 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3738 index = self._index 3739 historical_data = None 3740 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3741 this = self._prev.text.upper() 3742 kind = ( 3743 self._match(TokenType.L_PAREN) 3744 and self._match_texts(self.HISTORICAL_DATA_KIND) 3745 and self._prev.text.upper() 3746 ) 3747 expression = 
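# Usage sketch (illustrative comment, not part of the original source):
# _parse_historical_data consumes Snowflake AT/BEFORE time-travel clauses,
# whose kind and value are joined by => (TokenType.FARROW). Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "SELECT * FROM t AT (TIMESTAMP => '2024-01-01'::TIMESTAMP)",
#   ...     read="snowflake",
#   ... )
# The table should carry an exp.HistoricalData node under its "when" arg.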
self._match(TokenType.FARROW) and self._parse_bitwise() 3748 3749 if expression: 3750 self._match_r_paren() 3751 historical_data = self.expression( 3752 exp.HistoricalData, this=this, kind=kind, expression=expression 3753 ) 3754 else: 3755 self._retreat(index) 3756 3757 return historical_data 3758 3759 def _parse_changes(self) -> t.Optional[exp.Changes]: 3760 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3761 return None 3762 3763 information = self._parse_var(any_token=True) 3764 self._match_r_paren() 3765 3766 return self.expression( 3767 exp.Changes, 3768 information=information, 3769 at_before=self._parse_historical_data(), 3770 end=self._parse_historical_data(), 3771 ) 3772 3773 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3774 if not self._match(TokenType.UNNEST): 3775 return None 3776 3777 expressions = self._parse_wrapped_csv(self._parse_equality) 3778 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3779 3780 alias = self._parse_table_alias() if with_alias else None 3781 3782 if alias: 3783 if self.dialect.UNNEST_COLUMN_ONLY: 3784 if alias.args.get("columns"): 3785 self.raise_error("Unexpected extra column alias in unnest.") 3786 3787 alias.set("columns", [alias.this]) 3788 alias.set("this", None) 3789 3790 columns = alias.args.get("columns") or [] 3791 if offset and len(expressions) < len(columns): 3792 offset = columns.pop() 3793 3794 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3795 self._match(TokenType.ALIAS) 3796 offset = self._parse_id_var( 3797 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3798 ) or exp.to_identifier("offset") 3799 3800 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3801 3802 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3803 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3804 if not is_derived and not ( 3805 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3806 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3807 ): 3808 return None 3809 3810 expressions = self._parse_csv(self._parse_value) 3811 alias = self._parse_table_alias() 3812 3813 if is_derived: 3814 self._match_r_paren() 3815 3816 return self.expression( 3817 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3818 ) 3819 3820 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3821 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3822 as_modifier and self._match_text_seq("USING", "SAMPLE") 3823 ): 3824 return None 3825 3826 bucket_numerator = None 3827 bucket_denominator = None 3828 bucket_field = None 3829 percent = None 3830 size = None 3831 seed = None 3832 3833 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3834 matched_l_paren = self._match(TokenType.L_PAREN) 3835 3836 if self.TABLESAMPLE_CSV: 3837 num = None 3838 expressions = self._parse_csv(self._parse_primary) 3839 else: 3840 expressions = None 3841 num = ( 3842 self._parse_factor() 3843 if self._match(TokenType.NUMBER, advance=False) 3844 else self._parse_primary() or self._parse_placeholder() 3845 ) 3846 3847 if self._match_text_seq("BUCKET"): 3848 bucket_numerator = self._parse_number() 3849 self._match_text_seq("OUT", "OF") 3850 bucket_denominator = self._parse_number() 3851 self._match(TokenType.ON) 3852 bucket_field = self._parse_field() 3853 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3854 percent = num
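# Usage sketch (illustrative comment, not part of the original source):
# _parse_table_sample normalizes the various TABLESAMPLE spellings (percent,
# size, Hive buckets, seeds). Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)", read="tsql")
# This should attach an exp.TableSample with percent=10 to the table; in
# dialects where a bare number means rows, "size" is populated instead.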
3855 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3856 size = num 3857 else: 3858 percent = num 3859 3860 if matched_l_paren: 3861 self._match_r_paren() 3862 3863 if self._match(TokenType.L_PAREN): 3864 method = self._parse_var(upper=True) 3865 seed = self._match(TokenType.COMMA) and self._parse_number() 3866 self._match_r_paren() 3867 elif self._match_texts(("SEED", "REPEATABLE")): 3868 seed = self._parse_wrapped(self._parse_number) 3869 3870 if not method and self.DEFAULT_SAMPLING_METHOD: 3871 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3872 3873 return self.expression( 3874 exp.TableSample, 3875 expressions=expressions, 3876 method=method, 3877 bucket_numerator=bucket_numerator, 3878 bucket_denominator=bucket_denominator, 3879 bucket_field=bucket_field, 3880 percent=percent, 3881 size=size, 3882 seed=seed, 3883 ) 3884 3885 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3886 return list(iter(self._parse_pivot, None)) or None 3887 3888 def _parse_joins(self) -> t.Iterator[exp.Join]: 3889 return iter(self._parse_join, None) 3890 3891 # https://duckdb.org/docs/sql/statements/pivot 3892 def _parse_simplified_pivot(self) -> exp.Pivot: 3893 def _parse_on() -> t.Optional[exp.Expression]: 3894 this = self._parse_bitwise() 3895 return self._parse_in(this) if self._match(TokenType.IN) else this 3896 3897 this = self._parse_table() 3898 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3899 using = self._match(TokenType.USING) and self._parse_csv( 3900 lambda: self._parse_alias(self._parse_function()) 3901 ) 3902 group = self._parse_group() 3903 return self.expression( 3904 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3905 ) 3906 3907 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3908 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3909 this = self._parse_select_or_expression() 3910 3911 self._match(TokenType.ALIAS) 3912 alias = self._parse_bitwise() 3913 if alias: 3914 if isinstance(alias, exp.Column) and not alias.db: 3915 alias = alias.this 3916 return self.expression(exp.PivotAlias, this=this, alias=alias) 3917 3918 return this 3919 3920 value = self._parse_column() 3921 3922 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3923 self.raise_error("Expecting IN (") 3924 3925 if self._match(TokenType.ANY): 3926 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3927 else: 3928 exprs = self._parse_csv(_parse_aliased_expression) 3929 3930 self._match_r_paren() 3931 return self.expression(exp.In, this=value, expressions=exprs) 3932 3933 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3934 index = self._index 3935 include_nulls = None 3936 3937 if self._match(TokenType.PIVOT): 3938 unpivot = False 3939 elif self._match(TokenType.UNPIVOT): 3940 unpivot = True 3941 3942 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3943 if self._match_text_seq("INCLUDE", "NULLS"): 3944 include_nulls = True 3945 elif self._match_text_seq("EXCLUDE", "NULLS"): 3946 include_nulls = False 3947 else: 3948 return None 3949 3950 expressions = [] 3951 3952 if not self._match(TokenType.L_PAREN): 3953 self._retreat(index) 3954 return None 3955 3956 if unpivot: 3957 expressions = self._parse_csv(self._parse_column) 3958 else: 3959 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3960 3961 if not expressions: 3962 self.raise_error("Failed to parse PIVOT's aggregation list") 3963 3964 if 
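# Usage sketch (illustrative comment, not part of the original source):
# _parse_pivot gathers the aggregation list above, then the FOR column and IN
# values parsed just below. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one(
#   ...     "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('JAN', 'FEB')) AS p",
#   ...     read="snowflake",
#   ... )
# The table should gain an exp.Pivot in "pivots", with output column names
# generated as described further down for the non-UNPIVOT case.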
not self._match(TokenType.FOR): 3965 self.raise_error("Expecting FOR") 3966 3967 field = self._parse_pivot_in() 3968 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3969 self._parse_bitwise 3970 ) 3971 3972 self._match_r_paren() 3973 3974 pivot = self.expression( 3975 exp.Pivot, 3976 expressions=expressions, 3977 field=field, 3978 unpivot=unpivot, 3979 include_nulls=include_nulls, 3980 default_on_null=default_on_null, 3981 ) 3982 3983 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3984 pivot.set("alias", self._parse_table_alias()) 3985 3986 if not unpivot: 3987 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3988 3989 columns: t.List[exp.Expression] = [] 3990 for fld in pivot.args["field"].expressions: 3991 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3992 for name in names: 3993 if self.PREFIXED_PIVOT_COLUMNS: 3994 name = f"{name}_{field_name}" if name else field_name 3995 else: 3996 name = f"{field_name}_{name}" if name else field_name 3997 3998 columns.append(exp.to_identifier(name)) 3999 4000 pivot.set("columns", columns) 4001 4002 return pivot 4003 4004 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4005 return [agg.alias for agg in aggregations] 4006 4007 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4008 if not skip_where_token and not self._match(TokenType.PREWHERE): 4009 return None 4010 4011 return self.expression( 4012 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4013 ) 4014 4015 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4016 if not skip_where_token and not self._match(TokenType.WHERE): 4017 return None 4018 4019 return self.expression( 4020 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4021 ) 4022 4023 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4024 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4025 return None 4026 4027 elements: t.Dict[str, t.Any] = defaultdict(list) 4028 4029 if self._match(TokenType.ALL): 4030 elements["all"] = True 4031 elif self._match(TokenType.DISTINCT): 4032 elements["all"] = False 4033 4034 while True: 4035 index = self._index 4036 4037 elements["expressions"].extend( 4038 self._parse_csv( 4039 lambda: None 4040 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4041 else self._parse_assignment() 4042 ) 4043 ) 4044 4045 before_with_index = self._index 4046 with_prefix = self._match(TokenType.WITH) 4047 4048 if self._match(TokenType.ROLLUP): 4049 elements["rollup"].append( 4050 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4051 ) 4052 elif self._match(TokenType.CUBE): 4053 elements["cube"].append( 4054 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4055 ) 4056 elif self._match(TokenType.GROUPING_SETS): 4057 elements["grouping_sets"].append( 4058 self.expression( 4059 exp.GroupingSets, 4060 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4061 ) 4062 ) 4063 elif self._match_text_seq("TOTALS"): 4064 elements["totals"] = True # type: ignore 4065 4066 if before_with_index <= self._index <= before_with_index + 1: 4067 self._retreat(before_with_index) 4068 break 4069 4070 if index == self._index: 4071 break 4072 4073 return self.expression(exp.Group, **elements) # type: ignore 4074 4075 def _parse_cube_or_rollup(self, kind: 
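# Usage sketch (illustrative comment, not part of the original source):
# _parse_group above buckets plain expressions separately from CUBE, ROLLUP
# and GROUPING SETS (including the MySQL-style trailing WITH ROLLUP). Assuming
# parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
# The exp.Group should carry the rollup under its "rollup" arg rather than in
# "expressions".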
t.Type[E], with_prefix: bool = False) -> E: 4076 return self.expression( 4077 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4078 ) 4079 4080 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4081 if self._match(TokenType.L_PAREN): 4082 grouping_set = self._parse_csv(self._parse_column) 4083 self._match_r_paren() 4084 return self.expression(exp.Tuple, expressions=grouping_set) 4085 4086 return self._parse_column() 4087 4088 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4089 if not skip_having_token and not self._match(TokenType.HAVING): 4090 return None 4091 return self.expression(exp.Having, this=self._parse_assignment()) 4092 4093 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4094 if not self._match(TokenType.QUALIFY): 4095 return None 4096 return self.expression(exp.Qualify, this=self._parse_assignment()) 4097 4098 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4099 if skip_start_token: 4100 start = None 4101 elif self._match(TokenType.START_WITH): 4102 start = self._parse_assignment() 4103 else: 4104 return None 4105 4106 self._match(TokenType.CONNECT_BY) 4107 nocycle = self._match_text_seq("NOCYCLE") 4108 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4109 exp.Prior, this=self._parse_bitwise() 4110 ) 4111 connect = self._parse_assignment() 4112 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4113 4114 if not start and self._match(TokenType.START_WITH): 4115 start = self._parse_assignment() 4116 4117 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4118 4119 def _parse_name_as_expression(self) -> exp.Alias: 4120 return self.expression( 4121 exp.Alias, 4122 alias=self._parse_id_var(any_token=True), 4123 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4124 ) 4125 4126 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4127 if self._match_text_seq("INTERPOLATE"): 4128 return self._parse_wrapped_csv(self._parse_name_as_expression) 4129 return None 4130 4131 def _parse_order( 4132 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4133 ) -> t.Optional[exp.Expression]: 4134 siblings = None 4135 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4136 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4137 return this 4138 4139 siblings = True 4140 4141 return self.expression( 4142 exp.Order, 4143 this=this, 4144 expressions=self._parse_csv(self._parse_ordered), 4145 siblings=siblings, 4146 ) 4147 4148 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4149 if not self._match(token): 4150 return None 4151 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4152 4153 def _parse_ordered( 4154 self, parse_method: t.Optional[t.Callable] = None 4155 ) -> t.Optional[exp.Ordered]: 4156 this = parse_method() if parse_method else self._parse_assignment() 4157 if not this: 4158 return None 4159 4160 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4161 this = exp.var("ALL") 4162 4163 asc = self._match(TokenType.ASC) 4164 desc = self._match(TokenType.DESC) or (asc and False) 4165 4166 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4167 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4168 4169 nulls_first = is_nulls_first or False 4170 explicitly_null_ordered = is_nulls_first or is_nulls_last 4171 4172 if ( 4173 not explicitly_null_ordered 4174 and ( 4175 
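# Usage sketch (illustrative comment, not part of the original source): the
# condition below infers nulls_first from the dialect's NULL_ORDERING when the
# query doesn't spell out NULLS FIRST/LAST, so ordering semantics survive
# transpilation. Assuming the public transpile helper:
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT * FROM t ORDER BY x", read="mysql", write="postgres")
# Since MySQL sorts nulls first and Postgres sorts them last by default, the
# output should make the ordering explicit, roughly
# 'SELECT * FROM t ORDER BY x NULLS FIRST' (hedged: depends on generator rules).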
(not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4176 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4177 ) 4178 and self.dialect.NULL_ORDERING != "nulls_are_last" 4179 ): 4180 nulls_first = True 4181 4182 if self._match_text_seq("WITH", "FILL"): 4183 with_fill = self.expression( 4184 exp.WithFill, 4185 **{ # type: ignore 4186 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4187 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4188 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4189 "interpolate": self._parse_interpolate(), 4190 }, 4191 ) 4192 else: 4193 with_fill = None 4194 4195 return self.expression( 4196 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4197 ) 4198 4199 def _parse_limit( 4200 self, 4201 this: t.Optional[exp.Expression] = None, 4202 top: bool = False, 4203 skip_limit_token: bool = False, 4204 ) -> t.Optional[exp.Expression]: 4205 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4206 comments = self._prev_comments 4207 if top: 4208 limit_paren = self._match(TokenType.L_PAREN) 4209 expression = self._parse_term() if limit_paren else self._parse_number() 4210 4211 if limit_paren: 4212 self._match_r_paren() 4213 else: 4214 expression = self._parse_term() 4215 4216 if self._match(TokenType.COMMA): 4217 offset = expression 4218 expression = self._parse_term() 4219 else: 4220 offset = None 4221 4222 limit_exp = self.expression( 4223 exp.Limit, 4224 this=this, 4225 expression=expression, 4226 offset=offset, 4227 comments=comments, 4228 expressions=self._parse_limit_by(), 4229 ) 4230 4231 return limit_exp 4232 4233 if self._match(TokenType.FETCH): 4234 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4235 direction = self._prev.text.upper() if direction else "FIRST" 4236 4237 count = self._parse_field(tokens=self.FETCH_TOKENS) 4238 percent = self._match(TokenType.PERCENT) 4239 4240 self._match_set((TokenType.ROW, TokenType.ROWS)) 4241 4242 only = self._match_text_seq("ONLY") 4243 with_ties = self._match_text_seq("WITH", "TIES") 4244 4245 if only and with_ties: 4246 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4247 4248 return self.expression( 4249 exp.Fetch, 4250 direction=direction, 4251 count=count, 4252 percent=percent, 4253 with_ties=with_ties, 4254 ) 4255 4256 return this 4257 4258 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4259 if not self._match(TokenType.OFFSET): 4260 return this 4261 4262 count = self._parse_term() 4263 self._match_set((TokenType.ROW, TokenType.ROWS)) 4264 4265 return self.expression( 4266 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4267 ) 4268 4269 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4270 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4271 4272 def _parse_locks(self) -> t.List[exp.Lock]: 4273 locks = [] 4274 while True: 4275 if self._match_text_seq("FOR", "UPDATE"): 4276 update = True 4277 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4278 "LOCK", "IN", "SHARE", "MODE" 4279 ): 4280 update = False 4281 else: 4282 break 4283 4284 expressions = None 4285 if self._match_text_seq("OF"): 4286 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4287 4288 wait: t.Optional[bool | exp.Expression] = None 4289 if self._match_text_seq("NOWAIT"): 4290 wait = True 4291 elif self._match_text_seq("WAIT"): 4292 wait = 
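# Usage sketch (illustrative comment, not part of the original source):
# _parse_locks maps FOR UPDATE / FOR SHARE and the NOWAIT / WAIT / SKIP LOCKED
# options onto exp.Lock nodes. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t FOR UPDATE SKIP LOCKED", read="postgres")
# This should attach one exp.Lock with update=True and wait=False to the select.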
self._parse_primary() 4293 elif self._match_text_seq("SKIP", "LOCKED"): 4294 wait = False 4295 4296 locks.append( 4297 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4298 ) 4299 4300 return locks 4301 4302 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4303 while this and self._match_set(self.SET_OPERATIONS): 4304 token_type = self._prev.token_type 4305 4306 if token_type == TokenType.UNION: 4307 operation: t.Type[exp.SetOperation] = exp.Union 4308 elif token_type == TokenType.EXCEPT: 4309 operation = exp.Except 4310 else: 4311 operation = exp.Intersect 4312 4313 comments = self._prev.comments 4314 4315 if self._match(TokenType.DISTINCT): 4316 distinct: t.Optional[bool] = True 4317 elif self._match(TokenType.ALL): 4318 distinct = False 4319 else: 4320 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4321 if distinct is None: 4322 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4323 4324 by_name = self._match_text_seq("BY", "NAME") 4325 expression = self._parse_select(nested=True, parse_set_operation=False) 4326 4327 this = self.expression( 4328 operation, 4329 comments=comments, 4330 this=this, 4331 distinct=distinct, 4332 by_name=by_name, 4333 expression=expression, 4334 ) 4335 4336 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4337 expression = this.expression 4338 4339 if expression: 4340 for arg in self.SET_OP_MODIFIERS: 4341 expr = expression.args.get(arg) 4342 if expr: 4343 this.set(arg, expr.pop()) 4344 4345 return this 4346 4347 def _parse_expression(self) -> t.Optional[exp.Expression]: 4348 return self._parse_alias(self._parse_assignment()) 4349 4350 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4351 this = self._parse_disjunction() 4352 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4353 # This allows us to parse <non-identifier token> := <expr> 4354 this = exp.column( 4355 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4356 ) 4357 4358 while self._match_set(self.ASSIGNMENT): 4359 if isinstance(this, exp.Column) and len(this.parts) == 1: 4360 this = this.this 4361 4362 this = self.expression( 4363 self.ASSIGNMENT[self._prev.token_type], 4364 this=this, 4365 comments=self._prev_comments, 4366 expression=self._parse_assignment(), 4367 ) 4368 4369 return this 4370 4371 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4372 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4373 4374 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4375 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4376 4377 def _parse_equality(self) -> t.Optional[exp.Expression]: 4378 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4379 4380 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4381 return self._parse_tokens(self._parse_range, self.COMPARISON) 4382 4383 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4384 this = this or self._parse_bitwise() 4385 negate = self._match(TokenType.NOT) 4386 4387 if self._match_set(self.RANGE_PARSERS): 4388 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4389 if not expression: 4390 return this 4391 4392 this = expression 4393 elif self._match(TokenType.ISNULL): 4394 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4395 4396 # Postgres supports ISNULL and NOTNULL for conditions.
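# Usage sketch (illustrative comment, not part of the original source): both
# postfix operators reduce to an exp.Is (negated for NOTNULL), so they
# transpile cleanly. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t WHERE x NOTNULL", read="postgres").sql()
# The output should be roughly 'SELECT * FROM t WHERE NOT x IS NULL'.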
4397 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4398 if self._match(TokenType.NOTNULL): 4399 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4400 this = self.expression(exp.Not, this=this) 4401 4402 if negate: 4403 this = self._negate_range(this) 4404 4405 if self._match(TokenType.IS): 4406 this = self._parse_is(this) 4407 4408 return this 4409 4410 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4411 if not this: 4412 return this 4413 4414 return self.expression(exp.Not, this=this) 4415 4416 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4417 index = self._index - 1 4418 negate = self._match(TokenType.NOT) 4419 4420 if self._match_text_seq("DISTINCT", "FROM"): 4421 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4422 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4423 4424 if self._match(TokenType.JSON): 4425 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4426 4427 if self._match_text_seq("WITH"): 4428 _with = True 4429 elif self._match_text_seq("WITHOUT"): 4430 _with = False 4431 else: 4432 _with = None 4433 4434 unique = self._match(TokenType.UNIQUE) 4435 self._match_text_seq("KEYS") 4436 expression: t.Optional[exp.Expression] = self.expression( 4437 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4438 ) 4439 else: 4440 expression = self._parse_primary() or self._parse_null() 4441 if not expression: 4442 self._retreat(index) 4443 return None 4444 4445 this = self.expression(exp.Is, this=this, expression=expression) 4446 return self.expression(exp.Not, this=this) if negate else this 4447 4448 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4449 unnest = self._parse_unnest(with_alias=False) 4450 if unnest: 4451 this = self.expression(exp.In, this=this, unnest=unnest) 4452 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4453 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4454 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4455 4456 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4457 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4458 else: 4459 this = self.expression(exp.In, this=this, expressions=expressions) 4460 4461 if matched_l_paren: 4462 self._match_r_paren(this) 4463 elif not self._match(TokenType.R_BRACKET, expression=this): 4464 self.raise_error("Expecting ]") 4465 else: 4466 this = self.expression(exp.In, this=this, field=self._parse_field()) 4467 4468 return this 4469 4470 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4471 low = self._parse_bitwise() 4472 self._match(TokenType.AND) 4473 high = self._parse_bitwise() 4474 return self.expression(exp.Between, this=this, low=low, high=high) 4475 4476 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4477 if not self._match(TokenType.ESCAPE): 4478 return this 4479 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4480 4481 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4482 index = self._index 4483 4484 if not self._match(TokenType.INTERVAL) and match_interval: 4485 return None 4486 4487 if self._match(TokenType.STRING, advance=False): 4488 this = self._parse_primary() 4489 else: 4490 this = self._parse_term() 4491 4492 if not this 
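# Usage sketch (illustrative comment, not part of the original source):
# _parse_is above turns IS [NOT] DISTINCT FROM into the null-safe comparison
# nodes. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a IS DISTINCT FROM b FROM t")
# The projection should parse to exp.NullSafeNEQ, and the IS NOT DISTINCT FROM
# variant to exp.NullSafeEQ, matching the klass selection above.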
or ( 4493 isinstance(this, exp.Column) 4494 and not this.table 4495 and not this.this.quoted 4496 and this.name.upper() == "IS" 4497 ): 4498 self._retreat(index) 4499 return None 4500 4501 unit = self._parse_function() or ( 4502 not self._match(TokenType.ALIAS, advance=False) 4503 and self._parse_var(any_token=True, upper=True) 4504 ) 4505 4506 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4507 # each INTERVAL expression into this canonical form so it's easy to transpile 4508 if this and this.is_number: 4509 this = exp.Literal.string(this.to_py()) 4510 elif this and this.is_string: 4511 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4512 if len(parts) == 1: 4513 if unit: 4514 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4515 self._retreat(self._index - 1) 4516 4517 this = exp.Literal.string(parts[0][0]) 4518 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4519 4520 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4521 unit = self.expression( 4522 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4523 ) 4524 4525 interval = self.expression(exp.Interval, this=this, unit=unit) 4526 4527 index = self._index 4528 self._match(TokenType.PLUS) 4529 4530 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4531 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4532 return self.expression( 4533 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4534 ) 4535 4536 self._retreat(index) 4537 return interval 4538 4539 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4540 this = self._parse_term() 4541 4542 while True: 4543 if self._match_set(self.BITWISE): 4544 this = self.expression( 4545 self.BITWISE[self._prev.token_type], 4546 this=this, 4547 expression=self._parse_term(), 4548 ) 4549 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4550 this = self.expression( 4551 exp.DPipe, 4552 this=this, 4553 expression=self._parse_term(), 4554 safe=not self.dialect.STRICT_STRING_CONCAT, 4555 ) 4556 elif self._match(TokenType.DQMARK): 4557 this = self.expression( 4558 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4559 ) 4560 elif self._match_pair(TokenType.LT, TokenType.LT): 4561 this = self.expression( 4562 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4563 ) 4564 elif self._match_pair(TokenType.GT, TokenType.GT): 4565 this = self.expression( 4566 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4567 ) 4568 else: 4569 break 4570 4571 return this 4572 4573 def _parse_term(self) -> t.Optional[exp.Expression]: 4574 this = self._parse_factor() 4575 4576 while self._match_set(self.TERM): 4577 klass = self.TERM[self._prev.token_type] 4578 comments = self._prev_comments 4579 expression = self._parse_factor() 4580 4581 this = self.expression(klass, this=this, comments=comments, expression=expression) 4582 4583 if isinstance(this, exp.Collate): 4584 expr = this.expression 4585 4586 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4587 # fallback to Identifier / Var 4588 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4589 ident = expr.this 4590 if isinstance(ident, exp.Identifier): 4591 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4592 4593 return this 4594 4595 def _parse_factor(self) -> t.Optional[exp.Expression]: 4596 parse_method = self._parse_exponent if 
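# Usage sketch (illustrative comment, not part of the original source):
# _parse_interval above canonicalizes literals such as INTERVAL '5 day' into
# the INTERVAL '5' DAY form described in its comments. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
# The output should be roughly "SELECT INTERVAL '5' DAY".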
self.EXPONENT else self._parse_unary 4597 this = parse_method() 4598 4599 while self._match_set(self.FACTOR): 4600 klass = self.FACTOR[self._prev.token_type] 4601 comments = self._prev_comments 4602 expression = parse_method() 4603 4604 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4605 self._retreat(self._index - 1) 4606 return this 4607 4608 this = self.expression(klass, this=this, comments=comments, expression=expression) 4609 4610 if isinstance(this, exp.Div): 4611 this.args["typed"] = self.dialect.TYPED_DIVISION 4612 this.args["safe"] = self.dialect.SAFE_DIVISION 4613 4614 return this 4615 4616 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4617 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4618 4619 def _parse_unary(self) -> t.Optional[exp.Expression]: 4620 if self._match_set(self.UNARY_PARSERS): 4621 return self.UNARY_PARSERS[self._prev.token_type](self) 4622 return self._parse_at_time_zone(self._parse_type()) 4623 4624 def _parse_type( 4625 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4626 ) -> t.Optional[exp.Expression]: 4627 interval = parse_interval and self._parse_interval() 4628 if interval: 4629 return interval 4630 4631 index = self._index 4632 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4633 4634 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4635 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4636 if isinstance(data_type, exp.Cast): 4637 # This constructor can contain ops directly after it, for instance struct unnesting: 4638 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4639 return self._parse_column_ops(data_type) 4640 4641 if data_type: 4642 index2 = self._index 4643 this = self._parse_primary() 4644 4645 if isinstance(this, exp.Literal): 4646 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4647 if parser: 4648 return parser(self, this, data_type) 4649 4650 return self.expression(exp.Cast, this=this, to=data_type) 4651 4652 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4653 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4654 # 4655 # If the index difference here is greater than 1, that means the parser itself must have 4656 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4657 # 4658 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4659 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4660 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4661 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4662 # 4663 # In these cases, we don't really want to return the converted type, but instead retreat 4664 # and try to parse a Column or Identifier in the section below.
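# Usage sketch (illustrative comment, not part of the original source): the
# retreat below is what lets a bare type keyword still parse as a column where
# appropriate, while a genuine type annotation picks up dialect defaults from
# TYPE_CONVERTERS. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL)", read="snowflake").sql("snowflake")
# Per the comment above, the output should be roughly
# 'SELECT CAST(x AS DECIMAL(38, 0))'.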
4665 if data_type.expressions and index2 - index > 1: 4666 self._retreat(index2) 4667 return self._parse_column_ops(data_type) 4668 4669 self._retreat(index) 4670 4671 if fallback_to_identifier: 4672 return self._parse_id_var() 4673 4674 this = self._parse_column() 4675 return this and self._parse_column_ops(this) 4676 4677 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4678 this = self._parse_type() 4679 if not this: 4680 return None 4681 4682 if isinstance(this, exp.Column) and not this.table: 4683 this = exp.var(this.name.upper()) 4684 4685 return self.expression( 4686 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4687 ) 4688 4689 def _parse_types( 4690 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4691 ) -> t.Optional[exp.Expression]: 4692 index = self._index 4693 4694 this: t.Optional[exp.Expression] = None 4695 prefix = self._match_text_seq("SYSUDTLIB", ".") 4696 4697 if not self._match_set(self.TYPE_TOKENS): 4698 identifier = allow_identifiers and self._parse_id_var( 4699 any_token=False, tokens=(TokenType.VAR,) 4700 ) 4701 if isinstance(identifier, exp.Identifier): 4702 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4703 4704 if len(tokens) != 1: 4705 self.raise_error("Unexpected identifier", self._prev) 4706 4707 if tokens[0].token_type in self.TYPE_TOKENS: 4708 self._prev = tokens[0] 4709 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4710 type_name = identifier.name 4711 4712 while self._match(TokenType.DOT): 4713 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4714 4715 this = exp.DataType.build(type_name, udt=True) 4716 else: 4717 self._retreat(self._index - 1) 4718 return None 4719 else: 4720 return None 4721 4722 type_token = self._prev.token_type 4723 4724 if type_token == TokenType.PSEUDO_TYPE: 4725 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4726 4727 if type_token == TokenType.OBJECT_IDENTIFIER: 4728 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4729 4730 # https://materialize.com/docs/sql/types/map/ 4731 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4732 key_type = self._parse_types( 4733 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4734 ) 4735 if not self._match(TokenType.FARROW): 4736 self._retreat(index) 4737 return None 4738 4739 value_type = self._parse_types( 4740 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4741 ) 4742 if not self._match(TokenType.R_BRACKET): 4743 self._retreat(index) 4744 return None 4745 4746 return exp.DataType( 4747 this=exp.DataType.Type.MAP, 4748 expressions=[key_type, value_type], 4749 nested=True, 4750 prefix=prefix, 4751 ) 4752 4753 nested = type_token in self.NESTED_TYPE_TOKENS 4754 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4755 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4756 expressions = None 4757 maybe_func = False 4758 4759 if self._match(TokenType.L_PAREN): 4760 if is_struct: 4761 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4762 elif nested: 4763 expressions = self._parse_csv( 4764 lambda: self._parse_types( 4765 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4766 ) 4767 ) 4768 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4769 this = expressions[0] 4770 this.set("nullable", True) 4771 self._match_r_paren() 4772 return this 4773 elif type_token in self.ENUM_TYPE_TOKENS: 4774 
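# Usage sketch (illustrative comment, not part of the original source): the
# NULLABLE branch above unwraps ClickHouse's Nullable(...) wrapper into the
# inner type flagged nullable=True. Assuming parse_one:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT CAST(x AS Nullable(Int64))", read="clickhouse")
# The cast target should be the inner integer DataType with nullable set,
# rather than a distinct container type.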
expressions = self._parse_csv(self._parse_equality) 4775 elif is_aggregate: 4776 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4777 any_token=False, tokens=(TokenType.VAR,) 4778 ) 4779 if not func_or_ident or not self._match(TokenType.COMMA): 4780 return None 4781 expressions = self._parse_csv( 4782 lambda: self._parse_types( 4783 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4784 ) 4785 ) 4786 expressions.insert(0, func_or_ident) 4787 else: 4788 expressions = self._parse_csv(self._parse_type_size) 4789 4790 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4791 if type_token == TokenType.VECTOR and len(expressions) == 2: 4792 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4793 4794 if not expressions or not self._match(TokenType.R_PAREN): 4795 self._retreat(index) 4796 return None 4797 4798 maybe_func = True 4799 4800 values: t.Optional[t.List[exp.Expression]] = None 4801 4802 if nested and self._match(TokenType.LT): 4803 if is_struct: 4804 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4805 else: 4806 expressions = self._parse_csv( 4807 lambda: self._parse_types( 4808 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4809 ) 4810 ) 4811 4812 if not self._match(TokenType.GT): 4813 self.raise_error("Expecting >") 4814 4815 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4816 values = self._parse_csv(self._parse_assignment) 4817 if not values and is_struct: 4818 values = None 4819 self._retreat(self._index - 1) 4820 else: 4821 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4822 4823 if type_token in self.TIMESTAMPS: 4824 if self._match_text_seq("WITH", "TIME", "ZONE"): 4825 maybe_func = False 4826 tz_type = ( 4827 exp.DataType.Type.TIMETZ 4828 if type_token in self.TIMES 4829 else exp.DataType.Type.TIMESTAMPTZ 4830 ) 4831 this = exp.DataType(this=tz_type, expressions=expressions) 4832 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4833 maybe_func = False 4834 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4835 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4836 maybe_func = False 4837 elif type_token == TokenType.INTERVAL: 4838 unit = self._parse_var(upper=True) 4839 if unit: 4840 if self._match_text_seq("TO"): 4841 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4842 4843 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4844 else: 4845 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4846 4847 if maybe_func and check_func: 4848 index2 = self._index 4849 peek = self._parse_string() 4850 4851 if not peek: 4852 self._retreat(index) 4853 return None 4854 4855 self._retreat(index2) 4856 4857 if not this: 4858 if self._match_text_seq("UNSIGNED"): 4859 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4860 if not unsigned_type_token: 4861 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4862 4863 type_token = unsigned_type_token or type_token 4864 4865 this = exp.DataType( 4866 this=exp.DataType.Type[type_token.value], 4867 expressions=expressions, 4868 nested=nested, 4869 prefix=prefix, 4870 ) 4871 4872 # Empty arrays/structs are allowed 4873 if values is not None: 4874 cls = exp.Struct if is_struct else exp.Array 4875 this = exp.cast(cls(expressions=values), this, copy=False) 4876 4877 elif expressions: 4878 this.set("expressions", 
expressions) 4879 4880 # https://materialize.com/docs/sql/types/list/#type-name 4881 while self._match(TokenType.LIST): 4882 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4883 4884 index = self._index 4885 4886 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4887 matched_array = self._match(TokenType.ARRAY) 4888 4889 while self._curr: 4890 datatype_token = self._prev.token_type 4891 matched_l_bracket = self._match(TokenType.L_BRACKET) 4892 if not matched_l_bracket and not matched_array: 4893 break 4894 4895 matched_array = False 4896 values = self._parse_csv(self._parse_assignment) or None 4897 if ( 4898 values 4899 and not schema 4900 and ( 4901 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4902 ) 4903 ): 4904 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4905 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4906 self._retreat(index) 4907 break 4908 4909 this = exp.DataType( 4910 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4911 ) 4912 self._match(TokenType.R_BRACKET) 4913 4914 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4915 converter = self.TYPE_CONVERTERS.get(this.this) 4916 if converter: 4917 this = converter(t.cast(exp.DataType, this)) 4918 4919 return this 4920 4921 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4922 index = self._index 4923 4924 if ( 4925 self._curr 4926 and self._next 4927 and self._curr.token_type in self.TYPE_TOKENS 4928 and self._next.token_type in self.TYPE_TOKENS 4929 ): 4930 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4931 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4932 this = self._parse_id_var() 4933 else: 4934 this = ( 4935 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4936 or self._parse_id_var() 4937 ) 4938 4939 self._match(TokenType.COLON) 4940 4941 if ( 4942 type_required 4943 and not isinstance(this, exp.DataType) 4944 and not self._match_set(self.TYPE_TOKENS, advance=False) 4945 ): 4946 self._retreat(index) 4947 return self._parse_types() 4948 4949 return self._parse_column_def(this) 4950 4951 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4952 if not self._match_text_seq("AT", "TIME", "ZONE"): 4953 return this 4954 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4955 4956 def _parse_column(self) -> t.Optional[exp.Expression]: 4957 this = self._parse_column_reference() 4958 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4959 4960 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4961 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4962 4963 return column 4964 4965 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4966 this = self._parse_field() 4967 if ( 4968 not this 4969 and self._match(TokenType.VALUES, advance=False) 4970 and self.VALUES_FOLLOWED_BY_PAREN 4971 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4972 ): 4973 this = self._parse_id_var() 4974 4975 if isinstance(this, exp.Identifier): 4976 # We bubble up comments from the Identifier to the Column 4977 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4978 4979 return this 4980 4981 def _parse_colon_as_variant_extract( 4982 self, this: t.Optional[exp.Expression] 4983 ) -> t.Optional[exp.Expression]: 4984 casts = [] 4985 json_path = [] 4986 escape = None 4987 4988 while self._match(TokenType.COLON): 4989 start_index = self._index 4990 4991 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4992 path = self._parse_column_ops( 4993 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4994 ) 4995 4996 # The cast :: operator has a lower precedence than the extraction operator :, so 4997 # we rearrange the AST appropriately to avoid casting the JSON path 4998 while isinstance(path, exp.Cast): 4999 casts.append(path.to) 5000 path = path.this 5001 5002 if casts: 5003 dcolon_offset = next( 5004 i 5005 for i, t in enumerate(self._tokens[start_index:]) 5006 if t.token_type == TokenType.DCOLON 5007 ) 5008 end_token = self._tokens[start_index + dcolon_offset - 1] 5009 else: 5010 end_token = self._prev 5011 5012 if path: 5013 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5014 # it'll roundtrip to a string literal in GET_PATH 5015 if isinstance(path, exp.Identifier) and path.quoted: 5016 escape = True 5017 5018 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5019 5020 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5021 # Databricks transforms it back to the colon/dot notation 5022 if json_path: 5023 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5024 5025 if json_path_expr: 5026 json_path_expr.set("escape", escape) 5027 5028 this = self.expression( 5029 exp.JSONExtract, 5030 this=this, 5031 expression=json_path_expr, 5032 variant_extract=True, 5033 ) 5034 5035 while casts: 5036 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5037 5038 return this 5039 5040 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5041 return self._parse_types() 5042 5043 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5044 this = self._parse_bracket(this) 5045 5046 while self._match_set(self.COLUMN_OPERATORS): 5047 op_token = self._prev.token_type 5048 op = self.COLUMN_OPERATORS.get(op_token) 5049 5050 if op_token == TokenType.DCOLON: 5051 field = self._parse_dcolon() 5052 if not field: 5053 self.raise_error("Expected type") 5054 elif op and self._curr: 5055 field = self._parse_column_reference() or self._parse_bracket() 5056 else: 5057 field = self._parse_field(any_token=True, anonymous_func=True) 5058 5059 if isinstance(field, exp.Func) and this: 5060 # bigquery allows function calls like x.y.count(...) 5061 # SAFE.SUBSTR(...) 5062 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5063 this = exp.replace_tree( 5064 this, 5065 lambda n: ( 5066 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5067 if n.table 5068 else n.this 5069 ) 5070 if isinstance(n, exp.Column) 5071 else n, 5072 ) 5073 5074 if op: 5075 this = op(self, this, field) 5076 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5077 this = self.expression( 5078 exp.Column, 5079 this=field, 5080 table=this.this, 5081 db=this.args.get("table"), 5082 catalog=this.args.get("db"), 5083 ) 5084 else: 5085 this = self.expression(exp.Dot, this=this, expression=field) 5086 5087 this = self._parse_bracket(this) 5088 5089 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5090 5091 def _parse_primary(self) -> t.Optional[exp.Expression]: 5092 if self._match_set(self.PRIMARY_PARSERS): 5093 token_type = self._prev.token_type 5094 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5095 5096 if token_type == TokenType.STRING: 5097 expressions = [primary] 5098 while self._match(TokenType.STRING): 5099 expressions.append(exp.Literal.string(self._prev.text)) 5100 5101 if len(expressions) > 1: 5102 return self.expression(exp.Concat, expressions=expressions) 5103 5104 return primary 5105 5106 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5107 return exp.Literal.number(f"0.{self._prev.text}") 5108 5109 if self._match(TokenType.L_PAREN): 5110 comments = self._prev_comments 5111 query = self._parse_select() 5112 5113 if query: 5114 expressions = [query] 5115 else: 5116 expressions = self._parse_expressions() 5117 5118 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5119 5120 if not this and self._match(TokenType.R_PAREN, advance=False): 5121 this = self.expression(exp.Tuple) 
5122 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5123 this = self._parse_subquery(this=this, parse_alias=False) 5124 elif isinstance(this, exp.Subquery): 5125 this = self._parse_subquery( 5126 this=self._parse_set_operations(this), parse_alias=False 5127 ) 5128 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5129 this = self.expression(exp.Tuple, expressions=expressions) 5130 else: 5131 this = self.expression(exp.Paren, this=this) 5132 5133 if this: 5134 this.add_comments(comments) 5135 5136 self._match_r_paren(expression=this) 5137 return this 5138 5139 return None 5140 5141 def _parse_field( 5142 self, 5143 any_token: bool = False, 5144 tokens: t.Optional[t.Collection[TokenType]] = None, 5145 anonymous_func: bool = False, 5146 ) -> t.Optional[exp.Expression]: 5147 if anonymous_func: 5148 field = ( 5149 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5150 or self._parse_primary() 5151 ) 5152 else: 5153 field = self._parse_primary() or self._parse_function( 5154 anonymous=anonymous_func, any_token=any_token 5155 ) 5156 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5157 5158 def _parse_function( 5159 self, 5160 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5161 anonymous: bool = False, 5162 optional_parens: bool = True, 5163 any_token: bool = False, 5164 ) -> t.Optional[exp.Expression]: 5165 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5166 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5167 fn_syntax = False 5168 if ( 5169 self._match(TokenType.L_BRACE, advance=False) 5170 and self._next 5171 and self._next.text.upper() == "FN" 5172 ): 5173 self._advance(2) 5174 fn_syntax = True 5175 5176 func = self._parse_function_call( 5177 functions=functions, 5178 anonymous=anonymous, 5179 optional_parens=optional_parens, 5180 any_token=any_token, 5181 ) 5182 5183 if fn_syntax: 5184 self._match(TokenType.R_BRACE) 5185 5186 return func 5187 5188 def _parse_function_call( 5189 self, 5190 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5191 anonymous: bool = False, 5192 optional_parens: bool = True, 5193 any_token: bool = False, 5194 ) -> t.Optional[exp.Expression]: 5195 if not self._curr: 5196 return None 5197 5198 comments = self._curr.comments 5199 token_type = self._curr.token_type 5200 this = self._curr.text 5201 upper = this.upper() 5202 5203 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5204 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5205 self._advance() 5206 return self._parse_window(parser(self)) 5207 5208 if not self._next or self._next.token_type != TokenType.L_PAREN: 5209 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5210 self._advance() 5211 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5212 5213 return None 5214 5215 if any_token: 5216 if token_type in self.RESERVED_TOKENS: 5217 return None 5218 elif token_type not in self.FUNC_TOKENS: 5219 return None 5220 5221 self._advance(2) 5222 5223 parser = self.FUNCTION_PARSERS.get(upper) 5224 if parser and not anonymous: 5225 this = parser(self) 5226 else: 5227 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5228 5229 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5230 this = self.expression(subquery_predicate, this=self._parse_select()) 5231 self._match_r_paren() 5232 return this 5233 5234 if functions is None: 5235 functions = self.FUNCTIONS 5236 5237 function = 
functions.get(upper) 5238 5239 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5240 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5241 5242 if alias: 5243 args = self._kv_to_prop_eq(args) 5244 5245 if function and not anonymous: 5246 if "dialect" in function.__code__.co_varnames: 5247 func = function(args, dialect=self.dialect) 5248 else: 5249 func = function(args) 5250 5251 func = self.validate_expression(func, args) 5252 if not self.dialect.NORMALIZE_FUNCTIONS: 5253 func.meta["name"] = this 5254 5255 this = func 5256 else: 5257 if token_type == TokenType.IDENTIFIER: 5258 this = exp.Identifier(this=this, quoted=True) 5259 this = self.expression(exp.Anonymous, this=this, expressions=args) 5260 5261 if isinstance(this, exp.Expression): 5262 this.add_comments(comments) 5263 5264 self._match_r_paren(this) 5265 return self._parse_window(this) 5266 5267 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5268 return expression 5269 5270 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5271 transformed = [] 5272 5273 for index, e in enumerate(expressions): 5274 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5275 if isinstance(e, exp.Alias): 5276 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5277 5278 if not isinstance(e, exp.PropertyEQ): 5279 e = self.expression( 5280 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5281 ) 5282 5283 if isinstance(e.this, exp.Column): 5284 e.this.replace(e.this.this) 5285 else: 5286 e = self._to_prop_eq(e, index) 5287 5288 transformed.append(e) 5289 5290 return transformed 5291 5292 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5293 return self._parse_column_def(self._parse_id_var()) 5294 5295 def _parse_user_defined_function( 5296 self, kind: t.Optional[TokenType] = None 5297 ) -> t.Optional[exp.Expression]: 5298 this = self._parse_id_var() 5299 5300 while self._match(TokenType.DOT): 5301 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5302 5303 if not self._match(TokenType.L_PAREN): 5304 return this 5305 5306 expressions = self._parse_csv(self._parse_function_parameter) 5307 self._match_r_paren() 5308 return self.expression( 5309 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5310 ) 5311 5312 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5313 literal = self._parse_primary() 5314 if literal: 5315 return self.expression(exp.Introducer, this=token.text, expression=literal) 5316 5317 return self.expression(exp.Identifier, this=token.text) 5318 5319 def _parse_session_parameter(self) -> exp.SessionParameter: 5320 kind = None 5321 this = self._parse_id_var() or self._parse_primary() 5322 5323 if this and self._match(TokenType.DOT): 5324 kind = this.name 5325 this = self._parse_var() or self._parse_primary() 5326 5327 return self.expression(exp.SessionParameter, this=this, kind=kind) 5328 5329 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5330 return self._parse_id_var() 5331 5332 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5333 index = self._index 5334 5335 if self._match(TokenType.L_PAREN): 5336 expressions = t.cast( 5337 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5338 ) 5339 5340 if not self._match(TokenType.R_PAREN): 5341 self._retreat(index) 5342 else: 5343 expressions = [self._parse_lambda_arg()] 5344 5345 if 
self._match_set(self.LAMBDAS): 5346 return self.LAMBDAS[self._prev.token_type](self, expressions) 5347 5348 self._retreat(index) 5349 5350 this: t.Optional[exp.Expression] 5351 5352 if self._match(TokenType.DISTINCT): 5353 this = self.expression( 5354 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5355 ) 5356 else: 5357 this = self._parse_select_or_expression(alias=alias) 5358 5359 return self._parse_limit( 5360 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5361 ) 5362 5363 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5364 index = self._index 5365 if not self._match(TokenType.L_PAREN): 5366 return this 5367 5368 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5369 # expr can be of both types 5370 if self._match_set(self.SELECT_START_TOKENS): 5371 self._retreat(index) 5372 return this 5373 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5374 self._match_r_paren() 5375 return self.expression(exp.Schema, this=this, expressions=args) 5376 5377 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5378 return self._parse_column_def(self._parse_field(any_token=True)) 5379 5380 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5381 # column defs are not really columns, they're identifiers 5382 if isinstance(this, exp.Column): 5383 this = this.this 5384 5385 kind = self._parse_types(schema=True) 5386 5387 if self._match_text_seq("FOR", "ORDINALITY"): 5388 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5389 5390 constraints: t.List[exp.Expression] = [] 5391 5392 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5393 ("ALIAS", "MATERIALIZED") 5394 ): 5395 persisted = self._prev.text.upper() == "MATERIALIZED" 5396 constraint_kind = exp.ComputedColumnConstraint( 5397 this=self._parse_assignment(), 5398 persisted=persisted or self._match_text_seq("PERSISTED"), 5399 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5400 ) 5401 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5402 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5403 self._match(TokenType.ALIAS) 5404 constraints.append( 5405 self.expression( 5406 exp.ColumnConstraint, 5407 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5408 ) 5409 ) 5410 5411 while True: 5412 constraint = self._parse_column_constraint() 5413 if not constraint: 5414 break 5415 constraints.append(constraint) 5416 5417 if not kind and not constraints: 5418 return this 5419 5420 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5421 5422 def _parse_auto_increment( 5423 self, 5424 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5425 start = None 5426 increment = None 5427 5428 if self._match(TokenType.L_PAREN, advance=False): 5429 args = self._parse_wrapped_csv(self._parse_bitwise) 5430 start = seq_get(args, 0) 5431 increment = seq_get(args, 1) 5432 elif self._match_text_seq("START"): 5433 start = self._parse_bitwise() 5434 self._match_text_seq("INCREMENT") 5435 increment = self._parse_bitwise() 5436 5437 if start and increment: 5438 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5439 5440 return exp.AutoIncrementColumnConstraint() 5441 5442 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5443 if not 
self._match_text_seq("REFRESH"): 5444 self._retreat(self._index - 1) 5445 return None 5446 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5447 5448 def _parse_compress(self) -> exp.CompressColumnConstraint: 5449 if self._match(TokenType.L_PAREN, advance=False): 5450 return self.expression( 5451 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5452 ) 5453 5454 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5455 5456 def _parse_generated_as_identity( 5457 self, 5458 ) -> ( 5459 exp.GeneratedAsIdentityColumnConstraint 5460 | exp.ComputedColumnConstraint 5461 | exp.GeneratedAsRowColumnConstraint 5462 ): 5463 if self._match_text_seq("BY", "DEFAULT"): 5464 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5465 this = self.expression( 5466 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5467 ) 5468 else: 5469 self._match_text_seq("ALWAYS") 5470 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5471 5472 self._match(TokenType.ALIAS) 5473 5474 if self._match_text_seq("ROW"): 5475 start = self._match_text_seq("START") 5476 if not start: 5477 self._match(TokenType.END) 5478 hidden = self._match_text_seq("HIDDEN") 5479 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5480 5481 identity = self._match_text_seq("IDENTITY") 5482 5483 if self._match(TokenType.L_PAREN): 5484 if self._match(TokenType.START_WITH): 5485 this.set("start", self._parse_bitwise()) 5486 if self._match_text_seq("INCREMENT", "BY"): 5487 this.set("increment", self._parse_bitwise()) 5488 if self._match_text_seq("MINVALUE"): 5489 this.set("minvalue", self._parse_bitwise()) 5490 if self._match_text_seq("MAXVALUE"): 5491 this.set("maxvalue", self._parse_bitwise()) 5492 5493 if self._match_text_seq("CYCLE"): 5494 this.set("cycle", True) 5495 elif self._match_text_seq("NO", "CYCLE"): 5496 this.set("cycle", False) 5497 5498 if not identity: 5499 this.set("expression", self._parse_range()) 5500 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5501 args = self._parse_csv(self._parse_bitwise) 5502 this.set("start", seq_get(args, 0)) 5503 this.set("increment", seq_get(args, 1)) 5504 5505 self._match_r_paren() 5506 5507 return this 5508 5509 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5510 self._match_text_seq("LENGTH") 5511 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5512 5513 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5514 if self._match_text_seq("NULL"): 5515 return self.expression(exp.NotNullColumnConstraint) 5516 if self._match_text_seq("CASESPECIFIC"): 5517 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5518 if self._match_text_seq("FOR", "REPLICATION"): 5519 return self.expression(exp.NotForReplicationColumnConstraint) 5520 5521 # Unconsume the `NOT` token 5522 self._retreat(self._index - 1) 5523 return None 5524 5525 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5526 if self._match(TokenType.CONSTRAINT): 5527 this = self._parse_id_var() 5528 else: 5529 this = None 5530 5531 if self._match_texts(self.CONSTRAINT_PARSERS): 5532 return self.expression( 5533 exp.ColumnConstraint, 5534 this=this, 5535 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5536 ) 5537 5538 return this 5539 5540 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5541 if not self._match(TokenType.CONSTRAINT): 5542 
return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5543 5544 return self.expression( 5545 exp.Constraint, 5546 this=self._parse_id_var(), 5547 expressions=self._parse_unnamed_constraints(), 5548 ) 5549 5550 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5551 constraints = [] 5552 while True: 5553 constraint = self._parse_unnamed_constraint() or self._parse_function() 5554 if not constraint: 5555 break 5556 constraints.append(constraint) 5557 5558 return constraints 5559 5560 def _parse_unnamed_constraint( 5561 self, constraints: t.Optional[t.Collection[str]] = None 5562 ) -> t.Optional[exp.Expression]: 5563 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5564 constraints or self.CONSTRAINT_PARSERS 5565 ): 5566 return None 5567 5568 constraint = self._prev.text.upper() 5569 if constraint not in self.CONSTRAINT_PARSERS: 5570 self.raise_error(f"No parser found for schema constraint {constraint}.") 5571 5572 return self.CONSTRAINT_PARSERS[constraint](self) 5573 5574 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5575 return self._parse_id_var(any_token=False) 5576 5577 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5578 self._match_text_seq("KEY") 5579 return self.expression( 5580 exp.UniqueColumnConstraint, 5581 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5582 this=self._parse_schema(self._parse_unique_key()), 5583 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5584 on_conflict=self._parse_on_conflict(), 5585 ) 5586 5587 def _parse_key_constraint_options(self) -> t.List[str]: 5588 options = [] 5589 while True: 5590 if not self._curr: 5591 break 5592 5593 if self._match(TokenType.ON): 5594 action = None 5595 on = self._advance_any() and self._prev.text 5596 5597 if self._match_text_seq("NO", "ACTION"): 5598 action = "NO ACTION" 5599 elif self._match_text_seq("CASCADE"): 5600 action = "CASCADE" 5601 elif self._match_text_seq("RESTRICT"): 5602 action = "RESTRICT" 5603 elif self._match_pair(TokenType.SET, TokenType.NULL): 5604 action = "SET NULL" 5605 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5606 action = "SET DEFAULT" 5607 else: 5608 self.raise_error("Invalid key constraint") 5609 5610 options.append(f"ON {on} {action}") 5611 else: 5612 var = self._parse_var_from_options( 5613 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5614 ) 5615 if not var: 5616 break 5617 options.append(var.name) 5618 5619 return options 5620 5621 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5622 if match and not self._match(TokenType.REFERENCES): 5623 return None 5624 5625 expressions = None 5626 this = self._parse_table(schema=True) 5627 options = self._parse_key_constraint_options() 5628 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5629 5630 def _parse_foreign_key(self) -> exp.ForeignKey: 5631 expressions = self._parse_wrapped_id_vars() 5632 reference = self._parse_references() 5633 options = {} 5634 5635 while self._match(TokenType.ON): 5636 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5637 self.raise_error("Expected DELETE or UPDATE") 5638 5639 kind = self._prev.text.lower() 5640 5641 if self._match_text_seq("NO", "ACTION"): 5642 action = "NO ACTION" 5643 elif self._match(TokenType.SET): 5644 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5645 action = "SET " + self._prev.text.upper() 5646 else: 5647 self._advance() 5648 action = 
self._prev.text.upper() 5649 5650 options[kind] = action 5651 5652 return self.expression( 5653 exp.ForeignKey, 5654 expressions=expressions, 5655 reference=reference, 5656 **options, # type: ignore 5657 ) 5658 5659 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5660 return self._parse_field() 5661 5662 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5663 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5664 self._retreat(self._index - 1) 5665 return None 5666 5667 id_vars = self._parse_wrapped_id_vars() 5668 return self.expression( 5669 exp.PeriodForSystemTimeConstraint, 5670 this=seq_get(id_vars, 0), 5671 expression=seq_get(id_vars, 1), 5672 ) 5673 5674 def _parse_primary_key( 5675 self, wrapped_optional: bool = False, in_props: bool = False 5676 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5677 desc = ( 5678 self._match_set((TokenType.ASC, TokenType.DESC)) 5679 and self._prev.token_type == TokenType.DESC 5680 ) 5681 5682 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5683 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5684 5685 expressions = self._parse_wrapped_csv( 5686 self._parse_primary_key_part, optional=wrapped_optional 5687 ) 5688 options = self._parse_key_constraint_options() 5689 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5690 5691 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5692 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5693 5694 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5695 """ 5696 Parses a datetime literal in ODBC format. The literal is parsed into the corresponding 5697 expression type, for example `{d'yyyy-mm-dd'}` will be parsed into a `Date` node, exactly 5698 as `DATE('yyyy-mm-dd')` would be.
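Assuming the default `ODBC_DATETIME_LITERALS` mapping of `d`, `t` and `ts` to `exp.Date`, `exp.Time` and `exp.Timestamp`, `{t '09:30:00'}` and `{ts '2024-01-01 09:30:00'}` are likewise parsed into `Time` and `Timestamp` nodes.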
5699 5700 Reference: 5701 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5702 """ 5703 self._match(TokenType.VAR) 5704 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5705 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5706 if not self._match(TokenType.R_BRACE): 5707 self.raise_error("Expected }") 5708 return expression 5709 5710 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5711 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5712 return this 5713 5714 bracket_kind = self._prev.token_type 5715 if ( 5716 bracket_kind == TokenType.L_BRACE 5717 and self._curr 5718 and self._curr.token_type == TokenType.VAR 5719 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5720 ): 5721 return self._parse_odbc_datetime_literal() 5722 5723 expressions = self._parse_csv( 5724 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5725 ) 5726 5727 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5728 self.raise_error("Expected ]") 5729 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5730 self.raise_error("Expected }") 5731 5732 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5733 if bracket_kind == TokenType.L_BRACE: 5734 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5735 elif not this: 5736 this = build_array_constructor( 5737 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5738 ) 5739 else: 5740 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5741 if constructor_type: 5742 return build_array_constructor( 5743 constructor_type, 5744 args=expressions, 5745 bracket_kind=bracket_kind, 5746 dialect=self.dialect, 5747 ) 5748 5749 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5750 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5751 5752 self._add_comments(this) 5753 return self._parse_bracket(this) 5754 5755 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5756 if self._match(TokenType.COLON): 5757 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5758 return this 5759 5760 def _parse_case(self) -> t.Optional[exp.Expression]: 5761 ifs = [] 5762 default = None 5763 5764 comments = self._prev_comments 5765 expression = self._parse_assignment() 5766 5767 while self._match(TokenType.WHEN): 5768 this = self._parse_assignment() 5769 self._match(TokenType.THEN) 5770 then = self._parse_assignment() 5771 ifs.append(self.expression(exp.If, this=this, true=then)) 5772 5773 if self._match(TokenType.ELSE): 5774 default = self._parse_assignment() 5775 5776 if not self._match(TokenType.END): 5777 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5778 default = exp.column("interval") 5779 else: 5780 self.raise_error("Expected END after CASE", self._prev) 5781 5782 return self.expression( 5783 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5784 ) 5785 5786 def _parse_if(self) -> t.Optional[exp.Expression]: 5787 if self._match(TokenType.L_PAREN): 5788 args = self._parse_csv(self._parse_assignment) 5789 this = self.validate_expression(exp.If.from_arg_list(args), args) 5790 self._match_r_paren() 5791 else: 5792 index = self._index - 1 5793 5794 if self.NO_PAREN_IF_COMMANDS and index == 0: 5795 
return self._parse_as_command(self._prev) 5796 5797 condition = self._parse_assignment() 5798 5799 if not condition: 5800 self._retreat(index) 5801 return None 5802 5803 self._match(TokenType.THEN) 5804 true = self._parse_assignment() 5805 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5806 self._match(TokenType.END) 5807 this = self.expression(exp.If, this=condition, true=true, false=false) 5808 5809 return this 5810 5811 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5812 if not self._match_text_seq("VALUE", "FOR"): 5813 self._retreat(self._index - 1) 5814 return None 5815 5816 return self.expression( 5817 exp.NextValueFor, 5818 this=self._parse_column(), 5819 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5820 ) 5821 5822 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5823 this = self._parse_function() or self._parse_var_or_string(upper=True) 5824 5825 if self._match(TokenType.FROM): 5826 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5827 5828 if not self._match(TokenType.COMMA): 5829 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5830 5831 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5832 5833 def _parse_gap_fill(self) -> exp.GapFill: 5834 self._match(TokenType.TABLE) 5835 this = self._parse_table() 5836 5837 self._match(TokenType.COMMA) 5838 args = [this, *self._parse_csv(self._parse_lambda)] 5839 5840 gap_fill = exp.GapFill.from_arg_list(args) 5841 return self.validate_expression(gap_fill, args) 5842 5843 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5844 this = self._parse_assignment() 5845 5846 if not self._match(TokenType.ALIAS): 5847 if self._match(TokenType.COMMA): 5848 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5849 5850 self.raise_error("Expected AS after CAST") 5851 5852 fmt = None 5853 to = self._parse_types() 5854 5855 if self._match(TokenType.FORMAT): 5856 fmt_string = self._parse_string() 5857 fmt = self._parse_at_time_zone(fmt_string) 5858 5859 if not to: 5860 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5861 if to.this in exp.DataType.TEMPORAL_TYPES: 5862 this = self.expression( 5863 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5864 this=this, 5865 format=exp.Literal.string( 5866 format_time( 5867 fmt_string.this if fmt_string else "", 5868 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5869 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5870 ) 5871 ), 5872 safe=safe, 5873 ) 5874 5875 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5876 this.set("zone", fmt.args["zone"]) 5877 return this 5878 elif not to: 5879 self.raise_error("Expected TYPE after CAST") 5880 elif isinstance(to, exp.Identifier): 5881 to = exp.DataType.build(to.name, udt=True) 5882 elif to.this == exp.DataType.Type.CHAR: 5883 if self._match(TokenType.CHARACTER_SET): 5884 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5885 5886 return self.expression( 5887 exp.Cast if strict else exp.TryCast, 5888 this=this, 5889 to=to, 5890 format=fmt, 5891 safe=safe, 5892 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5893 ) 5894 5895 def _parse_string_agg(self) -> exp.Expression: 5896 if self._match(TokenType.DISTINCT): 5897 args: t.List[t.Optional[exp.Expression]] = [ 5898 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5899 ] 5900 if self._match(TokenType.COMMA): 5901 args.extend(self._parse_csv(self._parse_assignment)) 5902 else: 5903 args = self._parse_csv(self._parse_assignment) # type: ignore 5904 5905 index = self._index 5906 if not self._match(TokenType.R_PAREN) and args: 5907 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5908 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5909 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5910 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5911 5912 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5913 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5914 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5915 if not self._match_text_seq("WITHIN", "GROUP"): 5916 self._retreat(index) 5917 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5918 5919 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5920 order = self._parse_order(this=seq_get(args, 0)) 5921 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5922 5923 def _parse_convert( 5924 self, strict: bool, safe: t.Optional[bool] = None 5925 ) -> t.Optional[exp.Expression]: 5926 this = self._parse_bitwise() 5927 5928 if self._match(TokenType.USING): 5929 to: t.Optional[exp.Expression] = self.expression( 5930 exp.CharacterSet, this=self._parse_var() 5931 ) 5932 elif self._match(TokenType.COMMA): 5933 to = self._parse_types() 5934 else: 5935 to = None 5936 5937 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5938 5939 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5940 """ 5941 There are generally two variants of the DECODE function: 5942 5943 - DECODE(bin, charset) 5944 - DECODE(expression, search, result [, search, result] ... [, default]) 5945 5946 The second variant will always be parsed into a CASE expression. Note that NULL 5947 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5948 instead of relying on pattern matching. 
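For example, `DECODE(x, 1, 'one', NULL, 'missing', 'other')` is parsed roughly as `CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END`, following the branches implemented below.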
5949 """ 5950 args = self._parse_csv(self._parse_assignment) 5951 5952 if len(args) < 3: 5953 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5954 5955 expression, *expressions = args 5956 if not expression: 5957 return None 5958 5959 ifs = [] 5960 for search, result in zip(expressions[::2], expressions[1::2]): 5961 if not search or not result: 5962 return None 5963 5964 if isinstance(search, exp.Literal): 5965 ifs.append( 5966 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5967 ) 5968 elif isinstance(search, exp.Null): 5969 ifs.append( 5970 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5971 ) 5972 else: 5973 cond = exp.or_( 5974 exp.EQ(this=expression.copy(), expression=search), 5975 exp.and_( 5976 exp.Is(this=expression.copy(), expression=exp.Null()), 5977 exp.Is(this=search.copy(), expression=exp.Null()), 5978 copy=False, 5979 ), 5980 copy=False, 5981 ) 5982 ifs.append(exp.If(this=cond, true=result)) 5983 5984 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5985 5986 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5987 self._match_text_seq("KEY") 5988 key = self._parse_column() 5989 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5990 self._match_text_seq("VALUE") 5991 value = self._parse_bitwise() 5992 5993 if not key and not value: 5994 return None 5995 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5996 5997 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5998 if not this or not self._match_text_seq("FORMAT", "JSON"): 5999 return this 6000 6001 return self.expression(exp.FormatJson, this=this) 6002 6003 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6004 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6005 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6006 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6007 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6008 else: 6009 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6010 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6011 6012 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6013 6014 if not empty and not error and not null: 6015 return None 6016 6017 return self.expression( 6018 exp.OnCondition, 6019 empty=empty, 6020 error=error, 6021 null=null, 6022 ) 6023 6024 def _parse_on_handling( 6025 self, on: str, *values: str 6026 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6027 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6028 for value in values: 6029 if self._match_text_seq(value, "ON", on): 6030 return f"{value} ON {on}" 6031 6032 index = self._index 6033 if self._match(TokenType.DEFAULT): 6034 default_value = self._parse_bitwise() 6035 if self._match_text_seq("ON", on): 6036 return default_value 6037 6038 self._retreat(index) 6039 6040 return None 6041 6042 @t.overload 6043 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6044 6045 @t.overload 6046 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6047 6048 def _parse_json_object(self, agg=False): 6049 star = self._parse_star() 6050 expressions = ( 6051 [star] 6052 if star 6053 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6054 ) 6055 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6056 6057 unique_keys = None 6058 if self._match_text_seq("WITH", "UNIQUE"): 6059 unique_keys = True 6060 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6061 unique_keys = False 6062 6063 self._match_text_seq("KEYS") 6064 6065 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6066 self._parse_type() 6067 ) 6068 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6069 6070 return self.expression( 6071 exp.JSONObjectAgg if agg else exp.JSONObject, 6072 expressions=expressions, 6073 null_handling=null_handling, 6074 unique_keys=unique_keys, 6075 return_type=return_type, 6076 encoding=encoding, 6077 ) 6078 6079 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6080 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6081 if not self._match_text_seq("NESTED"): 6082 this = self._parse_id_var() 6083 kind = self._parse_types(allow_identifiers=False) 6084 nested = None 6085 else: 6086 this = None 6087 kind = None 6088 nested = True 6089 6090 path = self._match_text_seq("PATH") and self._parse_string() 6091 nested_schema = nested and self._parse_json_schema() 6092 6093 return self.expression( 6094 exp.JSONColumnDef, 6095 this=this, 6096 kind=kind, 6097 path=path, 6098 nested_schema=nested_schema, 6099 ) 6100 6101 def _parse_json_schema(self) -> exp.JSONSchema: 6102 self._match_text_seq("COLUMNS") 6103 return self.expression( 6104 exp.JSONSchema, 6105 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6106 ) 6107 6108 def _parse_json_table(self) -> exp.JSONTable: 6109 this = self._parse_format_json(self._parse_bitwise()) 6110 path = self._match(TokenType.COMMA) and self._parse_string() 6111 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6112 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6113 schema = self._parse_json_schema() 6114 6115 return exp.JSONTable( 6116 this=this, 6117 schema=schema, 6118 path=path, 6119 error_handling=error_handling, 6120 empty_handling=empty_handling, 6121 ) 6122 6123 def _parse_match_against(self) -> exp.MatchAgainst: 6124 expressions = self._parse_csv(self._parse_column) 6125 6126 self._match_text_seq(")", "AGAINST", "(") 6127 6128 this = self._parse_string() 6129 6130 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6131 modifier = "IN NATURAL LANGUAGE MODE" 6132 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6133 modifier = f"{modifier} WITH QUERY EXPANSION" 6134 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6135 modifier = "IN BOOLEAN MODE" 6136 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6137 modifier = "WITH QUERY EXPANSION" 6138 else: 6139 modifier = None 6140 6141 return self.expression( 6142 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6143 ) 6144 6145 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6146 def _parse_open_json(self) -> exp.OpenJSON: 6147 this = self._parse_bitwise() 6148 path = self._match(TokenType.COMMA) and self._parse_string() 6149 6150 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6151 this = self._parse_field(any_token=True) 6152 kind = self._parse_types() 6153 path = 
self._parse_string() 6154 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6155 6156 return self.expression( 6157 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6158 ) 6159 6160 expressions = None 6161 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6162 self._match_l_paren() 6163 expressions = self._parse_csv(_parse_open_json_column_def) 6164 6165 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6166 6167 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6168 args = self._parse_csv(self._parse_bitwise) 6169 6170 if self._match(TokenType.IN): 6171 return self.expression( 6172 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6173 ) 6174 6175 if haystack_first: 6176 haystack = seq_get(args, 0) 6177 needle = seq_get(args, 1) 6178 else: 6179 needle = seq_get(args, 0) 6180 haystack = seq_get(args, 1) 6181 6182 return self.expression( 6183 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6184 ) 6185 6186 def _parse_predict(self) -> exp.Predict: 6187 self._match_text_seq("MODEL") 6188 this = self._parse_table() 6189 6190 self._match(TokenType.COMMA) 6191 self._match_text_seq("TABLE") 6192 6193 return self.expression( 6194 exp.Predict, 6195 this=this, 6196 expression=self._parse_table(), 6197 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6198 ) 6199 6200 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6201 args = self._parse_csv(self._parse_table) 6202 return exp.JoinHint(this=func_name.upper(), expressions=args) 6203 6204 def _parse_substring(self) -> exp.Substring: 6205 # Postgres supports the form: substring(string [from int] [for int]) 6206 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6207 6208 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6209 6210 if self._match(TokenType.FROM): 6211 args.append(self._parse_bitwise()) 6212 if self._match(TokenType.FOR): 6213 if len(args) == 1: 6214 args.append(exp.Literal.number(1)) 6215 args.append(self._parse_bitwise()) 6216 6217 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6218 6219 def _parse_trim(self) -> exp.Trim: 6220 # https://www.w3resource.com/sql/character-functions/trim.php 6221 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6222 6223 position = None 6224 collation = None 6225 expression = None 6226 6227 if self._match_texts(self.TRIM_TYPES): 6228 position = self._prev.text.upper() 6229 6230 this = self._parse_bitwise() 6231 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6232 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6233 expression = self._parse_bitwise() 6234 6235 if invert_order: 6236 this, expression = expression, this 6237 6238 if self._match(TokenType.COLLATE): 6239 collation = self._parse_bitwise() 6240 6241 return self.expression( 6242 exp.Trim, this=this, position=position, expression=expression, collation=collation 6243 ) 6244 6245 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6246 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6247 6248 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6249 return self._parse_window(self._parse_id_var(), alias=True) 6250 6251 def _parse_respect_or_ignore_nulls( 6252 self, this: t.Optional[exp.Expression] 6253 ) -> t.Optional[exp.Expression]: 6254 if self._match_text_seq("IGNORE", "NULLS"): 
6255 return self.expression(exp.IgnoreNulls, this=this) 6256 if self._match_text_seq("RESPECT", "NULLS"): 6257 return self.expression(exp.RespectNulls, this=this) 6258 return this 6259 6260 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6261 if self._match(TokenType.HAVING): 6262 self._match_texts(("MAX", "MIN")) 6263 max = self._prev.text.upper() != "MIN" 6264 return self.expression( 6265 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6266 ) 6267 6268 return this 6269 6270 def _parse_window( 6271 self, this: t.Optional[exp.Expression], alias: bool = False 6272 ) -> t.Optional[exp.Expression]: 6273 func = this 6274 comments = func.comments if isinstance(func, exp.Expression) else None 6275 6276 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6277 self._match(TokenType.WHERE) 6278 this = self.expression( 6279 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6280 ) 6281 self._match_r_paren() 6282 6283 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6284 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6285 if self._match_text_seq("WITHIN", "GROUP"): 6286 order = self._parse_wrapped(self._parse_order) 6287 this = self.expression(exp.WithinGroup, this=this, expression=order) 6288 6289 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6290 # Some dialects choose to implement it and some do not. 6291 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6292 6293 # There is some code above in _parse_lambda that handles 6294 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6295 6296 # The code below handles 6297 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6298 6299 # Oracle allows both formats 6300 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6301 # and Snowflake chose to do the same for familiarity 6302 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6303 if isinstance(this, exp.AggFunc): 6304 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6305 6306 if ignore_respect and ignore_respect is not this: 6307 ignore_respect.replace(ignore_respect.this) 6308 this = self.expression(ignore_respect.__class__, this=this) 6309 6310 this = self._parse_respect_or_ignore_nulls(this) 6311 6312 # bigquery select from window x AS (partition by ...)
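# e.g. SELECT ROW_NUMBER() OVER w FROM t WINDOW w AS (PARTITION BY c ORDER BY d) --
# the WINDOW clause lands here via _parse_named_window above, which calls
# _parse_window(self._parse_id_var(), alias=True)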
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
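Most users reach the parser indirectly through sqlglot.parse or sqlglot.parse_one, but the constructor can also be configured directly. A minimal sketch; the parameter choices below are illustrative:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Record errors and log them instead of raising on the first one, and trim
# the error-message context to 50 characters on each side of the bad token.
parser = Parser(
    error_level=ErrorLevel.WARN,
    error_message_context=50,
    max_errors=3,
    dialect="duckdb",  # anything accepted by Dialect.get_or_raise
)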
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
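A short end-to-end sketch of parse, assuming the base Tokenizer; passing the original SQL string alongside the tokens lets error messages show surrounding context:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

trees = Parser().parse(tokens, sql)
print(len(trees))      # 2 -- one syntax tree per statement
print(trees[0].sql())  # SELECT a FROM t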
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
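For example, the sketch below parses a token list specifically as a SELECT statement; exp.Select is one of the types registered in EXPRESSION_PARSERS, so this should succeed, while a non-SELECT input would raise the combined ParseError described above:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1"
tokens = Tokenizer().tokenize(sql)

select = Parser().parse_into(exp.Select, tokens, sql)[0]
assert isinstance(select, exp.Select)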
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
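Note that parse invokes check_errors internally once parsing finishes. A sketch of the WARN workflow, where problems are logged and kept on parser.errors instead of raising; the malformed CAST below is just one way to trigger a recorded error:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT CAST(a AS) FROM t"  # CAST is missing its target type
tokens = Tokenizer().tokenize(sql)

parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(tokens, sql)  # logs the validation error instead of raising
print(len(parser.errors))  # the recorded ParseError(s) remain inspectable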
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
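Under the default ErrorLevel.IMMEDIATE, the first call to raise_error raises a ParseError whose errors list carries the structured fields built above. A small sketch; the dangling + is just one way to trigger a validation error:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"  # the binary + never gets its right-hand operand
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    info = e.errors[0]
    print(info["line"], info["col"], info["highlight"])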
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
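Parser methods build every node through this helper so that pending token comments are attached and mandatory arguments are validated in one place. A minimal sketch of calling it from a hypothetical subclass method; the method name and GREET payload are invented for illustration:

from sqlglot import exp
from sqlglot.parser import Parser


class MyParser(Parser):  # illustrative subclass
    def _parse_greeting(self) -> exp.Anonymous:
        # Instantiates Anonymous(this="GREET"), attaches any comments from the
        # previously matched token, then runs validate_expression on the node.
        return self.expression(exp.Anonymous, this="GREET")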
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
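A sketch of the validation path, assuming ErrorLevel.WARN so that the missing mandatory argument is recorded on parser.errors rather than raised; exp.Cast requires its "to" argument, so omitting it is reported:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.WARN)
cast = exp.Cast(this=exp.column("a"))  # "to" is deliberately omitted
parser.validate_expression(cast)
print(parser.errors[0].errors[0]["description"])  # names the missing keyword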