sqlglot.dialects.duckdb
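A minimal usage sketch (not part of the generated source below; assumes only that sqlglot is installed, and output shapes may vary slightly across versions). The DuckDB dialect is selected via the read/write arguments of sqlglot's top-level API:

import sqlglot

# BigQuery's inline STRUCT syntax is canonicalized to a DuckDB struct literal
# by _struct_sql in the module source below.
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="bigquery", write="duckdb")[0])
# e.g. SELECT {'a': 1, 'b': 'x'}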
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@generator.unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)
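A short sketch of the parse-side mappings defined above (assumes a recent sqlglot; outputs are illustrative, not guaranteed verbatim):

import sqlglot

# EPOCH_MS is parsed into exp.UnixToTime with millisecond scale (see FUNCTIONS),
# so it can be re-rendered for another dialect.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="spark")[0])
# e.g. SELECT TIMESTAMP_MILLIS(1618088028295)

# RANGE is end-exclusive while GENERATE_SERIES is end-inclusive; the
# "is_end_exclusive" flag set by _build_generate_series keeps them distinct,
# so each round-trips to itself.
print(sqlglot.transpile("SELECT RANGE(0, 5)", read="duckdb", write="duckdb")[0])
# SELECT RANGE(0, 5)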
class DuckDB(sqlglot.dialects.dialect.Dialect):
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB or Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts (e.g. in DuckDB). In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
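Example (illustrative): the fixed-size array setting changes how the same source text is interpreted. A minimal sketch, assuming Snowflake falls back to subscripting as described above; outputs are approximate:

import sqlglot

# DuckDB supports fixed-size arrays, so INT[5] stays a type.
print(sqlglot.transpile("SELECT x::INT[5]", read="duckdb", write="duckdb")[0])

# Snowflake has no fixed-size arrays, so [5] should instead become a
# subscript applied to the casted value.
print(sqlglot.transpile("SELECT x::INT[5]", read="snowflake", write="snowflake")[0])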
272 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 273 if isinstance(path, exp.Literal): 274 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 275 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 276 # This check ensures we'll avoid trying to parse these as JSON paths, which can 277 # either result in a noisy warning or in an invalid representation of the path. 278 path_text = path.name 279 if path_text.startswith("/") or "[#" in path_text: 280 return path 281 282 return super().to_json_path(path)
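Example (illustrative): literals in JSON pointer form are returned untouched by to_json_path, while dollar-style paths go through normal JSONPath parsing. A minimal sketch:

import sqlglot

# '$.a.b' is parsed as a JSONPath expression.
print(sqlglot.parse_one("SELECT col -> '$.a.b'", read="duckdb").sql("duckdb"))

# '/a/b' starts with a slash, so to_json_path leaves it as a plain literal.
print(sqlglot.parse_one("SELECT col -> '/a/b'", read="duckdb").sql("duckdb"))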
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
284 class Tokenizer(tokens.Tokenizer): 285 HEREDOC_STRINGS = ["$"] 286 287 HEREDOC_TAG_IS_IDENTIFIER = True 288 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 289 290 KEYWORDS = { 291 **tokens.Tokenizer.KEYWORDS, 292 "//": TokenType.DIV, 293 "ATTACH": TokenType.COMMAND, 294 "BINARY": TokenType.VARBINARY, 295 "BITSTRING": TokenType.BIT, 296 "BPCHAR": TokenType.TEXT, 297 "CHAR": TokenType.TEXT, 298 "CHARACTER VARYING": TokenType.TEXT, 299 "EXCLUDE": TokenType.EXCEPT, 300 "LOGICAL": TokenType.BOOLEAN, 301 "ONLY": TokenType.ONLY, 302 "PIVOT_WIDER": TokenType.PIVOT, 303 "POSITIONAL": TokenType.POSITIONAL, 304 "SIGNED": TokenType.INT, 305 "STRING": TokenType.TEXT, 306 "SUMMARIZE": TokenType.SUMMARIZE, 307 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 308 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 309 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 310 "TIMESTAMP_US": TokenType.TIMESTAMP, 311 "UBIGINT": TokenType.UBIGINT, 312 "UINTEGER": TokenType.UINT, 313 "USMALLINT": TokenType.USMALLINT, 314 "UTINYINT": TokenType.UTINYINT, 315 "VARCHAR": TokenType.TEXT, 316 } 317 KEYWORDS.pop("/*+") 318 319 SINGLE_TOKENS = { 320 **tokens.Tokenizer.SINGLE_TOKENS, 321 "$": TokenType.PARAMETER, 322 }
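Example (illustrative): several DuckDB-specific keywords are tokenized into canonical sqlglot types, e.g. STRING/BPCHAR/CHAR map to TEXT and LOGICAL maps to BOOLEAN. A minimal sketch; the exact output may vary between versions:

import sqlglot

print(sqlglot.transpile("CREATE TABLE t (a STRING, b LOGICAL)", read="duckdb", write="duckdb")[0])
# Roughly: CREATE TABLE t (a TEXT, b BOOLEAN)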
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
324 class Parser(parser.Parser): 325 BITWISE = { 326 **parser.Parser.BITWISE, 327 TokenType.TILDA: exp.RegexpLike, 328 } 329 330 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 331 332 FUNCTIONS = { 333 **parser.Parser.FUNCTIONS, 334 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 335 "ARRAY_SORT": exp.SortArray.from_arg_list, 336 "DATEDIFF": _build_date_diff, 337 "DATE_DIFF": _build_date_diff, 338 "DATE_TRUNC": date_trunc_to_time, 339 "DATETRUNC": date_trunc_to_time, 340 "DECODE": lambda args: exp.Decode( 341 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 342 ), 343 "ENCODE": lambda args: exp.Encode( 344 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 345 ), 346 "EPOCH": exp.TimeToUnix.from_arg_list, 347 "EPOCH_MS": lambda args: exp.UnixToTime( 348 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 349 ), 350 "JSON": exp.ParseJSON.from_arg_list, 351 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 352 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 353 "LIST_HAS": exp.ArrayContains.from_arg_list, 354 "LIST_REVERSE_SORT": _build_sort_array_desc, 355 "LIST_SORT": exp.SortArray.from_arg_list, 356 "LIST_VALUE": lambda args: exp.Array(expressions=args), 357 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 358 "MAKE_TIMESTAMP": _build_make_timestamp, 359 "MEDIAN": lambda args: exp.PercentileCont( 360 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 361 ), 362 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 363 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 364 "REGEXP_EXTRACT": build_regexp_extract, 365 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 366 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 367 this=seq_get(args, 0), 368 expression=seq_get(args, 1), 369 replacement=seq_get(args, 2), 370 modifiers=seq_get(args, 3), 371 ), 372 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 373 "STRING_SPLIT": exp.Split.from_arg_list, 374 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 375 "STRING_TO_ARRAY": exp.Split.from_arg_list, 376 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 377 "STRUCT_PACK": exp.Struct.from_arg_list, 378 "STR_SPLIT": exp.Split.from_arg_list, 379 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 380 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 381 "UNNEST": exp.Explode.from_arg_list, 382 "XOR": binary_from_function(exp.BitwiseXor), 383 "GENERATE_SERIES": _build_generate_series(), 384 "RANGE": _build_generate_series(end_exclusive=True), 385 } 386 387 FUNCTIONS.pop("DATE_SUB") 388 FUNCTIONS.pop("GLOB") 389 390 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 391 FUNCTION_PARSERS.pop("DECODE") 392 393 NO_PAREN_FUNCTION_PARSERS = { 394 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 395 "MAP": lambda self: self._parse_map(), 396 } 397 398 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 399 TokenType.SEMI, 400 TokenType.ANTI, 401 } 402 403 PLACEHOLDER_PARSERS = { 404 **parser.Parser.PLACEHOLDER_PARSERS, 405 TokenType.PARAMETER: lambda self: ( 406 self.expression(exp.Placeholder, this=self._prev.text) 407 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 408 else None 409 ), 410 } 411 412 TYPE_CONVERTERS = { 413 # https://duckdb.org/docs/sql/data_types/numeric 414 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 415 # https://duckdb.org/docs/sql/data_types/text 416 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 417 } 418 419 def 
_parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 420 # https://duckdb.org/docs/sql/samples.html 421 sample = super()._parse_table_sample(as_modifier=as_modifier) 422 if sample and not sample.args.get("method"): 423 if sample.args.get("size"): 424 sample.set("method", exp.var("RESERVOIR")) 425 else: 426 sample.set("method", exp.var("SYSTEM")) 427 428 return sample 429 430 def _parse_bracket( 431 self, this: t.Optional[exp.Expression] = None 432 ) -> t.Optional[exp.Expression]: 433 bracket = super()._parse_bracket(this) 434 if isinstance(bracket, exp.Bracket): 435 bracket.set("returns_list_for_maps", True) 436 437 return bracket 438 439 def _parse_map(self) -> exp.ToMap | exp.Map: 440 if self._match(TokenType.L_BRACE, advance=False): 441 return self.expression(exp.ToMap, this=self._parse_bracket()) 442 443 args = self._parse_wrapped_csv(self._parse_assignment) 444 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 445 446 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 447 return self._parse_field_def() 448 449 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 450 if len(aggregations) == 1: 451 return super()._pivot_column_names(aggregations) 452 return pivot_column_names(aggregations, dialect="duckdb")
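Example (illustrative): _parse_map handles DuckDB's brace syntax, producing exp.ToMap for MAP {...} and exp.Map for the two-list MAP(...) form. A minimal sketch:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb")
print(tree.find(exp.ToMap) is not None)  # expected: True

tree = sqlglot.parse_one("SELECT MAP(['a'], [1])", read="duckdb")
print(tree.find(exp.Map) is not None)  # expected: True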
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
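Example (illustrative): these arguments are forwarded by the top-level helpers, so error handling can be relaxed without instantiating the Parser directly. A minimal sketch:

import sqlglot
from sqlglot.errors import ErrorLevel

# With ErrorLevel.IGNORE, malformed SQL yields a best-effort tree
# instead of raising a ParseError.
tree = sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.IGNORE)
print(tree)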
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
454 class Generator(generator.Generator): 455 PARAMETER_TOKEN = "$" 456 NAMED_PLACEHOLDER_TOKEN = "$" 457 JOIN_HINTS = False 458 TABLE_HINTS = False 459 QUERY_HINTS = False 460 LIMIT_FETCH = "LIMIT" 461 STRUCT_DELIMITER = ("(", ")") 462 RENAME_TABLE_WITH_DB = False 463 NVL2_SUPPORTED = False 464 SEMI_ANTI_JOIN_WITH_SIDE = False 465 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 466 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 467 LAST_DAY_SUPPORTS_DATE_PART = False 468 JSON_KEY_VALUE_PAIR_SEP = "," 469 IGNORE_NULLS_IN_FUNC = True 470 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 471 SUPPORTS_CREATE_TABLE_LIKE = False 472 MULTI_ARG_DISTINCT = False 473 CAN_IMPLEMENT_ARRAY_ANY = True 474 SUPPORTS_TO_NUMBER = False 475 COPY_HAS_INTO_KEYWORD = False 476 STAR_EXCEPT = "EXCLUDE" 477 PAD_FILL_PATTERN_IS_REQUIRED = True 478 ARRAY_CONCAT_IS_VAR_LEN = False 479 480 TRANSFORMS = { 481 **generator.Generator.TRANSFORMS, 482 exp.ApproxDistinct: approx_count_distinct_sql, 483 exp.Array: inline_array_unless_query, 484 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 485 exp.ArrayFilter: rename_func("LIST_FILTER"), 486 exp.ArraySize: rename_func("ARRAY_LENGTH"), 487 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 488 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 489 exp.ArraySort: _array_sort_sql, 490 exp.ArraySum: rename_func("LIST_SUM"), 491 exp.BitwiseXor: rename_func("XOR"), 492 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 493 exp.CurrentDate: lambda *_: "CURRENT_DATE", 494 exp.CurrentTime: lambda *_: "CURRENT_TIME", 495 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 496 exp.DayOfMonth: rename_func("DAYOFMONTH"), 497 exp.DayOfWeek: rename_func("DAYOFWEEK"), 498 exp.DayOfWeekIso: rename_func("ISODOW"), 499 exp.DayOfYear: rename_func("DAYOFYEAR"), 500 exp.DataType: _datatype_sql, 501 exp.Date: _date_sql, 502 exp.DateAdd: _date_delta_sql, 503 exp.DateFromParts: rename_func("MAKE_DATE"), 504 exp.DateSub: _date_delta_sql, 505 exp.DateDiff: _date_diff_sql, 506 exp.DateStrToDate: datestrtodate_sql, 507 exp.Datetime: no_datetime_sql, 508 exp.DatetimeSub: _date_delta_sql, 509 exp.DatetimeAdd: _date_delta_sql, 510 exp.DateToDi: lambda self, 511 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 512 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 513 exp.DiToDate: lambda self, 514 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 515 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 516 exp.GenerateDateArray: _generate_datetime_array_sql, 517 exp.GenerateTimestampArray: _generate_datetime_array_sql, 518 exp.Explode: rename_func("UNNEST"), 519 exp.IntDiv: lambda self, e: self.binary(e, "//"), 520 exp.IsInf: rename_func("ISINF"), 521 exp.IsNan: rename_func("ISNAN"), 522 exp.JSONExtract: _arrow_json_extract_sql, 523 exp.JSONExtractScalar: _arrow_json_extract_sql, 524 exp.JSONFormat: _json_format_sql, 525 exp.LogicalOr: rename_func("BOOL_OR"), 526 exp.LogicalAnd: rename_func("BOOL_AND"), 527 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 528 exp.MonthsBetween: lambda self, e: self.func( 529 "DATEDIFF", 530 "'month'", 531 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 532 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 533 ), 534 exp.PercentileCont: rename_func("QUANTILE_CONT"), 535 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 536 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
537 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 538 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 539 exp.RegexpExtract: regexp_extract_sql, 540 exp.RegexpReplace: lambda self, e: self.func( 541 "REGEXP_REPLACE", 542 e.this, 543 e.expression, 544 e.args.get("replacement"), 545 e.args.get("modifiers"), 546 ), 547 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 548 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 549 exp.Return: lambda self, e: self.sql(e, "this"), 550 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 551 exp.Rand: rename_func("RANDOM"), 552 exp.SafeDivide: no_safe_divide_sql, 553 exp.SHA2: sha256_sql, 554 exp.Split: rename_func("STR_SPLIT"), 555 exp.SortArray: _sort_array_sql, 556 exp.StrPosition: str_position_sql, 557 exp.StrToUnix: lambda self, e: self.func( 558 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 559 ), 560 exp.Struct: _struct_sql, 561 exp.Transform: rename_func("LIST_TRANSFORM"), 562 exp.TimeAdd: _date_delta_sql, 563 exp.Time: no_time_sql, 564 exp.TimeDiff: _timediff_sql, 565 exp.Timestamp: no_timestamp_sql, 566 exp.TimestampDiff: lambda self, e: self.func( 567 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 568 ), 569 exp.TimestampTrunc: timestamptrunc_sql(), 570 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 571 exp.TimeStrToTime: timestrtotime_sql, 572 exp.TimeStrToUnix: lambda self, e: self.func( 573 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 574 ), 575 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 576 exp.TimeToUnix: rename_func("EPOCH"), 577 exp.TsOrDiToDi: lambda self, 578 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 579 exp.TsOrDsAdd: _date_delta_sql, 580 exp.TsOrDsDiff: lambda self, e: self.func( 581 "DATE_DIFF", 582 f"'{e.args.get('unit') or 'DAY'}'", 583 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 584 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 585 ), 586 exp.UnixToStr: lambda self, e: self.func( 587 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 588 ), 589 exp.DatetimeTrunc: lambda self, e: self.func( 590 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 591 ), 592 exp.UnixToTime: _unix_to_time_sql, 593 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 594 exp.VariancePop: rename_func("VAR_POP"), 595 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 596 exp.Xor: bool_xor_sql, 597 } 598 599 SUPPORTED_JSON_PATH_PARTS = { 600 exp.JSONPathKey, 601 exp.JSONPathRoot, 602 exp.JSONPathSubscript, 603 exp.JSONPathWildcard, 604 } 605 606 TYPE_MAPPING = { 607 **generator.Generator.TYPE_MAPPING, 608 exp.DataType.Type.BINARY: "BLOB", 609 exp.DataType.Type.BPCHAR: "TEXT", 610 exp.DataType.Type.CHAR: "TEXT", 611 exp.DataType.Type.FLOAT: "REAL", 612 exp.DataType.Type.NCHAR: "TEXT", 613 exp.DataType.Type.NVARCHAR: "TEXT", 614 exp.DataType.Type.UINT: "UINTEGER", 615 exp.DataType.Type.VARBINARY: "BLOB", 616 exp.DataType.Type.ROWVERSION: "BLOB", 617 exp.DataType.Type.VARCHAR: "TEXT", 618 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 619 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 620 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 621 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 622 } 623 624 # 
https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 625 RESERVED_KEYWORDS = { 626 "array", 627 "analyse", 628 "union", 629 "all", 630 "when", 631 "in_p", 632 "default", 633 "create_p", 634 "window", 635 "asymmetric", 636 "to", 637 "else", 638 "localtime", 639 "from", 640 "end_p", 641 "select", 642 "current_date", 643 "foreign", 644 "with", 645 "grant", 646 "session_user", 647 "or", 648 "except", 649 "references", 650 "fetch", 651 "limit", 652 "group_p", 653 "leading", 654 "into", 655 "collate", 656 "offset", 657 "do", 658 "then", 659 "localtimestamp", 660 "check_p", 661 "lateral_p", 662 "current_role", 663 "where", 664 "asc_p", 665 "placing", 666 "desc_p", 667 "user", 668 "unique", 669 "initially", 670 "column", 671 "both", 672 "some", 673 "as", 674 "any", 675 "only", 676 "deferrable", 677 "null_p", 678 "current_time", 679 "true_p", 680 "table", 681 "case", 682 "trailing", 683 "variadic", 684 "for", 685 "on", 686 "distinct", 687 "false_p", 688 "not", 689 "constraint", 690 "current_timestamp", 691 "returning", 692 "primary", 693 "intersect", 694 "having", 695 "analyze", 696 "current_user", 697 "and", 698 "cast", 699 "symmetric", 700 "using", 701 "order", 702 "current_catalog", 703 } 704 705 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 706 707 # DuckDB doesn't generally support CREATE TABLE .. properties 708 # https://duckdb.org/docs/sql/statements/create_table.html 709 PROPERTIES_LOCATION = { 710 prop: exp.Properties.Location.UNSUPPORTED 711 for prop in generator.Generator.PROPERTIES_LOCATION 712 } 713 714 # There are a few exceptions (e.g. temporary tables) which are supported or 715 # can be transpiled to DuckDB, so we explicitly override them accordingly 716 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 717 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 718 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 719 720 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 721 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 722 723 def strtotime_sql(self, expression: exp.StrToTime) -> str: 724 if expression.args.get("safe"): 725 formatted_time = self.format_time(expression) 726 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 727 return str_to_time_sql(self, expression) 728 729 def strtodate_sql(self, expression: exp.StrToDate) -> str: 730 if expression.args.get("safe"): 731 formatted_time = self.format_time(expression) 732 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 733 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 734 735 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 736 arg = expression.this 737 if expression.args.get("safe"): 738 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 739 return self.func("JSON", arg) 740 741 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 742 nano = expression.args.get("nano") 743 if nano is not None: 744 expression.set( 745 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 746 ) 747 748 return rename_func("MAKE_TIME")(self, expression) 749 750 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 751 sec = expression.args["sec"] 752 753 milli = expression.args.get("milli") 754 if milli is not None: 755 
sec += milli.pop() / exp.Literal.number(1000.0) 756 757 nano = expression.args.get("nano") 758 if nano is not None: 759 sec += nano.pop() / exp.Literal.number(1000000000.0) 760 761 if milli or nano: 762 expression.set("sec", sec) 763 764 return rename_func("MAKE_TIMESTAMP")(self, expression) 765 766 def tablesample_sql( 767 self, 768 expression: exp.TableSample, 769 tablesample_keyword: t.Optional[str] = None, 770 ) -> str: 771 if not isinstance(expression.parent, exp.Select): 772 # This sample clause only applies to a single source, not the entire resulting relation 773 tablesample_keyword = "TABLESAMPLE" 774 775 if expression.args.get("size"): 776 method = expression.args.get("method") 777 if method and method.name.upper() != "RESERVOIR": 778 self.unsupported( 779 f"Sampling method {method} is not supported with a discrete sample count, " 780 "defaulting to reservoir sampling" 781 ) 782 expression.set("method", exp.var("RESERVOIR")) 783 784 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 785 786 def interval_sql(self, expression: exp.Interval) -> str: 787 multiplier: t.Optional[int] = None 788 unit = expression.text("unit").lower() 789 790 if unit.startswith("week"): 791 multiplier = 7 792 if unit.startswith("quarter"): 793 multiplier = 90 794 795 if multiplier: 796 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 797 798 return super().interval_sql(expression) 799 800 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 801 if isinstance(expression.parent, exp.UserDefinedFunction): 802 return self.sql(expression, "this") 803 return super().columndef_sql(expression, sep) 804 805 def join_sql(self, expression: exp.Join) -> str: 806 if ( 807 expression.side == "LEFT" 808 and not expression.args.get("on") 809 and isinstance(expression.this, exp.Unnest) 810 ): 811 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 812 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 813 return super().join_sql(expression.on(exp.true())) 814 815 return super().join_sql(expression) 816 817 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 818 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 819 if expression.args.get("is_end_exclusive"): 820 return rename_func("RANGE")(self, expression) 821 822 return self.function_fallback_sql(expression) 823 824 def bracket_sql(self, expression: exp.Bracket) -> str: 825 this = expression.this 826 if isinstance(this, exp.Array): 827 this.replace(exp.paren(this)) 828 829 bracket = super().bracket_sql(expression) 830 831 if not expression.args.get("returns_list_for_maps"): 832 if not this.type: 833 from sqlglot.optimizer.annotate_types import annotate_types 834 835 this = annotate_types(this) 836 837 if this.is_type(exp.DataType.Type.MAP): 838 bracket = f"({bracket})[1]" 839 840 return bracket 841 842 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 843 expression_sql = self.sql(expression, "expression") 844 845 func = expression.this 846 if isinstance(func, exp.PERCENTILES): 847 # Make the order key the first arg and slide the fraction to the right 848 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 849 order_col = expression.find(exp.Ordered) 850 if order_col: 851 func.set("expression", func.this) 852 func.set("this", order_col.this) 853 854 this = self.sql(expression, "this").rstrip(")") 855 856 return f"{this}{expression_sql})" 857 858 def 
length_sql(self, expression: exp.Length) -> str: 859 arg = expression.this 860 861 # Dialects like BQ and Snowflake also accept binary values as args, so 862 # DDB will attempt to infer the type or resort to case/when resolution 863 if not expression.args.get("binary") or arg.is_string: 864 return self.func("LENGTH", arg) 865 866 if not arg.type: 867 from sqlglot.optimizer.annotate_types import annotate_types 868 869 arg = annotate_types(arg) 870 871 if arg.is_type(*exp.DataType.TEXT_TYPES): 872 return self.func("LENGTH", arg) 873 874 # We need these casts to make duckdb's static type checker happy 875 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 876 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 877 878 case = ( 879 exp.case(self.func("TYPEOF", arg)) 880 .when( 881 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 882 ) # anonymous to break length_sql recursion 883 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 884 ) 885 886 return self.sql(case) 887 888 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 889 this = expression.this 890 key = expression.args.get("key") 891 key_sql = key.name if isinstance(key, exp.Expression) else "" 892 value_sql = self.sql(expression, "value") 893 894 kv_sql = f"{key_sql} := {value_sql}" 895 896 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 897 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 898 if isinstance(this, exp.Struct) and not this.expressions: 899 return self.func("STRUCT_PACK", kv_sql) 900 901 return self.func("STRUCT_INSERT", this, kv_sql) 902 903 def unnest_sql(self, expression: exp.Unnest) -> str: 904 explode_array = expression.args.get("explode_array") 905 if explode_array: 906 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 907 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 908 expression.expressions.append( 909 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 910 ) 911 912 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 913 alias = expression.args.get("alias") 914 if alias: 915 expression.set("alias", None) 916 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 917 918 unnest_sql = super().unnest_sql(expression) 919 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 920 return self.sql(select) 921 922 return super().unnest_sql(expression) 923 924 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 925 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 926 # DuckDB should render IGNORE NULLS only for the general-purpose 927 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 928 return super().ignorenulls_sql(expression) 929 930 return self.sql(expression, "this") 931 932 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 933 this = self.sql(expression, "this") 934 null_text = self.sql(expression, "null") 935 936 if null_text: 937 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 938 939 return self.func("ARRAY_TO_STRING", this, expression.expression)
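Example (illustrative): one of the overrides above in action. interval_sql rewrites week and quarter units into day arithmetic, since DuckDB handles those units differently. A minimal sketch; exact formatting may vary:

import sqlglot

print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", write="duckdb")[0])
# Roughly: SELECT (7 * INTERVAL '2' DAY)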
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
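Example (illustrative): generator options such as pretty and identify are accepted by the top-level helpers and forwarded to this class. A minimal sketch:

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="duckdb", pretty=True)[0])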
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql