SQLGlot,这是一个强大的 Python SQL 解析、生成和转换工具库。
SQLGlot 是一个纯 Python 实现的 SQL 解析器、生成器和优化器。它支持多种 SQL 方言,可以用于:
支持 20+ 种 SQL 方言:
pip install sqlglot
import sqlglot
# 解析 SQL 为 AST
sql = "SELECT a, b FROM t WHERE c = 1"
ast = sqlglot.parse(sql)[0]
# 查看 AST 结构
print(ast.sql())
import sqlglot
# 从 AST 生成 SQL
ast = sqlglot.parse("SELECT * FROM t")[0]
# 生成不同方言的 SQL
print(ast.sql(dialect="spark")) # SparkSQL
print(ast.sql(dialect="mysql")) # MySQL
print(ast.sql(dialect="postgres")) # PostgreSQL
import sqlglot
# 将 MySQL SQL 转换为 SparkSQL
sql = """
SELECT DATE_FORMAT(date, '%Y-%m') as month,
COUNT(*) as count
FROM sales
GROUP BY month
"""
transpiled = sqlglot.transpile(sql,
read="mysql",
write="spark")[0]
print(transpiled)
# 输出:SELECT DATE_FORMAT(date, 'yyyy-MM') AS month, COUNT(*) AS count FROM sales GROUP BY month
import sqlglot
import sqlglot.expressions as exp
# 构建表达式
expr = exp.Column(this="id")
expr = exp.EQ(this=expr, expression=exp.Literal(this="1"))
# 遍历和修改表达式
for node in expr.walk():
if isinstance(node, exp.Literal):
node.set("this", "2")
print(expr.sql())
import sqlglot
from sqlglot.optimizer import optimize
sql = """
SELECT * FROM (
SELECT a, b FROM t
) subq
WHERE a > 10
"""
# 优化 SQL(如谓词下推)
optimized = optimize(sql)
print(optimized.sql())
import sqlglot
from sqlglot import exp
def remove_alias(node):
"""移除所有别名"""
if isinstance(node, exp.Alias):
return node.this
sql = "SELECT a AS col1, b AS col2 FROM t"
transformed = sqlglot.transpile(sql,
identify=True,
transforms=[remove_alias])[0]
print(transformed) # SELECT a, b FROM t
import sqlglot
sql = "select a,b,c from t where x=1"
pretty_sql = sqlglot.transpile(sql, pretty=True)[0]
print(pretty_sql)
import sqlglot
from sqlglot.schema import Schema
# 定义模式
schema = {
"users": {
"id": "INT",
"name": "VARCHAR",
"created_at": "TIMESTAMP"
}
}
# 验证 SQL
sql = "SELECT id, name, invalid_col FROM users"
try:
sqlglot.validate(sql, schema=schema)
except Exception as e:
print(f"验证错误: {e}")
import sqlglot
# 处理方言特定的函数
sql = "SELECT GETDATE()" # SQL Server 语法
# 转换为 PostgreSQL
pg_sql = sqlglot.transpile(sql, read="tsql", write="postgres")[0]
print(pg_sql) # SELECT NOW()
def migrate_redshift_to_bigquery(sql):
"""将 Redshift SQL 迁移到 BigQuery"""
return sqlglot.transpile(
sql,
read="redshift",
write="bigquery",
identify=True, # 自动标识符加引号
pretty=True # 美化输出
)[0]
def standardize_sql(sql, target_dialect="spark"):
"""将各种 SQL 统一为标准格式"""
return sqlglot.transpile(
sql,
write=target_dialect,
pretty=True,
normalize=True # 标准化关键字大小写
)[0]
def check_sql_quality(sql):
"""检查 SQL 质量问题"""
issues = []
# 1. 检查 SELECT *
if "SELECT *" in sql.upper():
issues.append("避免使用 SELECT *")
# 2. 使用 AST 进行更复杂的检查
try:
ast = sqlglot.parse(sql)[0]
# 检查未使用 WHERE 的 DELETE
if isinstance(ast, exp.Delete) and not ast.args.get("where"):
issues.append("DELETE 语句缺少 WHERE 条件")
except:
issues.append("SQL 语法错误")
return issues
| 特性 | SQLGlot | sqlparse | ANTLR | sqloxide (Rust) |
|---|---|---|---|---|
| 语言 | Python | Python | 多语言 | Rust |
| 依赖 | 无 | 无 | Java | 无 |
| 多方言 | ✅ | ❌ | ✅ | ✅ |
| AST 操作 | ✅ | 有限 | ✅ | ✅ |
| SQL 生成 | ✅ | ❌ | ✅ | ✅ |
| 转换 | ✅ | ❌ | 需要自定义 | ✅ |
import sqlglot
def safe_transpile(sql, read=None, write=None):
try:
return sqlglot.transpile(sql, read=read, write=write)[0]
except sqlglot.errors.ParseError as e:
print(f"解析错误: {e}")
return sql
except Exception as e:
print(f"转换错误: {e}")
return sql
# 对于大量 SQL 处理,使用解析器缓存
from sqlglot import parse
# 复用解析器实例
parser = sqlglot.Parser()
def batch_process(sql_list):
results = []
for sql in sql_list:
# 使用缓存的解析器
ast = parser.parse(sql)[0]
# 处理 AST
processed = ast.sql(dialect="spark")
results.append(processed)
return results
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
class CustomDialect(Dialect):
class Generator(Generator):
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.CurrentDate: lambda self, e: "CURRENT_DATE()",
}
SQLGlot 是一个功能全面且易于使用的 SQL 工具库,特别适合以下场景:
它的主要优势在于:
对于需要处理多方言 SQL 的 Python 项目,SQLGlot 通常是首选解决方案。