解析hive sql——将别名表、字段转化为指示表字段
解析hive sql——将别名表、字段转化为指示表字段
为了确保从复杂的 SQL 表达式中正确提取实际涉及的表列,我们需要递归地解析 SQL 表达式,并准确地提取所有嵌套函数和运算中的列。下面是完整的代码实现,确保能够解析并提取嵌套表达式中的具体列信息。
完整代码
import net.sf.jsqlparser.JSQLParserException;
import net.sf.jsqlparser.expression.*;
import net.sf.jsqlparser.parser.CCJSqlParserUtil;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.Statement;
import net.sf.jsqlparser.statement.select.*;import java.util.*;public class JHiveSqlParser {public static void main(String[] args) {String sql = "select a.ACCT_MONTH as`账期`, a.user_no as `用户编码`, AL.channel_name as `所属厅店`, a.OPEN_DATE as `开户时间` , a.INNET_DATE as `竣工时间`, a.STOP_DATE as `停机时间`, a.payment_mode_cd as `付费方式`, a.SIG_OFFER_SPEC_ID as `主套餐ID`, I.name as `主套餐`, a.COMP_OFFER_SPEC_ID as `融合ID`, O.name as `融合套餐`, F.TELE_TYPE_LVL3_NAME as `三级产品编码`, k.name as `产品编码`, h.city_name as `一级网格`, j.town_name as `二级网格`, G.USER_STATUS_NAME as `用户状态`, a.IS_COMP as `是否融合`, A.RECV_RATE as `速率`, t2.DATA_DUR as`数据时长(分钟)`, t2.DATA_FLUX/1024 as`总流量(MB)`, A.ACTIVE_USER_M as `是否当月活跃`, AL.tyaddresscode as `小区编码`, AL.tyaddressname as `小区`, AL.STANDARD_ADDR_NAME as `装机地址`, x.MB_USER_NO as `移动主卡用户编码`, x.MB_SIG_OFFER_SPEC_ID as `移动主销售品`, x.MB_OPEN_DATE as `移动开通时间`, v3.APRU as `移动AP`, hh.city_name as `一级网格`, jj.town_name as `二级网格` FROM (SELECT *from edww.dww_d_pr_pri_al_inst WHERE ACCT_MONTH='202402' AND DAY_ID='29' and IS_VALID='1' and TELE_TYPE_LVL2_ID='2010' and area_no='17799' )a LEFT JOIN (select * from edww.DWW_D_PR_COMP_MEMBER_INST WHERE ACCT_MONTH='202402' AND DAY_ID='29' and MB_IS_DUPETY='0')x on x.KD1_USER_NO=a.user_no LEFT JOIN (SELECT * FROM edww.DWW_D_RE_NRE_ACCNO_DETAIL WHERE ACCT_MONTH='202402' AND DAY_ID='29' )AL ON A.USER_NO=AL.USER_NO left join (select *from edww.DWW_M_EV_BIL_MB_TOTAL where acct_month='202402')t2 on a.user_no=t2.user_no LEFT JOIN EDIM.DIM_USER_STATUS G ON A.STD_USER_STATUS = G.USER_STATUS LEFT JOIN EDIM.dim_city H on a.CITY_NO=h.CITY_NO LEFT JOIN EDIM.dim_town J on a.town_no=j.town_no LEFT JOIN EDIM.dim_city HH on x.MB_CITY_NO=hh.CITY_NO LEFT JOIN EDIM.dim_town JJ on x.MB_town_no=jj.town_no LEFT JOIN (select * from stage.offer_spec )I on i.offer_spec_id=a.SIG_OFFER_SPEC_ID LEFT JOIN (select * from stage.offer_spec ) O on O.offer_spec_id=a.COMP_OFFER_SPEC_ID LEFT JOIN EDIM.dim_prod_spec k on a.PROD_SPEC_ID=k.PROD_SPEC_ID LEFT JOIN stage.DIM_TELE_TYPE F 
on a.TELE_TYPE_LVL3_ID=f.TELE_TYPE_LVL3_ID LEFT JOIN (select user_no, sum(FK_20160504160557970)/count(case when nvl(FK_20160504160557970,0)<>0 then 1 else null end)/100 APRU from edww.dww_m_pr_pri_al_kpi where ACCT_MONTH between '202312'and '202402' group by user_no )V3 on x.MB_USER_NO= V3.user_no \n";try {Map<String, List<String>> tableColumnMap = parseSQL(sql);tableColumnMap.forEach((table, columns) -> {System.out.println("Table: " + table);columns.forEach(column -> System.out.println(" Column: " + column));});} catch (JSQLParserException e) {e.printStackTrace();}}public static Map<String, List<String>> parseSQL(String sql) throws JSQLParserException {sql = removeComments(sql);Statement statement = CCJSqlParserUtil.parse(sql);Map<String, Set<String>> tableColumnMap = new HashMap<>();if (statement instanceof Select) {Select selectStatement = (Select) statement;SelectBody selectBody = selectStatement.getSelectBody();Map<String, Set<String>> aliasToTableMap = new HashMap<>();processSelectBody(selectBody, tableColumnMap, aliasToTableMap);}Map<String, List<String>> convertedMap = convertToMapOfLists(tableColumnMap);return convertedMap;}public static Map<String, List<String>> convertToMapOfLists(Map<String, Set<String>> originalMap) {Map<String, List<String>> result = new HashMap<>();for (Map.Entry<String, Set<String>> entry : originalMap.entrySet()) {String key = entry.getKey();Set<String> valueSet = entry.getValue();List<String> valueList = new ArrayList<>(valueSet); // 将 Set 转换为 Listresult.put(key, valueList);}return result;}private static void processSelectBody(SelectBody selectBody, Map<String, Set<String>> tableColumnMap, Map<String, Set<String>> aliasToTableMap) {if (selectBody instanceof PlainSelect) {PlainSelect plainSelect = (PlainSelect) selectBody;processFromItem(plainSelect.getFromItem(), tableColumnMap, aliasToTableMap);if (plainSelect.getJoins() != null) {for (Join join : plainSelect.getJoins()) {processFromItem(join.getRightItem(), tableColumnMap, 
aliasToTableMap);}}processSelectItems(plainSelect.getSelectItems(), tableColumnMap, aliasToTableMap);} else if (selectBody instanceof SetOperationList) {SetOperationList setOperationList = (SetOperationList) selectBody;for (SelectBody body : setOperationList.getSelects()) {processSelectBody(body, tableColumnMap, aliasToTableMap);}} else if (selectBody instanceof WithItem) {WithItem withItem = (WithItem) selectBody;processSelectBody(withItem.getSelectBody(), tableColumnMap, aliasToTableMap);}}private static void processFromItem(FromItem fromItem, Map<String, Set<String>> tableColumnMap, Map<String, Set<String>> aliasToTableMap) {if (fromItem instanceof Table) {Table table = (Table) fromItem;String fullTableName = table.getFullyQualifiedName();tableColumnMap.putIfAbsent(fullTableName, new HashSet<>());if (table.getAlias() != null) {aliasToTableMap.computeIfAbsent(table.getAlias().getName(), k -> new HashSet<>()).add(fullTableName);}} else if (fromItem instanceof SubSelect) {SubSelect subSelect = (SubSelect) fromItem;Map<String, Set<String>> subSelectTableColumnMap = new HashMap<>();processSelectBody(subSelect.getSelectBody(), subSelectTableColumnMap, aliasToTableMap);if (subSelect.getAlias() != null) {String alias = subSelect.getAlias().getName();for (String realTableName : subSelectTableColumnMap.keySet()) {aliasToTableMap.computeIfAbsent(alias, k -> new HashSet<>()).add(realTableName);}}tableColumnMap.putAll(subSelectTableColumnMap);}}private static void processSelectItems(List<SelectItem> selectItems, Map<String, Set<String>> tableColumnMap, Map<String, Set<String>> aliasToTableMap) {for (SelectItem selectItem : selectItems) {if (selectItem instanceof AllColumns) {// Handle SELECT *tableColumnMap.values().forEach(columns -> {columns.clear(); // Clear existing columnscolumns.add("*");});} else if (selectItem instanceof SelectExpressionItem) {SelectExpressionItem selectExpressionItem = (SelectExpressionItem) selectItem;Expression expr = 
selectExpressionItem.getExpression();// Recursively process expressions to extract all columnsprocessExpression(expr, tableColumnMap, aliasToTableMap);}}}private static void processExpression(Expression expr, Map<String, Set<String>> tableColumnMap, Map<String, Set<String>> aliasToTableMap) {if (expr instanceof net.sf.jsqlparser.schema.Column) {net.sf.jsqlparser.schema.Column column = (net.sf.jsqlparser.schema.Column) expr;String originalTableName = column.getTable() != null ? column.getTable().getFullyQualifiedName() : "";addColumnToTable(tableColumnMap, aliasToTableMap, originalTableName, column.getColumnName());} else if (expr instanceof Function) {Function function = (Function) expr;if (function.getParameters() != null) {for (Expression param : function.getParameters().getExpressions()) {processExpression(param, tableColumnMap, aliasToTableMap);}}} else if (expr instanceof BinaryExpression) {BinaryExpression binaryExpr = (BinaryExpression) expr;processExpression(binaryExpr.getLeftExpression(), tableColumnMap, aliasToTableMap);processExpression(binaryExpr.getRightExpression(), tableColumnMap, aliasToTableMap);} else if (expr instanceof CaseExpression) {CaseExpression caseExpr = (CaseExpression) expr;processExpression(caseExpr.getSwitchExpression(), tableColumnMap, aliasToTableMap);for (Expression whenClause : caseExpr.getWhenClauses()) {processExpression(whenClause, tableColumnMap, aliasToTableMap);}processExpression(caseExpr.getElseExpression(), tableColumnMap, aliasToTableMap);}// Add other expression types here if necessary}private static void addColumnToTable(Map<String, Set<String>> tableColumnMap, Map<String, Set<String>> aliasToTableMap, String tableName, String columnName) {if (!tableName.isEmpty()) {Set<String> realTableNames = aliasToTableMap.getOrDefault(tableName, Collections.singleton(tableName));for (String realTableName : realTableNames) {Set<String> columns = tableColumnMap.computeIfAbsent(realTableName, k -> new HashSet<>());if 
(!columns.contains("*")) {columns.add(columnName);}}} else {tableColumnMap.values().forEach(columns -> {if (!columns.contains("*")) {columns.add(columnName);}});}}private static String removeComments(String sql) {int index = sql.indexOf("--");while (index >= 0) {int endIndex = sql.indexOf("\n", index);if (endIndex == -1) {endIndex = sql.length();}sql = sql.substring(0, index) + sql.substring(endIndex);index = sql.indexOf("--", index);}// Handle multi-line commentswhile (sql.contains("/*")) {int start = sql.indexOf("/*");int end = sql.indexOf("*/", start + 2);if (end == -1) {break;}sql = sql.substring(0, start) + sql.substring(end + 2);}// Remove any WITH ROLLUP clausessql = sql.trim().toUpperCase().replaceAll("(?i)\s+WITH\s+ROLLUP\s*", " ");return sql;}
}
解释主要逻辑:
- 处理表达式:`processExpression` 递归处理各种表达式类型,包括 `Column`、`Function`、`BinaryExpression`、`CaseExpression` 和 `WhenClause` 等,确保提取嵌套表达式中的具体列。
- 输出替换别名为真实表名:`addColumnToTable` 方法通过别名映射 `aliasToTableMap` 查找别名对应的真实表,确保在最终输出时将所有别名替换为真实的表名。
- 解析 SQL 并创建别名映射:`parseSQL` 解析 SQL 并分别处理 `SelectBody` 及其子元素,确保将别名映射到真实表。
通过这些修改和实现,可以确保从复杂 SQL 中提取正确的列信息,同时处理掉 CASE WHEN 表达式中的嵌套操作。