当前位置: 首页 > news >正文

Spark 之 like 表达式

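Spark 优化器中的 LikeSimplification 规则会对 LIKE 表达式做优化：当匹配模式不需要完整的正则表达式时（例如 'abc%'、'%abc'、'abc%def'、'%abc%' 或不含通配符的字符串），会把 LIKE 改写为 StartsWith、EndsWith、Contains、EqualTo 等更轻量的表达式；含有转义字符的模式则不做改写。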

/**
 * Rewrites LIKE predicates whose pattern does not need a full regular expression into cheaper
 * string predicates (`StartsWith`, `EndsWith`, `Contains`, `EqualTo`), e.g. when the pattern
 * only checks that a string starts with a given prefix.
 */
object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper {
  // Each regex must match the WHOLE pattern string and captures only runs that contain
  // neither `_` nor `%`. Patterns containing the escape character never reach these
  // regexes (see simplifyLike), so cases like "something\%" are simply left unoptimized,
  // which does not affect correctness.
  private val prefixOnly = "([^_%]+)%".r
  private val suffixOnly = "%([^_%]+)".r
  private val prefixAndSuffix = "([^_%]+)%([^_%]+)".r
  private val infixOnly = "%([^_%]+)%".r
  private val noWildcard = "([^_%]*)".r

  /**
   * Tries to express `input LIKE pattern` without a regular expression.
   *
   * @param input      the expression being matched
   * @param pattern    the LIKE pattern text
   * @param escapeChar the escape character of the LIKE expression
   * @return the simplified predicate, or `None` when the pattern is left as is
   */
  private def simplifyLike(
      input: Expression,
      pattern: String,
      escapeChar: Char = '\\'): Option[Expression] = pattern match {
    // A pattern containing the escape character can be one of three things:
    //   1. an invalid escape sequence, e.g. 'm\aca'
    //   2. an escaped wildcard character, e.g. 'ma\%ca'
    //   3. an escaped escape character, e.g. 'ma\\ca'
    // Some of these could be optimized after resolving the escapes, but for simplicity
    // the rule is skipped whenever the escape character appears at all.
    case _ if pattern.contains(escapeChar) =>
      None
    case prefixOnly(prefix) =>
      Some(StartsWith(input, Literal(prefix)))
    case suffixOnly(suffix) =>
      Some(EndsWith(input, Literal(suffix)))
    case prefixAndSuffix(prefix, suffix) =>
      // 'a%a' is basically 'a%' AND '%a', but the extra length check is required so that
      // the single character 'a' does not match 'a%a'.
      val longEnough =
        GreaterThanOrEqual(Length(input), Literal(prefix.length + suffix.length))
      val bothEnds = And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(suffix)))
      Some(And(longEnough, bothEnds))
    case infixOnly(infix) =>
      Some(Contains(input, Literal(infix)))
    case noWildcard(text) =>
      Some(EqualTo(input, Literal(text)))
    case _ =>
      None
  }

  /**
   * Simplifies as many patterns of a multi-pattern LIKE (ALL / ANY and their negations) as
   * possible. Simplified patterns become individual predicates; the remaining ones stay in a
   * copy of the original expression, and both parts are joined with AND (for the ALL
   * variants) or OR (for the ANY variants).
   *
   * @param child    the expression being matched
   * @param patterns the patterns of the multi-LIKE expression; null entries are kept as is
   * @param multi    the original expression, returned untouched when nothing can be simplified
   */
  private def simplifyMultiLike(
      child: Expression,
      patterns: Seq[UTF8String],
      multi: MultiLikeBase): Expression = {
    // Pair every pattern with its simplification attempt; a null pattern yields None.
    val attempts = patterns.map { p =>
      p -> Option(p).flatMap(nonNull => simplifyLike(child, nonNull.toString))
    }
    val remainPatterns = attempts.collect { case (p, None) => p }
    val replacements = attempts.collect { case (_, Some(simplified)) => simplified }

    multi match {
      case _ if replacements.isEmpty =>
        multi
      case l: LikeAll =>
        val simplified = buildBalancedPredicate(replacements, And)
        if (remainPatterns.isEmpty) simplified
        else And(simplified, l.copy(patterns = remainPatterns))
      case l: NotLikeAll =>
        val simplified = buildBalancedPredicate(replacements.map(Not(_)), And)
        if (remainPatterns.isEmpty) simplified
        else And(simplified, l.copy(patterns = remainPatterns))
      case l: LikeAny =>
        val simplified = buildBalancedPredicate(replacements, Or)
        if (remainPatterns.isEmpty) simplified
        else Or(simplified, l.copy(patterns = remainPatterns))
      case l: NotLikeAny =>
        val simplified = buildBalancedPredicate(replacements.map(Not(_)), Or)
        if (remainPatterns.isEmpty) simplified
        else Or(simplified, l.copy(patterns = remainPatterns))
    }
  }

  /**
   * Applies the simplification to every expression of the plan that the pruning predicate
   * lets through.
   */
  def apply(plan: LogicalPlan): LogicalPlan =
    // NOTE(review): `LIKE_FAMLIY` is referenced exactly as spelled in the original code;
    // confirm against its declaration before "fixing" the spelling.
    plan.transformAllExpressionsWithPruning(_.containsPattern(LIKE_FAMLIY), ruleId) {
      case like @ Like(input, Literal(pattern, StringType), escapeChar) =>
        if (pattern != null) {
          simplifyLike(input, pattern.toString, escapeChar).getOrElse(like)
        } else {
          // "col LIKE null" evaluates to null, so return a null boolean directly.
          Literal(null, BooleanType)
        }
      // The multi-pattern variants are only rewritten when CollapseProject.isCheap accepts
      // the child. NOTE(review): presumably because the rewrite repeats `child` once per
      // simplified pattern -- confirm.
      case likeAll @ LikeAll(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, likeAll)
      case notLikeAll @ NotLikeAll(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, notLikeAll)
      case likeAny @ LikeAny(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, likeAny)
      case notLikeAny @ NotLikeAny(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, notLikeAny)
    }
}
测试
  // Runs a NOT LIKE '%HotFocus%' filter over testData with both adaptive-execution settings
  // below set to "true", then prints the resulting rows and schema. It makes no assertions;
  // it is meant for inspecting the output by eye.
  test("test data, force apply AQE") {
    val aqeSettings = Seq(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true")
    withSQLConf(aqeSettings: _*) {
      val filtered = sql("SELECT * FROM testData where value not like '%HotFocus%'")
      filtered.show()
      filtered.printSchema()
    }
  }

在这里插入图片描述

  // Same query shape as a plain '%HotFocus%' filter, but with doubled wildcards
  // ('%%HotFocus%%'), run with both adaptive-execution settings below set to "true".
  // NOTE(review): presumably written to compare how the doubled-% pattern is planned versus
  // the single-% one -- confirm. It makes no assertions; it only prints rows and schema.
  test("test data like, force apply AQE") {
    val aqeSettings = Seq(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true")
    withSQLConf(aqeSettings: _*) {
      val filtered = sql("SELECT * FROM testData where value not like '%%HotFocus%%'")
      filtered.show()
      filtered.printSchema()
    }
  }

在这里插入图片描述

http://www.dtcms.com/a/278455.html

相关文章:

  • 软件测试中的BUG等级与生命周期详解
  • 走近科学IT版:EasyTire设置了ip,但是一闪之后就变回到原来的dhcp获得的地址
  • ros2版本自定义插件的实现与热插拔
  • 设计模式(行为型)-迭代器模式
  • java 判断两个集合中没有重复元素
  • iOS高级开发工程师面试——Objective-C 语言特性
  • Linux(Ubuntu)硬盘使用情况解析(已房子举例)
  • rk3588ubuntu 系统移植AIC8800D Wi-Fi6/BT5.0芯片
  • EMQX + Amazon S3 Tables:从实时物联网数据到数据湖仓
  • C++函数指针
  • Redis作缓存时存在的问题及其解决方案
  • 云原生核心技术解析:Docker vs Kubernetes vs Docker Compose
  • Word 与 Excel 下拉菜单对比(附示例下载)
  • 前端将传回的List数据组织成树形数据并展示
  • MEMS IMU如何赋能无人机与机器人精准感知?
  • 跨膜粘蛋白MUC17
  • MAC安装虚拟机
  • UE5多人MOBA+GAS 22、创建技能图标UI,实现显示蓝耗,冷却,以及数字显示的倒数计时还有雷达显示的倒数计时
  • IDEA中使用Servlet,tomcat输出中文乱码
  • ubuntu22.04下配置qt5.15.17开发环境
  • Kotlin委托
  • 【Python】基础语法
  • 亚马逊新规!7月13日起合规性文件须出自符合要求的实验室!
  • 【飞牛云fnOS】告别数据孤岛:飞牛云fnOS私人资料管家
  • 【Hadoop科普篇】大数据怎么处理?Hadoop是什么?跟HDFS, Spark, Flink, Hive, Hbase是什么关系?
  • 嵌入式硬件篇---晶体管的分类
  • 大数据系列之:通过trino查询hive表
  • [Nagios Core] struct监控对象 | 配置.cfg加载为内存模型
  • Kotlin集合接口
  • HTTP 四种常见方法