真实项目中使用到的ES自定义评分脚本
查询语句
POST /tm_index/_search
{
"query": {
"bool": {
"filter": [
{ "match": {
"original": {
"query": "你多少岁",
"minimum_should_match": "69%"
}
}}
],
"must": [
{ "function_score": {
"functions": [
{
"filter": { "term": { "originalTag": "<1>1</1><2/><3>3</3>" } },
"weight": 2
},
{
"script_score": {
"script": {
"source": """
// Similarity scoring function. It has to be declared at the top of the
// script; str1 is the text passed in as a request parameter.
//
// Returns:
//   10  when either string is null or empty
//   30  when the shorter length is below 70% of the longer length
//   100 when the strings are exactly equal
//   90  when they are equal after lower-casing
//   otherwise 80 divided by the edit distance of the lower-cased strings
double calculate(String str, String str1) {
    // Fast exit: nothing to compare.
    if (str == null || str1 == null || str.isEmpty() || str1.isEmpty()) {
        return 10.0;
    }

    int len = str.length();
    int len1 = str1.length();
    int minLen = len < len1 ? len : len1;
    int maxLen = len < len1 ? len1 : len;
    // Fast exit: lengths too far apart, skip the edit-distance work.
    double ratio = (double) minLen / maxLen;
    if (ratio < 0.7) {
        return 30;
    }

    if (str.equals(str1)) {
        return 100;
    }
    String strLower = str.toLowerCase();
    String str1Lower = str1.toLowerCase();
    if (strLower.equals(str1Lower)) {
        return 90;
    }

    // Pre-processing: compare on the lower-cased characters.
    char[] arr1 = strLower.toCharArray();
    char[] arr2 = str1Lower.toCharArray();
    // Lower-casing may change the number of chars (U+0130 becomes two chars),
    // so the DP must be bounded by the array lengths, not by len/len1.
    // Bounding by the original lengths could compare truncated prefixes,
    // yield a distance of 0 and make the final division return Infinity.
    int rows = arr1.length;
    int cols = arr2.length;

    // Single-row dynamic-programming table for the edit distance
    // (insert, delete and substitute each cost 1).
    int[] dp = new int[cols + 1];
    for (int j = 0; j <= cols; j++) {
        dp[j] = j;
    }
    for (int i = 1; i <= rows; i++) {
        // prevDiagonal holds the value of the previous row at column j-1.
        int prevDiagonal = dp[0];
        dp[0] = i;
        char c1 = arr1[i - 1];
        for (int j = 1; j <= cols; j++) {
            int cost = (c1 == arr2[j - 1]) ? 0 : 1;
            // Minimum of the three possible moves.
            int newVal = dp[j] + 1;
            newVal = newVal < dp[j - 1] + 1 ? newVal : dp[j - 1] + 1;
            newVal = newVal < prevDiagonal + cost ? newVal : prevDiagonal + cost;
            prevDiagonal = dp[j];
            dp[j] = newVal;
        }
    }
    // The lower-cased strings differ here, so the full distance is at least 1.
    return 80.0 * (1.0 / dp[cols]);
}
// Read the stored text. Accessing .value on a field that has no value for
// this document throws, so check size() first and fall back to an empty
// string, which calculate() scores with its fixed fallback value.
String es2 = "";
if (doc['original.keyword'].size() != 0) {
    es2 = doc['original.keyword'].value;
}
String str2 = params.val2;
// Score on the plain text only.
double textRatio = calculate(es2, str2);
return textRatio;
""",
"params": {
"val2": "你多少岁"
}
}
}}
],
"score_mode": "sum",
"boost_mode": "replace"
}}
]
}
}
}
自定义评分脚本
// Similarity score between a stored text (str) and the request text (str1).
//
// Returns:
//   10  when either string is null or empty
//   30  when the shorter length is below 70% of the longer length
//   100 when the strings are exactly equal
//   90  when they are equal after lower-casing
//   otherwise 80 divided by the edit distance of the lower-cased strings
double calculate(String str, String str1) {
    // Fast exit: nothing to compare.
    if (str == null || str1 == null || str.isEmpty() || str1.isEmpty()) {
        return 10.0;
    }

    int len = str.length();
    int len1 = str1.length();
    int minLen = len < len1 ? len : len1;
    int maxLen = len < len1 ? len1 : len;
    // Fast exit: lengths too far apart, skip the edit-distance work.
    double ratio = (double) minLen / maxLen;
    if (ratio < 0.7) {
        return 30;
    }

    if (str.equals(str1)) {
        return 100;
    }
    String strLower = str.toLowerCase();
    String str1Lower = str1.toLowerCase();
    if (strLower.equals(str1Lower)) {
        return 90;
    }

    // Pre-processing: compare on the lower-cased characters.
    char[] arr1 = strLower.toCharArray();
    char[] arr2 = str1Lower.toCharArray();
    // Lower-casing may change the number of chars (U+0130 becomes two chars),
    // so the DP must be bounded by the array lengths, not by len/len1.
    // Bounding by the original lengths could compare truncated prefixes,
    // yield a distance of 0 and make the final division return Infinity.
    int rows = arr1.length;
    int cols = arr2.length;

    // Single-row dynamic-programming table for the edit distance
    // (insert, delete and substitute each cost 1).
    int[] dp = new int[cols + 1];
    for (int j = 0; j <= cols; j++) {
        dp[j] = j;
    }
    for (int i = 1; i <= rows; i++) {
        // prevDiagonal holds the value of the previous row at column j-1.
        int prevDiagonal = dp[0];
        dp[0] = i;
        char c1 = arr1[i - 1];
        for (int j = 1; j <= cols; j++) {
            int cost = (c1 == arr2[j - 1]) ? 0 : 1;
            // Minimum of the three possible moves.
            int newVal = dp[j] + 1;
            newVal = newVal < dp[j - 1] + 1 ? newVal : dp[j - 1] + 1;
            newVal = newVal < prevDiagonal + cost ? newVal : prevDiagonal + cost;
            prevDiagonal = dp[j];
            dp[j] = newVal;
        }
    }
    // The lower-cased strings differ here, so the full distance is at least 1.
    return 80.0 * (1.0 / dp[cols]);
}
根据纯文本和标签格式匹配语料:纯文本相似度由上面的自定义评分脚本打分;标签格式(originalTag)与查询一致时再额外加分(weight 为 2,score_mode 为 sum,即在脚本得分的基础上加 2)。
加分逻辑如下:
{
"filter": { "term": { "originalTag": "<1>1</1><2/><3>3</3>" } },
"weight": 2
}