当前位置: 首页 > news >正文

ES海量数据更新及导入导出备份

一、根据查询条件更新字段

from elasticsearch import Elasticsearch
import redis
import json# 替换下面的用户名、密码和Elasticsearch服务器地址
# Connection settings: replace the user name, password and server address
# with the values for your own Elasticsearch cluster.
username = 'elastic'
# NOTE(review): the placeholder below ends with a space -- confirm that is intended.
password = 'password '
es_host = 'https://127.0.0.2:30674'

# Create the client with HTTP basic authentication. Certificate verification
# is switched off here; for a cluster secured with a trusted CA you would
# instead pass verify_certs=True together with ca_certs='/path/to/ca/cert'.
es = Elasticsearch(
    hosts=[es_host],
    basic_auth=(username, password),
    verify_certs=False,
)
# The client is now ready for use; a sample search is shown (disabled) below.
# print(es.cluster.state())# response = es.search(index="remote_statistics_202409", query={"match_all": {}})
# print(response)# # 连接Elasticsearch和Redis
# #es = Elasticsearch("http://localhost:9200")
# Painless source executed against every document visited by the update.
# Step by step it:
#   1. reads ctx._source.createAt and splits it into whole minutes plus the
#      seconds within that minute;
#   2. snaps the seconds to 0 (<= 15), 60 (> 45) or 30 (otherwise) and stores
#      the resulting timestamp in ctx._source['create_lltime'];
#   3. builds the lookup key RemoteId + that timestamp and fetches the entry
#      from params.new_value;
#   4. when an entry exists, copies its Latitude and Longitude onto the
#      document and stores the whole entry under 'rrc'.
# The text is split across adjacent literals only for readability; the
# concatenated string is what gets sent to Elasticsearch.
_POSITION_UPDATE_SOURCE = (
    "def create_at=ctx._source.createAt;"
    "def minutes = Math.floor(create_at / 60000);"
    "def left = Math.floor(create_at % 60000 / 1000);"
    "if(left<=15) {left=0} else if(left >45){ left=60} else {left=30} "
    "def form_time= minutes * 60000 + left * 1000;"
    "ctx._source['create_lltime'] = form_time;"
    "def key = ctx._source.RemoteId + (Long)form_time ;"
    "def rru=  params.new_value[key];"
    "if(rru !=null) {ctx._source['Latitude']=rru['Latitude'];"
    "ctx._source['Longitude']=rru['Longitude'];"
    "ctx._source['rrc'] = rru}"
)


def _load_position_map(client):
    """Collect position records from every hash in the Redis database.

    Every key returned by ``client.keys()`` is read with ``hgetall``. Each
    field value is decoded as JSON and kept unless its "Latitude" entry is
    missing or its text starts with "0.". Kept records are stored under the
    string ``<redis key><hash field>``, which is the key shape the Painless
    script looks up (``RemoteId`` followed by the rounded timestamp).

    Args:
        client: Redis client exposing ``keys()`` and ``hgetall()``.

    Returns:
        dict mapping the combined key string to the decoded JSON record.
    """
    lookup = {}
    for redis_key in client.keys():
        fields = client.hgetall(redis_key)
        print(redis_key)
        prefix = redis_key.decode('utf-8')
        for field, raw in fields.items():
            # Decode once and reuse the result for both the filter and the store.
            record = json.loads(raw)
            latitude = record.get("Latitude")
            # A record without "Latitude" cannot supply a position; skip it
            # instead of failing on None.startswith(...). str() also lets a
            # numeric latitude go through the same prefix check.
            if latitude is None or str(latitude).startswith("0."):
                continue
            lookup[prefix + str(json.loads(field))] = record
    return lookup


# Source of the position records: Redis database 11 on the given host.
r = redis.StrictRedis(host='10.7.9.13', port=32197, db=11)
my_map = _load_position_map(r)
print(len(my_map))

# Script definition for update_by_query. The complete lookup table travels
# with the request as the script parameter "new_value".
script = {
    "source": _POSITION_UPDATE_SOURCE,
    "params": {"new_value": my_map},
    "lang": "painless",
}
#
# # 更新查询
# Run the Painless script over the index. No query is supplied, so the
# request is not restricted to a subset of documents; add a query argument
# if only some documents should be touched.
#   slices="auto"              - let Elasticsearch choose the slice count
#   wait_for_completion=False  - do not block until the update has finished
#   conflicts="proceed"        - keep going when a version conflict occurs
ret = es.update_by_query(
    index="remote_statistics_202406",
    script=script,
    slices="auto",
    wait_for_completion=False,
    conflicts="proceed",
)
# Show the immediate response of the request.
print(ret)

二、ES数据批量导出

import json
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from datetime import datetime
import urllib3
urllib3.disable_warnings()
# 变量
# Export window, compared against the `createAt` field of the documents.
start_date = 1735689600000
end_date = 1738368000000
# index_name = 'remote_statistics_202410'
index_name = 'remote_statistics_202501'

# Connection settings: replace the user name, password and server address
# with the values for your own Elasticsearch cluster.
username = 'elastic'
password = 'password'
es_host = 'https://127.0.0.1:32293'

# Create the client with HTTP basic authentication. Certificate verification
# is switched off here; for a cluster secured with a trusted CA you would
# instead pass verify_certs=True together with ca_certs='/path/to/ca/cert'.
es = Elasticsearch(
    hosts=[es_host],
    basic_auth=(username, password),
    verify_certs=False,
)

print("----------start---------------")


def fetch_data(start_time, end_time):
    """Return an iterator over documents created in a time window.

    Args:
        start_time: inclusive lower bound for ``createAt``.
        end_time: exclusive upper bound for ``createAt``.

    Returns:
        The iterator produced by ``helpers.scan`` over ``index_name`` for the
        range query ``start_time <= createAt < end_time``.
    """
    range_query = {
        "query": {
            "range": {
                "createAt": {
                    "gte": start_time,
                    "lt": end_time,
                }
            }
        }
    }
    return helpers.scan(es, body=range_query, index=index_name)


if __name__ == "__main__":
    # One output file per window of 60 * 60 * 1000 units of createAt.
    step = 60 * 60 * 1000
    banner = "**********************************************************"
    # NOTE(review): the loop runs 8 extra windows past end_date -- presumably
    # an allowance for a UTC+8 offset; confirm before changing.
    for i in range(start_date, end_date + 8 * step, step):
        date = datetime.fromtimestamp(i / 1000)
        print(banner)
        print(date)
        print(banner)
        ret = fetch_data(i, i + step)
        count = 0
        # Each window is written as JSON Lines into "<window start>.json":
        # one document source per line.
        with open(str(i) + '.json', 'w', encoding='utf-8') as f:
            for doc in ret:
                f.write(json.dumps(doc['_source']) + '\n')
                count = count + 1
        # Number of documents exported for this window.
        print(count)

三、ES数据批量导入

import json
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import urllib3
urllib3.disable_warnings()
# 变量
#index_name = 'remote_statistics_202410'
# Target index for the import.
index_name = 'network_statistics_202410'

# Connection settings: replace the user name, password and server address
# with the values for your own Elasticsearch cluster.
username = 'elastic'
# NOTE(review): the placeholder below ends with a space -- confirm that is intended.
password = 'password '
es_host = 'https://127.0.0.1:32067'

# Create the client with HTTP basic authentication. Certificate verification
# is switched off here; for a cluster secured with a trusted CA you would
# instead pass verify_certs=True together with ca_certs='/path/to/ca/cert'.
es = Elasticsearch(
    hosts=[es_host],
    basic_auth=(username, password),
    verify_certs=False,
)


def _json_files(file_path):
    """Return the paths of the .json files selected by *file_path*."""
    if file_path is not None and os.path.isfile(file_path):
        return [file_path]
    directory = os.getcwd() if file_path is None else file_path
    return [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith(".json")
    ]


def bulk_index_file(idx_name, file_path):
    """Bulk-index JSON Lines files into an Elasticsearch index.

    Every non-blank line of each selected file is parsed as one JSON document
    and sent through ``helpers.bulk`` with ``idx_name`` as the index. A
    failure in one file is printed and the remaining files are still
    processed.

    Args:
        idx_name: name of the index that receives the documents.
        file_path: ``None`` to import every ``*.json`` file in the current
            working directory (the previous behaviour, which ignored this
            argument), a directory to import its ``*.json`` files, or the
            path of a single file.
    """
    for path in _json_files(file_path):
        shown_name = os.path.basename(path)
        with open(path, 'r', encoding='utf-8') as file:
            try:
                print(shown_name)
                # Lazy generator: documents are parsed as bulk() consumes
                # them. Blank lines are skipped so a trailing newline does
                # not abort the whole file.
                actions = (json.loads(line) for line in file if line.strip())
                helpers.bulk(es, actions, index=idx_name)
            except Exception as e:
                # Best effort by design: report and move on to the next file.
                print("error-----------------------")
                print(e)
                print(shown_name)


# Import every .json file found in the current working directory.
bulk_index_file(index_name, None)

相关文章:

  • 你工作中涉及的安全方面的测试有哪些怎么回答
  • 第6篇:中间件 SQL 重写与语义分析引擎实现原理
  • 瀚文(HelloWord)智能键盘项目深度剖析:从0到1的全流程解读
  • Ubuntu24.04 交叉编译 aarch64 ffmpeg
  • 旅游微信小程序制作指南
  • 高并发区块链系统实战:从架构设计到性能优化
  • 华为VanillaNet遇上BiFPN:YOLOv8的性能突破之旅
  • `<CLS>` 向量是 `logits` 计算的“原材料”,`logits` 是基于 `<CLS>` 向量的下游预测结果
  • Python爬虫:trafilatura 的详细使用(高效的网页正文提取工具)
  • stress 服务器压力测试的工具学习
  • MySQL范式和反范式
  • 深度学习之模型压缩三驾马车:模型剪枝、模型量化、知识蒸馏
  • LeetCode 3370.仅含置位位的最小整数
  • 42、响应处理-【源码分析】-浏览器与PostMan内容协商完全适配
  • 面试题小结(真实面试)
  • Elasticsearch中的语义搜索(Semantic Search)介绍
  • Go语言学习-->项目中引用第三方库方式
  • Zookeeper 集群部署与故障转移
  • 细说C语言将格式化输出到字符串的函数sprintf、_sprintf_l、swprintf、_swprintf_l、__swprintf_l
  • Git安装与常用命令全攻略
  • 建设网站公司专业/平台推广是做什么的
  • 深圳制作网站搜行者seo/沈阳seo建站
  • 安陆做网站公司/中山疫情最新消息
  • 自己电脑做服务器发布网站/贵阳网站建设推广
  • 深业资本有限公司网站建设/谷歌海外推广
  • 徐州做网站管理的公司/会计培训班一般多少钱