基于Pandas数据分析的设备巡检计划生成算法设计及实现
需求说明
有一批设备编号、部门(编号里第一位)、设备到期的整年数据,设计一套算法:
硬性要求:
1、使得每个部门的设备最多只能被巡检3次(1~2次最好)
2、到期的设备最少要提前1个月安排检查
非硬性要求:
1、每个月的巡检设备数据总和相差不大
2、2月份安排的巡检数据总和是其它月份的80%
数据示例
设备编号 到期日期
SZDF17652001 2025-04-21
AZGDEY28763112 2025-03-01
设计思路
1、对拿到的数据进行数据预处理,即提取出部门
2、对当前的到期日减1个月,生成新的日期startdate
3、先按照时间顺序排列数据
4、针对第一列和第二列部门重复的,把第二列的数据移到第一列里去,形成该部门的第一组
5、针对部门在第一列里有,第二列没有的,把第一列的数据形成该部门的第一组
6、针对部门在第一列里没有,第二列里有的,把第二列的数据形成该部门的第一组
7、从后往前按照部门设定阈值,使得所在部门尽量往后排,产生部门的第三组
8、针对每个部门仍有数据的合并或者生成第二组
9、对生成的数据按年月统计总数,如果某个月超出阈值,则适当往前或往后浮动一个月
完整代码
import pandas as pd
from datetime import timedelta
import datetime
import json
import itertools
def adjust_date(row):
    """Tighten the ``lower`` month of a row whose expiry falls late in the month.

    Intended for ``DataFrame.apply(adjust_date, axis=1)``.

    Args:
        row: A row (``pd.Series``) holding a timestamp under ``'dateval'``
            and a ``'YYYY-MM'`` string under ``'lower'``.

    Returns:
        The same row. When the day-of-month of ``dateval`` is greater than
        24, ``row['lower']`` is overwritten with the month one month before
        ``dateval``; otherwise the row is returned untouched.
    """
    # Example from the original author: an expiry of 10-25 gets the window
    # 9..10 rather than 8..10, because an August slot would only be usable
    # on 8-25 or later.
    if row['dateval'].day > 24:
        lower_date = (
            row['dateval'] - pd.DateOffset(months=2) + pd.DateOffset(months=1)
        )
        row['lower'] = lower_date.strftime('%Y-%m')
    return row
def dataload(data, startdate, enddate):
    """Prepare the raw device table and compute per-window counts.

    Args:
        data: DataFrame with at least the columns ``'id'`` and ``'dateval'``
            (expiry date).
        startdate: Inclusive lower bound applied to ``dateval``.
        enddate: Exclusive upper bound applied to ``dateval``.

    Returns:
        A 4-tuple ``(df, filtered_df, group_count, grouplower_count)``:

        * ``df`` - all rows with a non-null ``dateval`` plus the derived
          columns ``catlog``, ``15d``, ``60d``, ``lower`` and ``upper``.
        * ``filtered_df`` - the rows of ``df`` with
          ``startdate <= dateval < enddate``.
        * ``group_count`` - number of ids per ``(lower, upper)`` pair
          (column ``cnt``) and the running total within each ``lower``
          (column ``cum``).
        * ``grouplower_count`` - number of ids per ``lower`` (column ``cnt``).

    Side effects:
        Writes ``group_count`` to ``D:\\tmp\\group_count_1219.csv``.
    """
    # Drop records that have no expiry date.
    df = data.dropna(subset=['dateval']).copy()
    # The first character of the id is the department code.
    df['catlog'] = df['id'].str.slice(0, 1)
    df['dateval'] = pd.to_datetime(df['dateval'])
    # Latest / earliest candidate dates. The column is named '15d' but the
    # offset actually applied is 20 days.
    df['15d'] = df['dateval'] - timedelta(days=20)
    df['60d'] = df['dateval'] - pd.DateOffset(months=2)
    # Zero-padded 'YYYY-MM' strings so that months compare lexicographically.
    df['lower'] = df['60d'].dt.strftime('%Y-%m')
    df['upper'] = df['15d'].dt.strftime('%Y-%m')
    # Rows expiring late in the month get their 'lower' month moved forward;
    # the cut-off day lives in adjust_date.
    df = df.apply(adjust_date, axis=1)
    # Keep only the requested expiry range.
    in_range = (df['dateval'] >= startdate) & (df['dateval'] < enddate)
    filtered_df = df.loc[in_range]
    # Count ids per (lower, upper) window, with a running total per 'lower'.
    group_count = (
        filtered_df.groupby(['lower', 'upper'])['id'].count().reset_index()
    )
    group_count.rename(columns={'id': 'cnt'}, inplace=True)
    group_count['cum'] = group_count.groupby(['lower'])['cnt'].cumsum()
    # Count ids per 'lower' month only.
    grouplower_count = filtered_df.groupby(['lower'])['id'].count().reset_index()
    grouplower_count.rename(columns={'id': 'cnt'}, inplace=True)
    group_count.to_csv(r'D:\tmp\group_count_1219.csv')
    return df, filtered_df, group_count, grouplower_count


def _month_str(stamp, months):
    """Return ``stamp`` shifted by ``months`` calendar months as 'YYYY-MM'."""
    return (stamp + pd.DateOffset(months=months)).strftime('%Y-%m')


def schedule_v5(baseamt=900, lowrate=0.2, upperrate=0.2, transrate=0.1,
                data=pd.DataFrame, startdate='2025-01-01',
                enddate='2026-01-16'):
    """Assign every device in the expiry range to an inspection month.

    Each device starts in its ``lower`` month (see ``dataload``). Windows
    that would overload a month are pushed to the following month, either
    as a whole or, for one-month-wide windows, by moving the pair of
    departments whose combined size is closest to the required amount.

    Args:
        baseamt: Baseline number of devices per month.
        lowrate: February is considered full above ``baseamt * (1 - lowrate)``.
        upperrate: Other months are considered full above
            ``baseamt * (1 + upperrate)``.
        transrate: The amount to move is inflated by ``(1 + transrate)``
            before the closest department pair is searched.
        data: Input DataFrame with ``'id'`` and ``'dateval'`` columns. The
            default is only a placeholder; a real DataFrame must be passed.
        startdate: Inclusive lower bound for ``dateval``; also defines the
            first scheduled month.
        enddate: Exclusive upper bound for ``dateval``.

    Returns:
        The filtered rows with ``lower`` overwritten by the assigned month
        wherever the scheduler assigned one.

    Raises:
        TypeError: If ``data`` is not a DataFrame instance.

    Side effects:
        Prints progress information and writes CSV files under ``D:\\tmp``.
    """
    if not isinstance(data, pd.DataFrame):
        raise TypeError("data must be a pandas DataFrame instance")

    print(startdate, enddate)
    _, filtered_df, group_count, _ = dataload(data, startdate, enddate)
    # Collects (id, period) reassignments.
    updatedf = pd.DataFrame(columns=['id', 'period'])

    start = pd.Timestamp(startdate)
    mon1 = _month_str(start, 0)     # first month of the plan
    mon2 = _month_str(start, 1)     # second month of the plan
    mon11 = _month_str(start, -2)   # two months before the plan starts
    mon12 = _month_str(start, -1)   # month right before the plan starts

    lower_col = filtered_df['lower']
    upper_col = filtered_df['upper']
    mon1_filtered_3df = filtered_df[
        ((lower_col == mon12) & (upper_col == mon2)) | (lower_col == mon1)
    ]
    mon1_filtered_2df = filtered_df[lower_col == mon1]
    mon12_filtered_2df = filtered_df[
        (upper_col == mon1) | ((lower_col == mon11) & (upper_col == mon12))
    ]

    # Month before the plan: windows ending in the first month, plus the
    # (mon11, mon12) window, are all merged into mon12.
    temdf = pd.DataFrame(mon12_filtered_2df['id'])
    temdf['period'] = mon12
    updatedf = pd.concat([updatedf, temdf], ignore_index=True)

    # First month: also absorb the (mon12, mon2) window when the total stays
    # below the upper threshold; otherwise keep only windows starting in mon1.
    if len(mon1_filtered_3df) < baseamt * (1 + upperrate):
        temdf = pd.DataFrame(mon1_filtered_3df['id'])
    else:
        temdf = pd.DataFrame(mon1_filtered_2df['id'])
    temdf['period'] = mon1
    updatedf = pd.concat([updatedf, temdf], ignore_index=True)

    feb_limit = baseamt * (1 - lowrate)
    month_limit = baseamt * (1 + upperrate)

    for _, row in group_count[group_count['lower'] >= mon2].iterrows():
        currlower = row['lower']
        currupper = row['upper']
        lower_ts = pd.to_datetime(currlower, format='%Y-%m')
        nextmon = _month_str(lower_ts, 1)
        next2mon = _month_str(lower_ts, 2)
        currcnt = row['cnt']
        currcum = row['cum']
        is_feb = currlower.split("-")[-1] == '02'
        window = (filtered_df['lower'] == str(currlower)) & (
            filtered_df['upper'] == str(currupper)
        )

        if currupper == nextmon:  # window is [lower, lower + 1]
            already = len(updatedf[updatedf['period'] == currlower])
            # Reset on every iteration: previously this was unbound (or held
            # a stale value from an earlier window) when no threshold fired.
            transcnt = 0
            if is_feb and currcum > feb_limit:
                transcnt = currcnt - feb_limit  # may be negative
            elif currcnt + already + currcum > month_limit:
                transcnt = currcnt + already - month_limit

            if transcnt > 0:
                curroverlapdf = filtered_df.loc[window]
                # rename_axis keeps the department column named 'catlog' on
                # pandas versions where value_counts leaves the index unnamed.
                curroverlapuq = (
                    curroverlapdf['catlog']
                    .value_counts()
                    .rename_axis('catlog')
                    .reset_index(name='cnt')
                )
                print(curroverlapuq)
                counts = curroverlapuq['cnt'].tolist()
                target = transcnt * (1 + transrate)
                # Pair departments by position, not by count value, so two
                # departments with equal counts remain distinguishable.
                # NOTE(review): a window with a single department yields no
                # pair and therefore moves nothing (same as before).
                pairs = list(itertools.combinations(range(len(counts)), 2))
                if pairs:
                    first, second = min(
                        pairs,
                        key=lambda p: abs(counts[p[0]] + counts[p[1]] - target),
                    )
                    sum_val = counts[first] + counts[second]
                    abs_diff = abs(sum_val - target)
                    print(f"Actual: {sum_val}, Expected {int(transcnt*(1+transrate))},Diff:{int(abs_diff)}, Com1: {counts[first]},Com2: {counts[second]},Date:{currlower}")
                    result_df2 = curroverlapuq.iloc[[first, second]]
                    print(result_df2, type(result_df2))
                    chosen = result_df2['catlog']
                    transdf = pd.DataFrame(
                        curroverlapdf.loc[
                            curroverlapdf['catlog'].isin(chosen), 'id'
                        ].copy()
                    )
                    transdf['period'] = str(nextmon)
                    updatedf = pd.concat([updatedf, transdf], ignore_index=True)
                    updatedf.to_csv(r'D:\tmp\updatedf_1221_900.csv')
                    print("XXX")
        elif currupper == next2mon:  # window is [lower, lower + 2]
            if (is_feb and currcum > feb_limit) or (currcum > month_limit):
                # The whole window is pushed to the following month.
                transdf = pd.DataFrame(filtered_df.loc[window]['id'].copy())
                transdf['period'] = str(nextmon)
                updatedf = pd.concat([updatedf, transdf], ignore_index=True)

    filtered_df_copy = filtered_df.copy()
    filtered_df_copy.set_index('id', inplace=True)
    updatedf.set_index('id', inplace=True)
    updatedf.rename(columns={'period': 'lower'}, inplace=True)
    # Only ids present in updatedf are overwritten; the rest keep 'lower'.
    filtered_df_copy.update(updatedf)
    result_df = filtered_df_copy.reset_index()
    result_df[['id', 'lower']].to_csv(r'D:\\tmp\\Fina_1221.csv', index=False)
    print(result_df['lower'].value_counts())
    return result_df


def jsonformat(df, last12):
    """Serialise a schedule as JSON keyed by month number.

    Args:
        df: DataFrame with the columns ``'lower'`` ('YYYY-MM' strings) and
            ``'id'``.
        last12: The 'YYYY-MM' label that is reported under the key
            ``'last_12'`` instead of its month number.

    Returns:
        A JSON string (4-space indent) with the keys ``'last_12'`` and
        ``'1'`` .. ``'11'``. Each value is the list of ids scheduled for
        that month, or ``''`` when the month has none.
    """
    # Month label -> ids, re-keyed by month number ('last_12' for last12).
    converted_data = {}
    for date_str, ids in df.groupby('lower')['id']:
        if date_str == last12:
            month_num = "last_12"
        else:
            month_num = str(datetime.datetime.strptime(date_str, "%Y-%m").month)
        converted_data[month_num] = ids.tolist()

    # NOTE(review): there is no '12' key, so ids assigned to a December
    # other than `last12` are left out of the output - confirm intended.
    default_dict = {'last_12': '', '1': '', '2': '', '3': '', '4': '', '5': '',
                    '6': '', '7': '', '8': '', '9': '', '10': '', '11': ''}
    for key in default_dict:
        if key in converted_data:
            default_dict[key] = converted_data[key]
    json_result = json.dumps(default_dict, indent=4)
    # Previously the serialised result was built and then discarded.
    return json_result