当前位置: 首页 > wzjs >正文

wordpress成品网站云部落wordpress视频全屏

wordpress成品网站云部落,wordpress视频全屏,wordpress到底是什么,网络宣传怎么做Predict Calorie Expenditure 题意: 给出每个人的基本信息,预测运动后的卡路里消耗值。 数据处理: 1.构造出人体机能、运动相关的特征值。 2.所有特征值进行重新组合,注意维度爆炸 3.对连续信息分箱变成离散 建立模型:…

Predict Calorie Expenditure

题意:

给出每个人的基本信息,预测运动后的卡路里消耗值。

数据处理:

1.构造出人体机能、运动相关的特征值。
2.所有特征值进行重新组合,注意维度爆炸
3.对连续信息分箱变成离散

建立模型:

1.xgb模型,lgb模型,cat模型
2.使用stack堆叠融合,使用3折交叉验证
3.对xgb、lgb、cat进行K折交叉验证,最终和stack进行结果融合。

代码:
import os
import sys
import warnings
import numpy as np
import pandas as pd
import seaborn
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from matplotlib import pyplot as plt
import lightgbm
from mlxtend.regressor import StackingCVRegressor
from sklearn import clone
from sklearn.ensemble import VotingRegressor, StackingClassifier, StackingRegressor
from sklearn.linear_model import Lasso, LogisticRegression, RidgeCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer, mean_squared_log_error
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge


def init():
    """Set process-wide logging, warning and pandas display options."""
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # only emit error-level logs
    warnings.simplefilter('ignore')  # silence warnings
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 1000)
    pd.set_option("display.max_rows", 1000)
    pd.set_option("display.max_columns", 1000)


def show_dataframe(df):
    """Print a quick exploratory summary of *df*.

    Prints, in order: column dtypes, the first 10 rows, ``describe()``
    statistics, the number of duplicated rows, per-column null counts and
    the ``DataFrame.info()`` report. Each section is followed by a
    100-character dashed separator.

    Args:
        df: The pandas DataFrame to summarise.
    """
    import io  # local import: only needed to capture DataFrame.info() output

    separator = "\n" + "-" * 100

    print("查看特征值和特征值类型\n" + str(df.dtypes) + separator)
    # The heading promises 10 rows, so ask for 10 explicitly (head() gives 5).
    print("查看前10行信息\n" + str(df.head(10)) + separator)
    print("查看每个特征值的各种数据统计信息\n" + str(df.describe()) + separator)
    print("输出重复行的个数\n" + str(df.duplicated().sum()) + separator)
    print("查看每列的缺失值个数\n" + str(df.isnull().sum()) + separator)

    # DataFrame.info() writes to a stream and returns None, so capture it in a
    # buffer; str(df.info()) would print the report early and then "None".
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    print("查看缺失值的具体信息\n" + info_buffer.getvalue().rstrip("\n") + separator)
    # To list the distinct values of a column X and their counts, print
    # str(df['X'].value_counts()) in the same way.


def show_relation(data, colx, coly):
    """Plot the relationship between feature *colx* and target *coly*.

    A box plot is drawn when the feature has dtype ``object``/``category`` or
    fewer than 20 distinct values; otherwise a scatter plot is drawn.

    Args:
        data: DataFrame containing both columns.
        colx: Name of the feature column (x axis).
        coly: Name of the target column (y axis).
    """
    feature = data[colx]
    is_discrete = (
        feature.dtype == 'object'
        or feature.dtype == 'category'
        or len(feature.unique()) < 20
    )
    if is_discrete:
        seaborn.boxplot(x=colx, y=coly, data=data)
    else:
        plt.scatter(data[colx], data[coly])
    plt.xlabel(colx)
    plt.ylabel(coly)
    plt.show()


# Custom RMSLE scoring function (GridSearchCV maximises the score, so the
# negative RMSLE is returned).
def rmsle_scorer(y_true, y_pred):
    """Return the negated root-mean-squared-log-error of *y_pred*.

    Both inputs are clipped to a minimum of 1e-15 before ``log(1 + x)`` is
    taken, so negative values cannot reach the logarithm.

    Args:
        y_true: Array-like of ground-truth target values.
        y_pred: Array-like of predicted values.

    Returns:
        ``-RMSLE`` as a float; negated because GridSearchCV maximises scores.
    """
    y_pred = np.clip(y_pred, 1e-15, None)
    y_true = np.clip(y_true, 1e-15, None)
    log_error = np.log1p(y_pred) - np.log1p(y_true)
    rmsle = np.sqrt(np.mean(log_error ** 2))
    return -rmsle


def _build_features(df_train, df_test):
    """Return the engineered feature frame for train rows followed by test rows."""
    from itertools import combinations  # local import: only used here

    df_all = pd.concat(
        [df_train.drop(['id', 'Calories'], axis=1), df_test.drop(['id'], axis=1)],
        axis=0,
    )
    df_all['Sex'] = df_all['Sex'].map({'male': 0, 'female': 1})
    df_all = df_all.reset_index(drop=True)

    # BMI; assumes Height is in cm and Weight in kg -- TODO confirm against data.
    df_all['BMI'] = df_all['Weight'] / (df_all['Height'] / 100) ** 2

    # Harris-Benedict basal metabolic rate, with separate coefficients per sex.
    # Start from a float column so the float assignments do not change dtype.
    df_all['BMR'] = 0.0
    is_male = df_all['Sex'] == 0
    is_female = df_all['Sex'] == 1
    bmr_male = 88.362 + (13.397 * df_all['Weight']) + (4.799 * df_all['Height']) - (5.677 * df_all['Age'])
    bmr_female = 447.593 + (9.247 * df_all['Weight']) + (3.098 * df_all['Height']) - (4.330 * df_all['Age'])
    df_all.loc[is_male, 'BMR'] = bmr_male[is_male]
    df_all.loc[is_female, 'BMR'] = bmr_female[is_female]

    # NOTE: feature scaling (StandardScaler / RobustScaler) is left disabled.

    # Exercise-intensity features.
    df_all['Max_HR'] = 220 - df_all['Age']  # estimated maximum heart rate
    df_all['HR_Reserve_Ratio'] = df_all['Heart_Rate'] / df_all['Max_HR']

    # Interaction features.
    df_all['Weight_Duration'] = df_all['Weight'] * df_all['Duration']
    df_all['Sex_Weight'] = df_all['Sex'] * df_all['Weight']

    # Workload proxy.
    df_all['workload'] = df_all['Weight'] * df_all['Duration'] * df_all['Heart_Rate'] / 1000

    # Physiological ratio.
    df_all['age_heart_ratio'] = df_all['Age'] / df_all['Heart_Rate']

    # Despite its name this is just Duration / 60 / 24 (no timestamp is used).
    df_all['hour_of_day'] = df_all['Duration'] / 60 / 24

    # Pairwise products of every column present so far. Built in one go and
    # concatenated once to avoid fragmenting the frame with per-column inserts.
    base_cols = list(df_all.columns)
    products = {
        f'{feature_1}_x_{feature_2}': df_all[feature_1] * df_all[feature_2]
        for feature_1, feature_2 in combinations(base_cols, 2)
    }
    df_all = pd.concat([df_all, pd.DataFrame(products)], axis=1)

    # Discretise selected continuous features into 10 equal-width bins.
    now_col = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp', 'BMI']
    for col in now_col:
        df_all[col + "_box"] = pd.cut(df_all[col], bins=10, labels=False, right=False)

    return df_all


def main():
    """Train XGB/LGB/CatBoost with K-fold CV plus a stacked model and write the submission."""
    init()
    df_train = pd.read_csv('/kaggle/input/playground-series-s5e5/train.csv')
    df_test = pd.read_csv('/kaggle/input/playground-series-s5e5/test.csv')
    # Optional EDA: call show_relation(df_train, col, 'Calories') per column.

    # Feature engineering on train and test together.
    df_all = _build_features(df_train, df_test)
    n_train = df_train.shape[0]
    X_train = df_all.iloc[:n_train]
    Y_train = np.log1p(df_train['Calories'])  # models are trained on log1p(target)
    x_test = df_all.iloc[n_train:]

    # xgb
    model_xgb = XGBRegressor(
        random_state=42,
        n_estimators=8000,
        objective='reg:squarederror',
        eval_metric='rmse',
        device='cuda',
        learning_rate=0.05,
        max_depth=8,
        colsample_bytree=0.75,
        subsample=0.9,
        early_stopping_rounds=500,
    )
    # lgb
    model_lgb = lightgbm.LGBMRegressor(
        n_estimators=3000,  # many rounds, relying on early stopping
        learning_rate=0.03,  # small learning rate
        num_leaves=15,  # limit model complexity
        min_child_samples=25,  # larger minimum leaf size
        reg_alpha=0.1,  # L1 regularisation
        reg_lambda=0.1,  # L2 regularisation
        objective='regression_l1',  # MAE loss
        early_stopping_rounds=500,
    )
    # cat
    model_cat = CatBoostRegressor(
        iterations=3500,
        learning_rate=0.02,
        depth=12,
        loss_function='RMSE',
        l2_leaf_reg=3,
        random_seed=42,
        eval_metric='RMSE',
        early_stopping_rounds=200,
        verbose=1000,
        task_type='GPU',
    )

    # Stacking: base models are copies of the above with early stopping
    # disabled, because the stacker fits them without an eval_set.
    base_models = [
        ('xgb', XGBRegressor(
            early_stopping_rounds=None,
            **{k: v for k, v in model_xgb.get_params().items() if k != 'early_stopping_rounds'})),
        ('lgb', LGBMRegressor(
            early_stopping_rounds=None,
            **{k: v for k, v in model_lgb.get_params().items() if k != 'early_stopping_rounds'})),
        ('cat', CatBoostRegressor(
            early_stopping_rounds=None,
            **{k: v for k, v in model_cat.get_params().items() if k != 'early_stopping_rounds'})),
    ]
    meta_model = RidgeCV()
    model_stack = StackingRegressor(
        estimators=base_models,
        final_estimator=meta_model,
        cv=3,  # 3-fold CV to generate the meta-features
        passthrough=False,  # meta-model does not see the raw features
        verbose=1,
    )

    FOLDS = 20
    KF = KFold(n_splits=FOLDS, shuffle=True, random_state=42)
    cat_features = ['Sex']
    oof_cat = np.zeros(len(df_train))
    pred_cat = np.zeros(len(df_test))
    oof_xgb = np.zeros(len(df_train))
    pred_xgb = np.zeros(len(df_test))
    oof_lgb = np.zeros(len(df_train))
    pred_lgb = np.zeros(len(df_test))

    for i, (train_idx, valid_idx) in enumerate(KF.split(X_train, Y_train)):
        print('#' * 15, i + 1, '#' * 15)
        # Split the fold.
        x_train, y_train = X_train.iloc[train_idx], Y_train.iloc[train_idx]
        x_valid, y_valid = X_train.iloc[valid_idx], Y_train.iloc[valid_idx]

        # Fit each model, using the validation fold for early stopping.
        model_cat.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], cat_features=cat_features,
                      use_best_model=True, verbose=0)
        model_xgb.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], verbose=0)
        model_lgb.fit(x_train, y_train, eval_set=[(x_valid, y_valid)])

        # Out-of-fold predictions and accumulated test predictions.
        oof_cat[valid_idx] = model_cat.predict(x_valid)
        pred_cat += model_cat.predict(x_test)
        oof_xgb[valid_idx] = model_xgb.predict(x_valid)
        pred_xgb += model_xgb.predict(x_test)
        oof_lgb[valid_idx] = model_lgb.predict(x_valid)
        pred_lgb += model_lgb.predict(x_test)

        cat_rmse = mean_squared_error(y_valid, oof_cat[valid_idx]) ** 0.5
        xgb_rmse = mean_squared_error(y_valid, oof_xgb[valid_idx]) ** 0.5
        lgb_rmse = mean_squared_error(y_valid, oof_lgb[valid_idx]) ** 0.5
        print(f'FOLD {i + 1} CATBOOST_RMSE = {cat_rmse:.4f} <=> XGB_RMSE = {xgb_rmse:.4f} <=> LGB_RMSE = {lgb_rmse:.4f}')

    # Average the accumulated test predictions over the folds.
    pred_cat /= FOLDS
    pred_xgb /= FOLDS
    pred_lgb /= FOLDS

    # The stacker must be fitted before predicting, and it must see the same
    # engineered features as training (x_test), not the raw test frame.
    model_stack.fit(X_train, Y_train)
    pred_stack = model_stack.predict(x_test)

    # Blend in the original target space (inverse of log1p).
    # NOTE(review): pred_lgb is computed but not part of the blend -- confirm intent.
    pred_all = np.expm1(pred_xgb) * 0.1 + np.expm1(pred_stack) * 0.80 + np.expm1(pred_cat) * 0.1

    submission = pd.DataFrame({
        'id': df_test['id'],
        'Calories': pred_all,
    })
    # Bound predictions to [1, 20 * Duration] per row.
    submission['Calories'] = submission['Calories'].clip(lower=1, upper=20 * df_test['Duration'])
    submission.to_csv('/kaggle/working/submission.csv', index=False)


if __name__ == '__main__':
    main()
http://www.dtcms.com/wzjs/815740.html

相关文章:

  • 如何加强网站内容建设第一接单网平台
  • 杭州软件网站建设永州静默管理
  • 临海做网站蛋糕教做网站
  • 经典重庆网站第三方推广平台
  • 建站平台功能结构图wordpress 文章索引
  • 网站ftp用户名和密码建设网站需要营业执照吗
  • 网站建设项目规划书seo网站建设视频教程
  • 南宁东凯做网站的公司wordpress个人博客前台模板下载
  • wordpress技术站主题科技公司网站设计方案
  • 网站建设 绍兴的公司哪家好哪个网站做化妆品效果好
  • jsp和php做网站那个快wordpress 虎嗅 小兽
  • 汕头网站it项目流程八个阶段
  • 临沂城乡建设管理局网站公司设计网站有什么好处
  • 北京网站制作工作室自我简介网页模板html
  • 做网站必须用对方服务器平顺网站建设
  • 2016年两学一做教育网站友链交换网站源码
  • 蚌埠本地网站大气公司网站源码
  • 中国住房和城乡建设部网站建造师东莞网站建议
  • 东莞做网站it s网络营销外包合同模板
  • 镇江房地产网站建设基于lnmp做wordpress
  • 北京网站建立公司做技术分享网站 盈利
  • 网站建设提供资料表枣阳做网站
  • 全球门户中企动力南京谷歌seo
  • 自己怎么做云购网站吗移动网站建设规定
  • 算命网站该怎样做上海工商网查询官网
  • 网站跳转站代码wordpress漏洞破解
  • 门户网站关键词淘客手机网站建设
  • wordpress them8主题网站seo主要是做什么的
  • 想做一个自己的网站怎么做的引用评论框代码wordpress6
  • 零售网站开发东莞网络推广哪家公司好