当前位置: 首页 > wzjs >正文

wordpress 3.2 下载seo排名诊断

wordpress 3.2 下载,seo排名诊断,东莞智通人才网官网登录,广州网页设计师Predict Calorie Expenditure 题意: 给出每个人的基本信息,预测运动后的卡路里消耗值。 数据处理: 1.构造出人体机能、运动相关的特征值。 2.所有特征值进行从新组合,注意唯独爆炸 3.对连续信息分箱变成离散 建立模型&#x…

Predict Calorie Expenditure

题意:

给出每个人的基本信息,预测运动后的卡路里消耗值。

数据处理:

1.构造出人体机能、运动相关的特征值。
2.所有特征值进行从新组合,注意唯独爆炸
3.对连续信息分箱变成离散

建立模型:

1.xgb模型,lgb模型,cat模型
2.使用stack堆叠融合,使用3折交叉验证
3.对xgb、lgb、cat进行K折交叉验证,最终和stack进行结果融合。

代码:
import os
import sys
import warnings
import numpy as np
import pandas as pd
import seaborn
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from matplotlib import pyplot as plt
import lightgbm
from mlxtend.regressor import StackingCVRegressor
from sklearn import clone
from sklearn.ensemble import VotingRegressor, StackingClassifier, StackingRegressor
from sklearn.linear_model import Lasso, LogisticRegression, RidgeCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer, mean_squared_log_error
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridgedef init():os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 仅输出错误日志warnings.simplefilter('ignore')  # 忽略警告日志pd.set_option('display.width', 1000)pd.set_option('display.max_colwidth', 1000)pd.set_option("display.max_rows", 1000)pd.set_option("display.max_columns", 1000)def show_dataframe(df):print("查看特征值和特征值类型\n" + str(df.dtypes) + "\n" + "-" * 100)print("查看前10行信息\n" + str(df.head()) + "\n" + "-" * 100)print("查看每个特征值的各种数据统计信息\n" + str(df.describe()) + "\n" + "-" * 100)print("输出重复行的个数\n" + str(df.duplicated().sum()) + "\n" + "-" * 100)print("查看每列的缺失值个数\n" + str(df.isnull().sum()) + "\n" + "-" * 100)print("查看缺失值的具体信息\n" + str(df.info()) + "\n" + "-" * 100)#print("输出X所有值出现的是什么,还有对应出现的次数\n" + str(df['X'].value_counts()) + "\n" + "-" * 100)def show_relation(data, colx, coly):  # 输出某一特征值与目标值的关系if data[colx].dtype == 'object' or data[colx].dtype == 'category' or len(data[colx].unique()) < 20:seaborn.boxplot(x=colx, y=coly, data=data)else:plt.scatter(data[colx], data[coly])plt.xlabel(colx)plt.ylabel(coly)plt.show()# 自定义RMSLE评分函数(GridSearchCV需要最大化评分,因此返回负RMSLE)
def rmsle_scorer(y_true, y_pred):y_pred = np.clip(y_pred, 1e-15, None)  # 防止对0取对数y_true = np.clip(y_true, 1e-15, None)log_error = np.log(y_pred + 1) - np.log(y_true + 1)rmsle = np.sqrt(np.mean(log_error ** 2))return -rmsle  # 返回负值,因为GridSearchCV默认最大化评分if __name__ == '__main__':init()df_train = pd.read_csv('/kaggle/input/playground-series-s5e5/train.csv')df_test = pd.read_csv('/kaggle/input/playground-series-s5e5/test.csv')#for col in df_train.columns:#   show_relation(df_train, col, 'Calories')#特征工程df_all = pd.concat([df_train.drop(['id', 'Calories'], axis=1), df_test.drop(['id'], axis=1)], axis=0)df_all['Sex'] = df_all['Sex'].map({'male': 0, 'female': 1})df_all = df_all.reset_index(drop=True)#构造BMIdf_all['BMI'] = df_all['Weight'] / (df_all['Height'] / 100) ** 2#Harris-Benedict公式df_all['BMR'] = 0df_all.loc[df_all['Sex'] == 0, 'BMR'] = 88.362 + (13.397 * df_all['Weight']) + (4.799 * df_all['Height']) - (5.677 * df_all['Age'])df_all.loc[df_all['Sex'] == 1, 'BMR'] = 447.593 + (9.247 * df_all['Weight']) + (3.098 * df_all['Height']) - (4.330 * df_all['Age'])# 数值特征标准化#numeric_features = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']#scaler = StandardScaler()#df_all[numeric_features] = scaler.fit_transform(df_all[numeric_features])#运动强度特征df_all['Max_HR'] = 220 - df_all['Age']  # 最大心率df_all['HR_Reserve_Ratio'] = df_all['Heart_Rate'] / df_all['Max_HR']#交互特征df_all['Weight_Duration'] = df_all['Weight'] * df_all['Duration']df_all['Sex_Weight'] = df_all['Sex'] * df_all['Weight']# 构造运动功率特征df_all['workload'] = df_all['Weight'] * df_all['Duration'] * df_all['Heart_Rate'] / 1000# 构造生理特征交互项df_all['age_heart_ratio'] = df_all['Age'] / df_all['Heart_Rate']# 时间维度特征(如有时间戳)df_all['hour_of_day'] = df_all['Duration']/60/24# 组合特征numeric_cols = df_all.columnsfor i in range(len(numeric_cols)):feature_1 = numeric_cols[i]for j in range(i + 1, len(numeric_cols)):feature_2 = numeric_cols[j]df_all[f'{feature_1}_x_{feature_2}'] = df_all[feature_1] * df_all[feature_2]#数值归一化#scaler = RobustScaler()#df_all = scaler.fit_transform(df_all)now_col = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp', 'BMI']for i in now_col:df_all[i + "_box"] = pd.cut(df_all[i], bins=10, labels=False, right=False)X_train = df_all[:df_train.shape[0]]Y_train = np.log1p(df_train['Calories'])x_test = df_all[df_train.shape[0]:]#xgbmodel_xgb =estimator=XGBRegressor(random_state=42,n_estimators=8000,objective='reg:squarederror',eval_metric='rmse',device='cuda',learning_rate=0.05,max_depth=8,colsample_bytree=0.75,subsample=0.9,#reg_lambda = 1,#reg_alpha = 0.5,early_stopping_rounds=500,)#lgbmodel_lgb = lightgbm.LGBMRegressor(n_estimators=3000,  # 增加迭代次数配合早停learning_rate=0.03,  # 减小学习率num_leaves=15,  # 限制模型复杂度min_child_samples=25,  # 增加最小叶子样本数reg_alpha=0.1,  # L1正则化reg_lambda=0.1,  # L2正则化objective='regression_l1',  # 改用MAE损失early_stopping_rounds=500,)#catmodel_cat = CatBoostRegressor(iterations=3500,learning_rate=0.02,depth=12,loss_function='RMSE',l2_leaf_reg=3,random_seed=42,eval_metric='RMSE',early_stopping_rounds=200,verbose=1000,task_type='GPU',)#融合#创建基模型列表(需禁用早停以生成完整预测)base_models = [('xgb', XGBRegressor(early_stopping_rounds=None,  # 禁用早停**{k: v for k, v in model_xgb.get_params().items() if k != 'early_stopping_rounds'})),('lgb', LGBMRegressor(early_stopping_rounds=None,  # 禁用早停**{k: v for k, v in model_lgb.get_params().items() if k != 'early_stopping_rounds'})),('cat', CatBoostRegressor(early_stopping_rounds=None,  # 禁用早停**{k: v for k, v in model_cat.get_params().items() if k != 'early_stopping_rounds'}))]meta_model = RidgeCV()model_stack = StackingRegressor(estimators=base_models,final_estimator=meta_model,cv=3,  # 使用3折交叉验证生成元特征passthrough=False,  # 不使用原始特征verbose=1)FOLDS = 20KF = KFold(n_splits=FOLDS, shuffle=True, random_state=42)cat_features = ['Sex']oof_cat = np.zeros(len(df_train))pred_cat = np.zeros(len(df_test))oof_xgb = np.zeros(len(df_train))pred_xgb = np.zeros(len(df_test))oof_lgb = np.zeros(len(df_train))pred_lgb = np.zeros(len(df_test))for i, (train_idx, valid_idx) in enumerate(KF.split(X_train, Y_train)):print('#' * 15, i + 1, '#' * 15)## SPLIT DSx_train, y_train = X_train.iloc[train_idx], Y_train.iloc[train_idx]x_valid, y_valid = X_train.iloc[valid_idx], Y_train.iloc[valid_idx]## CATBOOST fitmodel_cat.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], cat_features=cat_features,use_best_model=True, verbose=0)## XGB FIRmodel_xgb.fit(x_train, y_train, eval_set=[(x_valid, y_valid)], verbose=0)## LGB MODELmodel_lgb.fit(x_train, y_train, eval_set=[(x_valid, y_valid)])## PREDICTION CATBOOSToof_cat[valid_idx] = model_cat.predict(x_valid)pred_cat += model_cat.predict(x_test)## PREDICTION XGBoof_xgb[valid_idx] = model_xgb.predict(x_valid)pred_xgb += model_xgb.predict(x_test)## PREDICTION LGBoof_lgb[valid_idx] = model_lgb.predict(x_valid)pred_lgb += model_lgb.predict(x_test)cat_rmse = mean_squared_error(y_valid, oof_cat[valid_idx]) ** 0.5xgb_rmse = mean_squared_error(y_valid, oof_xgb[valid_idx]) ** 0.5lgb_rmse = mean_squared_error(y_valid, oof_lgb[valid_idx]) ** 0.5print(f'FOLD {i + 1} CATBOOST_RMSE = {cat_rmse:.4f} <=> XGB_RMSE = {xgb_rmse:.4f} <=> LGB_RMSE = {lgb_rmse:.4f}')#预测pred_cat /= FOLDSpred_xgb /= FOLDSpred_lgb /= FOLDSpred_stack = model_stack.predict(df_test)pred_all = np.expm1(pred_xgb) * 0.1 + np.expm1(pred_stack) * 0.80 + np.expm1(pred_cat) * 0.1submission = pd.DataFrame({'id': df_test['id'],'Calories': pred_all})submission['Calories'] = np.clip(submission['Calories'], a_min=1, a_max=20*df_test['Duration'])submission.to_csv('/kaggle/working/submission.csv', index=False)
http://www.dtcms.com/wzjs/254178.html

相关文章:

  • 公司二次开发好吗网络营销的seo是做什么的
  • 做网站和SSH友情链接怎么连
  • 品牌网站制作推广引流工具
  • 网站排名怎么提升网站交易平台
  • 罗湖草铺附近做网站外贸网站建设平台
  • 徐州做网站的设计师企业培训机构有哪些
  • 免费企业网站注册石家庄百度seo
  • 国内大的网站建设公司深圳网站设计公司
  • 成都网站设计新闻seo诊断优化专家
  • 做断桥铝门窗网站孔宇seo
  • 苏州网站开发公司济南兴田德润地址行业数据统计网站
  • wordpress媒体插件河北电子商务seo
  • 购物网站 wordpress 英文模板seo优化招商
  • 开发app软件需要多少钱杭州seo价格
  • 百度站长资源平台深圳网络营销公司
  • 铜陵网站开发微信小程序开发详细步骤
  • 网站改进建议营业推广策略有哪些
  • 网站打开速度太慢seo文章推广
  • 网站建设 制作流量神器
  • 网站建设推广方案模版全国广告投放平台
  • 泰州网站制作企业百度推广服务
  • 用手机建立网站小红书sem是什么意思
  • 如何写网站建设实验结果分析如何推广品牌知名度
  • 富阳网站制作seo排名查询工具
  • 如何在社交网站上做视频推广常见的营销型网站
  • 常州网站建设麦策电商网络服务包括
  • 北京高端建设网站网店推广策划书
  • 广州专业网站制作哪家专业seo诊断书案例
  • 知名做网站app开发需要多少钱
  • 企业网站的建设与维护软文推广代理平台