Python训练营-Day29-复习日
对之前的内容做一个梳理，围绕机器学习全流程展开，从数据预处理 → 特征工程 → 模型训练 → 评估优化，形成完整闭环。
将之前做过的关键步骤记录下来:
# ==== Programming basics ====
# DAY1: Variables and formatted strings
name = "Alice"
print(f"Hello, {name}!")

# DAY3: Lists, loops, and conditionals
nums = [1, 2, 3]
for num in nums:
    # Only values greater than 1 are printed.
    if num > 1:
        print(num)

# ==== Data processing ====
# DAY4: Missing-value handling (Pandas)
import pandas as pd

df = pd.DataFrame({'A': [1, None, 3]})
# Replace each missing entry with the mean of its own column, in place.
column_means = df.mean()
df.fillna(column_means, inplace=True)

# DAY5: One-hot encoding
from sklearn.preprocessing import OneHotEncoder

# `df` holds only the numeric column 'A', so selecting 'category' from it
# would raise KeyError; a small categorical demo frame is used instead.
category_df = pd.DataFrame({'category': ['a', 'b', 'a']})
# Double brackets keep the selection two-dimensional (a DataFrame rather
# than a Series), which is the input shape the encoder is fitted on.
encoder = OneHotEncoder().fit(category_df[['category']])

# DAY8: Label encoding
from sklearn.preprocessing import LabelEncoder

# Fit on the labels and convert them to integer codes in a single call;
# the encoded result is not stored.
label_encoder = LabelEncoder()
label_encoder.fit_transform(['A', 'B', 'A'])

# ==== Visualization ====
# DAY9: Heatmap (Seaborn)
import seaborn as sns

# Draw the pairwise column correlations of `df`, writing each value
# inside its cell (annot=True).
correlation = df.corr()
sns.heatmap(correlation, annot=True)

# ==== Machine learning ====
# DAY10: Modeling and evaluation (Scikit-learn)
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# visible part of this file; they are expected to exist before this point.
model = RandomForestClassifier()
model.fit(X_train, y_train)
test_score = model.score(X_test, y_test)
print(test_score)

# DAY11: Hyperparameter tuning (GridSearchCV)
from sklearn.model_selection import GridSearchCV

# Candidate values tried for the forest's n_estimators setting.
param_grid = {'n_estimators': [50, 100]}
# NOTE(review): X and y are not defined in the visible part of this file.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X, y)

# DAY14: SHAP analysis
import shap

# Build a tree explainer for the fitted model, then compute SHAP values for X.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# ==== Feature engineering ====
# DAY19: Feature selection (Lasso)
from sklearn.linear_model import Lasso

# Fit an L1-regularised linear model and read its coefficients; the value
# is only accessed here, not stored.
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)
lasso.coef_

# DAY20: SVD decomposition
from sklearn.decomposition import TruncatedSVD

# Reduce X to two components; the transformed result is not stored.
svd = TruncatedSVD(n_components=2)
svd.fit_transform(X)

# ==== Advanced syntax ====
# DAY25: Exception handling
try:
    # Dividing by zero raises before `x` is ever bound.
    x = 1 / 0
except ZeroDivisionError:
    print("Error")

# DAY27: Decorators
def my_decorator(func):
    """Return a wrapper that prints "Before" and then calls *func*.

    Args:
        func: The callable to wrap.

    Returns:
        A callable that accepts the same arguments as *func*, prints
        "Before", forwards the arguments to *func*, and returns its result.
    """
    from functools import wraps

    # wraps() copies the name and docstring of func onto the wrapper so the
    # decorated function remains recognisable when inspected.
    @wraps(func)
    def wrapper(*args, **kwargs):
        print("Before")
        return func(*args, **kwargs)

    return wrapper


# DAY28: Class definition
class MyClass:
    """Minimal example class that stores one value and can print it."""

    def __init__(self, x):
        """Store *x* on the instance as ``self.x``."""
        self.x = x

    def print_x(self):
        """Print the stored value ``self.x`` to standard output."""
        print(self.x)
@浙大疏锦行