Python Day8
@浙大疏锦行 PythonDay8.
内容:
- 字典(键值对)
  - 示例:`dict_test = dict(name='zhangsan', age=18)`
- 标签编码(适用于有顺序的离散特征)
- 连续特征的归一化和标准化处理
代码:
# Question 1: dictionaries
# Named `dict_example` rather than `dict` so that the built-in `dict` type is
# not shadowed at module level.
dict_example = {'Alice': 100, 'Bob': 200, 'Charlie': 300}

# Question 2: label encoding and scaling of continuous variables
import pandas as pd
import numpy as np# 独热编码
def one_hot(data, columns):
    """Return ``data`` with the listed columns one-hot encoded.

    Thin wrapper around ``pandas.get_dummies``; the result of that call is
    returned unchanged.

    Args:
        data: DataFrame holding the columns to encode.
        columns: Names of the columns to expand into indicator columns.

    Returns:
        The DataFrame produced by ``pandas.get_dummies``.
    """
    return pd.get_dummies(data, columns=columns)
# 标签编码
def label_encoder(data, columns):
    """Label-encode the given discrete columns of ``data`` in place.

    Each listed column is encoded independently: its distinct non-missing
    values are sorted and every value is replaced by its position
    ``0, 1, 2, ...`` in that sorted order. Missing values stay missing.

    Args:
        data: DataFrame to encode; the listed columns are overwritten.
        columns: Names of the columns to encode. May be empty.

    Returns:
        The same ``data`` object that was passed in.
    """
    # len() rather than truthiness: `columns` may be an array-like whose
    # truth value is ambiguous.
    if len(columns) == 0:
        return data
    for column in columns:
        categories = sorted(data[column].dropna().unique())
        # Lookup table built from the column's own values (not from the
        # column names), so every distinct value gets its own integer code.
        mapping = {value: code for code, value in enumerate(categories)}
        data[column] = data[column].map(mapping)
    return data
# 归一化
def min_max(data, columns):
    """Min-max scale the given columns of ``data`` in place.

    Each value becomes ``(value - min) / (max - min)``, using the minimum and
    maximum of its own column. A column whose values are all equal is mapped
    to ``0.0`` instead of being divided by zero.

    Args:
        data: DataFrame to scale; the listed columns are overwritten.
        columns: Names of the columns to scale. May be empty.

    Returns:
        The same ``data`` object that was passed in.
    """
    # len() rather than truthiness: `columns` may be an array-like whose
    # truth value is ambiguous.
    if len(columns) == 0:
        return data
    for column in columns:
        column_data = data[column]
        min_val = column_data.min()
        value_range = column_data.max() - min_val
        if value_range == 0:
            # Constant column: dividing by zero would turn every value into
            # NaN. value - min is already 0 here; missing values stay missing.
            data[column] = (column_data - min_val).astype(float)
        else:
            data[column] = (column_data - min_val) / value_range
    return data


# Guarded so that importing this module does not read the CSV file.
if __name__ == "__main__":
    data = pd.read_csv("./data/heart.csv")

    # Split the columns by dtype: numeric columns are treated as continuous,
    # everything else (object, string, category, ...) as discrete.
    discrete_columns = []
    continuous_columns = []
    for column in data.columns:
        if pd.api.types.is_numeric_dtype(data[column]):
            continuous_columns.append(column)
        else:
            discrete_columns.append(column)

    data = min_max(data, continuous_columns)  # min-max normalization
    # Alternative encoding: data = one_hot(data, discrete_columns)
    data = label_encoder(data, discrete_columns)  # label encoding
    print(data.head())