当前位置: 首页 > news >正文

线性回归评价标准

In [1]:

1 import numpy as np
2 from sklearn.linear_model import LinearRegression
3 import sklearn.datasets as datasets
1 diabetes = datasets.load_diabetes()
2 diabetes

In [2]:

Out[2]: {‘data’: array([[ 0.03807591,0.05068012,0.06169621,…,-0.00259226,

0.01990842,-0.01764613],

[-0.00188202,-0.04464164,-0.05147406,…,-0.03949338,

-0.06832974,-0.09220405],

[ 0.08529891, 0.05068012, 0.04445121,…,-0.00259226,

0.00286377,-0.02593034],

···,

[0.04170844, 0.05068012,-0.01590626,…,-0.01107952,

-0.04687948,0.01549073],

[-0.04547248,-0.04464164, 0.03906215,…, 0.02655962,

0.04452837,-0.02593034],

[-0.04547248,-0.04464164,-0.0730303,…,-0.03949338,

-0.00421986, 0.00306441]]),

‘target’: array([151., 75.,141.,206.,135., 97.,138.,63.,110.,310.,101.,

69.,179.,185.,118.,171.,166.,144.,97.,168.,68.,49.,

68.,245.,184.,202.,137.,85.,131.,283.,129.,59.,341.,

87.,65.,102.,265.,276.,252.,90.,100.,55.,61.,92.,

259.,53.,190.,142.,75.,142.,155.,225.,59.,104.,182.,

128.,52.,37.,170.,170.,61.,144.,52.,128.,71.,163.,

150.,97.,160.,178.,48.,270.,202.,111.,85.,42.,170.,

200.,252.,113.,143.,51.,52.,210.,65.,141.,55.,134.,

42.,111.,98.,164.,48.,96.,90.,162.,150.,279.,92.,

83.,128.,102.,302.,198.,95.,53.,134.,144.,232.,81.,

104.,59.,246.,297.,258.,229.,275.,281.,179.,200.,200.,

173.,180.,84.,121.,161.,99.,109.,115.,268.,274.,158.,

107.,83.,103.,272.,85.,280.,336.,281.,118.,317.,235.,

60.,174.,259.,178.,128.,96.,126.,288.,88.,292.,71.,

197.,186.,25.,84.,96.,195.,53.,217.,172.,131.,214.,

59.,70.,220.,268.,152.,47.,74.,295.,101.,151.,127.,

237.,225.,81.,151.,107.,64.,138.,185.,265.,101.,137.,

143.,141.,79.,292.,178.,91.,116.,86.,122.,72.,129.,

142.,90.,158.,39.,196.,222.,277.,99.,196.,202.,155.,

77.,191.,70.,73.,49.,65.,263.,248.,296.,214.,185.,

78.,93.,252.,150.,77.,208.,77.,108.,160.,53.,220.,

154.,259.,90.,246.,124.,67.,72.,257.,262.,275.,177.,

71.,47.,187.,125.,78.,51.,258.,215.,303.,243.,91.,

150.,310.,153.,346.,63.,89.,50.,39.,103.,308.,116.,

145.,74.,45.,115.,264.,87.,202.,127.,182.,241.,66.,

94.,283.,64.,102.,200.,265.,94.,230.,181.,156.,233.,

60.,219.,80.,68.,332.,248.,84.,200.,55.,85.,89.,

31.,129.,83.,275.,65.,198.,236.,253.,124.,44.,172.,

114.,142.,109.,180.,144.,163.,147.,97.,220.,190.,109.,

191.,122.,230.,242.,248.,249.,192.,131.,237.,78.,135.,

244.,199.,270.,164.,72.,96.,306.,91.,214.,95.,216.,

263.,178.,113.,200.,139.,139.,88.,148.,88.,243.,71.,

77.,109.,272.,60.,54.,221.,90.,311.,281.,182.,321.,

58.,262.,206.,233.,242.,123.,167.,63.,197.,71.,168.,

140.,217.,121.,235.,245.,40.,52.,104.,132.,88.,69.,

219.,72.,201.,110.,51.,277.,63.,118.,69.,273.,258.,

43.,198.,242.,232.,175.,93.,168.,275.,293.,281.,72.,

140.,189.,181.,209.,136.,261.,113.,131.,174.,257.,55.,

84.,42.,146.,212.,233.,91.,111.,152.,120.,67.,310.,

94.,183.,66.,173.,72.,49.,64.,48.,178.,104.,132.,

220.,57.]),

‘DESCR’: 'Diabetes dataset\n================\n\nNotes\n-----\n\nTen baseline variables, age, sex, body mass index, average blood\npressure, and six blood serum measurements were obtained for each of\nn = 442 diabetes patients, as well as the response of interest, a\nquantitative measure of disease progression one year after baseline.\n\nData Set Characteristics:\n\n  :Number of Instances: 442\n\n  :Number of Attributes: First 10 columns are numeric predictive values\n\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n\n  :Attributes:\n    :Age:\n    :Sex:\n    :Body mass index:\n    :Average blood pressure:\n    :S1:\n    :S2:\n    :S3:\n    :S4:\n    :S5:\n    :S6:\n\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times n_samples (i.e. the sum of squares of each column totals 1).\n\nSource URL:\nhttp://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n\nFor more information see:\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499.\n(http://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n',

‘feature_names’: [‘age’,
 ‘sex’,
 ‘bmi’,
 ‘bp’,
 ‘s1’,
 ‘s2’,
 ‘s3’,
 ‘s4’,
 ‘s5’,
 ‘s6’]}

In [3]:

1 X = diabetes['data']
2 y = diabetes['target']

In [4]:

1 from sklearn.model_selection import train_test_split

In [5]: 1 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:

1 X_train.shape

Out[6]: (353,10)

In [7]: 1 X_train[:5]

Out[7]:array([[-0.08906294,-0.04464164,-0.01159501,-0.03665645,0.01219057,

0.02499059,-0.03603757, 0.03430886, 0.02269202,-0.00936191],

[0.02717829,0.05068012,-0.00620595,0.0287581,-0.01670444,

-0.00162703,-0.0581274,0.03430886,0.02930041,0.03205916],

[0.01628068, 0.05068012,-0.046085,0.01154374,-0.03321588,

-0.01603186,-0.01026611,-0.00259226,-0.0439854,-0.04249877],

[0.04170844,0.05068012,-0.01590626,0.01728186,-0.03734373,

-0.01383982,-0.02499266,-0.01107952,-0.04687948,0.01549073],

[-0.02367725,-0.04464164,-0.01590626,-0.01255635,0.02044629,

0.04127431,-0.04340085, 0.03430886,0.01407245,-0.00936191]])

In [10]: 1 # 该数据有正有负,说明此数据被处理过,归一化
         2 X_train.std(axis=0)

Out[10]: array([0.04734822,0.0475412,0.04825435,0.04607366,0.04724651,

0.04758674,0.04927031,0.04779197,0.04657944,0.04707625])

In [11]:

1 lr = LinearRegression()
In [12]: 1 lr.fit(X_train, y_train)

Out[12]: LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [16]:

1 y_ = lr.predict(X_test)
2 y_.round(2)

Out[16]: array([85.12,163.71,63.55,218.3,163.06,150.55,82.88,193.32,

208.76,125.45,107.27,238.29,59.74,100.33,112.66,241.96,

187.74,188.74,148.74,199.43,209.59,229.64,160.79,149.22,

135.61,211.9,65.1,130.51,128.21,127.21,176.68,198.78,

217.68,74.84,122.66,130.45,99.47,155.85,71.78,82.48,

174.39,90.78,76.32,159.8,230.54,108.38,209.47,215.79,

198.95, 71.31,79.04,90.62,194.97,191.12,86.71,238.62,

162.68,196.31,210.25,78.46,147.33,201.89,291.03,215.39,

176.53, 58.49,122.49,192.83,175.76,130.75,249.97,152.78,

79.42,194.09,224.39,173.03,213.03,95.59,216.38,166.36,

81.86,93.82,209.76,116.94,115.04,144.28,256.24,179.02,

169.27])

In [14]:1 y_test

Out[14]: array([181.,179., 77.,295.,131.,202.,37.,257.,52.,139.,102.,

195.,65., 53.,97.,306.,78.,241.,25.,186.,221.,261.,

209.,100.,40.,288.,52.,92.,145.,150.,91.,265.,225.,

77.,84.,53.,54.,85.,55.,80.,262.,64.,200.,113.,

232.,97.,192.,275.,131.,92.,138.,49.,292.,232.,55.,

280.,220.,220.,265.,89.,150.,268.,270.,152.,77.,85.,

162.,123.,216.,131.,310.,246.,42.,68.,281.,141.,220.,

72.,163.,178.,51.,71.,121.,61.,160.,200.,336.,52.,

184.])

In [ ]:

1 '''The coefficient R^2 is defined as (1 - u/v), where u is the residual
2 sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
3 sum of squares ((y_true - y_true.mean()) ** 2).sum().
4 The best possible score is 1.0 and it can be negative (because the
5 model can be arbitrarily worse). A constant model that always
6 predicts the expected value of y, disregarding the input features,
7 would get a R^2 score of 0.0.'''

In [15]:

1 # R2 决定系数
2 lr.score(X_test, y_test)

Out[15]: 0.5100017419052714

In [17]:

1 # 残差平方和: ((y_true - y_pred) ** 2).sum()
2 u = ((y_test - y_) ** 2).sum()
3 u

Out[17]: 304838.15677376505

In [19]:

1 # 总平方和: ((y_true - y_true.mean()) ** 2).sum()
2 v = ((y_test - y_test.mean()) ** 2).sum()
3 v

Out[19]: 622120.8988764045

In [20]:

1 1-u/v

Out[20]: 0.5100017419052714

In [21]:

1 1-np.var(y_test-y_)/np.var(y_test)

Out[21]: 0.5108163438179167

In [22]:

1 np.abs(y_test - y_).mean()

Out[22]: 47.55520213877001

In [25]:

1 from sklearn.metrics import mean_absolute_error,mean_squared_error, mean_squared_log_error

In [24]: 1 mean_absolute_error(y_test,y_)

Out[24]:47.55520213877001

相关文章:

  • Beyond Compare 5破解
  • 面试常问系列(一)-神经网络参数初始化-之-softmax
  • 第二章 Logback的架构(二)
  • [250504] Moonshot AI 发布 Kimi-Audio:开源通用音频大模型,驱动多模态 AI 新浪潮
  • Adobe卸载清理工具Creative Cloud Cleaner Tool下载
  • 学习Python的第二天之网络爬虫
  • 各国健康指标数据查询
  • P48-56 应用游戏标签
  • PCIe控制逻辑介绍(一)
  • GitHub中多个PR时,如何协同合并和管理
  • 【计算机网络】TCP为什么可靠?解决了哪些问题?
  • JPress安装(Docker)
  • iMeta | 临床研究+scRNA-seq的组合思路 | 真实世界新辅助研究,HER2⁺就一定受益?单细胞揭示真正的“疗效敏感克隆”
  • 【BUG】mmdetection ValueError: need at least one array to concatenate
  • 【Qt4】Qt4中实现PDF预览
  • 【东枫科技】代理英伟达产品:智能网卡的连接线
  • URP - 深度图
  • CSS网格布局
  • UE5 ML机械学习肌肉反应与布料反应
  • 大疆三方云平台部署
  • 金融监管总局将推出8项增量政策:涉房地产金融、险资入市、稳外贸等
  • 综合治理食品添加剂滥用问题,国务院食安办等六部门联合出手
  • 娱见 | 为了撕番而脱粉,内娱粉丝为何如此在乎番位
  • 李翔宁:城市的每个人都参与了上海的建造,这一过程还在持续
  • 特朗普要征100%关税,好莱坞这批境外摄制新片有麻烦了
  • 陈燮阳从艺60周年:指挥棒不停,心跳就不会老去