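# R语言数据结构与数据处理基础内容 (R data structures and data-processing basics)
# - 5.1 向量 (vectors)
# - 5.2 矩阵与数组 (matrices and arrays)
# - 5.3 数据框 (data frames)
# - 5.4 因子 (factors)
# - 5.5 列表 (lists)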
# 5.1 向量 (Vectors) ----
# Build sequences and repeated vectors.
seq(from = 1, to = 10, by = 1)
x <- rep(3, 3)
rep(1:3, 2)         # whole vector repeated twice: 1 2 3 1 2 3
rep(1:3, each = 2)  # each element repeated twice: 1 1 2 2 3 3

# Print the elements one at a time. seq_along() is used instead of
# 1:length(my_vex) because the latter iterates over c(1, 0) when the
# vector is empty.
my_vex <- 1:3
for (i in seq_along(my_vex)) {
  print(my_vex[i])
}

# Reading past the end yields NA; assigning past the end grows the vector
# and pads the gap with NA.
my_vex[10]
my_vex[10] <- 1
length(my_vex)

# Negative indices drop the named positions.
my_vex[c(-1, -2)]

# Drop the last five elements. Unary minus binds tighter than `:`, so the
# index expression below evaluates to -6:-10.
my_vex <- seq(1, 10, 1)
my_vex <- my_vex[-(length(my_vex) - 5 + 1):-length(my_vex)]

# Logical indices are recycled along the vector.
my_vex[c(TRUE, FALSE)]
my_vex[c(TRUE, TRUE, FALSE, FALSE, FALSE)]

# append() returns a new vector with 1:3 inserted after the first element;
# my_vex itself is not modified.
append(my_vex, 1:3, after = 1)
# `==` compares element by element and returns one logical per position.
c(1, 2, 3) == c(1, 2, 3)

# identical() compares two objects as a whole and returns a single logical.
identical(c(1, 2, 3), c(1, 2, 3))

# Exact comparison of floating-point results is fragile: 1.1 - 0.2 is not
# stored as exactly 0.9, so identical() is expected to report FALSE here,
# whereas all.equal() compares within a numeric tolerance.
identical(1.1 - 0.2, 0.9)
all.equal(1.1 - 0.2, 0.9)

# any() / all() collapse a logical vector to a single value.
v1 <- 1:5
any(v1 > 3)
all(v1 > 3)

# Arithmetic and comparisons are vectorised; the resulting logical vector
# can be used directly as a filter.
v1 <- -3:3
v1 * v1
v1 * v1 > 5
v2 <- v1[v1 * v1 > 5]
v2
# 5.2 矩阵与数组 (Matrices and arrays) ----
# Create a 3 x 4 matrix filled column by column, with row and column names.
matrix(
  1:12,
  nrow = 3, ncol = 4, byrow = FALSE,
  dimnames = list(c("fir", "sec", "thi"), c("fir", "sec", "thi", "fou"))
)

# Setting dim() on a plain vector reshapes it into a matrix in place.
y <- 1:12
dim(y) <- c(3, 4)
print(y)

# diag() on a vector builds a diagonal matrix from its elements.
x <- 1:3
diag(x)
# rep() without a times argument returns its input unchanged, so this gives
# the same diagonal matrix as above.
x <- rep(1:3)
diag(x)

# Combine vectors by row (named rows A and B), then add two more columns.
mat1 <- rbind(A = 1:3, B = 4:6)
mat1
mat2 <- cbind(mat1, c(7, 8), c(9, 10))
mat2

# Matrix product of a 3 x 2 and a 2 x 3 matrix, then a transpose.
mat1 <- matrix(1:6, nrow = 3)
mat1
mat2 <- matrix(11:16, nrow = 2)
mat2
mat1 %*% mat2
mat3 <- t(mat1)
mat3

# Invert a 2 x 2 matrix. mat3 is overwritten with its inverse, so the
# determinant printed afterwards is that of the inverse, not of the
# original matrix.
mat3 <- 1:4
dim(mat3) <- c(2, 2)
mat3 <- solve(mat3)
mat3
det(mat3)

# Eigenvalues and eigenvectors of a 3 x 3 matrix.
mat <- matrix(1:9, nrow = 3)
eigen(mat)

# Indexing: whole column, whole row, single cell, and a row slice.
mat1
mat1[, 1]
mat1[1, ]
mat1[2, 2]
mat2
mat2[1, 2:3]
# apply() over margin 1 works row by row, over margin 2 column by column.
mat <- matrix(1:9, nrow = 3)
apply(mat, 1, sum)
apply(mat, 2, sum)

# Subtract one from every element of x.
#   x: a numeric vector, matrix or array.
# Returns an object of the same shape as x with each element reduced by 1.
f <- function(x) {
  x - 1
}

mat <- matrix(1:6, 3)
mat
# f is vectorised, so it can be applied to the whole matrix directly.
f(mat)
# Applying f row by row returns each row's result as a column, so the
# output is the transpose (2 x 3) of f(mat).
apply(mat, 1, f)

# A 2 x 3 x 2 array with named dimensions: student x subject x semester.
dim1 <- c("Tom", "Bob")
dim2 <- c("math", "chemis", "phy")
dim3 <- c("semes_one", "semes_two")
array(1:12, c(2, 3, 2), dimnames = list(dim1, dim2, dim3))
# 5.3 数据框 (Data frames) ----
# Build a data frame from two parallel vectors; the column names are taken
# from the variable names.
names <- c("Tom", "Bob", "Jerry")
ages <- c(19, 18, 20)
df <- data.frame(names, ages, stringsAsFactors = FALSE)
df

# rbind() appends a row and cbind() appends a column; both return a new
# data frame and leave df unchanged.
rbind(df, list("Rose", 20))
cbind(df, weight = c(70, 73, 60))
# Here the age is supplied as the string "20"; str() shows how that
# affects the type of the ages column in the combined result.
str(rbind(df, list("Fan", "20")))

# Join two data frames on their shared "name" column.
df1 <- data.frame(name = c("Tom", "Bob", "Jerry"), age = c(19, 18, 20))
df1
df2 <- data.frame(name = c("Bob", "Tom", "Jerry"), score = c(90, 85, 88))
df2
merge(df1, df2, by = "name")

# Indexing: a row, a column as a vector, a one-column data frame, and a
# column via `$`.
df1[2, ]
df1[, 1]
df1["name"]
df1$age

# Replace the default row names.
row.names(df1) <- c("stu1", "stu2", "stu3")
df1
# Install sqldf only when it is missing, so that re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("sqldf", quietly = TRUE)) {
  install.packages("sqldf")
}
library(sqldf)

# Query the df1 data frame with SQL: keep the rows whose age exceeds 18.
sqldf("select * from df1 where age > 18")
# 5.4 因子 (Factors) ----
# Convert a character vector into a factor; the levels are the sorted
# unique values.
data <- c("East", "West", "North", "South", "East", "East", "West")
data_fac <- factor(data)
data_fac
levels(data_fac)
# The underlying integer codes (positions within levels(data_fac)).
as.numeric(data_fac)

# Appending a value that is already a level works as expected.
data_fac[length(data_fac) + 1] <- "East"
data_fac
# "center" is not one of the levels, so this assignment raises a warning
# and stores NA instead. Kept on purpose to show that behaviour.
data_fac[length(data_fac) + 1] <- "center"
data_fac

# Per-level counts (including the NA produced above).
summary(data_fac)
# 5.5 列表 (Lists) ----
# A list can hold elements of different types: here an unnamed string, two
# numeric vectors, a data frame and a function.
my_list <- list(
  "it's a string of a list",
  num_vec1 = c(1, 2, 1),
  num_vec2 = c(3, 4, 5),
  df = data.frame(name = c("Tom", "Bob"), age = c(19, 18)),
  fun = function(v1, v2) v1 + v2
)
my_list

# Extract by position with [[ ]] and by name with $.
my_list[[1]]
my_list$num_vec1
# Read a table whose first line holds the column names, then write the data
# frame back to the same file as tab-separated text.
# NOTE(review): the absolute path is specific to one machine, and the write
# overwrites the file that was just read. Confirm both are intended.
data_path <- "C:\\Users\\21136\\OneDrive\\Desktop\\data.txt"
df <- read.table(data_path, header = TRUE)
write.table(df, file = data_path, sep = "\t")
# sort() returns the sorted values; order() returns the permutation of
# indices that would sort the vector.
v <- c(3, 1, 4, 2)
sort(v)
sort(v, decreasing = TRUE)
order(v)
order(-v)     # negating the values gives a descending order
v[order(v)]   # same result as sort(v)

# Sort a data frame by column a ascending, breaking ties by b descending.
df <- data.frame(
  a = c(5, 2, 2, 2),
  b = c(2, 5, 4, 9),
  c = c(75, 435, 43, 735)
)
df[order(df$a, -df$b), ]
# boxplot.stats()$out lists the values lying beyond the boxplot whiskers.
x <- c(1:10, 20, -8)
boxplot.stats(x)$out

# NA propagates through mean() unless it is removed explicitly.
x <- c(1, 2, NA, 4, NA, 5)
mean(x)
mean(x, na.rm = TRUE)