Data Types

Learn more about types of data in R.

Data Types In R

R can be used as a calculator:

# Arithmetic typed at the prompt is evaluated and the result is printed.
2 + 3

To find the square root of 4 simply type:

# sqrt() computes the square root of its argument.
sqrt(4)

Sequences of Numbers

# The colon operator builds the integer sequence 1, 2, ..., 10.
1:10

It is a good practice to assign names to the vectors:

# Store the sequence under the name myseq1; an assignment prints nothing.
myseq1 <- 1:10

Observe that in this case the output is not printed; instead, it is stored in the variable myseq1. If you want to see the content of the variable myseq1, simply type its name:

# Typing a variable's name at the prompt prints its value.
myseq1

Another method to create a vector is to use the seq() function. If you want to get help about the seq() function, type ?seq.

Help

help.start()    # general help: opens the HTML help index

help(seq)       # help page for the function seq

?seq            # shorthand for help(seq)

# The pattern must be wrapped in plain ASCII double quotes; typographic
# ("curly") quotes are not valid R string delimiters.
apropos("seq")  # list all objects whose name contains the string "seq"

example(seq)    # run the examples from the help page of seq

Let’s generate a vector using seq function:

# From 0 to 10 in steps of 1.
seq(from = 0, to = 10, by = 1)
# 19 equally spaced values from 1 to 10. The argument is spelled out as
# length.out rather than relying on partial matching of `length`.
seq(from = 1, to = 10, length.out = 19)

Assign a name to the generated vector:

# Odd numbers from 1 up to (at most) 10: start at 1, step by 2.
myseq2 <- seq(from = 1, to = 10, by = 2)

seq(from = 1, to = 1, by = ((to - from)/(length.out - 1)), length.out = NULL, along.with = NULL, ...)

seq.int(from, to, by, length.out, along.with, ...)

seq_along(along.with)

seq_len(length.out)

# seq() with two positional arguments counts from 10 up to 14.
my_seq <- seq(10, 14)
seq_along(along.with = my_seq)  # the indices 1, ..., length(my_seq)
seq_len(length.out = 7)         # 1, 2, ..., 7
seq.int(from = 50, to = 56)
seq.int(from = 2, to = 6)

Vectors

The third method is to use the concatenate function c():

c(1:4)
# c() can mix a sequence with individual values.
myVEC3 <- c(1:6, 9, 2)
print(myVEC3)
c("A", "B", "AB", "O")
myVEC4 <- c("A", "Biden", "AB", "O")
# Wrapping an assignment in parentheses also prints the assigned value.
(myVEC4 <- c("A", "Biden", "AB", "O"))

rep() function can be used to replicate:

rep(1, times = 4)        # the value 1 repeated four times
c(rep(0, 5), 1)          # five zeros followed by a one
rep(c(1:5), times = 2)   # the whole vector repeated twice
rep(0:4, each = 3)       # every element repeated three times in a row
rep(0:4, times = 2:6)    # element i repeated times[i] times
# First control, then treatment:
gl(n = 2, k = 8, labels = c("Control", "Treat"))
# 20 alternating 1s and 2s
gl(n = 2, k = 1, length = 20)
# Alternating pairs of 1s and 2s
gl(n = 2, k = 2, length = 20)

Indexing Vectors

The vector elements can be indexed using []. Let’s print myVEC4 first and then extract the second element of it:

myVEC4
# Square brackets pick out elements by position; here, the second one.
ind2 <- myVEC4[2]
cat("The answer is", ind2, "as printed.\n")  # \n produces a new line

You can extract several elements at once by using a vector of indices:

myVEC4[c(1, 3)]   # elements 1 and 3
myVEC4[-2]        # the minus means 'without': all but element 2
myVEC4[-(3:4)]    # all but elements 3 and 4

Vector operations

1.5 + myseq1           # the scalar is added to every element
myVEC2 <- c(1, 2, 3)
2 * myVEC2             # every element is doubled

Change an element of a vector:

# Storing a string in a numeric vector converts the whole vector to character.
myVEC2[3] <- "AAA"

Print myVEC2 to see the change:

myVEC2
sqrt(1:5)   # square root of every element
(1:4)^2     # every element squared

Check certain conditions using which() function. Note that the output is the index of the elements which satisfy the condition:

# Positions (not values) of the elements smaller than 3.
which(myVEC3 < 3)

The elements themselves can be extracted as follows:

# Use the positions returned by which() as an index to get the values.
myVEC3[which(myVEC3 < 3)]

The number of elements which satisfy the condition can be obtained by length() function:

length(which(myVEC3 < 3))   # how many elements are smaller than 3
which.max(myVEC3)           # position of the (first) largest element

Statistics on vectors:

sum(myVEC3)    # total of all elements
mean(myVEC3)   # arithmetic mean
min(myVEC3)    # smallest element
# Sort from largest to smallest. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved word.
sort(myVEC3, decreasing = TRUE)

Arrays

Arrays are the R data objects that can store data in more than two dimensions.

# dim gives rows, columns and number of slices; only as many values of 1:50
# as fit the requested dimensions are used.
array(1:50, dim = c(3, 3, 1))   # one 3 x 3 slice
array(1:50, dim = c(3, 3, 2))   # two 3 x 3 slices
array(1:50, dim = c(3, 3, 3))   # three 3 x 3 slices

Matrices

Matrix creation

# 2 x 4 matrix; values fill the columns first (the default)
matrix(1:8, nrow = 2)
# 3 x 3 matrix, again filled column by column
matrix(1:9, nrow = 3)
# With byrow = TRUE the elements are arranged sequentially by row.
matrix(1:16, nrow = 4, byrow = TRUE)
M <- matrix(1:16, nrow = 4, byrow = FALSE)
M
M[3, 1]   # row 3, column 1
M[3, 3]   # row 3, column 3
M[4, ]    # the whole fourth row

# as.matrix() turns a vector into a one-column matrix
zmat <- as.matrix(1:6)
print(zmat)
zmat2 <- as.matrix(c(3, 10, 5))
print(zmat2)

# Build a 2 x 2 matrix row by row
a1 <- c(0.7, -0.2)
a2 <- c(-0.3, 0.7)
A <- rbind(a1, a2)
# Use the solve() function to calculate the inverse.
solve(A)
m <- cbind(1, 1:7)   # the '1' (= shorter vector) is recycled
m
class(m)
m1 <- cbind(m, 8:14)   # append a column
m1
m2 <- cbind(m, 8:14)[, c(1, 3, 2)]   # append a column, then reorder the columns
m2
cbind(1:7, diag(3))   # vector is subset -> warning
cbind(0, rbind(1, 1:3))
cbind(0, matrix(1, nrow = 0, ncol = 4))   #> Warning (making sense)
cbind(0, matrix(1, nrow = 2, ncol = 0))   #-> 2 x 1

# deparse.level: how row names are derived from the arguments
dd <- 10
rbind(1:4, c = 2, "a++" = 10, dd, deparse.level = 0)   # middle 2 rownames
rbind(1:4, c = 2, "a++" = 10, dd, deparse.level = 1)   # 3 rownames (default)
vc <- rbind(1:4, c = 2, "a++" = 10, dd, deparse.level = 2)   # 4 rownames
class(vc)

Package matlib includes many useful functions.

# Attach the matlib package; it must be installed beforehand,
# e.g. with install.packages("matlib").
library("matlib")

Inverse

# inv() is provided by the matlib package, not by base R; the base R
# function for inverting a matrix is solve().
inv(A)

Determinant

# det() returns the determinant of a square matrix.
det(A)

Transpose

# t() swaps the rows and columns of a matrix.
t(A)

Element-wise multiplication

m <- matrix(1:9, nrow = 3)
n <- matrix(10:18, nrow = 3)
# `*` multiplies entries in matching positions; it is not the matrix product.
m * n

Matrix multiplication

# A has rows a1 and a2
a1 <- c(0, 1)
a2 <- c(0, 0)
A <- rbind(a1, a2)
# B has rows b1 and b2
b1 <- c(0, 0)
b2 <- c(1, 0)
B <- rbind(b1, b2)
# %*% is the matrix product; compare the two orders of multiplication
A %*% B
B %*% A

We can conclude that matrix multiplication is not commutative: in general, AB is not equal to BA.

# Both 2 x 2 matrices are filled row by row.
matrix1 <- matrix(c(1.628, 0.465, 0.698, 1.628), nrow = 2, byrow = TRUE)
matrix2 <- matrix(c(0.4, 0.1, 0.1, 0.3), nrow = 2, byrow = TRUE)
matrix1 %*% matrix2

Identity matrix:

# diag(4) returns the 4 x 4 identity matrix.
diag(4)

Lists

# A list can hold elements of different types and shapes.
L <- list(c(1, 5, 3), matrix(1:6, nrow = 3), c("Hello", "world"))
L
L[[1]]         # first element of L
L[[2]][2, 1]   # element [2, 1] of the second element of L
L[[c(3, 2)]]   # recursive indexing: third element of L, then its second element

List1 <- list(1:4, 7:8)   # a list of two vectors
List2 <- list(c("Hello", "world"), c(1, 5, 3))
Listconcat <- c(List1, List2)   # c() also concatenates lists
print(Listconcat)

# Named elements can be reached with $
Listg <- list(vv = c(1, 5, 3), mm = matrix(1:6, nrow = 3), txtt = c("Hello", "world"))
Listg$vv
Listg$mm
Listg$txtt
Listg$mm[2, 1]   # Listg$mm is a matrix, so it can be indexed with []
Listg$txtt[2]
Listg[[1]]

Data frames

# A data frame with one column I holding two zeros
xx <- data.frame(I = rep(0, 2))
xx
# Column-bind a two-row matrix (rows a and b) to the data frame
vs <- cbind(xx, X = rbind(a = 1, b = 1:3))   # named differently
vs
class(vs)

Another example:

# cheap row names:
b0 <- gl(3, 4, labels = letters[1:3])
b0
# Same factor, with its elements named o1, o2, ...
bf <- setNames(b0, paste0("o", seq_along(b0)))
bf
df <- data.frame(a = 1, B = b0, f = gl(4, 3))
df
df. <- data.frame(a = 1, B = bf, f = gl(4, 3))
df.
# A one-row data frame to append
new <- data.frame(a = 8, B = "B", f = "1")
new
# The outer parentheses print each result of the assignment
(df1 <- rbind(df, new))
(df.1 <- rbind(df., new))

Let’s try another example:

# One vector per column; element i of every vector describes person i.
names <- c("Hans", "Caro", "Lars", "Ines", "Samira", "Peter", "Sarah")
gender <- c("male", "female", "male", "female", "female", "male", "female")
department <- c("IT", "IR", "TRD", "IR", "IT", "TRD", "SALES")
salary <- c(8000, 5500, 7400, 6400, 5800, 6100, 5900)
# The data frame's columns take their names from the vectors.
ITmanagers <- data.frame(names, gender, department, salary)
ITmanagers
str(ITmanagers)       # compact structure: column types and first values
summary(ITmanagers)   # per-column summary
head(ITmanagers)      # first rows
ITmanagers$names      # a single column, accessed with $
ITmanagers$salary
mean(ITmanagers$salary)
data.frame(ITmanagers$names, ITmanagers$salary)

Slicing data frames

ITmanagers$gender[2]   # second entry of the gender column
ITmanagers[, 1]        # first column
ITmanagers[1, ]        # first row

# Logical masks: TRUE marks the rows that satisfy the condition
index1 <- ITmanagers[["department"]] == "IT"
ITmanagers[index1, ]
# NOTE(review): no row of the data has department "HR", so this selects
# zero rows; confirm whether "IR" was intended.
index2 <- ITmanagers[["department"]] == "HR"
ITmanagers[index2, ]
index3 <- ITmanagers[["gender"]] == "female"
ITmanagers[index3, ]
index4 <- ITmanagers[["salary"]] > 2000
ITmanagers[index4, ]

# subset() evaluates the condition inside the data frame
groupIT <- subset(ITmanagers, department == "IT")
print(groupIT)
groupIT$salary
# %in% tests membership in a set of values ("HR" matches no row, see above)
subset(ITmanagers, department %in% c("IT", "HR"))
SALESIRgroup <- subset(ITmanagers, department %in% c("SALES", "IR"))
SALESIRgroup$salary
mean(SALESIRgroup$salary)
notIRgroup <- subset(ITmanagers, department != "IR")
print(notIRgroup)

Classifying the data frames

Classifying the rows of a data frame based on a specific feature can be done using the split() function:

# split() returns a list with one data frame per distinct department
depCLASSES <- split(ITmanagers, f = ITmanagers$department)
print(depCLASSES)
typeof(depCLASSES)
class(depCLASSES)
depCLASSES$TRD   # the rows whose department is "TRD"
# The same idea, grouped by gender
genCLASSES <- split(ITmanagers, f = ITmanagers$gender)
print(genCLASSES)

Adding a new feature

# Assigning to a new column name with $ adds that column.
ITmanagers$age <- c(34, 39, 42, 46, 52, 19, 52)
ITmanagers
# cbind() adds a column as well; the column is named after the vector.
nationality <- c("US", "China", "Japan", "US", "Iran", "US", "Brazil")
ITmanagers <- cbind(ITmanagers, nationality)
ITmanagers
# Create vector objects.
city <- c("Tampa", "Seattle", "Hartford", "Denver", NA, NA, NA)
state <- c("FL", "WA", "CT", "CO", NA, NA, NA)
# ZIP codes are identifiers, not quantities: keep them as strings so that the
# leading zero of "06161" survives (the numeric literal 06161 is just 6161).
zipcode <- c("33602", "98104", "06161", "80294", NA, NA, NA)
# cbind() on vectors yields a character matrix with one column per vector.
addresses <- cbind(city, state, zipcode)
# Add addresses to the data frame
ITmanagers <- cbind(ITmanagers, addresses)
ITmanagers

Adding new entries can be done by creating new data frames and binding them:

# Create the second data frame; rbind() below needs its column names to
# match those of ITmanagers.
ITnewdata <- data.frame(
  names = c("Rasmi", "Pranab", "Tusar"),
  gender = c("female", "male", "female"),
  department = c("IT", "OP", "FI"),
  salary = c(5578, 7422, 6432),
  age = c(46, 52, 52),
  # NOTE(review): the four columns below repeat the department codes and look
  # like placeholder values; confirm the intended data.
  nationality = c("IT", "OP", "FI"),
  city = c("IT", "OP", "FI"),
  state = c("IT", "OP", "FI"),
  zipcode = c("IT", "OP", "FI")
)

# Bind the two data frames.
ITmanagersFINAL <- rbind(ITmanagers, ITnewdata)
print(ITmanagersFINAL)
Next