################
## EXERCISE 1 ##
################
# 0. Load the comma-separated marks from "marks.txt" into a vector.
marks <- scan(file = "marks.txt", sep = ",")

# 1. Absolute frequency: how many times each distinct mark occurs.
#    Relative frequency: that count divided by the number of values read.
abs.freq <- table(marks)
rel.freq <- abs.freq / length(marks)
# 2. Bar diagrams (vertical lines) and barplots of both frequency tables.
# Uncomment the pdf()/dev.off() pair to write the plots to a PDF file
# (e.g. a path such as /Users/Marco/Desktop/). par(mfrow = c(nrows, ncols))
# arranges the plots on an nrows x ncols grid that is filled by rows.
#pdf(file="C:/path/file.pdf");
#par(mfrow=c(2,2));

# Absolute frequency bar diagram: one thick vertical line per mark, a cross
# on top of each line and a dotted horizontal guide at each frequency.
plot(
  abs.freq,
  type = "h",
  col = rainbow(length(abs.freq)),
  las = 1,
  lwd = 5,
  main = "absolute frequency bar diagram",
  ylab = "absolute frequency"
)
points(x = as.integer(names(abs.freq)), y = abs.freq, pch = 3, lwd = 2)
abline(h = abs.freq, lty = "dotted")

# Relative frequency bar diagram, built the same way.
plot(
  rel.freq,
  type = "h",
  col = rainbow(length(rel.freq)),
  las = 1,
  lwd = 5,
  main = "relative frequency bar diagram",
  ylab = "relative frequency"
)
points(x = as.integer(names(rel.freq)), y = rel.freq, pch = 3, lwd = 2)
abline(h = rel.freq, lty = "dotted")

# Absolute frequency barplot. barplot() returns the x-coordinates of the bar
# midpoints, which are used to place the count above each bar.
bp <- barplot(
  abs.freq,
  col = rainbow(length(abs.freq)),
  las = 1,
  main = "absolute frequency barplot",
  ylab = "absolute frequency"
)
text(x = bp, y = abs.freq + 0.2, labels = as.character(abs.freq), xpd = TRUE)

# Relative frequency barplot, labelled with the frequencies rounded to 3 digits.
bp <- barplot(
  rel.freq,
  col = rainbow(length(rel.freq)),
  ylim = c(0, 0.2),
  las = 1,
  main = "relative frequency barplot",
  ylab = "relative frequency"
)
text(
  x = bp,
  y = rel.freq + 0.005,
  labels = as.character(round(rel.freq, 3)),
  xpd = TRUE,
  cex = 0.7
)
#dev.off();
# 3.0 Pie chart of the relative frequencies; each slice is labelled
#     "<mark> (<percentage>%)".
rel.freq.perc <- round(100 * rel.freq, 2)
lbs <- paste0(names(rel.freq), " ", "(", rel.freq.perc, "%", ")")
pie(
  rel.freq,
  labels = lbs,
  col = rainbow(length(rel.freq)),
  main = "Relative Frequency Pie Chart "
)
# 3.1 Relative-frequency pie chart for the mark classes
#     (18,24] (24,25] (25,29] (29,30].
# cut() builds right-closed intervals by default, so a mark equal to the
# lowest break (18) belongs to no class and is turned into NA.
int.marks <- c(18, 24, 25, 29, 30)
class.marks <- cut(marks, breaks = int.marks)
freq.class <- table(class.marks)
# Scale the proportion by 100 so that the number agrees with the "%" sign
# appended to the label (the bare proportion would be printed as e.g. "0.25%"
# for a class holding a quarter of the marks).
freq.class.perc <- round(100 * freq.class / length(class.marks), 2)
lbs <- paste0(names(freq.class.perc), " ", "(", freq.class.perc, "%", ")")
pie(freq.class, labels = lbs, col = rainbow(length(freq.class)))
## 3D pie charts (pie3D() is provided by the plotrix package)
library(plotrix)

# All marks: slices sized by the percentage of each mark.
lbs <- paste0(names(rel.freq), " ", "(", rel.freq.perc, "%", ")")
pie3D(
  rel.freq.perc,
  labels = lbs,
  col = rainbow(length(rel.freq.perc)),
  main = "Relative Frequency Pie Chart",
  explode = 0.1,
  radius = 2
)

# Mark classes: slices sized by the class counts.
lbs <- paste0(names(freq.class.perc), " ", "(", freq.class.perc, "%", ")")
pie3D(
  freq.class,
  labels = lbs,
  col = rainbow(length(freq.class)),
  explode = 0.1,
  radius = 2
)
# 4.0 Histogram of the relative frequencies.
# With freq = FALSE hist() draws densities (relative frequency / bin width);
# the bar heights equal the relative frequencies, and sum to 1, only when
# every bin has width 1.
# Save only the settable graphical parameters: a plain par() also returns the
# read-only ones, and par(opar) would then emit a warning for each of them.
opar <- par(no.readonly = TRUE)
# Smaller axis annotation; see "Text and Symbol Size" at
# http://www.statmethods.net/advgraphs/parameters.html
par(cex.axis = 0.7)
# breaks = 11 is a suggestion for the number of cells, not an exact number of
# break points: hist() chooses "pretty" breaks close to it. The object
# returned by hist() is kept so the histogram is computed only once.
marks.hist <- hist(
  marks,
  freq = FALSE,
  breaks = 11,
  col = "darkblue",
  axes = FALSE,
  ylab = "Relative Frequency"
)
den <- unique(marks.hist$density) # distinct bar heights
brk <- marks.hist$breaks          # bin boundaries
axis(1, at = brk, las = 1)                 # x-axis (1 = bottom), ticks at the breaks
axis(2, at = c(0, round(den, 4)), las = 1) # y-axis (2 = left), ticks at the bar heights
abline(h = den, lty = "dotted")            # dotted guide at each bar height
#par(opar); ## restore original settings
# 4.1 Histogram whose bar bases are the mark classes
#     (18,24] (24,25] (25,29] (29,30].
int.marks <- c(18, 24, 25, 29, 30)
# The y-axis shows the density h_i = f_i / D_i, where f_i is the relative
# frequency of class i and D_i is its bin width.
class.hist <- hist(
  marks,
  freq = FALSE,
  breaks = int.marks,
  col = "darkblue",
  axes = FALSE
)
den <- unique(class.hist$density) # distinct densities
brk <- class.hist$breaks          # class boundaries
# Bottom axis (1) at the class boundaries, left axis (2) at the densities,
# plus a dotted horizontal guide at each density.
axis(1, at = brk, las = 1)
axis(2, at = c(0, round(den, 4)), las = 1)
abline(h = den, lty = "dotted")
#par(opar); ## restore original settings (neglecting the warnings)
# 5.0 Empirical cumulative distribution function (ECDF) of all marks
plot.ecdf(
  marks,
  xlab = "marks",
  ylab = "cumulative relative frequency",
  main = "Empirical Cumulative Distribution Function"
)
## Other ways to draw the CDF:
##   0. plot(ecdf(marks));
##   1. plot.stepfun(marks,xlab="marks");
##   2. cum.rel <- cumsum(rel.freq); plot(as.integer(names(cum.rel)),cum.rel,type="s",xlab="",ylab=""); points(as.integer(names(cum.rel)),cum.rel);
## Note: with plot() the values on the x-axis have to be supplied explicitly.
# 5.1 ECDF for the mark classes (18,24] (24,25] (25,29] (29,30]
int.marks <- c(18, 24, 25, 29, 30)
class.marks <- cut(marks, breaks = int.marks)
freq.class <- table(class.marks)
# Running total of the class counts, then expressed as a share of all marks.
cum.class.marks <- cumsum(freq.class)
cum.rel.class.marks <- cum.class.marks / length(marks)
# Step plot over the class boundaries, starting from 0 at the lowest break.
plot(
  x = int.marks,
  y = c(0, cum.rel.class.marks),
  type = "s",
  xlab = "marks",
  ylab = "cumulative relative frequency",
  main = "empirical cumulative function of marks' intervals"
)
# 6. Mode of the marks
# mode.fun: most frequent value(s) of a vector.
#   x: vector containing the values for which the mode is to be computed
#   returns: character vector holding the name(s) of the value(s) with the
#            highest absolute frequency (more than one element when several
#            values tie; character(0) when x has no values)
mode.fun <- function(x) {
  abs.freq <- table(x)
  if (length(abs.freq) == 0) {
    return(character(0))
  }
  # Pick the entries whose count equals the maximum count. Indexing with
  # abs.freq[max(abs.freq)] would use the largest COUNT as a POSITION and
  # return whatever value sits there (or NA when it is out of range).
  names(abs.freq)[as.vector(abs.freq) == max(abs.freq)]
}
mode.fun(marks)
# 7. Mean and standard deviation of the marks
mean(marks)
sd(marks) # same as sqrt(var(marks))
# 8. Median, 1st and 3rd quartile
median(marks)
# Ask for the quartiles by probability: quantile(marks) returns
# 0%, 25%, 50%, 75%, 100%, so taking positions c(1, 4) would give the minimum
# and the 3rd quartile instead of the 1st and 3rd quartiles.
quantile(marks, probs = c(0.25, 0.75))
# 9. 10th, 20th, 90th and 99th percentiles
quantile(marks, probs = c(10, 20, 90, 99) / 100)
# 10.0 Box plot of the marks
# range = 1.5 is the default; try range = 0.5 and see what happens (see below).
# The default y-axis is suppressed (yaxt = "n") and redrawn with ticks at the
# five box-plot statistics returned by boxplot().
marks.box <- boxplot(
  marks,
  col = "darkgreen",
  main = "Marks Box Plot",
  yaxt = "n",
  range = 1.5
)
stats <- as.vector(marks.box$stats)
axis(2, at = stats, las = 2)
abline(h = stats, lty = "dotted")
# 10.1 Box plot of the marks with a different `range`
# With range = 0.5 the upper whisker ends at most at Q3 + 0.5*IQR and the
# lower whisker at most at Q1 - 0.5*IQR.
narrow.box <- boxplot(
  marks,
  col = "darkgreen",
  main = "Marks Box Plot",
  yaxt = "n",
  range = 0.5
)
stats <- as.vector(narrow.box$stats)
axis(2, at = stats, las = 2)
abline(h = stats, lty = "dotted")
# Distinct values lying beyond the whiskers; they are all drawn at x = 1.
out <- unique(narrow.box$out)
n <- length(out)
identify(x = rep(1, n), y = out, labels = out)
## identify():
##   0. reads the position of the graphics pointer when the mouse button is pressed;
##   1. searches for the point whose coordinates are closest to the pointer;
##   2. draws that point's label on the plot.
# 10.2 Summary of the marks (minimum, quartiles, median, mean, maximum)
summary(marks)

# 11. Normal Q-Q plot of the marks, with its reference line
qqnorm(marks)
qqline(marks)