################
## EXERCISE 1 ##
################

# 0. Load the comma-separated marks stored in "marks.txt"
marks <- scan(file = "marks.txt", sep = ",")

# 1. Frequency tables
#    abs.freq: how many times each distinct mark occurs
#    rel.freq: the same counts divided by the number of marks read
abs.freq <- table(marks)
rel.freq <- abs.freq / length(marks)

# 2. Bar diagrams (vertical "h" lines) of the absolute and relative frequencies
# Optional: uncomment to send the plots to a pdf file
# (e.g. path=/Users/Marco/Desktop/) --> windows
#pdf(file="C:/path/file.pdf")
# Optional: mfrow=c(nrows, ncols) lays the plots out in an nrows x ncols
# grid, filled row by row
#par(mfrow=c(2,2))

# Absolute frequencies: one coloured vertical line per distinct mark
plot(
  abs.freq,
  type = "h",
  lwd = 5,
  las = 1,
  col = rainbow(length(abs.freq)),
  ylab = "absolute frequency",
  main = "absolute frequency bar diagram"
)
# Mark the top of every line with a cross
points(x = as.integer(names(abs.freq)), y = abs.freq, pch = 3, lwd = 2)
# Dotted horizontal guide at every observed frequency
abline(h = abs.freq, lty = "dotted")

# Relative frequencies: same picture on the relative scale
plot(
  rel.freq,
  type = "h",
  lwd = 5,
  las = 1,
  col = rainbow(length(rel.freq)),
  ylab = "relative frequency",
  main = "relative frequency bar diagram"
)
points(x = as.integer(names(rel.freq)), y = rel.freq, pch = 3, lwd = 2)
abline(h = rel.freq, lty = "dotted")

# Barplots of the absolute and relative frequencies, with the value of each
# bar written just above it.
# barplot() invisibly returns the x coordinates of the bar midpoints, so the
# value is captured from the drawing call instead of calling barplot() a
# second time with plot=FALSE.
bp <- barplot(abs.freq, col = rainbow(length(abs.freq)), las = 1,
              main = "absolute frequency barplot", ylab = "absolute frequency")
## add labels to barplot (xpd=TRUE lets text be drawn outside the plot region)
text(x = bp, y = abs.freq + 0.2, labels = as.character(abs.freq), xpd = TRUE)

# Keep the original 0-0.2 axis, but extend it when some relative frequency is
# larger than 0.2 so that no bar is cut off.
bp <- barplot(rel.freq, col = rainbow(length(rel.freq)),
              ylim = c(0, max(0.2, rel.freq)), las = 1,
              main = "relative frequency barplot", ylab = "relative frequency")
text(x = bp, y = rel.freq + 0.005, labels = as.character(round(rel.freq, 3)),
     xpd = TRUE, cex = 0.7)
# Optional: close the pdf device if pdf() was opened for these plots
#dev.off()

#3.0 Relative Frequency Pie Chart
# Slice labels have the form "<mark> (<percentage>%)"
rel.freq.perc <- round(rel.freq * 100, 2)
lbs <- paste0(names(rel.freq), " (", rel.freq.perc, "%)")
pie(rel.freq, labels = lbs, col = rainbow(length(rel.freq)),
    main = "Relative Frequency Pie Chart ")

#3.1 Relative Frequency Pie Chart for the following intervals of marks: (18,24](24,25](25,29](29,30]
int.marks <- c(18, 24, 25, 29, 30)
# NOTE: cut() builds left-open intervals by default, so a mark exactly equal
# to 18 is assigned NA and is not counted in any class; it still contributes
# to length(class.marks), i.e. percentages are relative to all marks.
class.marks <- cut(marks, breaks = int.marks)
freq.class <- table(class.marks)
# Multiply by 100 so the number matches the "%" sign printed in the labels
freq.class.perc <- round(freq.class / length(class.marks) * 100, 2)
lbs <- paste0(names(freq.class.perc), " (", freq.class.perc, "%)")
pie(freq.class, labels = lbs, col = rainbow(length(freq.class)))

##3D Pie Chart (pie3D comes from the plotrix package)
library(plotrix)
lbs <- paste0(names(rel.freq), " (", rel.freq.perc, "%)")
pie3D(rel.freq.perc, labels = lbs, col = rainbow(length(rel.freq.perc)),
      main = "Relative Frequency Pie Chart", explode = 0.1, radius = 2)
lbs <- paste0(names(freq.class.perc), " (", freq.class.perc, "%)")
pie3D(freq.class, labels = lbs, col = rainbow(length(freq.class)),
      explode = 0.1, radius = 2)

# 4.0 Histogram of marks on the density scale (freq=FALSE).
#     Bar heights equal relative frequencies, and therefore sum to 1, only
#     when every bin has width 1.
# NOTE(review): hist() bins are right-closed with the lowest break included,
# so if the breaks fall on integer marks the first bin pools the two lowest
# marks - confirm this is the intended picture.
## copy only the settable graphical parameters, so that par(opar) can restore
## them without "cannot be set" warnings
opar <- par(no.readonly = TRUE)
par(cex.axis = 0.7)       ## see Text and Symbol Size at following link: http://www.statmethods.net/advgraphs/parameters.html
## breaks=11 is only a suggested number of cells; hist() chooses "pretty" break points.
## hist() invisibly returns the histogram object, so it is computed only once.
hist.marks <- hist(marks, freq = FALSE, breaks = 11, col = "darkblue",
                   axes = FALSE, ylab = "Relative Frequency")
den <- unique(hist.marks$density)    ## distinct bar heights
brk <- hist.marks$breaks             ## break points actually used
axis(1, at = brk, las = 1)                      ## add x-axis to the plot (1=bottom)
axis(2, at = c(0, round(den, 4)), las = 1)      ## add y-axis to the plot (2=left)
abline(h = den, lty = "dotted")                 ## dotted guide at every bar height
#par(opar)                                      ## restore original settings

# 4.1 Histogram whose bins are the mark intervals (18,24](24,25](25,29](29,30]
int.marks <- c(18, 24, 25, 29, 30)

## Bin information computed without drawing: break points and distinct densities
brk <- hist(marks, breaks = int.marks, plot = FALSE)$breaks
den <- unique(hist(marks, breaks = int.marks, plot = FALSE)$density)

## Draw on the density scale: hi = fi/Di, with fi the relative frequency of
## the bin and Di its width
hist(marks, breaks = int.marks, freq = FALSE, axes = FALSE, col = "darkblue")
axis(side = 1, at = brk, las = 1)                    ## bottom axis at the breaks
axis(side = 2, at = c(0, round(den, 4)), las = 1)    ## left axis at the densities
abline(h = den, lty = "dotted")                      ## dotted guide per density
#par(opar)    ## restore original settings (neglecting the warnings)

# 5.0 Empirical cumulative distribution function of the individual marks
plot.ecdf(
  marks,
  xlab = "marks",
  ylab = "cumulative relative frequency",
  main = "Empirical Cumulative Distribution Function"
)
## Alternative ways of drawing the same cdf:
##   0. plot(ecdf(marks));
##   1. plot.stepfun(marks,xlab="marks");
##   2. cum.rel <- cumsum(rel.freq); plot(as.integer(names(cum.rel)),cum.rel,type="s",xlab="",ylab=""); points(as.integer(names(cum.rel)),cum.rel);
##   note: with plot() the x-axis values have to be supplied explicitly

# 5.1 Cumulative relative frequency over the intervals (18,24](24,25](25,29](29,30]
int.marks <- c(18, 24, 25, 29, 30)
class.marks <- cut(marks, breaks = int.marks)
freq.class <- table(class.marks)
## running total of the class counts, then rescaled by the number of marks
cum.class.marks <- cumsum(freq.class)
cum.rel.class.marks <- cum.class.marks / length(marks)
## step plot: starts at 0 on the first break, one cumulative value per later break
plot(
  x = int.marks,
  y = c(0, cum.rel.class.marks),
  type = "s",
  xlab = "marks",
  ylab = "cumulative relative frequency",
  main = "empirical cumulative function of marks' intervals"
)

#6. marks mode
# Returns the mode of x, i.e. the value(s) with the highest absolute frequency.
# x: vector containing the values for which the mode is to be computed
# value: character vector with the name(s) of the most frequent value(s);
#        every tied value is returned, and character(0) for empty input
mode.fun <- function(x) {
  abs.freq <- table(x)
  # Nothing to tabulate: avoid calling max() on an empty table
  if (length(abs.freq) == 0) {
    return(character(0))
  }
  counts <- as.vector(abs.freq)
  # Select by comparing each count with the maximum; indexing the table with
  # the maximum count itself would pick an unrelated position
  names(abs.freq)[counts == max(counts)]
}
# Mode of the marks, using mode.fun defined above
mode.fun(marks)

#7 mean and standard deviation of marks
mean(marks)
sd(marks)   ## same as sqrt(var(marks))

#8 median, 1st and 3rd quartile
median(marks)
## request the quartiles explicitly: quantile(marks) returns 0%, 25%, 50%,
## 75%, 100%, so picking positions 1 and 4 would give the minimum and Q3
quantile(marks, probs = c(0.25, 0.75))

#9 10th, 20th, 90th, 99th percentiles
quantile(marks, probs = c(10, 20, 90, 99) / 100)

#10.0 Box plot of the marks with the default whisker range (1.5)
## five statistics used by the box plot, computed without drawing
stats <- as.vector(boxplot(marks, plot = FALSE)$stats)
## y-axis suppressed here (yaxt="n") and drawn below at the five statistics;
## try range=0.5 instead of the default 1.5 and see what changes (next step)
boxplot(marks, range = 1.5, yaxt = "n", col = "darkgreen", main = "Marks Box Plot")
axis(side = 2, at = stats, las = 2)
abline(h = stats, lty = "dotted")
#10.1 Marks Box Plot (changing option range)
## with range=0.5 each whisker reaches the most extreme mark lying within
## 0.5*IQR of the box (at most Q3+0.5*IQR above, at least Q1-0.5*IQR below);
## marks beyond the whiskers are reported as outliers.
## boxplot() invisibly returns its statistics, so one call both draws the
## plot and provides $stats and $out.
bx <- boxplot(marks, col = "darkgreen", main = "Marks Box Plot", yaxt = "n", range = 0.5)
stats <- as.vector(bx$stats)
axis(2, stats, las = 2)
abline(h = stats, lty = "dotted")
out <- unique(bx$out)     ## distinct outlying marks
n <- length(out)
##identify: 0.reads the position of the graphics pointer when the mouse button is pressed;
##          1.searches the coordinates closest to the pointer
##          2.prints the labels of the points found
## it needs a user and at least one outlier to click on, so skip it otherwise
if (interactive() && n > 0) {
  identify(rep(1, n), out, labels = out)
}

#10.2 Numerical summary of the marks
summary(object = marks)

#11 Normal Q-Q plot of the marks, with its reference line added on top
qqnorm(y = marks)
qqline(y = marks)