################
## EXERCISE 3 ##
################
#0. read data from file
# Tab-separated text file with a header row. Columns are addressed by position
# throughout the script (per the original note):
#   col1 = written, col2 = oral.1, col3 = oral.2, col4 = oral.3
marks <- read.table("dati_esercitazione_voti.txt", sep = "\t", header = TRUE)
#1. compute mean and variance of written marks
mw <- mean(marks[, 1])
vw <- var(marks[, 1])
#2. compute mean and variance of oral_1 marks
mo <- mean(marks[, 2])
vo <- var(marks[, 2])
#3.0 written: relative frequency plot
# Relative frequency of each distinct written mark (a 1-D table named by mark).
wfr <- table(marks[, 1]) / nrow(marks)
# The bars are drawn from explicit numeric x/y values instead of handing the
# table object to plot(): the axes are suppressed here and drawn by hand below.
# NOTE(review): plot.table appears to manage the x axis itself, which may clash
# with a user-supplied xaxt = "n" -- confirm; the explicit form avoids the issue.
plot(as.numeric(names(wfr)), as.vector(wfr), type = "h",
     xlab = "written_marks", ylab = "relative frequency",
     ylim = c(0, max(wfr)), lwd = 5, xaxt = "n", yaxt = "n", col = "darkblue")
# One x tick per distinct mark (not one per observation).
axis(1, as.numeric(names(wfr)), las = 1)
# One y tick per distinct relative frequency.
axis(2, unique(as.vector(wfr)), las = 1, cex.axis = 0.8)
######################################
## case study A: written vs oral_1 ##
######################################
### written and oral_1 test marks are similar (same mean, variance and distribution), but they are independent variables (see covariance and correlation)
#3.1 oral_1: relative frequency plot
o1fr <- table(marks[, 2]) / nrow(marks)
# Explicit numeric x/y (rather than plotting the table object) because the
# axes are suppressed and drawn by hand just below.
plot(as.numeric(names(o1fr)), as.vector(o1fr), type = "h",
     xlab = "oral_1_marks", ylab = "relative frequency",
     ylim = c(0, max(o1fr)), lwd = 5, xaxt = "n", yaxt = "n", col = "darkgreen")
axis(1, as.numeric(names(o1fr)), las = 1)
axis(2, unique(as.vector(o1fr)), las = 1, cex.axis = 0.8)
#4 empirical cumulative distribution functions of written and oral_1 marks
##way 1
# xlim spans both samples so the curve added second cannot be clipped.
plot(ecdf(marks[, 1]),
     xlim = range(marks[, 1], marks[, 2], na.rm = TRUE),
     main = "marks distribution comparison between written and oral_1 marks test")
plot(ecdf(marks[, 2]), add = TRUE, col = "red")
##way 2
# Cumulative relative frequencies built by hand.
# NOTE: `co` holds the written marks and `cw` the oral_1 marks (names kept as-is).
co <- cumsum(table(marks[, 1]) / nrow(marks))
cw <- cumsum(table(marks[, 2]) / nrow(marks))
# names() of the cumulative sums are the distinct marks as text; convert them
# to numbers for the x axis. Limits cover both curves (F(x) lies in [0, 1]).
plot(as.numeric(names(cw)), cw, type = "s",
     xlim = range(marks[, 1], marks[, 2], na.rm = TRUE), ylim = c(0, 1),
     main = "marks distribution comparison between written and oral_1 marks test",
     ylab = "F(x)", xlab = "marks")
lines(as.numeric(names(co)), co, type = "s", col = "gold")
#5 contingency table
table(marks[, 1], marks[, 2])
#' Build a two-way contingency table by hand
#'
#' Didactic counterpart of `table(x, y)`: for every distinct value of `x`
#' it tabulates the values of `y` observed together with it.
#'
#' @param x Vector whose distinct values (as reported by `table()`) label
#'   the rows.
#' @param y Vector of the same length as `x`, paired element-wise with it;
#'   its distinct values label the columns.
#' @return A numeric matrix of absolute joint frequencies with row names taken
#'   from `table(x)` and column names from `table(y)`. Combinations that never
#'   occur are 0.
my.contingency.table <- function(x, y) {
  # x and y are paired observations; with different lengths the subsetting
  # below would silently miscount instead of failing.
  stopifnot(length(x) == length(y))
  xf <- table(x)
  yf <- table(y)
  m <- matrix(0, length(xf), length(yf))
  dimnames(m) <- list(names(xf), names(yf))
  for (i in rownames(m)) {
    # Frequencies of y restricted to the observations whose x matches this row
    # label; equivalent to absolute.frequency(y[which(x == i)]) (see analisidati.R).
    z <- table(y[which(x == i)])
    # Fill only the columns that actually occur; the rest keep their initial 0.
    m[i, names(z)] <- z
  }
  m
}
# Same written-vs-oral_1 cross-tabulation, this time via the hand-written helper.
my.contingency.table(marks[[1]], marks[[2]])
#6. oral_1: scatter plot (oral_1 on the x axis, written on the y axis)
plot(
  x = marks[[2]],
  y = marks[[1]],
  pch = 20,
  col = "gold",
  xlab = "oral_1",
  ylab = "written"
)
#7. correlation and covariance
# Covariance first, then Pearson correlation, of written vs oral_1.
cov(x = marks[[1]], y = marks[[2]])
cor(x = marks[[1]], y = marks[[2]])
######################################
## case study B: written vs oral_2 ##
######################################
### written and oral_2 test marks are dependent but not linearly related (see covariance and correlation)
#8 oral_2: relative frequency plot
o2fr <- table(marks[, 3]) / nrow(marks)
# Explicit numeric x/y (rather than plotting the table object) because the
# axes are suppressed and drawn by hand just below.
plot(as.numeric(names(o2fr)), as.vector(o2fr), type = "h",
     xlab = "oral_2_marks", ylab = "relative frequency",
     ylim = c(0, max(o2fr)), lwd = 5, xaxt = "n", yaxt = "n", col = "darkgreen")
axis(1, as.numeric(names(o2fr)), las = 1)
axis(2, unique(as.vector(o2fr)), las = 1, cex.axis = 0.8)
#9. empirical cumulative distribution functions of written and oral_2 marks
##way 1
# xlim spans both samples so the curve added second cannot be clipped.
plot(ecdf(marks[, 1]),
     xlim = range(marks[, 1], marks[, 3], na.rm = TRUE),
     main = "marks distribution comparison between written and oral_2 marks test")
plot(ecdf(marks[, 3]), add = TRUE, col = "red")
##way 2
# Cumulative relative frequencies built by hand.
# NOTE: `co` holds the written marks and `cw` the oral_2 marks (names kept as-is).
co <- cumsum(table(marks[, 1]) / nrow(marks))
cw <- cumsum(table(marks[, 3]) / nrow(marks))
# names() of the cumulative sums are the distinct marks as text; convert them
# to numbers for the x axis. Limits cover both curves (F(x) lies in [0, 1]).
plot(as.numeric(names(cw)), cw, type = "s",
     xlim = range(marks[, 1], marks[, 3], na.rm = TRUE), ylim = c(0, 1),
     main = "marks distribution comparison between written and oral_2 marks test",
     ylab = "F(x)", xlab = "marks")
lines(as.numeric(names(co)), co, type = "s", col = "gold")
#10. contingency table
table(marks[, 1], marks[, 3]) ## alternative: my.contingency.table(marks[,1],marks[,3])
#11. oral_2: scatter plot (oral_2 on the x axis, written on the y axis)
plot(marks[, 3], marks[, 1], pch = 20, col = "gold", xlab = "oral_2", ylab = "written")
#12. correlation and covariance
cov(marks[, 1], marks[, 3])
cor(marks[, 1], marks[, 3])
######################################
## case study C: written vs oral_3 ##
######################################
### written and oral_3 test marks have different mean, variance and distribution, but they are linearly related
#13 oral_3: relative frequency plot
o3fr <- table(marks[, 4]) / nrow(marks)
# Explicit numeric x/y (rather than plotting the table object) because the
# axes are suppressed and drawn by hand just below.
plot(as.numeric(names(o3fr)), as.vector(o3fr), type = "h",
     xlab = "oral_3_marks", ylab = "relative frequency",
     ylim = c(0, max(o3fr)), lwd = 5, xaxt = "n", yaxt = "n", col = "darkgreen")
axis(1, as.numeric(names(o3fr)), las = 1)
axis(2, unique(as.vector(o3fr)), las = 1, cex.axis = 0.8)
#14. empirical cumulative distribution functions of written and oral_3 marks
##way 1
# The two samples are stated to have different distributions, so the x range
# must span both; otherwise the oral_3 curve can fall outside the plot region.
plot(ecdf(marks[, 1]),
     xlim = range(marks[, 1], marks[, 4], na.rm = TRUE),
     main = "marks distribution comparison between written and oral_3 marks test")
plot(ecdf(marks[, 4]), add = TRUE, col = "blue")
##way 2
# Cumulative relative frequencies built by hand.
# NOTE: `co` holds the written marks and `cw` the oral_3 marks (names kept as-is).
co <- cumsum(table(marks[, 1]) / nrow(marks))
cw <- cumsum(table(marks[, 4]) / nrow(marks))
# names() of the cumulative sums are the distinct marks as text; convert them
# to numbers for the x axis. Limits cover both curves (F(x) lies in [0, 1]).
plot(as.numeric(names(co)), co, type = "s",
     xlim = range(marks[, 1], marks[, 4], na.rm = TRUE), ylim = c(0, 1),
     main = "marks distribution: comparison between written and oral_3 test marks",
     ylab = "F(x)", xlab = "marks")
lines(as.numeric(names(cw)), cw, type = "s", col = "gold")
#15. contingency table
table(marks[, 1], marks[, 4]) ## alternative: my.contingency.table(marks[,1],marks[,4])
#16. oral_3: scatter plot (oral_3 on the x axis, written on the y axis)
# The x axis shows column 4, i.e. oral_3, and is labelled accordingly.
plot(marks[, 4], marks[, 1], pch = 20, col = "gold", xlab = "oral_3", ylab = "written")
#17. correlation and covariance
cov(marks[, 1], marks[, 4])
cor(marks[, 1], marks[, 4])