#
# recall-if.R, 15 Dec 12
#
# R code for processing ACCU12 remember/recall experiment data
#
# subject,pat1,pat2,left id1,right id1,operator1,left id2,right id2,operator2
#
# id in range 0..5
# 0 wrong answer, 5 ? answer
#
# operator in range 0..3
# 0 wrong answer, 3 ? answer

# The following package is only needed for function assocstats
# If Cramer's V is not required then chisq.test in the base system will do.
library("vcd")

# Split a vector of operand answers into wrong, correct, refer-back, switched.
#   id_pos  answer code that counts as correct for this operand position
#   id_ans  vector of answer codes given (0 = wrong, 5 = refer back)
# Returns c(wrong, correct, refer-back, switched).  "switched" is whatever
# remains after the first three are counted, so NA answers end up there too.
wr_co_rb_sw=function(id_pos, id_ans)
{
n_wrong=sum(id_ans == 0, na.rm=TRUE)
n_correct=sum(id_ans == id_pos, na.rm=TRUE)
n_refer_back=sum(id_ans == 5, na.rm=TRUE)
n_switched=length(id_ans)-(n_wrong+n_correct+n_refer_back)

return(c(n_wrong, n_correct, n_refer_back, n_switched))
}


# Total number of answers of each kind (answer codes 0..5) in op_id.
# Returns a 1-d table with one entry per code, zero where a code never occurs.
ans_pos=function(op_id)
{
# table() only reports codes that actually appear, so start from an
# all-zero table covering every code and copy the observed counts into it.
counts=as.table(structure(numeric(6), names=as.character(0:5)))
seen=table(op_id)
counts[names(seen)]=seen
return(counts)
}

# Wrong/correct/refer-back/switched counts, one row per operand position.
#   ans  data frame with columns left.id1, right.id1, left.id2, right.id2
# Returns a 4x4 matrix; rows t1..t4 are operand positions 1..4.
single.wr_co_rb_sw=function(ans)
{
return(rbind(t1=wr_co_rb_sw(1, ans$left.id1),
             t2=wr_co_rb_sw(2, ans$right.id1),
             t3=wr_co_rb_sw(3, ans$left.id2),
             t4=wr_co_rb_sw(4, ans$right.id2)))
}


# Wrong/correct/refer-back/switched counts summed over all four operand
# positions.
#   ans  data frame with columns left.id1, right.id1, left.id2, right.id2
# Returns a single vector c(wrong, correct, refer-back, switched).
total.wr_co_rb_sw=function(ans)
{
return(wr_co_rb_sw(1, ans$left.id1) +
       wr_co_rb_sw(2, ans$right.id1) +
       wr_co_rb_sw(3, ans$left.id2) +
       wr_co_rb_sw(4, ans$right.id2))
}


# Wrong/correct/refer-back/switched counts for the operands in each row.
#   ans  data frame with columns left.id1, right.id1, left.id2, right.id2
# Returns a 2x4 matrix: row 1 sums positions 1 and 2, row 2 sums 3 and 4.
horiz.wr_co_rb_sw=function(ans)
{
first_row=wr_co_rb_sw(1, ans$left.id1) + wr_co_rb_sw(2, ans$right.id1)
second_row=wr_co_rb_sw(3, ans$left.id2) + wr_co_rb_sw(4, ans$right.id2)

# deparse.level=0 stops rbind labelling the rows with the variable names
return(rbind(first_row, second_row, deparse.level=0))
}


# Wrong/correct/refer-back/switched counts for operands above/below each other.
#   ans  data frame with columns left.id1, right.id1, left.id2, right.id2
# Returns a 2x4 matrix: row 1 sums the two left operands (positions 1 and 3),
# row 2 sums the two right operands (positions 2 and 4).
vert.wr_co_rb_sw=function(ans)
{
left_column=wr_co_rb_sw(1, ans$left.id1) + wr_co_rb_sw(3, ans$left.id2)
right_column=wr_co_rb_sw(2, ans$right.id1) + wr_co_rb_sw(4, ans$right.id2)

# deparse.level=0 stops rbind labelling the rows with the variable names
return(rbind(left_column, right_column, deparse.level=0))
}


# Return the rows of ans whose pat1 and pat2 columns equal the given strings.
#   ans       data frame with columns pat1 and pat2
#   pat1_str  value required in pat1
#   pat2_str  value required in pat2
# Rows where the comparison is NA are left out.
matching_pattern=function(ans, pat1_str, pat2_str)
{
wanted=(ans$pat1 == pat1_str) & (ans$pat2 == pat2_str)
# which() discards NA comparisons; drop=FALSE keeps the result a data frame
return(ans[which(wanted), , drop=FALSE])
}


# Print a wrong/correct/refer-back/switched vector as count, percent pairs.
#   ans.res  vector (or 1-d table) of four counts
# Percentages are truncated to whole numbers by as.integer, not rounded.
# Returns the printed vector invisibly (the value of print).
print.wr_co_rb_sw=function(ans.res)
{
percent=as.integer(100*(ans.res/sum(ans.res)))
count_and_percent=c(ans.res[1], percent[1],
                    ans.res[2], percent[2],
                    ans.res[3], percent[3],
                    ans.res[4], percent[4])
print(count_and_percent)
}


# Load the experiment answers, one row per answered question.
# The header comment at the top of this file lists columns such as
# "left id1"; the code below refers to them as left.id1 etc.
# (presumably read.csv's default name mangling -- TODO confirm against the CSV).
# NOTE(review): absolute, machine-specific path.
ifans=read.csv("/usr1/experiment/accu12/results/ifans.csv")

# Quick general overview
# Printed matrix: one row per operand column (left.id1, right.id1, left.id2,
# right.id2, in that order), one column per answer code 0..5.
rbind(ans_pos(ifans$left.id1),
      ans_pos(ifans$right.id1),
      ans_pos(ifans$left.id2),
      ans_pos(ifans$right.id2))


# Chi-squared test on operator answers (operator1 against operator2)
tot.op1=table(ifans$operator1)
t=table(ifans$operator2)
# Exchange the counts in positions 2 and 3 so the column order agrees with
# that of tot.op1.  Only the values move; the table's labels stay in place.
# NOTE(review): the positions match answer codes 1 and 2 only if every code
# 0..3 occurs in the data -- confirm.
tot.op2=t
tot.op2[c(2, 3)]=t[c(3, 2)]
chisq.test(rbind(tot.op1,
                 tot.op2))

print.wr_co_rb_sw(tot.op1)
print.wr_co_rb_sw(tot.op2)


# How many times did each pattern occur?
table(ifans$pat1)
table(ifans$pat2)

# Summary of operand answers
# Each call prints c(wrong, correct, refer-back, switched) for one operand
# column; the first argument is the answer code counted as correct there.
wr_co_rb_sw(1, ifans$left.id1)
wr_co_rb_sw(2, ifans$right.id1)
wr_co_rb_sw(3, ifans$left.id2)
wr_co_rb_sw(4, ifans$right.id2)


# Sum answers by row/column and total
# sum.single: one row per operand position; sum.horiz: one row per line
# (positions 1+2, 3+4); sum.vert: left operands (1+3) and right operands (2+4);
# sum.total: all four positions added together.
sum.single=single.wr_co_rb_sw(ifans)
sum.horiz=horiz.wr_co_rb_sw(ifans)
sum.vert=vert.wr_co_rb_sw(ifans)
sum.total=total.wr_co_rb_sw(ifans)

# Do the answer-kind counts differ between the two rows / the two columns?
chisq.test(sum.horiz)
chisq.test(sum.vert)

# Get Cramer's V + other stuff
# (assocstats comes from the vcd package loaded at the top of the file)
assocstats(sum.horiz)

# Counts and truncated percentages for each row, then for each column
print.wr_co_rb_sw(sum.horiz[1,])
print.wr_co_rb_sw(sum.horiz[2,])
print.wr_co_rb_sw(sum.vert[1,])
print.wr_co_rb_sw(sum.vert[2,])

# Get the answers matching each all-C operand pattern.
# Variable names are <pat1>_<pat2>; the four cover both operator letters
# (a/b) in each of pat1 and pat2.
CaC_CaC=matching_pattern(ifans, "CaC", "CaC")
CbC_CbC=matching_pattern(ifans, "CbC", "CbC")
CbC_CaC=matching_pattern(ifans, "CbC", "CaC")
CaC_CbC=matching_pattern(ifans, "CaC", "CbC")

# The 4 possible orderings involving four Cs
# Each tot.* is c(wrong, correct, refer-back, switched) over all four operands.
tot.CaC_CaC=total.wr_co_rb_sw(CaC_CaC)
tot.CbC_CbC=total.wr_co_rb_sw(CbC_CbC)
tot.CbC_CaC=total.wr_co_rb_sw(CbC_CaC)
tot.CaC_CbC=total.wr_co_rb_sw(CaC_CbC)

print.wr_co_rb_sw(tot.CaC_CaC)
print.wr_co_rb_sw(tot.CbC_CbC)
print.wr_co_rb_sw(tot.CbC_CaC)
print.wr_co_rb_sw(tot.CaC_CbC)

# Same operator letter in both patterns (row 1) against different letters
# (row 2)
chisq.test(rbind(tot.CaC_CaC+tot.CbC_CbC,
                 tot.CbC_CaC+tot.CaC_CbC))

# Cramer's V etc. for the same 2x4 table
assocstats(rbind(tot.CaC_CaC+tot.CbC_CbC,
                 tot.CbC_CaC+tot.CaC_CbC))

# The 16 possible orderings involving one N
# Grouped by both operators the same or both different.
# Each group stacks the answers for four (pat1, pat2) pairs; the pairs are
# given as two parallel vectors, element k of each forming one pair.
NaaCCC=do.call(rbind,
               mapply(matching_pattern,
                      pat1_str=c("NaC", "CaN", "CaC", "CaC"),
                      pat2_str=c("CaC", "CaC", "NaC", "CaN"),
                      MoreArgs=list(ans=ifans),
                      SIMPLIFY=FALSE, USE.NAMES=FALSE))

NbbCCC=do.call(rbind,
               mapply(matching_pattern,
                      pat1_str=c("NbC", "CbN", "CbC", "CbC"),
                      pat2_str=c("CbC", "CbC", "NbC", "CbN"),
                      MoreArgs=list(ans=ifans),
                      SIMPLIFY=FALSE, USE.NAMES=FALSE))

NabCCC=do.call(rbind,
               mapply(matching_pattern,
                      pat1_str=c("NaC", "CaN", "CaC", "CaC"),
                      pat2_str=c("CbC", "CbC", "NbC", "CbN"),
                      MoreArgs=list(ans=ifans),
                      SIMPLIFY=FALSE, USE.NAMES=FALSE))

NbaCCC=do.call(rbind,
               mapply(matching_pattern,
                      pat1_str=c("NbC", "CbN", "CbC", "CbC"),
                      pat2_str=c("CaC", "CaC", "NaC", "CaN"),
                      MoreArgs=list(ans=ifans),
                      SIMPLIFY=FALSE, USE.NAMES=FALSE))

# Wrong/correct/refer-back/switched totals for each group
tot.NaaCCC=total.wr_co_rb_sw(NaaCCC)
tot.NbbCCC=total.wr_co_rb_sw(NbbCCC)
tot.NbaCCC=total.wr_co_rb_sw(NbaCCC)
tot.NabCCC=total.wr_co_rb_sw(NabCCC)

# Same operator in both patterns against different operators
chisq.test(rbind(tot.NaaCCC+tot.NbbCCC,
                 tot.NbaCCC+tot.NabCCC))

# All-C patterns against all one-N patterns
chisq.test(rbind(tot.CaC_CaC+tot.CbC_CbC+tot.CbC_CaC+tot.CaC_CbC,
                 tot.NaaCCC+tot.NbbCCC+tot.NbaCCC+tot.NabCCC))


# The 16 possible orderings involving one N
# Grouped by N always having the same operand position.
# Each group stacks the answers for four (pat1, pat2) pairs; the pairs are
# given as two parallel vectors, element k of each forming one pair.
NCCC=do.call(rbind,
             mapply(matching_pattern,
                    pat1_str=c("NaC", "NbC", "NaC", "NbC"),
                    pat2_str=c("CaC", "CbC", "CbC", "CaC"),
                    MoreArgs=list(ans=ifans),
                    SIMPLIFY=FALSE, USE.NAMES=FALSE))

CNCC=do.call(rbind,
             mapply(matching_pattern,
                    pat1_str=c("CaN", "CbN", "CaN", "CbN"),
                    pat2_str=c("CaC", "CbC", "CbC", "CaC"),
                    MoreArgs=list(ans=ifans),
                    SIMPLIFY=FALSE, USE.NAMES=FALSE))

CCNC=do.call(rbind,
             mapply(matching_pattern,
                    pat1_str=c("CaC", "CbC", "CaC", "CbC"),
                    pat2_str=c("NaC", "NbC", "NbC", "NaC"),
                    MoreArgs=list(ans=ifans),
                    SIMPLIFY=FALSE, USE.NAMES=FALSE))

CCCN=do.call(rbind,
             mapply(matching_pattern,
                    pat1_str=c("CaC", "CbC", "CaC", "CbC"),
                    pat2_str=c("CaN", "CbN", "CbN", "CaN"),
                    MoreArgs=list(ans=ifans),
                    SIMPLIFY=FALSE, USE.NAMES=FALSE))

# Wrong/correct/refer-back/switched totals for each N position
tot.NCCC=total.wr_co_rb_sw(NCCC)
tot.CNCC=total.wr_co_rb_sw(CNCC)
tot.CCNC=total.wr_co_rb_sw(CCNC)
tot.CCCN=total.wr_co_rb_sw(CCCN)


# 24 of the possible orderings involving two Ns
# Six groups of four (pat1, pat2) pairs.  Within a group the operand layout is
# fixed and the four pairs cover every combination of operator letters:
# (a,b), (a,a), (b,b), (b,a).
# NOTE(review): several groups previously listed one pair twice and left the
# (b,a) pair out, and NCNC_col reused the CNCN_col patterns; the lists below
# follow the layout already used by NNCC_row and NCCN_dia.

# Both Ns in the same row
CCNN_row=rbind(matching_pattern(ifans, "CaC", "NbN"),
               matching_pattern(ifans, "CaC", "NaN"),
               matching_pattern(ifans, "CbC", "NbN"),
               matching_pattern(ifans, "CbC", "NaN"))
NNCC_row=rbind(matching_pattern(ifans, "NaN", "CbC"),
               matching_pattern(ifans, "NaN", "CaC"),
               matching_pattern(ifans, "NbN", "CbC"),
               matching_pattern(ifans, "NbN", "CaC"))

# Both Ns in the same column (right operands, then left operands)
CNCN_col=rbind(matching_pattern(ifans, "CaN", "CbN"),
               matching_pattern(ifans, "CaN", "CaN"),
               matching_pattern(ifans, "CbN", "CbN"),
               matching_pattern(ifans, "CbN", "CaN"))
NCNC_col=rbind(matching_pattern(ifans, "NaC", "NbC"),
               matching_pattern(ifans, "NaC", "NaC"),
               matching_pattern(ifans, "NbC", "NbC"),
               matching_pattern(ifans, "NbC", "NaC"))

# Ns diagonally opposite each other
CNNC_dia=rbind(matching_pattern(ifans, "CaN", "NbC"),
               matching_pattern(ifans, "CaN", "NaC"),
               matching_pattern(ifans, "CbN", "NbC"),
               matching_pattern(ifans, "CbN", "NaC"))
NCCN_dia=rbind(matching_pattern(ifans, "NaC", "CbN"),
               matching_pattern(ifans, "NaC", "CaN"),
               matching_pattern(ifans, "NbC", "CbN"),
               matching_pattern(ifans, "NbC", "CaN"))



# Wrong/correct/refer-back/switched totals per group, then per layout
tot.CCNN=total.wr_co_rb_sw(CCNN_row)
tot.NNCC=total.wr_co_rb_sw(NNCC_row)
tot.row=tot.CCNN+tot.NNCC
tot.CNCN=total.wr_co_rb_sw(CNCN_col)
tot.NCNC=total.wr_co_rb_sw(NCNC_col)
tot.col=tot.CNCN+tot.NCNC
tot.CNNC=total.wr_co_rb_sw(CNNC_dia)
tot.NCCN=total.wr_co_rb_sw(NCCN_dia)
tot.dia=tot.CNNC+tot.NCCN

# Pairwise comparison of the three layouts
chisq.test(rbind(tot.row, tot.col))
chisq.test(rbind(tot.row, tot.dia))
chisq.test(rbind(tot.dia, tot.col))

# All-C patterns against all two-N patterns
chisq.test(rbind(tot.CaC_CaC+tot.CbC_CbC+tot.CbC_CaC+tot.CaC_CbC,
                 tot.row+tot.col+tot.dia))

# Counts and truncated percentages for zero, one and two Ns
print.wr_co_rb_sw(tot.CaC_CaC+tot.CbC_CbC+tot.CbC_CaC+tot.CaC_CbC)
print.wr_co_rb_sw(tot.NaaCCC+tot.NbbCCC+tot.NbaCCC+tot.NabCCC)
print.wr_co_rb_sw(tot.row+tot.col+tot.dia)


# By subject analysis

library(plyr)

# One row per subject; the four columns are that subject's wrong, correct,
# refer-back and switched counts summed over all operand positions
# (column order comes from wr_co_rb_sw).
summary.subj=daply(ifans, "subject",
			 function(df) return(colSums(single.wr_co_rb_sw(df))))

# pdf(file="ifoperand_perc.pdf", height=5, width=5)

# Single panel, horizontal axis labels, L-shaped box
par(mfcol=c(1,1))
par(las=1)
par(bty="l")

# percentage by row
subj.per=100*prop.table(summary.subj, margin=1)
# Subjects are plotted in increasing order of column 2 (the "correct" series)
subj.order=order(subj.per[ , 2])

# The four series are drawn as four separate plot() calls on the same axes:
# par(new=TRUE) makes the next plot() draw over the existing figure, and the
# shared ylim keeps the scales aligned.  par(pch=) sets the symbol for the
# plot() that follows it.

# Column 1: wrong (black)
par(pch=20)
plot(subj.per[subj.order, 1],
     ylim=c(0, 100), type="b",
     xlab="Subject", ylab="Percent")
par(new=TRUE)

# Column 2: correct (red)
par(pch=22)
plot(subj.per[subj.order, 2],
     ylim=c(0, 100), type="b", col="red",
     xlab="", ylab="")
par(new=TRUE)

# Column 3: refer back (blue)
par(pch=23)
plot(subj.per[subj.order, 3],
     ylim=c(0, 100), type="b", col="blue",
     xlab="", ylab="")
par(new=TRUE)

# Column 4: swapped (green)
par(pch=24)
plot(subj.per[subj.order, 4],
     ylim=c(0, 100), type="b", col="green",
     xlab="", ylab="")

# Legend entries pair each label with the symbol/colour used above
legend("topleft", legend=c("correct", "refer back", "swapped", "wrong"),
              pch=c(22, 23, 24, 20),
              col=c("red", "blue", "green", "black"),
	      bty="n")


# Mean and standard deviation across subjects of each answer percentage
# (subj.per columns: 1 wrong, 2 correct, 3 refer back, 4 switched)
colMeans(subj.per)
sd(subj.per[ , 1])
sd(subj.per[ , 2])
sd(subj.per[ , 3])
sd(subj.per[ , 4])

# How many questions did each subject answer?
mean(table(ifans$subject))
sd(table(ifans$subject))

# Pairwise correlation between the answer percentages; each of the six
# column pairs is tested once.
# NOTE(review): the second test used columns 1 and 4, duplicating a later
# test and leaving 1 against 3 untested; changed to 1 and 3 -- confirm.
cor.test(subj.per[ ,1], subj.per[ , 2])
cor.test(subj.per[ ,1], subj.per[ , 3])

cor.test(subj.per[ ,2], subj.per[ , 3])
cor.test(subj.per[ ,2], subj.per[ , 4])

cor.test(subj.per[ ,1], subj.per[ , 4])
cor.test(subj.per[ ,3], subj.per[ , 4])
 
 
