#### This project received funding under award NA16NMF4270252 from NOAA 
#### Fisheries Service, in cooperation with the Saltonstall-Kennedy Grant 
#### Program. The statements, findings, conclusions, and recommendations 
#### are those of the author(s) and do not necessarily reflect the views 
#### of NOAA Fisheries. 

#### Project Primary Investigators are Lorna Wilson, Rich Brenner, and Bev 
#### Agler. Please direct questions about this project to Lorna Wilson.


####
#need rtools, then rstan installed to use brms(), helpful site:
#https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started


####Load libraries
while(!require(broom)) {install.packages("broom")}
while(!require(fitdistrplus)) {install.packages("fitdistrplus")}
while(!require(tidyr)) {install.packages("tidyr")}
while(!require(dplyr)) {install.packages("dplyr")}
while(!require(plyr)) {install.packages("plyr")}
while(!require(reshape2)) {install.packages("reshape2")}
while(!require(ggplot2)) {install.packages("ggplot2")}
while(!require(ggridges)) {install.packages("ggridges")}
#while(!require(matrixSats)) {install.packages("matrixStats")}
while(!require(brms)) {install.packages("brms")}
while(!require(moments)) {install.packages("moments")}
while(!require(data.table)) {install.packages("data.table")}


library(broom)
library(fitdistrplus)
library(tidyr)
library(plyr)
library(dplyr)
library(reshape2)
library(ggplot2)
library(ggridges)
library(matrixStats)
library(brms)
library(moments)
library(data.table)

#Set ggplot2 themes
theme_set(theme_bw(base_size=12)+ 
  theme(panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()))


########################################################
###Load data
#put the SK.Scale.Age.Study.data.csv file in your working directory.
sk <- read.csv("SK.Scale.Age.Study.data.csv", sep=",", header=TRUE)
dim(sk) #10,200 scales reads, 9 columns
head(sk)
summary(sk)

sk = as.data.frame(sk)

#get mean, median age

mean.sk = sk %>% 
  group_by(IMAGE_NAME) %>% 
  summarize(mean = mean(fish.age, na.rm = T), median = (median(fish.age, na.rm = T)))
mean.sk = as.data.frame(mean.sk)
head(mean.sk)



#pull age data by reader
r7 = data.frame(r7age = sk[sk$USER_ID==7,]$fish.age, NAME= sk[sk$USER_ID==7,]$IMAGE_NAME)
r8 = data.frame(r8age = sk[sk$USER_ID==8,]$fish.age, NAME= sk[sk$USER_ID==8,]$IMAGE_NAME)
r9 = data.frame(r9age = sk[sk$USER_ID==9,]$fish.age, NAME= sk[sk$USER_ID==9,]$IMAGE_NAME)
r10 = data.frame(r10age = sk[sk$USER_ID==10,]$fish.age, NAME= sk[sk$USER_ID==10,]$IMAGE_NAME)
r11 = data.frame(r11age = sk[sk$USER_ID==11,]$fish.age, NAME= sk[sk$USER_ID==11,]$IMAGE_NAME)
r12 = data.frame(r12age = sk[sk$USER_ID==12,]$fish.age, NAME= sk[sk$USER_ID==12,]$IMAGE_NAME)
r13 = data.frame(r13age = sk[sk$USER_ID==13,]$fish.age, NAME= sk[sk$USER_ID==13,]$IMAGE_NAME)
r15 = data.frame(r15age = sk[sk$USER_ID==15,]$fish.age, NAME= sk[sk$USER_ID==15,]$IMAGE_NAME)
r16 = data.frame(r16age = sk[sk$USER_ID==16,]$fish.age, NAME= sk[sk$USER_ID==16,]$IMAGE_NAME)
r17 = data.frame(r17age = sk[sk$USER_ID==17,]$fish.age, NAME= sk[sk$USER_ID==17,]$IMAGE_NAME)
head(r17)
 

#remerge reader data with mean and mean ages
mean.sk1 = merge(mean.sk, r7, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk2 = merge(mean.sk1, r8, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk3 = merge(mean.sk2, r9, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk4 = merge(mean.sk3, r10, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk5 = merge(mean.sk4, r11, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk6 = merge(mean.sk5, r12, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk7 = merge(mean.sk6, r13, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk8 = merge(mean.sk7, r15, by.x = "IMAGE_NAME", by.y = "NAME")
mean.sk9 = merge(mean.sk8, r16, by.x = "IMAGE_NAME", by.y = "NAME")
sk.df = merge(mean.sk9, r17, by.x = "IMAGE_NAME", by.y = "NAME")


##########################################################################
#pull Eu age data by reader
r7 = data.frame(r7.eu = sk[sk$USER_ID==7,]$fish.age.EU, NAME= sk[sk$USER_ID==7,]$IMAGE_NAME)
r8 = data.frame(r8.eu = sk[sk$USER_ID==8,]$fish.age.EU, NAME= sk[sk$USER_ID==8,]$IMAGE_NAME)
r9 = data.frame(r9.eu = sk[sk$USER_ID==9,]$fish.age.EU, NAME= sk[sk$USER_ID==9,]$IMAGE_NAME)
r10 = data.frame(r10.eu = sk[sk$USER_ID==10,]$fish.age.EU, NAME= sk[sk$USER_ID==10,]$IMAGE_NAME)
r11 = data.frame(r11.eu = sk[sk$USER_ID==11,]$fish.age.EU, NAME= sk[sk$USER_ID==11,]$IMAGE_NAME)
r12 = data.frame(r12.eu = sk[sk$USER_ID==12,]$fish.age.EU, NAME= sk[sk$USER_ID==12,]$IMAGE_NAME)
r13 = data.frame(r13.eu = sk[sk$USER_ID==13,]$fish.age.EU, NAME= sk[sk$USER_ID==13,]$IMAGE_NAME)
r15 = data.frame(r15.eu = sk[sk$USER_ID==15,]$fish.age.EU, NAME= sk[sk$USER_ID==15,]$IMAGE_NAME)
r16 = data.frame(r16.eu = sk[sk$USER_ID==16,]$fish.age.EU, NAME= sk[sk$USER_ID==16,]$IMAGE_NAME)
r17 = data.frame(r17.eu = sk[sk$USER_ID==17,]$fish.age.EU, NAME= sk[sk$USER_ID==17,]$IMAGE_NAME)

sk.eu = merge(r7, r8, by = "NAME")
sk.eu = merge(sk.eu, r9, by = "NAME"); sk.eu = merge(sk.eu, r10, by = "NAME")
sk.eu = merge(sk.eu, r11, by = "NAME"); sk.eu = merge(sk.eu, r12, by = "NAME")
sk.eu = merge(sk.eu, r13, by = "NAME"); sk.eu = merge(sk.eu, r15, by = "NAME")
sk.eu = merge(sk.eu, r16, by = "NAME"); sk.eu = merge(sk.eu, r17, by = "NAME")



####add median reader fw ages
sk.eu$r7.fw = substr(sk.eu$r7.eu, 1,1); sk.eu$r8.fw = substr(sk.eu$r8.eu, 1,1)
sk.eu$r9.fw = substr(sk.eu$r9.eu, 1,1); sk.eu$r10.fw = substr(sk.eu$r10.eu, 1,1)
sk.eu$r11.fw = substr(sk.eu$r11.eu, 1,1); sk.eu$r12.fw = substr(sk.eu$r12.eu, 1,1)
sk.eu$r13.fw = substr(sk.eu$r13.eu, 1,1); sk.eu$r15.fw = substr(sk.eu$r15.eu, 1,1) 
sk.eu$r16.fw = substr(sk.eu$r16.eu, 1,1); sk.eu$r17.fw = substr(sk.eu$r17.eu, 1,1)

#get median fw age
sk.eu$median.fw = rowMedians(as.matrix(cbind(as.numeric(sk.eu$r7.fw), as.numeric(sk.eu$r8.fw), as.numeric(sk.eu$r9.fw), as.numeric(sk.eu$r10.fw), as.numeric(sk.eu$r11.fw),
 as.numeric(sk.eu$r12.fw), as.numeric(sk.eu$r13.fw), as.numeric(sk.eu$r15.fw), as.numeric(sk.eu$r16.fw), as.numeric(sk.eu$r17.fw))))


#reader sw
sk.eu$r7.sw = substr(sk.eu$r7.eu,2,2); sk.eu$r8.sw = substr(sk.eu$r8.eu,2,2)
sk.eu$r9.sw = substr(sk.eu$r9.eu,2,2); sk.eu$r10.sw = substr(sk.eu$r10.eu,2,2)
sk.eu$r11.sw = substr(sk.eu$r11.eu,2,2); sk.eu$r12.sw = substr(sk.eu$r12.eu,2,2)
sk.eu$r13.sw = substr(sk.eu$r13.eu,2,2);sk.eu$r15.sw = substr(sk.eu$r15.eu,2,2);
sk.eu$r16.sw = substr(sk.eu$r16.eu,2,2); sk.eu$r17.sw = substr(sk.eu$r17.eu,2,2)

#get median sw age
sk.eu$median.sw = rowMedians(as.matrix(cbind(as.numeric(sk.eu$r7.sw), as.numeric(sk.eu$r8.sw), as.numeric(sk.eu$r9.sw), as.numeric(sk.eu$r10.sw), as.numeric(sk.eu$r11.sw),
 as.numeric(sk.eu$r12.sw), as.numeric(sk.eu$r13.sw), as.numeric(sk.eu$r15.sw), as.numeric(sk.eu$r16.sw), as.numeric(sk.eu$r17.sw))))

#sk.eu.reader = (sk.eu[,1:11])
#Pull age data by reader for Objective 3
#options(max.print=999999999)
#sink("readerages.txt")
#sk.eu.reader
#sink()

#sink("sk.txt")
#sk

median.eu = data.frame(sk.eu$NAME, sk.eu$median.fw, sk.eu$median.sw)
head(median.eu)
summary(median.eu)
#sink("median.txt")
#median.eu
#sink()



##################################################################################
#####Summary stats
#sample size
dim(sk.df)
n.r = dim(sk.df)[2] -3
n.r
n.fish <- dim(sk.df[!is.na(sk.df$median),])[1]
n.fish


### Precision stats by fish using fw+sw age
ape = (abs(sk.df$r7age-sk.df$mean)/sk.df$mean + abs(sk.df$r8age-sk.df$mean)/sk.df$mean + abs(sk.df$r9age-sk.df$mean)/sk.df$mean + abs(sk.df$r10age-sk.df$mean)/sk.df$mean + abs(sk.df$r11age-sk.df$mean)/sk.df$mean + abs(sk.df$r12age-sk.df$mean)/sk.df$mean + abs(sk.df$r13age-sk.df$mean)/sk.df$mean + abs(sk.df$r15age-sk.df$mean)/sk.df$mean + abs(sk.df$r16age-sk.df$mean)/sk.df$mean + abs(sk.df$r17age-sk.df$mean)/sk.df$mean)*1/n.r*100
var = ((sk.df$r7age - sk.df[,2])^2 + (sk.df$r8age - sk.df[,2])^2 + (sk.df$r9age - sk.df[,2])^2 + (sk.df$r10age - sk.df[,2])^2 + (sk.df$r11age - sk.df[,2])^2 + (sk.df$r12age - sk.df[,2])^2 + (sk.df$r13age - sk.df[,2])^2 + (sk.df$r15age - sk.df[,2])^2 + (sk.df$r16age - sk.df[,2])^2 + (sk.df$r17age - sk.df[,2])^2)/(n.r - 1)
sd = sqrt(var)
sk.df.ape = data.frame(sk.df, ape, var, sd)
cv = sk.df.ape$sd/sk.df.ape$mean; d = cv / sqrt(n.r)
sk.df.ape = data.frame(sk.df.ape, cv,d)
sys = substr(sk.df.ape$IMAGE_NAME, 1,2) 
yr = substr(sk.df.ape$IMAGE_NAME, 3, 4) 
oage = substr(sk.df.ape$IMAGE_NAME, 5, 6) 
sk.df.ape = data.frame(sk.df.ape, sys, yr, oage)

#remove scales (420) that originally had an error code
sk.df.ape = as.data.frame(sk.df.ape)
sk.df.ape = (sk.df.ape[sk.df.ape$oage!="00",])
dim(sk.df.ape) #9780

#add year 4 charcaters and 3 time periods
#tp = (read.table("clipboard", header=T, sep="\t"))
tp$yr = c("80","81","82","83","84","85","86","87","88","89","90","91","92","93","94","95","96","97","98","99","00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15")
sk.df.ape = merge(sk.df.ape, tp, by="yr")
head(sk.df.ape)

###Add european ages by reader median location, 
sk.df.ape = merge(sk.eu, sk.df.ape, by.x = "NAME", by.y="IMAGE_NAME")
head(sk.df.ape)

#### Worst scale
WorstScales = sk.df.ape[sk.df.ape$cv>.28,]
WorstScales = na.omit(WorstScales)
WorstScales

out = data.frame(sk.df.ape$NAME, median.eu = paste(sk.df.ape$median.fw, sk.df.ape$median.sw,sep=""))
head(out)
#sink("median.txt")
#out
#sink()

head(sk.df.ape)
sk.df.ape.n = sk.df.ape %>% 
  group_by(median) %>% 
  dplyr::summarise(n = n(), na.rm=T)
sk.df.ape.n  #6172 scales with a median age (6092 whole numbers, 80 with x.5 ages)

summary(sk.df.ape$cv) #Mean CV is 6.3 from 6154 scales with a median age
sk.df.ape.cv = sk.df.ape %>% 
  group_by(median.fw) %>% 
  summarise(avg = mean(cv, na.rm = T))
sk.df.ape.cv  #0.0630


summary(sk.df.ape$median)  #Mean age is 4.2


#############################################################################################
################################################################
#Group by median age
sk.df.ape$med.eu = paste(sk.df.ape$median.fw, sk.df.ape$median.sw, sep="")
head(sk.df.ape)
sk.df.ape.n = sk.df.ape %>% 
  group_by(med.eu) %>% 
  dplyr::summarise(n = n())
sk.df.ape.n  #6172 scales with a median age (6092 whole numbers, 80 with x.5 ages)



unique(sk.df.ape$med.eu)  #remove ages that are x.5
sk.df.ape2 = sk.df.ape[sk.df.ape$med.eu!="13.5",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="12.5",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="14.5",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="11.5",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="1.53",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="1.5NA",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="2NA",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="1NA",]
sk.df.ape2 = sk.df.ape2[sk.df.ape2$med.eu!="NANA",]

unique(sk.df.ape2$med.eu)
names(sk.df.ape2)
head(sk.df.ape2)



#######################################################################################
###Is median age different without reader of interest than the median age from all readers?
#get mean, median age by reader
dim(sk.df.ape2)
notR7 = rep(NA, dim(sk.df.ape2)[1])
notR8 = rep(NA, dim(sk.df.ape2)[1])
notR9 = rep(NA, dim(sk.df.ape2)[1])
notR10 = rep(NA, dim(sk.df.ape2)[1])
notR11 = rep(NA, dim(sk.df.ape2)[1])
notR12 = rep(NA, dim(sk.df.ape2)[1])
notR13 = rep(NA, dim(sk.df.ape2)[1])
notR15 = rep(NA, dim(sk.df.ape2)[1])
notR16 = rep(NA, dim(sk.df.ape2)[1])
notR17 = rep(NA, dim(sk.df.ape2)[1])

#Get 10 median ages without reader
for (i in 1:nrow(sk.df.ape2)){
	notR7[i] = median(sk.df.ape2$r8age[i]:sk.df.ape2$r17age[i])
	notR8[i] = median(sk.df.ape2$r7age[i],sk.df.ape2$r9age[i]:sk.df.ape2$r17age[i])
	notR9[i] = median(sk.df.ape2$r7age[i],sk.df.ape2$r8age[i],sk.df.ape2$r10age[i]:sk.df.ape2$r17age[i])
	notR10[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r9age[i],sk.df.ape2$r11age[i]:sk.df.ape2$r17age[i])
	notR11[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r10age[i],sk.df.ape2$r12age[i]:sk.df.ape2$r17age[i])
	notR12[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r11age[i],sk.df.ape2$r13age[i]:sk.df.ape2$r17age[i])
	notR13[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r12age[i],sk.df.ape2$r15age[i]:sk.df.ape2$r17age[i])
	notR15[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r13age[i],sk.df.ape2$r16age[i],sk.df.ape2$r17age[i])
	notR16[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r15age[i],sk.df.ape2$r17age[i])
	notR17[i] = median(sk.df.ape2$r7age[i]:sk.df.ape2$r16age[i])
	}
noRMA = data.frame(sk.df.ape2$NAME,notR7,notR8,notR9,notR10,notR11,notR12,notR13,notR15,notR16,notR17,sk.df.ape2$median)
head(noRMA)

##Get the difference between 10 median ages without reader and median age 
#empty vectors
noR7MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR8MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR9MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR10MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR11MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR12MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR13MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR15MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR16MAdiff = rep(NA, dim(sk.df.ape2)[1])
noR17MAdiff = rep(NA, dim(sk.df.ape2)[1])

#subtract
for (i in 1:nrow(noRMA)){
	noR7MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR7[i]
	noR8MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR8[i]
	noR9MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR9[i]
	noR10MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR10[i]
	noR11MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR11[i]
	noR12MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR12[i]
	noR13MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR13[i]
	noR15MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR15[i]
	noR16MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR16[i]
	noR17MAdiff[i] = noRMA$sk.df.ape2.median[i]- noRMA$notR17[i]
	}
noRMA.med.diff = data.frame(sk.df.ape2$NAME,noR7MAdiff,noR8MAdiff,noR9MAdiff,noR10MAdiff,noR11MAdiff,noR12MAdiff,noR13MAdiff,noR15MAdiff,noR16MAdiff,noR17MAdiff)
head(noRMA.med.diff)
summary(noRMA.med.diff) #min = -2, max = 3

diffs = seq(from = -2, to = 3, by=0.5 )
diffs

#Sample size table by age 
no7RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR7MAdiff) %>%   dplyr::summarise(n = n())
no8RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR8MAdiff) %>%   dplyr::summarise(n = n())
no9RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR9MAdiff) %>%   dplyr::summarise(n = n())
no10RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR10MAdiff) %>%   dplyr::summarise(n = n())
no11RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR11MAdiff) %>%   dplyr::summarise(n = n())
no12RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR12MAdiff) %>%   dplyr::summarise(n = n())
no13RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR13MAdiff) %>%   dplyr::summarise(n = n())
no15RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR15MAdiff) %>%   dplyr::summarise(n = n())
no16RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR16MAdiff) %>%   dplyr::summarise(n = n())
no17RMA.med.diff.n = noRMA.med.diff %>%   group_by(noR17MAdiff) %>%   dplyr::summarise(n = n())

no7RMA.med.diff.n
no8RMA.med.diff.n
no9RMA.med.diff.n
no10RMA.med.diff.n
no11RMA.med.diff.n
no12RMA.med.diff.n
no13RMA.med.diff.n
no15RMA.med.diff.n
no16RMA.med.diff.n
no17RMA.med.diff.n



plot(sk.df.ape2$median ~sk.df.ape2$r15age)
p <- ggplot(sk.df.ape2, aes(median,r15age))
p + geom_point()
p + geom_jitter()


p <- ggplot(sk.df.ape2, aes(median,r8age))
p + geom_point()
p + geom_jitter()

p <- ggplot(sk.df.ape2, aes(median,r9age))
p + geom_point()
p + geom_jitter()







###############################################
#look at distribution of CV (percentage) data
hist(sk.df.ape2$cv, nclass = 30, col = "gray")

#no transformation would make the data normal




###########################################################
#bring in fish lengths and day of year (doy)
fishlengths = read.csv("SK.Scale.Age.Study.fish.data.csv", sep=",", header=TRUE)
head(fishlengths)

sk.df.ape2$ofw = substr(sk.df.ape2$NAME, 5,5)
sk.df.ape2$osw = substr(sk.df.ape2$NAME, 6,6)
sk.df.ape2$otot = as.numeric(sk.df.ape2$ofw)+as.numeric(sk.df.ape2$osw)

########################################################################################################
#########Get length residuals
########## Ludwig von Bertalanffy model:
LvB = function(age, k, L.inf, t0) {
	L.inf*(1-exp(-k*(age-t0)))
}
LvB
# Starting values for non-linear least-squares estimation:
START = c(k = 0.14, L.inf = 1800, t0=.5)
x = sk.df.ape2$median
y = sk.df.ape2$fishlength

length(y)
summary(x)
summary(y)

#missing median ages?
sk.df.ape2.na <- sk.df.ape2[is.na(sk.df.ape2$median),]
sk.df.ape2.na  #should be 0 rows!

#Multiplicative error structure, log(y)~log(x)
logage.fit = nls(log(y) ~ log(LvB(x, k, L.inf, t0)), start = c(k = 0.15, L.inf = 1800, t0=0.6))
summary(logage.fit)

sk.df.ape2 = as.data.frame(sk.df.ape2, lvb.pred = fitted(logage.fit), lvb.resid=resid(logage.fit))
sk.df.ape2$lvb.pred = fitted(logage.fit)
sk.df.ape2$l.resid = resid(logage.fit)

cf = coef(logage.fit)
plot(fitted(logage.fit)~sk.df.ape2$median)
plot(fitted(logage.fit)~resid(logage.fit))


#LVB plot
plot(fishlength~jitter(median,4), pch = 20,cex=.75,xlab="",ylab="", data = sk.df.ape2,xaxt = "n")

mtext(side=1, line=2.25, "Median age", col="black", font=1,cex=1)
mtext(side=2, line=2.25, "Fish length (MEF, mm)", col="black", font=1,cex=1)
lines(1:50, LvB(1:50, k = cf["k"], L.inf = cf["L.inf"], t0 = cf["t0"]), col="blue", lty = 2, lwd=3.5)

#    Regression assumptions met? 
	#Residuls by Length
plot(sk.df.ape2$fishlength, sk.df.ape2$resid,xlab="Length",ylab="Residuals"); abline(h=0, lty=2)
	# Residuals against fitted values:
plot(sk.df.ape2$l.resid ~ sk.df.ape2$l.pred, xlab="Fitted Values",ylab="Residuals"); abline(h=0, lty=2)
	# Residuals over time 
plot(sk.df.ape2$l.resid, type="b", xlab="Time", ylab="Residuals"); abline(h=0, lty=2)



########################################################################################################################33
#### Make sk.tall (age estimates stacked from all readers' are stacked)
names(sk.df.ape2)
r7 = data.frame("NAME" = sk.df.ape2$NAME,"r.eu" = sk.df.ape2$r7.eu, "rage" = sk.df.ape2$r7age, "rfw" = sk.df.ape2$r7.fw, "rsw" = sk.df.ape2$r7.sw, "reader" = rep("reader7", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r8 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r8.eu, "rage" = sk.df.ape2$r8age, "rfw" = sk.df.ape2$r8.fw, "rsw" = sk.df.ape2$r8.sw, "reader" = rep("reader8", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r9 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r9.eu, "rage" = sk.df.ape2$r9age, "rfw" = sk.df.ape2$r9.fw, "rsw" = sk.df.ape2$r9.sw, "reader" = rep("reader9", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r10 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r10.eu, "rage" = sk.df.ape2$r10age, "rfw" = sk.df.ape2$r10.fw, "rsw" = sk.df.ape2$r10.sw, "reader" = rep("reader10", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw, 
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r11 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r11.eu, "rage" = sk.df.ape2$r11age, "rfw" = sk.df.ape2$r11.fw, "rsw" = sk.df.ape2$r11.sw, "reader" = rep("reader11", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r12 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r12.eu, "rage" = sk.df.ape2$r12age, "rfw" = sk.df.ape2$r12.fw, "rsw" = sk.df.ape2$r12.sw, "reader" = rep("reader12", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r13 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r13.eu, "rage" = sk.df.ape2$r13age, "rfw" = sk.df.ape2$r13.fw, "rsw" = sk.df.ape2$r13.sw, "reader" = rep("reader13", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r15 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r15.eu, "rage" = sk.df.ape2$r15age, "rfw" = sk.df.ape2$r15.fw, "rsw" = sk.df.ape2$r15.sw, "reader" = rep("reader15", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r16 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r16.eu, "rage" = sk.df.ape2$r16age, "rfw" = sk.df.ape2$r16.fw, "rsw" = sk.df.ape2$r16.sw, "reader" = rep("reader16", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)
r17 = data.frame("NAME" = sk.df.ape2[,1],"r.eu" = sk.df.ape2$r17.eu, "rage" = sk.df.ape2$r17age, "rfw" = sk.df.ape2$r17.fw, "rsw" = sk.df.ape2$r17.sw, "reader" = rep("reader17", dim(sk.df.ape2)[1]), "median.fw" = sk.df.ape2$median.fw, "median.sw" = sk.df.ape2$median.sw,
 "median" = sk.df.ape2$median, "cv" = sk.df.ape2$cv, "sys" = sk.df.ape2$sys, "Year" = sk.df.ape2$Year, "tp" = sk.df.ape2$tp, "osw" = sk.df.ape2$osw, "ofw" = sk.df.ape2$ofw,"otot" = sk.df.ape2$otot, "fishlength" = sk.df.ape2$fishlength, "l.resid" = sk.df.ape2$l.resid, "doy" = sk.df.ape2$doy)


sk.tall = rbind.data.frame(r7, r8, r9, r10, r11, r12, r13, r15, r16, r17)
head(sk.tall)
sk.tall$oage = substr(sk.tall$NAME, 5,6)

sk.tall$fw.diff = as.numeric(sk.tall$rfw) - as.numeric(sk.tall$median.fw)
sk.tall$sw.diff = as.numeric(sk.tall$rsw) - as.numeric(sk.tall$median.sw)



############################################################################
#####Add read order to sk.tall
readers.numbers2 = c(7, 8, 9, 10, 11, 12, 13, 14, 16, 17)
readers.numbers3 = c("R07", "R08", "R09", "R10", "R11", "R12", "R13", "R14", "R16", "R17")
reader.numbers = data.frame(cbind(readers.numbers2, readers.numbers3))
names(reader.numbers) = c("Number", "Reader")
head(reader.numbers)

#merge to sk
sk = merge(sk, reader.numbers, by.x="USER_ID", by.y = "Number")
sk$I.R = paste(sk$IMAGE_NAME, sk$Reader)
#merge to sk.tall
head(sk)
sk.tall$I.R = paste(sk.tall$NAME, sk.tall$reader)
sk.read.sort$sk.I.R = paste(sk.tall$NAME, sk.tall$reader)
head(sk.tall)


head(sk.tall)
unique(sk.tall$median)
#remove non-integers
sk.tall = sk.tall[sk.tall$median!="5.5",]
sk.tall = sk.tall[sk.tall$median!="3.5",]
sk.tall = sk.tall[sk.tall$median!="4.5",]
sk.tall = sk.tall[sk.tall$median!="2.5",]

head(sk.tall)




#fix R13 sw age 7 in plot
unique(sk.tall$rsw)
sk.tall$rsw = as.numeric(substr(sk.tall$r.eu, 2, 2))







################################################################################################################################

#To test for difference among sys, oage, year/tp:
n.cv = sk.df.ape2 %>% 
  group_by(sys, med.eu, Year, add=T) %>% 
  summarise(mean = mean(cv, na.rm = T))
n.cv = as.data.frame(n.cv)

summary(n.cv$mean)
shapiro.test(n.cv$mean)#not normal

#Attempts to transform CV to something with a normal distribution....
shapiro.test(log(n.cv$mean+1))#NaN, so add 1
shapiro.test(sqrt(n.cv$mean))#not normal
shapiro.test((n.cv$mean)^(1/2))#not normal
shapiro.test((n.cv$mean)^(1/3))#not normal
shapiro.test((n.cv$mean)^(2))#not normal
shapiro.test(1/(n.cv$mean+1)^10) #reciprocal is normal
shapiro.test(1/(n.cv$mean+1)) #reciprocal is normal
shapiro.test(scale(n.cv$mean))#not normal
shapiro.test((n.cv$mean-min(n.cv$mean))/(max(n.cv$mean)-min(n.cv$mean)))
shapiro.test(boxcox(n.cv$mean))#NaN, so add 1
shapiro.test((n.cv$mean+1)^-0.18181818)#not normal
shapiro.test(log((n.cv$mean+1)/(1-(n.cv$mean+1))))#not normal
shapiro.test(asin(sqrt(n.cv$mean)))
qqnorm(sk.df.ape2$cv)
qqnorm(asin(sqrt(n.cv$mean)))



names(n.cv)
####variability is not normal, but zero inflated...
##############################################################
#what distribution does CV in age estimates data have
descdist(sk.df.ape2$cv, discrete = FALSE) #normal? uniform?
normal_dist <- fitdist(sk.df.ape2$cv, "norm")
plot(normal_dist)
gamma_dist <- fitdist(sk.df.ape2$cv, "gamma") #
plot(gamma_dist)
beta_dist <- fitdist(sk.df.ape2$cv, "beta")
plot(beta_dist)
beta_dist

summary(beta_dist)
summary(normal_dist)
summary(gamma_dist)

###fits beta distribution best

############################################################################################################################################
library(brms)
dim(sk.df.ape2)
names(sk.df.ape2)
###########Zero inflated beta with brms library
#brm.fit.zoib <- brm(cv ~ 0+ sys + median.sw + median.fw + tp + l.resid, data = sk.df.ape2, family = zero_inflated_beta())
#made a decision to scale for comparable coefficients

#Helpful vingette: https://cran.r-project.org/web/packages/brms/vignettes/brms_distreg.html
sk.df.ape2$median.sw = as.factor(sk.df.ape2$median.sw)
sk.df.ape2$median.fw = as.factor(sk.df.ape2$median.fw)

#Full model
scale.brm.fit.zoib.wfw <- brm(cv ~ 0+ sys + median.fw + median.sw + tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wfw)
loo.scale.brm.fit.zoib.wfw = loo(scale.brm.fit.zoib.wfw)
loo.scale.brm.fit.zoib.wfw

(scale.brm.fit.zoib.wfw)$prior
prior_summary(scale.brm.fit.zoib.wfw)



#re-fit without FW (zero variance)
scale.brm.fit.zoib <- brm(cv ~ 0+ sys + median.sw + tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib)
loo.scale.brm.fit.zoib = loo(scale.brm.fit.zoib)
loo.scale.brm.fit.zoib

loo.1 = loo_compare(loo.scale.brm.fit.zoib.wfw,loo.scale.brm.fit.zoib)
loo.1

#re-fit without tp
scale.brm.fit.zoib.wotp <- brm(cv ~ 0+ sys + median.fw + median.sw + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wotp)
loo.scale.brm.fit.zoib.wotp = loo(scale.brm.fit.zoib.wotp,reloo = TRUE)
loo.scale.brm.fit.zoib.wotp

#re-fit without sw
scale.brm.fit.zoib.wosw <- brm(cv ~ 0+ sys + median.fw + tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wosw)
loo.scale.brm.fit.zoib.wosw = loo(scale.brm.fit.zoib.wosw)
loo.scale.brm.fit.zoib.wosw

#re-fit without sys
scale.brm.fit.zoib.wosys <- brm(cv ~ 0 + median.fw + median.sw + tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wosys)
loo.scale.brm.fit.zoib.wosys = loo(scale.brm.fit.zoib.wosys)
loo.scale.brm.fit.zoib.wosys

scale.brm.fit.zoib.wolr <- brm(cv ~ 0+ sys + median.fw + median.sw + tp, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wolr)
loo.scale.brm.fit.zoib.wolr = loo(scale.brm.fit.zoib.wolr,reloo = TRUE)
loo.scale.brm.fit.zoib.wolr


###Drop two vars, fw and....
#without fw and sys
scale.brm.fit.zoib.wofwsys <- brm(cv ~ 0+ median.sw + tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wofwsys)
loo.scale.brm.fit.zoib.wofwsys = loo(scale.brm.fit.zoib.wofwsys,reloo = TRUE)
loo.scale.brm.fit.zoib.wofwsys 

#without fw and sw
scale.brm.fit.zoib.wofwsw <- brm(cv ~ 0+ sys + tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wofwsw)
loo.scale.brm.fit.zoib.wofwsw = loo(scale.brm.fit.zoib.wofwsw,reloo = TRUE)
loo.scale.brm.fit.zoib.wofwsw

#without fw and tp
scale.brm.fit.zoib.wofwtp <- brm(cv ~ 0+ sys + median.sw + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wofwtp)
loo.scale.brm.fit.zoib.wofwtp = loo(scale.brm.fit.zoib.wofwtp,reloo = TRUE)
loo.scale.brm.fit.zoib.wofwtp

#without fw and lresid
scale.brm.fit.zoib.wofwlr <- brm(cv ~ 0+ sys + median.sw + tp, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.wofwlr)
loo.scale.brm.fit.zoib.wofwlr = loo(scale.brm.fit.zoib.wofwlr,reloo = TRUE)
loo.scale.brm.fit.zoib.wofwlr


###Drop three (leave two)
#Yes: sys, median.sw, No: median.fw, tp, scale(l.resid)
scale.brm.fit.zoib.syssw <- brm(cv ~ 0+ sys + median.sw, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.syssw)
loo.scale.brm.fit.zoib.syssw = loo(scale.brm.fit.zoib.syssw,reloo=TRUE)
loo.scale.brm.fit.zoib.syssw

#Yes: sys, tp, No: median.fw, median.sw, scale(l.resid)
scale.brm.fit.zoib.systp <- brm(cv ~ 0+ sys + tp, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.systp)
loo.scale.brm.fit.zoib.systp = loo(scale.brm.fit.zoib.systp,reloo=TRUE)
loo.scale.brm.fit.zoib.systp

#Yes: sys, scale(l.resid), No: median.fw, median.sw, tp
scale.brm.fit.zoib.syslr <- brm(cv ~ 0+ sys + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.syslr)
loo.scale.brm.fit.zoib.syslr = loo(scale.brm.fit.zoib.syslr,reloo=TRUE)
loo.scale.brm.fit.zoib.syslr

#Yes: median.sw, scale(l.resid), No: (median.fw, sys), tp
scale.brm.fit.zoib.swlr <- brm(cv ~ 0+ median.sw + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.swlr)
loo.scale.brm.fit.zoib.swlr = loo(scale.brm.fit.zoib.swlr,reloo=TRUE)
loo.scale.brm.fit.zoib.swlr

#Yes: median.sw, tp No: (median.fw, sys), scale(l.resid)
scale.brm.fit.zoib.swtp <- brm(cv ~ 0+ median.sw + tp, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.swtp)
loo.scale.brm.fit.zoib.swtp = loo(scale.brm.fit.zoib.swtp,reloo=TRUE)
loo.scale.brm.fit.zoib.swtp

#Yes: tp, scale(l.resid), No: (median.fw, sys), median.sw
scale.brm.fit.zoib.tplr <- brm(cv ~ 0+ tp + scale(l.resid), data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.tplr)
loo.scale.brm.fit.zoib.tplr = loo(scale.brm.fit.zoib.tplr,reloo=TRUE)
loo.scale.brm.fit.zoib.tplr


###One predictor out of sys + median.fw + median.sw + tp + scale(l.resid)
scale.brm.fit.zoib.fw <- brm(cv ~ 0+ median.fw, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.fw)
loo.scale.brm.fit.zoib.fw = loo(scale.brm.fit.zoib.fw,reloo=TRUE)
loo.scale.brm.fit.zoib.fw
#1 problematic observation(s) found.
#The model will be refit 1 times.
#Fitting model 1 out of 1 (leaving out observation 6095)
#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
#  contrasts can be applied only to factors with 2 or more levels


scale.brm.fit.zoib.sys <- brm(cv ~ 0+ sys, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.sys)
loo.scale.brm.fit.zoib.sys = loo(scale.brm.fit.zoib.sys,reloo=TRUE)
loo.scale.brm.fit.zoib.sys

scale.brm.fit.zoib.tp <- brm(cv ~ 0+ tp, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.tp)
loo.scale.brm.fit.zoib.tp = loo(scale.brm.fit.zoib.tp,reloo=TRUE)
loo.scale.brm.fit.zoib.tp

#################################################
###Add a random uniform nonsense variable 

x = runif(6095, min=-1, max=1) 
sk.df.ape2$RandUni = x
head(sk.df.ape2,2)

#Yes: median.sw, scale(l.resid), No: median.fw, sys, tp
scale.brm.fit.zoib.swlr.rand <- brm(cv ~ 0+ median.sw + scale(l.resid)+RandUni, data = sk.df.ape2, family = zero_inflated_beta())
summary(scale.brm.fit.zoib.swlr.rand)
loo.scale.brm.fit.zoib.swlr.rand = loo(scale.brm.fit.zoib.swlr.rand,reloo=TRUE)
loo.scale.brm.fit.zoib.swlr.rand





#
summary(nsys.scale.brm.fit.zoib)
par(mfrow=c(2,2))
marginal_effects(nsys.scale.brm.fit.zoib)
plot(nsys.scale.brm.fit.zoib)

#Same y-axis?
p = marginal_effects(nt.scale.brm.fit.zoib)
plot(p, ylim=c(0, .25))
plot(marginal_effects(nt.scale.brm.fit.zoib, effects = c("sys", "median.sw","resid"), ylim=c(0, 0.25)))

plot(p)$resid + ggplot2::ylim(0, 0.25)

plot(p)$resid[[1]] + ggplot2::ylim(0, 0.25)




####Repeats
repeats = read.csv("SK.Scale.Age.Study.repeats.csv", sep=",", header=TRUE)
head(repeats)
head(sk.df.ape2)
CompareRepeats = merge(sk.df.ape2, repeats, by.x="NAME", by.y="ImageName")
CompRep = data.frame(CompareRepeats$sys,CompareRepeats$oage,CompareRepeats$l.resid,CompareRepeats$median.sw,CompareRepeats$median,CompareRepeats$med.eu,CompareRepeats$Rep,CompareRepeats$ï..RepNum,
CompareRepeats$r7age,CompareRepeats$r8age,CompareRepeats$r9age,CompareRepeats$r10age,CompareRepeats$r11age,CompareRepeats$r12age,CompareRepeats$r13age,CompareRepeats$r15age,CompareRepeats$r16age,CompareRepeats$r17age)
names(CompRep) = c("Stock","oage","lvb.resid","median.sw","median","med.EU","Rep","RepNum","r7age","r8age","r9age","r10age","r11age","r12age","r13age","r15age","r16age","r17age")
CompRep
summary(CompRep)
dim(CompRep)

CompRep7 = data.frame(CompRep[,1:8],CompRep$r7age)
CompRep8 = data.frame(CompRep[,1:8],CompRep$r8age)
CompRep9 = data.frame(CompRep[,1:8],CompRep$r9age)
CompRep10 = data.frame(CompRep[,1:8],CompRep$r10age)
CompRep11 = data.frame(CompRep[,1:8],CompRep$r11age)
CompRep12 = data.frame(CompRep[,1:8],CompRep$r12age)
CompRep13 = data.frame(CompRep[,1:8],CompRep$r13age)
CompRep15 = data.frame(CompRep[,1:8],CompRep$r15age)
CompRep16 = data.frame(CompRep[,1:8],CompRep$r16age)
CompRep17 = data.frame(CompRep[,1:8],CompRep$r17age)

head(CompRep7)

#get list of scales with full repeats of 3
CompRep7.3 = CompRep7 %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(n = n())
CompRep7.3 = as.data.frame(CompRep7.3)
summary(CompRep7.3)
CompRep7.3

CompRep.lim = CompRep7.3[CompRep7.3$n==3,]
dim(CompRep.lim)

CompRep7.lim = merge(CompRep7, CompRep.lim, by.x="RepNum", by.y="RepNum")
head(CompRep7.lim)
CompRep8.lim = merge(CompRep8, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep9.lim = merge(CompRep9, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep10.lim = merge(CompRep10, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep11.lim = merge(CompRep11, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep12.lim = merge(CompRep12, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep13.lim = merge(CompRep13, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep15.lim = merge(CompRep15, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep16.lim = merge(CompRep16, CompRep.lim, by.x="RepNum", by.y="RepNum")
CompRep17.lim = merge(CompRep17, CompRep.lim, by.x="RepNum", by.y="RepNum")

head(CompRep7.lim)
CompRep7.sum = CompRep7.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r7age))
CompRep7.sum = as.data.frame(CompRep7.sum)

CompRep8.sum = CompRep8.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r8age))
CompRep8.sum = as.data.frame(CompRep8.sum)

CompRep9.sum = CompRep9.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r9age))
CompRep9.sum = as.data.frame(CompRep9.sum)

CompRep10.sum = CompRep10.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r10age))
CompRep10.sum = as.data.frame(CompRep10.sum)

CompRep11.sum = CompRep11.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r11age))
CompRep11.sum = as.data.frame(CompRep11.sum)

CompRep12.sum = CompRep12.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r12age))
CompRep12.sum = as.data.frame(CompRep12.sum)

CompRep13.sum = CompRep13.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r13age))
CompRep13.sum = as.data.frame(CompRep13.sum)

CompRep15.sum = CompRep15.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r15age))
CompRep15.sum = as.data.frame(CompRep15.sum)

CompRep16.sum = CompRep16.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r16age))
CompRep16.sum = as.data.frame(CompRep16.sum)

CompRep17.sum = CompRep17.lim %>% 
  group_by(RepNum) %>% 
  dplyr::summarise(cv = cv(CompRep.r17age))
CompRep17.sum = as.data.frame(CompRep17.sum)

head(CompRep16.sum)
head(CompRep17.sum)

CompRep.cv = data.frame(CompRep7.sum,CompRep8.sum[,2],CompRep9.sum[,2],CompRep10.sum[,2],CompRep11.sum[,2],CompRep12.sum[,2],CompRep13.sum[,2],CompRep15.sum[,2],CompRep16.sum[,2],CompRep17.sum[,2])
names(CompRep.cv) = c("rep.cv7","RepNum","rep.cv8","rep.cv9","rep.cv10","rep.cv11","rep.cv12","rep.cv13","rep.cv15","rep.cv16","rep.cv17")
head(CompRep.cv)
summary(CompRep.cv)
dim(CompRep.cv)

CompRep.cv



#what happens when you take the absolute value of residual
#the sign of the residual matters.... so do not do this!
#nt.scale.brm.fit.zoib.abs <- brm(cv ~ 0+ sys + median.sw + abs(scale(resid)), data = sk.df.ape2, family = zero_inflated_beta())
#summary(nt.scale.brm.fit.zoib.abs)
#marginal_effects(nt.scale.brm.fit.zoib.abs)
LOO(nt.scale.brm.fit.zoib, nt.scale.brm.fit.zoib.abs)
#                                                      LOOIC     SE
#nt.scale.brm.fit.zoib                             -10358.26 254.39
#nt.scale.brm.fit.zoib.abs                         -10297.38 255.31
#nt.scale.brm.fit.zoib - nt.scale.brm.fit.zoib.abs    -60.88  20.53


###Ad a few prettier names
#stock = read.table("clipboard", header=T, sep="\t")
#sk.df.ape2 = merge(sk.df.ape2, stock, by.x="sys", by.y="sys")
head(sk.df.ape2)

Euro.age = read.table("clipboard", header=T, sep="\t")
head(Euro.age)
sk.df.ape2 = merge(sk.df.ape2, Euro.age, by.x="med.eu", by.y="med.eu")
head(sk.df.ape2)


#sk.df.ape2 <- sk.df.ape2[,c(1:65)]

##################################################################################################
###Plots of raw data, preliminary look at results
##plot CV vs. age, sys, year
#base plots
mean.cv.sys = mean(aggregate(sk.df.ape2$cv, by=list(sk.df.ape2$sys), FUN=mean, na.rm=T)[,2]); mean.cv.sys
plot(sk.df.ape2$cv ~sk.df.ape2$sys,xlab="",ylab="", cex.axis = .8)
mtext(side=1, line=2.25, "Stock", col="black", font = 1, cex=1)
mtext(side=2, line=2.25, "CV in age estimates", col="black", font = 1, cex=1)

#summarize data by year original age and sys
n.cv = sk.df.ape2 %>% 
  group_by(sys, oage, add=T) %>% 
  dplyr::summarise(mean = mean(cv, na.rm = T))
n.cv = as.data.frame(n.cv)
m.sys = aggregate(sk.df.ape2$cv, by=list(sk.df.ape2$sys), FUN=mean, na.rm=T);m.sys
shapiro.test(m.sys$x)

sys.aov = aov(mean+1~sys, data = n.cv); summary(sys.aov)
plot(sys.aov, 1)
plot(sys.aov, 2)
shapiro.test(x = residuals(object = sys.aov))


plot(sk.df.ape2$cv[!is.na(sk.df.ape2$med.eu)]~as.factor(sk.df.ape2$med.eu[!is.na(sk.df.ape2$med.eu)]),xlab="",ylab="", cex.axis = 1)
mtext(side=1, line=2.5, "Median age (European notation)", col="black", font = 1)
mtext(side=2, line=2.5, "CV in age estimates", col="black", font = 1)


#plot length residual ~ CV
mean.cv.lresid = mean(aggregate(sk.df.ape2$cv, by=list(sk.df.ape2$l.resid), FUN=mean, na.rm=T)[,2]); mean.cv.lresid
plot(sk.df.ape2$cv~sk.df.ape2$l.resid,xlab="",ylab="", cex.axis = .9)
mtext(side=1, line=2.5, "Length residual", col="black", font = 1)
mtext(side=2, line=2.5, "CV in age estimates", col="black", font = 1)



head(n.cv)
#oage, stock interaction?
summary(aov(mean~oage*sys, data = n.cv)) #Significant interaction
#interaction plots
fit  = aov(mean~oage*sys, data = n.cv)

fit; summary(fit)
interaction.plot(n.cv$oage, n.cv$sys, n.cv$mean)
se = sqrt(0.002669)/sqrt(dim(n.cv)[1])  #sigma value from summary(fit)
pd = position_dodge(.65)    ### How much to jitter the points on the plot
ggplot(n.cv,  aes(x = oage, y = mean, color = sys)) +
    geom_point(shape = 15, size  = 1,  position = pd) +
    geom_errorbar(aes(ymin  = mean - se,
	ymax  = mean + se), width = 0.25, size  = 0.75, position = pd) +
	theme_bw()

#Summarize data by sys, year, and original age
n.cv = sk.df.ape2 %>% 
  group_by(sys, oage, Year, add=T) %>% 
  dplyr::summarise(mean = mean(cv, na.rm = T))
n.cv = as.data.frame(n.cv)

mean.cv.yr = mean(aggregate(sk.df.ape$cv, by=list(sk.df.ape$Year), FUN=mean, na.rm=T)[,2]); mean.cv.yr
plot(sk.df.ape$cv ~as.factor(sk.df.ape$Year),xlab="",ylab="", cex.axis = .9)
mtext(side=1, line=2.5, "Year", col="black")
mtext(side=2, line=2.5, "CV in age estimates", col="black", font = 1)
box()
axis(1,at=seq(0,30,10),labels=T)

xtick<-seq(1980, 2015, by=10)


summary(aov(mean~Year, data = n.cv)) # p = 0.84
#plot(aov(mean~Year, data = n.cv))
aggregate(sk.df.ape$cv, by=list(sk.df.ape$Year), FUN=mean, na.rm=T)


#Summarize data by sys, time period, and original age
n.cv = sk.df.ape2 %>% 
  group_by(sys, oage, tp, add=T) %>% 
  dplyr::summarise(mean = mean(cv, na.rm = T))
n.cv = as.data.frame(n.cv)
mean.cv.tp = mean(aggregate(sk.df.ape$cv, by=list(sk.df.ape$tp), FUN=mean, na.rm=T)[,2]); mean.cv.tp
aggregate(sk.df.ape$cv, by=list(sk.df.ape$tp), FUN=mean, na.rm=T)
plot(sk.df.ape$cv ~as.factor(sk.df.ape$tp)); abline(mean.cv.tp,0, col = "red")
summary(aov(mean~tp, data = n.cv)) #0.89
#plot(aov(mean~tp, data = n.cv))


##look for year:sys with high variability possibly indicating checks(inspired by Larry)
n.cv = sk.df.ape2 %>% 
  group_by(sys, Year, add=T) %>% 
  dplyr::summarise(mean = mean(cv, na.rm = T)); n.cv = as.data.frame(n.cv)
p2 = ggplot(sk.df.ape, aes(Year, cv, group = Year)) + geom_boxplot() + facet_grid(sys ~.) +theme_bw(); p2
head(n.cv)
summary(aov(mean~Year*sys, data = n.cv)) #Significant interaction
interaction.plot(n.cv$Year, n.cv$sys, n.cv$mean)

##look for age:sys with high variabliity
interaction.plot(n.cv$Year, n.cv$sys, n.cv$mean)


####################################################################################33
###Summary table: count of age by system:
head(sk.tall)
count.age.sys = sk.tall %>% 
  group_by(sys, add=T) %>% 
  dplyr::summarise(n = n(), n.r = sum(!is.na(median)), median = (median(median, na.rm = T)))
as.data.frame(count.age.sys)
count.age.sys
#sk.tall <- sk.tall[,1:16]
head(sk.tall)
dim(sk.tall)


SysRead = paste(sk.tall$reader, sk.tall$sys) 
sk.tall = cbind(sk.tall,SysRead)



######################################################################################
###reader vs experienced, inexperienced
###bring in original ages

###Difference between Median age and Original age (by sys)
sk.tall$mage.oage = sk.tall$median - sk.tall$otot
hw = aggregate(sk.tall$mage.oage, by=list(sk.tall$sys), FUN=mean, na.rm=T); hw
hw = aggregate(sk.tall$mage.oage, by=list(sk.tall$sys), FUN=var, na.rm=T); hw
hw = aggregate(sk.tall$mage.oage, by=list(sk.tall$sys), FUN=skewness, na.rm=T); hw
hw = aggregate(sk.tall$mage.oage, by=list(sk.tall$sys), FUN=kurtosis, na.rm=T); hw


#ggplot(sk.tall, aes(sys, mage.oage)) + geom_jitter(position = position_jitter(0.4))+ theme_bw()
#ggplot(sk.tall, aes(sys, mage.oage)) + geom_boxplot()+ theme_bw()
#ggplot(sk.tall, aes(sys, mage.oage)) + geom_violin(position = position_jitter(0.025))+ theme_bw()
ggplot(sk.tall[which(sk.tall$mage.oage!=0),], aes(mage.oage)) + geom_bar(width = 0.5) +facet_grid(sys~.)+ theme_bw()

ggplot(sk.tall[which(sk.tall$mage.oage!=0),], aes(mage.oage)) + geom_bar(width = 0.5) +facet_grid(sys~median)+ theme_bw()

#add dotted gray line at 0?
ggplot(sk.tall[which(sk.tall$mage.oage!=0),], aes(mage.oage)) + geom_bar(width = 0.5) +facet_grid(sys~.)+ theme_bw()

library(moments)
###Difference between reader and original age
sk.tall$rage.oage = sk.tall$rage - sk.tall$otot
hw = as.data.frame(aggregate(sk.tall$rage.oage, by=list(sk.tall$family:sk.tall$reader), FUN=mean, na.rm=T)); hw$cat = substr(hw$Group.1, 1, 3); hw; summary(aov(x~cat, data = hw))
hw = aggregate(sk.tall$rage.oage, by=list(sk.tall$family:sk.tall$reader), FUN=var, na.rm=T); hw$cat = substr(hw$Group.1, 1, 3); hw; summary(aov(x~cat, data = hw))
hw = aggregate(sk.tall$rage.oage, by=list(sk.tall$family:sk.tall$reader), FUN=skewness, na.rm=T); hw$cat = substr(hw$Group.1, 1, 3); hw; summary(aov(x~cat, data = hw))
hw = aggregate(sk.tall$rage.oage, by=list(sk.tall$family:sk.tall$reader), FUN=kurtosis, na.rm=T); hw$cat = substr(hw$Group.1, 1, 3); hw; summary(aov(x~cat, data = hw))

ggplot(sk.tall[which(sk.tall$rage.oage!=0),], aes(rage.oage)) + geom_bar(width = 0.5) +facet_grid(reader~family)+ theme_bw()


###Difference between reader and median age 
unique(sk.tall$rage)
unique(sk.tall$median)

sk.tall$rage.mage = sk.tall$rage - sk.tall$median
hw = aggregate(sk.tall$rage.mage, by=list(sk.tall$family:sk.tall$reader), FUN=mean, na.rm=T); hw; hw$cat = substr(hw$Group.1, 1, 3); summary(aov(x~cat, data = hw))
hw = aggregate(sk.tall$rage.mage, by=list(sk.tall$family:sk.tall$reader), FUN=var, na.rm=T); hw; hw$cat = substr(hw$Group.1, 1, 3); summary(aov(x~cat, data = hw))
hw = aggregate(sk.tall$rage.mage, by=list(sk.tall$family:sk.tall$reader), FUN=skewness, na.rm=T); hw; hw$cat = substr(hw$Group.1, 1, 3); summary(aov(x~cat, data = hw))
hw = aggregate(sk.tall$rage.mage, by=list(sk.tall$family:sk.tall$reader), FUN=kurtosis, na.rm=T); hw; hw$cat = substr(hw$Group.1, 1, 3); summary(aov(x~cat, data = hw))
ggplot(sk.tall[which(sk.tall$rage.mage!=0),], aes(rage.mage)) + geom_bar() +facet_grid(reader~family)+ theme_bw()

ggplot(sk.tall, aes(rage.mage)) + geom_bar() +facet_grid(reader~family)+ theme_bw()

x <- c(-3, -2, -1, 0, 1, 2, 3)

#Group averages for ppt
summary(sk.tall$rage.mage)[4]
aggregate(sk.tall$rage.mage, by=list(sk.tall$family), FUN=mean, na.rm=T)

head(sk.df.ape)

sk.tall.int = sk.tall[sk.tall$rage.mage %in% c(1,2,3,4,0,-1, -2, -3, -4),]
ggplot(sk.tall.int, aes(rage.mage)) + geom_bar( aes(fill = factor(rage.mage)))+facet_grid(reader~family)+ theme_bw()
ggplot(sk.tall.int, aes(rage.mage)) + geom_bar(aes(fill = factor(rage.mage), y = (..count..)/sum(..count..)  ))+scale_y_continuous(labels=scales::percent) + ylab("relative frequencies") + facet_grid(reader~family)+ theme_bw()
#experience and not are not copmarable sample sizes, inappropriate plots

#OK, cheated and used Excel to pivot data. 
names(sk.tall)
count.rage.mage = sk.tall %>% 
  group_by(rage.mage, add=T) %>% 
  group_by(family, add=T) %>% 
  group_by(reader, add=T) %>% 
  dplyr::summarize(n = n(), n.r = sum(!is.na(median)), mean = mean(mean, na.rm = T), median = (median(median, na.rm = T)))
count.rage.mage = as.data.frame(count.rage.mage)
count.rage.mage

fam.percent = (read.table("clipboard", header=T, sep="\t")) 

p = ggplot(fam.percent, aes(x = Difference, y = Percent, fill = as.factor(Difference))) + geom_bar(stat = "identity")+ facet_grid(Reader~Family) + theme_bw()
p + scale_fill_manual(values=c("#E69F00", "#56B4E9", "#009E73", "#708090", "#CC79A7", "#D55E00"))




###Sample size of median ages by stock
med.n.sys = sk.df.ape2 %>% 
  group_by(sys) %>% 
  dplyr::summarise(r_n=n())
med.n.sys = as.data.frame(med.n.sys)

med.n.sys

med.n.sys.m = sk.df.ape2 %>% 
  group_by(sys,median.sw) %>% 
  dplyr::summarise(r_n=n())
med.n.sys.m = as.data.frame(med.n.sys.m)

med.n.sys.m






###############################Age bias plots
#By reader, SW
sk.tall$median.sw = as.numeric(sk.tall$median.sw)
sk.tall$rsw = as.numeric(sk.tall$rsw)
head(sk.tall,2)
rb.data = sk.tall %>% 
  group_by(reader, median.sw) %>% 
  dplyr::summarise(mean = mean(rsw), L95 = quantile(rsw, .05), U95 = quantile(rsw, .95), r_n=n())
rb.data = as.data.frame(rb.data)
head(rb.data)

#Merge pretty reader names
r.XX1 = c("reader7","reader8","reader9","reader10","reader11","reader12","reader13","reader14","reader16","reader17")
r.XX2 = c("R07", "R08", "R09", "R10", "R11", "R12", "R13", "R14", "R16", "R17")
r.XX = data.frame(cbind(r.XX1,r.XX2))
head(r.XX)
names(r.XX) = c("Reader", "Rnum")

rb.data = merge(rb.data, rXX, by="reader")
head(rb.data)

#plot
p<- ggplot(rb.data, aes(x=median.sw, y=mean, group=Rnum)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5), size = 0.8)+  
  geom_point(size=1.5)+
  facet_wrap(.~Rnum)+ 
  geom_abline(intercept = 0, slope = 1)+
  xlab("Median marine age") +
  ylab("Reader marine age") 
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))+
  theme(strip.background =element_rect(fill="White"))+
  theme(axis.text = element_text(colour = "black"))


###
#By stock
sk.tall$median.sw = as.numeric(sk.tall$median.sw)
sk.tall$rsw = as.numeric(sk.tall$rsw)
head(sk.tall)
rb.sys.data = sk.tall %>% 
  group_by(sys, median.sw) %>% 
  dplyr::summarise(mean = mean(rsw), L95 = quantile(rsw, .05), U95 = quantile(rsw, .95), r_n=n())
rb.sys.data = as.data.frame(rb.sys.data)
head(rb.sys.data)

#pretty sys names
rSTOCK1 = c("ZA","ZC","ZK","ZN","ZS")
rSTOCK2 = c("Karluk","Copper","Kuskokwim","Nushagak","Stikine")
rSTOCK = data.frame(cbind(rSTOCK1,rSTOCK2))
names(rSTOCK) = c("sys", "Stock")

head(rSTOCK)
rb.sys.data = merge(rb.sys.data, rSTOCK, by="sys")
head(rb.sys.data)
rb.sys.data.full = rb.sys.data #save a full version 

rb.sys.data = rb.sys.data.full
#only include marine age-1 for Karluk and Stikine, all others did not originally have this age
rb.sys.data = rb.sys.data[-c(6,15), ]  
rb.sys.data


#plot
p<- ggplot(rb.sys.data, aes(x=median.sw, y=mean, group=Stock)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5), size = 0.8)+  
  geom_point(size=1.5)+
  facet_wrap(.~Stock)+ 
  geom_abline(intercept = 0, slope = 1)+
  xlab("Median marine age") +
  ylab("Reader marine age") 
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))+
  theme(strip.background =element_rect(fill="White"))+
  theme(axis.text = element_text(colour = "black"))




#By reader, FW
head(sk.tall)
sk.tall$median.fw = as.numeric(sk.tall$median.fw)
sk.tall$rfw = as.numeric(sk.tall$rfw)
fw.rb.data = sk.tall %>% 
  group_by(reader, median.fw) %>% 
  dplyr::summarise(mean = mean(rfw), L95 = quantile(rfw, .05), U95 = quantile(rfw, .95), r_n=n())
fw.rb.data = as.data.frame(fw.rb.data)
head(fw.rb.data)


#pretty reader names above
fw.rb.data = merge(fw.rb.data, rXX, by="reader")
head(fw.rb.data)

#plot
p<- ggplot(fw.rb.data, aes(x=median.fw, y=mean, group=Rnum)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5), size = 0.8)+  
  geom_point(size=1.5)+
  facet_wrap(.~Rnum)+ 
  geom_abline(intercept = 0, slope = 1)+
  xlab("Median freshwater age") +
  ylab("Reader freshwater age") 
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))+
  theme(strip.background =element_rect(fill="White"))+
  theme(axis.text = element_text(colour = "black"))






######Age bias plot by ocean age
#fix R13 sw age 7 in plot
unique(sk.tall$rsw)
sk.tall$rsw = as.numeric(substr(sk.tall$r.eu, 2, 2))

#No reader 13
sk.tall.9 = sk.tall[sk.tall$reader!="reader13",]

rb.data.9 = sk.tall.9 %>% 
  group_by(median.sw) %>% 
  dplyr::summarise(mean = mean(rsw), L95 = quantile(rsw, .05), U95 = quantile(rsw, .95), r_n=n())

rb.data.9 = as.data.frame(rb.data.9)

head(sk.tall.9)
length(sk.tall.9[sk.tall.9$reader=="reader10",]$NAME)

sk.tall.9[sk.tall.9$NAME=="ZS0013M004",]


#sink("sk.tall.names.txt")
#sink()


p<- ggplot(rb.data.9, aes(x=median.sw, y=mean)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5))+  
  geom_point()+
  xlab("Median marine age") +
  ylab("Reader marine age") +
  geom_abline(intercept = 0, slope = 1)
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))+ 
  theme(axis.text = element_text(colour = "blue"))
p
dim(sk.df.ape2)


#with reader 13
rb.data.13 = sk.tall %>% 
  group_by(median.sw) %>% 
  dplyr::summarise(mean = mean(rsw), L95 = quantile(rsw, .05), U95 = quantile(rsw, .95), r_n=n())
rb.data.13 = as.data.frame(rb.data.13)

p<- ggplot(rb.data.13, aes(x=median.sw, y=mean)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5))+  
  geom_point()+
  xlab("Median marine age") +
  ylab("Reader marine age") +
  geom_abline(intercept = 0, slope = 1)
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))






#Age bias plot by familiarity
head(sk.tall)
#rFAM = (read.table("clipboard", header=T, sep="\t"))
head(rFAM)
#sk.tall$fam = paste(sk.tall$reader, sk.tall$sys)
sk.tall= merge(sk.tall, rFAM, by="fam")
head(sk.tall)


rfam.data = sk.tall %>% 
  group_by(family.y, reader, median.sw) %>% 
  dplyr::summarise(mean = mean(rsw), L95 = quantile(rsw, .05), U95 = quantile(rsw, .95), r_n=n())
rfam.data = as.data.frame(rfam.data)
head(rfam.data)

#pretty reader names
#rXX = (read.table("clipboard", header=T, sep="\t"))
head(rXX)
head(rfam.data)
rfam.data = merge(rfam.data, rXX, by="reader")
head(rfam.data)

#plot
p<- ggplot(rfam.data, aes(x=median.sw, y=mean, group=family.y)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5))+  
  geom_point()+
  facet_grid(family.y~Rnum)+ 
  geom_abline(intercept = 0, slope = 1)+
  xlab("Median marine age") +
  ylab("Reader marine age")  + 
  theme(strip.background=element_rect(fill="white"))
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))







names(sk.tall)
summary(sk.tall$sys)
names(sk.tall)
#fix R13 sw age 7 in plot
unique(sk.tall$median.sw)
head(sk.tall)
sk.tall$rsw = as.numeric(substr(sk.tall$r.eu, 2, 2))

#Age bias plot original vs. median
#dplyr is lleaving out 0's?
om.data = sk.tall %>% 
  group_by(sys, median.sw) %>% 
  summarise(mean = mean(as.numeric(osw)), L95 = quantile(as.numeric(osw), .025), U95 = quantile(as.numeric(osw), .975), r_n=n())
om.data = as.data.frame(om.data)

summary(om.data)
#plot
p<- ggplot(om.data, aes(x=median.sw, y=mean)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5))+  
  geom_point()+
  facet_wrap(~sys)+ 
  geom_abline(intercept = 0, slope = 1)+
  xlab("Median ocean age") +
  ylab("Original ocean age") 
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))

#ggMarginal(p, type = "histogram")




head(sk.df.ape2)

#Subset for length~age plot
strat = paste(sk.df.ape2$yr, sk.df.ape2$median, sk.df.ape2$sys, sk.df.ape2$osw)
sk.df.ape2 = cbind(sk.df.ape2, strat)
rndid <- with(sk.df.ape2, ave(fishlength, strat, FUN=function(x) {sample.int(length(x))}))
sk.df.ape.sm = sk.df.ape2[rndid<=5,]
head(sk.df.ape.sm)

ggplot(sk.df.ape.sm, aes(fishlength, median))+ geom_smooth()




############################################################################
#Reader vs. yrs of experience
#Reader experience and variability are presented by reader in a random order:

R.exp1 = c(0.030019297, 0.025753226,0.02544597,0.034424468,0.015528872,0.024274153,0.02036326,0.026199558,0.013729702)
R.exp2 = c(5.5,4,4,4,22,4,27,9,24)
R.exp = data.frame(cbind(R.exp1,R.exp2))
head(R.exp)
names(R.exp) = c("avg.cv", "yrs.exp")

head(R.exp)
lm(avg.cv~yrs.exp, data = R.exp)
summary(lm(avg.cv~yrs.exp, data = R.exp))
plot(lm(avg.cv~yrs.exp, data = R.exp))

library(scales)

ggplot(R.exp, aes(x=yrs.exp, y=avg.cv)) +
    geom_point(shape=19, cex = 2) +    
    geom_smooth(method=lm)  +
    xlab("Years of experience") + ylab("Average reader bias") +  
    scale_x_continuous(breaks = scales::pretty_breaks(n = 5))

# Add linear regression line (by default includes 95% confidence region)




############################################################################
#Did readers cause change length at age over time? Compare median age trends to original age trends
###Regression for length over time for each age, stock
#For median.sw age

head(sk.df.ape2)
summary(sk.df.ape2)
sk.df.ape.length = group_by(sk.df.ape2, sys, median.sw, Year) 
names(sk.df.ape.length)
per_year = summarise(sk.df.ape.length, yr.mean = mean(fishlength))
per_year = as.data.frame(per_year)
head(per_year)

#########Regression for length over time for each age, stock
####median ocean age
lm.length.time = sk.df.ape.length %>%  
 group_by(sys, median.sw) %>%  
 do(lm1 = lm(fishlength~Year, data=.))
as.data.frame(tidy(lm.length.time, lm1))

#the plots
unique(per_year$median.sw)
head(per_year)
p1 = ggplot(per_year, aes(Year, yr.mean, group = Year)) + geom_point(color = "black") + facet_grid(median.sw~sys) +theme_bw() + 
 geom_smooth(method = "lm", se=F, aes(group=1), color = "blue", weight = 0.75)+ facet_grid(median.sw~sys)+ theme_bw()+ 
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()); p1


###Original ocean age
sk.df.ape.length = group_by(sk.df.ape2, sys, osw, Year) 
per_year.o = summarize(sk.df.ape.length, yr.mean = mean(fishlength))
per_year.o = as.data.frame(per_year.o)

#regression coefficients
lm.length.time = per_year.0 %>%  
 group_by(sys, osw) %>%  
 do(lm1 = lm(yr.mean~Year, data=.))
as.data.frame(tidy(lm.length.time, lm1))

names(per_year.o)
#plot length at age over time
p1 = ggplot(per_year.o, aes(Year, yr.mean, group = Year)) + 
geom_point(color = "black") + 
facet_grid(osw~sys) +theme_bw() + 
geom_smooth(method = "lm", se=F, aes(group=1), color = "red", weight = 0.75, alpha = .5)+ 
facet_grid(osw~sys)+ 
theme(panel.grid.major = element_blank(), axis.text = element_text(colour = "black"), panel.grid.minor = element_blank()); p1

p2 = ggplot(per_year, aes(Year, yr.mean, group = Year)) + 
geom_point(color = "black") + 
facet_grid(median.sw~sys) +theme_bw() + 
geom_smooth(method = "lm", se=F, aes(group=1), color = "blue", weight = 0.755, alpha = .5)+ 
facet_grid(median.sw~sys)+ theme_bw()+ 
theme(panel.grid.major = element_blank(), axis.text = element_text(colour = "black"), panel.grid.minor = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA)); p2 

library(cowplot)

plot_grid(p1, p2)

#There are more trends length at age from median agse than from original ages




############################################################################################
#Perfect reader agreement on age
head(sk.eu)
#remove NAs
sk.eu.full <- sk.eu[!is.na(sk.eu$median.fw),]
sk.eu.full <- sk.eu.full[!is.na(sk.eu.full$median.sw),]
head(sk.eu.full)

sk.eu.full <- sk[!is.na(sk$median.fw),]
sk.eu.full <- sk.eu.full[!is.na(sk.eu.full$median.sw),]
head(sk.eu.full)



names(sk.eu.full)
for (i in 1:nrow(sk.eu.full)){
	if(sk.eu.full$r7.eu[i]!=sk.eu.full$r8.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r8.eu[i]!=sk.eu.full$r9.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r9.eu[i]!=sk.eu.full$r10.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r10.eu[i]!=sk.eu.full$r11.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r11.eu[i]!=sk.eu.full$r12.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r12.eu[i]!=sk.eu.full$r15.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r15.eu[i]!=sk.eu.full$r16.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else if(sk.eu.full$r16.eu[i]!=sk.eu.full$r17.eu[i]){ sk.eu.full$perfTF[i] = "F"}
	else {sk.eu.full$perfTF[i] = "T"}
	}

head(sk.eu.full)
tail(sk.eu.full)

sk.eu.T = sk.eu.full[sk.eu.full$perfTF %in% c("T"),]
sk.eu.T$sex = substr(sk.eu.T$NAME, 7, 7) 
sk.eu.T$sys = substr(sk.eu.T$NAME, 1, 2) 
sk.eu.T$Eu = paste(sk.eu.T$median.fw, sk.eu.T$median.sw,sep = "")

head(sk.eu.T)
names(sk.eu.T)
dim(sk.eu.T)
length(sk.eu.T$NAME)

unique(sk.eu.T$Eu)

#Number of fish by age group
sk.eu.T %>% 
  group_by(Eu) %>%
  dplyr::summarise(number = n())

theme_set(theme_bw(base_size=12)+ 
  theme(panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()))

head(sk.eu.T)

dim(sk.eu.T)

p2 = ggplot(sk.eu.T, aes(x = fishlength, y = Eu, group = Eu, fill=factor(..quantile..))) +
  ylab("Median age (European notation)")+
  xlab( "Fish length (mm)" )+
  stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE, quantiles = c(0.05, 0.95),quantile_lines =TRUE) +
  scale_fill_manual(name = "Quantile", values = c("#E69F00", "#999999", "#56B4E9"),
                    labels = c("(0, 0.05)", "(0.05, 0.95)", "(0.95, 1)"))

p2
p2 + facet_grid(sex~.) + theme_bw()
p2 + facet_wrap(sys~.) + theme_bw()



###############################################################
####Perfect agreement with all readers, including R13

summary(sk.eu)
sk.eu.full2 <- sk.eu[!is.na(sk.eu$median.fw),]
sk.eu.full2 <- sk.eu.full2[!is.na(sk.eu.full2$median.sw),]
head(sk.eu.full2)


names(sk.eu.full2)
for (i in 1:nrow(sk.eu.full2)){
	if(sk.eu.full2$r7.eu[i]!=sk.eu.full2$r8.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r8.eu[i]!=sk.eu.full2$r9.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r9.eu[i]!=sk.eu.full2$r10.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r10.eu[i]!=sk.eu.full2$r11.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r11.eu[i]!=sk.eu.full2$r12.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r12.eu[i]!=sk.eu.full2$r15.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r15.eu[i]!=sk.eu.full2$r13.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r13.eu[i]!=sk.eu.full2$r16.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else if(sk.eu.full2$r16.eu[i]!=sk.eu.full2$r17.eu[i]){ sk.eu.full2$perfTF[i] = "F"}
	else {sk.eu.full2$perfTF[i] = "T"}
	}

head(sk.eu.full2)
tail(sk.eu.full2)

sk.eu.T2 = sk.eu.full2[sk.eu.full2$perfTF %in% c("T"),]
sk.eu.T2$sex = substr(sk.eu.T2$NAME, 7, 7) 
sk.eu.T2$sys = substr(sk.eu.T2$NAME, 1, 2) 
sk.eu.T2$Eu = paste(sk.eu.T2$median.fw, sk.eu.T2$median.sw,sep = "")

head(sk.eu.T2)
names(sk.eu.T2)
dim(sk.eu.T2)
length(sk.eu.T2$NAME)

unique(sk.eu.T2$Eu)

#Number of fish by age group
sk.eu.T2 %>% 
  group_by(Eu) %>%
  dplyr::summarise(number = n())


p2 = ggplot(sk.eu.T2, aes(x = fishlength, y = Eu, group = Eu, fill=factor(..quantile..))) +
  ylab("Median age (European notation)")+
  xlab( "Fish length (mm)" )+
  stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE, quantiles = c(0.05, 0.95),quantile_lines =TRUE) +
  scale_fill_manual(name = "Quantile", values = c("#E69F00", "#999999", "#56B4E9"),
                    labels = c("(0, 0.05)", "(0.05, 0.95)", "(0.95, 1)"))

p2
p2 + facet_grid(sex~.) + theme_bw()
p2 + facet_wrap(sys~.) + theme_bw()











###summary age/length tables
#detach(package:plyr)
sk.eu.T %>% 
  group_by(as.factor(Eu)) %>% 
  summarise(n = n(), Mean = mean(fishlength.x), Median = (median(fishlength.x)),FiveP = quantile(fishlength.x,.05), NinetyfiveP = quantile(fishlength.x,.95))
head(sk.eu.T)


summary(as.data.frame(sk.eu.Tn$sys))
sk.eu.Tn.ZA = sk.eu.T[sk.eu.T$sys %in% c("ZA"),]
sk.eu.Tn.ZC = sk.eu.T[sk.eu.T$sys %in% c("ZC"),]
sk.eu.Tn.ZK = sk.eu.T[sk.eu.T$sys %in% c("ZK"),]
sk.eu.Tn.ZN = sk.eu.T[sk.eu.T$sys %in% c("ZN"),]
sk.eu.Tn.ZS = sk.eu.T[sk.eu.T$sys %in% c("ZS"),]


head(sk.eu.Tn.ZA)
sk.eu.Tn.ZA %>% 
  group_by(as.factor(Eu)) %>% 
  summarise(n = n(), Mean = mean(fishlength.x), Median = (median(fishlength.x)),FiveP = quantile(fishlength.x,.05), NinetyfiveP = quantile(fishlength.x,.95))
head(sk.eu.T)





#out = (aov(fishlength ~ as.factor(r7.eu)/sex/sys, data = sk.eu.Tn))
#summary(out); plot(out)
#ks.test
#sk.eu.Tn %>% summarise_each(funs(ks.test(.[r7.eu == "14"], .[r7.eu == "15"])$p.value), vars = fishlength)

#Ugh. Large sample size so need to use bayesian approach
library(BANOVA)
?BANOVA



##################################################
#Import CWT-matched scales
#cwt = (read.table("clipboard", header=T, sep="\t")) #only 49200 records get imported
names(cwt)
p2 = ggplot(cwt, aes(x = LENGTH, y = AGE, group = AGE, fill=factor(..quantile..))) +
  stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE, quantiles = c(0.05, 0.95)) +
  scale_fill_manual(name = "Quantile", values = c("#E69F00", "#999999", "#56B4E9"),
                    labels = c("(0, 0.05)", "(0.05, 0.95)", "(0.95, 1)"))
p2
p2 + facet_grid(SYSTEM_CODE~SEX) + theme_bw()

###
#CWT age data only
#cwt.ages = read.table("clipboard", header=T, sep="\t")
head(cwt.ages)

r7 = data.frame(r7 = cwt.ages[cwt.ages$USER_ID==7,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==7,]$IMAGE_NAME)
r8 = data.frame(r8 = cwt.ages[cwt.ages$USER_ID==8,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==8,]$IMAGE_NAME)
r9 = data.frame(r9 = cwt.ages[cwt.ages$USER_ID==9,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==9,]$IMAGE_NAME)
r10 = data.frame(r10 = cwt.ages[cwt.ages$USER_ID==10,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==10,]$IMAGE_NAME)
r11 = data.frame(r11 = cwt.ages[cwt.ages$USER_ID==11,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==11,]$IMAGE_NAME)
r12 = data.frame(r12 = cwt.ages[cwt.ages$USER_ID==12,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==12,]$IMAGE_NAME)
r13 = data.frame(r13 = cwt.ages[cwt.ages$USER_ID==13,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==13,]$IMAGE_NAME)
r14 = data.frame(r14 = cwt.ages[cwt.ages$USER_ID==14,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==14,]$IMAGE_NAME)
r15 = data.frame(r15 = cwt.ages[cwt.ages$USER_ID==15,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==15,]$IMAGE_NAME)
r16 = data.frame(r16 = cwt.ages[cwt.ages$USER_ID==16,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==16,]$IMAGE_NAME)
r17 = data.frame(r17 = cwt.ages[cwt.ages$USER_ID==17,]$user.age, NAME= cwt.ages[cwt.ages$USER_ID==17,]$IMAGE_NAME)

#Make dataframe by merging reader data with CWT marine age
cwt.df = as.data.frame(unique(cwt.ages$IMAGE_NAME)); colnames(cwt.df) = c("NAME")
head(cwt.df)
cwt.df = merge(cwt.df, r7, by = "NAME"); cwt.df = merge(cwt.df, r8, by = "NAME")
cwt.df = merge(cwt.df, r9, by = "NAME"); cwt.df = merge(cwt.df, r10, by = "NAME")
cwt.df = merge(cwt.df, r11, by = "NAME"); cwt.df = merge(cwt.df, r12, by = "NAME")
cwt.df = merge(cwt.df, r13, by = "NAME"); cwt.df = merge(cwt.df, r14, by = "NAME")
cwt.df = merge(cwt.df, r15, by = "NAME"); cwt.df = merge(cwt.df, r16, by = "NAME")
cwt.df = merge(cwt.df, r17, by = "NAME")
cwt.df$cwt.age = substr(cwt.df$NAME,5,6)

cwt.df$r7.sw = as.numeric(substr(cwt.df$r7,2,2)); cwt.df$r8.sw = as.numeric(substr(cwt.df$r8,2,2))
cwt.df$r9.sw = as.numeric(substr(cwt.df$r9,2,2));cwt.df$r10.sw = as.numeric(substr(cwt.df$r10,2,2))
cwt.df$r11.sw = as.numeric(substr(cwt.df$r11,2,2)); cwt.df$r12.sw = as.numeric(substr(cwt.df$r12,2,2))
cwt.df$r13.sw = as.numeric(substr(cwt.df$r13,2,2));cwt.df$r14.sw = as.numeric(substr(cwt.df$r14,2,2))
cwt.df$r15.sw = as.numeric(substr(cwt.df$r15,2,2));cwt.df$r16.sw = as.numeric(substr(cwt.df$r16,2,2))
cwt.df$r17.sw = as.numeric(substr(cwt.df$r17,2,2))
cwt.df$cwt.sw = as.numeric(substr(cwt.df$cwt.age,2,2))

cwt.df = cwt.df[cwt.df$NAME!="ZS0713F010",]  #CWT not matched to scale

head(cwt.df)

cwt.df = na.omit(cwt.df) #to use sample size with NAs omitted
n.cwt = dim(cwt.df)[1]

cwt.var = ((cwt.df$r7.sw - cwt.df$cwt.sw)^2 + (cwt.df$r8.sw - cwt.df$cwt.sw)^2 + (cwt.df$r9.sw - cwt.df$cwt.sw)^2 +
 (cwt.df$r10.sw - cwt.df$cwt.sw)^2 + (cwt.df$r11.sw - cwt.df$cwt.sw)^2 + (cwt.df$r12.sw - cwt.df$cwt.sw)^2 + 
 (cwt.df$r13.sw - cwt.df$cwt.sw)^2 + (cwt.df$r16.sw - cwt.df$cwt.sw)^2 + (cwt.df$r17.sw - cwt.df$cwt.sw)^2)/(n.cwt - 1)
cwt.sd = sqrt(cwt.var)
cwt.cv = cwt.sd/cwt.df$cwt.sw
cwt.df = data.frame(cwt.df, cwt.var, cwt.sd, cwt.cv)

dim(cwt.df)
head(cwt.df)

#re-shape in Excel for plot...
sink("cwt.df.txt")
cwt.df; sink()
cwt.tall = read.table("clipboard", header=T, sep="\t")

head(cwt.df)
cwt.df %>% 
  group_by(as.factor(cwt.sw)) %>% 
  summarise(n = n(), Mean = mean(cwt.cv))


ggplot(cwt.tall, aes(as.factor(cwt.sw), r.sw, group = cwt.sw)) +
  geom_point(pch = 21, size = 3, position = position_jitter(h = 0.13, w= 0.13), aes(fill = as.factor(cwt.sw), shape = as.factor(cwt.sw))) +
   facet_wrap(.~reader, scales = 'fixed') + geom_abline(intercept = 1, slope = 1, lty = "longdash")+theme_bw()







###########################################
#Info to select scales to print for Oct 2018 meeting

print.info =data.frame(sk.df.ape$NAME, sk.df.ape$median.fw,sk.df.ape$median.sw, sk.df.ape$cv, 
sk.df.ape$fishlength, sk.df.ape$l.resid, sk.df.ape$doy, sk.df.ape$r7.eu,sk.df.ape$r8.eu, sk.df.ape$r9.eu,sk.df.ape$r10.eu, sk.df.ape$r11.eu, 
sk.df.ape$r12.eu, sk.df.ape$r13.eu, sk.df.ape$r16.eu, sk.df.ape$r17.eu)

colnames(print.info) = c("Name", "med.fw", "med.sw", "cv", "fish.l", "l.resid", "doy", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r16", "r17")

head(print.info)

sink("printinfo.txt")
print.info
sink()

sk.df.ape$m.fw.sw = paste(sk.df.ape$median.fw, sk.df.ape$median.sw)
#######################
age.comp.sys = sk.df.ape %>% 
  group_by(yr, sys) %>% 
  group_by(m.fw.sw, add=T) %>% 
  summarise(n = n())
as.data.frame(age.comp.sys)




###
#KS distribution tests
##############################################################
#TEst for difference in distribution of error between familiar and unfamiliar stocks
fam.bias = read.table("clipboard", header=T, sep="\t")
head(fam.bias)

summary(fam.bias)

exp.bias = fam.bias[fam.bias$Family=="Experienced",]
inx.bias = fam.bias[fam.bias$Family=="Inexperienced",]
ks.test(exp.bias$Under, inx.bias$Under)
ks.test(exp.bias$Not.diff, inx.bias$Not.diff)
ks.test(exp.bias$Over, inx.bias$Over)

##############################################################
orig.bias = read.table("clipboard", header=T, sep="\t")
head(orig.bias)
unique(orig.bias$SW.Age)

#Among ages
o.sw2.bias = orig.bias[orig.bias$SW.Age=="2",]
o.sw3.bias = orig.bias[orig.bias$SW.Age=="3",]
o.sw4.bias = orig.bias[orig.bias$SW.Age=="4",]
o.sw5.bias = orig.bias[orig.bias$SW.Age=="5",]

ks.test(o.sw2.bias$Error, o.sw3.bias$Error)
ks.test(o.sw2.bias$Error, o.sw4.bias$Error)
ks.test(o.sw2.bias$Error, o.sw5.bias$Error)
ks.test(o.sw3.bias$Error, o.sw4.bias$Error)
ks.test(o.sw3.bias$Error, o.sw5.bias$Error)
ks.test(o.sw4.bias$Error, o.sw5.bias$Error)

#Among stocks
#no marine age 1
orig.bias2 = read.table("clipboard", header=T, sep="\t")

unique(orig.bias2$Population)
o.KA.bias = orig.bias2[orig.bias2$Population=="Karluk",]
o.CP.bias = orig.bias2[orig.bias2$Population=="Copper",]
o.BC.bias = orig.bias2[orig.bias2$Population=="Kuskokwim",]
o.NC.bias = orig.bias2[orig.bias2$Population=="Nushagak",]
o.SK.bias = orig.bias2[orig.bias2$Population=="Stikine",]
head(o.SK.bias)

ks.test(o.KA.bias$Error, o.CP.bias$Error)
ks.test(o.KA.bias$Error, o.BC.bias$Error)
ks.test(o.KA.bias$Error, o.NC.bias$Error)
ks.test(o.KA.bias$Error, o.SK.bias$Error)
ks.test(o.CP.bias$Error, o.BC.bias$Error)
ks.test(o.CP.bias$Error, o.NC.bias$Error)
ks.test(o.CP.bias$Error, o.SK.bias$Error)
ks.test(o.BC.bias$Error, o.NC.bias$Error)
ks.test(o.BC.bias$Error, o.SK.bias$Error)
ks.test(o.NC.bias$Error, o.SK.bias$Error)


#Age bias plot median vs. original
sk.tall2 = sk.tall
sk.tall2$osw = as.numeric(sk.tall2$osw)
MO.data = sk.tall2 %>% 
  group_by(sys, median.sw) %>% 
  dplyr::summarise(mean = mean(osw), L95 = quantile(osw, .05), U95 = quantile(osw, .95), r_n=n())
MO.data = as.data.frame(MO.data)
head(MO.data)

#pretty names
StockNames = (read.table("clipboard", header=T, sep="\t"))
head(StockNames)
head(MO.data)
MO.data = merge(MO.data, StockNames, by="sys")

head(MO.data)

p<- ggplot(MO.data, aes(x=median.sw, y=mean, group=Stock.y)) + 
  geom_errorbar(aes(ymin=L95, ymax=U95, width=.5))+  
  geom_point()+
  facet_wrap(~Stock.y)+ 
  geom_abline(intercept = 0, slope = 1)+
  xlab("Median marine age") +
  ylab("Original marine age") +
  theme(strip.background =element_rect(fill="white"))
p +
  scale_y_continuous(breaks=c(0,1,2, 3, 4, 5, 6,7))






##############################################################
#TEst for difference in distribution of ocean age error among populations, ages, readers
bias.data = read.table("clipboard", header=T, sep="\t")
head(bias.data)
summary(bias.data)
dim(bias.data)

#among populations
CP.bias = bias.data[bias.data$Population=="Copper",]
KA.bias = bias.data[bias.data$Population=="Karluk",]
KS.bias = bias.data[bias.data$Population=="Kuskokwim",]
NS.bias = bias.data[bias.data$Population=="Nushagak",]
ST.bias = bias.data[bias.data$Population=="Stikine",]

ks.test(CP.bias$Error, KA.bias$Error)
ks.test(CP.bias$Error, KS.bias$Error)
ks.test(CP.bias$Error, NS.bias$Error)
ks.test(CP.bias$Error, ST.bias$Error)

ks.test(KA.bias$Error, KS.bias$Error)
ks.test(KA.bias$Error, NS.bias$Error)
ks.test(KA.bias$Error, ST.bias$Error)

ks.test(KS.bias$Error, NS.bias$Error)

#Discussion with Hamachan
ks.test(KS.bias$Error, ST.bias$Error)
shapiro.test(KS.bias$Error)
shapiro.test(ST.bias$Error)
plot(ST.bias$Error)

ks.test(NS.bias$Error, ST.bias$Error)



#Among ages
unique(bias.data$SW.Age)
SW1.bias = bias.data[bias.data$SW.Age=="1",]
SW2.bias = bias.data[bias.data$SW.Age=="2",]
SW3.bias = bias.data[bias.data$SW.Age=="3",]
SW4.bias = bias.data[bias.data$SW.Age=="4",]
SW5.bias = bias.data[bias.data$SW.Age=="5",]

ks.test(SW1.bias$Error, SW2.bias$Error)
ks.test(SW1.bias$Error, SW3.bias$Error)
ks.test(SW1.bias$Error, SW4.bias$Error)
ks.test(SW1.bias$Error, SW5.bias$Error)

ks.test(SW2.bias$Error, SW3.bias$Error)
ks.test(SW2.bias$Error, SW4.bias$Error)
ks.test(SW2.bias$Error, SW5.bias$Error)

ks.test(SW3.bias$Error, SW4.bias$Error)
ks.test(SW3.bias$Error, SW5.bias$Error)

ks.test(SW4.bias$Error, SW5.bias$Error)


#Among readers
R07.bias = bias.data[bias.data$Reader=="7",]
R08.bias = bias.data[bias.data$Reader=="8",]
R09.bias = bias.data[bias.data$Reader=="9",]
R10.bias = bias.data[bias.data$Reader=="10",]
R11.bias = bias.data[bias.data$Reader=="11",]
R12.bias = bias.data[bias.data$Reader=="12",]
R13.bias = bias.data[bias.data$Reader=="13",]
R15.bias = bias.data[bias.data$Reader=="15",]
R16.bias = bias.data[bias.data$Reader=="16",]
R17.bias = bias.data[bias.data$Reader=="17",]

ks.test(R07.bias$Error, R08.bias$Error)
ks.test(R07.bias$Error, R09.bias$Error)
ks.test(R07.bias$Error, R10.bias$Error)
ks.test(R07.bias$Error, R11.bias$Error)
ks.test(R07.bias$Error, R12.bias$Error)
ks.test(R07.bias$Error, R13.bias$Error)
ks.test(R07.bias$Error, R15.bias$Error)
ks.test(R07.bias$Error, R16.bias$Error)
ks.test(R07.bias$Error, R17.bias$Error)


ks.test(R08.bias$Error, R09.bias$Error)
ks.test(R08.bias$Error, R10.bias$Error)
ks.test(R08.bias$Error, R11.bias$Error)
ks.test(R08.bias$Error, R12.bias$Error)
ks.test(R08.bias$Error, R13.bias$Error)
ks.test(R08.bias$Error, R15.bias$Error)
ks.test(R08.bias$Error, R16.bias$Error)
ks.test(R08.bias$Error, R17.bias$Error)


ks.test(R09.bias$Error, R10.bias$Error)
ks.test(R09.bias$Error, R11.bias$Error)
ks.test(R09.bias$Error, R12.bias$Error)
ks.test(R09.bias$Error, R13.bias$Error)
ks.test(R09.bias$Error, R15.bias$Error)
ks.test(R09.bias$Error, R16.bias$Error)
ks.test(R09.bias$Error, R17.bias$Error)


ks.test(R10.bias$Error, R11.bias$Error)
ks.test(R10.bias$Error, R12.bias$Error)
ks.test(R10.bias$Error, R13.bias$Error)
ks.test(R10.bias$Error, R15.bias$Error)
ks.test(R10.bias$Error, R16.bias$Error)
ks.test(R10.bias$Error, R17.bias$Error)


ks.test(R11.bias$Error, R12.bias$Error)
ks.test(R11.bias$Error, R13.bias$Error)
ks.test(R11.bias$Error, R15.bias$Error)
ks.test(R11.bias$Error, R16.bias$Error)
ks.test(R11.bias$Error, R17.bias$Error)


ks.test(R12.bias$Error, R13.bias$Error)
ks.test(R12.bias$Error, R15.bias$Error)
ks.test(R12.bias$Error, R16.bias$Error)
ks.test(R12.bias$Error, R17.bias$Error)


ks.test(R13.bias$Error, R15.bias$Error)
ks.test(R13.bias$Error, R16.bias$Error)
ks.test(R13.bias$Error, R17.bias$Error)

ks.test(R15.bias$Error, R16.bias$Error)
ks.test(R15.bias$Error, R17.bias$Error)

ks.test(R16.bias$Error, R17.bias$Error)


######################################################
#Among stock and ages
CP.bias = bias.data[bias.data$Population=="Copper",]

CP.SW1.bias = CP.bias[CP.bias$SW.Age=="1",]
CP.SW2.bias = CP.bias[CP.bias$SW.Age=="2",]
CP.SW3.bias = CP.bias[CP.bias$SW.Age=="3",]
CP.SW4.bias = CP.bias[CP.bias$SW.Age=="4",]
CP.SW5.bias = CP.bias[CP.bias$SW.Age=="5",]

ks.test(CP.SW1.bias$Error, CP.SW2.bias$Error)
ks.test(CP.SW1.bias$Error, CP.SW3.bias$Error)
ks.test(CP.SW1.bias$Error, CP.SW4.bias$Error)
ks.test(CP.SW1.bias$Error, CP.SW5.bias$Error)

ks.test(CP.SW2.bias$Error, CP.SW3.bias$Error)
ks.test(CP.SW2.bias$Error, CP.SW4.bias$Error)
ks.test(CP.SW2.bias$Error, CP.SW5.bias$Error)

ks.test(CP.SW3.bias$Error, CP.SW4.bias$Error)
ks.test(CP.SW3.bias$Error, CP.SW5.bias$Error)
ks.test(CP.SW4.bias$Error, CP.SW5.bias$Error)



KA.bias = bias.data[bias.data$Population=="Karluk",]
KA.SW1.bias = KA.bias[KA.bias$SW.Age=="1",]
KA.SW2.bias = KA.bias[KA.bias$SW.Age=="2",]
KA.SW3.bias = KA.bias[KA.bias$SW.Age=="3",]
KA.SW4.bias = KA.bias[KA.bias$SW.Age=="4",]
KA.SW5.bias = KA.bias[KA.bias$SW.Age=="5",]

ks.test(KA.SW1.bias$Error, KA.SW2.bias$Error)
ks.test(KA.SW1.bias$Error, KA.SW3.bias$Error)
ks.test(KA.SW1.bias$Error, KA.SW4.bias$Error)
ks.test(KA.SW1.bias$Error, KA.SW5.bias$Error)

ks.test(KA.SW2.bias$Error, KA.SW3.bias$Error)
ks.test(KA.SW2.bias$Error, KA.SW4.bias$Error)
ks.test(KA.SW2.bias$Error, KA.SW5.bias$Error)

ks.test(KA.SW3.bias$Error, KA.SW4.bias$Error)
ks.test(KA.SW3.bias$Error, KA.SW5.bias$Error)
ks.test(KA.SW4.bias$Error, KA.SW5.bias$Error)



KS.bias = bias.data[bias.data$Population=="Kuskokwim",]
KS.SW1.bias = KS.bias[KS.bias$SW.Age=="1",]
KS.SW2.bias = KS.bias[KS.bias$SW.Age=="2",]
KS.SW3.bias = KS.bias[KS.bias$SW.Age=="3",]
KS.SW4.bias = KS.bias[KS.bias$SW.Age=="4",]
KS.SW5.bias = KS.bias[KS.bias$SW.Age=="5",]

ks.test(KS.SW1.bias$Error, KS.SW2.bias$Error)
ks.test(KS.SW1.bias$Error, KS.SW3.bias$Error)
ks.test(KS.SW1.bias$Error, KS.SW4.bias$Error)
ks.test(KS.SW1.bias$Error, KS.SW5.bias$Error)

ks.test(KS.SW2.bias$Error, KS.SW3.bias$Error)
ks.test(KS.SW2.bias$Error, KS.SW4.bias$Error)
ks.test(KS.SW2.bias$Error, KS.SW5.bias$Error)

ks.test(KS.SW3.bias$Error, KS.SW4.bias$Error)
ks.test(KS.SW3.bias$Error, KS.SW5.bias$Error)
ks.test(KS.SW4.bias$Error, KS.SW5.bias$Error)






NS.bias = bias.data[bias.data$Population=="Nushagak",]
NS.SW1.bias = NS.bias[NS.bias$SW.Age=="1",]
NS.SW2.bias = NS.bias[NS.bias$SW.Age=="2",]
NS.SW3.bias = NS.bias[NS.bias$SW.Age=="3",]
NS.SW4.bias = NS.bias[NS.bias$SW.Age=="4",]
NS.SW5.bias = NS.bias[NS.bias$SW.Age=="5",]

ks.test(NS.SW1.bias$Error, NS.SW2.bias$Error)
ks.test(NS.SW1.bias$Error, NS.SW3.bias$Error)
ks.test(NS.SW1.bias$Error, NS.SW4.bias$Error)
ks.test(NS.SW1.bias$Error, NS.SW5.bias$Error)

ks.test(NS.SW2.bias$Error, NS.SW3.bias$Error)
ks.test(NS.SW2.bias$Error, NS.SW4.bias$Error)
ks.test(NS.SW2.bias$Error, NS.SW5.bias$Error)

ks.test(NS.SW3.bias$Error, NS.SW4.bias$Error)
ks.test(NS.SW3.bias$Error, NS.SW5.bias$Error)
ks.test(NS.SW4.bias$Error, NS.SW5.bias$Error)



ST.bias = bias.data[bias.data$Population=="Stikine",]
ST.SW1.bias = ST.bias[ST.bias$SW.Age=="1",]
ST.SW2.bias = ST.bias[ST.bias$SW.Age=="2",]
ST.SW3.bias = ST.bias[ST.bias$SW.Age=="3",]
ST.SW4.bias = ST.bias[ST.bias$SW.Age=="4",]
ST.SW5.bias = ST.bias[ST.bias$SW.Age=="5",]

ks.test(ST.SW1.bias$Error, ST.SW2.bias$Error)
ks.test(ST.SW1.bias$Error, ST.SW3.bias$Error)
ks.test(ST.SW1.bias$Error, ST.SW4.bias$Error)
ks.test(ST.SW1.bias$Error, ST.SW5.bias$Error)

ks.test(ST.SW2.bias$Error, ST.SW3.bias$Error)
ks.test(ST.SW2.bias$Error, ST.SW4.bias$Error)
ks.test(ST.SW2.bias$Error, ST.SW5.bias$Error)

ks.test(ST.SW3.bias$Error, ST.SW4.bias$Error)
ks.test(ST.SW3.bias$Error, ST.SW5.bias$Error)
ks.test(ST.SW4.bias$Error, ST.SW5.bias$Error)






##############################################################
#TEst for difference in distribution of fw age error among populations, readers
fw.bias = read.table("clipboard", header=T, sep="\t")
head(bias.data)
summary(fw.bias)

#among populations
CP.bias = fw.bias[fw.bias$Population=="Copper",]
KA.bias = fw.bias[fw.bias$Population=="Karluk",]
KS.bias = fw.bias[fw.bias$Population=="Kuskokwim",]
NS.bias = fw.bias[fw.bias$Population=="Nushagak",]
ST.bias = fw.bias[fw.bias$Population=="Stikine",]

ks.test(CP.bias$Error, KA.bias$Error)
ks.test(CP.bias$Error, KS.bias$Error)
ks.test(CP.bias$Error, NS.bias$Error)
ks.test(CP.bias$Error, ST.bias$Error)

ks.test(KA.bias$Error, KS.bias$Error)
ks.test(KA.bias$Error, NS.bias$Error)
ks.test(KA.bias$Error, ST.bias$Error)

ks.test(KS.bias$Error, NS.bias$Error)
ks.test(KS.bias$Error, ST.bias$Error)

ks.test(NS.bias$Error, ST.bias$Error)


#Among readers
R07.bias = fw.bias[fw.bias$Reader=="7",]
R08.bias = fw.bias[fw.bias$Reader=="8",]
R09.bias = fw.bias[fw.bias$Reader=="9",]
R10.bias = fw.bias[fw.bias$Reader=="10",]
R11.bias = fw.bias[fw.bias$Reader=="11",]
R12.bias = fw.bias[fw.bias$Reader=="12",]
R13.bias = fw.bias[fw.bias$Reader=="13",]
R15.bias = fw.bias[fw.bias$Reader=="15",]
R16.bias = fw.bias[fw.bias$Reader=="16",]
R17.bias = fw.bias[fw.bias$Reader=="17",]

ks.test(R07.bias$Error, R08.bias$Error)
ks.test(R07.bias$Error, R09.bias$Error)
ks.test(R07.bias$Error, R10.bias$Error)
ks.test(R07.bias$Error, R11.bias$Error)
ks.test(R07.bias$Error, R12.bias$Error)
ks.test(R07.bias$Error, R13.bias$Error)
ks.test(R07.bias$Error, R15.bias$Error)
ks.test(R07.bias$Error, R16.bias$Error)
ks.test(R07.bias$Error, R17.bias$Error)


ks.test(R08.bias$Error, R09.bias$Error)
ks.test(R08.bias$Error, R10.bias$Error)
ks.test(R08.bias$Error, R11.bias$Error)
ks.test(R08.bias$Error, R12.bias$Error)
ks.test(R08.bias$Error, R13.bias$Error)
ks.test(R08.bias$Error, R15.bias$Error)
ks.test(R08.bias$Error, R16.bias$Error)
ks.test(R08.bias$Error, R17.bias$Error)


ks.test(R09.bias$Error, R10.bias$Error)
ks.test(R09.bias$Error, R11.bias$Error)
ks.test(R09.bias$Error, R12.bias$Error)
ks.test(R09.bias$Error, R13.bias$Error)
ks.test(R09.bias$Error, R15.bias$Error)
ks.test(R09.bias$Error, R16.bias$Error)
ks.test(R09.bias$Error, R17.bias$Error)


ks.test(R10.bias$Error, R11.bias$Error)
ks.test(R10.bias$Error, R12.bias$Error)
ks.test(R10.bias$Error, R13.bias$Error)
ks.test(R10.bias$Error, R15.bias$Error)
ks.test(R10.bias$Error, R16.bias$Error)
ks.test(R10.bias$Error, R17.bias$Error)


ks.test(R11.bias$Error, R12.bias$Error)
ks.test(R11.bias$Error, R13.bias$Error)
ks.test(R11.bias$Error, R15.bias$Error)
ks.test(R11.bias$Error, R16.bias$Error)
ks.test(R11.bias$Error, R17.bias$Error)


ks.test(R12.bias$Error, R13.bias$Error)
ks.test(R12.bias$Error, R15.bias$Error)
ks.test(R12.bias$Error, R16.bias$Error)
ks.test(R12.bias$Error, R17.bias$Error)


ks.test(R13.bias$Error, R15.bias$Error)
ks.test(R13.bias$Error, R16.bias$Error)
ks.test(R13.bias$Error, R17.bias$Error)

ks.test(R15.bias$Error, R16.bias$Error)
ks.test(R15.bias$Error, R17.bias$Error)

ks.test(R16.bias$Error, R17.bias$Error)



