Load data and get libraries togther
knitr::opts_chunk$set(echo = TRUE)
library(rstan)
library(shinystan)
library(ggplot2)
library(dplyr)
library(readr)
ardata <- read_csv("ardata.csv")
sf6data <- read_csv("sf6data.csv")
Site: Stream name and number
Date: date of the Ar addition
Trial: arbitrary integer value for each Ar addition. Needed for Stan
Station: sampling station (meters below addition point)
Rep: integer for sample rep at each station
type: “pre” is before addition, “post” is during addition
cond : specific conductivity measured at each station. (units \(\mu\)S /cm)
pressure: Baromentric pressure at each station. (units mm Hg)
temp: tempaerature at each station. \(^\circ\)C
stationcorr: corrected stations where 0 is not the addition site, but rather the first station downstream.
arncalc: calculated ratio of Ar:N_2. Units mg/L : mg/L, thus this is a mass ratio
arconc: calculated Ar concentration. mg/L
nconc: calculated N2 concentration. mg/L
volume: volume of sample injected into GC. \(\mu\)L
area: peak area from GC
sf6: area \(\times\) volume
First need to calculate saturation concentrations. These are based on Hamme and Emerson (2004)
watdens<-function(temp){
t<-temp
A <- 7.0132e-5
B <- 7.926295e-3
C <- -7.575477e-5
D<- 7.314701e-7
E <- -3.596363e-9
to<- 3.9818
dens<- (999.97358- (A*(t-to) + B*(t-to)^2 +C*(t-to)^3 + D*(t-to)^4+E*(t-to)^5) ) -4.873e-3 + 1.708e-4*t - 3.108e-6 * t^2
dens/1000
}
nsat<- function(temp, bp) {
ts<-log((298.15-temp) / (273.15 + temp))
a0<-6.42931
a1<-2.92704
a2<-4.32531
a3<-4.69149
u<-10^(8.10765-(1750.286/(235+temp)))
satn<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
watdens(temp)*satn*(28.014/1000)##converts umol/kg to mg/L
}
arsat<- function(temp, bp) {
ts<-log((298.15-temp) / (273.15 + temp))
a0<-2.79150
a1<-3.17609
a2<-4.13116
a3<-4.90379
u<-10^(8.10765-(1750.286/(235+temp)))
satar<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
watdens(temp)*satar*(39.948/1000)##converts umol/kg to mg/L
}
## Estimates K600 for KO2 at a given temperature. From Wanninkhof (1992).
K600fromO2<-function (temp, KO2) {
((600/(1800.6 - (120.1 * temp) + (3.7818 * temp^2) - (0.047608 * temp^3)))^-0.5) * KO2
}
## Estimates K600 for KAr at a given temperature. From Raymond et al (2012).
K600fromAr<-function (temp, KAr) {
((600/(1799 - (106.96 * temp) + (2.797 * temp^2) - (0.0289 * temp^3)))^-0.5) * KAr
}
Now calculate saturation concentrations, and split data into pre and post
ardata$arnsat <- arsat(ardata$temp,ardata$pressure) / nsat(ardata$temp,ardata$pressure)
ardatapre<- ardata[ardata$type=='pre', ]
ardatapost<- ardata[ardata$type=='post', ]
sf6datapost<- sf6data[sf6data$type=='post', ]
Plot of raw Ar:N2. Note the high varibility in some of the pre Ar data. We used the calculated Ar:N2 since it was in the middle of our samples, but not nearly as variable. The two lines on the Ar sat are the post and pre calculations; they vary because of temperature. The upper line is the warmer, pleateau collection temperature.
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
Now for calculations where we subtract background and correct for conductivity
ardatapost$arcorr<- ardatapost$arncalc - ardatapost$arnsat
#2.2. Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))
## Warning: package 'bindrcpp' was built under R version 3.3.2
#join with the post cond
ardatapost<-merge(ardatapost,ardataprecond)
ardatapost$condcor<- ardatapost$cond - ardatapost$precond
ardatapost$arcond<- ardatapost$arcorr / ardatapost$condcor
##calc the mean for station 0
ardatapost_0<-ardatapost[ardatapost$stationcorr==0,]
ardata_0sum <- ardatapost_0 %>% group_by(Trial) %>% summarise(arcond_0=mean(arcond), arn_enrich=mean(arncalc/arnsat))
#join with ardatapost
ardatapost<-merge(ardatapost,ardata_0sum)
ardatapost$arcondnorm<-ardatapost$arcond/ardatapost$arcond_0
Prep SF6 in same way
#2.2. Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))
#join with the post cond
sf6datapost<-merge(sf6datapost,ardataprecond) #yes, using pre from Ar
sf6datapost$condcor<- sf6datapost$cond - sf6datapost$precond
sf6datapost$sf6cond<- sf6datapost$sf6 / sf6datapost$condcor
##calc the mean for station 0
sf6datapost_0<-sf6datapost[sf6datapost$stationcorr==0,]
sf6data_0sum <- sf6datapost_0 %>% group_by(Trial) %>% summarise(sf6cond_0=mean(sf6cond))
#join with sf6datapost
sf6datapost<-merge(sf6datapost,sf6data_0sum)
sf6datapost$sf6condnorm<-sf6datapost$sf6cond/sf6datapost$sf6cond_0
Here is the Stan model that we used for the analysis. Details on priors etc are in the text.
sink("combineargonmodelb.stan")
cat("
data {
int <lower=1> N; //number of data points, ar
int <lower=1> NS; //number of data points, sf6
int <lower=1> sites; //number of sites
int <lower=0> sitear[N]; //stream number for ar indexing vector
int <lower=0> sitesf6[NS]; //stream number for sf6 indexing vector
vector [N] ar;//conductivity corrected Ar data
vector [N] distar;//
vector [NS] sf6;//conductivity corrected sf6 data
vector [NS] distsf6;//
}
parameters {
vector <lower=0> [sites] a;
real mu_a; //take out for hyperprior info // mean prior
real<lower=0, upper=0.5> sigma_ar; // error ar
real<lower=0, upper=0.5> sigma_sf6; // error sf6
vector[sites] k; // decline
real <lower=0, upper=2> Ar0;
real <lower=0, upper=2> SF60;
//real d;
//real b;
real <lower=0> sigma_a; // mean prior
}
model {
//priors.
k ~ normal(0, 10);
a~normal (mu_a,sigma_a); // mean prior
Ar0 ~normal(1,0.05);
SF60~normal(1,0.05);
mu_a~normal(1.35, 1); // mean prior
sigma_a ~ normal(0, 2);
//likelihood
for (i in 1:N) {
ar[i] ~ normal( Ar0 * exp(-k[sitear[i]]*distar[i]*0.01) , sigma_ar);
}
for (j in 1:NS) {
sf6[j] ~ normal( SF60 * exp(-k[sitesf6[j]]*distsf6[j]*0.01/a[sitesf6[j]]) , sigma_sf6);
}
}
generated quantities { //These estimate the posterior predicted
vector [N] ar_tilde;
vector [NS] sf6_tilde;
for (n in 1:N) ar_tilde[n] = normal_rng( Ar0 * exp(-k[sitear[n]]*distar[n]*0.01) , sigma_ar);
for (p in 1:NS) sf6_tilde[p] = normal_rng(SF60 * exp(-k[sitesf6[p]]*distsf6[p]*0.01/a[sitesf6[p]]) , sigma_sf6);
}
"
,fill=TRUE)
sink()
Get data together into a list for Stan.
arstandata=list("ar"= ardatapost$arcondnorm, "distar"=ardatapost$stationcorr, "N"=length(ardatapost$stationcorr),
"sf6"= sf6datapost$sf6condnorm, "distsf6"=sf6datapost$stationcorr, "NS"=length(sf6datapost$stationcorr),
"sites"=max(ardatapost$Trial), "sitear" = ardatapost$Trial,
"sitesf6" = sf6datapost$Trial)
Run the stan model
arfit <- stan(file = "combineargonmodelb.stan", data = arstandata,
iter = 3000, chains = 4)
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 2
## count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 3
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 3
## count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 4
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 54: normal_log: Location parameter is inf, but must be finite! 4
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
Print out parameters. a[j] is the ratio of \(K_{Ar}: K_{SF6}\) for stream j. k is \(K_{Ar,j}\), i.e., the per unit distance evaion rate of Ar. It is scaled 100 fold for the purposes of Stan, which like all parameters to be around -1 to 1. mu_a is the hyperparamter for \(a\)
print(arfit, pars=c("a","mu_a","sigma_ar", "sigma_sf6", "k", "Ar0", "SF60", "sigma_a" ))
## Inference for Stan model: combineargonmodelb.
## 4 chains, each with iter=3000; warmup=1500; thin=1;
## post-warmup draws per chain=1500, total post-warmup draws=6000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a[1] 2.01 0.00 0.27 1.52 1.82 2.00 2.18 2.58 4812 1
## a[2] 0.93 0.00 0.14 0.67 0.83 0.92 1.01 1.23 4857 1
## a[3] 1.76 0.00 0.21 1.37 1.61 1.75 1.89 2.20 5034 1
## a[4] 2.00 0.00 0.21 1.61 1.85 1.99 2.13 2.44 5192 1
## a[5] 1.46 0.00 0.17 1.16 1.35 1.45 1.57 1.82 4789 1
## a[6] 2.31 0.00 0.23 1.89 2.15 2.30 2.46 2.80 4660 1
## a[7] 0.94 0.00 0.12 0.73 0.86 0.93 1.01 1.19 4609 1
## a[8] 3.43 0.01 0.45 2.67 3.12 3.38 3.68 4.45 3730 1
## mu_a 1.79 0.00 0.35 1.07 1.57 1.79 2.01 2.50 6000 1
## sigma_ar 0.09 0.00 0.01 0.08 0.09 0.09 0.10 0.11 6000 1
## sigma_sf6 0.06 0.00 0.00 0.05 0.06 0.06 0.06 0.07 6000 1
## k[1] 0.18 0.00 0.02 0.15 0.17 0.18 0.19 0.22 4691 1
## k[2] 0.13 0.00 0.02 0.09 0.11 0.12 0.14 0.16 4798 1
## k[3] 0.51 0.00 0.05 0.42 0.47 0.51 0.54 0.61 5263 1
## k[4] 0.57 0.00 0.05 0.47 0.53 0.56 0.60 0.68 4991 1
## k[5] 1.78 0.00 0.19 1.45 1.65 1.77 1.90 2.18 4824 1
## k[6] 1.66 0.00 0.14 1.40 1.56 1.65 1.75 1.95 4731 1
## k[7] 2.55 0.00 0.26 2.08 2.37 2.53 2.71 3.10 4442 1
## k[8] 10.01 0.02 1.21 8.01 9.17 9.88 10.71 12.77 3657 1
## Ar0 0.99 0.00 0.01 0.96 0.98 0.99 1.00 1.02 6000 1
## SF60 0.99 0.00 0.01 0.97 0.98 0.99 1.00 1.01 6000 1
## sigma_a 0.99 0.01 0.34 0.53 0.75 0.93 1.15 1.87 4073 1
##
## Samples were drawn using NUTS(diag_e) at Thu Apr 19 14:48:53 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Extract parameters of interest. Note that the model multiplied k by 0.01 so that all parameters are on the same scale. We fix that here.
arfitsum<- summary(arfit)$summary
asum<-arfitsum[1:8,c("2.5%", "50%", "97.5%")]
ksum<-(arfitsum[12:19,c("2.5%", "50%", "97.5%")])*0.01# the 0.01 here rescales the k estimate
Essentailly we are using the model output to peredict a new set of data which we compare with our actual data. This corresponds to \(p(y^{rep}|y)\) where \(y\) are the normalized gas data. We plot these below. Note that for Ar, negative values (i.e. samples lower than ambient Ar concentrations) allow negative values that are not captured by the model.
ar_tildesum<- summary(arfit, pars="ar_tilde", probs=0.5)$summary
ar_tildesum<-ar_tildesum[,4]
sf6_tildesum<- summary(arfit, pars="sf6_tilde", probs=0.5)$summary
sf6_tildesum<-sf6_tildesum[,4]
par( mfrow=c(2,1), mai=c(0.7,0.7,0.2,0.3), mgp=c(2,0.7,0) )
plot(ardatapost$arcondnorm, ar_tildesum, xlab="Measured normalized Ar concentration", ylab="Predicted normalized Ar concentration", pch=16, cex.lab=0.8, cex.axis=0.8)
plot(sf6datapost$sf6condnorm, sf6_tildesum, xlab="Measured normalized SF6 concentration", ylab="Predicted normalized SF6 concentration", pch=16, cex.lab=0.8, cex.axis=0.8)
Calculate derived variables such as \(K\) (per time) and \(k600\)
streamslope<-c( 0.00703, 0.00703,0.015,0.015, 0.06, 0.11,0.12,0.12)
Q<- c(0.084,0.07,0.02,0.02,0.021,0.097,0.022,0.022)*60
w<- c(2.3,1.6,0.8,0.8,0.9,3.3,0.7,1.3)
ardataposttemp<-ardatapost %>% group_by(Trial) %>% summarise(temp=mean(temp, na.rm=T))
k<- ksum*Q*1440/w
k600<-K600fromAr(ardataposttemp$temp, k)
v<-c(12,15.4,9.5,9.5,3.1,4,6.3,5.2)
z<-(Q)/(w*v)
Kt<-v*ksum[,2]*1440