Load data and get libraries togther

knitr::opts_chunk$set(echo = TRUE)
library(rstan)
library(shinystan)
library(ggplot2)
library(dplyr)
library(readr)
ardata <- read_csv("ardata.csv")
sf6data <- read_csv("sf6data.csv")

Metadata for the ardata.csv and sf6data.csv files.

Both ardata.csv and sf6data.csv

Site: Stream name and number
Date: date of the Ar addition
Trial: arbitrary integer value for each Ar addition. Needed for Stan
Station: sampling station (meters below addition point)
Rep: integer for sample rep at each station
type: “pre” is before addition, “post” is during addition
cond : specific conductivity measured at each station. (units \(\mu\)S /cm)
pressure: Baromentric pressure at each station. (units mm Hg)
temp: tempaerature at each station. \(^\circ\)C
stationcorr: corrected stations where 0 is not the addition site, but rather the first station downstream.

ardata.csv

arncalc: calculated ratio of Ar:N_2. Units mg/L : mg/L, thus this is a mass ratio
arconc: calculated Ar concentration. mg/L
nconc: calculated N2 concentration. mg/L

sf6data.csv

volume: volume of sample injected into GC. \(\mu\)L
area: peak area from GC
sf6: area \(\times\) volume

Prepare data

First need to calculate saturation concentrations. These are based on Hamme and Emerson (2004)

watdens<-function(temp){
  
  t<-temp
  
  A <- 7.0132e-5
  B <- 7.926295e-3 
  C <-  -7.575477e-5 
  D<- 7.314701e-7
  E <-  -3.596363e-9
  to<- 3.9818
  
  dens<- (999.97358- (A*(t-to) + B*(t-to)^2 +C*(t-to)^3 + D*(t-to)^4+E*(t-to)^5) ) -4.873e-3 + 1.708e-4*t - 3.108e-6 * t^2
  dens/1000
}

nsat<- function(temp, bp) {
  
  
  ts<-log((298.15-temp) / (273.15 + temp))
  a0<-6.42931
  a1<-2.92704
  a2<-4.32531
  a3<-4.69149
  
  u<-10^(8.10765-(1750.286/(235+temp)))
  satn<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
  watdens(temp)*satn*(28.014/1000)##converts umol/kg to mg/L
}

arsat<- function(temp, bp) {
  
  
  ts<-log((298.15-temp) / (273.15 + temp))
  a0<-2.79150
  a1<-3.17609
  a2<-4.13116
  a3<-4.90379
  
  u<-10^(8.10765-(1750.286/(235+temp)))
  satar<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
  watdens(temp)*satar*(39.948/1000)##converts umol/kg to mg/L
}

## Estimates K600 for KO2 at a given temperature. From Wanninkhof (1992).
K600fromO2<-function (temp, KO2) {
  ((600/(1800.6 - (120.1 * temp) + (3.7818 * temp^2) - (0.047608 * temp^3)))^-0.5) * KO2
}



## Estimates K600 for KAr at a given temperature. From Raymond et al  (2012).
K600fromAr<-function (temp, KAr) {
  ((600/(1799 - (106.96 * temp) + (2.797 * temp^2) - (0.0289 * temp^3)))^-0.5) * KAr
}

Now calculate saturation concentrations, and split data into pre and post

ardata$arnsat <- arsat(ardata$temp,ardata$pressure) / nsat(ardata$temp,ardata$pressure)
ardatapre<- ardata[ardata$type=='pre', ]
ardatapost<- ardata[ardata$type=='post', ]
sf6datapost<- sf6data[sf6data$type=='post', ]

Ar tracer data

Plot of raw Ar:N2. Note the high varibility in some of the pre Ar data. We used the calculated Ar:N2 since it was in the middle of our samples, but not nearly as variable. The two lines on the Ar sat are the post and pre calculations; they vary because of temperature. The upper line is the warmer, pleateau collection temperature.

## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).

Now for calculations where we subtract background and correct for conductivity

ardatapost$arcorr<- ardatapost$arncalc - ardatapost$arnsat

#2.2.  Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))
## Warning: package 'bindrcpp' was built under R version 3.3.2
#join with the post cond
ardatapost<-merge(ardatapost,ardataprecond)



ardatapost$condcor<- ardatapost$cond - ardatapost$precond

ardatapost$arcond<- ardatapost$arcorr / ardatapost$condcor

##calc the mean for station 0
ardatapost_0<-ardatapost[ardatapost$stationcorr==0,]
ardata_0sum <- ardatapost_0 %>% group_by(Trial) %>% summarise(arcond_0=mean(arcond), arn_enrich=mean(arncalc/arnsat))


#join with ardatapost
ardatapost<-merge(ardatapost,ardata_0sum)

ardatapost$arcondnorm<-ardatapost$arcond/ardatapost$arcond_0

Prep SF6 in same way

#2.2.  Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))

#join with the post cond
sf6datapost<-merge(sf6datapost,ardataprecond) #yes, using pre from Ar



sf6datapost$condcor<- sf6datapost$cond - sf6datapost$precond

sf6datapost$sf6cond<- sf6datapost$sf6 / sf6datapost$condcor

##calc the mean for station 0
sf6datapost_0<-sf6datapost[sf6datapost$stationcorr==0,]
sf6data_0sum <- sf6datapost_0 %>% group_by(Trial) %>% summarise(sf6cond_0=mean(sf6cond))


#join with sf6datapost
sf6datapost<-merge(sf6datapost,sf6data_0sum)

sf6datapost$sf6condnorm<-sf6datapost$sf6cond/sf6datapost$sf6cond_0

Stan model

Here is the Stan model that we used for the analysis. Details on priors etc are in the text.

sink("combineargonmodelb.stan")

cat("
    
    data {
    
    int <lower=1> N; //number of data points, ar
    int <lower=1> NS; //number of data points, sf6
    int <lower=1> sites; //number of sites
    int <lower=0> sitear[N]; //stream number  for ar indexing vector
    int <lower=0> sitesf6[NS]; //stream number for sf6 indexing vector
    
    
    vector [N] ar;//conductivity corrected Ar data
    vector [N] distar;//
    
    vector [NS] sf6;//conductivity corrected sf6 data
    vector [NS] distsf6;// 
    
    }
    
    parameters {
    
    vector <lower=0> [sites] a;
    real mu_a;   //take out for hyperprior info  // mean prior
    real<lower=0, upper=0.5> sigma_ar; // error ar
    real<lower=0, upper=0.5> sigma_sf6; // error sf6
    
    vector[sites] k; // decline
    real <lower=0, upper=2> Ar0;
    real <lower=0, upper=2> SF60;
    
    //real d;
    //real b;
    real <lower=0> sigma_a; // mean prior
    }
    
    model { 
    
    //priors. 
    k ~ normal(0, 10);
  
    a~normal (mu_a,sigma_a); // mean prior

    Ar0 ~normal(1,0.05);
    SF60~normal(1,0.05);

    mu_a~normal(1.35, 1); // mean prior
     sigma_a ~ normal(0, 2);
    
    //likelihood        
    for (i in 1:N) {
    ar[i] ~ normal( Ar0 * exp(-k[sitear[i]]*distar[i]*0.01) , sigma_ar); 
    }
    for (j in 1:NS) {
    sf6[j] ~ normal( SF60 * exp(-k[sitesf6[j]]*distsf6[j]*0.01/a[sitesf6[j]]) , sigma_sf6); 
    }
    
    }

generated quantities {   //These estimate the posterior predicted

    vector [N] ar_tilde;
    vector [NS] sf6_tilde;
    
    for (n in 1:N) ar_tilde[n] = normal_rng( Ar0 * exp(-k[sitear[n]]*distar[n]*0.01) , sigma_ar);
    for (p in 1:NS) sf6_tilde[p] = normal_rng(SF60 * exp(-k[sitesf6[p]]*distsf6[p]*0.01/a[sitesf6[p]]) , sigma_sf6);
    
}

    "
    ,fill=TRUE)
sink()

Run model, look at output

Get data together into a list for Stan.

arstandata=list("ar"= ardatapost$arcondnorm, "distar"=ardatapost$stationcorr, "N"=length(ardatapost$stationcorr), 
                "sf6"= sf6datapost$sf6condnorm, "distsf6"=sf6datapost$stationcorr, "NS"=length(sf6datapost$stationcorr), 
                "sites"=max(ardatapost$Trial),  "sitear" = ardatapost$Trial, 
                "sitesf6" = sf6datapost$Trial)

Run the stan model

arfit <- stan(file = "combineargonmodelb.stan", data = arstandata, 
               iter = 3000, chains = 4)
## The following numerical problems occured the indicated number of times after warmup on chain 1
##                                                                                         count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 2
##                                                                                         count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     3
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 3
##                                                                                         count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     4
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
##                                                                                         count
## Exception thrown at line 54: normal_log: Location parameter is inf, but must be finite!     4
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.

Print out parameters. a[j] is the ratio of \(K_{Ar}: K_{SF6}\) for stream j. k is \(K_{Ar,j}\), i.e., the per unit distance evaion rate of Ar. It is scaled 100 fold for the purposes of Stan, which like all parameters to be around -1 to 1. mu_a is the hyperparamter for \(a\)

print(arfit, pars=c("a","mu_a","sigma_ar", "sigma_sf6", "k", "Ar0", "SF60", "sigma_a" ))
## Inference for Stan model: combineargonmodelb.
## 4 chains, each with iter=3000; warmup=1500; thin=1; 
## post-warmup draws per chain=1500, total post-warmup draws=6000.
## 
##            mean se_mean   sd 2.5%  25%  50%   75% 97.5% n_eff Rhat
## a[1]       2.01    0.00 0.27 1.52 1.82 2.00  2.18  2.58  4812    1
## a[2]       0.93    0.00 0.14 0.67 0.83 0.92  1.01  1.23  4857    1
## a[3]       1.76    0.00 0.21 1.37 1.61 1.75  1.89  2.20  5034    1
## a[4]       2.00    0.00 0.21 1.61 1.85 1.99  2.13  2.44  5192    1
## a[5]       1.46    0.00 0.17 1.16 1.35 1.45  1.57  1.82  4789    1
## a[6]       2.31    0.00 0.23 1.89 2.15 2.30  2.46  2.80  4660    1
## a[7]       0.94    0.00 0.12 0.73 0.86 0.93  1.01  1.19  4609    1
## a[8]       3.43    0.01 0.45 2.67 3.12 3.38  3.68  4.45  3730    1
## mu_a       1.79    0.00 0.35 1.07 1.57 1.79  2.01  2.50  6000    1
## sigma_ar   0.09    0.00 0.01 0.08 0.09 0.09  0.10  0.11  6000    1
## sigma_sf6  0.06    0.00 0.00 0.05 0.06 0.06  0.06  0.07  6000    1
## k[1]       0.18    0.00 0.02 0.15 0.17 0.18  0.19  0.22  4691    1
## k[2]       0.13    0.00 0.02 0.09 0.11 0.12  0.14  0.16  4798    1
## k[3]       0.51    0.00 0.05 0.42 0.47 0.51  0.54  0.61  5263    1
## k[4]       0.57    0.00 0.05 0.47 0.53 0.56  0.60  0.68  4991    1
## k[5]       1.78    0.00 0.19 1.45 1.65 1.77  1.90  2.18  4824    1
## k[6]       1.66    0.00 0.14 1.40 1.56 1.65  1.75  1.95  4731    1
## k[7]       2.55    0.00 0.26 2.08 2.37 2.53  2.71  3.10  4442    1
## k[8]      10.01    0.02 1.21 8.01 9.17 9.88 10.71 12.77  3657    1
## Ar0        0.99    0.00 0.01 0.96 0.98 0.99  1.00  1.02  6000    1
## SF60       0.99    0.00 0.01 0.97 0.98 0.99  1.00  1.01  6000    1
## sigma_a    0.99    0.01 0.34 0.53 0.75 0.93  1.15  1.87  4073    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Apr 19 14:48:53 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Extract parameters of interest. Note that the model multiplied k by 0.01 so that all parameters are on the same scale. We fix that here.

arfitsum<- summary(arfit)$summary

asum<-arfitsum[1:8,c("2.5%", "50%", "97.5%")]

ksum<-(arfitsum[12:19,c("2.5%", "50%", "97.5%")])*0.01# the 0.01 here rescales the k estimate

Examine model fit via posterior predicted distribution

Essentailly we are using the model output to peredict a new set of data which we compare with our actual data. This corresponds to \(p(y^{rep}|y)\) where \(y\) are the normalized gas data. We plot these below. Note that for Ar, negative values (i.e. samples lower than ambient Ar concentrations) allow negative values that are not captured by the model.

ar_tildesum<- summary(arfit, pars="ar_tilde", probs=0.5)$summary
ar_tildesum<-ar_tildesum[,4]


sf6_tildesum<- summary(arfit, pars="sf6_tilde", probs=0.5)$summary
sf6_tildesum<-sf6_tildesum[,4]

par( mfrow=c(2,1),  mai=c(0.7,0.7,0.2,0.3), mgp=c(2,0.7,0) )
plot(ardatapost$arcondnorm, ar_tildesum, xlab="Measured normalized Ar concentration", ylab="Predicted normalized Ar concentration", pch=16, cex.lab=0.8, cex.axis=0.8)

plot(sf6datapost$sf6condnorm, sf6_tildesum, xlab="Measured normalized SF6 concentration", ylab="Predicted normalized SF6 concentration", pch=16, cex.lab=0.8, cex.axis=0.8)

Derived variables

Calculate derived variables such as \(K\) (per time) and \(k600\)

streamslope<-c( 0.00703, 0.00703,0.015,0.015, 0.06, 0.11,0.12,0.12)
Q<- c(0.084,0.07,0.02,0.02,0.021,0.097,0.022,0.022)*60
w<- c(2.3,1.6,0.8,0.8,0.9,3.3,0.7,1.3)
ardataposttemp<-ardatapost %>% group_by(Trial) %>% summarise(temp=mean(temp, na.rm=T))
k<- ksum*Q*1440/w
k600<-K600fromAr(ardataposttemp$temp, k)
v<-c(12,15.4,9.5,9.5,3.1,4,6.3,5.2)

z<-(Q)/(w*v)

Kt<-v*ksum[,2]*1440

Enjoy.