Exercise 1: Non-linear regression

The following table presents data for the hemoglobin saturation dependin on \(O_2\) pressure:

mydata <- data.frame(
  O2 = c(3.08, 4.61, 6.77, 10.15, 12.31, 15.38, 18.77, 22.77, 25.85, 30.15, 36.00, 45.23, 51.69, 61.85, 75.38, 87.08, 110.5),
  Y = c(2.21, 3.59, 6.08, 10.50, 14.09, 19.34, 28.45, 40.33, 50.00, 60.50, 69.89, 80.11, 83.98, 88.95, 93.37, 95.86, 98.07)
)

knitr::kable(mydata, align = c("c", "c"))
O2 Y
3.08 2.21
4.61 3.59
6.77 6.08
10.15 10.50
12.31 14.09
15.38 19.34
18.77 28.45
22.77 40.33
25.85 50.00
30.15 60.50
36.00 69.89
45.23 80.11
51.69 83.98
61.85 88.95
75.38 93.37
87.08 95.86
110.50 98.07

Fit these data to a curve of the form

\[ \begin{align} Y = 100\cdot\frac{\sum_{j = 0}^4 j\alpha_j [O_2]^j}{4\sum_{j = 0}^4 \alpha_j [O_2]^j}. \end{align} \] Instruction: Suppose we have data points \(\{x_i, y_i\}\), \(i = 1, \dots, n\), that we wish to fit to some function \(y = f(x)\), and that the function \(f\) depends on parameters \(\{\alpha_i\}\), \(i = 1, \dots, m\). A fit of the data is achieved when the parameters are picked such that the function \[ \begin{align} F = \sum_{i = 1}^n (f(x_i) - y_i)^2 \end{align} \] is minimized. Fo find this fit, start with reasonable estimates for the parameters and then allow them to change dynamically (as a function of a time-like variable \(t\)) according to \[ \begin{align} \forall j:\quad \frac{d\alpha_j}{dt} = - \sum_{i = 1}^n \big(f(x_i)-y_i\big)\frac{\partial f(x_i)}{\partial \alpha_j}.\qquad\textrm{(1)} \end{align} \] With this choice, \[ \begin{align} \frac{dF}{dt} = \sum_{j = 1}^{m}\sum_{i = 1}^n 2\big(f(x_i)-y_i\big)\frac{\partial f(x_i)}{\partial \alpha_j}\frac{d\alpha_j}{dt}\leq 0, \end{align} \] so that \(F\) is a decreasing function of \(t\). A fit is found when numerical integration of \(\frac{d\vec\alpha}{dt}\) reaches a steady-state solution.

Implementation:

## Set-up model with derivatives
model <- function(x, pars) {
  j <- 0:4
  predictor <- outer(x, j, function(a, b) a^b)
  jpredictor <- outer(x, j, function(a, b) b*a^b)
  
  value <- 100*as.vector((jpredictor %*% pars)/(4*predictor %*% pars))
  gradient <- 100*(jpredictor/as.vector(4*predictor%*%pars)- 
    4*predictor*as.vector(predictor %*% (j * pars))/as.vector(4*predictor %*% pars)^2)
  
  attr(value, "gradient") <- gradient
  
  return(value)
  
}

## Check if derivatives are implemented correctly
pars <- rnorm(5)
x <- rnorm(3)
grad.analytical <- attr(model(x, pars), "gradient")
grad.numerical <- numDeriv::jacobian(function(pars) model(x, pars), pars)

precision <- max(abs(grad.analytical - grad.numerical))
rel.precision <- precision/mean(abs(grad.numerical))

cat("precision:", precision, ", rel. precision:", rel.precision, "\n")
## precision: 1.031212e-07 , rel. precision: 3.695935e-11
## Set-up dynamic model for integration
func <- function(times, states, parameters, mydata) {
  prediction <- model(mydata$O2, states)
  dalpha <- -(prediction - mydata$Y) %*% attr(prediction, "gradient")
  list(dalpha)
}

alpha <- exp(rnorm(5, -1))
times <- c(0, exp(seq(-22, 0, .1)))
out <- ode(alpha, times, func, mydata = mydata)


estimate <- tail(out, 1)[,-1]
cat("estimated parameters:", estimate, "\n")
## estimated parameters: 1.365444 0.03490661 0.000829054 2.69911e-05 3.30215e-06
library(tidyr)
out <- gather(as.data.frame(out), parameter, value, -time)
ggplot(out, aes(x = time, y = value)) + facet_wrap(~parameter, scales = "free") + 
  geom_line() + scale_x_log10(labels = scales::trans_format("log10", scales::math_format(10^.x)))
## Warning: Transformation introduced infinite values in continuous x-axis

x <- seq(min(mydata$O2), max(mydata$O2), 1)
prediction <- data.frame(O2 = x, Y = model(x, estimate))
prediction.ini <- data.frame(O2 = x, Y = model(x, alpha))

ggplot(mydata, aes(x = O2, y = Y)) + geom_point() + 
  geom_line(data = prediction) +
  geom_line(data = prediction.ini, lty = 2, color = "red")

Cathedral exercise:

Episcopal churches have two towers. Freiburg is a diocesan town. Why does the cathedral have only one tower?