Exercise 1: The profile-likelihood method

The profile likelihood method is used to assess confidence intervals of parameters in non-linear models. A model of the reaction flux of an enzymatic reaction associated with the substrate concentration \([S]\) is given by \[ \begin{align*} \dot v = \frac{10^{\rm Vmax}\cdot [S]}{10^{\rm Km} + [S]}. \end{align*} \] Here, Km and Vmax denote the Michaelis-Menten constant and maximal flux on the \(\log_{10}\) scale, respectively. Let us assume that the reaction flux \(\dot v\) can be measured as a function of the substrate concentration and that the parameters Vmax and Km are to be determined from the experimental data.

library(bbmle) # Maximum likelihood estimation
library(ggplot2)
  1. Simulate the dose-response curve \(\dot v([S])\) for parameters Km = 0 and Vmax = 1 and concentrations of \([S]\) between 0 and 2. Plot the result.
# Model function
prdfn <- function(conc, Km, Vmax) {
    10^Vmax*conc/(10^Km + conc)
}

# Set parameter values for simulation
Km <- 0
Vmax <- 1

# Set concentrations for simulation
conc <- seq(0, 2, .01)

# Generate plot of dose-response
d <- data.frame(conc = conc, flux = prdfn(conc, Km, Vmax))
P <- ggplot(d, aes(x = conc, y = flux)) + geom_line()
print(P)

  1. For concentrations \([S]\in\{.1, .5, 1, 2\}\) simulate the model prediction and pick normally distributed random numbers around the prediction with a standard deviation of 7%. These values represent the simulated data with measurement uncertainty. Add the data points with error bars to the previous plot.
# Simulate data points for set of concentrations
# Add 7% noise to the data
conc_D <- c(.1,.5, 1, 2)
pred_D <- prdfn(conc_D, Km, Vmax)
d_D <- data.frame(conc = conc_D, 
                  flux = rnorm(length(pred_D), mean = pred_D, sd = 0.07 * pred_D),
                  sigma = 0.07 * pred_D)

# Add the simulated data and error bars to the plot
P2 <- P + 
  geom_errorbar(data = d_D, aes(ymin = flux - sigma, ymax = flux + sigma), width = .02) + 
  geom_point(data = d_D)

print(P2)

  1. The basis of the profile-likelihood method is the log-likelihood function and a maximum-likelihood fit serving as the point of departure for the profile likelihood analysis. Define the negative log-likelihood as a function objfn <- function(Km, Vmax) of the parameters. Make explicit use of the previously defined data.frame with simulated data and its uncertainty values. You can either analytically derive the expression for the negative log-likelihood from the Gauß function or use the dnorm() function. Finally, use mle2() from the bbmle package to minimize the negative log-likelihood and obtain parameter estimates.
### Define negative log-likelihood for given simulated data set
objfn <- function(Km, Vmax) {
  with(d_D, {
    -sum(dnorm(flux, mean = prdfn(conc, Km, Vmax), sd = sigma, log = TRUE))  
  })
}

# Do max-likelihood fit
fit <- mle2(objfn, start = list(Km = 0, Vmax = 0))
  1. Implement an algorithm for the profile-likelihood method. In principle, the algorithm should include:

The algorithm should have the general form:

For whichPar in parameters
  
  For sign in (-1, 1)
    
    Set mypars to parameters from original fit
    Set deviance to 0
  
    While deviance < 4 and parameter values in [-5, 5]
  
      fit the model with value of whichPar fixed to value(whichPar) + sign*stepsize
      overwrite mypars by fit result
      compute deviance = -2 ( logLik(fit_0) - logLik(fit_new) )
  
    End
  
  End    
      
End

Generate a data.frame with columns deviance (the difference of twice the negative log-likelihood between the “refit” and the original fit), fixed (the value of the parameter held fixed during max-likelihood estimation), Km, Vmax (the fit result per iteration of the while-loop) and parameter (whichPar, the name of the parameter for which the profile was computed). Show the results in (1) a plot showing the fixed parameter value vs. the deviance and (2) a plot showing Km vs Vmax for each of the profiles.

# Function to compute profiles for both parameters
profilesC <- function(fit, objfn){
  do.call(rbind, lapply(names(coef(fit)), function(myp) {
  
  # Initialization
  deviance <- 0
  stepsize <- .01
  mypars <- coef(fit)
  variable <- setdiff(names(mypars), myp)
  output <- matrix(c(deviance, mypars[myp], mypars), nrow = 1, 
                   dimnames = list(NULL, c("deviance", "fixed", names(mypars))))
 
  # Loop over left and right direction
  for (sign in c(1, -1)) {
    
    # Reset
    mypars <- coef(fit)
    deviance <- 0
 
    
    # Profile computation
    while(deviance < 4 & all(mypars > -5) & all(mypars < 5)) {
      
      # Fit with fixed parameter shifted by stepsize
      fit_mod <- mle2(objfn, 
                      start = as.list(mypars[variable]), 
                      fixed = as.list(mypars[myp] + sign*stepsize))
      
      # Overwrite actual parameter
      mypars <- coef(fit_mod)
      # Compute deviance
      deviance <- -2*as.numeric(logLik(fit_mod) - logLik(fit))
      # Bind result to previous result
      output <- rbind(output, c(deviance, mypars[myp], mypars))
      
    }
    
  }
  
  # Sort output by increasind value of the parameter of interest
  cbind(as.data.frame(output[order(output[ ,"fixed"]),]), parameter = myp)
  
}))
}

profiles <- profilesC(fit, objfn)

# Deviance plot
cl <- c(0.68, 0.95)
thresholds <- qchisq(p = cl, df = 1)
ggplot(profiles, aes(x = fixed, y = deviance)) + facet_wrap(~parameter, scales = "free") + geom_line() + geom_point(data = subset(profiles, deviance == 0)) +
  scale_y_continuous(sec.axis = sec_axis(trans = ~., breaks = thresholds, labels = cl, name = "confidence level")) +
  geom_hline(yintercept = thresholds, lty = 2)

# Plot parameter paths
ggplot(profiles, aes(x = Km, y = Vmax)) + facet_wrap(~parameter, scales = "free") + geom_line() + geom_point(data = subset(profiles, deviance == 0))

  1. For the above choice of substrate concentrations we find that the profiles of both parameters exceed the deviance threshold associated with a 95% confidence level. Repeat steps 1-4 for simulated data points with \([S]\in(0, .1]\) or with \([S]\in[5, 10]\). Interpret the results obtained for the likelihood-profiles and profile paths.
set.seed(12)

conc_D <- c(0.01,0.05, 0.08, 0.1)
pred_D <- prdfn(conc_D, Km, Vmax)
d_D_lower <- data.frame(conc = conc_D, 
                  flux = rnorm(length(pred_D), mean = pred_D, sd = 0.07 * pred_D),
                  sigma = 0.07 * pred_D)

# Add the simulated data and error bars to the plot
print(P + 
      geom_errorbar(data = d_D_lower, aes(ymin = flux - sigma, ymax = flux + sigma), width = .02, color = "red") + 
      geom_point(data = d_D_lower, color = "red")
)

# New negative log-likelihood for lower simulated data set
objfn_lower <- function(Km, Vmax) {
  with(d_D_lower, {
    -sum(dnorm(flux, mean = prdfn(conc, Km, Vmax), sd = sigma, log = TRUE))  
  })
}

# Do max-likelihood fit
fit_lower <- mle2(objfn_lower, start = list(Km = 0, Vmax = 0))

# Compute Profiles
profiles_lower <- profilesC(fit_lower, objfn_lower)

# Deviance plot
ggplot(profiles_lower, aes(x = fixed, y = deviance)) + facet_wrap(~parameter, scales = "free") + geom_line() + geom_point(data = subset(profiles_lower, deviance == 0)) +
  scale_y_continuous(sec.axis = sec_axis(trans = ~., breaks = thresholds, labels = cl, name = "confidence level")) +
  geom_hline(yintercept = thresholds, lty = 2)

# Now upper S values
conc_D <- c(5, 6, 8, 10)
pred_D <- prdfn(conc_D, Km, Vmax)
d_D_upper <- data.frame(conc = conc_D, 
                  flux = rnorm(length(pred_D), mean = pred_D, sd = 0.07 * pred_D),
                  sigma = 0.07 * pred_D)

# New negative log-likelihood for upper simulated data set
objfn_upper <- function(Km, Vmax) {
  with(d_D_upper, {
    -sum(dnorm(flux, mean = prdfn(conc, Km, Vmax), sd = sigma, log = TRUE))  
  })
}

# Do max-likelihood fit
fit_upper <- mle2(objfn_upper, start = list(Km = 0, Vmax = 0))

# Compute Profiles
profiles_upper <- profilesC(fit_upper, objfn_upper)

# Deviance plot
ggplot(profiles_upper, aes(x = fixed, y = deviance)) + facet_wrap(~parameter, scales = "free") + geom_line() + geom_point(data = subset(profiles_upper, deviance == 0)) +
  scale_y_continuous(sec.axis = sec_axis(trans = ~., breaks = thresholds, labels = cl, name = "confidence level")) +
  geom_hline(yintercept = thresholds, lty = 2)

# Combined plot of all results
profiles$range <- "wide"
profiles_lower$range <- "lower"
profiles_upper$range <- "upper"

profiles_all <- rbind(profiles, profiles_lower, profiles_upper)

# Deviance plot
ggplot(profiles_all, aes(x = fixed, y = deviance, color = range)) + facet_wrap(~parameter, scales = "free") +
  geom_line() + geom_point(data = subset(profiles_all, deviance == 0)) +
  scale_y_continuous(sec.axis = sec_axis(trans = ~., breaks = thresholds, labels = cl, name = "confidence level")) +
  geom_hline(yintercept = thresholds, lty = 2)

# Plot parameter paths
ggplot(profiles_all, aes(x = Km, y = Vmax, color = range)) + facet_wrap(~parameter, scales = "free") + geom_line() + geom_point(data = subset(profiles_all, deviance == 0))

Cathedral exercise:

How does the cathedral relate to the symposium of Plato? Study the Apocalypse of Saint John and the writings of Proklos, Plotinus and the Abbot Suger of St. Denis.