You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hi! first of all thanks for the great support of doing glm in Rcpp!
I am using the rcppglm to modify MASS:polr to enhance the speeds over a normal GLM, but i encountered an error like above in my attempts, here is the full code:
#original polr
polr <- function (formula, data, weights, start, ..., subset, na.action,
contrasts = NULL, Hess = FALSE, model = TRUE, method = c("logistic",
"probit", "loglog", "cloglog", "cauchit"))
{
m <- match.call(expand.dots = FALSE)
method <- match.arg(method)
if (is.matrix(eval.parent(m$data)))
m$data <- as.data.frame(data)
m$start <- m$Hess <- m$method <- m$model <- m$... <- NULL
m[[1L]] <- quote(stats::model.frame)
m <- eval.parent(m)
Terms <- attr(m, "terms")
x <- model.matrix(Terms, m, contrasts)
xint <- match("(Intercept)", colnames(x), nomatch = 0L)
n <- nrow(x)
pc <- ncol(x)
cons <- attr(x, "contrasts")
if (xint > 0L) {
x <- x[, -xint, drop = FALSE]
pc <- pc - 1L
}
else warning("an intercept is needed and assumed")
wt <- model.weights(m)
if (!length(wt))
wt <- rep(1, n)
offset <- model.offset(m)
if (length(offset) <= 1L)
offset <- rep(0, n)
y <- model.response(m)
if (!is.factor(y))
stop("response must be a factor")
lev <- levels(y)
llev <- length(lev)
if (llev <= 2L)
stop("response must have 3 or more levels")
y <- unclass(y)
q <- llev - 1L
if (missing(start)) {
q1 <- llev%/%2L
y1 <- (y > q1)
X <- cbind(Intercept = rep(1, n), x)
fit <- switch(method, logistic = glm.fit(X, y1, wt, family = binomial(),
offset = offset), probit = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset), loglog = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset), cloglog = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset), cauchit = glm.fit(X, y1, wt, family = binomial("cauchit"),
offset = offset))
if (!fit$converged)
stop("attempt to find suitable starting values failed")
coefs <- fit$coefficients
if (any(is.na(coefs))) {
warning("design appears to be rank-deficient, so dropping some coefs")
keep <- names(coefs)[!is.na(coefs)]
coefs <- coefs[keep]
x <- x[, keep[-1L], drop = FALSE]
pc <- ncol(x)
}
logit <- function(p) log(p/(1 - p))
spacing <- logit((1L:q)/(q + 1L))
if (method != "logistic")
spacing <- spacing/1.7
gammas <- -coefs[1L] + spacing - spacing[q1]
start <- c(coefs[-1L], gammas)
}
else if (length(start) != pc + q)
stop("'start' is not of the correct length")
ans <- polr.fit(x, y, wt, start, offset, method, hessian = Hess,
...)
beta <- ans$coefficients
zeta <- ans$zeta
deviance <- ans$deviance
res <- ans$res
niter <- c(f.evals = res$counts[1L], g.evals = res$counts[2L])
eta <- if (pc)
offset + drop(x %*% beta)
else offset + rep(0, n)
pfun <- switch(method, logistic = plogis, probit = pnorm,
loglog = pgumbel, cloglog = pGumbel, cauchit = pcauchy)
cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta),
, q)
fitted <- t(apply(cumpr, 1L, function(x) diff(c(0, x, 1))))
dimnames(fitted) <- list(row.names(m), lev)
fit <- list(coefficients = beta, zeta = zeta, deviance = deviance,
fitted.values = fitted, lev = lev, terms = Terms, df.residual = sum(wt) -
pc - q, edf = pc + q, n = sum(wt), nobs = sum(wt),
call = match.call(), method = method, convergence = res$convergence,
niter = niter, lp = eta)
if (Hess) {
dn <- c(names(beta), names(zeta))
H <- res$hessian
dimnames(H) <- list(dn, dn)
fit$Hessian <- H
}
if (model)
fit$model <- m
fit$na.action <- attr(m, "na.action")
fit$contrasts <- cons
fit$xlevels <- .getXlevels(Terms, m)
class(fit) <- "polr"
fit
}
<bytecode: 0x000000002263b7f8>
<environment: namespace:MASS>
modified polr (adding the rcppglm in fit section)
polr_cpp <- function(formula, data, weights, start, ..., subset, na.action,
contrasts = NULL, Hess = FALSE, model = TRUE,
method = c("logistic", "probit", "loglog", "cloglog", "cauchit")){
library(Rcpp)
library(RcppGLM)
library(RcppArmadillo)
cppFunction('NumericMatrix mmult(const NumericMatrix& m1, const NumericMatrix& m2){
if (m1.ncol() != m2.nrow()) stop ("Incompatible matrix dimensions");
NumericMatrix out(m1.nrow(),m2.ncol());
NumericVector rm1, cm2;
for (size_t i = 0; i < m1.nrow(); ++i) {
rm1 = m1(i,_);
for (size_t j = 0; j < m2.ncol(); ++j) {
cm2 = m2(_,j);
out(i,j) = std::inner_product(rm1.begin(), rm1.end(), cm2.begin(), 0.);
}
}
return out;
}')
polr.fit.enhanced <- function(x, y, wt, start, offset, method, ...){
fmin <- function(beta){
theta <- beta[pc + ind_q]
gamm <- c(-Inf , cumsum(c(theta[1L], exp(theta[-1L]))), Inf)
eta <- offset
if (pc) eta <- eta + drop(x %*% beta[ind_pc])
pr <- pfun(pmin(100, gamm[y + 1] - eta)) -
pfun(pmax(-100, gamm[y] - eta))
if (all(pr > 0)) -sum(wt * log(pr)) else Inf
}
gmin <- function(beta){
jacobian <- function(theta) { ## dgamma by dtheta matrix
k <- length(theta)
etheta <- exp(theta)
mat <- matrix(0 , k, k)
mat[, 1L] <- rep(1, k)
for (i in 2L:k) mat[i:k, i] <- etheta[i]
mat
}
theta <- beta[pc + ind_q]
gamm <- c(-Inf, cumsum(c(theta[1L], exp(theta[-1L]))), Inf)
eta <- offset
if(pc) eta <- eta + drop(x %*% beta[ind_pc])
z1 <- pmin(100, gamm[y+1L] - eta)
z2 <- pmax(-100, gamm[y] - eta)
pr <- pfun(z1) - pfun(z2)
p1 <- dfun(z1); p2 <- dfun(z2)
g1 <- if(pc) t(x) %*% (wt*(p1 - p2)/pr) else numeric()
xx <- .polrY1*p1 - .polrY2*p2
g2 <- - t(xx) %*% (wt/pr)
g2 <- t(g2) %*% jacobian(theta)
if(all(pr > 0)) c(g1, g2) else rep(NA_real_, pc+q)
}
pfun <- switch(method, logistic = plogis, probit = pnorm,
loglog = pgumbel, cloglog = pGumbel, cauchit = pcauchy)
dfun <- switch(method, logistic = dlogis, probit = dnorm,
loglog = dgumbel, cloglog = dGumbel, cauchit = dcauchy)
n <- nrow(x)
pc <- ncol(x)
ind_pc <- seq_len(pc)
lev <- levels(y)
if(length(lev) <= 2L) stop("response must have 3 or more levels")
y <- unclass(y)
q <- length(lev) - 1L
ind_q <- seq_len(q)
Y <- matrix(0, n, q)
.polrY1 <- col(Y) == y; .polrY2 <- col(Y) == (y - 1L)
# pc could be 0.
s0 <- if(pc) c(start[seq_len(pc+1L)], log(diff(start[-seq_len(pc)])))
else c(start[1L], log(diff(start)))
library(optimParallel)
res <- optimParallel(s0, fmin, gmin, method="BFGS", ...)
beta <- res$par[seq_len(pc)]
theta <- res$par[pc + ind_q]
zeta <- cumsum(c(theta[1L], exp(theta[-1L])))
deviance <- 2 * res$value
names(zeta) <- paste(lev[-length(lev)], lev[-1L], sep="|")
if(pc) names(beta) <- colnames(x)
list(coefficients = beta, zeta = zeta, deviance = deviance, res = res)
}
m <- match.call(expand.dots = FALSE)
method <- match.arg(method)
if (is.matrix(eval.parent(m$data)))
m$data <- as.data.frame(data)
m$start <- m$Hess <- m$method <- m$model <- m$... <- NULL
m[[1L]] <- quote(stats::model.frame)
m <- eval.parent(m)
Terms <- attr(m, "terms")
x <- model.matrix(Terms, m, contrasts)
xint <- match("(Intercept)", colnames(x), nomatch = 0L)
n <- nrow(x)
pc <- ncol(x)
cons <- attr(x, "contrasts")
if (xint > 0L) {
x <- x[, -xint, drop = FALSE]
pc <- pc - 1L
}
else warning("an intercept is needed and assumed")
wt <- model.weights(m)
if (!length(wt))
wt <- rep(1, n)
offset <- model.offset(m)
if (length(offset) <= 1L)
offset <- rep(0, n)
y <- model.response(m)
if (!is.factor(y))
stop("response must be a factor")
lev <- levels(y)
llev <- length(lev)
if (llev <= 2L)
stop("response must have 3 or more levels")
y <- unclass(y)
q <- llev - 1L
if (missing(start)) {
q1 <- llev%/%2L
y1 <- (y > q1)
X <- cbind(Intercept = rep(1, n), x)
fit <- switch(method, logistic = glm_fit(X, y1, family = binomial()),
probit = glm_fit(X, y1, family = binomial("probit")),
loglog = glm_fit(X, y1, family = binomial("probit")),
cloglog = glm_fit(X, y1, family = binomial("probit")),
cauchit = glm_fit(X, y1, family = binomial("cauchit")))
if (!fit$converged)
stop("attempt to find suitable starting values failed")
coefs <- fit$coefficients
if (any(is.na(coefs))) {
warning("design appears to be rank-deficient, so dropping some coefs")
keep <- names(coefs)[!is.na(coefs)]
coefs <- coefs[keep]
x <- x[, keep[-1L], drop = FALSE]
pc <- ncol(x)
}
logit <- function(p) log(p/(1 - p))
spacing <- logit((1L:q)/(q + 1L))
if (method != "logistic")
spacing <- spacing/1.7
gammas <- -coefs[1L] + spacing - spacing[q1]
start <- c(coefs[-1L], gammas)
}
else if (length(start) != pc + q)
stop("'start' is not of the correct length")
ans <- polr.fit.enhanced(x, y, wt, start, offset, method, hessian = Hess, ...)
beta <- ans$coefficients
zeta <- ans$zeta
deviance <- ans$deviance
res <- ans$res
niter <- c(f.evals = res$counts[1L], g.evals = res$counts[2L])
eta <- if (pc){
beta <- matrix(beta, nrow = length(beta), ncol = 1)
offset + mmult(x, beta)
offset <- as.numeric(offset)
names(offset) <- 1:length(offset)
}
else offset + rep(0, n)
return(eta)
pfun <- switch(method, logistic = plogis, probit = pnorm,
loglog = pgumbel, cloglog = pGumbel, cauchit = pcauchy)
cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta), , q)
fitted <- t(apply(cumpr, 1L, function(x) diff(c(0, x, 1))))
dimnames(fitted) <- list(row.names(m), lev)
fit <- list(coefficients = beta, zeta = zeta, deviance = deviance,
fitted.values = fitted, lev = lev, terms = Terms, df.residual = sum(wt) -
pc - q, edf = pc + q, n = sum(wt), nobs = sum(wt),
call = match.call(), method = method, convergence = res$convergence,
niter = niter, lp = eta)
if (Hess) {
dn <- c(names(beta), names(zeta))
H <- res$hessian
dimnames(H) <- list(dn, dn)
fit$Hessian <- H
}
if (model)
fit$model <- m
fit$na.action <- attr(m, "na.action")
fit$contrasts <- cons
fit$xlevels <- .getXlevels(Terms, m)
class(fit) <- "polr"
fit
}
The text was updated successfully, but these errors were encountered:
Hi! first of all thanks for the great support of doing glm in Rcpp!
I am using the rcppglm to modify MASS:polr to enhance the speeds over a normal GLM, but i encountered an error like above in my attempts, here is the full code:
#original polr
modified polr (adding the rcppglm in fit section)
The text was updated successfully, but these errors were encountered: