Title: | Miscellaneous Functions for Survival Data |
---|---|
Description: | A collection of functions to help in the analysis of right-censored survival data. These extend the methods available in package:survival. |
Authors: | Chris Dardis |
Maintainer: | Chris Dardis <[email protected]> |
License: | GPL-2 |
Version: | 0.5.1 |
Built: | 2024-12-25 03:10:47 UTC |
Source: | https://github.com/dardisco/survmisc |
Convert an object to "wide" or "long" form.
asWide(x, ...) ## S3 method for class 'ten' asWide(x, ...) asLong(x, ...) ## S3 method for class 'ten' asLong(x, ...)
asWide(x, ...) ## S3 method for class 'ten' asWide(x, ...) asLong(x, ...) ## S3 method for class 'ten' asLong(x, ...)
x |
An object of class |
... |
Additional arguments (not implemented). |
A new data.table
is returned,
with the data in 'wide' or 'long' format.
There is one row for each time point.
For a ten
object generated from a numeric
or Surv
object,
this has columns:
t |
time. |
e |
number of events. |
n |
number at risk. |
If derived from a survfit
, coxph
or formula
object,
there are additional columns for e
and n
for each covariate group.
Most methods for ten
objects are designed for the 'long' form.
data("bmt", package="KMsurv") require("survival") t1 <- ten(c1 <- coxph(Surv(t2, d3) ~ z3*z10, data=bmt)) asWide(t1) asLong(asWide(t1)) stopifnot(asLong(asWide(t1)) == ten(ten(t1)))
data("bmt", package="KMsurv") require("survival") t1 <- ten(c1 <- coxph(Surv(t2, d3) ~ z3*z10, data=bmt)) asWide(t1) asLong(asWide(t1)) stopifnot(asLong(asWide(t1)) == ten(ten(t1)))
Arrange a survival plot with corresponding table and legend.
## S3 method for class 'tableAndPlot' autoplot(object, ..., hideTabLeg = TRUE, tabHeight = 0.25)
## S3 method for class 'tableAndPlot' autoplot(object, ..., hideTabLeg = TRUE, tabHeight = 0.25)
object |
An object of class |
... |
Additional arguments (not implemented). |
hideTabLeg |
Hide table legend.
|
tabHeight |
Table height, as a fraction/ proportion of the whole.
|
Arguments to plotHeigth
and tabHeight
are
best specified as fractions adding to ,
A graph, plotted with gridExtra::grid.arrange
.
This method is called by print.tableAndPlot
and by print.stratTableAndPlot
.
Chris Dardis. Based on existing work by R. Saccilotto, Abhijit Dasgupta, Gil Tomas and Mark Cowley.
data("kidney", package="KMsurv") autoplot(survfit(Surv(time, delta) ~ type, data=kidney), type="fill") autoplot(ten(survfit(Surv(time, delta) ~ type, data=kidney)), type="fill") data("bmt", package="KMsurv") s2 <- survfit(Surv(time=t2, event=d3) ~ group, data=bmt) autoplot(s2)
data("kidney", package="KMsurv") autoplot(survfit(Surv(time, delta) ~ type, data=kidney), type="fill") autoplot(ten(survfit(Surv(time, delta) ~ type, data=kidney)), type="fill") data("bmt", package="KMsurv") s2 <- survfit(Surv(time=t2, event=d3) ~ group, data=bmt) autoplot(s2)
ggplot
for a survfit
or ten
objectGenerate a ggplot
for a survfit
or ten
object
autoplot(object, ...) ## S3 method for class 'ten' autoplot(object, ..., title = "Marks show times with censoring", type = c("single", "CI", "fill"), alpha = 0.05, ciLine = 10, censShape = 3, palette = c("Dark2", "Set2", "Accent", "Paired", "Pastel1", "Pastel2", "Set1", "Set3"), jitter = c("none", "noEvents", "all"), tabTitle = "Number at risk by time", xLab = "Time", timeTicks = c("major", "minor", "days", "months", "custom"), times = NULL, yLab = "Survival", yScale = c("perc", "frac"), legend = TRUE, legTitle = "Group", legLabs = NULL, legOrd = NULL, titleSize = 15, axisTitleSize = 15, axisLabSize = 10, survLineSize = 0.5, censSize = 5, legTitleSize = 10, legLabSize = 10, fillLineSize = 0.05, tabTitleSize = 15, tabLabSize = 5, nRiskSize = 5) ## S3 method for class 'stratTen' autoplot(object, ..., title = NULL, type = c("single", "CI", "fill"), alpha = 0.05, ciLine = 10, censShape = 3, palette = c("Dark2", "Set2", "Accent", "Paired", "Pastel1", "Pastel2", "Set1", "Set3"), jitter = c("none", "noEvents", "all"), tabTitle = "Number at risk by time", xLab = "Time", timeTicks = c("major", "minor", "days", "months", "custom"), times = NULL, yLab = "Survival", yScale = c("perc", "frac"), legend = TRUE, legTitle = "Group", legLabs = NULL, legOrd = NULL, titleSize = 15, axisTitleSize = 15, axisLabSize = 10, survLineSize = 0.5, censSize = 5, legTitleSize = 10, legLabSize = 10, fillLineSize = 0.05, tabTitleSize = 15, tabLabSize = 5, nRiskSize = 5) ## S3 method for class 'survfit' autoplot(object, ..., title = "Marks show times with censoring", type = c("single", "CI", "fill"), alpha = 0.05, ciLine = 10, censShape = 3, palette = c("Dark2", "Set2", "Accent", "Paired", "Pastel1", "Pastel2", "Set1", "Set3"), jitter = c("none", "noEvents", "all"), tabTitle = "Number at risk by time", xLab = "Time", timeTicks = c("major", "minor", "weeks", "months", "custom"), times = NULL, yLab = "Survival", yScale = c("perc", "frac"), legend = TRUE, legLabs = NULL, legOrd = NULL, legTitle = "Group", titleSize = 15, axisTitleSize = 15, axisLabSize = 10, survLineSize = 0.5, censSize = 5, legTitleSize = 10, legLabSize = 10, fillLineSize = 0.05, tabTitleSize = 15, tabLabSize = 5, nRiskSize = 5, pVal = FALSE, sigP = 1, pX = 0.1, pY = 0.1)
autoplot(object, ...) ## S3 method for class 'ten' autoplot(object, ..., title = "Marks show times with censoring", type = c("single", "CI", "fill"), alpha = 0.05, ciLine = 10, censShape = 3, palette = c("Dark2", "Set2", "Accent", "Paired", "Pastel1", "Pastel2", "Set1", "Set3"), jitter = c("none", "noEvents", "all"), tabTitle = "Number at risk by time", xLab = "Time", timeTicks = c("major", "minor", "days", "months", "custom"), times = NULL, yLab = "Survival", yScale = c("perc", "frac"), legend = TRUE, legTitle = "Group", legLabs = NULL, legOrd = NULL, titleSize = 15, axisTitleSize = 15, axisLabSize = 10, survLineSize = 0.5, censSize = 5, legTitleSize = 10, legLabSize = 10, fillLineSize = 0.05, tabTitleSize = 15, tabLabSize = 5, nRiskSize = 5) ## S3 method for class 'stratTen' autoplot(object, ..., title = NULL, type = c("single", "CI", "fill"), alpha = 0.05, ciLine = 10, censShape = 3, palette = c("Dark2", "Set2", "Accent", "Paired", "Pastel1", "Pastel2", "Set1", "Set3"), jitter = c("none", "noEvents", "all"), tabTitle = "Number at risk by time", xLab = "Time", timeTicks = c("major", "minor", "days", "months", "custom"), times = NULL, yLab = "Survival", yScale = c("perc", "frac"), legend = TRUE, legTitle = "Group", legLabs = NULL, legOrd = NULL, titleSize = 15, axisTitleSize = 15, axisLabSize = 10, survLineSize = 0.5, censSize = 5, legTitleSize = 10, legLabSize = 10, fillLineSize = 0.05, tabTitleSize = 15, tabLabSize = 5, nRiskSize = 5) ## S3 method for class 'survfit' autoplot(object, ..., title = "Marks show times with censoring", type = c("single", "CI", "fill"), alpha = 0.05, ciLine = 10, censShape = 3, palette = c("Dark2", "Set2", "Accent", "Paired", "Pastel1", "Pastel2", "Set1", "Set3"), jitter = c("none", "noEvents", "all"), tabTitle = "Number at risk by time", xLab = "Time", timeTicks = c("major", "minor", "weeks", "months", "custom"), times = NULL, yLab = "Survival", yScale = c("perc", "frac"), legend = TRUE, legLabs = NULL, legOrd = NULL, legTitle = "Group", titleSize = 15, axisTitleSize = 15, axisLabSize = 10, survLineSize = 0.5, censSize = 5, legTitleSize = 10, legLabSize = 10, fillLineSize = 0.05, tabTitleSize = 15, tabLabSize = 5, nRiskSize = 5, pVal = FALSE, sigP = 1, pX = 0.1, pY = 0.1)
object |
An object of class |
... |
Additional arguments (not implemented). |
title |
Title for survival plot. |
type |
|
alpha |
Opacity of lines indicating confidence intervals
or filled rectangles. Should be in range |
ciLine |
Confidence interval line type. See 'line type specification' in
|
censShape |
Shape of marks to indicate censored onservations.
|
palette |
Options are taken from color_brewer.
|
jitter |
By default,
|
tabTitle |
Table title.
|
xLab |
Label for |
timeTicks |
Numbers to mark on the
|
times |
Vector of custom times to use for |
yLab |
Label for |
yScale |
Display for point on
–Legend arguments:
|
legend |
If |
legTitle |
Legend title. |
legLabs |
Legend labels. These can be used to replace the names
of the covariate groups ('strata' in the case of a |
legOrd |
Legend order.
|
titleSize |
Title size for survival plot. |
axisTitleSize |
Title size for axes. |
axisLabSize |
Title size for labels on axes. |
survLineSize |
Survival line size. |
legTitleSize |
Title size for legend. |
legLabSize |
Legend labels width and height. |
censSize |
Size of marks to indicate censored onservations. |
fillLineSize |
Line size surrouding filled boxes. |
tabTitleSize |
Table title text size. |
tabLabSize |
Table legend text size. |
nRiskSize |
Number at risk - text size.
|
pVal |
If |
sigP |
No. of significant digits to display in |
pX |
Location of |
pY |
Location of |
autoplot.survfit
may be deprecated after packageVersion 0.6.
Please try to use autoplot.ten
instead.
Chris Dardis. autoplot.survfit
based on existing work by
R. Saccilotto, Abhijit Dasgupta, Gil Tomas and Mark Cowley.
?ggplot2::ggplot_build
## examples are slow to run; see vignette for output from these ## Not run: ### autoplot.ten data("kidney", package="KMsurv") t1 <- ten(survfit(Surv(time, delta) ~ type, data=kidney)) autoplot(t1) autoplot(t1, type="fill", survLineSize=2, jitter="all") autoplot(t1, timeTicks="months", type="CI", jitter="all", legLabs=c("surgical", "percutaneous"), title="Time to infection following catheter placement \n by type of catheter, for dialysis patients", titleSize=10, censSize=2)$plot t2 <- ten(survfit(Surv(time=time, event=delta) ~ 1, data=kidney)) autoplot(t2, legLabs="")$plot autoplot(t2, legend=FALSE) data("rectum.dat", package="km.ci") t3 <- ten(survfit(Surv(time, status) ~ 1, data=rectum.dat)) ## change confidence intervals to log Equal-Precision confidence bands ci(t3, how="nair", tL=1, tU=40) autoplot(t3, type="fill", legend=FALSE)$plot ## manually changing the output t4 <- ten(survfit(Surv(time, delta) ~ type, data=kidney)) (a4 <- autoplot(t4, type="CI", alpha=0.8, survLineSize=2)$plot) ## change default colors a4 + list(ggplot2::scale_color_manual(values=c("red", "blue")), ggplot2::scale_fill_manual(values=c("red", "blue"))) ## change limits of y-axis suppressMessages(a4 + ggplot2::scale_y_continuous(limits=c(0, 1))) ## End(Not run) ## Not run: data("pbc", package="survival") t1 <- ten(Surv(time, status==2) ~ trt + strata(edema), data=pbc, abbNames=FALSE) autoplot(t1) ## End(Not run) ### autoplot.survfit ## Not run: data(kidney, package="KMsurv") s1 <- survfit(Surv(time, delta) ~ type, data=kidney) autoplot(s1, type="fill", survLineSize=2) autoplot(s1, type="CI", pVal=TRUE, pX=0.3, legLabs=c("surgical", "percutaneous"), title="Time to infection following catheter placement \n by type of catheter, for dialysis patients")$plot s1 <- survfit(Surv(time=time, event=delta) ~ 1, data=kidney) autoplot(s1, legLabs="")$plot autoplot(s1, legend=FALSE)$plot data(rectum.dat, package="km.ci") s1 <- survfit(Surv(time, status) ~ 1, data=rectum.dat) ## change confidence intervals to log Equal-Precision confidence bands if (require("km.ci")) { km.ci::km.ci(s1, method="logep") autoplot(s1, type="fill", legend=FALSE)$plot } ## manually changing the output s1 <- survfit(Surv(time, delta) ~ type, data=kidney) g1 <- autoplot(s1, type="CI", alpha=0.8, survLineSize=2)$plot ## change default colors g1 + ggplot2::scale_colour_manual(values=c("red", "blue")) + ggplot2::scale_fill_manual(values=c("red", "blue")) ## change limits of y-axis g1 + ggplot2::scale_y_continuous(limits=c(0, 1)) ## End(Not run)
## examples are slow to run; see vignette for output from these ## Not run: ### autoplot.ten data("kidney", package="KMsurv") t1 <- ten(survfit(Surv(time, delta) ~ type, data=kidney)) autoplot(t1) autoplot(t1, type="fill", survLineSize=2, jitter="all") autoplot(t1, timeTicks="months", type="CI", jitter="all", legLabs=c("surgical", "percutaneous"), title="Time to infection following catheter placement \n by type of catheter, for dialysis patients", titleSize=10, censSize=2)$plot t2 <- ten(survfit(Surv(time=time, event=delta) ~ 1, data=kidney)) autoplot(t2, legLabs="")$plot autoplot(t2, legend=FALSE) data("rectum.dat", package="km.ci") t3 <- ten(survfit(Surv(time, status) ~ 1, data=rectum.dat)) ## change confidence intervals to log Equal-Precision confidence bands ci(t3, how="nair", tL=1, tU=40) autoplot(t3, type="fill", legend=FALSE)$plot ## manually changing the output t4 <- ten(survfit(Surv(time, delta) ~ type, data=kidney)) (a4 <- autoplot(t4, type="CI", alpha=0.8, survLineSize=2)$plot) ## change default colors a4 + list(ggplot2::scale_color_manual(values=c("red", "blue")), ggplot2::scale_fill_manual(values=c("red", "blue"))) ## change limits of y-axis suppressMessages(a4 + ggplot2::scale_y_continuous(limits=c(0, 1))) ## End(Not run) ## Not run: data("pbc", package="survival") t1 <- ten(Surv(time, status==2) ~ trt + strata(edema), data=pbc, abbNames=FALSE) autoplot(t1) ## End(Not run) ### autoplot.survfit ## Not run: data(kidney, package="KMsurv") s1 <- survfit(Surv(time, delta) ~ type, data=kidney) autoplot(s1, type="fill", survLineSize=2) autoplot(s1, type="CI", pVal=TRUE, pX=0.3, legLabs=c("surgical", "percutaneous"), title="Time to infection following catheter placement \n by type of catheter, for dialysis patients")$plot s1 <- survfit(Surv(time=time, event=delta) ~ 1, data=kidney) autoplot(s1, legLabs="")$plot autoplot(s1, legend=FALSE)$plot data(rectum.dat, package="km.ci") s1 <- survfit(Surv(time, status) ~ 1, data=rectum.dat) ## change confidence intervals to log Equal-Precision confidence bands if (require("km.ci")) { km.ci::km.ci(s1, method="logep") autoplot(s1, type="fill", legend=FALSE)$plot } ## manually changing the output s1 <- survfit(Surv(time, delta) ~ type, data=kidney) g1 <- autoplot(s1, type="CI", alpha=0.8, survLineSize=2)$plot ## change default colors g1 + ggplot2::scale_colour_manual(values=c("red", "blue")) + ggplot2::scale_fill_manual(values=c("red", "blue")) ## change limits of y-axis g1 + ggplot2::scale_y_continuous(limits=c(0, 1)) ## End(Not run)
confidence intervals for survival curves.
ci(x, ...) ## S3 method for class 'ten' ci(x, ..., CI = c("0.95", "0.9", "0.99"), how = c("point", "nair", "hall"), trans = c("log", "lin", "asi"), tL = NULL, tU = NULL, reCalc = FALSE) ## S3 method for class 'stratTen' ci(x, ..., CI = c("0.95", "0.9", "0.99"), how = c("point", "nair", "hall"), trans = c("log", "lin", "asi"), tL = NULL, tU = NULL)
ci(x, ...) ## S3 method for class 'ten' ci(x, ..., CI = c("0.95", "0.9", "0.99"), how = c("point", "nair", "hall"), trans = c("log", "lin", "asi"), tL = NULL, tU = NULL, reCalc = FALSE) ## S3 method for class 'stratTen' ci(x, ..., CI = c("0.95", "0.9", "0.99"), how = c("point", "nair", "hall"), trans = c("log", "lin", "asi"), tL = NULL, tU = NULL)
x |
An object of class |
CI |
Confidence intervals. As the function currently relies on lookup tables, currently only 90%, 95% (the default) and 99% are supported. |
how |
Method to use for confidence interval.
|
trans |
Transformation to use.
|
tL |
Lower time point. Used in construction of confidence bands. |
tU |
Upper time point. Used in construction of confidence bands. |
... |
Additional arguments (not implemented). |
reCalc |
Recalcuate the values?
|
In the equations below
Where is the Kaplan-Meier survival estimate and
is Greenwood's estimate of its
variance.
The pointwise confidence intervals are valid for individual
times, e.g. median
and quantile
values.
When plotted and joined for multiple points they tend to
be narrower than the bands described below.
Thus they tend to exaggerate the impression of certainty
when used to plot confidence intervals for a time range.
They should not be interpreted as giving the intervals
within which the entire survival function lies.
For a given significance level ,
they are calculated using the standard normal distribution
as follows:
linear
log transform
where
arcsine-square root transform
upper:
lower:
Confidence bands give the values within which the survival function
falls within a range of timepoints.
The time range under consideration is given so that
, the minimum or lowest event time and
, the maximum or largest event time.
For a sample size and
:
For the Nair or equal precision (EP) confidence bands,
we begin by obtaining the relevant
confidence coefficient . This is obtained from
the upper
-th fractile of the random variable
Where is a standard Brownian bridge.
The intervals are:
linear
log transform (the default)
This uses as below:
And is given by:
arcsine-square root transform
upper:
lower:
For the Hall-Wellner bands the confidence coefficient
is obtained from the upper
-th fractile of a
Brownian bridge.
In this case can be
.
The intervals are:
linear
log transform
where
arcsine-square root transform
upper:
lower:
The ten
object is modified in place by the additional of a
data.table
as an attribute
.
attr(x, "ci")
is printed.
This A survfit
object. The upper
and lower
elements in the list (representing confidence intervals)
are modified from the original.
Other elements will also be shortened if the time range under consideration has been
reduced from the original.
For the Nair and Hall-Wellner bands, the function currently
relies on the lookup tables in package:km.ci
.
Generally, the arcsin-square root transform has the best coverage properties.
All bands have good coverage properties for samples as small as ,
except for the Nair / EP bands with a linear transformation,
which perform poorly when
.
The function is loosely based on km.ci::km.ci
.
Nair V, 1984. Confidence bands for survival functions with censored data: a comparative study. Technometrics. 26(3):265-75. JSTOR.
Hall WJ, Wellner JA, 1980. Confidence bands for a survival curve from censored data. Biometrika. 67(1):133-43. JSTOR.
## K&M 2nd ed. Section 4.3. Example 4.2, pg 105. data("bmt", package="KMsurv") b1 <- bmt[bmt$group==1, ] # ALL patients ## K&M 2nd ed. Section 4.4. Example 4.2 (cont.), pg 111. ## patients with ALL t1 <- ten(Surv(t2, d3) ~ 1, data=bmt[bmt$group==1, ]) ci(t1, how="nair", trans="lin", tL=100, tU=600) ## Table 4.5, pg. 111. lapply(list("lin", "log", "asi"), function(x) ci(t1, how="nair", trans=x, tL=100, tU=600)) ## Table 4.6, pg. 111. lapply(list("lin", "log", "asi"), function(x) ci(t1, how="hall", trans=x, tL=100, tU=600)) t1 <- ten(Surv(t2, d3) ~ group, data=bmt) ci(t1, CI="0.95", how="nair", trans="lin", tL=100, tU=600) ## stratified model data("pbc", package="survival") t1 <- ten(coxph(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc)) ci(t1)
## K&M 2nd ed. Section 4.3. Example 4.2, pg 105. data("bmt", package="KMsurv") b1 <- bmt[bmt$group==1, ] # ALL patients ## K&M 2nd ed. Section 4.4. Example 4.2 (cont.), pg 111. ## patients with ALL t1 <- ten(Surv(t2, d3) ~ 1, data=bmt[bmt$group==1, ]) ci(t1, how="nair", trans="lin", tL=100, tU=600) ## Table 4.5, pg. 111. lapply(list("lin", "log", "asi"), function(x) ci(t1, how="nair", trans=x, tL=100, tU=600)) ## Table 4.6, pg. 111. lapply(list("lin", "log", "asi"), function(x) ci(t1, how="hall", trans=x, tL=100, tU=600)) t1 <- ten(Surv(t2, d3) ~ group, data=bmt) ci(t1, CI="0.95", how="nair", trans="lin", tL=100, tU=600) ## stratified model data("pbc", package="survival") t1 <- ten(coxph(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc)) ci(t1)
compare survival curves
comp(x, ...) ## S3 method for class 'ten' comp(x, ..., p = 1, q = 1, scores = seq.int(attr(x, "ncg")), reCalc = FALSE)
comp(x, ...) ## S3 method for class 'ten' comp(x, ..., p = 1, q = 1, scores = seq.int(attr(x, "ncg")), reCalc = FALSE)
x |
A |
p |
|
q |
|
scores |
scores for tests for trend |
... |
Additional arguments (not implemented). |
reCalc |
Recalcuate the values?
|
The log-rank tests are formed from the following elements, with values for each time where there is at least one event:
, the weights, given below.
, the number of events (per time).
, the number of predicted events,
given by
predict
.
, the covariance matrix for time
,
given by
COV
.
It is calculated as:
If there are groups, then
are selected (arbitrary).
Likewise the corresponding variance-covariance matrix is reduced to the
appropriate dimensions.
is distributed as chi-square with
degrees of freedom.
For covariate groups, we can use:
the number of events (per time).
the number at risk overall.
the number of events in group
.
the number at risk in group
.
Then:
Below, for the Fleming-Harrington weights,
is the Kaplan-Meier (product-limit) estimator.
Note that both and
need to be
.
The weights are given as follows:
|
log-rank | |
|
Gehan-Breslow generalized Wilcoxon | |
|
Tarone-Ware | |
|
Peto-Peto's modified survival estimate |
|
|
modified Peto-Peto (by Andersen) |
|
|
Fleming-Harrington |
The weight at and thereafter is:
|
The supremum (Renyi) family of tests are designed
to detect differences in survival curves which cross.
That is, an early difference in survival in favor of one group
is balanced by a later reversal.
The same weights as above are used.
They are calculated by finding
(which is similar to the numerator used to find
in the log-rank test for 2 groups above).
and it's variance:
where is the largest
where both groups have at least one subject at risk.
Then calculate:
When the null hypothesis is true,
the distribution of is approximately
And for a standard Brownian motion (Wiener) process:
Tests for trend are designed to detect ordered differences in survival curves.
That is, for at least one group:
where is the largest
where all groups have at least one subject at risk.
The null hypothesis is that
Scores used to construct the test are typically ,
but may be given as a vector representing a numeric characteristic of the group.
They are calculated by finding:
The test statistic is:
where is the the appropriate element in the
variance-covariance matrix (see
COV
).
If ordering is present, the statistic will be greater than the
upper
-th
percentile of a standard normal distribution.
The tne
object is given
additional attributes
.
The following are always added:
lrt |
The log-rank family of tests |
lrw |
The log-rank weights (used in calculating the tests). |
An additional item depends on the number of covariate groups.
If this is :
sup |
The supremum or Renyi family of tests |
and if this is :
tft |
Tests for trend. This is given as a |
Regarding the Fleming-Harrington weights:
gives the log-rank test, i.e.
gives a version of the Mann-Whitney-Wilcoxon test
(tests if populations distributions are identical)
gives more weight to differences later on
gives more weight to differences early on
The example using alloauto
data illustrates this.
Here the log-rank statistic
has a p-value of around 0.5
as the late advantage of allogenic transplants
is offset by the high early mortality. However using
Fleming-Harrington weights of ,
emphasising differences later in time, gives a p-value of 0.04.
Stratified models (stratTen
) are not yet supported.
Gehan A. A Generalized Wilcoxon Test for Comparing Arbitrarily Singly-Censored Samples. Biometrika 1965 Jun. 52(1/2):203–23. JSTOR
Tarone RE, Ware J 1977 On Distribution-Free Tests for Equality of Survival Distributions. Biometrika;64(1):156–60. JSTOR
Peto R, Peto J 1972 Asymptotically Efficient Rank Invariant Test Procedures. J Royal Statistical Society 135(2):186–207. JSTOR
Fleming TR, Harrington DP, O'Sullivan M 1987 Supremum Versions of the Log-Rank and Generalized Wilcoxon Statistics. J American Statistical Association 82(397):312–20. JSTOR
Billingsly P 1999 Convergence of Probability Measures. New York: John Wiley & Sons. Wiley (paywall)
## Two covariate groups ## K&M 2nd ed. Example 7.2, Table 7.2, pp 209--210. data("kidney", package="KMsurv") t1 <- ten(Surv(time=time, event=delta) ~ type, data=kidney) comp(t1, p=c(0, 1, 1, 0.5, 0.5), q=c(1, 0, 1, 0.5, 2)) ## see the weights used attributes(t1)$lrw ## supremum (Renyi) test; two-sided; two covariate groups ## K&M 2nd ed. Example 7.9, pp 223--226. data("gastric", package="survMisc") g1 <- ten(Surv(time, event) ~ group, data=gastric) comp(g1) ## Three covariate groups ## K&M 2nd ed. Example 7.4, pp 212-214. data("bmt", package="KMsurv") b1 <- ten(Surv(time=t2, event=d3) ~ group, data=bmt) comp(b1, p=c(1, 0, 1), q=c(0, 1, 1)) ## Tests for trend ## K&M 2nd ed. Example 7.6, pp 217-218. data("larynx", package="KMsurv") l1 <- ten(Surv(time, delta) ~ stage, data=larynx) comp(l1) attr(l1, "tft") ### see effect of F-H test data("alloauto", package="KMsurv") a1 <- ten(Surv(time, delta) ~ type, data=alloauto) comp(a1, p=c(0, 1), q=c(1, 1))
## Two covariate groups ## K&M 2nd ed. Example 7.2, Table 7.2, pp 209--210. data("kidney", package="KMsurv") t1 <- ten(Surv(time=time, event=delta) ~ type, data=kidney) comp(t1, p=c(0, 1, 1, 0.5, 0.5), q=c(1, 0, 1, 0.5, 2)) ## see the weights used attributes(t1)$lrw ## supremum (Renyi) test; two-sided; two covariate groups ## K&M 2nd ed. Example 7.9, pp 223--226. data("gastric", package="survMisc") g1 <- ten(Surv(time, event) ~ group, data=gastric) comp(g1) ## Three covariate groups ## K&M 2nd ed. Example 7.4, pp 212-214. data("bmt", package="KMsurv") b1 <- ten(Surv(time=t2, event=d3) ~ group, data=bmt) comp(b1, p=c(1, 0, 1), q=c(0, 1, 1)) ## Tests for trend ## K&M 2nd ed. Example 7.6, pp 217-218. data("larynx", package="KMsurv") l1 <- ten(Surv(time, delta) ~ stage, data=larynx) comp(l1) attr(l1, "tft") ### see effect of F-H test data("alloauto", package="KMsurv") a1 <- ten(Surv(time, delta) ~ type, data=alloauto) comp(a1, p=c(0, 1), q=c(1, 1))
covariance matrix for survival data
COV(x, ...) ## S3 method for class 'ten' COV(x, ..., reCalc = FALSE) ## S3 method for class 'stratTen' COV(x, ..., reCalc = FALSE) ## S3 method for class 'numeric' COV(x, ..., n, ncg)
COV(x, ...) ## S3 method for class 'ten' COV(x, ..., reCalc = FALSE) ## S3 method for class 'stratTen' COV(x, ..., reCalc = FALSE) ## S3 method for class 'numeric' COV(x, ..., n, ncg)
x |
A |
... |
Additional arguments (not implemented). |
reCalc |
Recalcuate the values?
|
n |
number at risk (total). |
ncg |
number at risk, per covariate group.
|
Gives variance-covariance matrix for comparing survival
data for two or more groups.
Inputs are vectors corresponding to observations at a set of discrete
time points for right censored data, except for ,
the no. at risk by predictor.
This should be specified as a vector for one group,
otherwise as a matrix with each column corresponding to a group.
An array
.
The first two dimensions = the number of covariate groups ,
.
This is the square matrix below.
The third dimension is the number of observations
(discrete time points).
To calculate this, we use x
(= below) and
, the number at risk in covariate group
.
Where there are groups, the resulting sparse square matrix
(i.e. the non-diagonal elements are
)
at time
has diagonal elements:
For groups, the resulting square matrix
has diagonal elements given by:
The off diagonal elements are:
Where the is just one subject at risk at
the final timepoint, the equations above may produce
NaN
due to division by zero. This is converted to 0
for
simplicity.
Called by comp
The name of the function is capitalized
to distinguish it from:
?stats::cov
## Two covariate groups ## K&M. Example 7.2, pg 210, table 7.2 (last column). data("kidney", package="KMsurv") k1 <- with(kidney, ten(Surv(time=time, event=delta) ~ type)) COV(k1)[COV(k1) > 0] ## Four covariate groups ## K&M. Example 7.6, pg 217. data("larynx", package="KMsurv") l1 <- ten(Surv(time, delta) ~ stage, data=larynx) rowSums(COV(l1), dims=2) ## example of numeric method ## Three covariate groups ## K&M. Example 7.4, pg 212. data("bmt", package="KMsurv") b1 <- asWide(ten(Surv(time=t2, event=d3) ~ group, data=bmt)) rowSums(b1[, COV(x=e, n=n, ncg=matrix(data=c(n_1, n_2, n_3), ncol=3))], dims=2)
## Two covariate groups ## K&M. Example 7.2, pg 210, table 7.2 (last column). data("kidney", package="KMsurv") k1 <- with(kidney, ten(Surv(time=time, event=delta) ~ type)) COV(k1)[COV(k1) > 0] ## Four covariate groups ## K&M. Example 7.6, pg 217. data("larynx", package="KMsurv") l1 <- ten(Surv(time, delta) ~ stage, data=larynx) rowSums(COV(l1), dims=2) ## example of numeric method ## Three covariate groups ## K&M. Example 7.4, pg 212. data("bmt", package="KMsurv") b1 <- asWide(ten(Surv(time=t2, event=d3) ~ group, data=bmt)) rowSums(b1[, COV(x=e, n=n, ncg=matrix(data=c(n_1, n_2, n_3), ncol=3))], dims=2)
coxph
or survfit
.Determine the optimal cut point for a continuous variable
in a coxph
or survfit
model.
cutp(x, ...) ## S3 method for class 'coxph' cutp(x, ..., defCont = 3) ## S3 method for class 'survfit' cutp(x, ..., defCont = 3)
cutp(x, ...) ## S3 method for class 'coxph' cutp(x, ..., defCont = 3) ## S3 method for class 'survfit' cutp(x, ..., defCont = 3)
x |
A |
defCont |
definition of a continuous variable.
|
... |
Additional arguments (not implemented). |
For a cut point , of a predictor
,
the variable is split
into two groups, those
and
those
.
The score (or log-rank) statistic, ,
is calculated for each unique element
in
and uses
the number of events
the number at risk
in those above the cut point, respectively.
The basic statistic is
The sum is taken across times with observed events, to ,
the largest of these.
It is normalized (standardized), in the case of censoring,
by finding which is:
The test statistic is then
Under the null hypothesis that the chosen cut point
does not predict survival,
the distribution of has a limiting distibution which
is the supremum of the
absolute value of a Brownian bridge:
A list
of data.table
s.
There is one list element per continuous variable.
Each has a column with possible values of the cut point
(i.e. unique values of the variable), and the
additional columns:
U |
The score (log-rank) test for a model with the variable 'cut'
into into those |
Q |
The test statistic. |
p |
The |
The tables are ordered by -value, lowest first.
Contal C, O'Quigley J, 1999. An application of changepoint methods in studying the effect of age on survival in breast cancer. Computational Statistics & Data Analysis 30(3):253–70. ScienceDirect (paywall)
Mandrekar JN, Mandrekar, SJ, Cha SS, 2003. Cutpoint Determination Methods in Survival Analysis using SAS. Proceedings of the 28th SAS Users Group International Conference (SUGI). Paper 261-28. SAS (free)
## Mandrekar et al. above data("bmt", package="KMsurv") b1 <- bmt[bmt$group==1, ] # ALL patients c1 <- coxph(Surv(t2, d3) ~ z1, data=b1) # z1=age c1 <- cutp(c1)$z1 data.table::setorder(c1, "z1") ## [] below is used to print data.table to console c1[] ## Not run: ## compare to output from survival::coxph matrix( unlist( lapply(26:30, function(i) c(i, summary(coxph(Surv(t2, d3) ~ z1 >= i, data=b1))$sctest))), ncol=5, dimnames=list(c("age", "score_test", "df", "p"))) cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==2, ]))$z1[] cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==3, ]))[[1]][] ## K&M. Example 8.3, pg 273-274. data("kidtran", package="KMsurv") k1 <- kidtran ## patients who are male and black k2 <- k1[k1$gender==1 & k1$race==2, ] c2 <- coxph(Surv(time, delta) ~ age, data=k2) print(cutp(c2)) ## check significance of computed value summary(coxph(Surv(time, delta) ~ age >= 58, data=k2)) k3 <- k1[k1$gender==2 & k1$race==2, ] c3 <- coxph(Surv(time, delta) ~ age, data=k3) print(cutp(c3)) ## doesn't apply to binary variables e.g. gender print(cutp(coxph(Surv(time, delta) ~ age + gender, data=k1))) ## End(Not run)
## Mandrekar et al. above data("bmt", package="KMsurv") b1 <- bmt[bmt$group==1, ] # ALL patients c1 <- coxph(Surv(t2, d3) ~ z1, data=b1) # z1=age c1 <- cutp(c1)$z1 data.table::setorder(c1, "z1") ## [] below is used to print data.table to console c1[] ## Not run: ## compare to output from survival::coxph matrix( unlist( lapply(26:30, function(i) c(i, summary(coxph(Surv(t2, d3) ~ z1 >= i, data=b1))$sctest))), ncol=5, dimnames=list(c("age", "score_test", "df", "p"))) cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==2, ]))$z1[] cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==3, ]))[[1]][] ## K&M. Example 8.3, pg 273-274. data("kidtran", package="KMsurv") k1 <- kidtran ## patients who are male and black k2 <- k1[k1$gender==1 & k1$race==2, ] c2 <- coxph(Surv(time, delta) ~ age, data=k2) print(cutp(c2)) ## check significance of computed value summary(coxph(Surv(time, delta) ~ age >= 58, data=k2)) k3 <- k1[k1$gender==2 & k1$race==2, ] c3 <- coxph(Surv(time, delta) ~ age, data=k3) print(cutp(c3)) ## doesn't apply to binary variables e.g. gender print(cutp(coxph(Surv(time, delta) ~ age + gender, data=k1))) ## End(Not run)
gastric cancer trial data
A data.frame
with rows (observations) and
columns (variables).
Data from a trial of locally unresectable gastic cancer.
Patients ( in each group) were randomized to one of two groups:
chemotheapy vs. chemotherapy + radiotherapy.
Columns are:
Time, in days
Death
Treatment
chemotherapy
chemotherapy + radiotherapy
Klein J, Moeschberger. Survival Analysis, 2nd edition. Springer 2003. Example 7.9, pg 224.
Gastrointestinal Tumor Study Group, 1982. A comparison of combination chemotherapy and combined modality therapy for locally advanced gastric carcinoma. Cancer. 49(9):1771-7. Wiley (free).
Stablein DM, Koutrouvelis IA, 1985. A two-sample test sensitive to crossing hazards in uncensored and singly censored data. Biometrics. 41(3):643-52. JSTOR.
Examples in comp
data("gastric", package="survMisc", verbose=TRUE) head(gastric)
data("gastric", package="survMisc", verbose=TRUE) head(gastric)
coxph
objectgoodness of fit test for a coxph
object
gof(x, ...) ## S3 method for class 'coxph' gof(x, ..., G = NULL)
gof(x, ...) ## S3 method for class 'coxph' gof(x, ..., G = NULL)
x |
An object of class |
... |
Additional arguments (not implemented) |
G |
Number of groups into which to divide risk score.
If
where |
In order to verify the overall goodness of fit,
the risk score for each observation
is given by
where is the vector of fitted coefficients
and
is the vector of predictor variables for
observation
.
This risk score is then sorted and 'lumped' into
a grouping variable with groups,
(containing approximately equal numbers of observations).
The number of observed () and expected (
) events in
each group are used to generate a
statistic for each group,
which is assumed to follow a normal distribution with
.
The indicator variable indicG
is added to the
original model and the two models are compared to determine the
improvement in fit via the likelihood ratio test.
A list
with elements:
groups |
A
|
lrTest |
Likelihood-ratio test.
Tests the improvement in log-likelihood with addition
of an indicator variable with |
The choice of is somewhat arbitrary but rarely should
be
.
As illustrated in the example, a larger value for
makes the
test for each group more likely to be significant.
This does not affect the significance of adding the
indicator variable
indicG
to the original model.
The score is chosen for simplicity, as for large sample sizes
the Poisson distribution approaches the normal. Strictly speaking,
the Poisson would be more appropriate for
and
as
per Counting Theory.
The score may be somewhat conservative as the expected events
are calculated using the martingale residuals from the overall model,
rather than by group. This is likely to bring the expected events
closer to the observed events.
This test is similar to the Hosmer-Lemeshow test for logistic regression.
Method and example are from:
May S, Hosmer DW 1998.
A simplified method of calculating an overall goodness-of-fit test
for the Cox proportional hazards model.
Lifetime Data Analysis 4(2):109–20.
Springer (paywall)
Default value for as per:
May S, Hosmer DW 2004.
A cautionary note on the use of the Gronnesby and Borgan
goodness-of-fit test for the Cox proportional hazards model.
Lifetime Data Analysis 10(3):283–91.
Springer (paywall)
Changes to the pbc
dataset in the example are as detailed in:
Fleming T, Harrington D 2005.
Counting Processes and Survival Analysis.
New Jersey: Wiley and Sons. Chapter 4, section 4.6, pp 188.
Wiley (paywall)
data("pbc", package="survival") pbc <- pbc[!is.na(pbc$trt), ] ## make corrections as per Fleming pbc[pbc$id==253, "age"] <- 54.4 pbc[pbc$id==107, "protime"] <- 10.7 ### misspecified; should be log(bili) and log(protime) instead c1 <- coxph(Surv(time, status==2) ~ age + log(albumin) + bili + edema + protime, data=pbc) gof(c1, G=10) gof(c1)
data("pbc", package="survival") pbc <- pbc[!is.na(pbc$trt), ] ## make corrections as per Fleming pbc[pbc$id==253, "age"] <- 54.4 pbc[pbc$id==107, "protime"] <- 10.7 ### misspecified; should be log(bili) and log(protime) instead c1 <- coxph(Surv(time, status==2) ~ age + log(albumin) + bili + edema + protime, data=pbc) gof(c1, G=10) gof(c1)
Add number censored.
nc(x, ...) ## S3 method for class 'ten' nc(x, ...) ## S3 method for class 'stratTen' nc(x, ...)
nc(x, ...) ## S3 method for class 'ten' nc(x, ...) ## S3 method for class 'stratTen' nc(x, ...)
x |
An object of class |
... |
Additional arguments (not implemented). |
The original object, with new column(s) added indicating the
number censored at each time point, depending on attr(x, "shape")
:
"long" |
the new column, |
"wide" |
new columns, beginning with |
A stratTen
object has each ten
element in the
list
modified as above.
data("kidney", package="KMsurv") t1 <- ten(survfit(Surv(time, delta) ~ type, data=kidney)) nc(t1) nc(asWide(t1)) ## stratified model data("pbc", package="survival") t1 <- ten(coxph(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc)) nc(t1)
data("kidney", package="KMsurv") t1 <- ten(survfit(Surv(time, delta) ~ type, data=kidney)) nc(t1) nc(asWide(t1)) ## stratified model data("pbc", package="survival") t1 <- ten(coxph(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc)) nc(t1)
Surv
object.Plot a Surv
object.
## S3 method for class 'Surv' plot(x, l = 3, ...)
## S3 method for class 'Surv' plot(x, l = 3, ...)
x |
A |
l |
length of arrow. Length is |
... |
Additional arguments.
|
A graph (base graphics).
The type of graph depends on the type
of the Surv
object.
This is given by attr(s, which="type")
:
counting |
Lines with an arrow pointing right if right censored. |
right |
Lines with an arrow pointing right if right censored. |
left |
Lines with an arrow pointing left if left censored. |
interval |
If censored:
If not censored:
|
?graphics::arrows
?graphics::segments
df0 <- data.frame(t1=c(0, 2, 4, 6, NA, NA, 12, 14), t2=c(NA, NA, 4, 6, 8, 10, 16, 18)) s5 <- Surv(df0$t1, df0$t2, type="interval2") plot(s5)
df0 <- data.frame(t1=c(0, 2, 4, 6, NA, NA, 12, 14), t2=c(NA, NA, 4, 6, 8, 10, 16, 18)) s5 <- Surv(df0$t1, df0$t2, type="interval2") plot(s5)
predicted events
## S3 method for class 'ten' predict(object, ..., eMP = TRUE, reCalc = FALSE)
## S3 method for class 'ten' predict(object, ..., eMP = TRUE, reCalc = FALSE)
object |
An object of class |
eMP |
Add column(s) indicating events minus predicted. |
... |
Additional arguments (not implemented). |
reCalc |
Recalcuate the values?
|
With covariate groups, We use
,
the number at risk for group
,
to calculate the number of expected events:
An attribute
, pred
is added
to object
:
t |
Times with at least one observation |
P_ |
predicted number of events |
And if eMP==TRUE
(the default):
eMP_ |
events minus predicted |
The names of the object
's covariate groups are
used to make the suffixes of the column names (i.e. after the
_
character).
There is a predicted value for each unique time, for each covariate group.
?survival::predict.coxph methods("predict")
## K&M. Example 7.2, Table 7.2, pp 209-210. data("kidney", package="KMsurv") k1 <- ten(Surv(time=time, event=delta) ~ type, data=kidney) predict(k1) predict(asWide(k1)) stopifnot(predict(asWide(k1))[, sum(eMP_1 + eMP_2)] <= .Machine$double.neg.eps) ## Three covariate groups ## K&M. Example 7.4, pp 212-214. data("bmt", package="KMsurv") b1 <- ten(Surv(time=t2, event=d3) ~ group, data=bmt) predict(b1) ## one group only predict(ten(Surv(time=t2, event=d3) ~ 1, data=bmt))
## K&M. Example 7.2, Table 7.2, pp 209-210. data("kidney", package="KMsurv") k1 <- ten(Surv(time=time, event=delta) ~ type, data=kidney) predict(k1) predict(asWide(k1)) stopifnot(predict(asWide(k1))[, sum(eMP_1 + eMP_2)] <= .Machine$double.neg.eps) ## Three covariate groups ## K&M. Example 7.4, pp 212-214. data("bmt", package="KMsurv") b1 <- ten(Surv(time=t2, event=d3) ~ group, data=bmt) predict(b1) ## one group only predict(ten(Surv(time=t2, event=d3) ~ 1, data=bmt))
print
methodsprint
methods
## S3 method for class 'ten' print(x, ..., maxRow = getOption("datatable.print.nrows", 50L), nRowP = getOption("datatable.print.topn", 5L), pRowNames = TRUE, maxCol = getOption("survMisc.maxCol", 8L), nColSP = getOption("survMisc.nColSP", 7L), sigDig = getOption("survMisc.sigDig", 2L)) ## S3 method for class 'COV' print(x, ..., n = 2L) ## S3 method for class 'lrt' print(x, ..., dist = c("n", "c")) ## S3 method for class 'sup' print(x, ...) ## S3 method for class 'tableAndPlot' print(x, ..., hideTabLeg = TRUE, tabHeight = 0.25) ## S3 method for class 'stratTableAndPlot' print(x, ..., hideTabLeg = TRUE, tabHeight = 0.25)
## S3 method for class 'ten' print(x, ..., maxRow = getOption("datatable.print.nrows", 50L), nRowP = getOption("datatable.print.topn", 5L), pRowNames = TRUE, maxCol = getOption("survMisc.maxCol", 8L), nColSP = getOption("survMisc.nColSP", 7L), sigDig = getOption("survMisc.sigDig", 2L)) ## S3 method for class 'COV' print(x, ..., n = 2L) ## S3 method for class 'lrt' print(x, ..., dist = c("n", "c")) ## S3 method for class 'sup' print(x, ...) ## S3 method for class 'tableAndPlot' print(x, ..., hideTabLeg = TRUE, tabHeight = 0.25) ## S3 method for class 'stratTableAndPlot' print(x, ..., hideTabLeg = TRUE, tabHeight = 0.25)
x |
An object of class |
... |
Additional arguments (not implemented).
|
maxRow |
Maximum number of rows to print.
|
nRowP |
Number of rows to print from
the start and end of the object. Used if |
pRowNames |
Print row names?
|
maxCol |
Maximum number of columns to print.
|
nColSP |
Number of columns to print from
the start of the object. Used if Used if |
sigDig |
Significant digits. This is passed as an argument to
|
hideTabLeg |
Hide table legend. |
tabHeight |
Table height (relative to whole plot).
|
n |
Similar to |
dist |
Which distribution to use for the statistics
when printing.
|
Prints a ten
object with 'nice' formatting.
Options may be set for a session using e.g.
options(survMisc.nColSP=4L)
It is similar to the behavior of print.data.table
but
has additional arguments controlling the number of columns
sent to the terminal.
A printed representation of the object
is send to the terminal as a side effect of
calling the function.
The return value cannot be assign
ed.
All numeric arguments to the function must be supplied as integers.
Chris Dardis. Based on existing work by Brian Diggs.
For print.ten
:
data.table:::print.data.table
?stats::printCoefmat
options()$datatable.print.nrows
sapply(c("datatable.print.nrows", "datatable.print.topn"), getOption)
set.seed(1) (x <- data.table::data.table(matrix(rnorm(1800), ncol=15, nrow=120))) data.table::setattr(x, "class", c("ten", class(x))) p1 <- print(x) stopifnot(is.null(p1)) x[1:80, ] x[0, ] (data.table::set(x, j=seq.int(ncol(x)), value=NULL))
set.seed(1) (x <- data.table::data.table(matrix(rnorm(1800), ncol=15, nrow=120))) data.table::setattr(x, "class", c("ten", class(x))) p1 <- print(x) stopifnot(is.null(p1)) x[1:80, ] x[0, ] (data.table::set(x, j=seq.int(ncol(x)), value=NULL))
coxph
modelProfile likelihood for coefficients in a coxph
model
profLik(x, CI = 0.95, interval = 50, mult = c(0.1, 2), devNew = TRUE, ...)
profLik(x, CI = 0.95, interval = 50, mult = c(0.1, 2), devNew = TRUE, ...)
x |
A |
CI |
Confidence Interval. |
interval |
Number of points over which to evaluate coefficient. |
mult |
Multiplier. Coefficent will be multiplied by lower and upper value and evaluated across this range. |
devNew |
Open a new device for each plot. See
|
... |
Additional parameters passed to |
Plots of range of values for coefficient in model with log-likelihoods
for the model with the coefficient fixed at these values.
For each coefficient a range of possible values is chosen, given by
.
A series of models are fit (given by
interval
).
The coefficient is included in the model as a
fixed term and the partial log-likelihood for the model is calculated.
A curve is plotted which gives the partial log-likelihood for each of these candidate values.
An appropriate confidence interval (CI) is given
by subtracting 1/2 the value of the appropriate quantile
of a chi-squared distribution with degree of freedom.
Two circles are also plotted giving the 95
One plot for each coefficient in the model.
Example is from: T&G. Section 3.4.1, pg 57.
data("pbc", package="survival") c1 <- coxph(formula = Surv(time, status == 2) ~ age + edema + log(bili) + log(albumin) + log(protime), data = pbc) profLik(c1, col="red")
data("pbc", package="survival") c1 <- coxph(formula = Surv(time, status == 2) ~ age + edema + log(bili) + log(albumin) + log(protime), data = pbc) profLik(c1, col="red")
coxph
or survfit
modelr^2 measures for a a coxph
or survfit
model
rsq(x, ...) ## S3 method for class 'coxph' rsq(x, ..., sigD = 2) ## S3 method for class 'survfit' rsq(x, ..., sigD = 2)
rsq(x, ...) ## S3 method for class 'coxph' rsq(x, ..., sigD = 2) ## S3 method for class 'survfit' rsq(x, ..., sigD = 2)
x |
A |
sigD |
significant digits (for ease of display).
If |
... |
Additional arguments (not implemented). |
A list
with the following elements:
cod |
The coefficient of determination, which is
where |
mer |
The measure of explained randomness, which is:
where |
mev |
The measure of explained variation (similar to that for linear regression), which is:
|
Nagelkerke NJD, 1991. A Note on a General Definition of the Coefficient of Determination. Biometrika 78(3):691–92. JSTOR
O'Quigley J, Xu R, Stare J, 2005. Explained randomness in proportional hazards models. Stat Med 24(3):479–89. Wiley (paywall) Available at UCSD
Royston P, 2006. Explained variation for survival models. The Stata Journal 6(1):83–96. The Stata Journal
data("kidney", package="KMsurv") c1 <- coxph(Surv(time=time, event=delta) ~ type, data=kidney) cbind(rsq(c1), rsq(c1, sigD=NULL))
data("kidney", package="KMsurv") c1 <- coxph(Surv(time=time, event=delta) ~ type, data=kidney) cbind(rsq(c1), rsq(c1, sigD=NULL))
and
.survival (or hazard) function
based on and
.
sf(x, ...) ## Default S3 method: sf(x, ..., what = c("S", "H"), SCV = FALSE, times = NULL) ## S3 method for class 'ten' sf(x, ..., what = c("S", "H"), SCV = FALSE, times = NULL, reCalc = FALSE) ## S3 method for class 'stratTen' sf(x, ..., what = c("S", "H"), SCV = FALSE, times = NULL, reCalc = FALSE) ## S3 method for class 'numeric' sf(x, ..., n = NULL, what = c("all", "S", "Sv", "H", "Hv"), SCV = FALSE, times = NULL)
sf(x, ...) ## Default S3 method: sf(x, ..., what = c("S", "H"), SCV = FALSE, times = NULL) ## S3 method for class 'ten' sf(x, ..., what = c("S", "H"), SCV = FALSE, times = NULL, reCalc = FALSE) ## S3 method for class 'stratTen' sf(x, ..., what = c("S", "H"), SCV = FALSE, times = NULL, reCalc = FALSE) ## S3 method for class 'numeric' sf(x, ..., n = NULL, what = c("all", "S", "Sv", "H", "Hv"), SCV = FALSE, times = NULL)
x |
One of the following:
|
... |
Additional arguments (not implemented). |
n |
Number at risk. |
what |
See return, below. |
SCV |
Include the Squared Coefficient of
Variation, which is calcluated using
the mean
This measure of dispersion is also referred to as the 'standardized variance' or the 'noise'. |
times |
Times for which to calculate the function.
|
reCalc |
Recalcuate the values?
|
A data.table which is stored as an attribute of
the ten
object.
If what="s"
, the survival is returned, based on the
Kaplan-Meier or product-limit estimator.
This is at
and thereafter is given by:
If what="sv"
, the survival variance is returned.
Greenwoods estimtor of the variance of the
Kaplan-Meier (product-limit) estimator is:
If what="h"
, the hazard is returned,
based on the the Nelson-Aalen estimator.
This has a value of at
and thereafter is given by:
If what="hv"
, the hazard variance is returned.
The variance of the Nelson-Aalen estimator is given by:
If what="all"
(the default), all of the above
are returned in a data.table
, along with:
Survival, based on the Nelson-Aalen hazard estimator ,
which is:
Hazard, based on the Kaplan-Meier survival estimator ,
which is:
data("kidney", package="KMsurv") k1 <- ten(Surv(time=time, event=delta) ~ type, data=kidney) sf(k1) sf(k1, times=1:10, reCalc=TRUE) k2 <- ten(with(kidney, Surv(time=time, event=delta))) sf(k2) ## K&M. Table 4.1A, pg 93. ## 6MP patients data("drug6mp", package="KMsurv") d1 <- with(drug6mp, Surv(time=t2, event=relapse)) (d1 <- ten(d1)) sf(x=d1$e, n=d1$n, what="S") data("pbc", package="survival") t1 <- ten(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc) sf(t1) ## K&M. Table 4.2, pg 94. data("bmt", package="KMsurv") b1 <- bmt[bmt$group==1, ] # ALL patients t2 <- ten(Surv(time=b1$t2, event=b1$d3)) with(t2, sf(x=e, n=n, what="Hv")) ## K&M. Table 4.3, pg 97. sf(x=t2$e, n=t2$n, what="all")
data("kidney", package="KMsurv") k1 <- ten(Surv(time=time, event=delta) ~ type, data=kidney) sf(k1) sf(k1, times=1:10, reCalc=TRUE) k2 <- ten(with(kidney, Surv(time=time, event=delta))) sf(k2) ## K&M. Table 4.1A, pg 93. ## 6MP patients data("drug6mp", package="KMsurv") d1 <- with(drug6mp, Surv(time=t2, event=relapse)) (d1 <- ten(d1)) sf(x=d1$e, n=d1$n, what="S") data("pbc", package="survival") t1 <- ten(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc) sf(t1) ## K&M. Table 4.2, pg 94. data("bmt", package="KMsurv") b1 <- bmt[bmt$group==1, ] # ALL patients t2 <- ten(Surv(time=b1$t2, event=b1$d3)) with(t2, sf(x=e, n=n, what="Hv")) ## K&M. Table 4.3, pg 97. sf(x=t2$e, n=t2$n, what="all")
Package: | survMisc |
Type: | Package |
Version: | 0.5 |
Date: | 2015-07-15 |
License: | GPL (>= 2) |
LazyLoad: | yes |
A collection of functions for the analysis of survival data. These
extend the methods already available in package:survival
.
The intent is to generate a workspace for some of the common tasks
arising in survival analysis.
There are references in many of the functions to the textbooks:
K&M | Klein J, Moeschberger M (2003). Survival Analysis, 2nd edition. |
New York: Springer. Springer (paywall). | |
T&G | Therneau TM, Grambsch PM (2000). Modeling Survival Data: Extending the Cox Model. |
New York: Springer. Springer (paywall). |
This package should be regarded as 'in development' until release 1.0, meaning that there may be changes to certain function names and parameters, although I will try to keep this to a minimum. As such it is recommended that other packages do not depend on or import from this one until at least version 1.0.
Naming tends to follow the camelCase convention;
variables within functions are typically alphanumeric e.g. a1 <- 1
.
For bug reports, feature requests or suggestions for improvement, please try to submit to github. Otherwise email me at the address below.
Chris Dardis [email protected]
time, event(s) and number at risk.
ten(x, ...) ## S3 method for class 'numeric' ten(x, ...) ## S3 method for class 'Surv' ten(x, ..., call = NULL) ## S3 method for class 'coxph' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL) ## S3 method for class 'survfit' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL) ## S3 method for class 'formula' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL) ## S3 method for class 'data.frame' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL, call = NULL) ## S3 method for class 'data.table' ten(x, ..., abbNames = TRUE, mm = NULL, call = NULL) ## S3 method for class 'ten' ten(x, ..., abbNames = NULL, call = NULL)
ten(x, ...) ## S3 method for class 'numeric' ten(x, ...) ## S3 method for class 'Surv' ten(x, ..., call = NULL) ## S3 method for class 'coxph' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL) ## S3 method for class 'survfit' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL) ## S3 method for class 'formula' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL) ## S3 method for class 'data.frame' ten(x, ..., abbNames = TRUE, contrasts.arg = NULL, call = NULL) ## S3 method for class 'data.table' ten(x, ..., abbNames = TRUE, mm = NULL, call = NULL) ## S3 method for class 'ten' ten(x, ..., abbNames = NULL, call = NULL)
x |
For the default method, a |
abbNames |
Abbreviate names?
|
contrasts.arg |
Methods for handling factors.
|
call |
Used to pass the |
mm |
Used to pass the |
... |
Additional arguments (not implemented). |
A data.table
with the additional class
ten
.
By default, the shape returned is 'long' i.e. there is one row for each unique
timepoint per covariate group.
The basic form, for a numeric
or Surv
object, has columns:
t |
time. |
e |
number of events. |
n |
number at risk. |
A survfit
, coxph
or formula
object
will have additional columns:
cg |
covariate group. This is formed by combining the variables; these are separated by a comma ','. |
ncg |
number at risk, by covariate group |
Special terms.
The following are considered 'special'
terms in a survival model:
strata |
For a stratified model, |
cluster |
These terms are dropped. |
tt |
The variable is unchanged. That is, time-transform
terms are handled as if the the function
|
Attribures.
The returned object will also have the following attributes
:
shape |
The default is |
abbNames |
Abbreviate names? |
longNames |
A |
ncg |
Number of covariate groups |
call |
The call used to generate the object |
mm |
The |
Additional attributes will be added by the following functions:
sf
ci
The methods for data.frame
(for a model frame)
and data.table
are not typically intended for interactive use.
Currently only binary status and right-censoring
are supported.
In stratified models, only one level of stratification is supported
(i.e. strata cannot be 'nested' currently).
Partial matching is available for the
following arguments, based on the characters in bold:
abbNames
contrasts.arg
require("survival") ## binary vector ten(c(1, 0, 1, 0, 1)) ## Surv object df0 <- data.frame(t=c(1, 1, 2, 3, 5, 8, 13, 21), e=rep(c(0, 1), 4)) s1 <- with(df0, Surv(t, e, type="right")) ten(s1) ## some awkward values suppressWarnings( s1 <- Surv(time=c(Inf, -1, NaN, NA, 10, 12), event=c(c(NA, 1, 1, NaN, Inf, 0.75)))) ten(s1) ## coxph object ## K&M. Section 1.2. Table 1.1, page 2. data("hodg", package="KMsurv") hodg <- data.table::data.table(hodg) data.table::setnames(hodg, c(names(hodg)[!names(hodg) %in% c("score", "wtime")], "Z1", "Z2")) c1 <- coxph(Surv(time=time, event=delta) ~ Z1 + Z2, data=hodg[gtype==1 && dtype==1, ]) ten(c1) data("bmt", package="KMsurv") ten(c1 <- coxph(Surv(t2, d3) ~ z3*z10, data=bmt)) ## T&G. Section 3.2, pg 47. ## stratified model data("pbc", package="survival") c1 <- coxph(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc) ten(c1) ## K&M. Example 7.2, pg 210. data("kidney", package="KMsurv") with(kidney[kidney$type==2, ], ten(Surv(time=time, event=delta))) s1 <- survfit(Surv(time=time, event=delta) ~ type, data=kidney) ten(s1)[e > 0, ] ## A null model is passed to ten.Surv (t1 <- with(kidney, ten(Surv(time=time, event=delta) ~ 0))) ## but the original call is preserved attr(t1, "call") ## survival::survfit doesn't accept interaction terms... ## Not run: s1 <- survfit(Surv(t2, d3) ~ z3*z10, data=bmt) ## End(Not run) ## but ten.formula does: ten(Surv(time=t2, event=d3) ~ z3*z10, data=bmt) ## the same is true for the '.' (dot operator) in formulas (t1 <- ten(Surv(time=t2, event=d3) ~ ., data=bmt)) ## impractical long names stored as an attribute attr(t1, "longNames") ## not typically intended to be called directly mf1 <- model.frame(Surv(time, status==2) ~ age + strata(edema) + strata(spiders), pbc, drop.unused.levels = TRUE) ten(mf1)
require("survival") ## binary vector ten(c(1, 0, 1, 0, 1)) ## Surv object df0 <- data.frame(t=c(1, 1, 2, 3, 5, 8, 13, 21), e=rep(c(0, 1), 4)) s1 <- with(df0, Surv(t, e, type="right")) ten(s1) ## some awkward values suppressWarnings( s1 <- Surv(time=c(Inf, -1, NaN, NA, 10, 12), event=c(c(NA, 1, 1, NaN, Inf, 0.75)))) ten(s1) ## coxph object ## K&M. Section 1.2. Table 1.1, page 2. data("hodg", package="KMsurv") hodg <- data.table::data.table(hodg) data.table::setnames(hodg, c(names(hodg)[!names(hodg) %in% c("score", "wtime")], "Z1", "Z2")) c1 <- coxph(Surv(time=time, event=delta) ~ Z1 + Z2, data=hodg[gtype==1 && dtype==1, ]) ten(c1) data("bmt", package="KMsurv") ten(c1 <- coxph(Surv(t2, d3) ~ z3*z10, data=bmt)) ## T&G. Section 3.2, pg 47. ## stratified model data("pbc", package="survival") c1 <- coxph(Surv(time, status==2) ~ log(bili) + age + strata(edema), data=pbc) ten(c1) ## K&M. Example 7.2, pg 210. data("kidney", package="KMsurv") with(kidney[kidney$type==2, ], ten(Surv(time=time, event=delta))) s1 <- survfit(Surv(time=time, event=delta) ~ type, data=kidney) ten(s1)[e > 0, ] ## A null model is passed to ten.Surv (t1 <- with(kidney, ten(Surv(time=time, event=delta) ~ 0))) ## but the original call is preserved attr(t1, "call") ## survival::survfit doesn't accept interaction terms... ## Not run: s1 <- survfit(Surv(t2, d3) ~ z3*z10, data=bmt) ## End(Not run) ## but ten.formula does: ten(Surv(time=t2, event=d3) ~ z3*z10, data=bmt) ## the same is true for the '.' (dot operator) in formulas (t1 <- ten(Surv(time=t2, event=d3) ~ ., data=bmt)) ## impractical long names stored as an attribute attr(t1, "longNames") ## not typically intended to be called directly mf1 <- model.frame(Surv(time, status==2) ~ age + strata(edema) + strata(spiders), pbc, drop.unused.levels = TRUE) ten(mf1)