Methods for accessing modelling results.
Usage
binaryComparisons(x, cls = "class")
# S4 method for AnalysisData
binaryComparisons(x, cls = "class")
mtry(x, cls = "class")
# S4 method for AnalysisData
mtry(x, cls = "class")
type(x)
# S4 method for RandomForest
type(x)
# S4 method for Univariate
type(x)
response(x)
# S4 method for RandomForest
response(x)
# S4 method for Univariate
response(x)
metrics(x)
# S4 method for RandomForest
metrics(x)
# S4 method for list
metrics(x)
# S4 method for Analysis
metrics(x)
predictions(x)
# S4 method for RandomForest
predictions(x)
# S4 method for list
predictions(x)
# S4 method for Analysis
predictions(x)
importanceMetrics(x)
# S4 method for RandomForest
importanceMetrics(x)
importance(x)
# S4 method for RandomForest
importance(x)
# S4 method for Univariate
importance(x)
# S4 method for list
importance(x)
# S4 method for Analysis
importance(x)
proximity(x, idx = NULL)
# S4 method for RandomForest
proximity(x, idx = NULL)
# S4 method for list
proximity(x, idx = NULL)
# S4 method for Analysis
proximity(x, idx = NULL)
explanatoryFeatures(x, ...)
# S4 method for Univariate
explanatoryFeatures(
x,
threshold = 0.05,
value = c("adjusted.p.value", "p.value")
)
# S4 method for RandomForest
explanatoryFeatures(
x,
metric = "false_positive_rate",
value = c("value", "p-value", "adjusted_p-value"),
threshold = 0.05
)
# S4 method for list
explanatoryFeatures(x, ...)
# S4 method for Analysis
explanatoryFeatures(x, ...)
Arguments
- x
S4 object of class `AnalysisData`, `RandomForest`, `Univariate`, `Analysis`, or a list.
- cls
sample information column to use
- idx
sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data.
- ...
arguments to pass to the method for the specific class
- threshold
threshold below which explanatory features are extracted
- value
the importance value to threshold. See the usage section for possible values for each class.
- metric
importance metric for which to retrieve explanatory features
Methods
- binaryComparisons: Return a vector of all possible binary comparisons for a given sample information column.
- mtry: Return the default `mtry` random forest parameter value for a given sample information column.
- type: Return the type of random forest analysis.
- response: Return the response variable name used for a random forest analysis.
- metrics: Retrieve the model performance metrics for a random forest analysis.
- predictions: Retrieve the out of bag model response predictions for a random forest analysis.
- importanceMetrics: Retrieve the available feature importance metrics for a random forest analysis.
- importance: Retrieve feature importance results.
- proximity: Retrieve the random forest sample proximities.
- explanatoryFeatures: Retrieve explanatory features.
Examples
library(metaboData)
d <- analysisData(abr1$neg[,200:300],abr1$fact)
## Return possible binary comparisons for the `day` response column
binaryComparisons(d,cls = 'day')
#> [1] "1~2" "1~3" "1~4" "1~5" "1~H" "2~3" "2~4" "2~5" "2~H" "3~4" "3~5" "3~H"
#> [13] "4~5" "4~H" "5~H"
## Return the default random forest `mtry` parameter for the `day` response column
mtry(d,cls = 'day')
#> [1] 10
## Perform random forest analysis
rf_analysis <- randomForest(d,cls = 'day')
## Return the type of random forest
type(rf_analysis)
#> [1] "classification"
## Return the response variable name used
response(rf_analysis)
#> [1] "day"
## Retrieve the model performance metrics
metrics(rf_analysis)
#> # A tibble: 4 × 5
#> response comparison .metric .estimator .estimate
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 day 1~2~3~4~5~H accuracy multiclass 0.567
#> 2 day 1~2~3~4~5~H kap multiclass 0.48
#> 3 day 1~2~3~4~5~H margin NA 0.0424
#> 4 day 1~2~3~4~5~H roc_auc hand_till 0.886
## Retrieve the out of bag model response predictions
predictions(rf_analysis)
#> # A tibble: 120 × 13
#> response comparison rep sample obs pred margin `1` `2` `3` `4`
#> <chr> <chr> <chr> <int> <fct> <fct> <marg> <dbl> <dbl> <dbl> <dbl>
#> 1 day 1~2~3~4~5… 1 1 2 2 0.05… 0.191 0.273 0.113 0.124
#> 2 day 1~2~3~4~5… 1 2 3 2 -0.10… 0.0782 0.397 0.291 0.0838
#> 3 day 1~2~3~4~5… 1 3 4 4 0.02… 0.197 0.149 0.144 0.223
#> 4 day 1~2~3~4~5… 1 4 1 H -0.02… 0.210 0.2 0.210 0.0769
#> 5 day 1~2~3~4~5… 1 5 2 2 0.12… 0.0843 0.343 0.197 0.152
#> 6 day 1~2~3~4~5… 1 6 1 H -0.09… 0.283 0.174 0.114 0.0272
#> 7 day 1~2~3~4~5… 1 7 2 H -0.17… 0.0788 0.241 0.217 0.0345
#> 8 day 1~2~3~4~5… 1 8 4 4 0.02… 0.075 0.16 0.27 0.295
#> 9 day 1~2~3~4~5… 1 9 H 3 -0.10… 0.168 0.196 0.261 0.136
#> 10 day 1~2~3~4~5… 1 10 H 1 -0.01… 0.306 0.167 0.144 0.0722
#> # ℹ 110 more rows
#> # ℹ 2 more variables: `5` <dbl>, H <dbl>
## Show the available feature importance metrics
importanceMetrics(rf_analysis)
#> [1] "1" "2" "3"
#> [4] "4" "5" "H"
#> [7] "MeanDecreaseAccuracy" "MeanDecreaseGini" "false_positive_rate"
#> [10] "selection_frequency"
## Retrieve the feature importance results
importance(rf_analysis)
#> # A tibble: 1,010 × 5
#> response comparison feature metric value
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 day 1~2~3~4~5~H N200 1 0
#> 2 day 1~2~3~4~5~H N200 2 0
#> 3 day 1~2~3~4~5~H N200 3 0
#> 4 day 1~2~3~4~5~H N200 4 0
#> 5 day 1~2~3~4~5~H N200 5 0
#> 6 day 1~2~3~4~5~H N200 H 0
#> 7 day 1~2~3~4~5~H N200 MeanDecreaseAccuracy 0
#> 8 day 1~2~3~4~5~H N200 MeanDecreaseGini 6.00e- 2
#> 9 day 1~2~3~4~5~H N200 false_positive_rate 2.35e-40
#> 10 day 1~2~3~4~5~H N200 selection_frequency 1.6 e+ 1
#> # ℹ 1,000 more rows
## Retrieve the sample proximities
proximity(rf_analysis)
#> # A tibble: 14,400 × 5
#> response comparison sample1 sample2 proximity
#> <chr> <chr> <int> <dbl> <dbl>
#> 1 day 1~2~3~4~5~H 1 1 1
#> 2 day 1~2~3~4~5~H 1 2 0.0704
#> 3 day 1~2~3~4~5~H 1 3 0.0580
#> 4 day 1~2~3~4~5~H 1 4 0.0930
#> 5 day 1~2~3~4~5~H 1 5 0.0556
#> 6 day 1~2~3~4~5~H 1 6 0.0435
#> 7 day 1~2~3~4~5~H 1 7 0.0556
#> 8 day 1~2~3~4~5~H 1 8 0.0441
#> 9 day 1~2~3~4~5~H 1 9 0.106
#> 10 day 1~2~3~4~5~H 1 10 0
#> # ℹ 14,390 more rows
## Retrieve the explanatory features
explanatoryFeatures(rf_analysis,metric = 'false_positive_rate',threshold = 0.05)
#> # A tibble: 35 × 5
#> response comparison feature metric value
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 day 1~2~3~4~5~H N229 false_positive_rate 5.75e-129
#> 2 day 1~2~3~4~5~H N259 false_positive_rate 4.88e- 72
#> 3 day 1~2~3~4~5~H N277 false_positive_rate 3.98e- 67
#> 4 day 1~2~3~4~5~H N255 false_positive_rate 3.27e- 53
#> 5 day 1~2~3~4~5~H N213 false_positive_rate 4.92e- 45
#> 6 day 1~2~3~4~5~H N200 false_positive_rate 2.35e- 40
#> 7 day 1~2~3~4~5~H N221 false_positive_rate 1.80e- 38
#> 8 day 1~2~3~4~5~H N299 false_positive_rate 4.91e- 36
#> 9 day 1~2~3~4~5~H N245 false_positive_rate 9.75e- 27
#> 10 day 1~2~3~4~5~H N279 false_positive_rate 2.38e- 20
#> # ℹ 25 more rows