Subsets rows of the sentiment measures based on their columns.
# S3 method for class 'sento_measures'
subset(x, subset = NULL, select = NULL, delete = NULL, ...)
x: a sento_measures object created using sento_measures().
subset: a logical (non-character) expression indicating the rows to keep. If a numeric
input is given, it is used for row index subsetting.
select: a character vector of the lexicon, feature and time weighting scheme names, to indicate which
measures need to be selected, or a list of character vectors, possibly with separately specified
combinations (each consisting of at most one unique lexicon, one unique feature, and one unique time weighting scheme).
delete: same format as the select argument, but used to indicate which measures to delete.
...: not used.
A modified sento_measures object, with only the remaining rows and sentiment measures,
including updated information and statistics, but with the original sentiment scores data.table
left untouched.
# Example walkthrough for subset() on a sento_measures object: row subsetting
# through the 'subset' argument, and selecting or deleting measures through the
# 'select' and 'delete' arguments. Lines starting with "#>" are rendered output.

# load the datasets from the sentometrics package
# (list_valence_shifters is loaded but not used in the code below)
data("usnews", package = "sentometrics")
data("list_lexicons", package = "sentometrics")
data("list_valence_shifters", package = "sentometrics")
# construct a sento_measures object to start with
corpus <- sento_corpus(corpusdf = usnews)
# work on a sample of 500 documents
# NOTE(review): the sampling is presumably random, so results may differ between runs
corpusSample <- quanteda::corpus_sample(corpus, size = 500)
# keep only the "LM_en" and "HENRY_en" lexicons
l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
# two time weighting schemes ("equal_weight" and "linear"), by = "year", lag = 3
ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3)
sm <- sento_measures(corpusSample, l, ctr)
# three specified indices in required list format
# (each "lexicon--feature--time" name is split on "--" into its components)
three <- as.list(
stringi::stri_split(c("LM_en--economy--linear",
"HENRY_en--wsj--equal_weight",
"HENRY_en--wapo--equal_weight"),
regex = "--")
)
# different subsets
# logical expression on a measure column: rows to keep
sub1 <- subset(sm, HENRY_en--economy--equal_weight >= 0.01)
# expression on the dates; the rendered output below shows this call erroring
sub2 <- subset(sm, date %in% get_dates(sm)[3:12])
#> Error in subset.sento_measures(sm, date %in% get_dates(sm)[3:12]): The 'subset' argument must evaluate to logical.
# numeric input: used for row index subsetting
sub3 <- subset(sm, 3:12)
# row indices beyond nobs(x): warns and returns the input object (see output below)
sub4 <- subset(sm, 1:100) # warning
#> Warning: At least one row index is greater than nobs(x). Input sento_measures object is returned.
# different selections
# character vector of lexicon, feature and/or time weighting scheme names
sel1 <- subset(sm, select = "equal_weight")
sel2 <- subset(sm, select = c("equal_weight", "linear"))
sel3 <- subset(sm, select = c("linear", "LM_en"))
# list of character vectors: separately specified combinations
sel4 <- subset(sm, select = list(c("linear", "wsj"), c("linear", "economy")))
sel5 <- subset(sm, select = three)
# different deletions
# same input formats as for 'select', but the matching measures are deleted
del1 <- subset(sm, delete = "equal_weight")
del2 <- subset(sm, delete = c("linear", "LM_en"))
del3 <- subset(sm, delete = list(c("linear", "wsj"), c("linear", "economy")))
# warns and returns the input object (see output below)
del4 <- subset(sm, delete = c("equal_weight", "linear")) # warning
#> Warning: No appropriate combination found or all measures selected for deletion. Input sento_measures object is returned.
del5 <- subset(sm, delete = three)