Subset sentiment measures — subset.sento_measures

Subsets rows of the sentiment measures based on their columns.

# S3 method for class 'sento_measures'
subset(x, subset = NULL, select = NULL, delete = NULL, ...)

Arguments

x: a sento_measures object created using sento_measures.
subset: a logical (non-character) expression indicating the rows to keep. If a numeric input is given, it is used for row index subsetting.
select: a character vector of lexicon, feature and time weighting scheme names indicating which measures to select, or a list of character vectors, each specifying a separate combination (consisting of at most one lexicon, one feature, and one time weighting scheme).
delete: same format as the select argument, but indicating which measures to delete.
...: not used.

Value

A modified sento_measures object containing only the remaining rows and sentiment measures, with updated information and statistics, but with the original sentiment scores data.table left untouched.

Author

Samuel Borms

Examples

# load the built-in example data shipped with the package
# NOTE(review): list_valence_shifters is loaded but not referenced in the
# examples below -- confirm whether it is still needed here
data("usnews", package = "sentometrics")
data("list_lexicons", package = "sentometrics")
data("list_valence_shifters", package = "sentometrics")

# construct a sento_measures object to start with
# (two lexicons, LM_en and HENRY_en, and two time weighting schemes,
# equal_weight and linear; these names are reused in the calls below)
# NOTE(review): corpus_sample() presumably draws a random sample and no seed
# is set, so the exact measures may differ between runs -- confirm
corpus <- sento_corpus(corpusdf = usnews)
corpusSample <- quanteda::corpus_sample(corpus, size = 500)
l <- sento_lexicons(list_lexicons[c("LM_en", "HENRY_en")])
ctr <- ctr_agg(howTime = c("equal_weight", "linear"), by = "year", lag = 3)
sm <- sento_measures(corpusSample, l, ctr)

# three specified indices in required list format
# (each "lexicon--feature--time" name is split on "--", giving a list with
# one character vector of components per measure)
three <- as.list(
  stringi::stri_split(c("LM_en--economy--linear",
                        "HENRY_en--wsj--equal_weight",
                        "HENRY_en--wapo--equal_weight"),
                      regex = "--")
)

# different subsets
# sub1: logical condition on a measure column, written with its full
#       "lexicon--feature--time" name
# sub2: logical condition on the date column; in this rendered run the call
#       failed with the error printed below it
# sub3: numeric input, used as row indices
# sub4: row indices beyond the number of observations, which triggers the
#       warning printed below it
sub1 <- subset(sm, HENRY_en--economy--equal_weight >= 0.01)
sub2 <- subset(sm, date %in% get_dates(sm)[3:12])
#> Error in subset.sento_measures(sm, date %in% get_dates(sm)[3:12]): The 'subset' argument must evaluate to logical.
sub3 <- subset(sm, 3:12)
sub4 <- subset(sm, 1:100) # warning
#> Warning: At least one row index is greater than nobs(x). Input sento_measures object is returned.

# different selections
# sel1-sel3: a character vector of lexicon, feature and/or time weighting
#            scheme names
# sel4-sel5: a list of character vectors, each element specifying a separate
#            combination
sel1 <- subset(sm, select = "equal_weight")
sel2 <- subset(sm, select = c("equal_weight", "linear"))
sel3 <- subset(sm, select = c("linear", "LM_en"))
sel4 <- subset(sm, select = list(c("linear", "wsj"), c("linear", "economy")))
sel5 <- subset(sm, select = three)

# different deletions
# (same input formats as for select, but the matching measures are deleted)
# del4 names both time weighting schemes used to build sm, which presumably
# marks every measure for deletion, hence the warning printed below it
del1 <- subset(sm, delete = "equal_weight")
del2 <- subset(sm, delete = c("linear", "LM_en"))
del3 <- subset(sm, delete = list(c("linear", "wsj"), c("linear", "economy")))
del4 <- subset(sm, delete = c("equal_weight", "linear")) # warning
#> Warning: No appropriate combination found or all measures selected for deletion. Input sento_measures object is returned.
del5 <- subset(sm, delete = three)