Last updated: 2018-03-08
Code version: 7bd0159
library(flashr); library(R.matlab)
R.matlab v3.6.1 (2016-10-19) successfully loaded. See ?R.matlab for help.
Attaching package: 'R.matlab'
The following objects are masked from 'package:base':
getOption, isOpen
library(denoiseR)
Presidential Address data:
The data contains word counts from the inaugural addresses of 13 US presidents (1940–2009). There are 13 rows (presidents) and 836 columns (words), and each entry indicates the number of times a particular word was used in a particular address. Since both row and column means vary greatly here we pre-processed the data by centering and scaling both rows and columns, using the biScale function from softImpute.
data("Presidents")
data = readMat('../data/PresiAdd.mat')
data = data$Yscaled
words = row.names(Presidents)
names = colnames(Presidents)
row.names(data) = names
colnames(data) = words
Flash:
flash.data = flash_set_data(data)
fmodel = flash(flash.data, greedy = TRUE, backfit = TRUE)
fitting factor/loading 1
fitting factor/loading 2
fitting factor/loading 3
fitting factor/loading 4
fitting factor/loading 5
fitting factor/loading 6
saveRDS(fmodel, '../output/PresiAddVarCol.rds')
Factors = flash_get_ldf(fmodel)$f
row.names(Factors) = words
pve.order = order(flash_get_pve(fmodel), decreasing = TRUE)
par(mar=c(1,1,1,1))
par(mfrow=c(3,2))
for(i in pve.order){
barplot(Factors[,i], main=paste0('Factor ',i, ' pve= ', round(flash_get_pve(fmodel)[i],3)), las=2, names='')
}
par(mfrow=c(1,1))
Flash again on the loading matrix
flash.loading = flash_set_data(fmodel$EL[,1:5])
flmodel = flash(flash.loading, greedy = TRUE, backfit = TRUE)
fitting factor/loading 1
fitting factor/loading 2
Factors_loading = flash_get_ldf(flmodel)$f
barplot(as.numeric(Factors_loading), main=paste0('Factor 1 pve= ', round(flash_get_pve(flmodel),3)), las=2, cex.names = 0.4, names='')
The distributions for the first four columns of the loading matrix are consistent with a normal distribution. So they are captured by the error term of the second flash run.
We check the strongest words in each factor.
The first factor separates the words about Iraq from those very general political words.
# Factor 1
words[order(Factors[,1], decreasing = TRUE)[1:50]]
[1] "Iraq" "critical" "away " "tomorrow" "depend "
[6] "bad " "play " "whatever" "guarantee " "simply "
[11] "longer " "hear " "partner" "often" "road "
[16] "era " "responsible" "plant" "recession " "easy "
[21] "least" "avoid" "deep " "happen " "view "
[26] "agree" "vice " "building" "lay " "solve"
[31] "launch " "reward " "fundamental" "$1" "contribute "
[36] "rule " "week " "fulfill" "election" "0"
[41] "competitive" "tough" "big " "street " "burden "
[46] "moral" "soon " "speed" "powerful" "simple "
words[order(Factors[,1])[1:50]]
[1] "have " "be" "to" "need " "make "
[6] "as" "can " "state" "service" "most "
[11] "government " "force" "continue" "world" "nation "
[16] "year " "other" "will " "increase" "people "
[21] "congress" "peace" "provide" "national" "unite"
[26] "would" "use " "system " "private" "program"
[31] "meet " "must " "now " "also " "progress"
[36] "security" "great" "policy " "high " "legislation"
[41] "action " "good " "not " "only " "important "
[46] "economic" "public " "first" "law " "past "
The second factor separates the words about people’s life, like ‘child’, ‘job’, ‘school’, ‘parent’, ‘Medicare’, from political words.
# Factor 2
words[order(Factors[,2], decreasing = TRUE)[1:50]]
[1] "thus " "complete" "affair " "executive "
[5] "adequate" "particularly " "importance " "agricultural "
[9] "facility" "understanding" "western" "demonstrate"
[13] "situation " "activity" "assure " "capacity"
[17] "substantial" "agriculture" "available " "improvement"
[21] "objective " "republic" "natural" "period "
[25] "legislative" "rapidly" "provision " "cooperation"
[29] "maintain" "design " "requirement" "general"
[33] "basis" "assist " "structure " "resource"
[37] "position" "political " "relation" "i"
[41] "clearly" "vast " "appropriate" "farm "
[45] "contribution " "exist" "being" "addition"
[49] "industrial " "organization "
words[order(Factors[,2])[1:50]]
[1] "safe " "thank" "child" "join " "pass " "ask "
[7] "here " "america" "save " "get " "family " "sure "
[13] "young" "fellow " "American" "job " "who " "say "
[19] "just " "start" "lead " "know " "n" "tonight"
[25] "stop " "when " "leave" "parent " "back " "together"
[31] "America" "money" "so" "honor" "next " "let "
[37] "do" "begin" "school " "reform " "go" "create "
[43] "cut " "worker " "day " "keep " "t" "down "
[49] "help " "bill "
The third factor separate the words about government and development of technology. These words are used in Carter’s address.
# Factor 3
words[order(Factors[,3], decreasing = TRUE)[1:50]]
[1] "win " "may " "method " "destroy" "man "
[6] "point" "whole" "war " "fact " "find "
[11] "power" "practice" "follow " "few " "victory"
[16] "army " "great" "far " "shall" "modern "
[21] "history" "labor" "lie " "possible" "case "
[26] "rest " "not " "produce" "certain" "immediate "
[31] "learn" "accept " "fight" "live " "carry"
[36] "fear " "adequate" "beginning " "would" "ahead"
[41] "mean " "total" "condition " "operation " "fail "
[46] "back " "believe" "session" "task " "part "
words[order(Factors[,3])[1:50]]
[1] "administration " "remain " "stability "
[4] "develop" "area " "major"
[7] "assistance " "promote" "nuclear"
[10] "significant" "sector " "relation"
[13] "strong " "establish " "comprehensive"
[16] "demonstrate" "conservation " "sign "
[19] "negotiation" "system " "technology "
[22] "agreement " "development" "political "
[25] "strategic " "ensure " "serious"
[28] "support" "policy " "agency "
[31] "important " "energy " "soviet "
[34] "provide" "region " "continue"
[37] "oil " "foundation " "importance "
[40] "improve" "encourage " "commitment "
[43] "western" "funding" "access "
[46] "global " "addition" "rural"
[49] "stable " "assist "
The fourth factor separate the words about Vietnam war.
# Factor 4
words[order(Factors[,4], decreasing = TRUE)[1:50]]
[1] "session" "history" "chance "
[4] "Vietnam" "poverty" "senate "
[7] "south" "goal " "entire "
[10] "achievement" "open " "land "
[13] "city " "consumer" "Asia "
[16] "skill" "quality" "answer "
[19] "water" "deal " "question"
[22] "partnership" "in" "congress"
[25] "then " "enter" "ahead"
[28] "treaty " "try " "structure "
[31] "reach" "approve" "believe"
[34] "resolve" "i" "measure"
[37] "last " "move " "test "
[40] "crime" "possible" "assist "
[43] "hospital" "transportation " "pledge "
[46] "live " "revenue" "early"
[49] "truly" "$1"
words[order(Factors[,4])[1:50]]
[1] "general" "operation " "practice" "sound"
[5] "immediate " "such " "production " "permanent "
[9] "situation " "number " "industrial " "fundamental"
[13] "day " "whole" "future " "effect "
[17] "ideal" "investment " "powerful" "force"
[21] "encourage " "plant" "agricultural " "include"
[25] "away " "activity" "group" "exist"
[29] "army " "plan " "certain" "long "
[33] "thing" "condition " "rest " "democratic "
[37] "produce" "term " "process" "objective "
[41] "fall " "democracy " "put " "gain "
[45] "capital" "service" "pay " "armed"
[49] "economic" "depend "
The fifth factor separates words about Vietnam war from words like ‘peacetime’, ‘reform’.
# Factor 5
words[order(Factors[,5], decreasing = TRUE)[1:50]]
[1] "look " "place" "truly" "difference "
[5] "set " "chance " "message" "address"
[9] "washington " "structure " "example" "generation "
[13] "particular " "point" "meet " "environment"
[17] "approach" "peacetime " "beginning " "record "
[21] "historic" "critical" "goal " "million"
[25] "decision" "comprehensive" "america" "open "
[29] "preserve" "government " "concern" "America"
[33] "building" "era " "great" "priority"
[37] "balanced" "what " "back " "proposal"
[41] "ahead" "become " "negotiation" "same "
[45] "natural" "fundamental" "relationship " "sense"
[49] "reform " "particularly "
words[order(Factors[,5])[1:50]]
[1] "south" "north" "poverty" "test " "month"
[6] "$1" "carry" "hospital" "hope " "especially "
[11] "early" "danger " "aggression " "public " "seek "
[16] "rest " "payment" "food " "approve" "europe "
[21] "man " "try " "permit " "Vietnam" "prevent"
[26] "prepare" "find " "unity" "already" "certain"
[31] "entire " "course " "skill" "close" "home "
[36] "in" "last " "effort " "think" "pledge "
[41] "short" "conflict" "recommend " "least" "strengthen "
[46] "extend " "finally" "Iraq" "achievement" "bear "
Checking the loading matrix
loading = fmodel$EL[,1:5]
row.names(loading) = names
loading
[,1] [,2] [,3] [,4] [,5]
Roos -11.749949 7.3625658 17.85384337 -12.95451443 0.018641829
Trum -19.545564 10.6172584 5.84426524 -6.03364902 -0.025528893
Eise -17.804700 13.7714467 -4.18405718 -4.34058236 0.022367090
Kenn 15.089515 13.3985343 -0.22487247 -0.03688365 -21.745118226
John -2.471928 -0.7544205 6.48496963 26.92988948 -38.428225506
Nixo 9.068585 6.3107013 4.30658520 18.88490017 51.978743499
Ford 18.748130 11.3162886 -5.12464770 -4.16083773 0.016974278
Cart -13.115567 7.1901043 -23.24710473 0.02551236 0.001341336
Reag -5.423707 -17.3142106 -0.07000135 -0.11704529 7.122486088
Bush 18.904265 -6.6852742 0.07994342 -11.92173614 -0.004739630
Clin -8.207371 -22.7657349 -0.01911699 0.11123080 0.075746403
Bushf -4.507005 -18.7038114 -0.05075150 -0.12599003 -0.027001068
Obam 20.995769 -3.6126797 -0.01329787 -5.48368076 -0.033863230
Kennedy, Johnson and Nixon used the words in Factor 5 a lot. This is reasonable since the Vietnam war was happened during their presidency term. When Nixon took office, people want peace because of the long times of the war. So he used words about ‘peace’ in his address a lot.
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] denoiseR_1.0 R.matlab_3.6.1 flashr_0.5-6
loaded via a namespace (and not attached):
[1] Rcpp_0.12.15 compiler_3.4.3 git2r_0.20.0
[4] plyr_1.8.4 R.utils_2.6.0 R.methodsS3_1.7.1
[7] iterators_1.0.9 tools_3.4.3 digest_0.6.13
[10] evaluate_0.10.1 tibble_1.3.4 gtable_0.2.0
[13] lattice_0.20-35 rlang_0.1.6 Matrix_1.2-12
[16] foreach_1.4.4 yaml_2.1.17 parallel_3.4.3
[19] ebnm_0.1-10 cluster_2.0.6 stringr_1.3.0
[22] knitr_1.20 flashClust_1.01-2 scatterplot3d_0.3-40
[25] rprojroot_1.2 grid_3.4.3 rmarkdown_1.8
[28] irlba_2.3.2 FactoMineR_1.39 ggplot2_2.2.1
[31] ashr_2.2-7 magrittr_1.5 leaps_3.0
[34] backports_1.1.2 scales_0.5.0 codetools_0.2-15
[37] htmltools_0.3.6 MASS_7.3-47 assertthat_0.2.0
[40] softImpute_1.4 colorspace_1.3-2 stringi_1.1.6
[43] lazyeval_0.2.1 pscl_1.5.2 doParallel_1.0.11
[46] munsell_0.4.3 truncnorm_1.0-8 SQUAREM_2017.10-1
[49] R.oo_1.21.0
This R Markdown site was created with workflowr