"plspm"
is an R
package dedicated to Partial Least Squares Path Modeling (PLS-PM)
analysis for both metric and non-metric data. Versions
later than 4.0 include a whole new set of features to handle non-metric
variables.
As a Data Science and Statistics educator, I love to share the work I
do. Each month I spend dozens of hours curating learning materials and
computational tools like this R package. If you find any value and
usefulness in plspm, please consider making a one-time donation—via
PayPal—in any amount (e.g. the amount you would spend buying me a
coffee or any other drink). Your support really matters.
You can install "plspm" using the function install_github() from
package "devtools":
# install "devtools" (it provides install_github())
install.packages("devtools")
library(devtools)
# install "plspm" from its GitHub repository
install_github("gastonstat/plspm")
Typical example with a Customer Satisfaction Model
# load plspm
library(plspm)
# load dataset satisfaction
data(satisfaction)
# define path matrix (inner model)
# each vector is one row of the matrix; stacked with rbind() they form a
# 6 x 6 lower-triangular 0/1 matrix whose row names are the vector names
IMAG <- c(0, 0, 0, 0, 0, 0)
EXPE <- c(1, 0, 0, 0, 0, 0)
QUAL <- c(0, 1, 0, 0, 0, 0)
VAL <- c(0, 1, 1, 0, 0, 0)
SAT <- c(1, 1, 1, 1, 0, 0)
LOY <- c(1, 0, 0, 0, 1, 0)
sat_path <- rbind(IMAG, EXPE, QUAL, VAL, SAT, LOY)
# define list of blocks (outer model)
# one integer vector per latent variable (six vectors, covering 1 to 27)
sat_blocks <- list(1:5, 6:10, 11:15, 16:19, 20:23, 24:27)
# vector of modes (reflective indicators): mode "A" for all six blocks
sat_modes <- rep("A", 6)
# apply plspm with bootstrap validation
satpls <- plspm(satisfaction, sat_path, sat_blocks, modes = sat_modes,
                scaled = FALSE, boot.val = TRUE)
# default print
satpls
# summary of results
summary(satpls)
# plot inner model results
plot(satpls, what = "inner")
# plot outer model loadings
plot(satpls, what = "loadings")
# plot outer model weights
plot(satpls, what = "weights")
Example with the classic Russett data (original data set)
# load dataset russett A
# (variable 'demo' as numeric)
data(russa)
# load dataset russett B
# (variable 'demo' as factor)
data(russb)
# russett all numeric
# path matrix: 3 x 3, only the third row has non-zero entries
rus_path <- rbind(c(0, 0, 0), c(0, 0, 0), c(1, 1, 0))
rownames(rus_path) <- c("AGRI", "IND", "POLINS")
colnames(rus_path) <- c("AGRI", "IND", "POLINS")
# blocks: one vector of indices per row of rus_path
rus_blocks <- list(1:3, 4:5, 6:9)
# scaling: one entry per index in the matching block, all "NUM"
rus_scaling <- list(c("NUM", "NUM", "NUM"),
                    c("NUM", "NUM"),
                    c("NUM", "NUM", "NUM", "NUM"))
# modes: one per block
rus_modes <- c("A", "A", "A")
PLS-PM using data set russa
and scaling all ‘NUM’
# PLS-PM using data set 'russa'
rus_pls1 <- plspm(russa, rus_path, rus_blocks, scaling = rus_scaling,
                  modes = rus_modes, scheme = "centroid",
                  plscomp = c(1, 1, 1), tol = 0.0000001)
rus_pls1
# outer model
rus_pls1$outer_model
# inner model
rus_pls1$inner_model
# scores (first rows only)
head(rus_pls1$scores)
# plot inner model
plot(rus_pls1)
PLS-PM using data set russa
, and different scaling
# new scaling
# same 3 / 2 / 4 layout as before, now mixing "NUM", "ORD" and "NOM"
rus_scaling2 <- list(c("NUM", "NUM", "NUM"),
                     c("ORD", "ORD"),
                     c("NUM", "NUM", "NUM", "NOM"))
# PLS-PM using data set 'russa'
rus_pls2 <- plspm(russa, rus_path, rus_blocks, scaling = rus_scaling2,
                  modes = rus_modes, scheme = "centroid",
                  plscomp = c(1, 1, 1), tol = 0.0000001)
# outer model
rus_pls2$outer_model
Now let’s use data set russb
(it contains a factor!)
# take a peek
head(russb)
# PLS-PM using data set 'russb'
rus_pls3 <- plspm(russb, rus_path, rus_blocks, scaling = rus_scaling2,
                  modes = rus_modes, scheme = "centroid",
                  plscomp = c(1, 1, 1), tol = 0.0000001)
# outer model
rus_pls3$outer_model
Now let’s change modes
# modes new A
rus_modes2 <- c("newA", "newA", "newA")
# PLS-PM using data set 'russa'
rus_pls4 <- plspm(russa, rus_path, rus_blocks, scaling = rus_scaling2,
                  modes = rus_modes2, scheme = "centroid",
                  plscomp = c(1, 1, 1), tol = 0.0000001)
# outer model
rus_pls4$outer_model
Let’s make things more interesting, flexible and versatile. How? What
if you could have more freedom specifying the arguments? Now you can!
Note that you can specify blocks using variables’ names; the scaling
types are NOT case sensitive, and neither are modes nor scheme. Isn’t
that cool?
# blocks (given by variable names instead of indices)
rus_blocchi <- list(
  c("gini", "farm", "rent"),
  c("gnpr", "labo"),
  c("inst", "ecks", "death", "demo"))
# scaling (mixed case and abbreviated spellings)
rus_scaling3 <- list(c("numeric", "numeric", "numeric"),
                     c("ordinal", "ORDINAL"),
                     c("NuM", "numer", "NUM", "nominal"))
# modes new A (mixed case)
rus_modes3 <- c("newa", "NEWA", "NewA")
# PLS-PM using data set 'russb'
rus_pls5 <- plspm(russb, rus_path, rus_blocchi, scaling = rus_scaling3,
                  modes = rus_modes3, scheme = "CENTROID",
                  plscomp = c(1, 1, 1), tol = 0.0000001)
# outer model
rus_pls5$outer_model
Another nice feature is that you can perform a PLS-PM analysis on data containing missing values.
We’ll use the dataset russa and add some missing values.
Then we’ll handle all variables with a numeric scaling.
# let's add missing values to russa
# work on a copy so the original russa data set stays untouched
russNA <- russa
russNA[1, 1] <- NA
russNA[4, 4] <- NA
russNA[6, 6] <- NA
# PLS-PM using data set 'russa' with missing values (russNA)
rus_pls6 <- plspm(russNA, rus_path, rus_blocks, scaling = rus_scaling,
                  modes = rus_modes, scheme = "centroid",
                  plscomp = c(1, 1, 1), tol = 0.0000001)
rus_pls6
# outer model
rus_pls6$outer_model
# inner model
rus_pls6$inner_model
# scores (first rows only)
head(rus_pls6$scores)
# plot inner model
plot(rus_pls6)
Gaston Sanchez (gaston.stat at gmail.com)
Laura Trinchera (ltr at rouenbs.fr)
Giorgio Russolillo (giorgio.russolillo at cnam.fr)