\documentclass{article}
\newcommand{\titel}{\pkg{lavaan}: a brief user's guide}
%\usepackage{Sweave}
\usepackage{fancyvrb}
\usepackage{graphicx}
\usepackage{color}

\usepackage{amsmath}
\newcommand{\tr}{\textsf{tr}}

% Label formatter for the `ldescription' environment defined below.
% Short labels are set flush left inside a box of width \labelwidth;
% labels wider than \labelwidth are allowed to overhang (zero-width
% \makebox) and are pushed onto their own line via \parbox + \\[1pt],
% so the item text still starts at the same left margin.
\newlength{\ldescriptionwidth}
\newcommand{\ldescriptionlabel}[1]{%
  \settowidth{\ldescriptionwidth}{{#1}}%
  \ifdim\ldescriptionwidth>\labelwidth
    {\parbox[b]{\labelwidth}%
      {\makebox[0pt][l]{#1}\\[1pt]\makebox{}}}%
  \else
    \makebox[\labelwidth][l]{{#1}}%
  \fi
  \hfil\relax}
% Description-style list with a fixed-width label column.  The optional
% argument (default 1in) sets \labelwidth; the text block is indented by
% \labelwidth + \labelsep, and labels are typeset by \ldescriptionlabel
% (over-long labels get their own line instead of shifting the text).
\newenvironment{ldescription}[1][1in]%
  {\begin{list}{}%
    {\setlength{\labelwidth}{#1}%
      \setlength{\leftmargin}{\labelwidth}%
      \addtolength{\leftmargin}{\labelsep}%
      \renewcommand{\makelabel}{\ldescriptionlabel}}}%
  {\end{list}}

\usepackage{bm}
% \vet{x}: upright bold vector notation (usable in text or math).
% NOTE(review): \v is redefined with an identical body at the second
% \usepackage{bm} further down -- \vet looks like a leftover duplicate;
% confirm before removing either.
\newcommand{\vet}[1]{\ensuremath{\bm{\mathrm{#1}}}}


\usepackage{times}
\usepackage{sectsty}
\usepackage{tikz}
% Node styles for the path diagrams.  \tikzstyle has been deprecated
% since PGF/TikZ 2.0; \tikzset with the `.style' key handler is the
% supported equivalent and produces identical styles.
\tikzset{
  % ov: observed (manifest) variable -- plain square node
  ov/.style={shape=rectangle,
             draw=black!80,
             minimum height=0.6cm,
             minimum width=0.6cm,
             thick},
  % av: square node with a light gray fill
  av/.style={shape=rectangle,
             draw=black!80,
             fill=black!10,
             minimum height=0.6cm,
             minimum width=0.6cm,
             thick},
  % lv: latent variable -- circular node
  lv/.style={shape=circle,draw=black!80,thick,minimum width=0.7cm}
}


\fvset{fontfamily=courier}
\DefineVerbatimEnvironment{Sinput}{Verbatim}
{fontseries=b, fontsize=\footnotesize, xleftmargin=0.2cm}
% Routput: verbatim environment for R console output.
% NOTE(review): Routput was previously defined twice in a row; the
% second definition silently overrode the first (fontsize=\footnotesize,
% xleftmargin=1cm).  Only the effective definition is kept.
\DefineVerbatimEnvironment{Routput}{Verbatim}
{fontseries=b,fontsize=\scriptsize, xleftmargin=0.2cm}
\DefineVerbatimEnvironment{Binput}{Verbatim}
{fontseries=b, fontsize=\scriptsize,frame=single, label=\fbox{lavaan model syntax}, framesep=2mm}
%\DefineShortVerb{\!}
\DefineVerbatimEnvironment{Rinput}{Verbatim}
{fontseries=b, fontsize=\scriptsize, frame=single, label=\fbox{R code}, framesep=5mm}


% \pkg{..}: R package names, bold series of the normal (roman) family.
\newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}}
% \proglang{..}: programming-language names, sans serif.
\let\proglang=\textsf
% \code{..}: inline code, typewriter.
\let\code=\texttt

% NOTE(review): bm is already loaded earlier in the preamble; a second
% \usepackage{bm} is a harmless no-op (LaTeX skips repeat loads with
% identical options), but one of the two could be dropped.
\usepackage{bm}
% \v{x}: upright bold vector notation.  WARNING: this overrides the
% standard \v accent (caron, as in \v{c}); any caron accents in this
% document would silently break -- presumably none are used; verify.
\renewcommand{\v}[1]{\ensuremath{\bm{\mathrm{#1}}}}

% colors
\definecolor{darkred}{rgb}{.4, .0, .0}
\definecolor{darkblue}{rgb}{.0, .0, .4}
\definecolor{darkgreen}{rgb}{.0, .3, .0}
\definecolor{darkgray}{rgb}{.6, .6, .6}
\definecolor{brown}{rgb}{.3, .3, .0}
\subsectionfont{\color{darkgreen}}
\sectionfont{\color{darkblue}}
\subsubsectionfont{\sffamily\color{brown}}

\usepackage{geometry}
\geometry{paperwidth=12.8cm, paperheight=9.6cm,
          includeheadfoot,
          scale={0.90,0.95},
          headsep=0.3cm,footskip=0.4cm
         }

\usepackage{fancyhdr}
\usepackage{lastpage}
\pagestyle{fancy}
\renewcommand{\headrulewidth}{0.1mm}
\renewcommand{\footrulewidth}{0.1mm}
\lhead{\tiny \sffamily Department of Data Analysis}
\chead{}
\rhead{\tiny \sffamily Ghent University}
\lfoot{\tiny \sffamily Yves Rosseel}
\cfoot{\tiny \sffamily \textcolor{darkred}{\titel}}
\rfoot{\tiny \sffamily \thepage\ / {\normalcolor \pageref{LastPage}}}

\usepackage{booktabs}
\renewcommand{\arraystretch}{1.2}



\author{Yves Rosseel\\
Department of Data Analysis\\
Ghent University -- Belgium}
\date{\vspace*{1cm}Utrecht -- April 24, 2012}
\title{\textcolor{darkred}{\titel}}

% \sss{..}: shorthand for an unnumbered subsubsection (slide heading).
\newcommand{\sss}[1]{\subsubsection*{#1}}
% \lijn ("line" in Dutch): forced vertical gap of 4% of \textheight.
\newcommand{\lijn}{\mbox{}\\*[0.04\textheight]}

\usepackage[hypertexnames=false,
            hyperfootnotes=false,
            colorlinks,
            linkcolor={blue},
            citecolor={blue},
            urlcolor={red},
            pdfauthor={Vakgroep Data-analyse},
            pdfpagemode=UseNone,bookmarks=false
            ]{hyperref}



\begin{document}
\maketitle\thispagestyle{fancy}\newpage
\tableofcontents\newpage

\newpage
\section{lavaan: a brief user's guide}
\subsection{Model syntax: specifying models}
\sss{The four main formula types, and other operators}
{\footnotesize
\begin{center}
\begin{tabular}{@{}lcl@{}} \toprule
formula type & operator & mnemonic \\ \midrule
latent variable            & \Verb!=~!  & is manifested by\\
regression                 & \Verb!~!   & is regressed on\\
(residual) (co)variance    & \Verb!~~!  & is correlated with\\
intercept                  & \Verb!~ 1! & intercept\\ \midrule
defined parameter          & \Verb!:=!  & is defined as\\
equality constraint        & \Verb!==!  & is equal to\\
inequality constraint      & \Verb!<!   & is smaller than\\
inequality constraint      & \Verb!>!   & is larger than\\ \bottomrule
\end{tabular}
\end{center}
}
\newpage
\sss{A typical model syntax}
\begin{Sinput}
> myModel <- ' # regressions
                 y1 + y2 ~ f1 + f2 + x1 + x2
                      f1 ~ f2 + f3
                      f2 ~ f3 + x1 + x2

               # latent variable definitions
                 f1 =~ y1 + y2 + y3
                 f2 =~ y4 + y5 + y6
                 f3 =~ y7 + y8 +
                       y9 + y10

               # variances and covariances
                 y1 ~~ y1
                 y1 ~~ y2
                 f1 ~~ f2

               # intercepts
                 y1 ~ 1
                 f1 ~ 1
             '
\end{Sinput}

\newpage
\sss{Fixing parameters, and overriding auto-fixed parameters}
\begin{Sinput}
HS.model.bis <- ' visual  =~ NA*x1 + x2 + x3
                  textual =~ NA*x4 + x5 + x6
                  speed   =~ NA*x7 + x8 + x9
                  visual  ~~ 1*visual
                  textual ~~ 1*textual
                  speed   ~~ 1*speed
                '
\end{Sinput}
\begin{itemize}
\item pre-multiplying a model parameter with a numeric value will keep
the parameter fixed to that value
\item pre-multiplying a model parameter with `NA' will force the parameter
to be free
\item for this piece of code: using the \verb!std.lv=TRUE! argument
has the same effect
\end{itemize}

\newpage
\sss{Labels and simple equality constraints}
\begin{Sinput}
model.equal <- '
  # measurement model
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + d1*y2 + d2*y3 + d3*y4
    dem65 =~ y5 + d1*y6 + d2*y7 + d3*y8

  # regressions
    dem60 ~ ind60
    dem65 ~ ind60 + dem60

  # residual covariances
    y1 ~~ y5
    y2 ~~ y4 + y6
    y3 ~~ y7
    y4 ~~ y8
    y6 ~~ y8
'
\end{Sinput}
\begin{itemize}
\item pre-multiplying model parameters with a string gives the model
parameter a custom `label'
\item model parameters with the same label are considered to be equal
\end{itemize}

\newpage
\sss{Defined parameters and mediation analysis}
\begin{Sinput}
X <- rnorm(100); M <- 0.5*X + rnorm(100); Y <- 0.7*M + rnorm(100)
Data <- data.frame(X = X, Y = Y, M = M)

model <- ' # direct effect
             Y ~ c*X
           # mediator
             M ~ a*X
             Y ~ b*M
           # indirect effect (a*b)
             ab := a*b
           # total effect
             total := c + (a*b)
         '

fit <- sem(model, data=Data)
\end{Sinput}
\begin{itemize}
\item the ``:='' operator defines a new parameter, as a function of existing
(free) parameters, but referring to their labels
\item by default, the delta rule is used to compute standard errors for
these defined parameters; bootstrapping may be a better option
\end{itemize}

\newpage
\sss{Linear and nonlinear equality and inequality constraints}
\begin{Sinput}
Data <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100),
                   x3 = rnorm(100))

model.constr <- ' # model with labeled parameters
                    y ~ b1*x1 + b2*x2 + b3*x3
                  # constraints
                    b1 == (b2 + b3)^2
                    b1 > exp(b2 + b3) '
fit <- sem(model.constr, data=Data)
\end{Sinput}
\begin{itemize}
\item simple regression model, but with (nonlinear) constraints imposed
on the regression coefficients
\item can be used to force variances to be strictly positive
\item can be used for testing interaction effects among latent variables
\item for simple equality constraints (e.g.\ \verb!b1 == b2!), it is much
more efficient to simply provide the same label
\end{itemize}

\newpage
\subsection{Fitting functions: estimating models}
\sss{User-friendly fitting functions}
\begin{itemize}
\item \Verb!cfa()! for confirmatory factor analysis
\item \Verb!sem()! for path analysis and SEM
\item \Verb!growth()! for growth curve modeling
\end{itemize}

\sss{Arguments of the cfa() and sem() fitting functions}
\begin{Sinput}
     sem(model = NULL, meanstructure = "default", fixed.x = "default",
         orthogonal = FALSE, std.lv = FALSE, data = NULL, std.ov = FALSE,
         missing = "default", sample.cov = NULL, sample.mean = NULL,
         sample.nobs = NULL, group = NULL, group.equal = "",
         group.partial = "", constraints = '', estimator = "default",
         likelihood = "default", information = "default", se = "default",
         test = "default", bootstrap = 1000L,
         mimic = "default", representation = "default",
         do.fit = TRUE, control = list(), start = "default",
         verbose = FALSE, warn = TRUE, debug = FALSE)
\end{Sinput}

\newpage
{\footnotesize
\begin{ldescription}
\item[\code{model}] A description of the user-specified model. Typically, the model
is described using the lavaan model syntax. See
\code{model.syntax} for more information. Alternatively, a
parameter list (e.g.\ the output of the \code{lavaanify()} function) is also
accepted.
\item[\code{meanstructure}] If \code{TRUE}, the means of the observed
variables enter the model. If \code{"default"}, the value is set based
on the user-specified model, and/or the values of other arguments.
\item[\code{fixed.x}] If \code{TRUE}, the exogenous `x' covariates are considered
fixed variables and the means, variances and covariances of these variables
are fixed to their sample values. If \code{FALSE}, they are considered
random, and the means, variances and covariances are free parameters. If
\code{"default"}, the value is set depending on the mimic option.
\item[\code{orthogonal}] If \code{TRUE}, the exogenous latent variables
are assumed to be uncorrelated.
\item[\code{std.lv}] If \code{TRUE}, the metric of each latent variable is
determined by fixing their variances to 1.0. If \code{FALSE}, the metric
of each latent variable is determined by fixing the factor loading of the
first indicator to 1.0.
\item[\code{data}] An optional data frame containing the observed variables used in
the model.
\item[\code{std.ov}] If \code{TRUE}, all observed variables are standardized
before entering the analysis.
\item[\code{missing}] If \code{"listwise"}, cases with missing values are removed
listwise from the data frame before analysis. If \code{"direct"} or
\code{"ml"} or \code{"fiml"} and the estimator is maximum likelihood,
Full Information Maximum Likelihood (FIML) estimation is used using all
available data in the data frame. This is only valid if the data are
missing completely at random (MCAR) or missing at random (MAR). If
\code{"default"}, the value is set depending on the estimator and the
mimic option.
\item[\code{sample.cov}] Numeric matrix. A sample variance-covariance matrix.
The rownames must contain the observed variable names.
For a multiple group analysis, a list with a variance-covariance matrix
for each group.
\item[\code{sample.mean}] A sample mean vector. For a multiple group analysis,
a list with a mean vector for each group.
\item[\code{sample.nobs}] Number of observations if the full data frame is missing
and only sample moments are given. For a multiple group analysis, a list
or a vector with the number of observations for each group.
\item[\code{group}] A variable name in the data frame defining the groups in a
multiple group analysis.
\item[\code{group.equal}] A vector of character strings. Only used in
a multiple group analysis. Can be one or more of the following:
\code{"loadings"}, \code{"intercepts"}, \code{"means"},
\code{"regressions"}, \code{"residuals"},
\code{"residual.covariances"}, \code{"lv.variances"} or
\code{"lv.covariances"}, specifying the pattern of equality
constraints across multiple groups.
\item[\code{group.partial}] A vector of character strings containing the labels
of the parameters which should be free in all groups (thereby
overriding the group.equal argument for some specific parameters).
\item[\code{constraints}] Additional (in)equality constraints not yet included in the
model syntax. See \code{model.syntax} for more information.
\item[\code{estimator}] The estimator to be used. Can be one of the following:
\code{"ML"} for maximum likelihood, \code{"GLS"} for generalized least
squares, \code{"WLS"} for weighted least squares (sometimes called ADF
estimation), \code{"MLM"} for maximum likelihood estimation with robust
standard errors and a Satorra-Bentler scaled test statistic,
\code{"MLF"} for maximum likelihood estimation with standard errors
based on first-order derivatives and a conventional test statistic,
\code{"MLR"} for maximum likelihood estimation with robust `Huber-White'
standard errors and a scaled test statistic which is asymptotically
equivalent to the Yuan-Bentler T2-star test statistic. Note that the
\code{"MLM"}, \code{"MLF"} and \code{"MLR"} choices only affect the
standard errors and the test statistic. They also imply
\code{mimic="Mplus"}.
\item[\code{likelihood}] Only relevant for ML estimation. If \code{"wishart"},
the wishart likelihood approach is used. In this approach, the covariance
matrix has been divided by N-1, and both standard errors and test
statistics are based on N-1.
If \code{"normal"}, the normal likelihood approach is used. Here,
the covariance matrix has been divided by N, and both standard errors
and test statistics are based on N. If \code{"default"}, it depends
on the mimic option: if \code{mimic="Mplus"}, normal likelihood is used;
otherwise, wishart likelihood is used.
\item[\code{information}] If \code{"expected"}, the expected information matrix
is used (to compute the standard errors). If \code{"observed"}, the
observed information matrix is used. If \code{"default"}, the value is
set depending on the estimator and the mimic option.
\item[\code{se}] If \code{"standard"}, conventional standard errors
are computed based on inverting the (expected or observed) information
matrix. If \code{"first.order"}, standard errors are computed based on
first-order derivatives. If \code{"robust.mlm"}, conventional robust
standard errors are computed.
If \code{"robust.mlr"},
standard errors are computed based on the `mlr' (aka pseudo ML,
Huber-White) approach.
If \code{"robust"}, either \code{"robust.mlm"} or \code{"robust.mlr"} is
used depending on the estimator, the mimic option, and whether the data
are complete or not.
If \code{"boot"} or \code{"bootstrap"}, bootstrap standard errors are
computed using standard bootstrapping (unless Bollen-Stine bootstrapping
is requested for the test statistic; in this case bootstrap standard
errors are computed using model-based bootstrapping).
If \code{"none"}, no standard errors are computed.
\item[\code{test}] If \code{"standard"}, a conventional chi-square test is computed.
If \code{"Satorra-Bentler"}, a Satorra-Bentler scaled test statistic is
computed. If \code{"Yuan-Bentler"}, a Yuan-Bentler scaled test statistic
is computed. If \code{"boot"} or \code{"bootstrap"} or
\code{"bollen.stine"}, the Bollen-Stine bootstrap is used to compute
the bootstrap probability value of the test statistic.
If \code{"default"}, the value depends on the
values of other arguments.
\item[\code{bootstrap}] Number of bootstrap draws, if bootstrapping is used.
\item[\code{mimic}] If \code{"Mplus"}, an attempt is made to mimic the Mplus
program. If \code{"EQS"}, an attempt is made to mimic the EQS program.
If \code{"default"}, the value is (currently) set to \code{"Mplus"}.
\item[\code{representation}] If \code{"LISREL"} the classical LISREL matrix
representation is used to represent the model (using the all-y variant).
\item[\code{do.fit}] If \code{FALSE}, the model is not fit, and the current
starting values of the model parameters are preserved.
\item[\code{control}] A list containing control parameters passed to the optimizer.
By default, lavaan uses \code{"nlminb"}. See the manpage of
\code{nlminb} for an overview of the control parameters.
A different optimizer can be chosen by setting the value of
\code{optim.method}. For unconstrained optimization (the model syntax
does not include any "==", ">" or "<" operators),
the available options are \code{"nlminb"} (the default), \code{"BFGS"} and
\code{"L-BFGS-B"}. See the manpage of the \code{optim} function for
the control parameters of the latter two options. For constrained
optimization, the only available option is \code{"nlminb.constr"}.
\item[\code{start}] If it is a character string,
the two options are currently \code{"simple"} and \code{"Mplus"}.
In the first
case, all parameter values are set to zero, except the factor loadings
(set to one), the variances of latent variables (set to 0.05), and
the residual variances of observed variables (set to half the observed
variance).
If \code{"Mplus"}, we use a similar scheme, but the factor loadings are
estimated using the fabin3 estimator (tsls) per factor.
If \code{start} is a fitted
object of class \code{lavaan-class}, the estimated values of
the corresponding parameters will be extracted. If it is a model list,
for example the output of the \code{parameterEstimates()} function,
the values of the \code{est} or \code{start} or \code{ustart} column
(whichever is found first) will be extracted.
\item[\code{verbose}] If \code{TRUE}, the function value is printed out during
each iteration.
\item[\code{warn}] If \code{TRUE}, some (possibly harmless) warnings are printed
out during the iterations.
\item[\code{debug}] If \code{TRUE}, debugging information is printed out.
\end{ldescription}

}

\newpage
\sss{Power-user fitting functions}
\begin{itemize}
\item the \Verb!lavaan()! function does NOT do anything automagically
\begin{enumerate}
\item no model parameters are added to the parameter table
\item no actions are taken to make the model identifiable (e.g.\ setting
the metric of the latent variables)
\end{enumerate}
\end{itemize}
\sss{Example model syntax using the lavaan() function}
\begin{Sinput}
HS.model.full <- ' # latent variables
                     visual  =~ 1*x1 + x2 + x3
                     textual =~ 1*x4 + x5 + x6
                     speed   =~ 1*x7 + x8 + x9
                 
                   # factor variances
                     visual  ~~ visual
                     textual ~~ textual
                     speed   ~~ speed

                   # factor covariances
                     visual  ~~ textual
                     visual  ~~ speed
                     textual ~~ speed

                   # residual variances observed variables
                     x1 ~~ x1
                     x2 ~~ x2
                     x3 ~~ x3
                     x4 ~~ x4
                     x5 ~~ x5
                     x6 ~~ x6
                     x7 ~~ x7
                     x8 ~~ x8
                     x9 ~~ x9
                  '
fit <- lavaan(HS.model.full, data=HolzingerSwineford1939)
\end{Sinput}

\newpage
\sss{Combining the lavaan() function with auto.* arguments}
\begin{itemize}
\item several \verb!auto.*! arguments are available to
\begin{itemize}
\item automatically add a set of parameters (e.g.\ all (residual) variances)
\item take actions to make the model identifiable (e.g.\ set the
metric of the latent variables)
\end{itemize}
\end{itemize}
\sss{Example using lavaan with an auto.* argument}
\begin{Sinput}
HS.model.mixed <- ' # latent variables
                       visual  =~ 1*x1 + x2 + x3
                       textual =~ 1*x4 + x5 + x6
                       speed   =~ 1*x7 + x8 + x9
                     # factor covariances
                       visual  ~~ textual + speed
                       textual ~~ speed
                  '
fit <- lavaan(HS.model.mixed, data=HolzingerSwineford1939,
              auto.var=TRUE)
\end{Sinput}

\newpage
{\footnotesize
\begin{center}
\begin{tabular}{p{0.20\textwidth}p{0.10\textwidth}p{0.60\textwidth}} \toprule
keyword & operator  & parameter set\\ \midrule
\Verb!auto.var!      & \Verb!~~! & (residual) variances observed and latent variables\\
\Verb!auto.cov.y!    & \Verb!~~! & (residual) covariances observed and latent endogenous variables\\
\Verb!auto.cov.lv.x! &  \Verb!~~! & covariances among exogenous latent variables\\ \midrule
keyword & default      & action\\ \midrule
\Verb!auto.fix.first!  & TRUE & fix the factor loading of the first indicator to 1\\
\Verb!auto.fix.single! & TRUE & fix the residual variance of a single indicator to 0\\
\Verb!int.ov.free!     & TRUE & freely estimate the intercepts of the observed variables (only if a mean structure is included)\\
\Verb!int.lv.free!     & FALSE & freely estimate the intercepts of the latent variables (only if a mean structure is included)\\ \bottomrule
\end{tabular}
\end{center}
}

\newpage
\subsection{Extractor functions: inspecting fitted models}
{\footnotesize
\begin{center}
\begin{tabular}{p{0.15\textwidth}p{0.80\textwidth}} \toprule
Method & Description \\ \midrule
\Verb!summary()!   & print a long summary of the model results\\
\Verb!show()!      & print a short summary of the model results\\
\Verb!coef()!      & returns the estimates of the free parameters in the
                     model as a named numeric vector\\
%\Verb!parameterEstimates()! & returns the parameter estimates, including confidence intervals, as a data frame\\
%\Verb!standardizedSolution()! & returns one of three types of standardized parameter estimates, as a data frame\\
%\Verb!modindices()!& computes modification
\Verb!fitted()!    & returns the implied moments (covariance matrix and mean vector) of the model\\
\Verb!resid()!     & returns the raw, normalized or standardized residuals
                     (difference between implied and observed moments)\\
\Verb!vcov()!      & returns the covariance matrix of the estimated parameters\\
\Verb!predict()!   & compute factor scores\\
\Verb!logLik()!    & returns the log-likelihood of the fitted model (if maximum
                     likelihood estimation was used)\\
\Verb!AIC()!,
\Verb!BIC()!       & compute information criteria (if maximum likelihood estimation was used)\\
\Verb!update()!    & update a fitted lavaan object\\
\Verb!inspect()!   & peek into the internal representation of the model;
                     by default, it returns a list of model matrices counting
                     the free parameters in the model; can also be used to
                     extract starting values, gradient values,
                     and much more\\ \bottomrule
\end{tabular}
\end{center}
}

\newpage
\subsection{Other functions}
{\footnotesize
\begin{center}
\begin{tabular}{p{0.35\textwidth}p{0.60\textwidth}} \toprule
Function & Description \\ \midrule
\Verb!lavaanify()! & converts a lavaan model syntax to a parameter table\\
\Verb!parameterTable()! & returns the parameter table\\
\Verb!parameterEstimates()! & returns the parameter estimates, including confidence intervals, as a data frame\\
\Verb!standardizedSolution()! & returns one of three types of standardized parameter estimates, as a data frame\\
\Verb!modindices()!& computes modification indices and expected parameter changes\\
\Verb!bootstrapLavaan()! & bootstrap any arbitrary statistic that can be extracted from a fitted lavaan object\\
\Verb!bootstrapLRT()! & bootstrap a chi-square difference test for comparing two
alternative models\\ \bottomrule
\end{tabular}
\end{center}
}

\newpage
\subsection{Meanstructures}
\begin{itemize}
\item traditionally, SEM has focused on covariance structure analysis
\item but we can also include the means
\item typical situations where we would include the means are:
\begin{itemize}
\item multiple group analysis
\item growth curve models
\item analysis of non-normal data, and/or missing data
\end{itemize}
\item we have more data: the $p$-dimensional mean vector
\item we have more parameters:
\begin{itemize}
\item means/intercepts for the observed variables
\item means/intercepts for the latent variables (often fixed to zero)
\end{itemize}
\end{itemize}

\newpage
\sss{Adding the means in lavaan}
\begin{itemize}
\item when the \code{meanstructure} argument is set to \code{TRUE},
a meanstructure is added to the model
\begin{Sinput}
fit <- cfa(HS.model, data=HolzingerSwineford1939, 
           meanstructure=TRUE)
\end{Sinput}
\item if no restrictions are imposed on the means, the fit will be
identical to the non-meanstructure fit
\item we add $p$ datapoints (the mean vector)
\item we add $p$ free parameters (the intercepts of the observed variables)
\item we fix the latent means to zero
\item the number of degrees of freedom does not change
\end{itemize}

\newpage
\sss{Output meanstructure=TRUE}
\begin{Routput}
lavaan (0.4-12) converged normally after 41 iterations

  Number of observations                           301

  Estimator                                         ML
  Minimum Function Chi-square                   85.306
  Degrees of freedom                                24
  P-value                                        0.000

Parameter estimates:

  Information                                 Expected
  Standard Errors                             Standard

                   Estimate  Std.err  Z-value  P(>|z|)
Latent variables:
  visual =~
    x1                1.000
    x2                0.553    0.100    5.554    0.000
    x3                0.729    0.109    6.685    0.000
  textual =~
    x4                1.000
    x5                1.113    0.065   17.014    0.000
    x6                0.926    0.055   16.703    0.000
  speed =~
    x7                1.000
    x8                1.180    0.165    7.152    0.000
    x9                1.082    0.151    7.155    0.000

Covariances:
  visual ~~
    textual           0.408    0.074    5.552    0.000
    speed             0.262    0.056    4.660    0.000
  textual ~~
    speed             0.173    0.049    3.518    0.000

Intercepts:
    x1                4.936    0.067   73.473    0.000
    x2                6.088    0.068   89.855    0.000
    x3                2.250    0.065   34.579    0.000
    x4                3.061    0.067   45.694    0.000
    x5                4.341    0.074   58.452    0.000
    x6                2.186    0.063   34.667    0.000
    x7                4.186    0.063   66.766    0.000
    x8                5.527    0.058   94.854    0.000
    x9                5.374    0.058   92.546    0.000
    visual            0.000
    textual           0.000
    speed             0.000

Variances:
    x1                0.549    0.114
    x2                1.134    0.102
    ...
\end{Routput}

\newpage
\subsection{Multiple groups}
\sss{Single group analysis (CFA)}
\begin{center}
\begin{tikzpicture}[>=stealth,semithick]
\node[ov] (y1) at (0,-1)      {$y_1$};
\node[ov] (y2) [below of=y1]  {$y_2$};
\node[ov] (y3) [below of=y2]  {$y_3$};
\node[ov] (y4) [below of=y3]  {$y_4$};
\node[ov] (y5) [below of=y4]  {$y_5$};
\node[ov] (y6) [below of=y5]  {$y_6$};

\node[lv] (f1) at (1.5, -2)  {$f_1$};
\node[lv] (f2) at (1.5, -5)  {$f_2$};

\path[->] (f1) edge node[above,scale=0.6] {} (y1)
          (f1) edge node[above,scale=0.6] {} (y2)
          (f1) edge node[above,scale=0.6] {} (y3)
          (f2) edge node[above,scale=0.6] {} (y4)
          (f2) edge node[above,scale=0.6] {} (y5)
          (f2) edge node[above,scale=0.6] {} (y6);

\path[<->] (f1.east) edge [bend left=45] node[left,scale=0.8] {} (f2.east);
\end{tikzpicture}
\end{center}
\begin{itemize}
\item factor means typically fixed to zero
\end{itemize}



\newpage
\sss{Multiple group analysis (CFA)}
\begin{center}
GROUP 1 \hspace{4cm} GROUP 2\\*[0.4cm]
\begin{tikzpicture}[>=stealth,semithick]
\node[ov] (y1) at (0,-1)      {$y_1$};
\node[ov] (y2) [below of=y1]  {$y_2$};
\node[ov] (y3) [below of=y2]  {$y_3$};
\node[ov] (y4) [below of=y3]  {$y_4$};
\node[ov] (y5) [below of=y4]  {$y_5$};
\node[ov] (y6) [below of=y5]  {$y_6$};

\node[lv] (f1) at (1.5, -2)  {$f_1$};
\node[lv] (f2) at (1.5, -5)  {$f_2$};

\path[->] (f1) edge node[above,scale=0.6] {} (y1)
          (f1) edge node[above,scale=0.6] {} (y2)
          (f1) edge node[above,scale=0.6] {} (y3)
          (f2) edge node[above,scale=0.6] {} (y4)
          (f2) edge node[above,scale=0.6] {} (y5)
          (f2) edge node[above,scale=0.6] {} (y6);

\path[<->] (f1.east) edge [bend left=45] node[left,scale=0.8] {} (f2.east);
\end{tikzpicture}
\hspace{1cm}
\begin{tikzpicture}[>=stealth,semithick]
\node[ov] (y1) at (0,-1)      {$y_1$};
\node[ov] (y2) [below of=y1]  {$y_2$};
\node[ov] (y3) [below of=y2]  {$y_3$};
\node[ov] (y4) [below of=y3]  {$y_4$};
\node[ov] (y5) [below of=y4]  {$y_5$};
\node[ov] (y6) [below of=y5]  {$y_6$};

\node[lv] (f1) at (1.5, -2)  {$f_1$};
\node[lv] (f2) at (1.5, -5)  {$f_2$};

\path[->] (f1) edge node[above,scale=0.6] {} (y1)
          (f1) edge node[above,scale=0.6] {} (y2)
          (f1) edge node[above,scale=0.6] {} (y3)
          (f2) edge node[above,scale=0.6] {} (y4)
          (f2) edge node[above,scale=0.6] {} (y5)
          (f2) edge node[above,scale=0.6] {} (y6);

\path[<->] (f1.east) edge [bend left=45] node[left,scale=0.8] {} (f2.east);
\end{tikzpicture}
\end{center}
\begin{itemize}
\item can we compare the means of the latent variables?
\end{itemize}

\newpage
\sss{Measurement Invariance in lavaan}
\begin{Sinput}
# model 1: configural invariance
fit1 <- cfa(HS.model, data=HolzingerSwineford1939, group="school")

# model 2: weak invariance
fit2 <- cfa(HS.model, data=HolzingerSwineford1939, group="school",
            group.equal="loadings")

# model 3: strong invariance
fit3 <- cfa(HS.model, data=HolzingerSwineford1939, group="school",
            group.equal=c("loadings", "intercepts"))
\end{Sinput}

\sss{Comparing two (nested) models: the anova() function}
\begin{Sinput}
anova(fit1, fit2)
\end{Sinput}
\begin{Routput}
Chi Square Difference Test

     Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)
fit1 48 7484.4 7706.8 115.85                              
fit2 54 7480.6 7680.8 124.04     8.1922       6     0.2244
\end{Routput}

\newpage
\sss{Measurement invariance tests -- all together}
\begin{Sinput}
> measurementInvariance(HS.model, data=HolzingerSwineford1939, 
                        group="school", strict=FALSE)
\end{Sinput}
\begin{Routput}
Measurement invariance tests:

Model 1: configural invariance:
   chisq       df   pvalue      cfi    rmsea      bic 
 115.851   48.000    0.000    0.923    0.097 7706.822 

Model 2: weak invariance (equal loadings):
   chisq       df   pvalue      cfi    rmsea      bic 
 124.044   54.000    0.000    0.921    0.093 7680.771 

[Model 1 versus model 2]
  delta.chisq      delta.df delta.p.value     delta.cfi 
        8.192         6.000         0.224         0.002 

Model 3: strong invariance (equal loadings + intercepts):
   chisq       df   pvalue      cfi    rmsea      bic 
 164.103   60.000    0.000    0.882    0.107 7686.588 

[Model 1 versus model 3]
  delta.chisq      delta.df delta.p.value     delta.cfi 
       48.251        12.000         0.000         0.041 

...
\end{Routput}

\newpage
\subsection{Missing data in lavaan}
\begin{itemize}
\item if the data contain missing values, the default behavior in \pkg{lavaan}
is listwise deletion
\item if the missing mechanism is MCAR or MAR, the \pkg{lavaan} package
provides case-wise (or `full information') maximum likelihood (FIML) estimation
by specifying the argument \code{missing="ml"} (or its alias
\code{missing="fiml"}):
\begin{Sinput}
fit <- sem(myModel, data=myIncompleteData, missing="ml")
\end{Sinput}
\item an unrestricted (h1) model will automatically be
estimated (using the EM algorithm), so that all common fit indices are available
\item robust standard errors are also available, as is a scaled
(`Yuan-Bentler') test statistic if the data are both incomplete and non-normal
(\code{estimator="MLR"})
\end{itemize}

\newpage
\subsection{Standard errors}
\begin{itemize}
\item the \code{se} argument can be used to switch
between different types of standard errors
\item setting \code{se="robust"} will
produce robust standard errors
\begin{itemize}
\item if data is complete, lavaan will use \code{se="robust.mlm"}
\item if data is incomplete, lavaan will use \code{se="robust.mlr"}
\end{itemize}
\item setting \code{se="boot"} or \code{se="bootstrap"} will produce
bootstrap standard errors
\item setting \code{se="none"} will NOT compute standard errors
\end{itemize}

\newpage
\subsection{Test statistics}
\begin{itemize}
\item the \code{test} argument can be used to switch between
different test statistics:
\begin{itemize}
\item \code{test="standard"} (default)
\item \code{test="satorra.bentler"}
\item \code{test="yuan.bentler"}
\item \code{test="bootstrap"} or \code{test="bollen.stine"}
\item \code{test="none"}
\end{itemize}
\item combine both robust standard errors and a scaled test statistic:
\begin{itemize}
\item \code{estimator="MLM"} 
\item \code{estimator="MLR"}
\end{itemize}
\end{itemize}

\newpage
\subsection{BootstrapLavaan}
\begin{itemize}
\item once a lavaan model has been fitted, you can bootstrap
any statistic that you can extract from a fitted lavaan object
\item examples:
\begin{Sinput}
# bootstrap model parameters
PAR.boot <- bootstrapLavaan(fit, R=10, type="ordinary",
                            FUN="coef")


# bootstrap test statistic + compute p-value
T.boot <- bootstrapLavaan(fit, R=10, type="bollen.stine",
                          FUN=fitMeasures, fit.measures="chisq")

pvalue.boot <- length(which(T.boot > T.orig))/length(T.boot)


# bootstrap CFI
CFI.boot <-  bootstrapLavaan(fit, R=10, type="parametric",
                             FUN=fitMeasures, fit.measures="cfi",
                             parallel="multicore", ncpus=8)
\end{Sinput}
\end{itemize}

\newpage
\subsection{Constraints and defined parameters}
\sss{linear and nonlinear equality and inequality constraints}
\begin{Sinput}
Data <- data.frame(   y = rnorm(100),
                     x1 = rnorm(100),
                     x2 = rnorm(100),
                     x3 = rnorm(100)  )

model.constr <- ' # model with labeled parameters
                    y ~ b1*x1 + b2*x2 + b3*x3

                  # constraints
                    b1 == (b2 + b3)^2
                    b1 > exp(b2 + b3)
                '

fit <- sem(model.constr, data=Data)
\end{Sinput}


\newpage
\sss{defined parameters and mediation analysis}
\begin{Sinput}
X <- rnorm(100)
M <- 0.5*X + rnorm(100)
Y <- 0.7*M + rnorm(100)
Data <- data.frame(X = X, Y = Y, M = M)

model <- ' # direct effect
             Y ~ c*X
           # mediator
             M ~ a*X
             Y ~ b*M

           # indirect effect (a*b)
             ab := a*b
           # total effect
             total := c + (a*b)
         '

fit <- sem(model, data=Data)
\end{Sinput}


\newpage
\subsection{Using a covariance matrix as input}
\begin{Sinput}
lower <- '
 11.834,
  6.947,    9.364,
  6.819,    5.091,   12.532,
  4.783,    5.028,    7.495,    9.986,
 -3.839,   -3.889,   -3.841,   -3.625,   9.610,
-21.899,  -18.831,  -21.748,  -18.775,  35.522,  450.288 '

# classic wheaton et al model
wheaton.cov <- getCov(lower, 
                      names=c("anomia67","powerless67", "anomia71",
                              "powerless71","education","sei"))

wheaton.model <- '
  # measurement model
    ses     =~ education + sei
    alien67 =~ anomia67 + powerless67
    alien71 =~ anomia71 + powerless71

  # equations
    alien71 ~ alien67 + ses
    alien67 ~ ses

  # correlated residuals
    anomia67 ~~ anomia71
    powerless67 ~~ powerless71
'

fit <- sem(wheaton.model, sample.cov=wheaton.cov, sample.nobs=932)
summary(fit, standardized=TRUE)
\end{Sinput}


\newpage
\section{Some technical details}
\subsection{Default estimator: ML}
\begin{itemize}
\item ML is the default estimator in all software packages for SEM
\item the likelihood function is derived from the multivariate normal
distribution (the `normal' tradition) or the Wishart distribution (the
`Wishart' tradition)
\item standard errors are usually based on the covariance matrix that is obtained
by inverting the expected information matrix
\begin{align*} n \mbox{Cov}(\hat{\theta}) &= A^{-1}\\ 
                                        &= (\Delta' W \Delta)^{-1} 
\end{align*}
\begin{itemize}
\item $\Delta$ is a Jacobian matrix and $W$ is a function of $\Sigma^{-1}$
\item if no meanstructure:
\begin{align*}
\Delta &= \partial \hat{\Sigma} / \partial \hat{\theta}'\\
     W &= 2 D' (\hat{\Sigma}^{-1} \otimes \hat{\Sigma}^{-1}) D
\end{align*}
\end{itemize}
\newpage
\item an alternative is to use the \emph{observed}
information matrix
\begin{align*}
n \mbox{Cov}(\hat{\theta}) &= A^{-1}\\
                           &= \left[- \text{Hessian} \right]^{-1}\\
                           &= \left[- \partial^2 F(\hat{\theta}) / (\partial \hat{\theta} \, \partial \hat{\theta}') \right]^{-1}
\end{align*}
where $F(\theta)$ is the function that is minimized
\item overall model evaluation is based on the likelihood-ratio (LR)
statistic (chi-square test): $T_{ML}$
\begin{itemize}
\item (minus two times the) difference
between loglikelihood of user-specified model $H_0$ and
unrestricted model $H_1$
\item equals (in lavaan) $2\times n$ times the minimum value of $F(\theta)$
\item $T_{ML}$ follows (under regularity conditions) a chi-square
distribution
\end{itemize}
\end{itemize}

\newpage
\subsection{Estimator MLM}
\begin{itemize}
\item parameter estimates are standard ML estimates
\item standard errors are robust to non-normality
\begin{itemize}
\item standard errors are computed using a sandwich-type estimator:
\begin{align*} n \mbox{Cov}(\hat{\theta}) &= A^{-1} B A^{-1}\\
                                        &= (\Delta' W \Delta)^{-1} 
(\Delta' W \Gamma W \Delta) (\Delta' W \Delta)^{-1}
\end{align*}
\item $A$ is usually the expected information matrix (but not in Mplus)
\item references: Huber (1967), Browne (1984), Shapiro (1983), Bentler (1983), \ldots
\end{itemize}
\newpage
\item chi-square test statistic is robust to non-normality
\begin{itemize}
\item test statistic is `scaled' by a correction factor
\[ T_{SB} = T_{ML}/c \]
\item the scaling factor $c$ is computed by:
\[ c = \tr\left[U \Gamma \right] / \mbox{df} \]
where
\[U = (W^{-1} - W^{-1} \Delta (\Delta' W^{-1} \Delta)^{-1} \Delta' W^{-1})\]
\item correction method described by Satorra \& Bentler (1986, 1988, 1994)
\end{itemize}
\item estimator MLM: for complete data only
\end{itemize}

\newpage
\subsection{Estimator MLR}
\begin{itemize}
\item parameter estimates are standard ML estimates
\item standard errors are robust to non-normality
\begin{itemize}
\item standard errors are computed using a (different) sandwich approach:
\begin{align*}
n \mbox{Cov}(\hat{\theta}) &= A^{-1} B A^{-1}\\
                           &= A_0^{-1} B_0 A_0^{-1} = C_0
\end{align*}
where
\[ A_0 = - \sum_{i=1}^n \frac{\partial^2 \log L_i}{\partial \hat{\theta} \, 
\partial \hat{\theta}'} \quad \mbox{(observed information)}
\]
and
\[ B_0 = \sum_{i=1}^n 
\left(\frac{\partial \log L_i}{\partial \hat{\theta}}\right) 
\times \left(\frac{\partial \log L_i}{\partial \hat{\theta}}\right)' \]
\item for both complete and incomplete data
\item Huber (1967), Gourieroux, Monfort \& Trognon (1984), Arminger \&
Schoenberg (1989)
\end{itemize}
\item chi-square test statistic is robust to non-normality
\begin{itemize}
\item test statistic is `scaled' by a correction factor
\[ T_{MLR} = T_{ML} / c \]
\item the scaling factor $c$ is (usually) computed by
\[ c = \tr \left[ M \right] \]
where
\[ M = C_1(A_1 - A_1 \Delta (\Delta' A_1 \Delta)^{-1} \Delta' A_1) \]
\item $A_1$ and $C_1$ are computed under the unrestricted ($H_1$) model
\item correction method described by Yuan \& Bentler (2000)
\end{itemize}
\item information matrix ($A$) can be observed or expected
\item for complete data, the MLR and MLM corrections are asymptotically
equivalent
\end{itemize}

\end{document}


