C
C

      SUBROUTINE GLMADV (ISEND)
C
C ACTION: Advise user about SIMFIT GLM procedures
C AUTHOR: w.g.bardsley, university of manchester, u.k.
C         20/04/2006 extensive upgrade
C
C ISEND: (input/unchanged) information selector as follows:
C        ISEND = 1: advanced help   (7 pages: FORMATs 100 to 700)
C        ISEND = 2: simple help     (1 page:  FORMAT 800)
C        ISEND = 3: data formats    (4 pages: FORMATs 900 to 1200)
C        ISEND = 4: view test files (FORMAT 1300)
C        Any other value: no branch is taken, nothing is displayed.
C
C METHOD: Each page is built by an internal WRITE of a multi-record
C         FORMAT into TEXT, one record (one displayed line) per
C         array element. Elements beyond the records written keep
C         whatever an earlier WRITE left there, so NUMTXT must be
C         the number of lines of the current page (20 or 22 here).
C         The page is then handed to the external routine TUTOR1.
C
C         JUMBLD(I) = 1 is set for the title line and for the
C         sub-heading lines of each page; how TUTOR1 treats such
C         lines is defined in TUTOR1 (presumably emphasis - confirm).
C         NEXT is .TRUE. for all pages of a sequence except the
C         last (presumably offering a continue option - confirm).
C
C         NEXT and NUMTXT are only assigned where a new value is
C         needed, since TUTOR1 receives them by reference.
C
C NOTES:  1) The source is fixed form: every text line of the
C            FORMATs must end at or before column 72, i.e. at most
C            63 characters between the quotes of a continuation.
C         2) The grave accents in the texts are passed through
C            unchanged (presumably alignment marks for TUTOR1).
C         3) FORMAT 1300 holds NFILES test file names followed by
C            one closing menu line, i.e. NFILES + 1 records.
C
      IMPLICIT   NONE
C
C Argument
C
      INTEGER    ISEND
C
C Locals
C
      INTEGER    JCOLOR, JMAX, NFILES
      PARAMETER (JCOLOR = 9, JMAX = 30, NFILES = 12)
      INTEGER    I, JUMBLD(JMAX), NUMTXT
      CHARACTER  TEXT(JMAX)*100
      LOGICAL    NEXT
      LOGICAL    FRAME, UPDOWN
      PARAMETER (FRAME = .FALSE., UPDOWN = .TRUE.)
      EXTERNAL   TUTOR1, VUTEST
C
C Clear all line flags on every entry (no reliance on values
C surviving from a previous call), then flag the title line
C
      DO 10 I = 1, JMAX
         JUMBLD(I) = 0
   10 CONTINUE
      JUMBLD(1) = 1
      IF (ISEND.EQ.1) THEN
C
C GLM advanced help
C
C Page 1: overview (22 lines)
C
         NEXT = .TRUE.
         WRITE (TEXT,100)
         NUMTXT = 22
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
C
C Page 2: nomenclature (20 lines, as are all remaining pages here)
C
         WRITE (TEXT,200)
         NUMTXT = 20
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
C
C Page 3: error and link types, line 8 is the sub-heading
C
         WRITE (TEXT,300)
         JUMBLD(8) = 1
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(8) = 0
C
C Page 4: general advice
C
         WRITE (TEXT,400)
         NUMTXT = 20
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
C
C Page 5: user and control parameters, line 8 is the sub-heading
C
         WRITE (TEXT,500)
         JUMBLD(8) = 1
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(8) = 0
C
C Page 6: details of the error types
C
         WRITE (TEXT,600)
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
C
C Page 7: links, the last page, lines 6 and 13 are sub-headings
C
         WRITE (TEXT,700)
         NEXT = .FALSE.
         JUMBLD(6) = 1
         JUMBLD(13) = 1
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(6) = 0
         JUMBLD(13) = 0
      ELSEIF (ISEND.EQ.2) THEN
C
C Simplified GLM help has been selected: a single page of 20
C lines, line 13 is the sub-heading
C
         NEXT = .FALSE.
         WRITE (TEXT,800)
         NUMTXT = 20
         JUMBLD(13) = 1
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(13) = 0
      ELSEIF (ISEND.EQ.3) THEN
C
C Advise about data formats: four pages of 22 lines each
C
C Page 1: summary, line 19 introduces the offset file
C
         NEXT = .TRUE.
         WRITE (TEXT,900)
         JUMBLD(19) = 1
         NUMTXT = 22
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(19) = 0
C
C Page 2: covariates, line 17 introduces the responses y
C
         WRITE (TEXT,1000)
         JUMBLD(17) = 1
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(17) = 0
C
C Page 3: N, t and s, lines 7 and 11 are the sub-headings
C
         WRITE (TEXT,1100)
         JUMBLD(7) = 1
         JUMBLD(11) = 1
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
         JUMBLD(7) = 0
         JUMBLD(11) = 0
C
C Page 4: macros, the last page
C
         NEXT = .FALSE.
         WRITE (TEXT,1200)
         CALL TUTOR1 (JCOLOR, JUMBLD, NUMTXT, TEXT, FRAME, NEXT,
     +                UPDOWN)
      ELSEIF (ISEND.EQ.4) THEN
C
C View test files: pass the NFILES names plus the closing line
C
         WRITE (TEXT,1300)
         CALL VUTEST (NFILES,
     +                TEXT)
      ENDIF
C
C Page texts: each / starts a new record, i.e. a new displayed line
C
C ISEND = 1, page 1 (22 lines)
C
  100 FORMAT ('Generalized Linear Models (GLM): Overview'
     +/
     +/'GLM methods are used for logistic regression, probit analysis,'
     +/'log-linear fitting, survival analysis, dose-response curve'
     +/'construction, and many other situations where ordinary least'
     +/'squares is not valid and maximum likelihood must be employed,'
     +/'e.g. where experimental errors are binomially distributed.'
     +/'By suitably normalising the data and carefully choosing models'
     +/'for error and link functions, the usefulness can be greatly'
     +/'extended. For example, choosing binomial errors with a logistic'
     +/'link, all y(i) set to 0 or 1 and all N(i) set to 1 (see later),'
     +/'then binary logistic regression can be performed.'
     +/'The m covariates x_1, x_2, ..., x_m could be measurements of'
     +/'continuous quantities like weight, height, concentration,'
     +/'temperature and so on, but categorical variables can be used,'
     +/'e.g. male or female could be modelled by an indicator variable'
     +/'x set to 0 or 1. Any nominal variable with k factor levels'
     +/'could be modelled by k dummy 0,1 indicator variables, but only'
     +/'k-1 of these would be included in the regression if a constant'
     +/'is fitted, to avoid aliasing. Polynomial regression is done by'
     +/'setting x_1 = x, x_2 = x^2, x_3 = x^3, ..., x_m = x^m, and'
     +/'mixed effects are modelled by setting x_(ij) = x_i.x_j.')
C
C ISEND = 1, page 2 (20 lines)
C
  200 FORMAT ('Generalized Linear Models (GLM): Nomenclature'
     +/
     +/'The Simfit GLM interface relies on the following assumptions:'
     +/'a)`Observations y have random errors with a known distribution'
     +/'b)`Observations y depend on m independent covariates x'
     +/'c)`The mean y values at x are functions mu, E(Y) = mu(x)'
     +/'d)`There are link functions, g(mu) = eta'
     +/'e)`The eta are linear functions of x, eta = beta^T.x'
     +/'f)`The coefficients of the linear function are to be estimated'
     +/'g)`The linear function can have a constant term if required'
     +/'h)`The linear function can have offsets if required'
     +/'i)`The fitting can be weighted if required'
     +/'j)`Overall goodness of fit can be assessed by the deviance'
     +/'k)`Deviance residuals and leverages can be examined'
     +/'l)`Covariates to be included can be selected interactively'
     +/'m)`If the data are consistent with the error and link then'
     +/'  `the linear link parameters can be estimated by an iterative'
     +/'  `procedure using the y as starting estimates'
     +/'n)`Singular designs can be handled by SVD'
     +/'o)`Users must prepare correctly formatted data files')
C
C ISEND = 1, page 3 (20 lines)
C
  300 FORMAT ('Error types'
     +/
     +/'1)`Normal'
     +/'2)`Binomial'
     +/'3)`Poisson'
     +/'4)`Gamma'
     +/
     +/'Link types'
     +/
     +/'For Normal, Poisson and Gamma:'
     +/'A)`Exponent'
     +/'B)`Identity'
     +/'C)`Log'
     +/'D)`Square root'
     +/'E)`Reciprocal'
     +/
     +/'For Binomial errors:'
     +/'i)  `Logistic'
     +/'ii) `Probit'
     +/'iii)`Complementary Log-Log')
C
C ISEND = 1, page 4 (20 lines)
C
  400 FORMAT ('Generalized Linear Models (GLM): Advice'
     +/
     +/'1)`The SIMFIT GLM module is based on the methods described in'
     +/'  `Generalized Linear Models by P. McCullagh and J.A. Nelder,'
     +/'  `Chapman and Hall 2nd edition 1989. Please read it.'
     +/'2)`The implementation uses iterative weighted least squares'
     +/'  `using the methods and functions described in that book and'
     +/'  `calling LAPACK routines as well as SIMFIT routines.'
     +/'3)`You should only use GLM if you have good reasons to choose'
     +/'  `error and link types. You must have experimental evidence'
     +/'  `or theoretical arguments to support your choices.'
     +/'4)`The data file must be formatted strictly and any badly'
     +/'  `formatted data, e.g. Binomial errors with y < 0 or y > N'
     +/'  `will cause failure to fit and corresponding IFAIL messages.'
     +/'5)`If the design matrix does not have full rank you will be'
     +/'  `warned. In such cases the parameters and standard errors'
     +/'  `will not be uniquely defined and it is best to redesign'
     +/'  `the experiment to create a full rank design matrix. The'
     +/'  `fitting procedure will only succeed if you have good data'
     +/'  `and choose a meaningful error type and link function.')
C
C ISEND = 1, page 5 (20 lines)
C
  500 FORMAT ('Parameters that must be set by users'
     +/
     +/'A and S can be changed if necessary as follows:'
     +/'1)`A is the exponent as in eta = mu^A'
     +/'2)`S is the user-supplied variance (Normal) or scale factor'
     +/'  `(Gamma). Usually S is set = 0, causing S to be estimated.'
     +/
     +/'Parameters controlling iterations and convergence'
     +/
     +/'Normally you should use default values for these parameters.'
     +/'They should only be changed by experienced users who understand'
     +/'the consequences of altering them.'
     +/'a)`MAXIT is the maximum number of iterations allowed, and 20'
     +/'  `is normally sufficient, except for very difficult problems.'
     +/'b)`EPS is used to define the rank of the SVD, i.e. the limit'
     +/'  `ratio where singular values are defined to be zero.'
     +/'c)`TOL is used to define convergence, i.e. when the deviance'
     +/'  `change is less than TOL*(1.0 + Current Deviance).'
     +/'d)`IPRINT is the number of iterations between intermediate'
     +/'  `output. This should almost always be set at IPRINT = 0.')
C
C ISEND = 1, page 6 (20 lines)
C
  600 FORMAT ('Details concerning the error types'
     +/
     +/'In the following, the context defines the terms y, mu, N, p, x.'
     +/
     +/'Normal pdf = exp-{(1/2)[(y - mu)/sigma]**2}/[sigma*sqrt(2*pi)]'
     +/'with (deviance) residuals = y - mu'
     +/
     +/'Binomial pdf = [N-choose-y]*p^y*(1 - p)^(N - y) with mu = p*N'
     +/'and deviance residuals = sign(y - mu)*sqrt[dev(y,mu)]'
     +/'where dev(y,mu) = 2{y*log(y/mu) + (N - y)*log[(N-y)/(N-mu)]}'
     +/
     +/'Poisson pdf = mu^y*exp(-mu)/y!'
     +/'with deviance residuals = sign(y - mu)*sqrt[dev(y,mu)]'
     +/'where dev(y,mu) = 2{y*log(y/mu) - (y - mu)}'
     +/
     +/'Gamma pdf = {(1/Gamma(nu))(nu*y/mu)^nu*exp(-nu*y/mu)}/y'
     +/'with Anscombe residuals = 3{y^(1/3) - mu^(1/3)}/mu^(1/3)'
     +/'and deviance = 2*Summation{log(mu) + y/mu}'
     +/'Scale factor nu^(-1) can be calculated as the moment estimator'
     +/'nu^(-1) = Summation{(y - mu)/mu}^2/(Deg. of freedom)')
C
C ISEND = 1, page 7 (20 lines)
C
  700 FORMAT ('Links'
     +/
     +/'Note: eta(i) = offset(i) + Const. + Summation{beta(j)*x(i,j)}.'
     +/'for i = 1, ..., n_data and j = 1, ..., m_variables'
     +/
     +/'For Normal, Poisson and Gamma errors:-'
     +/'Exponent: eta = mu^A'
     +/'Identity: eta = mu'
     +/'Log: eta = log(mu)'
     +/'Square root: eta = sqrt(mu)'
     +/'Reciprocal: eta = 1/mu'
     +/
     +/'For Binomial errors:-'
     +/'Logistic: eta = log[mu/(N - mu)]'
     +/'Probit: eta = Phi_inverse(mu/N)'
     +/'Complementary log-log: eta = log{-log[1 - (mu/N)]}'
     +/
     +/'Failure exits with IFAIL > 0 will occur if starting estimates'
     +/'or fitted values lead to singularities in the calculations'
     +/'required for the above functions and their inverses.')
C
C ISEND = 2, single page (20 lines)
C
  800 FORMAT ('Generalized Linear Models (GLM): simplified interface'
     +/
     +/'If you do not want to take responsibility for choosing the data'
     +/'format, offsets, weights, error types, link functions and other'
     +/'run-time parameters required to use the Simfit GLM procedure,'
     +/'a simplified interface is provided. You should use this for'
     +/'a) logistic, binary and polynomial logistic regression'
     +/'b) survival analysis with covariates (exponential/Weibull/Cox)'
     +/'c) analysis of contingency tables (log-linear fitting)'
     +/'d) bioassay (LD50 and percentiles by probit/logistic/log-log)'
     +/'First-time messages are provided concerning the data formats.'
     +/
     +/'Special features of the simplified interface'
     +/
     +/'The s-values must be 1 or, in some cases, a stratum indicator.'
     +/'Offsets are not allowed as these are calculated when required'
     +/'You can choose to include or exclude a constant term'
     +/'You can choose to de-select or restore explanatory variables'
     +/'Covariates can be of the continuous or qualitative kind'
     +/'GLM control parameters can be adjusted in some cases')
C
C ISEND = 3, page 1 (22 lines)
C
  900 FORMAT ('Generalized Linear Models (GLM): Summary of data format'
     +/'Note: n = no. data values, m = no. covariates'
     +/'Column 1: x_1'
     +/'...'
     +/'Column m: x_m'
     +/'Column m + 1: y'
     +/'Binomial only: Column m + 2 = N, Column m + 3 = s'
     +/'Survival only: Column m + 2 = t, Column m + 3 = s'
     +/'All other situations: Column m + 2 = s'
     +/'Only k-1 dummy indicator variables can be included for factors'
     +/'with k levels if a constant is fitted, powers of x can be used'
     +/'for polynomials, and product terms can model mixed effects.'
     +/'For Binomial errors, the y are no. of successes in N trials'
     +/'For survival analysis y = 0 for failure, 1 for right censored'
     +/'Usually set s = 1, s = stratum, or s = 0 to suppress data, but'
     +/'weights w = 1/s^2 can be used (e.g. s = 1/sqrt(3) for means of'
     +/'triplicates) but do not use s = std.dev. as for regression.'
     +/'Inconsistent data will lead to IFAIL type error messages.'
     +/'Format for the offset file'
     +/'Offsets are only needed by experienced users (e.g. for survival'
     +/'analysis) and they must be in a vector file of the same length'
     +/'as the data file (i.e. n or greater).')
C
C ISEND = 3, page 2 (22 lines)
C
 1000 FORMAT ('The covariates x_1, x_2, ...,x_m'
     +/'For continuous variables a single column of values is required'
     +/'For qualitative variables with k levels there must be sets'
     +/'of column vectors with entries of either 0 or 1 as follows:'
     +/'1)`Supply k columns of dummy indicator variables and choose'
     +/'  `which column to suppress, i.e. use as the reference.'
     +/'2)`Supply k - 1 independent columns of 0, 1 dummy indicators'
     +/'  `where you have decided which factor level is the reference.'
     +/'Method 2) is preferred, and this can be done automatically by'
     +/'the Simfit GLM data preparation module.'
     +/'To study mixed effects, new columns xij must be input where'
     +/'xij(k) = xi(k)*xj(k) but this must be done after any factor'
     +/'dummy indicator vectors have been constructed.'
     +/'For polynomial regression, x_2 = x_1^2, x_3 = x_1^3, ..., etc.'
     +/'which happens automatically for logistic polynomial regression.'
     +/
     +/'The measured responses y'
     +/'Limitations are placed on y in some situations as follows:'
     +/'Logistic regression ... y must be integers with 0 =< y =< N'
     +/'Binary logistic regression ... y must be either 0 or 1'
     +/'Survival analysis ... y must be 0 for failure (e.g. death) or'
     +/'1 if right censored (i.e. survived).')
C
C ISEND = 3, page 3 (22 lines)
C
 1100 FORMAT ('The N variable'
     +/'For logistic regression, N is the number of trials used to get'
     +/'the corresponding y successes, so N must be an integer that'
     +/'satisfies N > 0 and N >= y. For binary logistic regression'
     +/'it is assumed that y is a Bernoulli variable, so N must be 1.'
     +/
     +/'The t variable'
     +/'For the survival analysis modules it is necessary that the time'
     +/'of failure or right censorship must be positive, i.e. t > 0.'
     +/
     +/'The s value (this is not the s = std.dev.y as for regression)'
     +/'There are four possible situations.'
     +/'1)`Setting all s = 1 is the normal option as correct weights'
     +/'  `are calculated internally depending on the error type.'
     +/'2)`Setting a chosen s to zero has the effect of removing the'
     +/'  `corresponding data point from the analysis. Note that this'
     +/'  `technique is very dangerous and should be avoided unless a'
     +/'  `record is kept of such suppressed data points.'
     +/'3)`For Cox regression the s value can be 0 (suppress) or i'
     +/'  `(stratum indicator) where there are j strata and s is an'
     +/'  `integer satisfying 1 =< i =< j and all values of i occur.'
     +/'4)`Advanced users may require weighting given by w = 1/s^2')
C
C ISEND = 3, page 4 (22 lines)
C
 1200 FORMAT ('Simfit GLM macros'
     +/
     +/'Data files can be prepared using program EDITMT, edited using'
     +/'program EDITMT, copied and pasted as spread sheet tables using'
     +/'the clipboard, or written as files using a Simfit macro from MS'
     +/'Excel. However it may be inconvenient to create dummy indicator'
     +/'variables for qualitative variables this way, so the facility'
     +/'exists to install a macro. Such macros can transform data sets'
     +/'in the following ways before analysis.'
     +/'1)`Data points to be suppressed can be specified.'
     +/'2)`Variables to be suppressed can be specified.'
     +/'3)`Variables to be treated as categorical and used to generate'
     +/'  `dummy indicator variables can be specified.'
     +/'4)`Quantitative variables can be transformed into logs, square'
     +/'  `roots, reciprocals, etc.'
     +/'5)`New variables created as products of existing variables to'
     +/'  `estimate mixed effects can be specified.'
     +/'6)`Calculation of statistics for goodness of fit tests and for'
     +/'  `model discrimination can be requested.'
     +/'7)`Iterative procedures and graphical displays can be defined.'
     +/'The syntax necessary for GLM macros and methods for creating,'
     +/'installing and archiving them will be in a future readme file.')
C
C ISEND = 4: NFILES test file names followed by the closing line
C
 1300 FORMAT (
     + 'glm.tf1'
     +/'glm.tf2'
     +/'glm.tf3'
     +/'glm.tf4'
     +/'cox.tf1'
     +/'cox.tf2'
     +/'cox.tf3'
     +/'cox.tf4'
     +/'logistic.tf1'
     +/'ld50.tf1'
     +/'ld50.tf2'
     +/'strata.tf1'
     +/'Quit ... Exit these data file options')
      END
C
C
