;
; Uses regional age-banded MXD time series to predict NH or ALL temperatures,
; with calibration done using principal component regression, and independent
; validation.
; Should run bandreg2nh_youngold_fixed.pro in treeharry/banding to get the
; regional MXD series based on 20-550yr trees, with regional weighting, and
; at least 2 trees per band.
; There are a number of different PCR models that can be built, depending
; on which regions have data.
;
; It also computes the standard error ranges for the particular model being
; used, and for a given 'fixed grid'.  For each model use the following
; fixed grids (also says whether to interpolate between the errors given
; by the various fixed grids, or to use a constant error model):
;
; Model   Period used   Fixed grid to use
; 0       1683-1981     1950 1800 1700 (1683-99 con, 1700-1900 int, 1901-81 con)
; 1       1667-1682     1700 (con)
; 2       1625-1666     1700 (con)
; 3       1602-1624     1700 (con)
; 4       1588-1603     1600 (con)
; 5       1480-1587     1600 1500 (1480-99 con, 1500-1587 int)
; 6       1402-1479     1500 1400 (1402-1479 int)
; 7       1982-1988     1950 (con)
; 8       1989-1991     1988 (con)
; 9       1992-1994     1988 (con)
;
; The use of fixed grids and multiple models is quite complex.  The full grid
; needs to be used to define the EOFs that are used as predictors.  These are
; then used to generate a full-grid reconstruction for that PCR model.  The
; fixed grid data is then projected onto these EOFs and a new PCR model is
; built.  This new model is not used for any reconstruction, but the
; calibration statistics are used to estimate errors around the full-grid
; reconstruction for this period.
;
; ***DECIDE WHETHER TO USE THE NSIB AGE-BANDED OR THE NSIB HUGERSHOFF RECORD
; ***BECAUSE THE FORMER HAS LARGE ANOMALIES!
;
dohug=0          ; 0=age-banded NSIB  1=hugershoff NSIB
;
; Define periods for calibration
;
if n_elements(iper) eq 0 then iper=0
                ; 0=full 1=1667-1682 2=1625-1666 3=1604-1624 4=1588-1603
                ;        5=1480-1587 6=1402-1479
                ;        7=1982-1988 8=1989-1991 9=1992-1994
if n_elements(thalf) eq 0 then thalf=10.
if n_elements(dosmooth) eq 0 then dosmooth=0
                ; 0=compute errors for interannual data 1=for smoothed
if n_elements(donh) eq 0 then donh=1
                ; 0=do NH 1=do ALL
if n_elements(fyr) eq 0 then begin
  case iper of
    0: fyr=1950  ; 1950, 1800, 1700
    1: fyr=1700
    2: fyr=1700
    3: fyr=1700
    4: fyr=1600
    5: fyr=1600  ; 1600, 1500
    6: fyr=1500  ; 1500, 1400
    7: fyr=1950
    8: fyr=1988
    9: fyr=1988
  endcase
endif
fst=string(fyr,format='(I4)')
;
doprint=1                 ; 0=no output 1=extra output
nret=9                    ; max number of PCs to compute (should be all)
nuse=6                    ; max number of PCs to show and use in PCR
pcaper=[1700,1960]        ; PCA analysis period
calper=[1906,1960]        ; calibration period
verper=[1871,1905]        ; verification period
; IF CHANGING DOFINAL to 0, CHANGE BACK & RERUN AFTERWARDS!!!!
dofinal=0                ; 0=use calper & verper
finalper=[1881,1960]     ; 1=happy with model, so re-calibrate using *same*
                         ; predictors but a standard period
;
; Now, if required, get the Hugershoff NSIB series
;
if dohug eq 1 then begin
  ; Get the full grid, full time version
  restore,'regts_mxd_fixed1950.idlsave'
  ; Gets many things including regname, timey, bestmxd
  insib=where(regname eq 'NSIB')
  hugts=reform(bestmxd(*,insib))
  hugyr=timey
  ; Get the fixed grid, full time version
  restore,'regts_mxd_fixed'+fst+'.idlsave'
  ; Gets many things including regname, timey, bestmxd
  hugfixts=reform(bestmxd(*,insib))
endif
;
if donh eq 0 then nhtit='NH' else nhtit='ALL'
pertit=string(iper,format='(I1)')
;
; Get the hemispheric temperature series
;
restore,filename='datastore/compbest_mxd_fixed1950.idlsave'
if donh eq 0 then nhtemp=reform(comptemp(*,3)) $
             else nhtemp=reform(comptemp(*,2))
; Get rid of pre-1871 temperatures
knh=where(timey lt 1871)
nhtemp(knh)=!values.f_nan
tempyr=timey
;
; Get the age-banded regional MXD series for the required fixed grid
;
if fyr eq 1950 then fst=''
restore,filename='/cru/u2/f055/treeharry/banding/bandregnh_fixed'+fst+'.idlsave'
; Gets: timey,nyr,nhts,allts,alleps,nreg,regname
fixts=allts
;
; Get all the age-banded MXD series (regions back to 1400)
;
restore,filename='/cru/u2/f055/treeharry/banding/bandregnh_fixed.idlsave'
; Gets: timey,nyr,nhts,allts,alleps,nreg,regname
yrmxd=timey
regname=['NEUR','SEUR','NSIB','ESIB','CAS','TIBP','WNA','NWNA','ECCA']
;
; Optionally replace NSIB with Hugershoff version.  But since we're using
; covariance matrix PCA, we need the variance of it to be correct, so match
; it to the age-banded version being replaced over the period 1700-1994.
; In fact, the age-banded series were normalised over that period, so we
; just re-normalise the new series.
;
if dohug eq 1 then begin   
  insib=where(regname eq 'NSIB')
  onets=reform(fixts(insib,*))
  mknormal,onets,timey,refperiod=[1700,1994],refmean=rfm,refsd=rfs
  print,regname(insib),rfm,rfs
  onets=reform(allts(insib,*))
  mknormal,onets,timey,refperiod=[1700,1994],refmean=rfm,refsd=rfs
  print,regname(insib),rfm,rfs
  mknormal,hugfixts,hugyr,refperiod=[1700,1994]
  mknormal,hugts,hugyr,refperiod=[1700,1994]
  ; The hugershoff is 1400-1994, but the age-banded is 1400-1995, so:
  fixts(insib,*)=[hugfixts,!values.f_nan]
  allts(insib,*)=[hugts,!values.f_nan]
endif
;
; Remove the non-contiguous bits of the series, by identifying the
; latest missing value before 1700, and removing all values prior
; to this.
;
for ireg = 0 , nreg-1 do begin
  kkk=where(yrmxd lt 1700)
  onets=reform(allts(ireg,*))
  ml=where(finite(onets(kkk)) eq 0,nmiss)
  if nmiss gt 0 then begin
    lastmiss=ml(nmiss-1)
    allts(ireg,0:lastmiss)=!values.f_nan
    fixts(ireg,0:lastmiss)=!values.f_nan
  endif
endfor
;
; Prepare for plotting
;
loadct,39
multi_plot,nrow=4,ncol=1,layout='large'
if !d.name eq 'X' then begin
  if doprint eq 1 then window,ysize=850
  !p.font=-1
endif else begin
  !p.font=0
  device,/helvetica,/bold,font_size=12
endelse
!y.omargin=[4,0]
def_1color,10,color='blue'
def_1color,11,color='red'
def_1color,12,color='vlgrey'
;
; Optionally remove TIBP and CAS
;
;iuse=[0,1,2,3,6,7,8]
;nreg=n_elements(iuse)
;nret=nreg
;nuse=nuse < nret
;regname=regname(iuse)
;allts=allts(iuse,*)
;
case iper of
  0: iuse=indgen(nreg)
  1: iuse=[0,1,2,3,4,6,7,8]
  2: iuse=[0,1,2,3,4,6,7]
  3: iuse=[0,1,2,3,4,6]
  4: iuse=[0,2,3,6]
  5: iuse=[2,3,6]
  6: iuse=[2,3]
  7: iuse=[0,2,3,4,5,8]
  8: iuse=[0,2,3,4,5]
  9: iuse=[3,4,5]
endcase
nreg=n_elements(iuse)
nret=nreg
nuse=nuse < nret
regname=regname(iuse)
allts=allts(iuse,*)
fixts=fixts(iuse,*)
print,'Model number',iper
print,'Regions',nreg,regname
print,'Fixed grid',fyr
print,'Region',nhtit
print,'Smoothing?',dosmooth,thalf
print,'NSIB: 0=age 1=hug',dohug
;
; Now compute leading PCs of the group of time series
;
kpca=where( (yrmxd ge pcaper(0)) and (yrmxd le pcaper(1)) , nyrpca)
pcayr=yrmxd(kpca)
data1=allts(*,kpca)
myeof1d,data1,ev,ea,lam,lampct,lamcum,nretain=nret
;
if doprint eq 1 then begin
plot,findgen(nret)+1.,lam,xtitle='Mode',$
  ytitle='Eigenvalue',$
  title='Covariance matrix PCA of!Cregional age-banded MXD'
;
plot,findgen(nret)+1.,alog(lam),xtitle='Mode',$
  ytitle='Log of eigenvalue'
;
plot,findgen(nret+1),[!values.f_nan,lampct],xtitle='Mode',$
  ytitle='Explained variance  (%)',psym=10
;
plot,findgen(nret+1),[0.,lamcum],xtitle='Mode',$
  ytitle='Cumulative explained variance  (%)',psym=10,yrange=[0,100]
endif
;
; Plot the selected ones
;
nret=nuse
for iret = 0 , nret-1 do begin
  ;
  ; Adjust EOF and PC to give mean loading as positive
  ;
  oneev=reform(ev(*,iret))
  onetot=total(oneev,/nan)
  if onetot lt 0. then begin
    ev(*,iret)=-ev(*,iret)
    ea(*,iret)=-ea(*,iret)
  endif
  ;
if doprint eq 1 then begin
  pause
  onets=ea(*,iret)
  filter_cru,thalf,/nan,tsin=onets,tslow=onelow
  plot,pcayr,onets,$
    /xstyle,xtitle='Year',$
    /ystyle,ytitle='PC time series',yrange=[-4.5,4],$
    title='Mode #'+string(iret+1,format='(I2)')
  oplot,!x.crange,[0.,0.],linestyle=1
  oplot,pcayr,onelow,thick=3
  ;
  if ((iret mod 3) eq 2) or (iret eq nret-1) then begin
    !p.multi=[3,3,4,0,0]
    subind=iret mod 3
    for jret = iret-subind , iret do begin
      onelf=ev(*,jret)
      onex=findgen(nreg)
      cpl_barts,onex,onelf,outline=-1,bar_color=12,$
        /xstyle,xtickformat='nolabels',$
        /ystyle,yrange=[-1,1],ytitle='Loading',$
        title='Mode #'+string(jret+1,format='(I2)')
      for ireg = 0 , nreg-1 do begin
        xyouts,onex(ireg)+0.25,-1.05,align=1.,orient=90.,regname(ireg)
      endfor
    endfor
    !p.multi=[0,1,4,0,0]
  endif
endif
  ;
endfor
;
; Now extract the PCA modes to provide to the step-wise screening regression
;
ev=ev(*,0:nret-1)
ea=ea(*,0:nret-1)
;
; Now recompute the PC time series using all the data (gives longer series
; and ones that haven't had a specific long-term mean removed).
; Also do it for the fixed grid regional series.
;
allea=fltarr(nyr,nret)
fixea=fltarr(nyr,nret)
missea=fltarr(nyr,nret)
allea(*,*)=!values.f_nan
fixea(*,*)=!values.f_nan
missea(*,*)=!values.f_nan
for iyr = 0 , nyr-1 do begin
  for iret = 0 , nret-1 do begin
    onedat=allts(*,iyr)*ev(*,iret)
    allea(iyr,iret)=total(onedat,/nan)
    missea(iyr,iret)=total(onedat)
    onedat=fixts(*,iyr)*ev(*,iret)
    fixea(iyr,iret)=total(onedat)
  endfor
endfor
ea=missea
;
kcal=where( (yrmxd ge calper(0)) and (yrmxd le calper(1)) , nyrcal)
indts=transpose(ea(kcal,*))
indfixts=transpose(fixea(kcal,*))
ktem=where( (tempyr ge calper(0)) and (tempyr le calper(1)) , nyrtemp)
if nyrtemp ne nyrcal then message,'Ooops!!!'
depts=nhtemp(ktem)
wts=fltarr(nyrcal)+1.
ltm=total(depts)/float(nyrtemp)
vn='PC#'+string(indgen(nret)+1,format='(I1)')
;
; Now perform the regression
;
;print,'---------------------------------------------------------------'
;print,'REGRESSION USING ALL RETAINED PCs:',nret
regression,indts,depts,wts,acoeff,bcoeff,yresid,ypred,bsigma,fvalue,$
  indr,varname=vn,noprint=1-doprint
;print
;print,'Individual correlations'
;print,indr,format='(9F7.2)'
;print,'---------------------------------------------------------------'
;print
print,'---------------------------------------------------------------'
print,'STEPWISE REGRESSION'
ineq=!values.f_nan       ; do not force any variables into regression!
stepwise,indts,depts,ineq,0.05,0.05,varnames=vn       ;,/report
print,'---------------------------------------------------------------'
;
if dofinal eq 1 then begin
  calper=finalper
  kcal=where( (yrmxd ge calper(0)) and (yrmxd le calper(1)) , nyrcal)
  indts=transpose(ea(kcal,*))
  indfixts=transpose(fixea(kcal,*))
  ktem=where( (tempyr ge calper(0)) and (tempyr le calper(1)) , nyrtemp)
  if nyrtemp ne nyrcal then message,'Ooops!!!'
  depts=nhtemp(ktem)
  wts=fltarr(nyrcal)+1.
  ltm=total(depts)/float(nyrtemp)
endif
;
; Now redo the regression using just the selected predictors and also
; compute the calibration statistics
;
indts=indts(ineq,*)
mkcalibratemulti,indts,depts,fitcoeff=fitcoeff,autocoeff=autocoeff
bcoeff=fitcoeff.beta
acoeff=fitcoeff.alpha
;
; Now redo the regression using just the selected predictors and the
; PC timeseries based on the fixed-grid regional series to compute the
; calibration statistics for error bands
;
indfixts=indfixts(ineq,*)
mkcalibratemulti,indfixts,depts,fitcoeff=fitfix,autocoeff=autofix
print,'Fixed grid calibration statistics'
print,'     r   alpha SEalpha    sig    rmse'
print,fitfix.r,fitfix.alpha,fitfix.sealpha,fitfix.sig,fitfix.rms,$
  format='(F6.2,2F8.4,F7.2,F8.4)'
print,'beta(s) then SEbeta(s)'
print,fitfix.beta,fitfix.sebeta,format='(8F8.4)'
print,'     ess a_resid   a_tem   a_mxd...'
print,autofix.ess,autofix.ac1resid,autofix.ac1pand,autofix.ac1por,$
  format='(F8.1,6F8.2)'
;
; Scale up the standard errors of the regression coefficients if there
; is autocorrelation in the residuals
;
aresid=autofix.ac1resid
serialfac=(1.+abs(aresid))/(1.-abs(aresid))
fitfix.sebeta=fitfix.sebeta*serialfac
print,'SEbeta(s) scaled up for autocorrelation'
print,fitfix.sebeta,format='(4F8.4)'
;
; Now compute the standard errors for this (full-grid) reconstruction.
; SE due to uncertainty in (a) slope(s) and (b) intercept (ZERO HERE!!),
; and (c) due to unexplained variance
;
useea=missea(*,ineq)
npred=n_elements(ineq)
allerr=fltarr(nyr,npred)
if dosmooth ne 0 then begin
  ; filter the predictor(s) and project the slope errors
  for ipred = 0 , npred-1 do begin
    ; To correctly compute errors, need predictors as anomalies from calper!
    onepred=reform(useea(*,ipred))
    mkanomaly,onepred,yrmxd,refperiod=calper,refmean=predmean
    print,'Predictor #',ipred,' mean over cal. per. is',predmean
    filter_cru,/nan,thalf,tsin=onepred,tslow=lowpred,weight=filwt
    allerr(*,ipred)=lowpred*fitfix.sebeta(ipred)
  endfor
  nwt=n_elements(filwt)
  allwt=[reverse(filwt(1:nwt-1)),filwt]
  varfac=serialfac*sqrt(total(allwt^2)) < 1. ; don't let errors get bigger
;   print,'Unexplained variance scaled (due to smoothing) by',varfac
  se_c=fitfix.rms*varfac
endif else begin
  ; project the slope errors
  for ipred = 0 , npred-1 do begin
    ; To correctly compute errors, need predictors as anomalies from calper!
    onepred=reform(useea(*,ipred))
    mkanomaly,onepred,yrmxd,refperiod=calper,refmean=predmean
    print,'Predictor #',ipred,' mean over cal. per. is',predmean
    allerr(*,ipred)=onepred*fitfix.sebeta(ipred)
  endfor
  se_c=fitfix.rms
endelse
if npred eq 1 then se_a=reform(allerr) else se_a=sqrt(total(allerr^2,2))
;if npred eq 1 then se_a=reform(allerr) else se_a=total(allerr,2)/float(npred)
;print,'TESTING:'
;print,se_a(500),reform(allerr(500,*)),sqrt(total(allerr(500,*)^2))
se_b=0.
predserr=sqrt( se_a^2 + se_b^2 + se_c^2 )
;
; Now apply the regression to the full period with data (any missing,
; means no prediction!).
;
prednh=fltarr(nyr)
prednh(*)=!values.f_nan
useea=missea(*,ineq)
for iyr = 0 , nyr-1 do begin
  prednh(iyr)=total(useea(iyr,*)*bcoeff(*))+acoeff
endfor
;
; Now verify it over the verification period
; Both verification and calibration % var expl. remove the temperature long
; term mean from the calibration period!
;
kmxd=where( (yrmxd ge verper(0)) and (yrmxd le verper(1)) , nyrmxd)
ktem=where( (tempyr ge verper(0)) and (tempyr le verper(1)) , nyrtemp)
if nyrtemp ne nyrmxd then message,'Ooops!!!'
print
print,'Verification over:',verper
print,mkcorrelation(prednh(kmxd),nhtemp(ktem),filter=thalf)
sqvfull=total((nhtemp(ktem)-ltm)^2)
sqvresid=total( (nhtemp(ktem)-prednh(kmxd))^2 )
print,'RE=',1.-sqvresid/sqvfull
verltm=total(nhtemp(ktem))/float(nyrtemp)
sqvfull=total((nhtemp(ktem)-verltm)^2)
;print,'CE=',1.-(sqvresid/sqvfull)
;
kmxd=where( (yrmxd ge calper(0)) and (yrmxd le calper(1)) , nyrmxd)
ktem=where( (tempyr ge calper(0)) and (tempyr le calper(1)) , nyrtemp)
if nyrtemp ne nyrmxd then message,'Ooops!!!'
;print
print,'Calibration over:',calper
print,mkcorrelation(prednh(kmxd),nhtemp(ktem),filter=thalf)
sqvfull=total((nhtemp(ktem)-ltm)^2)
sqvresid=total( (nhtemp(ktem)-prednh(kmxd))^2 )
print,'RE=',1.-sqvresid/sqvfull
verltm=total(nhtemp(ktem))/float(nyrtemp)
sqvfull=total((nhtemp(ktem)-verltm)^2)
;print,'CE=',1.-sqvresid/sqvfull
;
; Now apply the regression to the full period with data (any missing,
; set predictor to zero and carry on!).
;
fullnh=fltarr(nyr)
fullnh(*)=!values.f_nan
useea=allea(*,ineq)
for iyr = 0 , nyr-1 do begin
  fullnh(iyr)=total(useea(iyr,*)*bcoeff(*))+acoeff
endfor
;
; Only store results if dofinal is set
;
if dofinal ne 0 then begin
;
fst=string(fyr,format='(I4)')
if dosmooth eq 1 then smst='sm'+string(thalf,format='(I2.2)') else smst=''
if dohug eq 1 then ahug='_NSIBhug' else ahug=''
save,$
 filename='bandtemp'+nhtit+pertit+'_fixed'+fst+smst+'_calpcr'+ahug+'.idlsave',$
 yrmxd,prednh,nhtit,nyr,fullnh,predserr,tempyr,nhtemp
;
endif
;
end
