;
; Creates regional timeseries based on a limited sample of MXD series,
; variance adjusted with a time-dependent rbar, and compares them with
; a regional instrumental temperature record which is also computed here
; and variance adjusted.
;
; The instrumental record can be based on either all available land
; observations in the region, or only those with an MXD site within their grid
; box (in the latter case, the region is defined by the FULL sample of MXD
; data, not by the limited sample of MXD sites actually being used.
; The instrumental data is based on Apr-Sep means, although this is adjustable.
; Large-scale composite means are not done here.
;
; The MXD sites are limited first by specifying a year for which each site
; used must have had data for (e.g., specifying 1700 would use only sites
; with data back to at least 1700).  After this selection, the available
; sites are sorted (on a region-by-region basis) into descending order
; according to their correlation with local Apr-Sep temperature (already
; computed, using the period 1881-1984 where data is available).  Calculations
; are repeated using gradually more and more data.
;
; Comparison is made by computing correlations over the period 1881-1984,
; both filtered and unfiltered.
;
;-----------------------------------------------------------------
dotem=0         ; 0=use CRUTEM1, 1=use CRUTEM2v
if dotem eq 0 then begin
  fntem='lat_jones_18512000.mon.nc'
  savtem=''
endif else begin
  fntem='crutem2v_18512001.mon.nc'
  savtem='ct2v_'
endelse
;
trv=0           ; selects tree-ring-variable: 0=MXD 1=TRW
case trv of
  0: begin
    fnadd='mxd'
    iseas=18        ; use local r with Apr-Sep temp to select chronologies
    seasst=3
    seasen=8
    dthr=3        ; used to be 3!
    end
  1: begin
    fnadd='trw'
    iseas=20        ; use local r with Jun-Aug temp to select chronologies
    seasst=5
    seasen=7
    dthr=2        ; used to be 2!
    end
endcase
titadd=strupcase(fnadd)
;
; Specify options
;
fixedyr=1400              ; year of fixed grid (1950, 1988, 1800, 1700,...,1400)
rbarper=[1881,1960]       ; period for computing correlation matrix
corrper=[1881,1960]    ; period over which comparison with temperature is made
doreg=0                ; 0=use co-located temperature only, 1=use full region
cutr=0.22       ; a 'best' series is based on sites with local r >= cutr
;
; Identify analysis period:
; For speed, analyse the minimum possible. Must cover all of rbarper and
; corrper, plus a bit more either side of corrper for filtering.  Does not
; need to cover the fixedyr.
;
analper=[1860,2000]
anallen=analper(1)-analper(0)+1
;
; First read in the MXD data
;
print,'Reading '+titadd+' data'
restore,filename='all'+fnadd+'.idlsave'
;  nchron,idno,idname,location,country,tree,yrstart,yrend,statlat,statlon,$
;  mxd,fraction,timey,nyr
;
; Next read in the regional breakdown
;
print,'Reading MXD regions'
restore,filename='reg_mxdlists.idlsave'
;  ntree,treelist,nreg,regname
;
; Next read in the correlations between MXD and local climate
;
print,'Reading '+titadd+' local correlations'
restore,filename='datastore/'+savtem+fnadd+'_moncorr.idlsave'
;  allr,ncorr,nvar,nchron,varname,corrname,statlat,statlon,allp,moir,moir2
;
; Read in the instrumental temperatures for the required period
;
print,'Reading temperatures'
print,fntem
ncid=ncdf_open('/cru/u2/f055/data/obs/grid/surface/'+fntem)
tsmon=crunc_rddata(ncid,tst=[analper(0),0],tend=[analper(1),11],grid=gtemp)
ncdf_close,ncid
nmon=12
ntemp=gtemp.nt
nyrtemp=ntemp/nmon
yrtemp=reform(gtemp.year,nmon,nyrtemp)
yrtemp=reform(yrtemp(0,*))
;
; Read in the regional breakdown of grid boxes
;
print,'Reading temperature regions'
restore,filename='reg_mxdboxes.idlsave'
;  nreg,boxlists,boxnumbs,regname,g,boxregs,boxprec,boxtemp
;
; Define arrays for storing all regional results
;
z=!values.f_nan
maxntree=max(ntree)
nsites=fltarr(nreg)            ; number of sites with data back to fixed yr
regr=fltarr(maxntree,nreg,3)*z ; regional raw high low correls as extra series
locr=fltarr(maxntree,nreg)*z   ; local correlations sorted into descending order
bestx=fltarr(maxntree,nreg)*z  ; location of sites making best raw correlation
besty=fltarr(maxntree,nreg)*z
bestmxd=fltarr(anallen,nreg,3)*z ; MXD series: best raw regr; cutoff locr; all
regtemp=fltarr(anallen,nreg)*z ; regional temperature timeseries
rawtemp=fltarr(anallen,nreg)*z ; regional temps unadjusted for variance changes
cutsites=fltarr(nreg)
cutx=fltarr(maxntree,nreg)
cuty=fltarr(maxntree,nreg)
;
; Now analyse each region in turn
;
multi_plot,nrow=6,ncol=3,layout='large',/landscape
loadct,39
def_1color,20,color='red'
if !d.name eq 'X' then window,xsize=1000,ysize=850
for ireg = 0 , nreg-1 do begin
  print,regname(ireg)
  ;
  ; Extract monthly instrumental data for current region
  ;
  ; Choose mask (co-located data only, or all temperatures in region)
  if doreg eq 0 then fdmask=reform(boxlists(*,*,ireg)) $
                else fdmask=reform(boxregs(*,*,ireg))
  ; Count boxes to use
  blist=where(finite(fdmask),nbox)
  ; Define array to hold monthly grid box timeseries from region
  regts=fltarr(nbox,ntemp)
  ; Fill the array
  for imon = 0 , ntemp-1 do begin
     fd1mon=reform(tsmon(*,*,imon))
     regts(*,imon)=fd1mon(blist)
  endfor
  ; Separate months from years
  regts=reform(regts,nbox,nmon,nyrtemp)
  ; Compute seasonal average (April-September)
  regseas=mkseason(regts,seasst,seasen,datathresh=dthr)
  ; Define an array to hold latitude weighting
  locy=fix(blist/g.nx)
  regwt=cos(g.y(locy)*!dtor)
  ; Compute a regional average (make second adjustment for input variance!)
  meanlat=var_adjust(regseas,yrtemp,weight=regwt,/td_rbar,$
    refperiod=rbarper,/no_anomaly,td_sdev=1,rbar=rbar,corrmat=tempcm,$
    /mkrbar,/mkcorr)
  ; For WNA and NWNA, early values are contaminated by warm anomalies, probably
  ; due to the use of north facing walls instead of Stephenson screens.  Must
  ; remove the contaminated data.
  if ireg eq 6 then meanlat(where(yrtemp le 1871))=!values.f_nan
  if ireg eq 7 then meanlat(where(yrtemp le 1886))=!values.f_nan
  ; Need to store a regional series in oC.  meanlat is adjusted but still in
  ; oC (hopefully), but represents a single series.  Need to multiply by the
  ; time-constant rbar to get regional variance.
  klper=where((yrtemp ge rbarper(0)) and (yrtemp le rbarper(1)))
  constrbar=mkrbar(regseas(*,klper))
  regtemp(*,ireg)=meanlat(*)*sqrt(constrbar)
  mknormal,meanlat,yrtemp,refperiod=rbarper
  ;
  ; Also compute unadjusted regional mean series
  ;
  wt=fltarr(nbox,nyrtemp)
  for iyr = 0 , nyrtemp-1 do wt(*,iyr)=regwt(*)
  totval=total(regseas*wt,1,/nan)
  totnum=total(float(finite(regseas))*wt,1,/nan)
  rawtemp(*,ireg)=totval/totnum
  ;
  ; First select sites in region
  ;
  nx=ntree(ireg)
  print,'Maximum number of sites',nx
  tl=treelist(0:nx-1,ireg)
  ;
  regmxd=mxd(*,tl)
  regfra=fraction(*,tl)
  regcor=reform(allr(tl,iseas,1))        ; iseas=18/20=Apr-Sep/Jun-Aug 1=temperature
  xxx=statlon(tl)
  yyy=statlat(tl)
  ;
  ; Select those sites with data back to required year
  ;
  iyr=where(timey eq fixedyr)
  kl=where(finite(regmxd(iyr,*)),nx)
  print,'Fixed grid number of sites',nx
  nsites(ireg)=nx
  if nx gt 0 then begin
    regmxd=regmxd(*,kl)
    regfra=regfra(*,kl)
    regcor=regcor(kl)
    xxx=xxx(kl)
    yyy=yyy(kl)
    ;
    ; Reduce time dimension to minimum required
    ;
    kl2=where((timey ge analper(0)) and (timey le analper(1)),nyr)
    regmxd=regmxd(kl2,*)
    regfra=regfra(kl2,*)
    yrmxd=timey(kl2)
    ;
    ; Transpose to give space then time
    ;
    regmxd=transpose(regmxd)
    regfra=transpose(regfra)
    ;
    ; Compute correlation matrix of all retained sites
    ;
    fullrbar=mkrbar(regmxd,grandr=corrmat)
    ;
    ; Identify decreasing order of local skill
    ;
    slist=reverse(sort(regcor))
    locr(0:nx-1,ireg)=regcor(slist)
    ;
    ; Identify those with significant local correlations (approx.)
    ;
    dummy=where(regcor ge cutr,n2cut)
    cutsites(ireg)=n2cut
    ;
    !p.multi(0)=0
    pause
    plot,regcor(slist),/xstyle,title=regname(ireg),psym=-1
    ;
    ; Compute regional series for a range of subsets (based on skill sort)
    ;
    print,'Number of sites being used:'
    bestr=-1.
    for iuse = 0 , nx-1 do begin
      print,iuse,format='($,I4)'
      ;
      ; Extract skillful subset and subset of the correlation matrix
      ;
      submxd=regmxd(slist(0:iuse),*)
      subfra=regfra(slist(0:iuse),*)
      subcorr=corrmat(slist(0:iuse),*)
      subcorr=subcorr(*,slist(0:iuse))
      subcorr=reform(subcorr,iuse+1,iuse+1)     ; make sure it is 2D
      ;
      ; Compute weighted regional mean with appropriate variance adjustment
      ;
      meanmxd=var_adjust(submxd,yrmxd,weight=subfra,corrmat=subcorr,$
        refperiod=rbarper,/no_anomaly,/td_rbar)
      mknormal,meanmxd,yrmxd,refperiod=rbarper
      ;
      ; Correlate MXD with temperature
      ;
      r3=mkcorrelation(meanmxd,meanlat,yrmxd,filter=10,refperiod=corrper,$
        datathresh=10)
      regr(iuse,ireg,*)=r3(*)
      if (iuse eq 0) or (r3(0) gt bestr) then begin
        bestr=r3(0)
        bestmxd(*,ireg,0)=meanmxd(*)
        bestx(0:iuse,ireg)=xxx(slist(0:iuse))
        besty(0:iuse,ireg)=yyy(slist(0:iuse))
      endif
      if iuse+1 eq n2cut then begin
        bestmxd(*,ireg,1)=meanmxd(*)
        cutx(0:iuse,ireg)=xxx(slist(0:iuse))
        cuty(0:iuse,ireg)=yyy(slist(0:iuse))
      endif
      if iuse eq nx-1 then bestmxd(*,ireg,2)=meanmxd(*)
      ;
      if (iuse mod 5) eq 0 then begin
        plot,yrmxd,meanmxd,/xstyle,title=string(iuse)+string(r3(0))
        oplot,yrtemp,meanlat,color=20
      endif
      ;
    endfor
    print
    ;
  endif
  ;
endfor
;
; Now save results
;
timey=yrmxd
if doreg eq 0 then fnadd2='' else fnaddi2='full'
;
save,filename='datastore/'+savtem+fnadd2+'regbest_'+fnadd+'_fixed'+$
    string(fixedyr,format='(I4)')+'.idlsave',$
  nreg,regname,nsites,regr,locr,bestx,besty,bestmxd,regtemp,timey,$
  fixedyr,corrper,doreg,maxntree,cutx,cuty,cutsites,seasst,seasen,rawtemp
;
end
