wt_learning.c

Go to the documentation of this file.
00001 /* ***************************************************** */
00002 /* wt_learning Compute learning data needed for          */
00003 /* downscaling climate scenarios using weather typing.   */
00004 /* wt_learning.c                                         */
00005 /* ***************************************************** */
00006 /* Author: Christian Page, CERFACS, Toulouse, France.    */
00007 /* ***************************************************** */
00008 /* Date of creation: oct 2008                            */
00009 /* Last date of modification: feb 2015                   */
00010 /* ***************************************************** */
00011 /* Original version: 1.0                                 */
00012 /* Current revision: 1.2                                 */
00013 /* Adapted to udunits2                                   */
00014 /* ***************************************************** */
00015 /* Revisions                                             */
00016 /* 1.1: Adapted to udunits2 (Christian Page)             */
00017 /* 1.2: Added deactivation of regression points with     */
00018 /*      all missing data in the vicinity (Christian Page)*/
00019 /* ***************************************************** */
00024 /* LICENSE BEGIN
00025 
00026 Copyright Cerfacs (Christian Page) (2015)
00027 
00028 christian.page@cerfacs.fr
00029 
00030 This software is a computer program whose purpose is to downscale climate
00031 scenarios using a statistical methodology based on weather regimes.
00032 
00033 This software is governed by the CeCILL license under French law and
00034 abiding by the rules of distribution of free software. You can use, 
00035 modify and/ or redistribute the software under the terms of the CeCILL
00036 license as circulated by CEA, CNRS and INRIA at the following URL
00037 "http://www.cecill.info". 
00038 
00039 As a counterpart to the access to the source code and rights to copy,
00040 modify and redistribute granted by the license, users are provided only
00041 with a limited warranty and the software's author, the holder of the
00042 economic rights, and the successive licensors have only limited
00043 liability. 
00044 
00045 In this respect, the user's attention is drawn to the risks associated
00046 with loading, using, modifying and/or developing or reproducing the
00047 software by the user in light of its specific status of free software,
00048 that may mean that it is complicated to manipulate, and that also
00049 therefore means that it is reserved for developers and experienced
00050 professionals having in-depth computer knowledge. Users are therefore
00051 encouraged to load and test the software's suitability as regards their
00052 requirements in conditions enabling the security of their systems and/or 
00053 data to be ensured and, more generally, to use and operate it in the 
00054 same conditions as regards security. 
00055 
00056 The fact that you are presently reading this means that you have had
00057 knowledge of the CeCILL license and that you accept its terms.
00058 
00059 LICENSE END */
00060 
00061 
00062 
00063 
00064 
00065 
00066 
00067 #include <dsclim.h>
00068 
00070 int
00071 wt_learning(data_struct *data) {
00078   double *buf_learn = NULL;
00079   double *buf_weight = NULL;
00080   double *buf_learn_obs = NULL;
00081   double *buf_learn_rea = NULL;
00082   double *buf_learn_obs_sub = NULL;
00083   double *buf_learn_rea_sub = NULL;
00084   double *buf_learn_pc = NULL;
00085   double *buf_learn_pc_sub = NULL;
00086 
00087   double *precip_liquid_obs = NULL;
00088   double *precip_solid_obs = NULL;
00089   double *precip_obs = NULL;
00090   double *mean_precip = NULL;
00091   double *mean_precip_sub = NULL;
00092 
00093   double *precip_reg = NULL;
00094   double *precip_err = NULL;
00095   double *precip_index = NULL;
00096   double *dist_reg = NULL;
00097   double *vif = NULL;
00098   double chisq;
00099   double rsq;
00100   double autocor;
00101 
00102   double obs_first_sing;
00103   double rea_sing;
00104   double obs_sing;
00105   double *rea_var = NULL;
00106   double rea_first_sing;
00107 
00108   double *tas_rea = NULL;
00109   double *tas_rea_sub = NULL;
00110   double *tas_rea_mean = NULL;
00111   double *tas_rea_mean_sub = NULL;
00112 
00113   double missing_value;
00114   double missing_value_precip;
00115 
00116   double *mean_dist = NULL;
00117   double *var_dist = NULL;
00118   double *dist = NULL;
00119   double dist_pt;
00120 
00121   double *mask_subd = NULL;
00122   short int *mask_sub = NULL;
00123   int nlon_mask;
00124   int nlat_mask;
00125   double *lon_mask = NULL;
00126   double *lat_mask = NULL;
00127 
00128   int ntime_learn_all;
00129   int *ntime_sub = NULL;
00130 
00131   double *sup_mean = NULL;
00132   double *sup_var = NULL;
00133 
00134   double meanvif = 0.0;
00135 
00136   int eof;
00137   int clust;
00138   int nt;
00139   int ntt;
00140   int t;
00141   int s;
00142   int i;
00143   int j;
00144   int pt;
00145   int term;
00146   int *npt = NULL;
00147   short int allpt;
00148 
00149   /* udunits variables */
00150   ut_system *unitSystem = NULL; /* Unit System (udunits) */
00151   ut_unit *dataunits = NULL; /* Data units (udunits) */
00152 
00153   int niter = 2;
00154 
00155   int istat; 
00156   int istat_solid; 
00158   if (data->learning->learning_provided == TRUE) {
00160     istat = read_learning_fields(data);
00161     if (istat != 0) return istat;
00162   }
00163   else  {
00167     /* Read re-analysis pre-computed EOF and Singular Values */
00168     istat = read_learning_rea_eof(data);
00169     if (istat != 0) return istat;
00170 
00171     /* Read observations pre-computed EOF and Singular Values */
00172     istat = read_learning_obs_eof(data);
00173     if (istat != 0) return istat;
00174 
00175     /* Select common time period between the re-analysis and the observation data periods */
00176     if (data->learning->obs_neof != 0) {
00177       istat = sub_period_common(&buf_learn_obs, &ntime_learn_all, data->learning->obs->eof,
00178                                 data->learning->obs->time_s->year, data->learning->obs->time_s->month, data->learning->obs->time_s->day,
00179                                 data->learning->rea->time_s->year, data->learning->rea->time_s->month, data->learning->rea->time_s->day,
00180                                 1, data->learning->obs_neof, 1, data->learning->obs->ntime, data->learning->rea->ntime);
00181       if (istat != 0) return istat;
00182     }
00183     istat = sub_period_common(&buf_learn_rea, &ntime_learn_all, data->learning->rea->eof,
00184                               data->learning->rea->time_s->year, data->learning->rea->time_s->month, data->learning->rea->time_s->day,
00185                               data->learning->obs->time_s->year, data->learning->obs->time_s->month, data->learning->obs->time_s->day,
00186                               1, data->learning->rea_neof, 1, data->learning->rea->ntime, data->learning->obs->ntime);
00187     if (istat != 0) return istat;
00188 
00189     rea_var = (double *) malloc(data->learning->rea_neof * sizeof(double));
00190     if (rea_var == NULL) alloc_error(__FILE__, __LINE__);
00191 
00192     /* Compute normalisation factor of EOF of large-scale field for the whole period */
00193 
00194     data->learning->pc_normalized_var = (double *) malloc(data->learning->rea_neof * sizeof(double));
00195     if (data->learning->pc_normalized_var == NULL) alloc_error(__FILE__, __LINE__);
00196     buf_learn_pc = (double *) malloc(data->learning->rea_neof * ntime_learn_all * sizeof(double));
00197     if (buf_learn_pc == NULL) alloc_error(__FILE__, __LINE__);
00198 
00199     for (eof=0; eof<data->learning->rea_neof; eof++) {
00200 
00201       for (nt=0; nt<ntime_learn_all; nt++)
00202         buf_learn_pc[nt+eof*ntime_learn_all] = buf_learn_rea[nt+eof*ntime_learn_all] * data->learning->rea->sing[eof];
00203 
00204       rea_var[eof] = gsl_stats_variance(&(buf_learn_pc[eof*ntime_learn_all]), 1, ntime_learn_all);
00205       if (rea_var[eof] == 0.0) {
00206         (void) fprintf(stderr, "%s: ERROR: Variance of the projection of the large-scale field onto EOF is 0.0. You probably have too many EOFs for your field. EOF number=%d. Variance=%f. Must abort...\n",
00207                        __FILE__, eof, rea_var[eof]);
00208         return -1;
00209       }
00210 
00211       /* Renormalize EOF of large-scale field for the whole period using the first EOF norm and the Singular Value */
00212       for (nt=0; nt<ntime_learn_all; nt++)
00213         buf_learn_pc[nt+eof*ntime_learn_all] = buf_learn_pc[nt+eof*ntime_learn_all] / sqrt(rea_var[0]);
00214 
00215       /* Recompute normalization factor using normalized field */
00216       data->learning->pc_normalized_var[eof] = gsl_stats_variance(&(buf_learn_pc[eof*ntime_learn_all]), 1, ntime_learn_all);
00217       if (data->learning->pc_normalized_var[eof] == 0.0) {
00218         (void) fprintf(stderr, "%s: ERROR: Normalized variance of the projection of the large-scale field onto EOF is 0.0. You probably have too many EOFs for your field. EOF number=%d. Variance=%f. Must abort...\n",
00219                        __FILE__, eof, data->learning->pc_normalized_var[eof]);
00220         return -1;
00221       }
00222     }
00223                                                     
00224     ntime_sub = (int *) malloc(data->conf->nseasons * sizeof(int));
00225     if (ntime_sub == NULL) alloc_error(__FILE__, __LINE__);
00226 
00227     /* Read observed precipitation (liquid and solid) */
00228     istat_solid = read_obs_period(&precip_solid_obs, &(data->learning->lon), &(data->learning->lat), &missing_value_precip,
00229                                   data, "prsn", data->learning->obs->time_s->year, data->learning->obs->time_s->month,
00230                                   data->learning->obs->time_s->day, &(data->learning->nlon), &(data->learning->nlat),
00231                                   data->learning->obs->ntime);
00232     if (istat_solid == -1) return -1;
00233     if (istat_solid >= 0) {
00234       (void) free(data->learning->lon);
00235       (void) free(data->learning->lat);
00236     }
00237     istat = read_obs_period(&precip_liquid_obs, &(data->learning->lon), &(data->learning->lat), &missing_value_precip, data, "prr",
00238                             data->learning->obs->time_s->year, data->learning->obs->time_s->month, data->learning->obs->time_s->day,
00239                             &(data->learning->nlon), &(data->learning->nlat), data->learning->obs->ntime);
00240     if (istat == -1) return -1;
00241 
00242     /* Calculate total precipitation */
00243     precip_obs = (double *) malloc(data->learning->nlon*data->learning->nlat*data->learning->obs->ntime * sizeof(double));
00244     if (precip_obs == NULL) alloc_error(__FILE__, __LINE__);
00245 
00246     if (istat_solid == -2) {
00247       fprintf(stderr, "%s: WARNING: Snow observation variable not found in dsclim XML config file. Will assume that you don't have snow observations, and set it to zero.\n", __FILE__);
00248       for (t=0; t<data->learning->obs->ntime; t++)
00249         for (j=0; j<data->learning->nlat; j++)
00250           for (i=0; i<data->learning->nlon; i++)
00251             if (precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] != missing_value_precip)
00252               precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] =
00253                 precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] * 86400;
00254             else
00255               precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] = missing_value_precip;
00256       (void) free(precip_liquid_obs);
00257     }
00258     else {
00259       (void) printf("%s: Calculating total precipitation from solid and liquid.\n", __FILE__);
00260       for (t=0; t<data->learning->obs->ntime; t++)
00261         for (j=0; j<data->learning->nlat; j++)
00262           for (i=0; i<data->learning->nlon; i++)
00263             if (precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] != missing_value_precip)
00264               precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] =
00265                 (precip_liquid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] +
00266                  precip_solid_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat]) * 86400.0;
00267             else
00268               precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] = missing_value_precip;
00269       (void) free(precip_liquid_obs);
00270       (void) free(precip_solid_obs);
00271     }
00272 
00273     /* Apply mask for learning data */
00274     if (data->conf->learning_maskfile->use_mask == TRUE) {
00275       /* Allocate memory */
00276       mask_sub = (short int *) malloc(data->learning->nlat*data->learning->nlon * sizeof(short int));
00277       if (mask_sub == NULL) alloc_error(__FILE__, __LINE__);
00278       for (i=0; i<data->learning->nlat*data->learning->nlon; i++)
00279         mask_sub[i] = (short int) data->conf->learning_maskfile->field[i];
00280       /* Apply mask */
00281       (void) printf("%s: Masking points using mask file for regression analysis.\n", __FILE__);
00282       (void) mask_points(precip_obs, missing_value_precip, mask_sub,
00283                          data->learning->nlon, data->learning->nlat, data->learning->obs->ntime);
00284       /* Free memory of mask_sub */
00285       (void) free(mask_sub);
00286       mask_sub = NULL;
00287     }
00288 
00289     /* Mask region if needed using domain bounding box */
00290     if (data->conf->learning_mask_longitude_min != -999.0 &&
00291         data->conf->learning_mask_longitude_max != -999.0 &&
00292         data->conf->learning_mask_latitude_min != -999.0 &&
00293         data->conf->learning_mask_latitude_max != -999.0) {
00294       (void) printf("%s: Masking region for regression analysis.\n", __FILE__);
00295       (void) mask_region(precip_obs, missing_value_precip, data->learning->lon, data->learning->lat,
00296                          data->conf->learning_mask_longitude_min, data->conf->learning_mask_longitude_max,
00297                          data->conf->learning_mask_latitude_min, data->conf->learning_mask_latitude_max,
00298                          data->learning->nlon, data->learning->nlat, data->learning->obs->ntime);
00299     }
00300 
00301     /* Perform spatial mean of observed precipitation around regression points, normalize precip */
00302     (void) printf("%s: Perform spatial mean of observed precipitation around regression points.\n", __FILE__);
00303     mean_precip = (double *) malloc(data->reg->npts * data->learning->obs->ntime * sizeof(double));
00304     if (mean_precip == NULL) alloc_error(__FILE__, __LINE__);
00305     npt = (int *) malloc(data->learning->obs->ntime * sizeof(int));
00306     if (npt == NULL) alloc_error(__FILE__, __LINE__);
00307     for (pt=0; pt<data->reg->npts; pt++) {
00308       for (t=0; t<data->learning->obs->ntime; t++) {
00309         mean_precip[t+pt*data->learning->obs->ntime] = 0.0;
00310         npt[t] = 0;
00311       }
00312       for (j=0; j<data->learning->nlat; j++)
00313         for (i=0; i<data->learning->nlon; i++) {
00314           dist_pt = distance_point(data->reg->lon[pt], data->reg->lat[pt],
00315                                    data->learning->lon[i+j*data->learning->nlon], data->learning->lat[i+j*data->learning->nlon]);
00316           if (dist_pt <= data->reg->dist)
00317             for (t=0; t<data->learning->obs->ntime; t++)
00318               if (precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat] != missing_value_precip) {
00319                 mean_precip[t+pt*data->learning->obs->ntime] +=
00320                   precip_obs[i+j*data->learning->nlon+t*data->learning->nlon*data->learning->nlat];
00321                 npt[t]++;
00322               }
00323         }
00324       allpt = FALSE;
00325       for (t=0; t<data->learning->obs->ntime; t++)
00326         if (npt[t] == 0) allpt = TRUE;
00327       if (allpt == TRUE) {
00328         (void) fprintf(stderr, "%s: WARNING: There are no point of observation in the vicinity of the regression point #%d at a minimum distance of at least %f meters! Verify your regression points, or the configuration of your coordinate variable names in your configuration file, or that you don't have all missing values in your observations in the vicinity of the regression point. Time=%d. lon=%lf lat=%lf. WARNING: Will desactivate this regression point.\n",
00329                        __FILE__, pt, data->reg->dist, t, data->reg->lon[pt], data->reg->lat[pt]);
00330         for (t=0; t<data->learning->obs->ntime; t++)
00331           mean_precip[t+pt*data->learning->obs->ntime] = missing_value_precip;
00332       }
00333       else
00334         for (t=0; t<data->learning->obs->ntime; t++)
00335           mean_precip[t+pt*data->learning->obs->ntime] = sqrt(mean_precip[t+pt*data->learning->obs->ntime] / (double) npt[t]);
00336     }
00337     (void) free(npt);
00338     (void) free(precip_obs);
00339 
00340     /* Select common time period between the re-analysis and the observation data periods for */
00341     /* secondary large-scale field and extract subdomain */
00342     istat = read_field_subdomain_period(&tas_rea, &(data->learning->sup_lon), &(data->learning->sup_lat),
00343                                         &missing_value, data->learning->nomvar_rea_sup,
00344                                         data->learning->obs->time_s->year, data->learning->obs->time_s->month,
00345                                         data->learning->obs->time_s->day,
00346                                         data->conf->secondary_longitude_min, data->conf->secondary_longitude_max,
00347                                         data->conf->secondary_latitude_min, data->conf->secondary_latitude_max, 
00348                                         data->learning->rea_coords, data->learning->rea_gridname,
00349                                         data->learning->rea_lonname, data->learning->rea_latname,
00350                                         data->learning->rea_dimxname, data->learning->rea_dimyname,
00351                                         data->learning->rea_timename, data->learning->filename_rea_sup,
00352                                         &(data->learning->sup_nlon), &(data->learning->sup_nlat), data->learning->obs->ntime);
00353 
00354     /* Perform spatial mean of secondary large-scale fields */
00355     tas_rea_mean = (double *) malloc(data->learning->obs->ntime * sizeof(double));
00356     if (tas_rea_mean == NULL) alloc_error(__FILE__, __LINE__);
00357     /* Prepare mask */
00358     if (data->secondary_mask->use_mask == TRUE) {
00359       (void) extract_subdomain(&mask_subd, &lon_mask, &lat_mask, &nlon_mask, &nlat_mask, data->secondary_mask->field,
00360                                data->secondary_mask->lon, data->secondary_mask->lat,
00361                                data->conf->secondary_longitude_min, data->conf->secondary_longitude_max,
00362                                data->conf->secondary_latitude_min, data->conf->secondary_latitude_max, 
00363                                data->secondary_mask->nlon, data->secondary_mask->nlat, 1);
00364       if (data->learning->sup_nlon != nlon_mask || data->learning->sup_nlat != nlat_mask) {
00365         (void) fprintf(stderr, "%s: IMPORTANT WARNING: The mask for secondary large-scale fields after selecting subdomain has invalid dimensions: nlon=%d nlat=%d. Expected: nlon=%d nlat=%d\nReverting to no-mask processing.", __FILE__, nlon_mask, nlat_mask,
00366                        data->learning->sup_nlon, data->learning->sup_nlat);
00367         mask_sub = (short int *) NULL;
00368       }
00369       else {
00370         mask_sub = (short int *) malloc(data->learning->sup_nlat*data->learning->sup_nlon * sizeof(short int));
00371         if (mask_sub == NULL) alloc_error(__FILE__, __LINE__);
00372         for (i=0; i<data->learning->sup_nlat*data->learning->sup_nlon; i++)
00373           mask_sub[i] = (short int) mask_subd[i];
00374       }
00375       (void) free(mask_subd);
00376       (void) free(lon_mask);
00377       (void) free(lat_mask);
00378     }
00379     else
00380       mask_sub = (short int *) NULL;
00381 
00382     if (mask_sub != NULL)
00383       printf("%s: Using a mask for secondary large-scale fields.\n", __FILE__);
00384 
00385     (void) mean_field_spatial(tas_rea_mean, tas_rea, mask_sub, data->learning->sup_nlon, data->learning->sup_nlat,
00386                               data->learning->obs->ntime);
00387     if (mask_sub != NULL)
00388       (void) free(mask_sub);
00389 
00390     /* Loop over each season */
00391     (void) printf("Extract data for each season separately and process each season.\n");
00392 
00393     for (s=0; s<data->conf->nseasons; s++) {
00394       /* Process separately each season */
00395 
00396       /* Select season months in the whole time period and create sub-period fields */
00397       if (data->learning->obs_neof != 0) {
00398         (void) extract_subperiod_months(&buf_learn_obs_sub, &(ntime_sub[s]), buf_learn_obs,
00399                                         data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day,
00400                                         data->conf->season[s].month,
00401                                         1, 1, data->learning->obs_neof, ntime_learn_all,
00402                                         data->conf->season[s].nmonths);
00403       }
00404       (void) extract_subperiod_months(&buf_learn_rea_sub, &(ntime_sub[s]), buf_learn_rea,
00405                                       data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day,
00406                                       data->conf->season[s].month,
00407                                       1, 1, data->learning->rea_neof, ntime_learn_all,
00408                                       data->conf->season[s].nmonths);
00409       (void) extract_subperiod_months(&buf_learn_pc_sub, &(ntime_sub[s]), buf_learn_pc,
00410                                       data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day,
00411                                       data->conf->season[s].month,
00412                                       1, 1, data->learning->rea_neof, ntime_learn_all,
00413                                       data->conf->season[s].nmonths);
00414       (void) extract_subperiod_months(&tas_rea_mean_sub, &(ntime_sub[s]), tas_rea_mean,
00415                                       data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day,
00416                                       data->conf->season[s].month,
00417                                       1, 1, 1, ntime_learn_all,
00418                                       data->conf->season[s].nmonths);
00419       (void) extract_subperiod_months(&tas_rea_sub, &(ntime_sub[s]), tas_rea,
00420                                       data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day,
00421                                       data->conf->season[s].month,
00422                                       1, data->learning->sup_nlon, data->learning->sup_nlat, ntime_learn_all,
00423                                       data->conf->season[s].nmonths);
00424       (void) extract_subperiod_months(&mean_precip_sub, &(ntime_sub[s]), mean_precip,
00425                                       data->learning->time_s->year, data->learning->time_s->month, data->learning->time_s->day,
00426                                       data->conf->season[s].month,
00427                                       1, 1, data->reg->npts, ntime_learn_all,
00428                                       data->conf->season[s].nmonths);
00429 
00431       data->learning->data[s].sup_index = (double *) malloc(ntime_sub[s] * sizeof(double));
00432       if (data->learning->data[s].sup_index == NULL) alloc_error(__FILE__, __LINE__);
00433       data->learning->data[s].sup_val = (double *) malloc(data->learning->sup_nlon*data->learning->sup_nlat*ntime_sub[s] * sizeof(double));
00434       if (data->learning->data[s].sup_val == NULL) alloc_error(__FILE__, __LINE__);
00435 
00436       /* Compute mean and variance over time */
00437       data->learning->data[s].sup_index_mean = gsl_stats_mean(tas_rea_mean_sub, 1, ntime_sub[s]);
00438       data->learning->data[s].sup_index_var = gsl_stats_variance(tas_rea_mean_sub, 1, ntime_sub[s]);
00439 
00440       /* Normalize using mean and variance */
00441       (void) normalize_field(data->learning->data[s].sup_index, tas_rea_mean_sub, data->learning->data[s].sup_index_mean,
00442                              data->learning->data[s].sup_index_var, 1, 1, ntime_sub[s]);
00443 
00444       /* Compute mean and variance over time for each point */
00445       sup_mean = (double *) malloc(data->learning->sup_nlon*data->learning->sup_nlat*ntime_sub[s] * sizeof(double));
00446       if (sup_mean == NULL) alloc_error(__FILE__, __LINE__);
00447       sup_var = (double *) malloc(data->learning->sup_nlon*data->learning->sup_nlat*ntime_sub[s] * sizeof(double));
00448       if (sup_var == NULL) alloc_error(__FILE__, __LINE__);
00449       (void) time_mean_variance_field_2d(sup_mean, sup_var, tas_rea_sub, data->learning->sup_nlon, data->learning->sup_nlat, ntime_sub[s]);
00450 
00451       /* Normalize whole secondary 2D field using mean and variance at each point */
00452       (void) normalize_field_2d(data->learning->data[s].sup_val, tas_rea_sub, sup_mean,
00453                                 sup_var, data->learning->sup_nlon, data->learning->sup_nlat, ntime_sub[s]);
00454       
00455       (void) free(sup_mean);
00456       sup_mean = NULL;
00457       (void) free(sup_var);
00458       sup_var = NULL;
00459 
00461       data->learning->data[s].ntime = ntime_sub[s];
00462       data->learning->data[s].time = (double *) malloc(ntime_sub[s] * sizeof(double));
00463       if (data->learning->data[s].time == NULL) alloc_error(__FILE__, __LINE__);
00464       data->learning->data[s].time_s->year = (int *) malloc(ntime_sub[s] * sizeof(int));
00465       if (data->learning->data[s].time_s->year == NULL) alloc_error(__FILE__, __LINE__);
00466       data->learning->data[s].time_s->month = (int *) malloc(ntime_sub[s] * sizeof(int));
00467       if (data->learning->data[s].time_s->month == NULL) alloc_error(__FILE__, __LINE__);
00468       data->learning->data[s].time_s->day = (int *) malloc(ntime_sub[s] * sizeof(int));
00469       if (data->learning->data[s].time_s->day == NULL) alloc_error(__FILE__, __LINE__);
00470       data->learning->data[s].time_s->hour = (int *) malloc(ntime_sub[s] * sizeof(int));
00471       if (data->learning->data[s].time_s->hour == NULL) alloc_error(__FILE__, __LINE__);
00472       data->learning->data[s].time_s->minutes = (int *) malloc(ntime_sub[s] * sizeof(int));
00473       if (data->learning->data[s].time_s->minutes == NULL) alloc_error(__FILE__, __LINE__);
00474       data->learning->data[s].time_s->seconds = (double *) malloc(ntime_sub[s] * sizeof(double));
00475       if (data->learning->data[s].time_s->seconds == NULL) alloc_error(__FILE__, __LINE__);
00476 
00477       /* Retrieve time index spanning selected months and assign time structure values */
00478       t = 0;
00479 
00480       /* Initialize udunits */
00481       ut_set_error_message_handler(ut_ignore);
00482       unitSystem = ut_read_xml(NULL);
00483       ut_set_error_message_handler(ut_write_to_stderr);
00484       dataunits = ut_parse(unitSystem, data->conf->time_units, UT_ASCII);
00485 
00486       for (nt=0; nt<ntime_learn_all; nt++)
00487         for (ntt=0; ntt<data->conf->season[s].nmonths; ntt++)
00488           if (data->learning->time_s->month[nt] == data->conf->season[s].month[ntt]) {
00489             data->learning->data[s].time_s->year[t] = data->learning->time_s->year[nt];
00490             data->learning->data[s].time_s->month[t] = data->learning->time_s->month[nt];
00491             data->learning->data[s].time_s->day[t] = data->learning->time_s->day[nt];
00492             data->learning->data[s].time_s->hour[t] = data->learning->time_s->hour[nt];
00493             data->learning->data[s].time_s->minutes[t] = data->learning->time_s->minutes[nt];
00494             data->learning->data[s].time_s->seconds[t] = data->learning->time_s->seconds[nt];
00495             istat = utInvCalendar2(data->learning->data[s].time_s->year[t], data->learning->data[s].time_s->month[t],
00496                                    data->learning->data[s].time_s->day[t], data->learning->data[s].time_s->hour[t],
00497                                    data->learning->data[s].time_s->minutes[t], data->learning->data[s].time_s->seconds[t],
00498                                    dataunits, &(data->learning->data[s].time[t]));
00499             t++;
00500           }
00501 
00502       (void) ut_free(dataunits);
00503       (void) ut_free_system(unitSystem);  
00504       
00507       buf_learn = (double *) realloc(buf_learn, ntime_sub[s] * (data->learning->rea_neof + data->learning->obs_neof) * sizeof(double));
00508       if (buf_learn == NULL) alloc_error(__FILE__, __LINE__);
00509 
00510       /* Normalisation by the first Singular Value */
00511       rea_first_sing = data->learning->rea->sing[0];
00512       for (eof=0; eof<data->learning->rea_neof; eof++) {
00513         rea_sing = data->learning->rea->sing[eof];        
00514         for (nt=0; nt<ntime_sub[s]; nt++) {
00515           buf_learn_rea_sub[nt+eof*ntime_sub[s]] = buf_learn_rea_sub[nt+eof*ntime_sub[s]] * rea_sing / rea_first_sing;
00516           buf_learn[nt+eof*ntime_sub[s]] = buf_learn_rea_sub[nt+eof*ntime_sub[s]];
00517         }
00518       }      
00519       if (data->learning->obs_neof != 0) {
00520         obs_first_sing = data->learning->obs->sing[0];
00521         for (eof=0; eof<data->learning->obs_neof; eof++) {
00522           obs_sing = data->learning->obs->sing[eof];        
00523           for (nt=0; nt<ntime_sub[s]; nt++) {
00524             buf_learn_obs_sub[nt+eof*ntime_sub[s]] = buf_learn_obs_sub[nt+eof*ntime_sub[s]] * obs_sing / obs_first_sing;
00525             buf_learn[nt+(eof+data->learning->rea_neof)*ntime_sub[s]] = buf_learn_obs_sub[nt+eof*ntime_sub[s]];
00526           }
00527         }
00528       }
00529 
00530       /* Compute best clusters */
00531       buf_weight = (double *) realloc(buf_weight, data->conf->season[s].nclusters * (data->learning->rea_neof + data->learning->obs_neof) *
00532                                       sizeof(double));
00533       if (buf_weight == NULL) alloc_error(__FILE__, __LINE__);
00534       niter = best_clusters(buf_weight, buf_learn, data->conf->classif_type, data->conf->npartitions,
00535                             data->conf->nclassifications, data->learning->rea_neof + data->learning->obs_neof,
00536                             data->conf->season[s].nclusters, ntime_sub[s]);
00537 
00538       /* Keep only first data->learning->rea_neof EOFs */
00539       data->learning->data[s].weight = (double *) 
00540         malloc(data->conf->season[s].nclusters*data->learning->rea_neof * sizeof(double));
00541       if (data->learning->data[s].weight == NULL) alloc_error(__FILE__, __LINE__);
00542       for (clust=0; clust<data->conf->season[s].nclusters; clust++)
00543         for (eof=0; eof<data->learning->rea_neof; eof++)
00544           data->learning->data[s].weight[eof+clust*data->learning->rea_neof] =
00545             buf_weight[eof+clust*(data->learning->rea_neof+data->learning->obs_neof)];
00546       
00547       /* Classify each day in the current clusters */
00548       data->learning->data[s].class_clusters = (int *) malloc(ntime_sub[s] * sizeof(int));
00549       if (data->learning->data[s].class_clusters == NULL) alloc_error(__FILE__, __LINE__);
00550       (void) class_days_pc_clusters(data->learning->data[s].class_clusters, buf_learn,
00551                                     data->learning->data[s].weight, data->conf->classif_type,
00552                                     data->learning->rea_neof, data->conf->season[s].nclusters,
00553                                     ntime_sub[s]);
00554 
00555       /* Set mean and variance of distances to clusters to 1.0 because we first need to compute distances and */
00556       /* we don't have a control run in learning mode */
00557       mean_dist = (double *) realloc(mean_dist, data->conf->season[s].nclusters * sizeof(double));
00558       if (mean_dist == NULL) alloc_error(__FILE__, __LINE__);
00559       var_dist = (double *) realloc(var_dist, data->conf->season[s].nclusters * sizeof(double));
00560       if (var_dist == NULL) alloc_error(__FILE__, __LINE__);
00561       for (clust=0; clust<data->conf->season[s].nclusters; clust++) {
00562         mean_dist[clust] = 1.0;
00563         var_dist[clust] = 1.0;
00564       }
00565 
00566       /* Compute distances to clusters using normalization */
00567       dist = (double *) realloc(dist, data->conf->season[s].nclusters*ntime_sub[s] * sizeof(double));
00568       if (dist == NULL) alloc_error(__FILE__, __LINE__);
00569       (void) dist_clusters_normctrl(dist, buf_learn_pc_sub, data->learning->data[s].weight,
00570                                     data->learning->pc_normalized_var, data->learning->pc_normalized_var, mean_dist, var_dist,
00571                                     data->learning->rea_neof, data->conf->season[s].nclusters, ntime_sub[s]);
00572       /* Normalize */
00573       for (clust=0; clust<data->conf->season[s].nclusters; clust++) {
00574         /* Calculate mean over time */
00575         mean_dist[clust] = gsl_stats_mean(&(dist[clust*ntime_sub[s]]), 1, ntime_sub[s]);
00576         /* Calculate variance over time */
00577         var_dist[clust] = gsl_stats_variance(&(dist[clust*ntime_sub[s]]), 1, ntime_sub[s]);
00578         /* Normalize */
00579         for (nt=0; nt<ntime_sub[s]; nt++)
00580           dist[nt+clust*ntime_sub[s]] = ( dist[nt+clust*ntime_sub[s]] - mean_dist[clust] ) / sqrt(var_dist[clust]);
00581       }
00582 
00583       /* Classify each day in the current clusters */
00584       /* data->learning->data[s].class_clusters */
00585       /*      data->learning->data[s].class_clusters = (int *) malloc(ntime_sub[s] * sizeof(int));
00586       if (data->learning->data[s].class_clusters == NULL) alloc_error(__FILE__, __LINE__);
00587       (void) class_days_pc_clusters(data->learning->data[s].class_clusters, buf_learn,
00588       data->learning->data[s].weight, data->conf->classif_type,
00589       data->learning->rea_neof, data->conf->season[s].nclusters, ntime_sub[s]);*/
00590 
00591       /* Allocate memory for regression */
00592       precip_reg = (double *) malloc(data->conf->season[s].nreg * sizeof(double));
00593       if (precip_reg == NULL) alloc_error(__FILE__, __LINE__);
00594       precip_index = (double *) malloc(ntime_sub[s] * sizeof(double));
00595       if (precip_index == NULL) alloc_error(__FILE__, __LINE__);
00596       precip_err = (double *) malloc(ntime_sub[s] * sizeof(double));
00597       if (precip_err == NULL) alloc_error(__FILE__, __LINE__);
00598       dist_reg = (double *) malloc(data->conf->season[s].nreg*ntime_sub[s] * sizeof(double));
00599       if (dist_reg == NULL) alloc_error(__FILE__, __LINE__);
00600       vif = (double *) malloc(data->conf->season[s].nreg * sizeof(double));
00601       if (vif == NULL) alloc_error(__FILE__, __LINE__);
00602 
00603       /* Create variable to hold values of x vector for regression */
00604       /* Begin with distances to clusters */
00605       for (clust=0; clust<data->conf->season[s].nclusters; clust++)
00606         for (t=0; t<ntime_sub[s]; t++)
00607           dist_reg[t+clust*ntime_sub[s]] = dist[t+clust*ntime_sub[s]];
00608 
00609       /* For special seasons using secondary field in the regression, append values to x vector for regression */
00610       if (data->conf->season[s].nreg == (data->conf->season[s].nclusters+1)) {
00611         clust = data->conf->season[s].nclusters;
00612         for (t=0; t<ntime_sub[s]; t++)
00613           dist_reg[t+clust*ntime_sub[s]] = data->learning->data[s].sup_index[t];
00614       }
00615 
00616       data->learning->data[s].precip_reg_cst = (double *) malloc(data->reg->npts * sizeof(double));
00617       if (data->learning->data[s].precip_reg_cst == NULL) alloc_error(__FILE__, __LINE__);
00618       data->learning->data[s].precip_reg = (double *) malloc(data->reg->npts*data->conf->season[s].nreg * sizeof(double));
00619       if (data->learning->data[s].precip_reg == NULL) alloc_error(__FILE__, __LINE__);
00620       data->learning->data[s].precip_reg_dist = (double *)
00621         malloc(data->conf->season[s].nclusters*ntime_sub[s] * sizeof(double));
00622       if (data->learning->data[s].precip_reg_dist == NULL) alloc_error(__FILE__, __LINE__);
00623       data->learning->data[s].precip_index = (double *) malloc(data->reg->npts*ntime_sub[s] * sizeof(double));
00624       if (data->learning->data[s].precip_index == NULL) alloc_error(__FILE__, __LINE__);
00625       data->learning->data[s].precip_index_obs = (double *) malloc(data->reg->npts*ntime_sub[s] * sizeof(double));
00626       if (data->learning->data[s].precip_index_obs == NULL) alloc_error(__FILE__, __LINE__);
00627       data->learning->data[s].precip_reg_err = (double *) malloc(data->reg->npts*ntime_sub[s] * sizeof(double));
00628       if (data->learning->data[s].precip_reg_err == NULL) alloc_error(__FILE__, __LINE__);
00629       data->learning->data[s].precip_reg_rsq = (double *) malloc(data->reg->npts * sizeof(double));
00630       if (data->learning->data[s].precip_reg_rsq == NULL) alloc_error(__FILE__, __LINE__);
00631       data->learning->data[s].precip_reg_vif = (double *) malloc(data->conf->season[s].nreg * sizeof(double));
00632       if (data->learning->data[s].precip_reg_vif == NULL) alloc_error(__FILE__, __LINE__);
00633       data->learning->data[s].precip_reg_autocor = (double *) malloc(data->reg->npts * sizeof(double));
00634       if (data->learning->data[s].precip_reg_autocor == NULL) alloc_error(__FILE__, __LINE__);
00635 
00636       /* Save distances */
00637       for (t=0; t<ntime_sub[s]; t++)
00638         for (clust=0; clust<data->conf->season[s].nclusters; clust++)
00639           data->learning->data[s].precip_reg_dist[clust+t*data->conf->season[s].nclusters] = dist[t+clust*ntime_sub[s]];
00640 
00641       for (pt=0; pt<data->reg->npts; pt++) {
00642         /* Compute regression and save regression constant */
00643         istat = regress(precip_reg, dist_reg, &(mean_precip_sub[pt*ntime_sub[s]]), &(data->learning->data[s].precip_reg_cst[pt]),
00644                         precip_index, precip_err, &chisq, &rsq, vif, &autocor, data->conf->season[s].nreg, ntime_sub[s]);
00645         /* Save R^2 */
00646         data->learning->data[s].precip_reg_rsq[pt] = rsq;
00647         /* Save residuals */
00648         for (t=0; t<ntime_sub[s]; t++)
00649           data->learning->data[s].precip_reg_err[pt+t*data->reg->npts] = precip_err[t];
00650         /* Save autocorrelation of residuals */
00651         data->learning->data[s].precip_reg_autocor[pt] = autocor;
00652         /* Save Variance Inflation Factor VIF, and compute mean VIF */
00653         if (pt == 0) {
00654           meanvif = 0.0;
00655           for (term=0; term<data->conf->season[s].nreg; term++) {
00656             data->learning->data[s].precip_reg_vif[term] = vif[term];
00657             meanvif += vif[term];
00658           }
00659           meanvif = meanvif / (double) data->conf->season[s].nreg;
00660         }
00661 
00662         //        (void) fprintf(stdout, "%s: pt=%d R^2=%lf CHI^2=%lf ACOR=%lf\n", __FILE__, pt, rsq, chisq, autocor);
00663 
00664         /* Save regression coefficients */
00665         for (clust=0; clust<data->conf->season[s].nreg; clust++)
00666           data->learning->data[s].precip_reg[pt+clust*data->reg->npts] = precip_reg[clust];
00667 
00668         /* Save precipitation index */
00669         for (t=0; t<ntime_sub[s]; t++)
00670           data->learning->data[s].precip_index[pt+t*data->reg->npts] = precip_index[t];
00671 
00672         /* Save observed precipitation index */
00673         for (t=0; t<ntime_sub[s]; t++)
00674           data->learning->data[s].precip_index_obs[pt+t*data->reg->npts] = mean_precip_sub[t+pt*ntime_sub[s]];
00675       }
00676 
00677       (void) fprintf(stdout, "%s: MeanVIF=%lf\n", __FILE__, meanvif);
00678 
00679       (void) free(precip_reg);
00680       (void) free(precip_index);
00681       (void) free(precip_err);
00682       (void) free(dist_reg);
00683       (void) free(vif);
00684 
00685       (void) free(buf_learn_rea_sub);
00686       buf_learn_rea_sub = NULL;
00687       if (data->learning->obs_neof != 0) {
00688         (void) free(buf_learn_obs_sub);
00689         buf_learn_obs_sub = NULL;
00690       }
00691       (void) free(buf_learn_pc_sub);
00692       buf_learn_pc_sub = NULL;
00693       (void) free(tas_rea_mean_sub);
00694       tas_rea_mean_sub = NULL;
00695       (void) free(tas_rea_sub);
00696       tas_rea_sub = NULL;
00697       (void) free(mean_precip_sub);
00698       mean_precip_sub = NULL;
00699     }
00700 
00701     (void) free(tas_rea);
00702     (void) free(tas_rea_mean);
00703     (void) free(mean_precip);
00704     (void) free(buf_weight);
00705     (void) free(buf_learn);
00706     (void) free(buf_learn_rea);
00707     if (data->learning->obs_neof != 0) (void) free(buf_learn_obs);
00708     (void) free(buf_learn_pc);
00709     (void) free(ntime_sub);
00710     (void) free(rea_var);
00711     (void) free(mean_dist);
00712     (void) free(var_dist);
00713     (void) free(dist);
00714 
00715     /* If wanted, write learning data to files for later use */
00716     if (data->learning->learning_save == TRUE) {
00717       (void) printf("Writing learning fields.\n");
00718       istat = write_learning_fields(data);
00719     }
00720     if (niter == 1) {
00721       (void) fprintf(stderr, "%s: ERROR: In one classification, only 1 iteration was needed! Probably an error in your EOF data or configuration. Must abort...\n",
00722                      __FILE__);
00723       return -1;
00724     }
00725   }
00726 
00727   /* Success status */
00728   return 0;
00729 }

Generated on 12 May 2016 for DSCLIM by doxygen 1.6.1