best_clusters.c

Go to the documentation of this file.
00001 /* ***************************************************** */
00002 /* Algorithm to find best clusters among many tries.     */
00003 /* best_clusters.c                                       */
00004 /* ***************************************************** */
00005 /* Author: Christian Page, CERFACS, Toulouse, France.    */
00006 /* ***************************************************** */
00011 /* LICENSE BEGIN
00012 
00013 Copyright Cerfacs (Christian Page) (2015)
00014 
00015 christian.page@cerfacs.fr
00016 
00017 This software is a computer program whose purpose is to downscale climate
00018 scenarios using a statistical methodology based on weather regimes.
00019 
00020 This software is governed by the CeCILL license under French law and
00021 abiding by the rules of distribution of free software. You can use, 
00022 modify and/ or redistribute the software under the terms of the CeCILL
00023 license as circulated by CEA, CNRS and INRIA at the following URL
00024 "http://www.cecill.info". 
00025 
00026 As a counterpart to the access to the source code and rights to copy,
00027 modify and redistribute granted by the license, users are provided only
00028 with a limited warranty and the software's author, the holder of the
00029 economic rights, and the successive licensors have only limited
00030 liability. 
00031 
00032 In this respect, the user's attention is drawn to the risks associated
00033 with loading, using, modifying and/or developing or reproducing the
00034 software by the user in light of its specific status of free software,
00035 that may mean that it is complicated to manipulate, and that also
00036 therefore means that it is reserved for developers and experienced
00037 professionals having in-depth computer knowledge. Users are therefore
00038 encouraged to load and test the software's suitability as regards their
00039 requirements in conditions enabling the security of their systems and/or 
00040 data to be ensured and, more generally, to use and operate it in the 
00041 same conditions as regards security. 
00042 
00043 The fact that you are presently reading this means that you have had
00044 knowledge of the CeCILL license and that you accept its terms.
00045 
00046 LICENSE END */
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 #include <classif.h>
00055 
/** Euclidean distance between two cluster barycentres stored in the
    partition-interleaved array built by best_clusters().

    @param[in]  testclusters  Cluster positions, indexed as part + eof*npart + clust*npart*neof.
    @param[in]  part_a        Partition index of the first cluster.
    @param[in]  clust_a       Cluster index of the first cluster.
    @param[in]  part_b        Partition index of the second cluster.
    @param[in]  clust_b       Cluster index of the second cluster.
    @param[in]  npart         Number of partitions stored in testclusters.
    @param[in]  neof          Number of EOFs per cluster.

    \return                   Square root of the squared differences summed over all EOFs.
*/
static double
best_clusters_dist_bary(const double *testclusters, int part_a, int clust_a, int part_b, int clust_b, int npart, int neof) {
  double dist_bary = 0.0; /* Squared differences summed over all EOFs. */
  double val; /* Difference in positions between the two clusters for a particular EOF. */
  int eof; /* Loop counter for eofs */

  for (eof=0; eof<neof; eof++) {
    val = testclusters[part_a+eof*npart+clust_a*npart*neof] - testclusters[part_b+eof*npart+clust_b*npart*neof];
    dist_bary += (val * val);
  }

  return sqrt(dist_bary);
}

/** Algorithm to find the best partition of clusters among many tries.

    npart partitions are generated with generate_clusters(). For each pair of
    partitions, every cluster of one partition is matched to its closest
    cluster in the other, and the largest of these matching distances is
    taken as the distance between the two partitions. The partition whose
    mean distance to all the other partitions is the smallest is selected
    (the first one wins on ties). When npart is 1 the single partition is
    selected.

    @param[out] best_clusters  Selected partition: neof*ncluster values, indexed as eof + clust*neof.
    @param[in]  pc_eof_days    Input data, passed unchanged to generate_clusters().
    @param[in]  type           Distance type. Only "euclidian" is supported; any other value aborts.
    @param[in]  npart          Number of partitions to generate (must be >= 1).
    @param[in]  nclassif       Passed unchanged to generate_clusters().
    @param[in]  neof           Number of EOFs (must be >= 1).
    @param[in]  ncluster       Number of clusters (must be >= 1).
    @param[in]  ndays          Passed unchanged to generate_clusters().

    \return                    Minimum of the values returned by generate_clusters() over all partitions.
*/
int
best_clusters(double *best_clusters, double *pc_eof_days, char *type, int npart, int nclassif, int neof, int ncluster, int ndays) {
  double min_meandistval; /* Minimum distance between a partition and all other partitions */
  double meandistval; /* Mean distance value between each corresponding clusters for the comparison of two partitions. */
  double maxdistval; /* Maximum distance over all clusters for the two partitions comparison. */
  double minval; /* Minimum distance to find a corresponding closest cluster in another partition. */
  double dist_bary; /* Distance summed over all EOFs between a cluster in one partition and other clusters in other partitions. */

  double *tmpcluster = NULL; /* Temporary vector of clusters for one partition. */
  double *testclusters = NULL; /* Temporary vector of clusters for all partitions. */

  int min_cluster = -1; /* Cluster number used to find a corresponding cluster in another partition. */
  int min_partition = -1; /* Partition number used to find the partition which has the minimum distance to all other partitions. */

  int part; /* Loop counter for partitions */
  int part1; /* Loop counter for partitions inside loop */
  int part2; /* Loop counter for partitions inside loop */
  int clust; /* Loop counter for clusters */
  int clust1; /* Loop counter for clusters inside loop */
  int clust2; /* Loop counter for clusters inside loop */
  int eof; /* Loop counter for eofs */

  int niter; /* Number of iterations */
  int niter_min = 0; /* Minimum number of iterations (seeded from the first partition) */

  (void) fprintf(stdout, "%s:: BEGIN: Find the best partition of clusters.\n", __FILE__);

  /* Fail fast on an unsupported distance type, before generating any partition. */
  if ( strcmp(type, "euclidian") ) {
    (void) fprintf(stderr, "best_clusters: ABORT: Unknown distance type=%s!!\n", type);
    (void) abort();
  }

  /* Non-positive dimensions would lead to zero-sized allocations and a meaningless selection. */
  if (npart < 1 || neof < 1 || ncluster < 1) {
    (void) fprintf(stderr, "best_clusters: ABORT: Invalid dimensions npart=%d neof=%d ncluster=%d!\n", npart, neof, ncluster);
    (void) abort();
  }

  /* Allocate memory. Sizes are computed in size_t to avoid int overflow in the products. */
  tmpcluster = (double *) calloc((size_t) neof * (size_t) ncluster, sizeof(double));
  if (tmpcluster == NULL) alloc_error(__FILE__, __LINE__);
  testclusters = (double *) calloc((size_t) neof * (size_t) ncluster * (size_t) npart, sizeof(double));
  if (testclusters == NULL) alloc_error(__FILE__, __LINE__);

  /* Generate npart clusters (which will be used to find the best clustering). */
  (void) fprintf(stdout, "%s:: Generating %d partitions of clusters.\n", __FILE__, npart);
  for (part=0; part<npart; part++) {
#if DEBUG >= 1
    (void) fprintf(stdout, "%s:: Generating %d/%d partition of clusters.\n", __FILE__, part+1, npart);
#endif
    niter = generate_clusters(tmpcluster, pc_eof_days, type, nclassif, neof, ncluster, ndays);
    /* Track the true minimum: the first partition seeds it, no arbitrary cap. */
    if (part == 0 || niter < niter_min) niter_min = niter;
    /* Store this partition interleaved with the others: partition index varies fastest. */
    for (clust=0; clust<ncluster; clust++)
      for (eof=0; eof<neof; eof++)
        testclusters[part+eof*npart+clust*npart*neof] = tmpcluster[eof+clust*neof];
  }

  min_meandistval = HUGE_VAL;
  min_partition = -1;
  /* Loop over all partition and compute distance between each other partition. */
  (void) fprintf(stdout, "%s:: Computing distance between each partitions of clusters.\n", __FILE__);
  for (part1=0; part1<npart; part1++) {
#if DEBUG >= 1
    (void) fprintf(stdout, "%s:: Partition %d/%d.\n", __FILE__, part1+1, npart);
#endif
    meandistval = 0.0;
    for (part2=0; part2<npart; part2++) {

      /* Don't compute for the same partition number. */
      if (part1 == part2)
        continue;

      maxdistval = -HUGE_VAL;

      for (clust1=0; clust1<ncluster; clust1++) {

        /* Find closest cluster to current one (in terms of distance summed over all EOF). */
        minval = HUGE_VAL;
        min_cluster = -1;
        for (clust2=0; clust2<ncluster; clust2++) {
          dist_bary = best_clusters_dist_bary(testclusters, part2, clust1, part1, clust2, npart, neof);

          /* Check for minimum distance. We want to find the corresponding closest cluster in another partition. */
          if (dist_bary < minval) {
            minval = dist_bary;
            min_cluster = clust2;
          }
        }

        /* Only reachable when every distance is infinite or NaN. */
        if (min_cluster == -1) {
          (void) fprintf(stderr, "best_clusters: ABORT: Error in algorithm. Cannot find best cluster!\n");
          (void) abort();
        }

        /* Save the maximum distance over all clusters for the two partitions comparison. */
        if (minval > maxdistval)
          maxdistval = minval;
      }
      /* Sum the maximum distance of the clusters between each corresponding one over all the partitions.
         We want to compute the mean afterward. */
      meandistval += maxdistval;
    }
    /* Compute the mean of the distances between each corresponding clusters for the comparison of two partitions.
       With a single partition there is nothing to average: keep 0.0 rather than computing 0.0/0.0 (NaN),
       which would make the comparison below fail and abort the whole algorithm. */
    if (npart > 1)
      meandistval = meandistval / (double) (npart-1);
    /* We want to keep the partition which has the minimum distance to all other partitions. */
    if (meandistval < min_meandistval) {
      min_meandistval = meandistval;
      min_partition = part1;
    }
  }

  if (min_partition == -1) {
    /* Failing algorithm */
    (void) fprintf(stderr, "best_clusters: ABORT: Error in algorithm. Cannot find best partition!\n");
    (void) abort();
  }

  /* Save data for the best selected partition of clusters. */
  (void) fprintf(stdout, "%s:: Save best partition of clusters.\n", __FILE__);
  for (clust=0; clust<ncluster; clust++)
    for (eof=0; eof<neof; eof++)
      best_clusters[eof+clust*neof] = testclusters[min_partition+eof*npart+clust*npart*neof];

  /* Free memory. */
  (void) free(tmpcluster);
  (void) free(testclusters);

  (void) fprintf(stdout, "%s:: END: Find the best partition of clusters. Partition %d selected.\n", __FILE__, min_partition);

  return niter_min;
}

Generated on 12 May 2016 for DSCLIM by  doxygen 1.6.1