00001
00002
00003
00004
00005 #include <stdio.h>
00006 #include <stddef.h>
00007 #include <stdlib.h>
00008 #include <string.h>
00009
00010 #include "mpi.h"
00011 #include "PSMILe_f2c.h"
00012
00013 #ifndef PSMILE_BSEND
00014
00015 #define PRISM_Error_Alloc 13
00016
00017 #define MAX(a,b) (a) > (b) ? (a) : (b)
00018
00019 # define ASSERT2(c, a, b) \
00020 if (!(c)) {\
00021 fprintf(stderr, "### Assertion violation: %s (%s = %d, %s = %d) in %s:%d\n", #c, #a, a, #b, b, __FILE__, __LINE__);\
00022 abort();\
00023 }
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifdef DEBUG
00034 # define n_to_free 0
00035 #else
00036 # define n_to_free 64
00037 #endif
00038
00039 #define dfree 64*2048
00040 #define N_HASH 16
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069 static double dalloc = 0.0;
00070
00071 static MPI_Request *requests = NULL;
00072 static void **buffers = NULL;
00073 static long long *lengths = NULL;
00074 static int nalloc = 0, max_alloc = 0;
00075
00076 static int use_hash = 0;
00077 static int max_type = -1, min_type = 0;
00078 static int *dbsend = NULL;
00079 static int *dbtype = NULL;
00080 static int *dblen = NULL;
00081
00082
00083
00084
00085
00086 void psmile_bsend_init (INTEGER *ftypes, INTEGER *flengths,
00087 INTEGER *number_of_ftypes, INTEGER *ierror);
00088
00089 void psmile_bsend (void *buf, INTEGER *lenbuf, INTEGER *dtype,
00090 INTEGER *dest, INTEGER *tag, INTEGER *comm,
00091 INTEGER *ierror);
00092
00093 static void error_in_testany (int imin, int ind, int error);
00094 static int free_buffers (long long len_bytes, int free_memory);
00095 static int release_buffer (int i, long long len_bytes, int *imin,
00096 int free_memory);
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106 #ifdef ANSI_C
00107
00108 void psmile_bsend (void *buf, INTEGER *lenbuf, INTEGER *dtype,
00109 INTEGER *dest, INTEGER *tag, INTEGER *comm,
00110 INTEGER *ierror)
00111
00112 #else
00113
00114 void psmile_bsend (buf, lenbuf, dtype,
00115 dest, tag, comm, ierror)
00116
00117 void *buf;
00118 INTEGER *lenbuf, *dtype, *dest, *tag, *comm, *ierror;
00119
00120 #endif
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190 {
00191 long long len_bytes;
00192 MPI_Datatype dtypec = MPI_Type_f2c(*dtype);
00193 MPI_Comm commc = MPI_Comm_f2c(*comm);
00194
00195
00196
00197 ASSERT (*dtype >= min_type && *dtype <= max_type)
00198
00199 if (use_hash) {
00200 register int i, j = (*dtype) % N_HASH;
00201 #pragma vdir vector
00202 for (i = dbsend[j]; i < dbsend[j+1]; j++)
00203 if (dbtype[i] == (*dtype)) break;
00204
00205 ASSERT (i < dbsend[j+1]);
00206 len_bytes = *lenbuf * dblen[i];
00207 }
00208 else {
00209 ASSERT (dbsend [*dtype-min_type] > 0)
00210 len_bytes = *lenbuf * dbsend [*dtype-min_type];
00211 }
00212
00213 #ifdef VERBOSE_COMM
00214 #define VERBOSE_COMM
00215 fprintf (stdout, "-> %d; tag %d comm %d lenbuf %d %lld\n",
00216 *dest, *tag, *comm, *lenbuf, len_bytes);
00217 fflush (stdout);
00218 #else
00219
00220 #ifdef DEBUG
00221 if (*lenbuf < 1) {
00222 int rank;
00223 MPI_Comm_rank (commc, &rank);
00224 fprintf (stderr, "(%d)-> psmile_bsend: dest = %d, tag = %d, comm %d, lenbuf %d\n",
00225 rank, *dest, *tag, *comm, *lenbuf);
00226 }
00227 #endif
00228 #endif
00229
00230
00231
00232
00233
00234 #ifdef PSMILE_SEND_LENBUF
00235
00236 if (len_bytes <= PSMILE_SEND_LENBUF) {
00237 *ierror = MPI_Send (buf, (int) *lenbuf, dtypec,
00238 (int) *dest, (int) *tag, commc);
00239
00240 return;
00241 }
00242
00243 #else
00244
00245
00246
00247
00248 if (*lenbuf <= 0) {
00249 MPI_Request lrequest;
00250
00251 *ierror = MPI_Isend (buf, (int) *lenbuf, dtypec,
00252 (int) *dest, (int) *tag, commc, &lrequest);
00253 if (*ierror != MPI_SUCCESS) return;
00254
00255 *ierror = MPI_Request_free (&lrequest);
00256 return;
00257 }
00258 #endif
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272 {
00273 int imin = -1;
00274 int free_memory = nalloc > n_to_free || dalloc > dfree;
00275
00276 if (free_memory) {
00277 imin = free_buffers (len_bytes, free_memory);
00278 ASSERT2 (imin < nalloc, imin, nalloc);
00279 }
00280
00281
00282
00283
00284
00285
00286
00287
00288 if (imin == -1) {
00289 if (nalloc == max_alloc) {
00290 register int i;
00291 max_alloc += 32;
00292 if (nalloc == 0) {
00293 requests = (MPI_Request *) MALLOC (max_alloc * sizeof(MPI_Request));
00294 buffers = (void **) MALLOC (max_alloc * sizeof(void *));
00295 lengths = (long long *) MALLOC (max_alloc * sizeof(long long));
00296 }
00297 else {
00298 requests = (MPI_Request *) realloc (requests, max_alloc * sizeof(MPI_Request));
00299 buffers = (void **) realloc (buffers, max_alloc * sizeof(void *));
00300 lengths = (long long *) realloc (lengths, max_alloc * sizeof(long long));
00301 }
00302
00303 if (! requests || ! buffers || ! lengths) {
00304 fprintf (stderr, "pmsile_bsend: Cannot allocate %d bytes in order to allocate requests\n",
00305 max_alloc*(sizeof(MPI_Request)+sizeof(void *)+sizeof(long long)));
00306 *ierror = MAX (1307, MPI_ERR_LASTCODE+10);
00307 return;
00308 }
00309
00310
00311 #pragma vdir vector
00312 for (i=nalloc; i < max_alloc; i++) {
00313 requests[i] = MPI_REQUEST_NULL;
00314 buffers[i] = (void *) NULL;
00315 lengths[i] = 0;
00316 }
00317 }
00318
00319
00320
00321 allocate_buffer:
00322 buffers [nalloc] = (void *) MALLOC (len_bytes);
00323 if (buffers [nalloc] == NULL) {
00324 if (! free_memory) {
00325
00326 free_memory = 1;
00327 imin = free_buffers (len_bytes, free_memory);
00328 if (imin == -1) goto allocate_buffer;
00329 ASSERT2 (imin < nalloc, imin, nalloc);
00330 }
00331 else {
00332 fprintf (stderr, "pmsile_bsend: Cannot allocate %lld bytes in order to send buffer\n",
00333 len_bytes);
00334 *ierror = MAX (1307, MPI_ERR_LASTCODE+10);
00335 return;
00336 }
00337 }
00338 else {
00339
00340
00341 imin = nalloc;
00342 nalloc ++;
00343 dalloc += len_bytes;
00344 lengths [ imin] = len_bytes;
00345 }
00346 }
00347
00348
00349
00350 ASSERT2 (imin < nalloc, imin, nalloc);
00351 ASSERT (imin >= 0);
00352 ASSERT (lengths[imin] >= len_bytes);
00353 ASSERT (requests[imin] == MPI_REQUEST_NULL);
00354
00355
00356
00357 memcpy (buffers[imin], buf, (size_t) len_bytes);
00358
00359 *ierror = MPI_Isend (buffers[imin], *lenbuf, dtypec,
00360 *dest, *tag, commc, &requests[imin]);
00361
00362 return;
00363 }
00364 }
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388 #ifdef ANSI_C
00389
00390 void psmile_bsend_init (INTEGER *ftypes, INTEGER *flengths,
00391 INTEGER *number_of_ftypes, INTEGER *ierror)
00392
00393 #else
00394
00395 void psmile_bsend_init (ftypes, flengths, number_of_ftypes,
00396 ierror)
00397
00398 INTEGER *ftypes, *flengths, *number_of_ftypes, *ierror;
00399
00400 #endif
00401
00402 {
00403 register int i, j, k, ndbsnd;
00404 register int n_ftypes = *number_of_ftypes;
00405 int *num = NULL;
00406
00407 max_type = min_type = ftypes[0];
00408 #pragma vdir vector
00409 for (i=1; i < n_ftypes; i++)
00410 max_type = ftypes[i] > max_type ? ftypes [i] : max_type;
00411
00412 #pragma vdir vector
00413 for (i=1; i < n_ftypes; i++)
00414 min_type = ftypes[i] < min_type ? ftypes [i] : min_type;
00415
00416 use_hash = max_type - min_type > 64;
00417 if (! use_hash) {
00418
00419 ndbsnd = max_type - min_type + 1;
00420 dbsend = (int *) MALLOC (ndbsnd*sizeof(int));
00421 if (! dbsend) {
00422 fprintf (stderr, "Error in psmile_bsend_init: Cannot allocate %d bytes\n",
00423 ndbsnd*sizeof(int));
00424 *ierror = PRISM_Error_Alloc;
00425 return;
00426 }
00427
00428 #pragma vdir vector
00429 for (i = 0; i < ndbsnd; i++) dbsend [i] = 0;
00430
00431 #pragma vdir vector
00432 for (i = 0; i < n_ftypes; i++)
00433 dbsend [ftypes[i]-min_type] = flengths[i];
00434 }
00435 else {
00436
00437 ndbsnd = N_HASH + 1;
00438
00439 dbsend = (int *) MALLOC ((ndbsnd+2*n_ftypes)*sizeof(int));
00440 if (! dbsend) {
00441 fprintf (stderr, "Error in psmile_bsend_init: Cannot allocate %d bytes\n",
00442 (ndbsnd+2*n_ftypes)*sizeof(int));
00443 *ierror = PRISM_Error_Alloc;
00444 return;
00445 }
00446
00447 num = (int *) MALLOC (ndbsnd*sizeof(int));
00448 if (! num) {
00449 fprintf (stderr, "Error in psmile_bsend_init: Cannot allocate %d bytes\n",
00450 ndbsnd*sizeof(int));
00451 *ierror = PRISM_Error_Alloc;
00452 return;
00453 }
00454
00455 dbtype = dbsend + ndbsnd;
00456 dblen = dbtype + n_ftypes;
00457
00458 #pragma vdir vector
00459 for (j = 0; j < ndbsnd; j++)
00460 dbsend [j] = 0;
00461
00462 #pragma vdir vector
00463 for (i = 0; i < n_ftypes; i++) {
00464 j = ftypes[i] % N_HASH;
00465 dbsend[j+1] ++;
00466 }
00467
00468
00469
00470
00471 #pragma vdir vector
00472 for (j = 1; j < ndbsnd; j++) {
00473 dbsend[j] += dbsend[j-1];
00474 }
00475
00476 #pragma vdir vector
00477 for (j = 0; j < ndbsnd; j++) {
00478 num[j] = 0;
00479 }
00480
00481 #pragma vdir vector
00482 for (i = 0; i < n_ftypes; i++) {
00483 j = ftypes[i] % N_HASH;
00484 k = dbsend[j] + num[j];
00485 num[j] ++;
00486
00487 dbtype[k] = ftypes[i];
00488 dblen [k] = flengths[i];
00489 }
00490
00491 #ifdef DEBUGXX
00492 fprintf (stderr, "ftypes:");
00493 for (i = 0; i < n_ftypes; i++) fprintf (stderr, "%d ", ftypes[i]);
00494 fprintf (stderr, "\n");
00495
00496 fprintf (stderr, "flengths:");
00497 for (i = 0; i < n_ftypes; i++) fprintf (stderr, "%d ", flengths[i]);
00498 fprintf (stderr, "\n");
00499
00500 fprintf (stderr, "dbsend:");
00501 for (i = 0; i < ndbsnd; i++) fprintf (stderr, "%d ", dbsend[i]);
00502 fprintf (stderr, "\n");
00503
00504 fprintf (stderr, "dbtype:");
00505 for (i = 0; i < n_ftypes; i++) fprintf (stderr, "%d ", dbtype[i]);
00506 fprintf (stderr, "\n");
00507
00508 fprintf (stderr, "dblen:");
00509 for (i = 0; i < n_ftypes; i++) fprintf (stderr, "%d ", dblen[i]);
00510 fprintf (stderr, "\n");
00511 #endif
00512
00513
00514
00515 FREE (num);
00516 }
00517
00518 *ierror = 0;
00519 return;
00520 }
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535 int free_buffers (long long len_bytes, int free_memory)
00536 {
00537 int imin = -1;
00538 int ibeg = 0;
00539 int error, flag;
00540 MPI_Status lstatus;
00541
00542
00543
00544
00545
00546 while (ibeg < nalloc) {
00547 register int i;
00548
00549 #pragma vdir vector
00550 for (i=ibeg; i < nalloc; i++) {
00551 if (requests [i] == MPI_REQUEST_NULL) break;
00552 }
00553
00554 if (i >= nalloc) break;
00555
00556
00557 ibeg = release_buffer (i, len_bytes, &imin, free_memory);
00558 if (imin >= 0 && lengths[imin] == len_bytes) {
00559 return imin;
00560 }
00561
00562 }
00563
00564
00565
00566
00567 if (imin != -1) return imin;
00568
00569
00570
00571
00572
00573 ibeg = 0;
00574
00575 while (ibeg < nalloc) {
00576 int ind;
00577 error = MPI_Testany (nalloc-ibeg, requests+ibeg, &ind, &flag,
00578 &lstatus);
00579 if (error != MPI_SUCCESS) {
00580 error_in_testany (ibeg, ind, error);
00581 return imin;
00582 }
00583
00584
00585
00586
00587 if (! flag || ind == MPI_UNDEFINED) return imin;
00588
00589
00590
00591 ind = ibeg + ind;
00592
00593
00594
00595 ASSERT (requests[ind] == MPI_REQUEST_NULL)
00596
00597 ibeg = release_buffer (ind, len_bytes, &imin, free_memory);
00598
00599 if (imin >= 0 && lengths[imin] == len_bytes) return imin;
00600 }
00601
00602 return imin;
00603 }
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622 static int release_buffer (int i, long long len_bytes, int *imin,
00623 int free_memory)
00624 {
00625 ASSERT (0 <= i && i < nalloc)
00626 ASSERT (i != *imin)
00627
00628 if (lengths [i] < len_bytes ||
00629 (*imin >= 0 && lengths[i] >= lengths[*imin])) {
00630
00631
00632
00633 if (free_memory) {
00634 FREE (buffers[i]);
00635
00636 dalloc -= lengths [i];
00637 nalloc --;
00638
00639 buffers [i] = buffers [nalloc];
00640 lengths [i] = lengths [nalloc];
00641
00642 requests [i] = requests [nalloc];
00643
00644 #ifdef PRISM_ASSERTION
00645
00646 requests [nalloc] = MPI_REQUEST_NULL;
00647 #endif
00648 }
00649 else i++;
00650 }
00651 else if (free_memory && *imin >= 0) {
00652
00653
00654
00655 ASSERT2 (*imin < nalloc, *imin, nalloc)
00656 ASSERT (*imin >= 0)
00657 ASSERT (lengths[*imin] >= len_bytes)
00658 ASSERT (requests [*imin] == MPI_REQUEST_NULL)
00659 ASSERT (requests [i] == MPI_REQUEST_NULL)
00660
00661 FREE (buffers[*imin]);
00662
00663 dalloc -= lengths [*imin];
00664 nalloc --;
00665
00666
00667
00668 buffers [*imin] = buffers [i];
00669 lengths [*imin] = lengths [i];
00670
00671
00672
00673 buffers [i] = buffers [nalloc];
00674 lengths [i] = lengths [nalloc];
00675
00676 requests [i] = requests [nalloc];
00677
00678 #ifdef PRISM_ASSERTION
00679
00680 requests [nalloc] = MPI_REQUEST_NULL;
00681 #endif
00682 }
00683 else {
00684 *imin = i;
00685 i++;
00686 }
00687
00688 ASSERT2 (*imin < nalloc, *imin, nalloc)
00689
00690 return i;
00691 }
00692
00693
00694
00695
00696
00697
00698
00699 static void error_in_testany (int ibeg, int ind, int error)
00700 {
00701 register int i;
00702 fprintf (stderr, "\nError in MPI_Testany () called form psmile_bsend: error %d\n",
00703 error);
00704 fprintf (stderr, "ind %d request %d nalloc %d ibeg %d:\n",
00705 ind, requests[ind], nalloc, ibeg);
00706
00707 for (i=0; i < nalloc; i++) {
00708 fprintf (stderr, "i = %d, start 0x%p len %lld, request %d\n",
00709 i, buffers [i], lengths [i], requests[i]);
00710 }
00711
00712
00713
00714 requests [ind] = MPI_REQUEST_NULL;
00715 }
00716 #endif