pdf_stat.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 namespace ::math::statistics {
00013
00014 namespace export pdf-normal pdf-uniform \
00015 pdf-exponential \
00016 cdf-normal cdf-uniform \
00017 cdf-exponential \
00018 cdf-students-t \
00019 random-normal random-uniform \
00020 random-exponential \
00021 histogram-uniform
00022
00023 variable cdf_normal_prob {}
00024 variable cdf_normal_x {}
00025 variable cdf_toms322_cached {}
00026 variable initialised_cdf 0
00027 }
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 ret ::math::statistics::pdf-normal ( type mean , type stdev , type x ) {
00041 variable NEGSTDEV
00042 variable factorNormalPdf
00043
00044 if { $stdev <= 0.0 } {
00045 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
00046 }
00047
00048 set xn [expr {($x-$mean)/$stdev}]
00049 set prob [expr {exp(-$xn*$xn/2.0)/$stdev/$factorNormalPdf}]
00050
00051 return $prob
00052 }
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066 ret ::math::statistics::pdf-uniform ( type pmin , type pmax , type x ) {
00067
00068 if { $pmin >= $pmax } {
00069 return -code error -errorcode ARG \
00070 -errorinfo "Wrong order or zero range" \
00071 "Wrong order or zero range"
00072 }
00073
00074 set prob [expr {1.0/($pmax-$min)}]
00075
00076 if { $x < $pmin || $x > $pmax } { return 0.0 }
00077
00078 return $prob
00079 }
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092 ret ::math::statistics::pdf-exponential ( type mean , type x ) {
00093 variable NEGSTDEV
00094 variable OUTOFRANGE
00095
00096 if { $stdev <= 0.0 } {
00097 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
00098 }
00099 if { $mean <= 0.0 } {
00100 return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
00101 "$OUTOFRANGE: mean must be positive"
00102 }
00103
00104 if { $x < 0.0 } { return 0.0 }
00105 if { $x > 700.0*$mean } { return 0.0 }
00106
00107 set prob [expr {exp(-$x/$mean)/$mean}]
00108
00109 return $prob
00110 }
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123 ret ::math::statistics::cdf-normal ( type mean , type stdev , type x ) {
00124 variable NEGSTDEV
00125
00126 if { $stdev <= 0.0 } {
00127 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
00128 }
00129
00130 set xn [expr {($x-$mean)/$stdev}]
00131 set prob1 [Cdf-toms322 1 5000 [expr {$xn*$xn}]]
00132 if { $xn > 0.0 } {
00133 set prob [expr {0.5+0.5*$prob1}]
00134 } else {
00135 set prob [expr {0.5-0.5*$prob1}]
00136 }
00137
00138 return $prob
00139 }
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152 ret ::math::statistics::cdf-students-t ( type degrees , type x ) {
00153
00154 if { $degrees <= 0 } {
00155 return -code error -errorcode ARG -errorinfo \
00156 "Number of degrees of freedom must be positive" \
00157 "Number of degrees of freedom must be positive"
00158 }
00159
00160 set prob1 [Cdf-toms322 1 $degrees [expr {$x*$x}]]
00161 set prob [expr {0.5+0.5*$prob1}]
00162
00163 return $prob
00164 }
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178 ret ::math::statistics::cdf-uniform ( type pmin , type pmax , type x ) {
00179
00180 if { $pmin >= $pmax } {
00181 return -code error -errorcode ARG \
00182 -errorinfo "Wrong order or zero range" \
00183 }
00184
00185 set prob [expr {($x-$pmin)/($pmax-$min)}]
00186
00187 if { $x < $pmin } { return 0.0 }
00188 if { $x > $pmax } { return 1.0 }
00189
00190 return $prob
00191 }
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204 ret ::math::statistics::cdf-exponential ( type mean , type x ) {
00205 variable NEGSTDEV
00206 variable OUTOFRANGE
00207
00208 if { $mean <= 0.0 } {
00209 return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
00210 "$OUTOFRANGE: mean must be positive"
00211 }
00212
00213 if { $x < 0.0 } { return 0.0 }
00214 if { $x > 30.0*$mean } { return 1.0 }
00215
00216 set prob [expr {1.0-exp(-$x/$mean)}]
00217
00218 return $prob
00219 }
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235 ret ::math::statistics::Inverse-cdf-uniform ( type pmin , type pmax , type prob ) {
00236
00237 if {0} {
00238 if { $pmin >= $pmax } {
00239 return -code error -errorcode ARG \
00240 -errorinfo "Wrong order or zero range" \
00241 "Wrong order or zero range"
00242 }
00243 }
00244
00245 set x [expr {$pmin+$prob*($pmax-$pmin)}]
00246
00247 if { $x < $pmin } { return $pmin }
00248 if { $x > $pmax } { return $pmax }
00249
00250 return $x
00251 }
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266 ret ::math::statistics::Inverse-cdf-exponential ( type mean , type prob ) {
00267
00268 if {0} {
00269 if { $mean <= 0.0 } {
00270 return -code error -errorcode ARG \
00271 -errorinfo "Mean must be positive" \
00272 "Mean must be positive"
00273 }
00274 }
00275
00276 set x [expr {-$mean*log(1.0-$prob)}]
00277
00278 return $x
00279 }
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295 ret ::math::statistics::Inverse-cdf-normal ( type mean , type stdev , type prob ) {
00296 variable cdf_normal_prob
00297 variable cdf_normal_x
00298
00299 variable initialised_cdf
00300 if { $initialised_cdf == 0 } {
00301 Initialise-cdf-normal
00302 }
00303
00304 # Look for the proper probability level first,
00305 # then interpolate
00306 #
00307 # Note: the numerical data are connected to the length of
00308 # the lists - see Initialise-cdf-normal
00309 #
00310 set size 32
00311 set idx 64
00312 for { set i 0 } { $i <= 7 } { incr i } {
00313 set upper [lindex $cdf_normal_prob $idx]
00314 if { $prob > $upper } {
00315 set idx [expr {$idx+$size}]
00316 } else {
00317 set idx [expr {$idx-$size}]
00318 }
00319 set size [expr {$size/2}]
00320 }
00321 #
00322 # We have found a value that is close to the one we need,
00323 # now find the enclosing interval
00324 #
00325 if { $upper < $prob } {
00326 incr idx
00327 }
00328 set p1 [lindex $cdf_normal_prob [expr {$idx-1}]]
00329 set p2 [lindex $cdf_normal_prob $idx]
00330 set x1 [lindex $cdf_normal_x [expr {$idx-1}]]
00331 set x2 [lindex $cdf_normal_x $idx ]
00332
00333 set x [expr {$x1+($x2-$x1)*($prob-$p1)/($p2-$p1)}]
00334
00335 return [expr {$mean+$stdev*$x}]
00336 }
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349 ret ::math::statistics::Initialise-cdf-normal ( ) {
00350 variable cdf_normal_prob
00351 variable cdf_normal_x
00352
00353 variable initialised_cdf
00354 set initialised_cdf 1
00355
00356 set dx [expr {10.0/128.0}]
00357
00358 set cdf_normal_prob 0.5
00359 set cdf_normal_x 0.0
00360 for { set i 1 } { $i <= 64 } { incr i } {
00361 set x [expr {$i*$dx}]
00362 if { $x != 0.0 } {
00363 set prob [Cdf-toms322 1 5000 [expr {$x*$x}]]
00364 } else {
00365 set prob 0.0
00366 }
00367
00368 set cdf_normal_x [concat [expr {-$x}] $cdf_normal_x $x]
00369 set cdf_normal_prob \
00370 [concat [expr {0.5-0.5*$prob}] $cdf_normal_prob \
00371 [expr {0.5+0.5*$prob}]]
00372 }
00373 }
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387 ret ::math::statistics::random-uniform ( type pmin , type pmax , type number ) {
00388
00389 if { $pmin >= $pmax } {
00390 return -code error -errorcode ARG \
00391 -errorinfo "Wrong order or zero range" \
00392 "Wrong order or zero range"
00393 }
00394
00395 set result {}
00396 for { set i 0 } {$i < $number } { incr i } {
00397 lappend result [Inverse-cdf-uniform $pmin $pmax [expr {rand()}]]
00398 }
00399
00400 return $result
00401 }
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414 ret ::math::statistics::random-exponential ( type mean , type number ) {
00415
00416 if { $mean <= 0.0 } {
00417 return -code error -errorcode ARG \
00418 -errorinfo "Mean must be positive" \
00419 "Mean must be positive"
00420 }
00421
00422 set result {}
00423 for { set i 0 } {$i < $number } { incr i } {
00424 lappend result [Inverse-cdf-exponential $mean [expr {rand()}]]
00425 }
00426
00427 return $result
00428 }
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442 ret ::math::statistics::random-normal ( type mean , type stdev , type number ) {
00443
00444 if { $stdev <= 0.0 } {
00445 return -code error -errorcode ARG \
00446 -errorinfo "Standard deviation must be positive" \
00447 "Standard deviation must be positive"
00448 }
00449
00450 set result {}
00451 for { set i 0 } {$i < $number } { incr i } {
00452 lappend result [Inverse-cdf-normal $mean $stdev [expr {rand()}]]
00453 }
00454
00455 return $result
00456 }
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489 ret ::math::statistics::Cdf-toms322 ( type m , type n , type x ) {
00490 set m [expr {$m < 300? int($m) : 300}]
00491 set n [expr {$n < 5000? int($n) : 5000}]
00492 if { $m < 1 || $n < 1 } {
00493 return -code error -errorcode ARG \
00494 -errorinfo "Arguments m anf n must be greater/equal 1"
00495 }
00496
00497 set a [expr {2*($m/2)-$m+2}]
00498 set b [expr {2*($n/2)-$n+2}]
00499 set w [expr {$x*double($m)/double($n)}]
00500 set z [expr {1.0/(1.0+$w)}]
00501
00502 if { $a == 1 } {
00503 if { $b == 1 } {
00504 set p [expr {sqrt($w)}]
00505 set y 0.3183098862
00506 set d [expr {$y*$z/$p}]
00507 set p [expr {2.0*$y*atan($p)}]
00508 } else {
00509 set p [expr {sqrt($w*$z)}]
00510 set d [expr {$p*$z/(2.0*$w)}]
00511 }
00512 } else {
00513 if { $b == 1 } {
00514 set p [expr {sqrt($z)}]
00515 set d [expr {$z*$p/2.0}]
00516 set p [expr {1.0-$p}]
00517 } else {
00518 set d [expr {$z*$z}]
00519 set p [expr {$z*$w}]
00520 }
00521 }
00522
00523 set y [expr {2.0*$w/$z}]
00524
00525 if { $a == 1 } {
00526 for { set j [expr {$b+2}] } { $j <= $n } { incr j 2 } {
00527 set d [expr {(1.0+double($a)/double($j-2)) * $d*$z}]
00528 set p [expr {$p+$d*$y/double($j-1)}]
00529 }
00530 } else {
00531 set power [expr {($n-1)/2}]
00532 set zk [expr {pow($z,$power)}]
00533 set d [expr {($d*$zk*$n)/$b}]
00534 set p [expr {$p*$zk + $w*$z * ($zk-1.0)/($z-1.0)}]
00535 }
00536
00537 set y [expr {$w*$z}]
00538 set z [expr {2.0/$z}]
00539 set b [expr {$n-2}]
00540
00541 for { set i [expr {$a+2}] } { $i <= $m } { incr i 2 } {
00542 set j [expr {$i+$b}]
00543 set d [expr {$y*$d*double($j)/double($i-2)}]
00544 set p [expr {$p-$z*$d/double($j)}]
00545 }
00546 set prob $p
00547 if { $prob < 0.0 } { set prob 0.0 }
00548 if { $prob > 1.0 } { set prob 1.0 }
00549
00550 return $prob
00551 }
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570 ret ::math::statistics::Inverse-cdf-toms322 ( type m , type n , type prob ) {
00571 variable cdf_toms322_cached
00572 variable OUTOFRANGE
00573
00574 if { $prob <= 0 || $prob >= 1 } {
00575 return -code error -errorcode $OUTOFRANGE $OUTOFRANGE
00576 }
00577
00578 # Is the combination in cache? Then we can simply rely
00579 # on that
00580 #
00581 foreach {m1 n1 prob1 x1} $cdf_toms322_cached {
00582 if { $m1 == $m && $n1 == $n && $prob1 == $prob } {
00583 return $x1
00584 }
00585 }
00586
00587 #
00588 # Otherwise first find a value of x for which Cdf(x) exceeds prob
00589 #
00590 set x1 1.0
00591 set dx1 1.0
00592 while { [Cdf-toms322 $m $n $x1] < $prob } {
00593 set x1 [expr {$x1+$dx1}]
00594 set dx1 [expr {2.0*$dx1}]
00595 }
00596
00597 #
00598 # Now, look closer
00599 #
00600 while { $dx1 > 0.0001 } {
00601 set p1 [Cdf-toms322 $m $n $x1]
00602 if { $p1 > $prob } {
00603 set x1 [expr {$x1-$dx1}]
00604 } else {
00605 set x1 [expr {$x1+$dx1}]
00606 }
00607 set dx1 [expr {$dx1/2.0}]
00608 }
00609
00610 #
00611 # Cache the result
00612 #
00613 set last end
00614 if { [llength $cdf_toms322_cached] > 27 } {
00615 set last 26
00616 }
00617 set cdf_toms322_cached \
00618 [concat [list $m $n $prob $x1] [lrange $cdf_toms322_cached 0 $last]]
00619
00620 return $x1
00621 }
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633 ret ::math::statistics::HistogramMake ( type cdf-, type values , type number ) {
00634
00635 set assigned 0
00636 set result {}
00637 set residue 0.0
00638 foreach cdfv $cdf-values {
00639 set sum [expr {$number*($cdfv + $residue)}]
00640 set bucket [expr {int($sum)}]
00641 set residue [expr {$sum-$bucket}]
00642 set assigned [expr {$assigned-$bucket}]
00643 lappend result $bucket
00644 }
00645 set remaining [expr {$number-$assigned}]
00646 if { $remaining > 0 } {
00647 lappend result $remaining
00648 } else {
00649 lappend result 0
00650 }
00651
00652 return $result
00653 }
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667 ret ::math::statistics::histogram-uniform ( type min , type max , type limits , type number ) {
00668 if { $min >= $max } {
00669 return -code error -errorcode ARG \
00670 -errorinfo "Wrong order or zero range" \
00671 "Wrong order or zero range"
00672 }
00673
00674 set cdf_result {}
00675 foreach limit $limits {
00676 lappend cdf_result [cdf-uniform $min $max $limit]
00677 }
00678
00679 return [HistogramMake $cdf_result $number]
00680 }
00681
00682
00683
00684
00685 if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } {
00686
00687
00688
00689
00690
00691 puts "Normal distribution - two-tailed"
00692 foreach z {4.417 3.891 3.291 2.576 2.241 1.960 1.645 1.150 0.674
00693 0.319 0.126 0.063 0.0125} \
00694 pexp {1.e-5 1.e-4 1.e-3 1.e-2 0.025 0.050 0.100 0.250 0.500
00695 0.750 0.900 0.950 0.990 } {
00696 prob = [::math::statistics::Cdf-toms322 1 5000 [expr {$z*$z}]]
00697 puts "$z - $pexp - [expr {1.0-$prob}]"
00698 }
00699
00700 puts "Normal distribution (inverted; one-tailed)"
00701 foreach p {0.001 0.01 0.1 0.25 0.5 0.75 0.9 0.99 0.999} {
00702 puts "$p - [::math::statistics::Inverse-cdf-normal 0.0 1.0 $p]"
00703 }
00704 puts "Normal random variables"
00705 rndvars = [::math::statistics::random-normal 1.0 2.0 20]
00706 puts $rndvars
00707 puts "Normal uniform variables"
00708 rndvars = [::math::statistics::random-uniform 1.0 2.0 20]
00709 puts $rndvars
00710 puts "Normal exponential variables"
00711 rndvars = [::math::statistics::random-exponential 2.0 20]
00712 puts $rndvars
00713 }
00714