mvlinreg.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007 package require math::linearalgebra 1.0
00008 package require math::statistics 0.4
00009
00010
00011
00012
00013
00014 namespace ::math::statistics {
00015 variable epsilon 1.0e-7
00016
00017 namespace export tstat mv-wls mv-ols
00018
00019 namespace import -force \
00020 ::math::linearalgebra::mkMatrix \
00021 ::math::linearalgebra::mkVector \
00022 ::math::linearalgebra::mkIdentity \
00023 ::math::linearalgebra::mkDiagonal \
00024 ::math::linearalgebra::getrow \
00025 ::math::linearalgebra::row = \
00026 ::math::linearalgebra::getcol \
00027 ::math::linearalgebra::col = \
00028 ::math::linearalgebra::getelem \
00029 ::math::linearalgebra::elem = \
00030 ::math::linearalgebra::dotproduct \
00031 ::math::linearalgebra::matmul \
00032 ::math::linearalgebra::add \
00033 ::math::linearalgebra::sub \
00034 ::math::linearalgebra::solveGauss \
00035 ::math::linearalgebra::transpose
00036 }
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 ret ::math::statistics::tstat (type n , optional alpha =0.05) {
00054 variable epsilon
00055 variable tvals
00056
00057 if [info exists tvals($n:$alpha)] {
00058 return $tvals($n:$alpha)
00059 }
00060
00061 set deltat [expr {100 * $epsilon}]
00062 # For one-tailed distribution,
00063 set ptarg [expr {1.000 - $alpha/2.0}]
00064 set maxiter 100
00065
00066 # Initial value for t
00067 set t 2.0
00068
00069 set niter 0
00070 while {abs([::math::statistics::cdf-students-t $n $t] - $ptarg) > $epsilon} {
00071 set pstar [::math::statistics::cdf-students-t $n $t]
00072 set pl [::math::statistics::cdf-students-t $n [expr {$t - $deltat}]]
00073 set ph [::math::statistics::cdf-students-t $n [expr {$t + $deltat}]]
00074
00075 set t [expr {$t + 2.0 * $deltat * ($ptarg - $pstar)/($ph - $pl)}]
00076
00077 incr niter
00078 if {$niter == $maxiter} {
00079 return -code error "::math::statistics::tstat: Did not converge after $niter iterations"
00080 }
00081 }
00082
00083 # Cache the result to shorten the call in future
00084 set tvals($n:$alpha) $t
00085
00086 return $t
00087 }
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109 ret ::math::statistics::mv-wls (type data) {
00110
00111 # Fill the matrices of x & y values, and weights
00112 # For n points, k coefficients
00113
00114 # The number of points is equal to half the arguments (n weights, n points)
00115 set n [expr {[llength $data]/2}]
00116
00117 set firstloop true
00118 # Sum up all y values to take an average
00119 set ysum 0
00120 # Add up the weights
00121 set wtsum 0
00122 # Count over rows (points) as you go
00123 set point 0
00124 foreach {wt pt} $data {
00125
00126 # Check inputs
00127 if {[string is double $wt] == 0} {
00128 return -code error "::math::statistics::mv-wls: Weight \"$wt\" is not a number"
00129 return {}
00130 }
00131
00132 ## -- Check dimensions, initialize
00133 if $firstloop {
00134 # k = num of vals in pt = 1 + number of x's (because of constant)
00135 set k [llength $pt]
00136 if {$n <= [expr {$k + 1}]} {
00137 return -code error "::math::statistics::mv-wls: Too few degrees of freedom: $k variables but only $n points"
00138 return {}
00139 }
00140 set X [mkMatrix $n $k]
00141 set y [mkVector $n]
00142 set I_x [mkIdentity $k]
00143 set I_y [mkIdentity $n]
00144
00145 set firstloop false
00146
00147 } else {
00148 # Have to have same number of x's for all points
00149 if {$k != [llength $pt]} {
00150 return -code error "::math::statistics::mv-wls: Point \"$pt\" has wrong number of values (expected $k)"
00151 # Clean up
00152 return {}
00153 }
00154 }
00155
00156 ## -- Extract values from set of points
00157 # Make a list of y values
00158 set yval [expr {double([lindex $pt 0])}]
00159 setelem y $point $yval
00160 set ysum [expr {$ysum + $wt * $yval}]
00161 set wtsum [expr {$wtsum + $wt}]
00162 # Add x-values to the x-matrix
00163 set xrow [lrange $pt 1 end]
00164 # Add the constant (value = 1.0)
00165 lappend xrow 1.0
00166 setrow X $point $xrow
00167 # Create list of weights & square root of weights
00168 lappend w [expr {double($wt)}]
00169 lappend sqrtw [expr {sqrt(double($wt))}]
00170
00171 incr point
00172
00173 }
00174
00175 set ymean [expr {double($ysum)/$wtsum}]
00176 set W [mkDiagonal $w]
00177 set sqrtW [mkDiagonal $sqrtw]
00178
00179 # Calculate sum os square differences for x's
00180 for {set r 0} {$r < $k} {incr r} {
00181 set xsqrsum 0.0
00182 set xmeansum 0.0
00183 # Calculate sum of squared differences as: sum(x^2) - (sum x)^2/n
00184 for {set t 0} {$t < $n} {incr t} {
00185 set xval [getelem $X $t $r]
00186 set xmeansum [expr {$xmeansum + double($xval)}]
00187 set xsqrsum [expr {$xsqrsum + double($xval * $xval)}]
00188 }
00189 lappend xsqr [expr {$xsqrsum - $xmeansum * $xmeansum/$n}]
00190 }
00191
00192 ## -- Set up the X'WX matrix
00193 set XtW [matmul [::math::linearalgebra::transpose $X] $W]
00194 set XtWX [matmul $XtW $X]
00195
00196 # Invert
00197 set M [solveGauss $XtWX $I_x]
00198
00199 set beta [matmul $M [matmul $XtW $y]]
00200
00201 ### -- Residuals & R-squared
00202 # 1) Generate list of diagonals of the hat matrix
00203 set H [matmul $X [matmul $M $XtW]]
00204 for {set i 0} {$i < $n} {incr i} {
00205 lappend h_ii [getelem $H $i $i]
00206 }
00207
00208 set R [matmul $sqrtW [matmul [sub $I_y $H] $y]]
00209 set yhat [matmul $H $y]
00210
00211 # 2) Generate list of residuals, sum of squared residuals, r-squared
00212 set sstot 0.0
00213 set ssreg 0.0
00214 # Note: Relying on representation of Vector as a list for y, yhat
00215 foreach yval $y wt $w yhatval $yhat {
00216 set sstot [expr {$sstot + $wt * ($yval - $ymean) * ($yval - $ymean)}]
00217 set ssreg [expr {$ssreg + $wt * ($yhatval - $ymean) * ($yhatval - $ymean)}]
00218 }
00219 set r2 [expr {double($ssreg)/$sstot}]
00220 set adjr2 [expr {1.0 - (1.0 - $r2) * ($n - 1)/($n - $k)}]
00221 set sumsqresid [dotproduct $R $R]
00222 set s2 [expr {double($sumsqresid) / double($n - $k)}]
00223
00224 ### -- Confidence intervals for coefficients
00225 set tvalue [tstat [expr {$n - $k}]]
00226 for {set i 0} {$i < $k} {incr i} {
00227 set stderr [expr {sqrt($s2 * [getelem $M $i $i])}]
00228 set mid [lindex $beta $i]
00229 lappend stderrs $stderr
00230 lappend confinterval [list [expr {$mid - $tvalue * $stderr}] [expr {$mid + $tvalue * $stderr}]]
00231 }
00232
00233 return [list $r2 $adjr2 $beta $stderrs $confinterval]
00234 }
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256 ret ::math::statistics::mv-ols (type data) {
00257 set newdata {}
00258 foreach pt $data {
00259 lappend newdata 1 $pt
00260 }
00261 return [mv-wls $newdata]
00262 }
00263