| [0b990d] | 1 | //
 | 
|---|
 | 2 | // mops.h --- block matrix operations
 | 
|---|
 | 3 | //
 | 
|---|
 | 4 | // Copyright (C) 1997 Limit Point Systems, Inc.
 | 
|---|
 | 5 | //
 | 
|---|
 | 6 | // Author: Edward Seidl <seidl@janed.com>
 | 
|---|
 | 7 | // Maintainer: LPS
 | 
|---|
 | 8 | //
 | 
|---|
 | 9 | // This file is part of the SC Toolkit.
 | 
|---|
 | 10 | //
 | 
|---|
 | 11 | // The SC Toolkit is free software; you can redistribute it and/or modify
 | 
|---|
 | 12 | // it under the terms of the GNU Library General Public License as published by
 | 
|---|
 | 13 | // the Free Software Foundation; either version 2, or (at your option)
 | 
|---|
 | 14 | // any later version.
 | 
|---|
 | 15 | //
 | 
|---|
 | 16 | // The SC Toolkit is distributed in the hope that it will be useful,
 | 
|---|
 | 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
 | 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
 | 19 | // GNU Library General Public License for more details.
 | 
|---|
 | 20 | //
 | 
|---|
 | 21 | // You should have received a copy of the GNU Library General Public License
 | 
|---|
 | 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
 | 
|---|
 | 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 | 
|---|
 | 24 | //
 | 
|---|
 | 25 | // The U.S. Government is granted a limited license as per AL 91-7.
 | 
|---|
 | 26 | //
 | 
|---|
 | 27 | 
 | 
|---|
 | 28 | #ifndef _math_scmat_mops_h
 | 
|---|
 | 29 | #define _math_scmat_mops_h
 | 
|---|
 | 30 | 
 | 
|---|
 | 31 | #define D1 32
 | 
|---|
 | 32 | 
 | 
|---|
 | 33 | // copy a chunk of rectangular matrix source into dest.  dest is D1xD1, and is
 | 
|---|
 | 34 | // padded with zeros
 | 
|---|
 | 35 | 
 | 
|---|
 | 36 | static inline void
 | 
|---|
 | 37 | copy_block(double **dest, double **source,
 | 
|---|
 | 38 |            int istart, int ni, int jstart, int nj)
 | 
|---|
 | 39 | {
 | 
|---|
 | 40 |   int ii,jj;
 | 
|---|
 | 41 |   
 | 
|---|
 | 42 |   for (ii=0; ii < ni; ii++) {
 | 
|---|
 | 43 |     double *di = dest[ii];
 | 
|---|
 | 44 |     double *si = &source[istart+ii][jstart];
 | 
|---|
 | 45 |     for (jj=0; jj < nj; jj++)
 | 
|---|
 | 46 |       di[jj] = si[jj];
 | 
|---|
 | 47 |     for (; jj < D1; jj++)
 | 
|---|
 | 48 |       di[jj] = 0;
 | 
|---|
 | 49 |   }
 | 
|---|
 | 50 | 
 | 
|---|
 | 51 |   int left=D1-ii;
 | 
|---|
 | 52 |   if (left)
 | 
|---|
 | 53 |     memset(dest[ii], 0, sizeof(double)*left*D1);
 | 
|---|
 | 54 | }
 | 
|---|
 | 55 | 
 | 
|---|
 | 56 | static inline void
 | 
|---|
 | 57 | copy_trans_block(double **dest, double **source,
 | 
|---|
 | 58 |                  int istart, int ni, int jstart, int nj)
 | 
|---|
 | 59 | {
 | 
|---|
 | 60 |   int ii,jj;
 | 
|---|
 | 61 |   
 | 
|---|
 | 62 |   memset(dest[0], 0, sizeof(double)*D1*D1);
 | 
|---|
 | 63 | 
 | 
|---|
 | 64 |   for (jj=0; jj < nj; jj++) {
 | 
|---|
 | 65 |     double *sj = &source[jstart+jj][istart];
 | 
|---|
 | 66 |     for (ii=0; ii < ni; ii++)
 | 
|---|
 | 67 |       dest[ii][jj] = sj[ii];
 | 
|---|
 | 68 |   }
 | 
|---|
 | 69 | }
 | 
|---|
 | 70 | 
 | 
|---|
 | 71 | // copy a chunk of symmetric matrix source into dest.  dest is D1xD1, and is
 | 
|---|
 | 72 | // padded with zeros
 | 
|---|
 | 73 | static inline void
 | 
|---|
 | 74 | copy_sym_block(double **dest, double **source,
 | 
|---|
 | 75 |                int istart, int ni, int jstart, int nj)
 | 
|---|
 | 76 | {
 | 
|---|
 | 77 |   int ii,jj;
 | 
|---|
 | 78 | 
 | 
|---|
 | 79 |   for (ii=0; ii < ni; ii++) {
 | 
|---|
 | 80 |     double *di = dest[ii];
 | 
|---|
 | 81 |     double *si = &source[istart+ii][jstart];
 | 
|---|
 | 82 |     
 | 
|---|
 | 83 |     if (jstart < istart)
 | 
|---|
 | 84 |       for (jj=0; jj < nj; jj++)
 | 
|---|
 | 85 |         di[jj] = si[jj];
 | 
|---|
 | 86 |     else if (jstart==istart)
 | 
|---|
 | 87 |       for (jj=0; jj <= ii; jj++)
 | 
|---|
 | 88 |         di[jj] = dest[jj][ii] = si[jj];
 | 
|---|
 | 89 |     else
 | 
|---|
 | 90 |       for (jj=0; jj < nj; jj++)
 | 
|---|
 | 91 |         di[jj] = source[jstart+jj][istart+ii];
 | 
|---|
 | 92 | 
 | 
|---|
 | 93 |     for (jj=nj; jj < D1; jj++)
 | 
|---|
 | 94 |       di[jj] = 0;
 | 
|---|
 | 95 |   }
 | 
|---|
 | 96 | 
 | 
|---|
 | 97 |   int left=D1-ii;
 | 
|---|
 | 98 |   if (left)
 | 
|---|
 | 99 |     memset(dest[ii], 0, sizeof(double)*left*D1);
 | 
|---|
 | 100 | }
 | 
|---|
 | 101 | 
 | 
|---|
 // Write the upper-left ni x nj corner of the scratch block source back into
 // the full matrix dest, with source[0][0] landing on dest[istart][jstart].
 // Elements of dest outside that window are not touched.
 static inline void
 return_block(double **dest, double **source,
              int istart, int ni, int jstart, int nj)
 {
   // nothing to store for an empty window
   if (ni <= 0 || nj <= 0)
     return;

   for (int row = 0; row < ni; row++) {
     double *out = dest[istart + row] + jstart;
     const double *in = source[row];

     for (int col = 0; col < nj; col++) {
       out[col] = in[col];
     }
   }
 }
 | 
|---|
 | 112 | 
 | 
|---|
 // Accumulate a block product into c:
 //
 //   c[i][j] += sum over k of a[i][k] * b[j][k]
 //
 // i.e. c += a * transpose(b), since b is indexed as b[j][k].
 // a, b, and c are all D1xD1 blocks.
 //
 // ni, nj and nk bound the i, j and k loops.  The i loop is unrolled by four
 // and the k loop by two, with no remainder handling, so:
 //   - rows ii..ii+3 of a and c are read, and those rows of c are written,
 //     for every ii = 0, 4, 8, ... below ni, even when ni is not a multiple
 //     of four;
 //   - when nk is odd, element nk of each row of a and b also enters the sum.
 // The blocks therefore need storage for those extra rows/columns, and the
 // extra elements of a and b must be zero for the result to be the plain
 // ni x nj x nk product.
 static inline void
 mult_block(double **a, double **b, double **c, int ni, int nj, int nk)
 {
   int ii,jj,kk;

   for (ii=0; ii < ni; ii += 4) {
     // four consecutive rows of a and c are processed together
     double *a0=a[ii], *a1=a[ii+1], *a2=a[ii+2], *a3=a[ii+3];
     double *c0=c[ii], *c1=c[ii+1], *c2=c[ii+2], *c3=c[ii+3];

     for (jj=0; jj < nj; jj++) {
       double *bt = b[jj];

       // running sums start from the current contents of c
       double t00=c0[jj], t10=c1[jj], t20=c2[jj], t30=c3[jj];

       for (kk=0; kk < nk; kk += 2) {
         // no `register` here: the specifier is deprecated in C++11 and
         // ill-formed in C++17
         double b0=bt[kk], b1=bt[kk+1];
         t00 += a0[kk]*b0 + a0[kk+1]*b1;
         t10 += a1[kk]*b0 + a1[kk+1]*b1;
         t20 += a2[kk]*b0 + a2[kk+1]*b1;
         t30 += a3[kk]*b0 + a3[kk+1]*b1;
       }

       c0[jj]=t00;
       c1[jj]=t10;
       c2[jj]=t20;
       c3[jj]=t30;
     }
   }
 }
 | 
|---|
 | 145 | 
 | 
|---|
 | 146 | #endif
 | 
|---|
 | 147 | 
 | 
|---|
 | 148 | // Local Variables:
 | 
|---|
 | 149 | // mode: c++
 | 
|---|
 | 150 | // c-file-style: "ETS"
 | 
|---|
 | 151 | // End:
 | 
|---|