Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

VrMMX.h

Go to the documentation of this file.
00001 /* -*- c++ -*- */
00002 /*
00003  * Copyright 2002 Free Software Foundation, Inc.
00004  * 
00005  * This file is part of GNU Radio
00006  * 
00007  * GNU Radio is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2, or (at your option)
00010  * any later version.
00011  * 
00012  * GNU Radio is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with GNU Radio; see the file COPYING.  If not, write to
00019  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00020  * Boston, MA 02111-1307, USA.
00021  */
00022 /*
00023  *  Copyright 1997 Massachusetts Institute of Technology
00024  * 
00025  *  Permission to use, copy, modify, distribute, and sell this software and its
00026  *  documentation for any purpose is hereby granted without fee, provided that
00027  *  the above copyright notice appear in all copies and that both that
00028  *  copyright notice and this permission notice appear in supporting
00029  *  documentation, and that the name of M.I.T. not be used in advertising or
00030  *  publicity pertaining to distribution of the software without specific,
00031  *  written prior permission.  M.I.T. makes no representations about the
00032  *  suitability of this software for any purpose.  It is provided "as is"
00033  *  without express or implied warranty.
00034  * 
00035  */
00036 
00037 
00038 #ifndef _VRMMX_H_
00039 #define _VRMMX_H_
00040 
// Packed 64-bit operand types. Both rely on `unsigned long long` being
// 64 bits wide.
typedef unsigned long long mmxpc2; // Packed Complex (2 vals, 16 bit-precision)
typedef unsigned long long mmxpcr; // Packed Complex Result (32 bit precision)

// Routines with C linkage; only their declarations appear in this header.
extern "C" {
  void _vradd_mmx(char *array1, char *array2, unsigned int x, char *array3);
  void _ccvdp_mmx(char *inputArray, mmxpc2 *mmxTaps, mmxpcr *r, int numTaps);
  void _scvdp_mmx(short *inputArray, mmxpc2 *mmxTaps, mmxpcr *r, int numTaps);
  int  _query_mmx();
  void _vrproc_mmx(short *array1, short *array2, unsigned int length, short *array3);
}
00050 
00051 static inline int queryMMX() {return _query_mmx();}
00052 
00053 class mmxTaps {
00054 private:
00055   VrComplex *taps;
00056   int numTaps;
00057   char *memPointer;
00058   mmxpc2 *pTaps;
00059   float real_fixed16factor; //1bit sign 15 bit mantissa
00060   float imag_fixed16factor; //1bit sign 15 bit mantissa
00061 public:
00062   mmxTaps() : numTaps(0),memPointer(NULL) {};
00063   mmxTaps(VrComplex[], int);
00064   ~mmxTaps();
00065 
00066   inline int mmxReady() {return (numTaps!=0);}
00067   VrComplex mmxCVDProduct(char inputArray[]);
00068   VrComplex mmxCVDProduct(short inputArray[]);
00069 
00070   inline mmxpc2 vrcToMMXPC2(VrComplex a, VrComplex b) {
00071     short Ar=(short) (a.real()*real_fixed16factor);
00072     short Ai=(short) (a.imag()*imag_fixed16factor);
00073     short Br=(short) (b.real()*real_fixed16factor);
00074     short Bi=(short) (b.imag()*imag_fixed16factor);
00075     mmxpc2 x = (unsigned long long) Br & 0xffff;
00076     x <<= 16;
00077     x |= (unsigned long long) Ar & 0xffff;
00078     x <<= 16;
00079     x |= (unsigned long long) Bi & 0xffff;
00080     x <<= 16;
00081     x |= (unsigned long long) Ai & 0xffff;
00082     return x; //(Br Ar Bi Ai)
00083   } 
00084 
00085   inline VrComplex MMXPCRToVrc(mmxpcr p) {
00086     long Pr=(long) (p>>32);
00087     long Pi=(long) (p&0xFFFFFFFF);
00088     float fPr=(float) Pr / ((float) real_fixed16factor);
00089     float fPi=(float) Pi / ((float) imag_fixed16factor);
00090     return VrComplex(fPr,fPi);
00091   }
00092 };
00093 
00094 inline
00095 mmxTaps::mmxTaps(VrComplex mytaps[],int n) : taps(mytaps), numTaps(n){
00096   if(!_query_mmx()) {
00097     numTaps=0;
00098     return;
00099   }
00100   //determine fixed16factor
00101   float rmax=-1, rmin=100;
00102   float imax=-1, imin=100;
00103   for(int x=0;x<numTaps;x++) {
00104     float r=abs(real(taps[x]));
00105     float i=abs(imag(taps[x]));
00106     if(r<rmin && r!=0) rmin=r;
00107     if(r>rmax) rmax=r;
00108     if(i<imin && i!=0) imin=i;
00109     if(i>imax) imax=i;
00110   }
00111   
00112   /*fprintf(stderr, "RMax %f, Rmin %f, IMax %f, Imin  %f\n", rmax,rmin,imax,imin);
00113   float factor1 = (1<<15)/rmax;
00114   float factor2 = (1<<15)/rmin;
00115   fprintf(stderr, "Size of real range: %f\n", rmax/rmin);
00116   */
00117 
00118   // real_fixed16factor=(1<<15)/(rmax+rmin);
00119   real_fixed16factor=(1<<15)/(rmax+rmin) * 0.25;        // head room -eb
00120   
00121   /*
00122   fprintf(stderr, "RFactor = %f\n", real_fixed16factor);
00123   fprintf(stderr, "Real Max, Min in fixed pt: %d %d\n",
00124         ((short) (rmax*real_fixed16factor)), ((short) (rmin*real_fixed16factor)) );
00125   */
00126 
00127   /*
00128   factor1 = (1<<15)/imax;
00129   factor2 = (1<<15)/imin;
00130   fprintf(stderr, "Size of imag range: %f\n", imax/imin);
00131   */
00132 
00133   // imag_fixed16factor=(1<<15)/(imax+imin);
00134   imag_fixed16factor=(1<<15)/(imax+imin) * 0.25;        // head rooom -eb
00135 
00136   /*
00137   fprintf(stderr, "IFactor = %f\n", imag_fixed16factor);
00138   fprintf(stderr, "Imag Max, Min in fixed pt: %d %d\n",
00139         ((short) (imax*imag_fixed16factor)), ((short) (imin*imag_fixed16factor)));
00140   */
00141 
00142   int size = numTaps/2+1+4;   //4 is in case loop which does 8 mults runs over
00143   //mmxpc2 *pTaps=new mmxpc2[size*2];
00144 
00145   int iSizeOf = sizeof(mmxpc2);
00146   unsigned long csize = size*2*iSizeOf+iSizeOf;
00147   memPointer = new char[csize];
00148   unsigned int iAdjust = (unsigned int) memPointer % (unsigned int) iSizeOf;
00149   pTaps = (mmxpc2 *) ((unsigned int) (iSizeOf-iAdjust) +
00150                       (unsigned int) memPointer);
00151   pTaps[0]=vrcToMMXPC2(taps[0],taps[1]);
00152   pTaps[size]=vrcToMMXPC2(0,taps[0]);
00153   for(int x=1;x<size-1-4;x++) {
00154     pTaps[x]=vrcToMMXPC2(taps[2*x],taps[2*x+1]);
00155     pTaps[size+x]=vrcToMMXPC2(taps[2*x-1],taps[2*x]);
00156   }
00157   if(numTaps%2==0) {
00158     pTaps[size-1-4] = 0;
00159     pTaps[2*size-1-4] = vrcToMMXPC2(taps[2*(size-4)-3],0);
00160   } else {
00161     pTaps[size-1-4] = vrcToMMXPC2(taps[2*(size-4)-2],0);
00162     pTaps[2*size-1-4] = vrcToMMXPC2(taps[2*(size-4)-3],taps[2*(size-4)-2]);
00163   }
00164   for(int x=size-4;x<size;x++)
00165     pTaps[x]=pTaps[size+x]=0;
00166 }
00167 
00168 inline
00169 mmxTaps::~mmxTaps() {
00170   if(memPointer!=NULL)
00171     delete [] memPointer;
00172 }
00173 
00174 //
00175 // char input vector
00176 //
00177 inline VrComplex
00178 mmxTaps::mmxCVDProduct(char inputArray[]) {
00179   VrComplex r;
00180   VrComplex rextra = 0;
00181   mmxpc2 *pt=pTaps;
00182   VrComplex *t = taps;
00183   int n=numTaps;
00184 
00185   if(numTaps==0) //MMX not present or mmxTaps not initialized
00186     return 0;
00187 
00188   unsigned long p1 = (unsigned long) inputArray;
00189   p1 &= 7;
00190   if(p1!=0) {
00191     /**** This stuff never gets used since the way the GuPPi drivers ****/
00192     /**** ensures out data is always aligned...                      ****/
00193     /**** (It also has therefore NOT been tested!!!)                 ****/
00194     if(p1 & 1) {
00195       fprintf(stderr, "ADJUST1\n");
00196       //use second array for alignment (point to 2nd tap)
00197       pt+=(numTaps/2+1+4);
00198     }
00199     fprintf(stderr, "Adjust: %ld\n", p1);
00200     p1 = 8 - p1;
00201     pt += p1/2;
00202     n-=p1;
00203     while(p1--) {
00204       rextra +=  *t++ * *inputArray++;
00205     }
00206   }
00207 
00208   mmxpcr pr;
00209 
00210   int nextra = n & 0x7; //extra after multiple of 8
00211 
00212   _ccvdp_mmx(inputArray,pt,&pr,n-nextra);
00213 
00214   if(nextra>0) {
00215     t+=n-nextra;
00216     inputArray+=n-nextra;
00217     while(nextra--) {
00218       rextra +=  *t++ * *inputArray++;
00219     }
00220   }
00221 
00222   r = MMXPCRToVrc(pr);
00223   r += rextra;
00224   return r;
00225 }
00226 
00227 // 
00228 // short input vector
00229 //
00230 inline VrComplex
00231 mmxTaps::mmxCVDProduct(short inputArray[]) {
00232   VrComplex r;
00233   VrComplex rextra = 0;
00234   mmxpc2 *pt=pTaps;
00235   VrComplex *t = taps;
00236   int n=numTaps;
00237 
00238   if(numTaps==0) //MMX not present or mmxTaps not initialized
00239     return 0;
00240 
00241   unsigned long p1 = (unsigned long) inputArray;
00242   p1 &= 7;
00243   if(p1!=0) {
00244     /**** This stuff never gets used since the way the GuPPi drivers ****/
00245     /**** ensures out data is always aligned...                      ****/
00246     /**** (It also has therefore NOT been tested!!!)                 ****/
00247     if(p1 & 1) {
00248       fprintf(stderr, "ADJUST1\n");
00249       //use second array for alignment (point to 2nd tap)
00250       pt+=(numTaps/2+1+4);
00251     }
00252     // fprintf(stderr, "Adjust: %ld\n", p1);
00253     p1 = 8 - p1;
00254     pt += p1/2;
00255     n-=p1;
00256     while(p1--) {
00257       rextra +=  *t++ * *inputArray++;
00258     }
00259   }
00260 
00261   mmxpcr pr;
00262 
00263   int nextra = n & 0x7; //extra after multiple of 8
00264 
00265   _scvdp_mmx(inputArray,pt,&pr,n-nextra);
00266 
00267   if(nextra>0) {
00268     t+=n-nextra;
00269     inputArray+=n-nextra;
00270     while(nextra--) {
00271       rextra +=  *t++ * *inputArray++;
00272     }
00273   }
00274 
00275   r = MMXPCRToVrc(pr);
00276   r += rextra;
00277   return r;
00278 }
00279 
00280 inline
00281 void
00282 mmxAdd(char in1[],char in2[],unsigned int x, char out[]) {
00283   if(x==0) //MMX not present or input arrays not present
00284     return;
00285    _vradd_mmx(in1, in2, x, out);
00286 }
00287 
00288 inline
00289 void
00290 mmxFMMulAdd(short in1[], short in2[], unsigned int length, short out[]) {
00291   if(length==0) //nothing to process
00292     return;
00293 
00294   //***Assumes length of in2 is a multiple of 4***
00295   //***Assumes length of in1 is at least 4     ***
00296   if((length % 16)==0)
00297     _vrproc_mmx(in1, in2, length, out);
00298       else {
00299         unsigned int n = length % 16;
00300         unsigned int p3 = length - n;
00301         _vrproc_mmx(in1, in2, p3, out);
00302         for(unsigned int i=p3;i<=length;i+=4) {
00303           unsigned int q3= i/4;
00304           out[q3] = (in1[0] * in2[i]) + (in1[1] * in2[i+1]) + (in1[2] * in2[i+2]) + (in1[3] * in2[i+3]);
00305         }
00306       }
00307 }
00308 
00309 #endif
00310 

Generated on Tue Mar 15 23:55:34 2005 for GNU Radio by  doxygen 1.4.0