00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef _VRMMX_H_
00039 #define _VRMMX_H_
00040
00041
/* 64-bit word holding four packed 16-bit fixed-point tap components
 * (see mmxTaps::vrcToMMXPC2 for the exact layout). */
typedef unsigned long long mmxpc2;

/* 64-bit result word: two 32-bit halves, real part in the upper half and
 * imaginary part in the lower half (see mmxTaps::MMXPCRToVrc). */
typedef unsigned long long mmxpcr;

/* Low-level routines with C linkage, defined outside this header
 * (presumably hand-written MMX assembly -- confirm against the build). */
extern "C" {
  /* Called by mmxAdd(): array1, array2 are inputs, x a length, array3 the output. */
  void _vradd_mmx(char array1[], char array2[], unsigned int x, char array3[]);

  /* Called by mmxTaps::mmxCVDProduct(char[]); writes its result through r. */
  void _ccvdp_mmx(char inputArray[], mmxpc2 *mmxTaps, mmxpcr *r, int numTaps);

  /* Called by mmxTaps::mmxCVDProduct(short[]); writes its result through r. */
  void _scvdp_mmx(short inputArray[], mmxpc2 *mmxTaps, mmxpcr *r, int numTaps);

  /* Non-zero return is treated by this header as "MMX is available". */
  int _query_mmx();

  /* Called by mmxFMMulAdd() for the part of the input that is a multiple of 16. */
  void _vrproc_mmx(short array1[], short array2[], unsigned int length, short array3[]);
}
00050
00051 static inline int queryMMX() {return _query_mmx();}
00052
00053 class mmxTaps {
00054 private:
00055 VrComplex *taps;
00056 int numTaps;
00057 char *memPointer;
00058 mmxpc2 *pTaps;
00059 float real_fixed16factor;
00060 float imag_fixed16factor;
00061 public:
00062 mmxTaps() : numTaps(0),memPointer(NULL) {};
00063 mmxTaps(VrComplex[], int);
00064 ~mmxTaps();
00065
00066 inline int mmxReady() {return (numTaps!=0);}
00067 VrComplex mmxCVDProduct(char inputArray[]);
00068 VrComplex mmxCVDProduct(short inputArray[]);
00069
00070 inline mmxpc2 vrcToMMXPC2(VrComplex a, VrComplex b) {
00071 short Ar=(short) (a.real()*real_fixed16factor);
00072 short Ai=(short) (a.imag()*imag_fixed16factor);
00073 short Br=(short) (b.real()*real_fixed16factor);
00074 short Bi=(short) (b.imag()*imag_fixed16factor);
00075 mmxpc2 x = (unsigned long long) Br & 0xffff;
00076 x <<= 16;
00077 x |= (unsigned long long) Ar & 0xffff;
00078 x <<= 16;
00079 x |= (unsigned long long) Bi & 0xffff;
00080 x <<= 16;
00081 x |= (unsigned long long) Ai & 0xffff;
00082 return x;
00083 }
00084
00085 inline VrComplex MMXPCRToVrc(mmxpcr p) {
00086 long Pr=(long) (p>>32);
00087 long Pi=(long) (p&0xFFFFFFFF);
00088 float fPr=(float) Pr / ((float) real_fixed16factor);
00089 float fPi=(float) Pi / ((float) imag_fixed16factor);
00090 return VrComplex(fPr,fPi);
00091 }
00092 };
00093
00094 inline
00095 mmxTaps::mmxTaps(VrComplex mytaps[],int n) : taps(mytaps), numTaps(n){
00096 if(!_query_mmx()) {
00097 numTaps=0;
00098 return;
00099 }
00100
00101 float rmax=-1, rmin=100;
00102 float imax=-1, imin=100;
00103 for(int x=0;x<numTaps;x++) {
00104 float r=abs(real(taps[x]));
00105 float i=abs(imag(taps[x]));
00106 if(r<rmin && r!=0) rmin=r;
00107 if(r>rmax) rmax=r;
00108 if(i<imin && i!=0) imin=i;
00109 if(i>imax) imax=i;
00110 }
00111
00112
00113
00114
00115
00116
00117
00118
00119 real_fixed16factor=(1<<15)/(rmax+rmin) * 0.25;
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134 imag_fixed16factor=(1<<15)/(imax+imin) * 0.25;
00135
00136
00137
00138
00139
00140
00141
00142 int size = numTaps/2+1+4;
00143
00144
00145 int iSizeOf = sizeof(mmxpc2);
00146 unsigned long csize = size*2*iSizeOf+iSizeOf;
00147 memPointer = new char[csize];
00148 unsigned int iAdjust = (unsigned int) memPointer % (unsigned int) iSizeOf;
00149 pTaps = (mmxpc2 *) ((unsigned int) (iSizeOf-iAdjust) +
00150 (unsigned int) memPointer);
00151 pTaps[0]=vrcToMMXPC2(taps[0],taps[1]);
00152 pTaps[size]=vrcToMMXPC2(0,taps[0]);
00153 for(int x=1;x<size-1-4;x++) {
00154 pTaps[x]=vrcToMMXPC2(taps[2*x],taps[2*x+1]);
00155 pTaps[size+x]=vrcToMMXPC2(taps[2*x-1],taps[2*x]);
00156 }
00157 if(numTaps%2==0) {
00158 pTaps[size-1-4] = 0;
00159 pTaps[2*size-1-4] = vrcToMMXPC2(taps[2*(size-4)-3],0);
00160 } else {
00161 pTaps[size-1-4] = vrcToMMXPC2(taps[2*(size-4)-2],0);
00162 pTaps[2*size-1-4] = vrcToMMXPC2(taps[2*(size-4)-3],taps[2*(size-4)-2]);
00163 }
00164 for(int x=size-4;x<size;x++)
00165 pTaps[x]=pTaps[size+x]=0;
00166 }
00167
00168 inline
00169 mmxTaps::~mmxTaps() {
00170 if(memPointer!=NULL)
00171 delete [] memPointer;
00172 }
00173
00174
00175
00176
00177 inline VrComplex
00178 mmxTaps::mmxCVDProduct(char inputArray[]) {
00179 VrComplex r;
00180 VrComplex rextra = 0;
00181 mmxpc2 *pt=pTaps;
00182 VrComplex *t = taps;
00183 int n=numTaps;
00184
00185 if(numTaps==0)
00186 return 0;
00187
00188 unsigned long p1 = (unsigned long) inputArray;
00189 p1 &= 7;
00190 if(p1!=0) {
00191
00192
00193
00194 if(p1 & 1) {
00195 fprintf(stderr, "ADJUST1\n");
00196
00197 pt+=(numTaps/2+1+4);
00198 }
00199 fprintf(stderr, "Adjust: %ld\n", p1);
00200 p1 = 8 - p1;
00201 pt += p1/2;
00202 n-=p1;
00203 while(p1--) {
00204 rextra += *t++ * *inputArray++;
00205 }
00206 }
00207
00208 mmxpcr pr;
00209
00210 int nextra = n & 0x7;
00211
00212 _ccvdp_mmx(inputArray,pt,&pr,n-nextra);
00213
00214 if(nextra>0) {
00215 t+=n-nextra;
00216 inputArray+=n-nextra;
00217 while(nextra--) {
00218 rextra += *t++ * *inputArray++;
00219 }
00220 }
00221
00222 r = MMXPCRToVrc(pr);
00223 r += rextra;
00224 return r;
00225 }
00226
00227
00228
00229
00230 inline VrComplex
00231 mmxTaps::mmxCVDProduct(short inputArray[]) {
00232 VrComplex r;
00233 VrComplex rextra = 0;
00234 mmxpc2 *pt=pTaps;
00235 VrComplex *t = taps;
00236 int n=numTaps;
00237
00238 if(numTaps==0)
00239 return 0;
00240
00241 unsigned long p1 = (unsigned long) inputArray;
00242 p1 &= 7;
00243 if(p1!=0) {
00244
00245
00246
00247 if(p1 & 1) {
00248 fprintf(stderr, "ADJUST1\n");
00249
00250 pt+=(numTaps/2+1+4);
00251 }
00252
00253 p1 = 8 - p1;
00254 pt += p1/2;
00255 n-=p1;
00256 while(p1--) {
00257 rextra += *t++ * *inputArray++;
00258 }
00259 }
00260
00261 mmxpcr pr;
00262
00263 int nextra = n & 0x7;
00264
00265 _scvdp_mmx(inputArray,pt,&pr,n-nextra);
00266
00267 if(nextra>0) {
00268 t+=n-nextra;
00269 inputArray+=n-nextra;
00270 while(nextra--) {
00271 rextra += *t++ * *inputArray++;
00272 }
00273 }
00274
00275 r = MMXPCRToVrc(pr);
00276 r += rextra;
00277 return r;
00278 }
00279
00280 inline
00281 void
00282 mmxAdd(char in1[],char in2[],unsigned int x, char out[]) {
00283 if(x==0)
00284 return;
00285 _vradd_mmx(in1, in2, x, out);
00286 }
00287
00288 inline
00289 void
00290 mmxFMMulAdd(short in1[], short in2[], unsigned int length, short out[]) {
00291 if(length==0)
00292 return;
00293
00294
00295
00296 if((length % 16)==0)
00297 _vrproc_mmx(in1, in2, length, out);
00298 else {
00299 unsigned int n = length % 16;
00300 unsigned int p3 = length - n;
00301 _vrproc_mmx(in1, in2, p3, out);
00302 for(unsigned int i=p3;i<=length;i+=4) {
00303 unsigned int q3= i/4;
00304 out[q3] = (in1[0] * in2[i]) + (in1[1] * in2[i+1]) + (in1[2] * in2[i+2]) + (in1[3] * in2[i+3]);
00305 }
00306 }
00307 }
00308
00309 #endif
00310