Nicolás Wolovick, $Date: 2012-04-17 14:27:51 -0300 (Tue, 17 Apr 2012) $, $Revision: 3418 $
En C
.
1 static u[size], v[size], u_prev[size], v_p rev[size];
2 static dens[size], dens_p rev[size];
3
4 #define IX(i,j) ((i)+(N+2)*(j))
1 void add_source ( int N, float * x, float * s, float dt )
2 {
3 int i, size=(N+2)*(N+2);
4 for ( i=0 ; i<size ; i++ ) x[i] += dt*s[i];
5 }
x0[IX(i-1,j)]+x0[IX(i+1,j)]+x0[IX(i,j-1)]+x0[IX(i,j+1)]-4*x0[IX(i,j)]
1 void diffuse_bad ( int N, int b, float * x, float * x0, float diff, float dt )
2 {
3 int i, j;
4 float a=dt*diff*N*N;
5 for ( i=1 ; i<=N ; i++ ) {
6 for ( j=1 ; j<=N ; j++ ) {
7 x[IX(i,j)] = x0[IX(i,j)] + a*(x0[IX(i-1,j)]+x0[IX(i+1,j)]+
8 x0[IX(i,j-1)]+x0[IX(i,j+1)]-4*x0[IX(i,j)]);
9 }
10 }
11 set_bnd ( N, b, x );
12 }
1 void diffuse ( int N, int b, float * x, float * x0, float diff, float dt )
2 {
3 int i, j, k;
4 float a=dt*diff*N*N;
5 for ( k=0 ; k<20 ; k++ ) {
6 for ( i=1 ; i<=N ; i++ ) {
7 for ( j=1 ; j<=N ; j++ ) {
8 x[IX(i,j)] = (x0[IX(i,j)] + a*(x[IX(i-1,j)]+x[IX(i+1,j)]+
9 x[IX(i,j-1)]+x[IX(i,j+1)]))/(1+4*a);
10 }
11 }
12 set_bnd ( N, b, x );
13 }
14 }
Intercambia velocidad y exactitud por estabilidad.
1 void advect ( int N, int b, float * d, float * d0, float * u, float * v, float dt )
2 {
3 int i, j, i0, j0, i1, j1;
4 float x, y, s0, t0, s1, t1, dt0;
5 dt0 = dt*N;
6 for ( i=1 ; i<=N ; i++ ) {
7 for ( j=1 ; j<=N ; j++ ) {
8 x = i-dt0*u[IX(i,j)]; y = j -dt0*v[IX(i,j)];
9 if (x<0.5) x=0.5; if (x>N+0.5) x=N+ 0.5; i0=(int)x; i1=i0+ 1;
10 if (y<0.5) y=0.5; if (y>N+0.5) y=N+ 0.5; j0=(int)y; j1=j0+1;
11 s1 = x-i0; s0 = 1-s1; t1 = y-j0; t0 = 1-t1;
12 d[IX(i,j)] = s0*(t0*d0[IX(i0,j0)]+t1*d 0[IX(i0,j1)])+
13 s1*(t0*d0[IX(i1,j0)]+t1*d 0[IX(i1,j1)]);
14 }
15 }
16 set_bnd ( N, b, d );
17 }
1 void dens_step ( int N, float * x, float * x0, float * u, float * v, float diff,
2 float dt )
3 {
4 add_source ( N, x, x0, dt );
5 SWAP ( x0, x ); diffuse ( N, 0, x, x0, diff, dt );
6 SWAP ( x0, x ); advect ( N, 0, x, x0, u, v, dt );
7 }
Donde SWAP
intercambia dos punteros.
1 #define SWAP(x0,x) {float *tmp=x0;x0=x;x=tmp;}
Complicado de explicar, pero usa las mismas rutinas.
1 void vel_step ( int N, float * u, float * v, float * u0, float * v0, float visc,
2 float dt )
3 {
4 add_source ( N, u, u0, dt ); add_source ( N, v, v0, dt );
5 SWAP ( u0, u ); diffuse ( N, 1, u, u0, visc, dt );
6 SWAP ( v0, v ); diffuse ( N, 2, v, v0, visc, dt );
7 project ( N, u, v, u0, v0 );
8 SWAP ( u0, u ); SWAP ( v0, v );
9 advect ( N, 1, u, u0, u0, v0, dt ); advect ( N, 2, v, v0, u0, v0, dt );
10 project ( N, u, v, u0, v0 );
11 }
Faltaría project
.
Ver demo!
N
vs. performance.gprof
para hacer profiling y ver que funciones toman la mayor parte del tiempo.perf
mide performance counters de punta a punta.memset
.(Michael Gerndt, Efficient Programming of Multicore Processors and Supercomputers)
Table of Contents | t |
---|---|
Exposé | ESC |
Full screen slides | e |
Presenter View | p |
Source Files | s |
Slide Numbers | n |
Toggle screen blanking | b |
Show/hide slide context | c |
Notes | 2 |
Help | h |