Let's `Compile`.

 dif = 1/500; pec = 20; U0 = 0; V0 = 0; F0 = 3; dn0 = 1; kap = 5; n = 63(*31*); 
 n1 = n + 1;(*dt=40./n^2;*)sm = 4 200; r = 20;(*a=dt dif n n;*)
 (*c=ConstantArray[0.,{n1,n1}];*)
 (*d=ConstantArray[0,{n1,n1}];*)
 den = ConstantArray[dn0 (1 + .3 Tanh[-kap Range[-n1/2, n1/2]]), n1];
 (*c0=ConstantArray[0,{n1,n1}];*)
 u0 = ConstantArray[0., {n1, n1}]; 
 v0 = ConstantArray[0., {n1, n1}];
 (*u=ConstantArray[0,{n1,n1}];
 v=ConstantArray[0,{n1,n1}];*) 
 (*Do[den[[All,j]]=dn0 (1+.3 Tanh[kap (n1/2-j)]);,{j,n1}];*)
 (*dnup=den[[1,n1]];dnd=den[[1,1]];*)
 
 periodic[n_, up_, ud_, ub_] := 
 Module[{bd = ub}, Do[bd[[1, i]] = .5 (bd[[n, i]] + bd[[2, i]]);
 bd[[n + 1, i]] = bd[[1, i]]; bd[[i, 1]] = ud;
 bd[[i, n + 1]] = up;, {i, 2, n}];
 bd[[1, 1]] = .5 (bd[[2, 1]] + bd[[1, 2]]);
 bd[[n + 1, n + 1]] = .5 (bd[[n, n + 1]] + bd[[n + 1, n]]);
 bd[[n + 1, 1]] = .5 (bd[[n, 1]] + bd[[n + 1, 2]]);
 bd[[1, n + 1]] = .5 (bd[[1, n]] + bd[[2, n + 1]]); bd];
 
 diffuse[n_, r_, a_, c_, c0_] := 
 Module[{c1 = c},(*c1=ConstantArray[0,{n+1,n+1}];*)
 Do[Do[Do[
 c1[[i, j]] = (c0[[i, j]] + 
 a (c1[[i - 1, j]] + c1[[i + 1, j]] + c1[[i, j - 1]] + c1[[i, j + 1]]))/(1 + 
 4 a);, {j, 2, n}];, {i, 2, n}];
 Do[c1[[1, i]] = c1[[n, i]]; c1[[n + 1, i]] = c1[[2, i]];
 c1[[i, 1]] = c0[[i, 1]];
 c1[[i, n + 1]] = c0[[i, n + 1]];, {i, 2, n}];
 c1[[1, 1]] = .5 (c1[[2, 1]] + c1[[1, 2]]);
 c1[[n + 1, n + 1]] = .5 (c1[[n, n + 1]] + c1[[n + 1, n]]);
 c1[[n + 1, 1]] = .5 (c1[[n, 1]] + c1[[n + 1, 2]]);
 c1[[1, n + 1]] = .5 (c1[[1, n]] + c1[[2, n + 1]]);, {k, 0, r}];
 c1];
 
 advect[n_, d0_, u_, v_, dt_] := 
 Module[{x, y, d1, dt0, i0, i1, j0, j1, s0, s1, t0, t1}, 
 d1 = ConstantArray[0, {n + 1, n + 1}]; dt0 = dt n;
 Do[Do[x = i - dt0 u[[i, j]]; y = j - dt0 v[[i, j]];
 i0 = Which[x <= 1, 1, 1 < x < n, Floor[x], True(*x≥n*), n];
 i1 = i0 + 1;
 j0 = Which[y <= 1, 1, 1 < y < n, Floor[y], True(*y≥n*), n];
 j1 = j0 + 1; s1 = x - i0; s0 = 1 - s1; t1 = y - j0; t0 = 1 - t1;
 d1[[i, j]] = 
 s0 (t0 d0[[i0, j0]] + t1 d0[[i0, j1]]) + 
 s1 (t0 d0[[i1, j0]] + t1 d0[[i1, j1]]);, {j, 1, n + 1}];, {i, 1, n + 1}]; d1];
 
 project[n_, r_, u0_, v0_, u_, v_] := 
 Module[{ux = u, vy = v, div, p}, p = ConstantArray[0, {n + 1, n + 1}];
 div = ConstantArray[0, {n + 1, n + 1}];
 ux = ConstantArray[0, {n + 1, n + 1}];
 vy = ConstantArray[0, {n + 1, n + 1}];
 Do[div[[i, j]] = -.5/
 n (u0[[i + 1, j]] - u0[[i - 1, j]] + v0[[i, 1 + j]] - v0[[i, j - 1]]);, {i, 2, 
 n}, {j, 2, n}];
 Do[Do[Do[
 p[[i, j]] = (div[[i, j]] + (p[[i - 1, j]] + p[[i + 1, j]] + p[[i, j - 1]] + 
 p[[i, j + 1]]))/4;, {j, 2, n}], {i, 2, n}];, {k, 0, r}];
 Do[ux[[i, j]] = u0[[i, j]] - .5 n (p[[i + 1, j]] - p[[i - 1, j]]);
 vy[[i, j]] = v0[[i, j]] - .5 n (p[[i, j + 1]] - p[[i, j - 1]]);, {i, 2, n}, {j, 2, 
 n}]; {ux, vy}];
 
 Fx[t_, x_, y_] := F0 ((y - .5) Sin[2 Pi t]^2 - (x - 0.5) Sin[2 Pi t] Cos[2 Pi t]);
 Fy[t_, x_, y_] := F0 (-(x - .5) Cos[2 Pi t]^2 + (y - .5) Sin[2 Pi t] Cos[2 Pi t]);
 
 onestep[n_, step_, r_, a_, uin_, vin_, dt_, c_] := 
 Module[{u1, v1, f1, f2(*,c*), u, v, u0, v0}, f1 = ConstantArray[0, {n + 1, n + 1}];
 f2 = ConstantArray[0., {n + 1, n + 1}];
 u0 = ConstantArray[0., {n + 1, n + 1}];
 v0 = ConstantArray[0., {n + 1, n + 1}];
 u = ConstantArray[0., {n + 1, n + 1}];
 v = ConstantArray[0., {n + 1, n + 1}];
 u1 = ConstantArray[0., {n + 1, n + 1}];
 v1 = ConstantArray[0., {n + 1, n + 1}]; u0 = uin; v0 = vin;
 u0 = advect[n, u0, u0, v0, dt]; v0 = advect[n, v0, u0, v0, dt];
 u0 = periodic[n, 0, 0, u0]; v0 = periodic[n, 0, 0, v0];
 u0 = diffuse[n, r, a, c, u0]; v0 = diffuse[n, r, a, c, v0];
 u0 = periodic[n, 0, 0, u0]; v0 = periodic[n, 0, 0, v0];
 Do[f1[[i, j]] = Fx[dt (step + .5), (i - 1)/n, (j - 1)/n]/den[[i, j]];
 f2[[i, j]] = Fy[dt (step + .5), (i - 1)/n, (j - 1)/n]/den[[i, j]];, 
 {i, 2, n + 1}, {j, 2, n + 1}]; u0 += f1 dt;
 v0 += f2 dt; {u1, v1} = project[n, r, u0, v0, u, v];
 u0 = periodic[n, 0, 0, u1]; v0 = periodic[n, 0, 0, v1]; {u0, v0}]
 
 cf = With[{cg = Compile`GetElement, hp = HoldPattern, dv = DownValues}, 
 Hold@Compile[{{u0argu, _Real, 2}, {v0argu, _Real, 2}, {denargu, _Real, 
 2}, {sm, _Integer}, {n, _Integer}, {r, _Integer}, dif, pec, F0}, 
 Module[{u0 = u0argu, v0 = v0argu, uu, vv, dd, den = denargu, 
 c = Table[0., {n + 1}, {n + 1}], dt = 40./n^2, a, dnup = den[[1, n + 1]], 
 dnd = den[[1, 1]]}, a = dt dif n n; 
 uu = vv = dd = Table[0., {sm + 1}, {n + 1}, {n + 1}]; 
 Do[{u0, v0} = onestep[n, step, r, a, u0, v0, dt, c];
 uu[[step + 1]] = u0;
 vv[[step + 1]] = v0;
 den = diffuse[n, r, a/pec, c, den];
 den = periodic[n, dnup, dnd, den];
 den = advect[n, den, u0, v0, dt];
 den = periodic[n, dnup, dnd, den];
 dd[[step + 1]] = den;, {step, 0, sm}]; {uu, vv, dd}], 
 CompilationTarget -> C, RuntimeOptions -> "Speed"] /. dv@onestep /. 
 Flatten[dv /@ {Fx, Fy, advect, diffuse, periodic, project}] /. 
 hp@ConstantArray[c_, {i_, j_}] :> Table[0., {i}, {j}] /. 
 hp@Part[a__] :> cg[a] /. hp[cg[a__] = rhs_] :> (Part[a] = rhs) // 
 ReleaseHold]; // AbsoluteTiming
 (* {1.375986, Null} *)
 
 rst = cf[u0, v0, den, sm, n, r, dif, pec, F0]; // AbsoluteTiming
 (* {3.036883, Null} *)
 
 (*
 lst = 
 ListContourPlot[Transpose[#], ColorFunction -> "BlueGreenYellow", Contours -> 5, 
 PlotRange -> All, Frame -> False] & /@ 
 rst[[-1]]; // AbsoluteTiming
 *)
 (* {28.801388, Null} *)
 
 lst = 
 ArrayPlot[Transpose[#], ColorFunction -> "BlueGreenYellow", DataReversed -> True, 
 Frame -> None, PlotRangePadding -> None] & /@ rst[[-1]]; // AbsoluteTiming
 (* {6.044640, Null} *)

[![enter image description here][1]][1]

Notice I've increased `sm` and `n`. For `sm = 200; n = 31;` it only takes 0.2 second to calculate `rst` on my laptop, which is a 350x speed-up.
 
To understand why the code is modified in this manner, you may want to read

https://mathematica.stackexchange.com/q/47485/1871

https://mathematica.stackexchange.com/q/145931/1871

https://mathematica.stackexchange.com/q/132130/1871

https://mathematica.stackexchange.com/q/46345/1871


 [1]: https://i.sstatic.net/IffOt.gif