-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Example] add N-body of O(N log N) make use of sparse #814
Comments
#include <GL/glut.h>
#include <glm/glm.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <vector>
#include <array>
#include <cstdio>
#include <cmath>
const int RES = 512;
const int IVAL = 1000.0 / 300;
const float SIZ = 1.2;
const float DT = 0.001;
const float SPED = 1.0;
const float GRAV = 1.0;
inline glm::vec2 gravity(glm::vec2 dis, float mass)
{
auto d = glm::length(dis);
if (d < 1e-5) return glm::vec2(0.0);
auto sc = mass / (d * d * d);
return dis * sc;
}
struct Particle {
glm::vec2 pos, vel;
float mass;
bool background;
Particle(glm::vec2 &&pos, glm::vec2 &&vel,
float mass = 0.0, bool background = false)
: pos(pos), vel(vel * SPED * DT)
, mass(mass * GRAV * DT * DT)
, background(background)
{}
void render(void) const
{
glVertex3fv(glm::value_ptr(pos));
}
};
std::vector<Particle> P;
int bv2i(glm::bvec2 const &b)
{
return int(b.x) + int(b.y) * 2;
}
glm::vec2 i2bvf(int i)
{
return glm::vec2(i % 2, i / 2);
}
struct Quadtree {
glm::vec2 topleft;
float size;
int par{-1};
std::array<Quadtree *, 4> ch;
constexpr glm::vec2 fullsize() const
{
return glm::vec2(size);
}
constexpr glm::vec2 halfsize() const
{
return glm::vec2(size * 0.5);
}
float mass() const
{
float m = 0;
for (auto &&c: ch) {
if (c) m += c->mass();
}
if (par != -1) m += P[par].mass;
return m;
}
glm::vec2 mass_center() const
{
glm::vec2 p(0.0);
float m = 0;
for (auto &&c: ch) {
if (!c) continue;
auto ma = c->mass();
p += ma * c->mass_center();
m += ma;
}
if (par != -1) {
m += P[par].mass;
p += P[par].pos * P[par].mass;
}
return m == 0.0 ? this->center() : p / m;
}
glm::vec2 center() const
{
return topleft + halfsize();
}
float factor(glm::vec2 const &pos) const
{
auto d = this->center() - pos;
return size / glm::length(d);
}
glm::vec2 visit(glm::vec2 const &pos, int depth = 0) const
{
float fac = 0;
glm::vec2 acc(0.0);
for (auto &&c: ch) {
if (!c) continue;
auto f = c->factor(pos);
if (f >= 1)
acc += c->visit(pos);
else
acc += gravity(c->mass_center() - pos, c->mass());
}
if (par != -1) {
acc += gravity(P[par].pos - pos, P[par].mass);
}
return acc;
}
bool in_range(glm::vec2 const &pos) const
{
return glm::all(glm::greaterThanEqual(pos, topleft))
&& glm::all(glm::lessThan(pos, topleft + fullsize()));
}
glm::bvec2 which_range(glm::vec2 const &pos) const
{
return glm::greaterThanEqual(pos, center());
}
void build(std::vector<int> const &ps, int depth = 0)
{
if (ps.size() == 0) return;
if (ps.size() == 1) {
par = ps[0];
return;
}
std::vector<int> npss[4];
for (auto &&p: ps) {
if (!in_range(P[p].pos)) continue;
int idx = bv2i(which_range(P[p].pos));
npss[idx].push_back(p);
}
for (int i = 0; i < 4; i++) {
if (npss[i].size()) {
auto b = i2bvf(i);
ch[i] = new Quadtree{topleft + halfsize() * b, size * 0.5f};
ch[i]->build(npss[i], depth + 1);
}
}
}
void render(int depth = 0)
{
glLineWidth(par != -1 ? 2 : 1);
glColor4f(par != -1 ? 0.9 : 0.3, 0.0, 0.0, 0.1);
glBegin(GL_LINE_LOOP);
glVertex2f(topleft.x, topleft.y);
glVertex2f(topleft.x + size, topleft.y);
glVertex2f(topleft.x + size, topleft.y + size);
glVertex2f(topleft.x, topleft.y + size);
glEnd();
for (auto &&c: ch) {
if (c) c->render(depth + 1);
}
}
};
Quadtree *qt;
void build_tree(void)
{
qt = new Quadtree{glm::vec2(0.0), SIZ};
std::vector<int> ps;
for (int i = 0; i < P.size(); i++) {
ps.push_back(i);
}
qt->build(ps);
}
glm::vec2 field(glm::vec2 const &pos)
{
return qt->visit(pos); /* comment this line for O(N*N) computation */
glm::vec2 acc(0.0);
for (auto &&p: P) {
acc += gravity(p.pos - pos, p.mass);
}
return acc;
}
void advance(void)
{
build_tree();
float dt = (rand() / RAND_MAX) * 0.7 + 0.4;
for (auto &&p: P) {
if (p.background)
continue;
auto acc = field(p.pos);
p.vel += acc * dt;
}
for (auto &&p: P) {
p.pos += p.vel * dt;
}
}
void init(void)
{
#if 1
P.push_back(Particle(
glm::vec2(0.47, 0.50),
glm::vec2(0.00, 0.40),
0.050));
P.push_back(Particle(
glm::vec2(0.53, 0.50),
glm::vec2(0.00, -0.40),
0.050));
P.push_back(Particle(
glm::vec2(1.10, 0.71),
glm::vec2(0.00, 0.27),
0.002));
#else
P.push_back(Particle(
glm::vec2(0.50, 0.44),
glm::vec2(0.00, 0.00),
0.050));
P.push_back(Particle(
glm::vec2(0.50, 0.56),
glm::vec2(0.10, 0.00),
0.050));
#endif
float mass = 0.0;
glm::vec2 adjust(0.0);
for (auto &&p: P) {
mass += p.mass;
adjust += p.vel * p.mass;
}
adjust /= mass;
for (auto &&p: P) {
if (p.mass != 0.0)
p.vel -= adjust;
}
}
void display(void)
{
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glColor4f(0.0, 0.0, 0.0, 0.05);
glRectf(0.0, 0.0, SIZ, SIZ);
glColor4f(1.0, 1.0, 1.0, 1.0);
glPointSize(3);
glBegin(GL_POINTS);
for (auto &&p: P) {
p.render();
}
glEnd();
if (qt) qt->render();
glFlush();
}
void timer(int id)
{
advance();
advance();
advance();
advance();
advance();
glutPostRedisplay();
glutTimerFunc(IVAL, timer, 1);
}
void keyboard(unsigned char key, int x, int y)
{
if (key == 27)
exit(0);
}
int main(int argc, char **argv)
{
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB);
glutInitWindowSize(RES, RES);
glutCreateWindow("N-body");
glutKeyboardFunc(keyboard);
glutDisplayFunc(display);
glutTimerFunc(IVAL, timer, 1);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0.0, SIZ, 0.0, SIZ);
glShadeModel(GL_FLAT);
init();
glutMainLoop();
} |
@yuanming-hu could you write a doc on sparse data structures? So that I know how to make use of it to make octrees? |
Right, I should do that once I get a chance... |
C++ version is done now, have fun! You may notice that the precession is gone after we uncomment the line |
I found a bug when writing python n-body on @ti.kernel
def adjust_momentum():
adj = ti.Vector([0.0, 0.0])
mac = 0.0
for i in pos:
mac += mass[i]
adj += vel[i] * mac
adj = adj / mac
for i in pos:
vel[i] += adj obtains:
It cannot because $66 exist in the previous offload, and the constant_fold optimized it even if it's crossing offloaded tasks. In additional, if I change it to |
Right, this is indeed a bug, being fixed in #813... |
I merged #813 in. Could you retry with the latest commit? |
Great! Building now, but can you tell why |
After merging with latest commit, the issue still exists... |
I realized this is not about constant_fold.
after Simplified II:
The local store was eliminated! Why? Can we do full_simplify after offloaded? You don't want to bother offload bounderies. |
I removed full_simplify and compilation passed. @ti.kernel
def adjust_momentum():
adj = ti.Vector([0.0, 0.0])
mac = 0.0
for i in vel:
mac += mass[i]
adj += vel[i] * mac
adj /= mac
print(adj[0])
print(adj[1])
for i in pos:
print(adj[0])
print(adj[1])
vel[i] += adj obtains:
If I change
Schrodinger's cat?? Why changing codes after a line can affect that line??? |
SIZ = 1.2
DT = 0.001
ti.init(arch=ti.x64)
N = 3
pos = ti.Vector(2, dt=ti.f32, shape=N)
vel = ti.Vector(2, dt=ti.f32, shape=N)
mass = ti.var(dt=ti.f32, shape=N)
@ti.func
def gravity(dis, m):
d = dis.norm()
ret = ti.Vector([0.0, 0.0])
if d >= 1e-5:
sc = m / d ** 3
ret = dis * sc
return ret
@ti.func
def field(p):
acc = ti.Vector([0.0, 0.0])
for i in range(N):
acc += gravity(pos[i] - p, mass[i])
return acc
@ti.kernel
def advance():
dt = ti.random() * 0.7 + 0.4
for i in pos:
acc = field(pos[i])
vel[i] += acc * dt
for i in pos:
pos[i] += vel[i] * dt
@ti.kernel
def adjust_momentum():
adj = ti.Vector([0.0, 0.0])
mac = 0.0
for i in vel:
mac += mass[i]
adj += vel[i] * mac
adj /= mac
print(adj[0])
print(adj[1])
for i in pos:
print(adj[0])
print(adj[1])
vel[i] += adj
def initialize():
pos.from_numpy(np.array([0.47, 0.50, 0.53, 0.50, 1.10, 0.71]).reshape(N, 2))
vel.from_numpy(np.array([0.00, 0.40, 0.00, -0.4, 0.00, 0.27]).reshape(N, 2) * DT)
mass.from_numpy(np.array([0.050, 0.050, 0.000]) * DT ** 2)
initialize()
adjust_momentum()
gui = ti.GUI('N-body')
while not gui.get_event(ti.GUI.PRESS):
for i in range(5):
_pos = pos.to_numpy()
gui.circles(_pos / SIZ, radius=5, color=0x333333 * i)
for i in range(5):
advance()
gui.show() Have |
Thanks for sharing this. There's actually a racing condition (which is undefined behavior in Taichi) in this piece of code and I'm not surprised that you are getting weird behaviors.
Note that I believe you meant adj = ti.Vector([0.0, 0.0])
mac = 0.0
for i in vel: # Note this loop is parallel
mac += mass[i]
adj += vel[i] * mass[i] # instead of mac |
Another bug: @ti.kernel
def advance():
for i in range(N):
n = ti.cast(ti.max(vel[i].norm() / VEL0 + 0.5, 1), ti.i32)
cnt[i] = n
for i in range(N):
dt = 1 / cnt[i]
for t in range(cnt[i]):
acc[i] = field(pos[i])
print(acc[i][0] * INV_DT ** 2)
for s in range(1):
vel[i] += acc[i] * dt
pos[i] += vel[i] * dt obtains:
If I replace
|
@yuanming-hu I suddenly realized that the differentiable programming may help us to calculate gravity force vector from potential energy!!! |
import taichi as ti
import numpy as np
from random import Random
rand_gen = Random()
SIZ = 1.2
DT = 0.0005
INV_DT = 1 / DT
INV_VEL0 = INV_DT / 0.2
INV_ACC0 = INV_DT ** 2 / 0.2
MAX_LOSS = 2 ** 14
MIN_DIS3 = 4e-7
ti.init(arch=ti.cuda)#, print_ir=True)
N = 32
T = 9
pos = ti.Vector(2, dt=ti.f32)
acc = ti.Vector(2, dt=ti.f32)
vel = ti.Vector(2, dt=ti.f32)
mass = ti.var(dt=ti.f32)
pot = ti.var(dt=ti.i32)
max_pot = ti.var(dt=ti.i32)
d_x = ti.root.dense(ti.i, N)
b_x = ti.root.bitmasked(ti.i, N)
d_x.place(pos, vel, pot, mass)
b_x.place(acc)
ti.root.place(max_pot)
@ti.func
def gravity(dis, m):
d = dis.norm()
ret = ti.Vector([0.0, 0.0])
ret = dis * (m / (MIN_DIS3 + d ** 3))
return ret
@ti.func
def field(p):
a = ti.Vector([0.0, 0.0])
for i in range(N):
a += gravity(pos[i] - p, mass[i])
return a
@ti.kernel
def calc_pot():
max_pot[None] = 0
for i in range(N):
loss = acc[i].norm() * INV_ACC0
loss += vel[i].norm() * INV_VEL0
n = ti.min(ti.max(loss, 1), MAX_LOSS)
pot[i] = ti.cast(ti.log(n) / ti.log(2.0) + 0.5, ti.i32)
ti.atomic_max(max_pot[None], pot[i])
@ti.kernel
def advance_sub(seq: ti.i32):#, dt0: ti.f32):
for i in range(N):
p = max_pot[None] - pot[i]
tp = 2 ** p
if seq % tp:
ti.deactivate(b_x, i)
continue
acc[i] = field(pos[i])
for i in acc:
steps = 2 ** pot[i]
dt = 1 / steps
vel[i] += acc[i] * dt
pos[i] += vel[i] * dt
@ti.kernel
def bouncing_wall():
for i in range(N):
for s in ti.static(range(2)):
if pos[i][s] >= SIZ and vel[i][s] > 0:
vel[i][s] = -vel[i][s]
elif pos[i][s] <= 0 and vel[i][s] < 0:
vel[i][s] = -vel[i][s]
def advance():
calc_pot()
max_seq = 2 ** max_pot[None]
for seq in range(max_seq):
advance_sub(seq)#, rand_gen.gauss(1, 0.1))
bouncing_wall()
@ti.kernel
def adjust_momentum():
mac = 0.0
adj = ti.Vector([0.0, 0.0])
for i in range(N):
mac += mass[i]
adj += vel[i] * mass[i]
adj /= mac
for i in range(N):
vel[i] -= adj
@ti.kernel
def calc_energy():
e = 0.0
for i in range(N):
e += mass[i] * vel[i].norm_sqr()
for i in range(N):
phi = 0.0
for j in range(N):
if j == i: continue
d = pos[i] - pos[j]
phi += mass[j] / d.norm()
e -= phi * mass[i]
e *= INV_DT ** 4
print(e)
@ti.kernel
def calc_momentum():
p = ti.Vector([0.0, 0.0])
for i in range(N):
p += vel[i] * mass[i]
p *= INV_DT ** 3
print(p[0])
print(p[1])
@ti.kernel
def initialize():
for i in range(N):
pos[i] = ti.Vector([ti.random(), ti.random()]) * SIZ
vel[i] = (ti.Vector([ti.random(), ti.random()]) - 0.5) * 6.0 * DT
mass[i] = 0.1 * ti.random() * DT ** 2
initialize()
#adjust_momentum()
gui = ti.GUI('N-body')
for frame in range(100000):
if gui.get_event(ti.GUI.PRESS):
break
for i in range(4):
advance()
_pos = pos.to_numpy()
gui.circles(_pos / SIZ, radius=2)
gui.show()
if frame % 32 == 0:
calc_energy() Update: dispatch more computation resource to those bodies in deeper gravity well so that energy is conserved. (i.e. |
How to use |
@archibate I think @yuanming-hu 's Taichi paper would be a good place to start reading about storing particles in sparse data structures. http://taichi.graphics/wp-content/uploads/2019/09/taichi_lang.pdf . You can look at the taichi_sparse.py example for the new syntax. Also, you probably would not want to create a quadtree directly from taichi pointer data structures. Instead, you want to create a pyramid of sparse grids with increasing resolution to emulate an quadtree. An example of this is in the mgpcg.py example. |
Concisely describe the proposed feature
Describe the solution you'd like (if any)
Additional comments
The text was updated successfully, but these errors were encountered: