|
|
|
| Home Page | Freebies | Contact Us |
Studying equivalently cutdown versions of our ray tracer written in C++ and SML is a great way to learn the differences between C++ and SML. This web page presents two versions of the same ray tracer, written in C++ and SML, and compares the code used to implement equivalent functionality in these two languages. The chosen ray tracer is particularly well suited to this task because it involves both integer and floating point operations, imperative-style loops, functional-style recursion, a non-trivial data structure (a tree) and many other interesting contructs which appear regularly in real programming.
Note that these programs are not intended to be optimised for performance. On the contrary, these programs are written in a straightforward, comprehensible style. However, we shall examine the performance of these programs at the end of this page as it is interesting to see how much the compilers can optimise these simple implementations.
Let's begin by stating the complete source codes of the 105-line C++ and 76-line SML programs:
|
C++ (download)
#include <list>
#include <iostream>
#include <limits>
#include <cmath>
using namespace std;
numeric_limits<double> real;
double delta = sqrt(real.epsilon()), infinity = real.infinity();
struct Vec {
double x, y, z;
Vec(double x2, double y2, double z2) : x(x2), y(y2), z(z2) {}
};
Vec operator+(const Vec &a, const Vec &b)
{ return Vec(a.x+b.x, a.y+b.y, a.z+b.z); }
Vec operator-(const Vec &a, const Vec &b)
{ return Vec(a.x-b.x, a.y-b.y, a.z-b.z); }
Vec operator*(double a, const Vec &b) { return Vec(a*b.x, a*b.y, a*b.z); }
double dot(const Vec &a, const Vec &b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
Vec unitise(const Vec &a) { return (1 / sqrt(dot(a, a))) * a; }
typedef pair<double, Vec> Hit;
struct Ray {
Vec orig, dir;
Ray(const Vec &o, const Vec &d) : orig(o), dir(d) {}
};
struct Scene {
virtual ~Scene() {};
virtual Hit intersect(const Hit &, const Ray &) const = 0;
};
struct Sphere : public Scene {
Vec center;
double radius;
Sphere(Vec c, double r) : center(c), radius(r) {}
~Sphere() {}
double ray_sphere(const Ray &ray) const {
Vec v = center - ray.orig;
double b = dot(v, ray.dir), disc = b*b - dot(v, v) + radius * radius;
if (disc < 0) return infinity;
double d = sqrt(disc), t2 = b + d;
if (t2 < 0) return infinity;
double t1 = b - d;
return (t1 > 0 ? t1 : t2);
}
Hit intersect(const Hit &hit, const Ray &ray) const {
double lambda = ray_sphere(ray);
if (lambda >= hit.first) return hit;
return Hit(lambda, unitise(ray.orig + lambda*ray.dir - center));
}
};
typedef list<Scene *> Scenes;
struct Group : public Scene {
Sphere bound;
Scenes child;
Group(Sphere b, Scenes c) : bound(b), child(c) {}
~Group() {
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
delete *it;
}
Hit intersect(const Hit &hit, const Ray &ray) const {
Hit hit2=hit;
double l = bound.ray_sphere(ray);
if (l >= hit.first) return hit;
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
hit2 = (*it)->intersect(hit2, ray);
return hit2;
}
};
Hit intersect(const Ray &ray, const Scene &s)
{ return s.intersect(Hit(infinity, Vec(0, 0, 0)), ray); }
double ray_trace(const Vec &light, const Ray &ray, const Scene &s) {
Hit hit = intersect(ray, s);
if (hit.first == infinity) return 0;
double g = dot(hit.second, light);
if (g >= 0) return 0.;
Vec p = ray.orig + hit.first*ray.dir + delta*hit.second;
return (intersect(Ray(p, -1. * light), s).first < infinity ? 0 : -g);
}
Scene *create(int level, const Vec &c, double r) {
Scene *s = new Sphere(c, r);
if (level == 1) return s;
Scenes child;
child.push_back(s);
double rn = 3*r/sqrt(12.);
for (int dz=-1; dz<=1; dz+=2)
for (int dx=-1; dx<=1; dx+=2)
child.push_back(create(level-1, c + rn*Vec(dx, 1, dz), r/2));
return new Group(Sphere(c, 3*r), child);
}
int main(int argc, char *argv[]) {
int level = 6, n = 512, ss = 4;
if (argc == 2) level = atoi(argv[1]);
Vec light = unitise(Vec(-1, -3, 2));
Scene *s(create(level, Vec(0, -1, 0), 1));
cout << "P5\n" << n << " " << n << "\n255\n";
for (int y=n-1; y>=0; --y)
for (int x=0; x<n; ++x) {
double g=0;
for (int dx=0; dx<ss; ++dx)
for (int dy=0; dy<ss; ++dy) {
Vec dir(unitise(Vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n)));
g += ray_trace(light, Ray(Vec(0, 0, -4), dir), *s);
}
cout << char(int(.5 + 255. * g / (ss*ss)));
}
delete s;
return 0;
}
|
SML (download)
val delta = Math.sqrt (Real.nextAfter(1.0, 2.0) - 1.0)
val infinity = Real.posInf
type vec = real * real * real
infix 7 *| fun s *| (x, y, z) : vec = (s*x, s*y, s*z)
infix 6 +| fun (x1, y1, z1) +| (x2, y2, z2) : vec = (x1+x2, y1+y2, z1+z2)
infix 6 -| fun (x1, y1, z1) -| (x2, y2, z2) : vec = (x1-x2, y1-y2, z1-z2)
fun dot (x1, y1, z1) (x2, y2, z2) : real = x1*x2 + y1*y2 + z1*z2
fun unitise r = (1.0 / Real.Math.sqrt (dot r r)) *| r
type ray = { orig: vec, dir: vec }
type sphere = { center: vec, radius: real }
datatype scene = Sphere of sphere | Group of sphere * scene list
fun ray_sphere ray sphere =
let val v = #center sphere -| #orig ray
val b = dot v (#dir ray)
val disc = b*b - dot v v + #radius sphere * #radius sphere in
if disc < 0.0 then infinity else
let val disc = Real.Math.sqrt disc
val t2 = b + disc in
if t2 < 0.0 then infinity else
(fn t1 => if t1 > 0.0 then t1 else t2) (b - disc)
end
end
fun intersect ray scene =
let fun aux (Sphere sphere, (l, n)) =
let val l' = ray_sphere ray sphere in
if l' >= l then (l, n) else
(l', unitise (#orig ray +| l' *| #dir ray -| #center sphere))
end
| aux (Group (sphere, scenes), (hit as (l, _))) =
if ray_sphere ray sphere >= l then hit else foldl aux hit scenes
in aux (scene, (infinity, (0.0, 0.0, 0.0))) end
fun ray_trace light ray scene =
let val (lambda, normal) = intersect ray scene in
if lambda >= infinity then 0.0 else
let val g = dot normal light in
if g >= 0.0 then 0.0 else
let val p = #orig ray +| lambda *| #dir ray +| delta *| normal
val (l, _) = intersect { orig=p, dir= ~1.0 *| light } scene in
if l >= infinity then ~g else 0.0
end
end
end
fun create level r (c as (x, y, z)) =
let val obj = Sphere { center = c, radius = r } in
if level = 1 then obj else
let val r' = 3.0 * r / Real.Math.sqrt 12.0
fun aux x' z' = create (level-1) (0.5 * r) (x-x', y+r', z+z') in
val objs = [obj, aux (~r') (~r'), aux r' (~r'),
aux (~r') r', aux r' r'] in
Group ({ center = c, radius = 3.0 * r }, objs)
end
end
val n = 512
val ss = 4
val level = case CommandLine.arguments () of
[s] => (case Int.fromString s of
SOME n => n | _ => 6)
| _ => 6
val scene = create level 1.0 (0.0, ~1.0, 0.0)
val light = unitise (~1.0, ~3.0, 2.0)
fun loop f accu (x, y, n) =
if y=n then accu else
if x=n then loop f accu (0, y+1, n) else
loop f (f accu (real x) (real y)) (x+1, y, n)
fun eye_ray n ss x y g dx dy =
g + ray_trace light { orig = (0.0, 0.0, ~4.0),
dir = unitise (x + dx/ss, y + dy/ss, n) } scene
fun pixel n ss () x y =
let val x = x - n / 2.0
val y = (n - 1.0) / 2.0 - y
val g = loop (eye_ray n (real ss) x y) 0.0 (0, 0, ss) in
print (String.str(Char.chr(Real.trunc (255.0 * g / real (ss*ss)))))
end
val () =
(fn s => print ("P5\n"^s^" "^s^"\n255\n")) (Int.toString n);
loop (pixel (real n) ss) () (0, 0, n);
|
A side-by-side comparison of equivalent chunks of code is more instructive.
The C++ program begins with a header:
#include <vector> #include <iostream> #include <limits> #include <cmath> using namespace std; | |
The SML program needs no such header as all of the necessary libraries used are built in to the language in this case.
The ray tracer uses numerical values for the square root of the floating point epsilon (the smallest positive float which can be added to one to produce a number not equal to one) and floating point infinity.
In C++, floating point constants are obtained via inclusion of the "limits" header file. In SML, the floating point constants are obtained from the relevant library.
numeric_limits<double> dbl; double delta = sqrt(dbl.epsilon()), infinity = dbl.infinity(); |
val delta = Math.sqrt (Real.nextAfter(1.0, 2.0) - 1.0) val infinity = Real.posInf |
The ray tracer makes heavy use of 3D vector arithmetic, so the programs begin with definitions of 3D vectors and associated functions.
The C++ and SML programs implement 3D vector as a struct and a 3-tuple, respectively:
struct Vec {
double x, y, z;
Vec(double x2, double y2, double z2) : x(x2), y(y2), z(z2) {}
};
|
type vec = real * real * real |
In C++, such structs are most easily instantiated via a constructor. The SML language allows tuples to be instantiated directly and, therefore, does not require constructor definitions to be given.
Vector addition, subtraction, scaling, dot product and unitization are all implemented as simple functions in both C++ and SML:
Vec operator+(const Vec &a, const Vec &b)
{ return Vec(a.x+b.x, a.y+b.y, a.z+b.z); }
Vec operator-(const Vec &a, const Vec &b)
{ return Vec(a.x-b.x, a.y-b.y, a.z-b.z); }
Vec operator*(double a, const Vec &b) { return Vec(a*b.x, a*b.y, a*b.z); }
double dot(const Vec &a, const Vec &b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
Vec unitise(const Vec &a) { return (1 / sqrt(dot(a, a))) * a; }
|
infix 7 *| fun s *| (x, y, z) : vec = (s*x, s*y, s*z) infix 6 +| fun (x1, y1, z1) +| (x2, y2, z2) : vec = (x1+x2, y1+y2, z1+z2) infix 6 -| fun (x1, y1, z1) -| (x2, y2, z2) : vec = (x1-x2, y1-y2, z1-z2) fun dot (x1, y1, z1) (x2, y2, z2) : real = x1*x2 + y1*y2 + z1*z2 fun unitise r = (1.0 / Real.Math.sqrt (dot r r)) *| r |
Both implementations use infix operators for vector addition, subtraction and scaling. As SML disallows overloading (as it conflicts with type inference), the operators are given slightly different names in the SML implementation. However, SML allows the associativities and precedences of the operators to be defined.
We have explicitly asked for pass by reference in the C++ implementation. This is not necessary but performance will be very poor without this optimisation.
The primitives implemented in this ray tracer are rays, intersections (hits), spheres and groups. Spheres and groups are both types of scene, a group being a bounding sphere and a sequence of scenes.
We have deliberately chosen a variety of representations for these data. Intersections are represented by pairs, giving the parameter of the intersection and the surface normal at the point of intersection. Rays are represented by named pairs, containing the origin and direction vectors quantifying the ray. Spheres are represented by a floating point radius and center vector. Groups are represented by a bounding sphere and a list of child scenes.
The natural implementations of these in each language are:
| Entity | C++ | SML |
| Hit | An STL pair | A 2-tuple |
| Ray | A struct | A record |
| Scene | Abstract base class and derived classes | A variant type |
| Sphere | Derived class | A record |
Let's look at each in turn. Firstly, a hit:
typedef pair<double, Vec> Hit; |
|
In fact, neither implementation needs to explicitly define this type because an STL pair is already defined in the library and uses of a 2-tuple are inferred by the SML type system. However, the type of an STL pair is so verbose and common in this program that it is worth factoring it out with a typedef.
Next, the definition of a ray:
struct Ray {
Vec orig, dir;
Ray(const Vec &o, const Vec &d) : orig(o), dir(d) {}
};
|
type ray = { orig: vec, dir: vec }
|
The C++ equivalent is more verbose because it includes a trivial constructor definition. In SML, records are constructed directly so there is no need to explicitly specify a trivial constructor.
Next, the definition of the scene tree:
struct Scene {
virtual ~Scene() {};
...
};
struct Sphere : public Scene {
Vec center;
double radius;
Sphere(Vec c, double r) : center(c), radius(r) {}
~Sphere() {}
...
};
typedef list<Scene *> Scenes;
struct Group : public Scene {
Sphere bound;
Scenes child;
Group(Sphere b, Scenes c) : bound(b), child(c) {}
~Group() {
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
delete *it;
}
...
};
|
type sphere = { center: vec, radius: real }
datatype scene = Sphere of sphere | Group of sphere * scene list
|
Amazingly, there is no easy way to define a type in C++ which can be "one of these or one of these", e.g. a sphere or a group in this case. The old C equivalent is a tagged union, but these are rare in modern C++ programs. Instead, an inheritance hierarchy is built, with derived classes representing the different possibilities. This is a common (ab)use of object oriented programming.
Also, the scene type in SML simply refers to itself. This is not possible in C++ and, instead, the programmer must use the convoluted approach of handling pointers to instances of the abstract base class. The objects pointed to must therefore be explicitly allocated and deallocated, which is most easily done using destructors.
The ray tracer is based around two simple functions.
ray_sphere
Computes the distance along the ray to its intersection with a sphere, returning infinity if there is no intersection.
intersect
Finds the first intersection of the ray with the scene.
The implementations of the "ray_sphere" function are very similar:
struct Sphere : public Scene {
...
double ray_sphere(const Ray &ray) const {
Vec v = center - ray.orig;
double b = dot(v, ray.dir), disc = b*b - dot(v, v) + radius * radius;
if (disc < 0) return infinity;
double d = sqrt(disc), t2 = b + d;
if (t2 < 0) return infinity;
double t1 = b - d;
return (t1 > 0 ? t1 : t2);
}
};
|
fun ray_sphere ray sphere =
let val v = #center sphere -| #orig ray
val b = dot v (#dir ray)
val disc = b*b - dot v v + #radius sphere * #radius sphere in
if disc < 0.0 then infinity else
let val disc = Real.Math.sqrt disc
val t2 = b + disc in
if t2 < 0.0 then infinity else
(fn t1 => if t1 > 0.0 then t1 else t2) (b - disc)
end
end
|
There are only two differences:
The "intersect" function is more interesting:
struct Scene {
...
virtual Hit intersect(const Hit &, const Ray &) const = 0;
};
struct Sphere : public Scene {
...
Hit intersect(const Hit &hit, const Ray &ray) const {
double lambda = ray_sphere(ray);
if (lambda >= hit.first) return hit;
return Hit(lambda, unitise(ray.orig + lambda*ray.dir - center));
}
};
struct Group : public Scene {
...
Hit intersect(const Hit &hit, const Ray &ray) const {
Hit hit2=hit;
double l = bound.ray_sphere(ray);
if (l >= hit.first) return hit;
for (Scenes::const_iterator it=child.begin(); it!=child.end(); ++it)
hit2 = (*it)->intersect(hit2, ray);
return hit2;
}
};
Hit intersect(const Ray &ray, const Scene &s)
{ return s.intersect(Hit(infinity, Vec(0, 0, 0)), ray); }
|
fun intersect ray scene =
let fun aux (Sphere sphere, (l, n)) =
let val l' = ray_sphere ray sphere in
if l' >= l then (l, n) else
(l', unitise (#orig ray +| l' *| #dir ray -| #center sphere))
end
| aux (Group (sphere, scenes), (hit as (l, _))) =
if ray_sphere ray sphere >= l then hit else foldl aux hit scenes
in aux (scene, (infinity, (0.0, 0.0, 0.0))) end
|
Both implementations really have two separate functions - one function to accumulate a hit over the scene and another function to start the accumulation with a suitable value (infinite parameter and zero normal vector). In the C++, the former function is a member of the "Scene" class and its implementation is split between the derived classes. The latter function is provided separately. In the SML, the former function is an auxiliary function "aux" nested inside the latter function.
Placing the intersect routines in different places has different advantages. The C++ approach groups code by the contructor they are related to. The SML approach groups code by the functions they are related to. Essentially, the C++ approach requires only localised changes to the program when the inheritance hierarchy is extended (by adding another derived class) whereas the SML approach requires only localised changes to the program when new functions are added.
There are several other interesting aspects to this function:
The core function of the ray tracer is, unsurprisingly, called "ray_trace". This function traces a ray from the camera into the scene, returning zero (black) if the ray does not intersect anything or the intersection point is on the dark side of the object or a second ray from the intersection point to the light intersects another object (which is casting a shadow on this intersection point). Otherwise, a greyscale value given by a simple diffuse lighting expression is returned.
The implementations of the "ray_trace" function are very similar:
double ray_trace(const Vec &light, const Ray &ray, const Scene &s) {
Hit hit = intersect(ray, s);
if (hit.first == infinity) return 0;
double g = dot(hit.second, light);
if (g >= 0) return 0.;
Vec p = ray.orig + hit.first*ray.dir + delta*hit.second;
return (intersect(Ray(p, -1. * light), s).first < infinity ? 0 : -g);
}
|
fun ray_trace light ray scene =
let val (lambda, normal) = intersect ray scene in
if lambda >= infinity then 0.0 else
let val g = dot normal light in
if g >= 0.0 then 0.0 else
let val p = #orig ray +| lambda *| #dir ray +| delta *| normal
val (l, _) = intersect { orig=p, dir= ~1.0 *| light } scene in
if l >= infinity then ~g else 0.0
end
end
end
|
The first element of the pair "hit" is extracted using the "first" member function in the C++ but is extracted via the pattern match "(l, _)" in SML, giving the value of interest the more meaningful name "l".
The scene is represented by a tree data structure, leaf nodes of which are spheres and non-leaf nodes are groups of further nodes. Due to the recursive nature of this data structure, the scene is most easiy built recursively.
Both programs use a "create" function to build the scene. This function accepts as arguments the number of levels remaining and the radius and coordinates of the current sphere:
Scene *create(int level, const Vec &c, double r) {
Scene *s = new Sphere(c, r);
if (level == 1) return s;
Scenes child;
child.push_back(s);
double rn = 3*r/sqrt(12.);
for (int dz=-1; dz<=1; dz+=2)
for (int dx=-1; dx<=1; dx+=2)
child.push_back(create(level-1, c + rn*Vec(dx, 1, dz), r/2));
return new Group(Sphere(c, 3*r), child);
}
|
fun create level r (c as (x, y, z)) =
let val obj = Sphere { center = c, radius = r } in
if level = 1 then obj else
let val r' = 3.0 * r / Real.Math.sqrt 12.0 in
fun aux x' z' = create (level-1) (0.5 * r) (x-x', y+r', z+z') in
val objs = [aux (~r') (~r'), aux r' (~r'),
aux (~r') r', aux r' r', obj] in
Group ({ center = c, radius = 3.0 * r }, objs)
end
end
|
There are also some interesting differences between these implementations:
The main part of the program is responsible for extracting the optional command line argument (the number of levels of spheres), creating the scene and, tracing rays for each pixel in the output image and writing the results to stdout as a PGM file.
The C++ and SML implementations of the main part of the program are quite different:
int main(int argc, char *argv[]) {
int level = 6, n = 512, ss = 4;
if (argc == 2) level = atoi(argv[1]);
Vec light = unitise(Vec(-1, -3, 2));
Scene *s(create(level, Vec(0, -1, 0), 1));
cout << "P5\n" << n << " " << n << "\n255\n";
for (int y=n-1; y>=0; --y)
for (int x=0; x<n; ++x) {
double g=0;
for (int dx=0; dx<ss; ++dx)
for (int dy=0; dy<ss; ++dy) {
Vec dir(unitise(Vec(x+dx*1./ss-n/2., y+dy*1./ss-n/2., n)));
g += ray_trace(light, Ray(Vec(0, 0, -4), dir), *s);
}
cout << char(int(.5 + 255. * g / (ss*ss)));
}
delete s;
return 0;
}
|
val n = 512
val ss = 4
val level = case CommandLine.arguments () of
[s] => (case Int.fromString s of
SOME n => n | _ => 6)
| _ => 6
val scene = create level 1.0 (0.0, ~1.0, 0.0)
val light = unitise (~1.0, ~3.0, 2.0)
fun loop f accu (x, y, n) =
if y=n then accu else
if x=n then loop f accu (0, y+1, n) else
loop f (f accu (real x) (real y)) (x+1, y, n)
fun eye_ray n ss x y g dx dy =
g + ray_trace light { orig = (0.0, 0.0, ~4.0),
dir = unitise (x + dx/ss, y + dy/ss, n) } scene
fun pixel n ss () x y =
let val x = x - n / 2.0
val y = (n - 1.0) / 2.0 - y
val g = loop (eye_ray n (real ss) x y) 0.0 (0, 0, ss) in
print (String.str(Char.chr(Real.trunc (255.0 * g / real (ss*ss)))))
end
val () =
(fn s => print ("P5\n"^s^" "^s^"\n255\n")) (Int.toString n);
loop (pixel (real n) ss) () (0, 0, n);
|
There are several interesting differences between the two implementations:
Ideally, the compiler will do all of the performance optimisation so that the programmer doesn't have to. Consequently, it is interesting to measure the performance of these simple ray tracer implementations in order to see how well the compilers optimise the programs.
We have measured performance on 32- and 64-bit architectures.
On a 1.2GHz Athlon Thunderbird Linux box, the two programs can be compiled with (g++ 4.0.1 and mlton):
g++ -O3 -ffast-math ray.cpp -o ray mlton ray.sml
The following graph illustrates the time taken t to ray trace scenes of different complexities, where n is the number of levels of spheres, with the C++ program (red) and SML program (blue):

The SML program is much faster, taking around 30% less time to complete than the C++ program.
At the time of writing, MLton lacks a code generator for AMD64. Thus, we shall compare AMD64-optimised C++ code with x86-optimised SML code with the latter running in 32-bit mode.
On a 1.8GHz Athlon 64, the two programs can be compiled with (g++ 4.0.1 and mlton):
g++ -O3 -ffast-math ray.cpp -o ray mlton ray.sml
The following graph illustrates the time taken t to ray trace scenes of different complexities, where n is the number of levels of spheres, with the C++ program (red) and SML program (blue):

Despite the lack of an AMD64-specific code generator, the MLton-compiled SML program is still comparable to the C++ program in terms of speed. However, the SML program uses more RAM and, consequently, is greatly slowed by the use of swap space for n=11.
The SML implementation of the ray tracer is 25% shorter and is also significantly faster than the C++ on x86. The brevity of the SML implementation can be attributed to several different factors:
The performance of the MLton-compiled SML program can be attibuted to various reasons:
SML is clearly very succinct and expressive whilst also providing competitive performance.
The following webpages are derived from this work:
The graphs on this page were generated by Mathematica
Contact the Webmaster