Square root implementation


1:	#include <windows.h>
2:	#include <stdio.h>
3:	#include <math.h>
4:	#include <unistd.h>
5:	#include <limits.h>
6:	#include <time.h>
7:
8:	#undef NDEBUG
9:	#include <assert.h>
10:	#include <string.h>
11:	#include <mmsystem.h>
12:	#include <float.h>
13:	#include <stdlib.h>
14:
15:
16:	/*
17:	NVidia corporation provides sample code at their website
18:	for their fast square root implementation.
19:	It is basically a 256K table look up.
20:
21:	This is the link to the fastsqrt function source
22:	http://www.azillionmonkeys.com/qed/fastmath.cpp
23:
24:	*/
25:	float fastsqrt (float n);
26:
27:
28:	/*
29:
30:	The isqrt function is a integer square root implementation,
31:	it was retrieved from http://www.azillionmonkeys.com/qed/sqroot.html
32:
33:	It is fast but not faster than the table lookup Via implementation
34:
35:	Since it provides only integer square root and is slower than
36:	the table lookup implementation , there is no reason to use this method
37:
38:	Use it only if you cannot use a table lookup implementation
39:
40:	*/
41:	unsigned int isqrt (unsigned int n);
42:
43:	double
44:	sqrt2 (double val)
45:	{
46:
47:
48:	register double temp2 = 0;
49:	register double temp3;
50:	register double lastval;
51:
52:
53:	if (1 == val)
54:	{
55:	return 1;
56:	}
57:
58:	lastval = -val;
59:
60:
61:	// Requires the precomputed sqrt table in the fast sqrt Via implementation
62:
63:	// It gives a very good approximation of the sqrt using 32 bit floating point
64:
65:	// There is no reason to compute something that you can keep in a precomputed table
66:	temp3 = fastsqrt (val);
67:
68:	// if you prefer to use as approximation the fast integer square root
69:	// temp3 = (double) isqrt ((unsigned int) val);
70:
71:
72:	while (1)
73:	{
74:
75:	//Now with a 32 bit floating point square root we only
76:	//need to extend it to a 64 bit floating point implementation
77:
78:	//This is the slower point of the code , since it requires a division
79:	//and a multiplication
80:
81:	//It spend 20 percent of the time in the table lookup and the
82:	//rest of the time to make the double type approximation
83:
84:	// Notice that yet I think that there is too much room to
85:	// improve the square root algorithm
86:
87:	if (temp2 == temp3)
88:	{
89:
90:	goto achou;
91:
92:	}
93:
94:	temp2 = (val / temp3);
95:
96:	temp3 = (temp2 + temp3) * 0.5;
97:
98:	if (lastval == temp3)
99:	{
100:
101:	goto achou;
102:
103:	}
104:
105:	lastval = temp3;
106:
107:	}
108:	;
109:	achou:
110:	;
111:
112:
113:	return temp2;
114:
115:	}
116:
117:
118:	/*
119:	this is the source to the integer square root implementation
120:	, it was retrieved from http://www.azillionmonkeys.com/qed/sqroot.html
121:
122:	It is fast but not faster than the table lookup Via implementation
123:
124:	Since it provides only integer square root and is slower than
125:	the table lookup implementation , there is no reason to use this
126:
127:	Use it only if you cannot use a table lookup implementation
128:	*/
129:
130:
131:	unsigned int
132:	isqrt (unsigned int n)
133:	{
134:	unsigned int root = 0, try;
135:
136:	try = root + (1 << (15));
137:
138:	if (n >= try << (15))
139:	{
140:	n -= try << (15);
141:	root \|= 2 << (15);
142:	};
143:
144:	try = root + (1 << (14));
145:
146:	if (n >= try << (14))
147:	{
148:	n -= try << (14);
149:	root \|= 2 << (14);
150:	};
151:
152:	try = root + (1 << (13));
153:
154:	if (n >= try << (13))
155:	{
156:	n -= try << (13);
157:	root \|= 2 << (13);
158:	};
159:
160:	try = root + (1 << (12));
161:
162:	if (n >= try << (12))
163:	{
164:	n -= try << (12);
165:	root \|= 2 << (12);
166:	};
167:
168:	try = root + (1 << (11));
169:
170:	if (n >= try << (11))
171:	{
172:	n -= try << (11);
173:	root \|= 2 << (11);
174:	};
175:
176:	try = root + (1 << (10));
177:
178:	if (n >= try << (10))
179:	{
180:	n -= try << (10);
181:	root \|= 2 << (10);
182:	};
183:
184:	try = root + (1 << (9));
185:
186:	if (n >= try << (9))
187:	{
188:	n -= try << (9);
189:	root \|= 2 << (9);
190:	};
191:
192:	try = root + (1 << (8));
193:
194:	if (n >= try << (8))
195:	{
196:	n -= try << (8);
197:	root \|= 2 << (8);
198:	};
199:
200:	try = root + (1 << (7));
201:
202:	if (n >= try << (7))
203:	{
204:	n -= try << (7);
205:	root \|= 2 << (7);
206:	};
207:
208:	try = root + (1 << (6));
209:
210:	if (n >= try << (6))
211:	{
212:	n -= try << (6);
213:	root \|= 2 << (6);
214:	};
215:
216:	try = root + (1 << (5));
217:
218:	if (n >= try << (5))
219:	{
220:	n -= try << (5);
221:	root \|= 2 << (5);
222:	};
223:
224:	try = root + (1 << (4));
225:
226:	if (n >= try << (4))
227:	{
228:	n -= try << (4);
229:	root \|= 2 << (4);
230:	};
231:
232:	try = root + (1 << (3));
233:
234:	if (n >= try << (3))
235:	{
236:	n -= try << (3);
237:	root \|= 2 << (3);
238:	};
239:
240:	try = root + (1 << (2));
241:
242:	if (n >= try << (2))
243:	{
244:	n -= try << (2);
245:	root \|= 2 << (2);
246:	};
247:
248:	try = root + (1 << (1));
249:
250:	if (n >= try << (1))
251:	{
252:	n -= try << (1);
253:	root \|= 2 << (1);
254:	};
255:
256:	try = root + (1 << (0));
257:
258:	if (n >= try << (0))
259:	{
260:	n -= try << (0);
261:	root \|= 2 << (0);
262:	};
263:
264:	return root >> 1;
265:	}
266:
267:
268:	int
269:	main ()
270:	{
271:	#define dprintf printf
272:	int i;
273:	int p;
274:
275:	volatile double sqrtval = 0;
276:	//to avoid the optimizer to remove this reference
277:
278:
279:	// this call will initiate the table on the fastsqrt function
280:	build_sqrt_table ();
281:
282:	p = GetTickCount ();
283:
284:	for (i = 1; i < 30000000; i++)
285:	{
286:	sqrtval = sqrt (i);
287:	}
288:
289:	p = GetTickCount () - p;
290:	dprintf ("sqrt = %5d milliseconds %f\n", p, sqrtval);
291:
292:	p = GetTickCount ();
293:
294:	for (i = 1; i < 30000000; i++)
295:	{
296:	sqrtval = sqrt2 (i);
297:	}
298:
299:	p = GetTickCount () - p;
300:	dprintf ("sqrt2 = %5d milliseconds %f\n", p, sqrtval);
301:
302:	p = GetTickCount ();
303:
304:	for (i = 1; i < 30000000; i++)
305:	{
306:	sqrtval = fastsqrt (i);
307:	}
308:
309:	p = GetTickCount () - p;
310:	dprintf ("fastsqrt = %5d milliseconds %f\n", p, sqrtval);
311:
312:	p = GetTickCount ();
313:
314:	for (i = 1; i < 30000000; i++)
315:	{
316:	sqrtval = (double) isqrt ((int) i);
317:	}
318:
319:	p = GetTickCount () - p;
320:	dprintf ("isqrt = %5d milliseconds %f\n", p, sqrtval);
321:
322:	return 0;
323:
324:
325:	}