1: | #include <windows.h> | |
2: | #include <stdio.h> | |
3: | #include <math.h> | |
4: | #include <unistd.h> | |
5: | #include <limits.h> | |
6: | #include <time.h> | |
7: | ||
8: | #undef NDEBUG | |
9: | #include <assert.h> | |
10: | #include <string.h> | |
11: | #include <mmsystem.h> | |
12: | #include <float.h> | |
13: | #include <stdlib.h> | |
14: | ||
15: | ||
16: | /* | |
17: | NVidia corporation provides sample code at their website | |
18: | for *their* fast square root implementation. | |
19: | It is basically a 256K table look up. | |
20: | ||
21: | This is the link to the fastsqrt function source | |
22: | http://www.azillionmonkeys.com/qed/fastmath.cpp | |
23: | ||
24: | */ | |
25: | float fastsqrt (float n); | |
26: | ||
27: | ||
28: | /* | |
29: | ||
30: | The isqrt function is a integer square root implementation, | |
31: | it was retrieved from http://www.azillionmonkeys.com/qed/sqroot.html | |
32: | ||
33: | It is fast but not faster than the table lookup Via implementation | |
34: | ||
35: | Since it provides only integer square root and is slower than | |
36: | the table lookup implementation , there is no reason to use this method | |
37: | ||
38: | Use it only if you cannot use a table lookup implementation | |
39: | ||
40: | */ | |
41: | unsigned int isqrt (unsigned int n); | |
42: | ||
43: | double | |
44: | sqrt2 (double val) | |
45: | { | |
46: | ||
47: | ||
48: | register double temp2 = 0; | |
49: | register double temp3; | |
50: | register double lastval; | |
51: | ||
52: | ||
53: | if (1 == val) | |
54: | { | |
55: | return 1; | |
56: | } | |
57: | ||
58: | lastval = -val; | |
59: | ||
60: | ||
61: | // Requires the precomputed sqrt table in the fast sqrt Via implementation | |
62: | ||
63: | // It gives a very good approximation of the sqrt using 32 bit floating point | |
64: | ||
65: | // There is no reason to compute something that you can keep in a precomputed table | |
66: | temp3 = fastsqrt (val); | |
67: | ||
68: | // if you prefer to use as approximation the fast integer square root | |
69: | // temp3 = (double) isqrt ((unsigned int) val); | |
70: | ||
71: | ||
72: | while (1) | |
73: | { | |
74: | ||
75: | //Now with a 32 bit floating point square root we only | |
76: | //need to extend it to a 64 bit floating point implementation | |
77: | ||
78: | //This is the slower point of the code , since it requires a division | |
79: | //and a multiplication | |
80: | ||
81: | //It spend 20 percent of the time in the table lookup and the | |
82: | //rest of the time to make the double type approximation | |
83: | ||
84: | // Notice that yet I think that there is too much room to | |
85: | // improve the square root algorithm | |
86: | ||
87: | if (temp2 == temp3) | |
88: | { | |
89: | ||
90: | goto achou; | |
91: | ||
92: | } | |
93: | ||
94: | temp2 = (val / temp3); | |
95: | ||
96: | temp3 = (temp2 + temp3) * 0.5; | |
97: | ||
98: | if (lastval == temp3) | |
99: | { | |
100: | ||
101: | goto achou; | |
102: | ||
103: | } | |
104: | ||
105: | lastval = temp3; | |
106: | ||
107: | } | |
108: | ; | |
109: | achou: | |
110: | ; | |
111: | ||
112: | ||
113: | return temp2; | |
114: | ||
115: | } | |
116: | ||
117: | ||
118: | /* | |
119: | this is the source to the integer square root implementation | |
120: | , it was retrieved from http://www.azillionmonkeys.com/qed/sqroot.html | |
121: | ||
122: | It is fast but not faster than the table lookup Via implementation | |
123: | ||
124: | Since it provides only integer square root and is slower than | |
125: | the table lookup implementation , there is no reason to use this | |
126: | ||
127: | Use it only if you cannot use a table lookup implementation | |
128: | */ | |
129: | ||
130: | ||
131: | unsigned int | |
132: | isqrt (unsigned int n) | |
133: | { | |
134: | unsigned int root = 0, try; | |
135: | ||
136: | try = root + (1 << (15)); | |
137: | ||
138: | if (n >= try << (15)) | |
139: | { | |
140: | n -= try << (15); | |
141: | root |= 2 << (15); | |
142: | }; | |
143: | ||
144: | try = root + (1 << (14)); | |
145: | ||
146: | if (n >= try << (14)) | |
147: | { | |
148: | n -= try << (14); | |
149: | root |= 2 << (14); | |
150: | }; | |
151: | ||
152: | try = root + (1 << (13)); | |
153: | ||
154: | if (n >= try << (13)) | |
155: | { | |
156: | n -= try << (13); | |
157: | root |= 2 << (13); | |
158: | }; | |
159: | ||
160: | try = root + (1 << (12)); | |
161: | ||
162: | if (n >= try << (12)) | |
163: | { | |
164: | n -= try << (12); | |
165: | root |= 2 << (12); | |
166: | }; | |
167: | ||
168: | try = root + (1 << (11)); | |
169: | ||
170: | if (n >= try << (11)) | |
171: | { | |
172: | n -= try << (11); | |
173: | root |= 2 << (11); | |
174: | }; | |
175: | ||
176: | try = root + (1 << (10)); | |
177: | ||
178: | if (n >= try << (10)) | |
179: | { | |
180: | n -= try << (10); | |
181: | root |= 2 << (10); | |
182: | }; | |
183: | ||
184: | try = root + (1 << (9)); | |
185: | ||
186: | if (n >= try << (9)) | |
187: | { | |
188: | n -= try << (9); | |
189: | root |= 2 << (9); | |
190: | }; | |
191: | ||
192: | try = root + (1 << (8)); | |
193: | ||
194: | if (n >= try << (8)) | |
195: | { | |
196: | n -= try << (8); | |
197: | root |= 2 << (8); | |
198: | }; | |
199: | ||
200: | try = root + (1 << (7)); | |
201: | ||
202: | if (n >= try << (7)) | |
203: | { | |
204: | n -= try << (7); | |
205: | root |= 2 << (7); | |
206: | }; | |
207: | ||
208: | try = root + (1 << (6)); | |
209: | ||
210: | if (n >= try << (6)) | |
211: | { | |
212: | n -= try << (6); | |
213: | root |= 2 << (6); | |
214: | }; | |
215: | ||
216: | try = root + (1 << (5)); | |
217: | ||
218: | if (n >= try << (5)) | |
219: | { | |
220: | n -= try << (5); | |
221: | root |= 2 << (5); | |
222: | }; | |
223: | ||
224: | try = root + (1 << (4)); | |
225: | ||
226: | if (n >= try << (4)) | |
227: | { | |
228: | n -= try << (4); | |
229: | root |= 2 << (4); | |
230: | }; | |
231: | ||
232: | try = root + (1 << (3)); | |
233: | ||
234: | if (n >= try << (3)) | |
235: | { | |
236: | n -= try << (3); | |
237: | root |= 2 << (3); | |
238: | }; | |
239: | ||
240: | try = root + (1 << (2)); | |
241: | ||
242: | if (n >= try << (2)) | |
243: | { | |
244: | n -= try << (2); | |
245: | root |= 2 << (2); | |
246: | }; | |
247: | ||
248: | try = root + (1 << (1)); | |
249: | ||
250: | if (n >= try << (1)) | |
251: | { | |
252: | n -= try << (1); | |
253: | root |= 2 << (1); | |
254: | }; | |
255: | ||
256: | try = root + (1 << (0)); | |
257: | ||
258: | if (n >= try << (0)) | |
259: | { | |
260: | n -= try << (0); | |
261: | root |= 2 << (0); | |
262: | }; | |
263: | ||
264: | return root >> 1; | |
265: | } | |
266: | ||
267: | ||
268: | int | |
269: | main () | |
270: | { | |
271: | #define dprintf printf | |
272: | int i; | |
273: | int p; | |
274: | ||
275: | volatile double sqrtval = 0; | |
276: | //to avoid the optimizer to remove this reference | |
277: | ||
278: | ||
279: | // this call will initiate the table on the fastsqrt function | |
280: | build_sqrt_table (); | |
281: | ||
282: | p = GetTickCount (); | |
283: | ||
284: | for (i = 1; i < 30000000; i++) | |
285: | { | |
286: | sqrtval = sqrt (i); | |
287: | } | |
288: | ||
289: | p = GetTickCount () - p; | |
290: | dprintf ("sqrt = %5d milliseconds %f\n", p, sqrtval); | |
291: | ||
292: | p = GetTickCount (); | |
293: | ||
294: | for (i = 1; i < 30000000; i++) | |
295: | { | |
296: | sqrtval = sqrt2 (i); | |
297: | } | |
298: | ||
299: | p = GetTickCount () - p; | |
300: | dprintf ("sqrt2 = %5d milliseconds %f\n", p, sqrtval); | |
301: | ||
302: | p = GetTickCount (); | |
303: | ||
304: | for (i = 1; i < 30000000; i++) | |
305: | { | |
306: | sqrtval = fastsqrt (i); | |
307: | } | |
308: | ||
309: | p = GetTickCount () - p; | |
310: | dprintf ("fastsqrt = %5d milliseconds %f\n", p, sqrtval); | |
311: | ||
312: | p = GetTickCount (); | |
313: | ||
314: | for (i = 1; i < 30000000; i++) | |
315: | { | |
316: | sqrtval = (double) isqrt ((int) i); | |
317: | } | |
318: | ||
319: | p = GetTickCount () - p; | |
320: | dprintf ("isqrt = %5d milliseconds %f\n", p, sqrtval); | |
321: | ||
322: | return 0; | |
323: | ||
324: | ||
325: | } |