\relax \citation{beowulf} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \@writefile{toc}{\contentsline {section}{\numberline {2}Amdahl's Law \& Parallel Speedup}{2}} \citation{Amdahl} \newlabel{rate}{{3}{3}} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces $T_{is} = 0$ and $T_p =$ 10, 100, 1000, 10000, 100000 (in increasing order).}}{4}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces $T_{is} = 10$ and $T_p =$ 10, 100, 1000, 10000, 100000 (in increasing order).}}{4}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces $T_{is} = 10$ and $T_p =$ 10, 100, 1000, 10000, 100000 (in increasing order) with $T_{is}$ contributing {\em quadratically} in $N$.}}{5}} \@writefile{toc}{\contentsline {section}{\numberline {3}Microbenchmarking Tools}{5}} \citation{ATLAS} \citation{lmbench} \citation{netperf} \citation{cpu-rate} \citation{Eden} \citation{Eden} \citation{beowulf} \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Lmbench Results}{7}} \@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Lucifer System Description}}{7}} \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces lmbench latencies for selected processor/process activities. The values are all times in microseconds averaged over ten independent runs (with error estimates provided by an unbiased standard deviation), so ``smaller is better''.}}{7}} \@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Lmbench latencies for context switches, in microseconds (smaller is better).}}{7}} \@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Lmbench {\em local} communication latencies, in microseconds (smaller is better).}}{7}} \@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Lmbench {\em network} communication latencies, in microseconds (smaller is better).}}{8}} \@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Lmbench {\em memory} latencies in nanoseconds (smaller is better). Also see graphs for more complete picture.}}{8}} \@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces Lmbench {\em local} communication bandwidths, in $10^6$ bytes/second (bigger is better).}}{8}} \@writefile{lot}{\contentsline {table}{\numberline {8}{\ignorespaces Lmbench {\em network} communication bandwidths, in $10^6$ bytes/second (bigger is better).}}{8}} \@writefile{lot}{\contentsline {table}{\numberline {9}{\ignorespaces CPU-rates in BOGOMFLOPS -- $10^6$ simple arithmetic operations/second, in L1 cache (bigger is better). Also see graph for out-of-cache performance.}}{8}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Netperf Results}{8}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces TCP Stream (netperf) measurements of bandwidth as a function of packet size between lucifer and eve.}}{9}} \newlabel{tcp_stream}{{4}{9}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}CPU Results}{9}} \citation{cpu-rate} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Double precision floating point operations per second as a function of vector length (in bytes). All points average 100 independent runs. The dashed lines indicate the locations of the L1 and L2 cache boundaries.}}{10}} \newlabel{mdops}{{5}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces The standard deviation (error) associated with figure 5\hbox {}.}}{10}} \newlabel{mdops_sigma}{{6}{10}} \citation{ATLAS} \citation{profiling} \@writefile{toc}{\contentsline {section}{\numberline {4}Conclusions}{11}} \bibcite{beowulf}{beowulf} \bibcite{Amdahl}{Amdahl} \bibcite{Amalsi}{Amalsi} \bibcite{Foster}{Foster} \bibcite{Kumar}{Kumar} \bibcite{lmbench}{lmbench} \bibcite{netperf}{netperf} \bibcite{cpu-rate}{cpu-rate} \bibcite{Eden}{Eden} \bibcite{profiling}{profiling} \bibcite{ATLAS}{ATLAS} \@writefile{toc}{\contentsline {section}{\numberline {5}Acknowledgments}{12}} \@writefile{toc}{\contentsline {section}{\numberline {6}Availability}{12}}