mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-10 10:41:50 +00:00
Update DXT1 timings table.
Clang achieves much better times on ARM (around 430 µs for both ARM32 and ARM64 NEON). The reference implementation takes 1.13 ms when built with Clang.
This commit is contained in:
parent
f65373ece7
commit
ff9637e884
@ -459,17 +459,17 @@ To further reduce image data size, frame images are internally compressed using
|
||||
\centering
|
||||
\begin{tabular}[h]{c|c|c}
|
||||
\textbf{Implementation} & \textbf{Required define} & \textbf{Time} \\ \hline
|
||||
x86 Reference & --- & 228 \si{\micro\second} \\
|
||||
x86 SSE4.1\textsuperscript{a} & \texttt{\_\_SSE4\_1\_\_} & 35.8 \si{\micro\second} \\
|
||||
x86 AVX2 & \texttt{\_\_AVX2\_\_} & 26.5 \si{\micro\second} \\
|
||||
x86 Reference & --- & 218.7 \si{\micro\second} \\
|
||||
x86 SSE4.1\textsuperscript{a} & \texttt{\_\_SSE4\_1\_\_} & 33.9 \si{\micro\second} \\
|
||||
x86 AVX2 & \texttt{\_\_AVX2\_\_} & 23.8 \si{\micro\second} \\
|
||||
ARM Reference & --- & 1.23 \si{\milli\second} \\
|
||||
ARM32 NEON\textsuperscript{b} & \texttt{\_\_ARM\_NEON} & 561 \si{\micro\second} \\
|
||||
ARM64 NEON & \texttt{\_\_ARM\_NEON} & 473 \si{\micro\second}
|
||||
ARM64 NEON & \texttt{\_\_ARM\_NEON} & 469 \si{\micro\second}
|
||||
\end{tabular}
|
||||
|
||||
\vspace{1em}
|
||||
\textsuperscript{a)} VEX encoding; \hspace{0.5em} \textsuperscript{b)} ARM32 NEON code compiled for ARM64
|
||||
\caption{Compression time of $320\times180$ image. x86: i7 8700K; ARM: ODROID-C2.}
|
||||
\caption{Compression time of $320\times180$ image. x86: i7 8700K (MSVC); ARM: ODROID-C2 (gcc).}
|
||||
\label{EtcSimd}
|
||||
\end{table}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user