From 147efc3421ac3860720863e796406b3918e7c1c7 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 3 Mar 2015 11:38:27 -0800
Subject: [PATCH 2/3] Combine GOTPLT and GOT Slots

In the small and medium models, when there are both PLT and GOT
references to the same function symbol, normally linker creates
a GOTPLT slot for PLT entry and a GOT slot for GOT reference.  A
run-time JUMP_SLOT relocation is created to update the GOTPLT slot
and a run-time GLOB_DAT relocation is created to update the GOT slot.
Both JUMP_SLOT and GLOB_DAT relocations apply the same symbol value
to GOTPLT and GOT slots, respectively, at run-time.

As an optimization, linker may combine GOTPLT and GOT slots into a
single GOT slot and remove the run-time JUMP_SLOT relocation.

	* Makefile (INCLUDES): Add linker-optimization.tex.
	* abi.tex: Include linker-optimization.
	* linker-optimization.tex: New file.
---
 Makefile                |  2 +-
 abi.tex                 |  1 +
 linker-optimization.tex | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 linker-optimization.tex

diff --git a/Makefile b/Makefile
index 7cb148a..89dfd18 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 INCLUDES  := execution.tex low-level-sys-info.tex development.tex      \
 	     introduction.tex sw-installation.tex dl.tex libraries.tex \
 	     macros.tex conventions.tex abi-index.tex object-files.tex \
-	     kernel.tex fortran.tex
+	     kernel.tex fortran.tex linker-optimization.tex
 
 ALL_FILES := abi.tex $(INCLUDES)
 
diff --git a/abi.tex b/abi.tex
index f4c3f9f..fc025f0 100644
--- a/abi.tex
+++ b/abi.tex
@@ -108,6 +108,7 @@ Draft Version \version}}
 
 \appendix
 \include{kernel}
+\include{linker-optimization}
 \include{abi-index}
 
 \end{document}
diff --git a/linker-optimization.tex b/linker-optimization.tex
new file mode 100644
index 0000000..c34afd5
--- /dev/null
+++ b/linker-optimization.tex
@@ -0,0 +1,63 @@
+\chapter{Linker Optimization}
+
+This chapter describes optimizations which may be performed by the linker.
+
+\section{Combine GOTPLT and GOT Slots}
+In the small and medium models, when there are both PLT and GOT references
+to the same function symbol, the linker normally creates a GOTPLT slot for
+the PLT entry and a GOT slot for the GOT reference.  A run-time JUMP\_SLOT
+relocation is created to update the GOTPLT slot and a run-time GLOB\_DAT
+relocation is created to update the GOT slot.  Both JUMP\_SLOT and GLOB\_DAT
+relocations apply the same symbol value to the GOTPLT and GOT slots,
+respectively, at run-time.
+
+As an optimization, the linker may combine the GOTPLT and GOT slots into a
+single GOT slot and remove the run-time JUMP\_SLOT relocation.  It replaces
+the regular PLT entry:
+
+\begin{figure}[H]
+\Hrule
+\caption{Procedure Linkage Table Entry Via GOTPLT Slot}
+\label{gotplt_plt}
+\begin{footnotesize}
+\begin{verbatim}
+  .PLT: jmp      [GOTPLT slot]
+        pushq    relocation index
+        jmp      .PLT0
+\end{verbatim}
+\end{footnotesize}
+\Hrule
+\end{figure}
+
+\noindent
+with a GOT PLT entry with an indirect jump via the GOT slot:
+\indent
+
+\begin{figure}[H]
+\Hrule
+\caption{Procedure Linkage Table Entry Via GOT Slot}
+\label{got_plt}
+\begin{footnotesize}
+\begin{verbatim}
+  .PLT: jmp      [GOT slot]
+        nop
+\end{verbatim}
+\end{footnotesize}
+\Hrule
+\end{figure}
+
+\noindent
+and resolves the PLT reference to the GOT PLT entry.  Indirect \code{jmp}
+is a 5-byte instruction.  \code{nop} can be encoded as a 3-byte instruction
+or an 11-byte instruction for an 8-byte or a 16-byte PLT slot, respectively.
+A separate PLT with 8-byte slots may be used for this optimization.
+\indent
+
+This optimization isn't applicable to the \texttt{STT\_GNU\_IFUNC} symbols
+since their GOTPLT slots are resolved to the selected implementation and
+their GOT slots are resolved to their PLT entries.
+
+This optimization must be avoided if pointer equality is needed since
+the symbol value won't be cleared in this case and the dynamic linker
+won't update the GOT slot.  Otherwise, the resulting binary will get
+into an infinite loop at run-time.
-- 
2.4.3

