1 files changed, 1144 insertions, 0 deletions
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py
new file mode 100644
index 0000000000..944ee74052
--- /dev/null
+++ b/scripts/dso-ordering-test.py
@@ -0,0 +1,1144 @@
+#!/usr/bin/python3
+# Generate testcase files and Makefile fragments for DSO sorting test
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Generate testcase files and Makefile fragments for DSO sorting test
+
+This script takes a small description string language, and generates
+testcases for displaying the ELF dynamic linker's dependency sorting
+behavior, allowing verification.
+
+Testcase descriptions are semicolon-separated description strings, and
+this tool generates a testcase from the description, including main program,
+associated modules, and Makefile fragments for including into elf/Makefile.
+
+This allows automation of what otherwise would be very laborous manual
+construction of complex dependency cases, however it must be noted that this
+is only a tool to speed up testcase construction, and thus the generation
+features are largely mechanical in nature; inconsistencies or errors may occur
+if the input description was itself erroneous or have unforeseen interactions.
+
+The format of the input test description files are:
+
+  # Each test description has a name, lines of description,
+  # and an expected output specification.  Comments use '#'.
+  testname1: <test-description-line>
+  output: <expected-output-string>
+
+  # Tests can be marked to be XFAIL by using 'xfail_output' instead
+  testname2: <test-description-line>
+  xfail_output: <expected-output-string>
+
+  # A default set of GLIBC_TUNABLES tunables can be specified, for which
+  # all following tests will run multiple times, once for each of the
+  # GLIBC_TUNABLES=... strings set by the 'tunable_option' command.
+  tunable_option: <glibc-tunable-string1>
+  tunable_option: <glibc-tunable-string2>
+
+  # Test descriptions can use multiple lines, which will all be merged
+  # together, so order is not important.
+  testname3: <test-description-line>
+  <test-description-line>
+  <test-description-line>
+  ...
+  output: <expected-output-string>
+
+  # 'testname3' will be run and compared two times, for both
+  # GLIBC_TUNABLES=<glibc-tunable-string1> and
+  # GLIBC_TUNABLES=<glibc-tunable-string2>.  This can be cleared and reset by the
+  # 'clear_tunables' command:
+  clear_tunables
+
+  # Multiple expected outputs can also be specified, with an associated
+  # tunable option in (), which multiple tests will be run with each
+  # GLIBC_TUNABLES=... option tried.
+  testname4:
+  <test-description-line>
+  ...
+  output(<glibc-tunable-string1>): <expected-output-string-1>
+  output(<glibc-tunable-string2>): <expected-output-string-2>
+  # Individual tunable output cases can be XFAILed, though note that
+  # this will have the effect of XFAILing the entire 'testname4' test
+  # in the final top-level tests.sum summary.
+  xfail_output(<glibc-tunable-string3>): <expected-output-string-3>
+
+  # When multiple outputs (with specific tunable strings) are specified,
+  # these take priority over any active 'tunable_option' settings.
+
+  # When a test is meant to be placed under 'xtests' (not run under
+  # "make check", but only when "make xtests" is used), the testcase name can be
+  # declared using 'xtest(<test-name>)':
+  ...
+  xtest(test-too-big1): <test-description>
+  output: <expected-output-string>
+  ...
+
+  # Do note that under current elf/Makefile organization, for such a xtest case,
+  # while the test execution is only run under 'make xtests', the associated
+  # DSOs are always built even under 'make check'.
+
+On the description language used, an example description line string:
+
+  a->b!->[cdef];c=>g=>h;{+c;%c;-c}->a
+
+Each identifier represents a shared object module, currently sequences of
+letters/digits are allowed, case-sensitive.
+
+All such shared objects have a constructor/destructor generated for them
+that emits its name followed by a '>' for constructors, and '<' followed by
+its name for destructors, e.g. if the name is 'obj1', then "obj1>" and "<obj1"
+is printed by its constructor/destructor respectively.
+
+The -> operator specifies a link time dependency, these can be chained for
+convenience (e.g. a->b->c->d).
+
+The => operator creates a call-reference, e.g. for a=>b, an fn_a() function
+is created inside module 'a', which calls fn_b() in module 'b'.
+These module functions emit 'name()' output in nested form,
+e.g. a=>b emits 'a(b())'
+
+For single character object names, square brackets [] in the description
+allows specifying multiple objects; e.g. a->[bcd]->e is equivalent to
+ a->b->e;a->c->e;a->d->e
+
+The () parenthesis construct with space separated names is also allowed for
+specifying objects.  For names with integer suffixes a range can also be used,
+e.g. (foo1 bar2-5), specifies DSOs foo1, bar2, bar2, bar3, bar4, bar5.
+
+A {} construct specifies the main test program, and its link dependencies
+are also specified using ->.  Inside {}, a few ;-separated constructs are
+allowed:
+         +a   Loads module a using dlopen(RTLD_LAZY|RTLD_GLOBAL)
+         ^a   Loads module a using dlopen(RTLD_LAZY)
+         %a   Use dlsym() to load and call fn_a()
+         @a   Calls fn_a() directly.
+         -a   Unloads module a using dlclose()
+
+The generated main program outputs '{' '}' with all output from above
+constructs in between.  The other output before/after {} are the ordered
+constructor/destructor output.
+
+If no {} construct is present, a default empty main program is linked
+against all objects which have no dependency linked to it. e.g. for
+'[ab]->c;d->e', the default main program is equivalent to '{}->[abd]'
+
+Sometimes for very complex or large testcases, besides specifying a
+few explicit dependencies from main{}, the above default dependency
+behavior is still useful to automatically have, but is turned off
+upon specifying a single explicit {}->dso_name.
+In this case, add {}->* to explicitly add this generation behavior:
+
+   # Main program links to 'foo', and all other objects which have no
+   # dependency linked to it.
+   {}->foo,{}->*
+
+Note that '*' works not only on main{}, but can be used as the
+dependency target of any object.  Note that it only works as a target,
+not a dependency source.
+
+The '!' operator after object names turns on permutation of its
+dependencies, e.g. while a->[bcd] only generates one set of objects,
+with 'a.so' built with a link line of "b.so c.so d.so", for a!->[bcd]
+permutations of a's dependencies creates multiple testcases with
+different link line orders: "b.so c.so d.so", "c.so b.so d.so",
+"b.so d.so c.so", etc.  Note that for a <test-name> specified on
+the script command-line, multiple <test-name_1>, <test-name_2>, etc.
+tests will be generated (e.g. for a!->[bc]!->[de], eight tests with
+different link orders for a, b, and c will be generated)
+
+It is possible to specify the ELF soname field for an object or the
+main program:
+   # DSO 'a' will be linked with the appropriate -Wl,-soname=x setting
+   a->b->c;soname(a)=x
+   # The the main program can also have a soname specified
+   soname({})=y
+
+This can be used to test how ld.so behaves when objects and/or the
+main program have such a field set.
+
+
+Strings Output by Generated Testcase Programs
+
+The text output produced by a generated testcase consists of three main
+parts:
+  1. The constructors' output
+  2. Output from the main program
+  3. Destructors' output
+
+To see by example, a simple test description "a->b->c" generates a testcase
+that when run, outputs: "c>b>a>{}<a<b<c"
+
+Each generated DSO constructor prints its name followed by a '>' character,
+and the "c>b>a" part above is the full constructor output by all DSOs, the
+order indicating that DSO 'c', which does not depend on any other DSO, has
+its constructor run first, followed by 'b' and then 'a'.
+
+Destructor output for each DSO is a '<' character followed by its name,
+reflecting its reverse nature of constructors.  In the above example, the
+destructor output part is "<a<b<c".
+
+The middle "{}" part is the main program.  In this simple example, nothing
+was specified for the main program, so by default it is implicitly linked
+to the DSO 'a' (with no other DSOs depending on it) and only prints the
+brackets {} with no actions inside.
+
+To see an example with actions inside the main program, lets see an example
+description: c->g=>h;{+c;%c;-c}->a->h
+
+This produces a testcase, that when executed outputs:
+             h>a>{+c[g>c>];%c();-c[<c<g];}<a<h
+
+The constructor and destructor parts display the a->h dependency as expected.
+Inside the main program, the "+c" action triggers a dlopen() of DSO 'c',
+causing another chain of constructors "g>c>" to be triggered.  Here it is
+displayed inside [] brackets for each dlopen call.  The same is done for "-c",
+a dlclose() of 'c'.
+
+The "%c" output is due to calling to fn_c() inside DSO 'c', this comprises
+of two parts: the '%' character is printed by the caller, here it is the main
+program.  The 'c' character is printed from inside fn_c().  The '%' character
+indicates that this is called by a dlsym() of "fn_c".  A '@' character would
+mean a direct call (with a symbol reference).  These can all be controlled
+by the main test program constructs documented earlier.
+
+The output strings described here is the exact same form placed in
+test description files' "output: <expected output>" line.
+"""
+
+import sys
+import re
+import os
+import subprocess
+import argparse
+from collections import OrderedDict
+import itertools
+
+# BUILD_GCC is only used under the --build option,
+# which builds the generated testcase, including DSOs using BUILD_GCC.
+# Mainly for testing purposes, especially debugging of this script,
+# and can be changed here to another toolchain path if needed.
+build_gcc = "gcc"
+
+def get_parser():
+    parser = argparse.ArgumentParser("")
+    parser.add_argument("description",
+                         help="Description string of DSO dependency test to be "
+                         "generated (see script source for documentation of "
+                         "description language), either specified here as "
+                         "command line argument, or by input file using "
+                         "-f/--description-file option",
+                         nargs="?", default="")
+    parser.add_argument("test_name",
+                        help="Identifier for testcase being generated",
+                        nargs="?", default="")
+    parser.add_argument("--objpfx",
+                        help="Path to place generated files, defaults to "
+                        "current directory if none specified",
+                        nargs="?", default="./")
+    parser.add_argument("-m", "--output-makefile",
+                        help="File to write Makefile fragment to, defaults to "
+                        "stdout when option not present",
+                        nargs="?", default="")
+    parser.add_argument("-f", "--description-file",
+                        help="Input file containing testcase descriptions",
+                        nargs="?", default="")
+    parser.add_argument("--build", help="After C testcase generated, build it "
+                        "using gcc (for manual testing purposes)",
+                        action="store_true")
+    parser.add_argument("--debug-output",
+                        help="Prints some internal data "
+                        "structures; used for debugging of this script",
+                        action="store_true")
+    return parser
+
+# Main script starts here.
+cmdlineargs = get_parser().parse_args()
+test_name = cmdlineargs.test_name
+description = cmdlineargs.description
+objpfx = cmdlineargs.objpfx
+description_file = cmdlineargs.description_file
+output_makefile = cmdlineargs.output_makefile
+makefile = ""
+default_tunable_options = []
+
+current_input_lineno = 0
+def error(msg):
+    global current_input_lineno
+    print("Error: %s%s" % ((("Line %d, " % current_input_lineno)
+                            if current_input_lineno != 0 else ""),
+                           msg))
+    exit(1)
+
+if(test_name or description) and description_file:
+    error("both command-line testcase and input file specified")
+if test_name and not description:
+    error("command-line testcase name without description string")
+
+# Main class type describing a testcase.
+class TestDescr:
+    def __init__(self):
+        self.objs = []              # list of all DSO objects
+        self.deps = OrderedDict()   # map of DSO object -> list of dependencies
+
+        # map of DSO object -> list of call refs
+        self.callrefs = OrderedDict()
+
+        # map of DSO object -> list of permutations of dependencies
+        self.dep_permutations = OrderedDict()
+
+        # map of DSO object -> SONAME of object (if one is specified)
+        self.soname_map = OrderedDict()
+
+        # list of main program operations
+        self.main_program = []
+        # set if default dependencies added to main
+        self.main_program_default_deps = True
+
+        self.test_name = ""                   # name of testcase
+        self.expected_outputs = OrderedDict() # expected outputs of testcase
+        self.xfail = False                    # set if this is a XFAIL testcase
+        self.xtest = False                    # set if this is put under 'xtests'
+
+    # Add 'object -> [object, object, ...]' relations to CURR_MAP
+    def __add_deps_internal(self, src_objs, dst_objs, curr_map):
+        for src in src_objs:
+            for dst in dst_objs:
+                if not src in curr_map:
+                    curr_map[src] = []
+                if not dst in curr_map[src]:
+                    curr_map[src].append(dst)
+    def add_deps(self, src_objs, dst_objs):
+        self.__add_deps_internal(src_objs, dst_objs, self.deps)
+    def add_callrefs(self, src_objs, dst_objs):
+        self.__add_deps_internal(src_objs, dst_objs, self.callrefs)
+
+# Process commands inside the {} construct.
+# Note that throughout this script, the main program object is represented
+# by the '#' string.
+def process_main_program(test_descr, mainprog_str):
+    if mainprog_str:
+        test_descr.main_program = mainprog_str.split(';')
+    for s in test_descr.main_program:
+        m = re.match(r"^([+\-%^@])([0-9a-zA-Z]+)$", s)
+        if not m:
+            error("'%s' is not recognized main program operation" % (s))
+        opr = m.group(1)
+        obj = m.group(2)
+        if not obj in test_descr.objs:
+            test_descr.objs.append(obj)
+        if opr == '%' or opr == '@':
+            test_descr.add_callrefs(['#'], [obj])
+    # We have a main program specified, turn this off
+    test_descr.main_program_default_deps = False
+
+# For(a1 a2 b1-12) object set descriptions, expand into an object list
+def expand_object_set_string(descr_str):
+    obj_list = []
+    descr_list = descr_str.split()
+    for descr in descr_list:
+        m = re.match(r"^([a-zA-Z][0-9a-zA-Z]*)(-[0-9]+)?$", descr)
+        if not m:
+            error("'%s' is not a valid object set description" % (descr))
+        obj = m.group(1)
+        idx_end = m.group(2)
+        if not idx_end:
+            if not obj in obj_list:
+                obj_list.append(obj)
+        else:
+            idx_end = int(idx_end[1:])
+            m = re.match(r"^([0-9a-zA-Z][a-zA-Z]*)([0-9]+)$", obj)
+            if not m:
+                error("object description '%s' is malformed" % (obj))
+            obj_name = m.group(1)
+            idx_start = int(m.group (2))
+            if idx_start > idx_end:
+                error("index range %s-%s invalid" % (idx_start, idx_end))
+            for i in range(idx_start, idx_end + 1):
+                o = obj_name + str(i)
+                if not o in obj_list:
+                    obj_list.append(o)
+    return obj_list
+
+# Lexer for tokens
+tokenspec = [ ("SONAME",   r"soname\(([0-9a-zA-Z{}]+)\)=([0-9a-zA-Z]+)"),
+              ("OBJ",      r"([0-9a-zA-Z]+)"),
+              ("DEP",      r"->"),
+              ("CALLREF",  r"=>"),
+              ("OBJSET",   r"\[([0-9a-zA-Z]+)\]"),
+              ("OBJSET2",  r"\(([0-9a-zA-Z \-]+)\)"),
+              ("OBJSET3",  r"\*"),
+              ("PROG",     r"{([0-9a-zA-Z;+^\-%@]*)}"),
+              ("PERMUTE",  r"!"),
+              ("SEMICOL",  r";"),
+              ("ERROR",    r".") ]
+tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tokenspec)
+
+# Main line parser of description language
+def parse_description_string(t, descr_str):
+    # State used when parsing dependencies
+    curr_objs = []
+    in_dep = False
+    in_callref = False
+    def clear_dep_state():
+        nonlocal in_dep, in_callref
+        in_dep = in_callref = False
+
+    for m in re.finditer(tok_re, descr_str):
+        kind = m.lastgroup
+        value = m.group()
+        if kind == "SONAME":
+            s = re.match(r"soname\(([0-9a-zA-Z{}]+)\)=([0-9a-zA-Z]+)", value)
+            obj = s.group(1)
+            val = s.group(2)
+            if obj == "{}":
+                if '#' in t.soname_map:
+                    error("soname of main program already set")
+                # Adjust to internal name
+                obj = '#'
+            else:
+                if re.match(r"[{}]", obj):
+                    error("invalid object name '%s'" % (obj))
+                if not obj in t.objs:
+                    error("'%s' is not name of already defined object" % (obj))
+                if obj in t.soname_map:
+                    error("'%s' already has soname of '%s' set"
+                          % (obj, t.soname_map[obj]))
+            t.soname_map[obj] = val
+
+        elif kind == "OBJ":
+            if in_dep:
+                t.add_deps(curr_objs, [value])
+            elif in_callref:
+                t.add_callrefs(curr_objs, [value])
+            clear_dep_state()
+            curr_objs = [value]
+            if not value in t.objs:
+                t.objs.append(value)
+
+        elif kind == "OBJSET":
+            objset = value[1:len(value)-1]
+            if in_dep:
+                t.add_deps(curr_objs, list (objset))
+            elif in_callref:
+                t.add_callrefs(curr_objs, list (objset))
+            clear_dep_state()
+            curr_objs = list(objset)
+            for o in list(objset):
+                if not o in t.objs:
+                    t.objs.append(o)
+
+        elif kind == "OBJSET2":
+            descr_str = value[1:len(value)-1]
+            descr_str.strip()
+            objs = expand_object_set_string(descr_str)
+            if not objs:
+                error("empty object set '%s'" % (value))
+            if in_dep:
+                t.add_deps(curr_objs, objs)
+            elif in_callref:
+                t.add_callrefs(curr_objs, objs)
+            clear_dep_state()
+            curr_objs = objs
+            for o in objs:
+                if not o in t.objs:
+                    t.objs.append(o)
+
+        elif kind == "OBJSET3":
+            if in_dep:
+                t.add_deps(curr_objs, ['*'])
+            elif in_callref:
+                t.add_callrefs(curr_objs, ['*'])
+            else:
+                error("non-dependence target set '*' can only be used "
+                      "as target of ->/=> operations")
+            clear_dep_state()
+            curr_objs = ['*']
+
+        elif kind == "PERMUTE":
+            if in_dep or in_callref:
+                error("syntax error, permute operation invalid here")
+            if not curr_objs:
+                error("syntax error, no objects to permute here")
+
+            for obj in curr_objs:
+                if not obj in t.dep_permutations:
+                    # Signal this object has permuted dependencies
+                    t.dep_permutations[obj] = []
+
+        elif kind == "PROG":
+            if t.main_program:
+                error("cannot have more than one main program")
+            if in_dep:
+                error("objects cannot have dependency on main program")
+            if in_callref:
+                # TODO: A DSO can resolve to a symbol in the main binary,
+                # which we syntactically allow here, but haven't yet
+                # implemented.
+                t.add_callrefs(curr_objs, ["#"])
+            process_main_program(t, value[1:len(value)-1])
+            clear_dep_state()
+            curr_objs = ["#"]
+
+        elif kind == "DEP":
+            if in_dep or in_callref:
+                error("syntax error, multiple contiguous ->,=> operations")
+            if '*' in curr_objs:
+                error("non-dependence target set '*' can only be used "
+                      "as target of ->/=> operations")
+            in_dep = True
+
+        elif kind == "CALLREF":
+            if in_dep or in_callref:
+                error("syntax error, multiple contiguous ->,=> operations")
+            if '*' in curr_objs:
+                error("non-dependence target set '*' can only be used "
+                      "as target of ->/=> operations")
+            in_callref = True
+
+        elif kind == "SEMICOL":
+            curr_objs = []
+            clear_dep_state()
+
+        else:
+            error("unknown token '%s'" % (value))
+    return t
+
+# Main routine to process each testcase description
+def process_testcase(t):
+    global objpfx
+    assert t.test_name
+
+    base_test_name = t.test_name
+    test_subdir = base_test_name + "-dir"
+    testpfx = objpfx + test_subdir + "/"
+
+    if not os.path.exists(testpfx):
+        os.mkdir(testpfx)
+
+    def find_objs_not_depended_on(t):
+        objs_not_depended_on = []
+        for obj in t.objs:
+            skip = False
+            for r in t.deps.items():
+                if obj in r[1]:
+                    skip = True
+                    break
+            if not skip:
+                objs_not_depended_on.append(obj)
+        return objs_not_depended_on
+
+    non_dep_tgt_objs = find_objs_not_depended_on(t)
+    for obj in t.objs:
+        if obj in t.deps:
+            deps = t.deps[obj]
+            if '*' in deps:
+                t.deps[obj].remove('*')
+                t.add_deps([obj], non_dep_tgt_objs)
+        if obj in t.callrefs:
+            deps = t.callrefs[obj]
+            if '*' in deps:
+                t.deps[obj].remove('*')
+                t.add_callrefs([obj], non_dep_tgt_objs)
+    if "#" in t.deps:
+        deps = t.deps["#"]
+        if '*' in deps:
+            t.deps["#"].remove('*')
+            t.add_deps(["#"], non_dep_tgt_objs)
+
+    # If no main program was specified in dependency description, make a
+    # default main program with deps pointing to all DSOs which are not
+    # depended by another DSO.
+    if t.main_program_default_deps:
+        main_deps = non_dep_tgt_objs
+        if not main_deps:
+            error("no objects for default main program to point "
+                  "dependency to(all objects strongly connected?)")
+        t.add_deps(["#"], main_deps)
+
+    # Some debug output
+    if cmdlineargs.debug_output:
+        print("Testcase: %s" % (t.test_name))
+        print("All objects: %s" % (t.objs))
+        print("--- Static link dependencies ---")
+        for r in t.deps.items():
+            print("%s -> %s" % (r[0], r[1]))
+        print("--- Objects whose dependencies are to be permuted ---")
+        for r in t.dep_permutations.items():
+            print("%s" % (r[0]))
+        print("--- Call reference dependencies ---")
+        for r in t.callrefs.items():
+            print("%s => %s" % (r[0], r[1]))
+        print("--- main program ---")
+        print(t.main_program)
+
+    # Main testcase generation routine, does Makefile fragment generation,
+    # testcase source generation, and if --build specified builds testcase.
+    def generate_testcase(test_descr, test_suffix):
+
+        test_name = test_descr.test_name + test_suffix
+
+        # Print out needed Makefile fragments for use in glibc/elf/Makefile.
+        module_names = ""
+        for o in test_descr.objs:
+            module_names += " " + test_subdir + "/" + test_name + "-" + o
+        makefile.write("modules-names +=%s\n" % (module_names))
+
+        # Depth-first traversal, executing FN(OBJ) in post-order
+        def dfs(t, fn):
+            def dfs_rec(obj, fn, obj_visited):
+                if obj in obj_visited:
+                    return
+                obj_visited[obj] = True
+                if obj in t.deps:
+                    for dep in t.deps[obj]:
+                        dfs_rec(dep, fn, obj_visited)
+                fn(obj)
+
+            obj_visited = {}
+            for obj in t.objs:
+                dfs_rec(obj, fn, obj_visited)
+
+        # Generate link dependencies for all DSOs, done in a DFS fashion.
+        # Usually this doesn't need to be this complex, just listing the direct
+        # dependencies is enough.  However to support creating circular
+        # dependency situations, traversing it by DFS and tracking processing
+        # status is the natural way to do it.
+        obj_processed = {}
+        fake_created = {}
+        def gen_link_deps(obj):
+            if obj in test_descr.deps:
+