aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorChung-Lin Tang <cltang@codesourcery.com>2021-10-21 21:41:21 +0800
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2021-10-21 11:23:53 -0300
commite6fd79f3795d46dfb583e124be49fc063bc3d58b (patch)
tree1d2b1f6ac65243c5f3e8f9c5283ec14751d136a7 /scripts
parent0ff2d30daedb6d0d00401f1f2a48a80ff99d7c25 (diff)
downloadglibc-e6fd79f3795d46dfb583e124be49fc063bc3d58b.tar.xz
glibc-e6fd79f3795d46dfb583e124be49fc063bc3d58b.zip
elf: Testing infrastructure for ld.so DSO sorting (BZ #17645)
This is the first of a 2-part patch set that fixes slow DSO sorting behavior in the dynamic loader, as reported in BZ #17645. In order to facilitate such a large modification to the dynamic loader, this first patch implements a testing framework for validating shared object sorting behavior, to enable comparison between old/new sorting algorithms, and any later enhancements. This testing infrastructure consists of a Python script scripts/dso-ordering-test.py' which takes in a description language, consisting of strings that describe a set of link dependency relations between DSOs, and generates testcase programs and Makefile fragments to automatically test the described situation, for example: a->b->c->d # four objects linked one after another a->[bc]->d;b->c # a depends on b and c, which both depend on d, # b depends on c (b,c linked to object a in fixed order) a->b->c;{+a;%a;-a} # a, b, c serially dependent, main program uses # dlopen/dlsym/dlclose on object a a->b->c;{}!->[abc] # a, b, c serially dependent; multiple tests generated # to test all permutations of a, b, c ordering linked # to main program (Above is just a short description of what the script can do, more documentation is in the script comments.) Two files containing several new tests, elf/dso-sort-tests-[12].def are added, including test scenarios for BZ #15311 and Redhat issue #1162810 [1]. Due to the nature of dynamic loader tests, where the sorting behavior and test output occurs before/after main(), generating testcases to use support/test-driver.c does not suffice to control meaningful timeout for ld.so. Therefore a new utility program 'support/test-run-command', based on test-driver.c/support_test_main.c has been added. This does the same testcase control, but for a program specified through a command-line rather than at the source code level. This utility is used to run the dynamic loader testcases generated by dso-ordering-test.py. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1162810 Signed-off-by: Chung-Lin Tang <cltang@codesourcery.com> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Diffstat (limited to 'scripts')
-rw-r--r--scripts/dso-ordering-test.py1144
1 files changed, 1144 insertions, 0 deletions
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py
new file mode 100644
index 0000000000..944ee74052
--- /dev/null
+++ b/scripts/dso-ordering-test.py
@@ -0,0 +1,1144 @@
+#!/usr/bin/python3
+# Generate testcase files and Makefile fragments for DSO sorting test
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Generate testcase files and Makefile fragments for DSO sorting test
+
+This script takes a small description string language, and generates
+testcases for displaying the ELF dynamic linker's dependency sorting
+behavior, allowing verification.
+
+Testcase descriptions are semicolon-separated description strings, and
+this tool generates a testcase from the description, including main program,
+associated modules, and Makefile fragments for including into elf/Makefile.
+
+This allows automation of what otherwise would be very laborous manual
+construction of complex dependency cases, however it must be noted that this
+is only a tool to speed up testcase construction, and thus the generation
+features are largely mechanical in nature; inconsistencies or errors may occur
+if the input description was itself erroneous or have unforeseen interactions.
+
+The format of the input test description files are:
+
+ # Each test description has a name, lines of description,
+ # and an expected output specification. Comments use '#'.
+ testname1: <test-description-line>
+ output: <expected-output-string>
+
+ # Tests can be marked to be XFAIL by using 'xfail_output' instead
+ testname2: <test-description-line>
+ xfail_output: <expected-output-string>
+
+ # A default set of GLIBC_TUNABLES tunables can be specified, for which
+ # all following tests will run multiple times, once for each of the
+ # GLIBC_TUNABLES=... strings set by the 'tunable_option' command.
+ tunable_option: <glibc-tunable-string1>
+ tunable_option: <glibc-tunable-string2>
+
+ # Test descriptions can use multiple lines, which will all be merged
+ # together, so order is not important.
+ testname3: <test-description-line>
+ <test-description-line>
+ <test-description-line>
+ ...
+ output: <expected-output-string>
+
+ # 'testname3' will be run and compared two times, for both
+ # GLIBC_TUNABLES=<glibc-tunable-string1> and
+ # GLIBC_TUNABLES=<glibc-tunable-string2>. This can be cleared and reset by the
+ # 'clear_tunables' command:
+ clear_tunables
+
+ # Multiple expected outputs can also be specified, with an associated
+ # tunable option in (), which multiple tests will be run with each
+ # GLIBC_TUNABLES=... option tried.
+ testname4:
+ <test-description-line>
+ ...
+ output(<glibc-tunable-string1>): <expected-output-string-1>
+ output(<glibc-tunable-string2>): <expected-output-string-2>
+ # Individual tunable output cases can be XFAILed, though note that
+ # this will have the effect of XFAILing the entire 'testname4' test
+ # in the final top-level tests.sum summary.
+ xfail_output(<glibc-tunable-string3>): <expected-output-string-3>
+
+ # When multiple outputs (with specific tunable strings) are specified,
+ # these take priority over any active 'tunable_option' settings.
+
+ # When a test is meant to be placed under 'xtests' (not run under
+ # "make check", but only when "make xtests" is used), the testcase name can be
+ # declared using 'xtest(<test-name>)':
+ ...
+ xtest(test-too-big1): <test-description>
+ output: <expected-output-string>
+ ...
+
+ # Do note that under current elf/Makefile organization, for such a xtest case,
+ # while the test execution is only run under 'make xtests', the associated
+ # DSOs are always built even under 'make check'.
+
+On the description language used, an example description line string:
+
+ a->b!->[cdef];c=>g=>h;{+c;%c;-c}->a
+
+Each identifier represents a shared object module, currently sequences of
+letters/digits are allowed, case-sensitive.
+
+All such shared objects have a constructor/destructor generated for them
+that emits its name followed by a '>' for constructors, and '<' followed by
+its name for destructors, e.g. if the name is 'obj1', then "obj1>" and "<obj1"
+is printed by its constructor/destructor respectively.
+
+The -> operator specifies a link time dependency, these can be chained for
+convenience (e.g. a->b->c->d).
+
+The => operator creates a call-reference, e.g. for a=>b, an fn_a() function
+is created inside module 'a', which calls fn_b() in module 'b'.
+These module functions emit 'name()' output in nested form,
+e.g. a=>b emits 'a(b())'
+
+For single character object names, square brackets [] in the description
+allows specifying multiple objects; e.g. a->[bcd]->e is equivalent to
+ a->b->e;a->c->e;a->d->e
+
+The () parenthesis construct with space separated names is also allowed for
+specifying objects. For names with integer suffixes a range can also be used,
+e.g. (foo1 bar2-5), specifies DSOs foo1, bar2, bar2, bar3, bar4, bar5.
+
+A {} construct specifies the main test program, and its link dependencies
+are also specified using ->. Inside {}, a few ;-separated constructs are
+allowed:
+ +a Loads module a using dlopen(RTLD_LAZY|RTLD_GLOBAL)
+ ^a Loads module a using dlopen(RTLD_LAZY)
+ %a Use dlsym() to load and call fn_a()
+ @a Calls fn_a() directly.
+ -a Unloads module a using dlclose()
+
+The generated main program outputs '{' '}' with all output from above
+constructs in between. The other output before/after {} are the ordered
+constructor/destructor output.
+
+If no {} construct is present, a default empty main program is linked
+against all objects which have no dependency linked to it. e.g. for
+'[ab]->c;d->e', the default main program is equivalent to '{}->[abd]'
+
+Sometimes for very complex or large testcases, besides specifying a
+few explicit dependencies from main{}, the above default dependency
+behavior is still useful to automatically have, but is turned off
+upon specifying a single explicit {}->dso_name.
+In this case, add {}->* to explicitly add this generation behavior:
+
+ # Main program links to 'foo', and all other objects which have no
+ # dependency linked to it.
+ {}->foo,{}->*
+
+Note that '*' works not only on main{}, but can be used as the
+dependency target of any object. Note that it only works as a target,
+not a dependency source.
+
+The '!' operator after object names turns on permutation of its
+dependencies, e.g. while a->[bcd] only generates one set of objects,
+with 'a.so' built with a link line of "b.so c.so d.so", for a!->[bcd]
+permutations of a's dependencies creates multiple testcases with
+different link line orders: "b.so c.so d.so", "c.so b.so d.so",
+"b.so d.so c.so", etc. Note that for a <test-name> specified on
+the script command-line, multiple <test-name_1>, <test-name_2>, etc.
+tests will be generated (e.g. for a!->[bc]!->[de], eight tests with
+different link orders for a, b, and c will be generated)
+
+It is possible to specify the ELF soname field for an object or the
+main program:
+ # DSO 'a' will be linked with the appropriate -Wl,-soname=x setting
+ a->b->c;soname(a)=x
+ # The the main program can also have a soname specified
+ soname({})=y
+
+This can be used to test how ld.so behaves when objects and/or the
+main program have such a field set.
+
+
+Strings Output by Generated Testcase Programs
+
+The text output produced by a generated testcase consists of three main
+parts:
+ 1. The constructors' output
+ 2. Output from the main program
+ 3. Destructors' output
+
+To see by example, a simple test description "a->b->c" generates a testcase
+that when run, outputs: "c>b>a>{}<a<b<c"
+
+Each generated DSO constructor prints its name followed by a '>' character,
+and the "c>b>a" part above is the full constructor output by all DSOs, the
+order indicating that DSO 'c', which does not depend on any other DSO, has
+its constructor run first, followed by 'b' and then 'a'.
+
+Destructor output for each DSO is a '<' character followed by its name,
+reflecting its reverse nature of constructors. In the above example, the
+destructor output part is "<a<b<c".
+
+The middle "{}" part is the main program. In this simple example, nothing
+was specified for the main program, so by default it is implicitly linked
+to the DSO 'a' (with no other DSOs depending on it) and only prints the
+brackets {} with no actions inside.
+
+To see an example with actions inside the main program, lets see an example
+description: c->g=>h;{+c;%c;-c}->a->h
+
+This produces a testcase, that when executed outputs:
+ h>a>{+c[g>c>];%c();-c[<c<g];}<a<h
+
+The constructor and destructor parts display the a->h dependency as expected.
+Inside the main program, the "+c" action triggers a dlopen() of DSO 'c',
+causing another chain of constructors "g>c>" to be triggered. Here it is
+displayed inside [] brackets for each dlopen call. The same is done for "-c",
+a dlclose() of 'c'.
+
+The "%c" output is due to calling to fn_c() inside DSO 'c', this comprises
+of two parts: the '%' character is printed by the caller, here it is the main
+program. The 'c' character is printed from inside fn_c(). The '%' character
+indicates that this is called by a dlsym() of "fn_c". A '@' character would
+mean a direct call (with a symbol reference). These can all be controlled
+by the main test program constructs documented earlier.
+
+The output strings described here is the exact same form placed in
+test description files' "output: <expected output>" line.
+"""
+
+import sys
+import re
+import os
+import subprocess
+import argparse
+from collections import OrderedDict
+import itertools
+
+# BUILD_GCC is only used under the --build option,
+# which builds the generated testcase, including DSOs using BUILD_GCC.
+# Mainly for testing purposes, especially debugging of this script,
+# and can be changed here to another toolchain path if needed.
+build_gcc = "gcc"
+
+def get_parser():
+ parser = argparse.ArgumentParser("")
+ parser.add_argument("description",
+ help="Description string of DSO dependency test to be "
+ "generated (see script source for documentation of "
+ "description language), either specified here as "
+ "command line argument, or by input file using "
+ "-f/--description-file option",
+ nargs="?", default="")
+ parser.add_argument("test_name",
+ help="Identifier for testcase being generated",
+ nargs="?", default="")
+ parser.add_argument("--objpfx",
+ help="Path to place generated files, defaults to "
+ "current directory if none specified",
+ nargs="?", default="./")
+ parser.add_argument("-m", "--output-makefile",
+ help="File to write Makefile fragment to, defaults to "
+ "stdout when option not present",
+ nargs="?", default="")
+ parser.add_argument("-f", "--description-file",
+ help="Input file containing testcase descriptions",
+ nargs="?", default="")
+ parser.add_argument("--build", help="After C testcase generated, build it "
+ "using gcc (for manual testing purposes)",
+ action="store_true")
+ parser.add_argument("--debug-output",
+ help="Prints some internal data "
+ "structures; used for debugging of this script",
+ action="store_true")
+ return parser
+
+# Main script starts here.
+cmdlineargs = get_parser().parse_args()
+test_name = cmdlineargs.test_name
+description = cmdlineargs.description
+objpfx = cmdlineargs.objpfx
+description_file = cmdlineargs.description_file
+output_makefile = cmdlineargs.output_makefile
+makefile = ""
+default_tunable_options = []
+
+current_input_lineno = 0
+def error(msg):
+ global current_input_lineno
+ print("Error: %s%s" % ((("Line %d, " % current_input_lineno)
+ if current_input_lineno != 0 else ""),
+ msg))
+ exit(1)
+
+if(test_name or description) and description_file:
+ error("both command-line testcase and input file specified")
+if test_name and not description:
+ error("command-line testcase name without description string")
+
+# Main class type describing a testcase.
+class TestDescr:
+ def __init__(self):
+ self.objs = [] # list of all DSO objects
+ self.deps = OrderedDict() # map of DSO object -> list of dependencies
+
+ # map of DSO object -> list of call refs
+ self.callrefs = OrderedDict()
+
+ # map of DSO object -> list of permutations of dependencies
+ self.dep_permutations = OrderedDict()
+
+ # map of DSO object -> SONAME of object (if one is specified)
+ self.soname_map = OrderedDict()
+
+ # list of main program operations
+ self.main_program = []
+ # set if default dependencies added to main
+ self.main_program_default_deps = True
+
+ self.test_name = "" # name of testcase
+ self.expected_outputs = OrderedDict() # expected outputs of testcase
+ self.xfail = False # set if this is a XFAIL testcase
+ self.xtest = False # set if this is put under 'xtests'
+
+ # Add 'object -> [object, object, ...]' relations to CURR_MAP
+ def __add_deps_internal(self, src_objs, dst_objs, curr_map):
+ for src in src_objs:
+ for dst in dst_objs:
+ if not src in curr_map:
+ curr_map[src] = []
+ if not dst in curr_map[src]:
+ curr_map[src].append(dst)
+ def add_deps(self, src_objs, dst_objs):
+ self.__add_deps_internal(src_objs, dst_objs, self.deps)
+ def add_callrefs(self, src_objs, dst_objs):
+ self.__add_deps_internal(src_objs, dst_objs, self.callrefs)
+
+# Process commands inside the {} construct.
+# Note that throughout this script, the main program object is represented
+# by the '#' string.
+def process_main_program(test_descr, mainprog_str):
+ if mainprog_str:
+ test_descr.main_program = mainprog_str.split(';')
+ for s in test_descr.main_program:
+ m = re.match(r"^([+\-%^@])([0-9a-zA-Z]+)$", s)
+ if not m:
+ error("'%s' is not recognized main program operation" % (s))
+ opr = m.group(1)
+ obj = m.group(2)
+ if not obj in test_descr.objs:
+ test_descr.objs.append(obj)
+ if opr == '%' or opr == '@':
+ test_descr.add_callrefs(['#'], [obj])
+ # We have a main program specified, turn this off
+ test_descr.main_program_default_deps = False
+
+# For(a1 a2 b1-12) object set descriptions, expand into an object list
+def expand_object_set_string(descr_str):
+ obj_list = []
+ descr_list = descr_str.split()
+ for descr in descr_list:
+ m = re.match(r"^([a-zA-Z][0-9a-zA-Z]*)(-[0-9]+)?$", descr)
+ if not m:
+ error("'%s' is not a valid object set description" % (descr))
+ obj = m.group(1)
+ idx_end = m.group(2)
+ if not idx_end:
+ if not obj in obj_list:
+ obj_list.append(obj)
+ else:
+ idx_end = int(idx_end[1:])
+ m = re.match(r"^([0-9a-zA-Z][a-zA-Z]*)([0-9]+)$", obj)
+ if not m:
+ error("object description '%s' is malformed" % (obj))
+ obj_name = m.group(1)
+ idx_start = int(m.group (2))
+ if idx_start > idx_end:
+ error("index range %s-%s invalid" % (idx_start, idx_end))
+ for i in range(idx_start, idx_end + 1):
+ o = obj_name + str(i)
+ if not o in obj_list:
+ obj_list.append(o)
+ return obj_list
+
+# Lexer for tokens
+tokenspec = [ ("SONAME", r"soname\(([0-9a-zA-Z{}]+)\)=([0-9a-zA-Z]+)"),
+ ("OBJ", r"([0-9a-zA-Z]+)"),
+ ("DEP", r"->"),
+ ("CALLREF", r"=>"),
+ ("OBJSET", r"\[([0-9a-zA-Z]+)\]"),
+ ("OBJSET2", r"\(([0-9a-zA-Z \-]+)\)"),
+ ("OBJSET3", r"\*"),
+ ("PROG", r"{([0-9a-zA-Z;+^\-%@]*)}"),
+ ("PERMUTE", r"!"),
+ ("SEMICOL", r";"),
+ ("ERROR", r".") ]
+tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tokenspec)
+
+# Main line parser of description language
+def parse_description_string(t, descr_str):
+ # State used when parsing dependencies
+ curr_objs = []
+ in_dep = False
+ in_callref = False
+ def clear_dep_state():
+ nonlocal in_dep, in_callref
+ in_dep = in_callref = False
+
+ for m in re.finditer(tok_re, descr_str):
+ kind = m.lastgroup
+ value = m.group()
+ if kind == "SONAME":
+ s = re.match(r"soname\(([0-9a-zA-Z{}]+)\)=([0-9a-zA-Z]+)", value)
+ obj = s.group(1)
+ val = s.group(2)
+ if obj == "{}":
+ if '#' in t.soname_map:
+ error("soname of main program already set")
+ # Adjust to internal name
+ obj = '#'
+ else:
+ if re.match(r"[{}]", obj):
+ error("invalid object name '%s'" % (obj))
+ if not obj in t.objs:
+ error("'%s' is not name of already defined object" % (obj))
+ if obj in t.soname_map:
+ error("'%s' already has soname of '%s' set"
+ % (obj, t.soname_map[obj]))
+ t.soname_map[obj] = val
+
+ elif kind == "OBJ":
+ if in_dep:
+ t.add_deps(curr_objs, [value])
+ elif in_callref:
+ t.add_callrefs(curr_objs, [value])
+ clear_dep_state()
+ curr_objs = [value]
+ if not value in t.objs:
+ t.objs.append(value)
+
+ elif kind == "OBJSET":
+ objset = value[1:len(value)-1]
+ if in_dep:
+ t.add_deps(curr_objs, list (objset))
+ elif in_callref:
+ t.add_callrefs(curr_objs, list (objset))
+ clear_dep_state()
+ curr_objs = list(objset)
+ for o in list(objset):
+ if not o in t.objs:
+ t.objs.append(o)
+
+ elif kind == "OBJSET2":
+ descr_str = value[1:len(value)-1]
+ descr_str.strip()
+ objs = expand_object_set_string(descr_str)
+ if not objs:
+ error("empty object set '%s'" % (value))
+ if in_dep:
+ t.add_deps(curr_objs, objs)
+ elif in_callref:
+ t.add_callrefs(curr_objs, objs)
+ clear_dep_state()
+ curr_objs = objs
+ for o in objs:
+ if not o in t.objs:
+ t.objs.append(o)
+
+ elif kind == "OBJSET3":
+ if in_dep:
+ t.add_deps(curr_objs, ['*'])
+ elif in_callref:
+ t.add_callrefs(curr_objs, ['*'])
+ else:
+ error("non-dependence target set '*' can only be used "
+ "as target of ->/=> operations")
+ clear_dep_state()
+ curr_objs = ['*']
+
+ elif kind == "PERMUTE":
+ if in_dep or in_callref:
+ error("syntax error, permute operation invalid here")
+ if not curr_objs:
+ error("syntax error, no objects to permute here")
+
+ for obj in curr_objs:
+ if not obj in t.dep_permutations:
+ # Signal this object has permuted dependencies
+ t.dep_permutations[obj] = []
+
+ elif kind == "PROG":
+ if t.main_program:
+ error("cannot have more than one main program")
+ if in_dep:
+ error("objects cannot have dependency on main program")
+ if in_callref:
+ # TODO: A DSO can resolve to a symbol in the main binary,
+ # which we syntactically allow here, but haven't yet
+ # implemented.
+ t.add_callrefs(curr_objs, ["#"])
+ process_main_program(t, value[1:len(value)-1])
+ clear_dep_state()
+ curr_objs = ["#"]
+
+ elif kind == "DEP":
+ if in_dep or in_callref:
+ error("syntax error, multiple contiguous ->,=> operations")
+ if '*' in curr_objs:
+ error("non-dependence target set '*' can only be used "
+ "as target of ->/=> operations")
+ in_dep = True
+
+ elif kind == "CALLREF":
+ if in_dep or in_callref:
+ error("syntax error, multiple contiguous ->,=> operations")
+ if '*' in curr_objs:
+ error("non-dependence target set '*' can only be used "
+ "as target of ->/=> operations")
+ in_callref = True
+
+ elif kind == "SEMICOL":
+ curr_objs = []
+ clear_dep_state()
+
+ else:
+ error("unknown token '%s'" % (value))
+ return t
+
+# Main routine to process each testcase description
+def process_testcase(t):
+ global objpfx
+ assert t.test_name
+
+ base_test_name = t.test_name
+ test_subdir = base_test_name + "-dir"
+ testpfx = objpfx + test_subdir + "/"
+
+ if not os.path.exists(testpfx):
+ os.mkdir(testpfx)
+
+ def find_objs_not_depended_on(t):
+ objs_not_depended_on = []
+ for obj in t.objs:
+ skip = False
+ for r in t.deps.items():
+ if obj in r[1]:
+ skip = True
+ break
+ if not skip:
+ objs_not_depended_on.append(obj)
+ return objs_not_depended_on
+
+ non_dep_tgt_objs = find_objs_not_depended_on(t)
+ for obj in t.objs:
+ if obj in t.deps:
+ deps = t.deps[obj]
+ if '*' in deps:
+ t.deps[obj].remove('*')
+ t.add_deps([obj], non_dep_tgt_objs)
+ if obj in t.callrefs:
+ deps = t.callrefs[obj]
+ if '*' in deps:
+ t.deps[obj].remove('*')
+ t.add_callrefs([obj], non_dep_tgt_objs)
+ if "#" in t.deps:
+ deps = t.deps["#"]
+ if '*' in deps:
+ t.deps["#"].remove('*')
+ t.add_deps(["#"], non_dep_tgt_objs)
+
+ # If no main program was specified in dependency description, make a
+ # default main program with deps pointing to all DSOs which are not
+ # depended by another DSO.
+ if t.main_program_default_deps:
+ main_deps = non_dep_tgt_objs
+