diff options
Diffstat (limited to 'scripts/dso-ordering-test.py')
| -rw-r--r-- | scripts/dso-ordering-test.py | 1144 |
1 files changed, 1144 insertions, 0 deletions
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py new file mode 100644 index 0000000000..944ee74052 --- /dev/null +++ b/scripts/dso-ordering-test.py @@ -0,0 +1,1144 @@ +#!/usr/bin/python3 +# Generate testcase files and Makefile fragments for DSO sorting test +# Copyright (C) 2021 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +"""Generate testcase files and Makefile fragments for DSO sorting test + +This script takes a small description string language, and generates +testcases for displaying the ELF dynamic linker's dependency sorting +behavior, allowing verification. + +Testcase descriptions are semicolon-separated description strings, and +this tool generates a testcase from the description, including main program, +associated modules, and Makefile fragments for including into elf/Makefile. + +This allows automation of what otherwise would be very laborous manual +construction of complex dependency cases, however it must be noted that this +is only a tool to speed up testcase construction, and thus the generation +features are largely mechanical in nature; inconsistencies or errors may occur +if the input description was itself erroneous or have unforeseen interactions. + +The format of the input test description files are: + + # Each test description has a name, lines of description, + # and an expected output specification. Comments use '#'. + testname1: <test-description-line> + output: <expected-output-string> + + # Tests can be marked to be XFAIL by using 'xfail_output' instead + testname2: <test-description-line> + xfail_output: <expected-output-string> + + # A default set of GLIBC_TUNABLES tunables can be specified, for which + # all following tests will run multiple times, once for each of the + # GLIBC_TUNABLES=... strings set by the 'tunable_option' command. + tunable_option: <glibc-tunable-string1> + tunable_option: <glibc-tunable-string2> + + # Test descriptions can use multiple lines, which will all be merged + # together, so order is not important. + testname3: <test-description-line> + <test-description-line> + <test-description-line> + ... + output: <expected-output-string> + + # 'testname3' will be run and compared two times, for both + # GLIBC_TUNABLES=<glibc-tunable-string1> and + # GLIBC_TUNABLES=<glibc-tunable-string2>. This can be cleared and reset by the + # 'clear_tunables' command: + clear_tunables + + # Multiple expected outputs can also be specified, with an associated + # tunable option in (), which multiple tests will be run with each + # GLIBC_TUNABLES=... option tried. + testname4: + <test-description-line> + ... + output(<glibc-tunable-string1>): <expected-output-string-1> + output(<glibc-tunable-string2>): <expected-output-string-2> + # Individual tunable output cases can be XFAILed, though note that + # this will have the effect of XFAILing the entire 'testname4' test + # in the final top-level tests.sum summary. + xfail_output(<glibc-tunable-string3>): <expected-output-string-3> + + # When multiple outputs (with specific tunable strings) are specified, + # these take priority over any active 'tunable_option' settings. + + # When a test is meant to be placed under 'xtests' (not run under + # "make check", but only when "make xtests" is used), the testcase name can be + # declared using 'xtest(<test-name>)': + ... + xtest(test-too-big1): <test-description> + output: <expected-output-string> + ... + + # Do note that under current elf/Makefile organization, for such a xtest case, + # while the test execution is only run under 'make xtests', the associated + # DSOs are always built even under 'make check'. + +On the description language used, an example description line string: + + a->b!->[cdef];c=>g=>h;{+c;%c;-c}->a + +Each identifier represents a shared object module, currently sequences of +letters/digits are allowed, case-sensitive. + +All such shared objects have a constructor/destructor generated for them +that emits its name followed by a '>' for constructors, and '<' followed by +its name for destructors, e.g. if the name is 'obj1', then "obj1>" and "<obj1" +is printed by its constructor/destructor respectively. + +The -> operator specifies a link time dependency, these can be chained for +convenience (e.g. a->b->c->d). + +The => operator creates a call-reference, e.g. for a=>b, an fn_a() function +is created inside module 'a', which calls fn_b() in module 'b'. +These module functions emit 'name()' output in nested form, +e.g. a=>b emits 'a(b())' + +For single character object names, square brackets [] in the description +allows specifying multiple objects; e.g. a->[bcd]->e is equivalent to + a->b->e;a->c->e;a->d->e + +The () parenthesis construct with space separated names is also allowed for +specifying objects. For names with integer suffixes a range can also be used, +e.g. (foo1 bar2-5), specifies DSOs foo1, bar2, bar2, bar3, bar4, bar5. + +A {} construct specifies the main test program, and its link dependencies +are also specified using ->. Inside {}, a few ;-separated constructs are +allowed: + +a Loads module a using dlopen(RTLD_LAZY|RTLD_GLOBAL) + ^a Loads module a using dlopen(RTLD_LAZY) + %a Use dlsym() to load and call fn_a() + @a Calls fn_a() directly. + -a Unloads module a using dlclose() + +The generated main program outputs '{' '}' with all output from above +constructs in between. The other output before/after {} are the ordered +constructor/destructor output. + +If no {} construct is present, a default empty main program is linked +against all objects which have no dependency linked to it. e.g. for +'[ab]->c;d->e', the default main program is equivalent to '{}->[abd]' + +Sometimes for very complex or large testcases, besides specifying a +few explicit dependencies from main{}, the above default dependency +behavior is still useful to automatically have, but is turned off +upon specifying a single explicit {}->dso_name. +In this case, add {}->* to explicitly add this generation behavior: + + # Main program links to 'foo', and all other objects which have no + # dependency linked to it. + {}->foo,{}->* + +Note that '*' works not only on main{}, but can be used as the +dependency target of any object. Note that it only works as a target, +not a dependency source. + +The '!' operator after object names turns on permutation of its +dependencies, e.g. while a->[bcd] only generates one set of objects, +with 'a.so' built with a link line of "b.so c.so d.so", for a!->[bcd] +permutations of a's dependencies creates multiple testcases with +different link line orders: "b.so c.so d.so", "c.so b.so d.so", +"b.so d.so c.so", etc. Note that for a <test-name> specified on +the script command-line, multiple <test-name_1>, <test-name_2>, etc. +tests will be generated (e.g. for a!->[bc]!->[de], eight tests with +different link orders for a, b, and c will be generated) + +It is possible to specify the ELF soname field for an object or the +main program: + # DSO 'a' will be linked with the appropriate -Wl,-soname=x setting + a->b->c;soname(a)=x + # The the main program can also have a soname specified + soname({})=y + +This can be used to test how ld.so behaves when objects and/or the +main program have such a field set. + + +Strings Output by Generated Testcase Programs + +The text output produced by a generated testcase consists of three main +parts: + 1. The constructors' output + 2. Output from the main program + 3. Destructors' output + +To see by example, a simple test description "a->b->c" generates a testcase +that when run, outputs: "c>b>a>{}<a<b<c" + +Each generated DSO constructor prints its name followed by a '>' character, +and the "c>b>a" part above is the full constructor output by all DSOs, the +order indicating that DSO 'c', which does not depend on any other DSO, has +its constructor run first, followed by 'b' and then 'a'. + +Destructor output for each DSO is a '<' character followed by its name, +reflecting its reverse nature of constructors. In the above example, the +destructor output part is "<a<b<c". + +The middle "{}" part is the main program. In this simple example, nothing +was specified for the main program, so by default it is implicitly linked +to the DSO 'a' (with no other DSOs depending on it) and only prints the +brackets {} with no actions inside. + +To see an example with actions inside the main program, lets see an example +description: c->g=>h;{+c;%c;-c}->a->h + +This produces a testcase, that when executed outputs: + h>a>{+c[g>c>];%c();-c[<c<g];}<a<h + +The constructor and destructor parts display the a->h dependency as expected. +Inside the main program, the "+c" action triggers a dlopen() of DSO 'c', +causing another chain of constructors "g>c>" to be triggered. Here it is +displayed inside [] brackets for each dlopen call. The same is done for "-c", +a dlclose() of 'c'. + +The "%c" output is due to calling to fn_c() inside DSO 'c', this comprises +of two parts: the '%' character is printed by the caller, here it is the main +program. The 'c' character is printed from inside fn_c(). The '%' character +indicates that this is called by a dlsym() of "fn_c". A '@' character would +mean a direct call (with a symbol reference). These can all be controlled +by the main test program constructs documented earlier. + +The output strings described here is the exact same form placed in +test description files' "output: <expected output>" line. +""" + +import sys +import re +import os +import subprocess +import argparse +from collections import OrderedDict +import itertools + +# BUILD_GCC is only used under the --build option, +# which builds the generated testcase, including DSOs using BUILD_GCC. +# Mainly for testing purposes, especially debugging of this script, +# and can be changed here to another toolchain path if needed. +build_gcc = "gcc" + +def get_parser(): + parser = argparse.ArgumentParser("") + parser.add_argument("description", + help="Description string of DSO dependency test to be " + "generated (see script source for documentation of " + "description language), either specified here as " + "command line argument, or by input file using " + "-f/--description-file option", + nargs="?", default="") + parser.add_argument("test_name", + help="Identifier for testcase being generated", + nargs="?", default="") + parser.add_argument("--objpfx", + help="Path to place generated files, defaults to " + "current directory if none specified", + nargs="?", default="./") + parser.add_argument("-m", "--output-makefile", + help="File to write Makefile fragment to, defaults to " + "stdout when option not present", + nargs="?", default="") + parser.add_argument("-f", "--description-file", + help="Input file containing testcase descriptions", + nargs="?", default="") + parser.add_argument("--build", help="After C testcase generated, build it " + "using gcc (for manual testing purposes)", + action="store_true") + parser.add_argument("--debug-output", + help="Prints some internal data " + "structures; used for debugging of this script", + action="store_true") + return parser + +# Main script starts here. +cmdlineargs = get_parser().parse_args() +test_name = cmdlineargs.test_name +description = cmdlineargs.description +objpfx = cmdlineargs.objpfx +description_file = cmdlineargs.description_file +output_makefile = cmdlineargs.output_makefile +makefile = "" +default_tunable_options = [] + +current_input_lineno = 0 +def error(msg): + global current_input_lineno + print("Error: %s%s" % ((("Line %d, " % current_input_lineno) + if current_input_lineno != 0 else ""), + msg)) + exit(1) + +if(test_name or description) and description_file: + error("both command-line testcase and input file specified") +if test_name and not description: + error("command-line testcase name without description string") + +# Main class type describing a testcase. +class TestDescr: + def __init__(self): + self.objs = [] # list of all DSO objects + self.deps = OrderedDict() # map of DSO object -> list of dependencies + + # map of DSO object -> list of call refs + self.callrefs = OrderedDict() + + # map of DSO object -> list of permutations of dependencies + self.dep_permutations = OrderedDict() + + # map of DSO object -> SONAME of object (if one is specified) + self.soname_map = OrderedDict() + + # list of main program operations + self.main_program = [] + # set if default dependencies added to main + self.main_program_default_deps = True + + self.test_name = "" # name of testcase + self.expected_outputs = OrderedDict() # expected outputs of testcase + self.xfail = False # set if this is a XFAIL testcase + self.xtest = False # set if this is put under 'xtests' + + # Add 'object -> [object, object, ...]' relations to CURR_MAP + def __add_deps_internal(self, src_objs, dst_objs, curr_map): + for src in src_objs: + for dst in dst_objs: + if not src in curr_map: + curr_map[src] = [] + if not dst in curr_map[src]: + curr_map[src].append(dst) + def add_deps(self, src_objs, dst_objs): + self.__add_deps_internal(src_objs, dst_objs, self.deps) + def add_callrefs(self, src_objs, dst_objs): + self.__add_deps_internal(src_objs, dst_objs, self.callrefs) + +# Process commands inside the {} construct. +# Note that throughout this script, the main program object is represented +# by the '#' string. +def process_main_program(test_descr, mainprog_str): + if mainprog_str: + test_descr.main_program = mainprog_str.split(';') + for s in test_descr.main_program: + m = re.match(r"^([+\-%^@])([0-9a-zA-Z]+)$", s) + if not m: + error("'%s' is not recognized main program operation" % (s)) + opr = m.group(1) + obj = m.group(2) + if not obj in test_descr.objs: + test_descr.objs.append(obj) + if opr == '%' or opr == '@': + test_descr.add_callrefs(['#'], [obj]) + # We have a main program specified, turn this off + test_descr.main_program_default_deps = False + +# For(a1 a2 b1-12) object set descriptions, expand into an object list +def expand_object_set_string(descr_str): + obj_list = [] + descr_list = descr_str.split() + for descr in descr_list: + m = re.match(r"^([a-zA-Z][0-9a-zA-Z]*)(-[0-9]+)?$", descr) + if not m: + error("'%s' is not a valid object set description" % (descr)) + obj = m.group(1) + idx_end = m.group(2) + if not idx_end: + if not obj in obj_list: + obj_list.append(obj) + else: + idx_end = int(idx_end[1:]) + m = re.match(r"^([0-9a-zA-Z][a-zA-Z]*)([0-9]+)$", obj) + if not m: + error("object description '%s' is malformed" % (obj)) + obj_name = m.group(1) + idx_start = int(m.group (2)) + if idx_start > idx_end: + error("index range %s-%s invalid" % (idx_start, idx_end)) + for i in range(idx_start, idx_end + 1): + o = obj_name + str(i) + if not o in obj_list: + obj_list.append(o) + return obj_list + +# Lexer for tokens +tokenspec = [ ("SONAME", r"soname\(([0-9a-zA-Z{}]+)\)=([0-9a-zA-Z]+)"), + ("OBJ", r"([0-9a-zA-Z]+)"), + ("DEP", r"->"), + ("CALLREF", r"=>"), + ("OBJSET", r"\[([0-9a-zA-Z]+)\]"), + ("OBJSET2", r"\(([0-9a-zA-Z \-]+)\)"), + ("OBJSET3", r"\*"), + ("PROG", r"{([0-9a-zA-Z;+^\-%@]*)}"), + ("PERMUTE", r"!"), + ("SEMICOL", r";"), + ("ERROR", r".") ] +tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tokenspec) + +# Main line parser of description language +def parse_description_string(t, descr_str): + # State used when parsing dependencies + curr_objs = [] + in_dep = False + in_callref = False + def clear_dep_state(): + nonlocal in_dep, in_callref + in_dep = in_callref = False + + for m in re.finditer(tok_re, descr_str): + kind = m.lastgroup + value = m.group() + if kind == "SONAME": + s = re.match(r"soname\(([0-9a-zA-Z{}]+)\)=([0-9a-zA-Z]+)", value) + obj = s.group(1) + val = s.group(2) + if obj == "{}": + if '#' in t.soname_map: + error("soname of main program already set") + # Adjust to internal name + obj = '#' + else: + if re.match(r"[{}]", obj): + error("invalid object name '%s'" % (obj)) + if not obj in t.objs: + error("'%s' is not name of already defined object" % (obj)) + if obj in t.soname_map: + error("'%s' already has soname of '%s' set" + % (obj, t.soname_map[obj])) + t.soname_map[obj] = val + + elif kind == "OBJ": + if in_dep: + t.add_deps(curr_objs, [value]) + elif in_callref: + t.add_callrefs(curr_objs, [value]) + clear_dep_state() + curr_objs = [value] + if not value in t.objs: + t.objs.append(value) + + elif kind == "OBJSET": + objset = value[1:len(value)-1] + if in_dep: + t.add_deps(curr_objs, list (objset)) + elif in_callref: + t.add_callrefs(curr_objs, list (objset)) + clear_dep_state() + curr_objs = list(objset) + for o in list(objset): + if not o in t.objs: + t.objs.append(o) + + elif kind == "OBJSET2": + descr_str = value[1:len(value)-1] + descr_str.strip() + objs = expand_object_set_string(descr_str) + if not objs: + error("empty object set '%s'" % (value)) + if in_dep: + t.add_deps(curr_objs, objs) + elif in_callref: + t.add_callrefs(curr_objs, objs) + clear_dep_state() + curr_objs = objs + for o in objs: + if not o in t.objs: + t.objs.append(o) + + elif kind == "OBJSET3": + if in_dep: + t.add_deps(curr_objs, ['*']) + elif in_callref: + t.add_callrefs(curr_objs, ['*']) + else: + error("non-dependence target set '*' can only be used " + "as target of ->/=> operations") + clear_dep_state() + curr_objs = ['*'] + + elif kind == "PERMUTE": + if in_dep or in_callref: + error("syntax error, permute operation invalid here") + if not curr_objs: + error("syntax error, no objects to permute here") + + for obj in curr_objs: + if not obj in t.dep_permutations: + # Signal this object has permuted dependencies + t.dep_permutations[obj] = [] + + elif kind == "PROG": + if t.main_program: + error("cannot have more than one main program") + if in_dep: + error("objects cannot have dependency on main program") + if in_callref: + # TODO: A DSO can resolve to a symbol in the main binary, + # which we syntactically allow here, but haven't yet + # implemented. + t.add_callrefs(curr_objs, ["#"]) + process_main_program(t, value[1:len(value)-1]) + clear_dep_state() + curr_objs = ["#"] + + elif kind == "DEP": + if in_dep or in_callref: + error("syntax error, multiple contiguous ->,=> operations") + if '*' in curr_objs: + error("non-dependence target set '*' can only be used " + "as target of ->/=> operations") + in_dep = True + + elif kind == "CALLREF": + if in_dep or in_callref: + error("syntax error, multiple contiguous ->,=> operations") + if '*' in curr_objs: + error("non-dependence target set '*' can only be used " + "as target of ->/=> operations") + in_callref = True + + elif kind == "SEMICOL": + curr_objs = [] + clear_dep_state() + + else: + error("unknown token '%s'" % (value)) + return t + +# Main routine to process each testcase description +def process_testcase(t): + global objpfx + assert t.test_name + + base_test_name = t.test_name + test_subdir = base_test_name + "-dir" + testpfx = objpfx + test_subdir + "/" + + if not os.path.exists(testpfx): + os.mkdir(testpfx) + + def find_objs_not_depended_on(t): + objs_not_depended_on = [] + for obj in t.objs: + skip = False + for r in t.deps.items(): + if obj in r[1]: + skip = True + break + if not skip: + objs_not_depended_on.append(obj) + return objs_not_depended_on + + non_dep_tgt_objs = find_objs_not_depended_on(t) + for obj in t.objs: + if obj in t.deps: + deps = t.deps[obj] + if '*' in deps: + t.deps[obj].remove('*') + t.add_deps([obj], non_dep_tgt_objs) + if obj in t.callrefs: + deps = t.callrefs[obj] + if '*' in deps: + t.deps[obj].remove('*') + t.add_callrefs([obj], non_dep_tgt_objs) + if "#" in t.deps: + deps = t.deps["#"] + if '*' in deps: + t.deps["#"].remove('*') + t.add_deps(["#"], non_dep_tgt_objs) + + # If no main program was specified in dependency description, make a + # default main program with deps pointing to all DSOs which are not + # depended by another DSO. + if t.main_program_default_deps: + main_deps = non_dep_tgt_objs + if not main_deps: + error("no objects for default main program to point " + "dependency to(all objects strongly connected?)") + t.add_deps(["#"], main_deps) + + # Some debug output + if cmdlineargs.debug_output: + print("Testcase: %s" % (t.test_name)) + print("All objects: %s" % (t.objs)) + print("--- Static link dependencies ---") + for r in t.deps.items(): + print("%s -> %s" % (r[0], r[1])) + print("--- Objects whose dependencies are to be permuted ---") + for r in t.dep_permutations.items(): + print("%s" % (r[0])) + print("--- Call reference dependencies ---") + for r in t.callrefs.items(): + print("%s => %s" % (r[0], r[1])) + print("--- main program ---") + print(t.main_program) + + # Main testcase generation routine, does Makefile fragment generation, + # testcase source generation, and if --build specified builds testcase. + def generate_testcase(test_descr, test_suffix): + + test_name = test_descr.test_name + test_suffix + + # Print out needed Makefile fragments for use in glibc/elf/Makefile. + module_names = "" + for o in test_descr.objs: + module_names += " " + test_subdir + "/" + test_name + "-" + o + makefile.write("modules-names +=%s\n" % (module_names)) + + # Depth-first traversal, executing FN(OBJ) in post-order + def dfs(t, fn): + def dfs_rec(obj, fn, obj_visited): + if obj in obj_visited: + return + obj_visited[obj] = True + if obj in t.deps: + for dep in t.deps[obj]: + dfs_rec(dep, fn, obj_visited) + fn(obj) + + obj_visited = {} + for obj in t.objs: + dfs_rec(obj, fn, obj_visited) + + # Generate link dependencies for all DSOs, done in a DFS fashion. + # Usually this doesn't need to be this complex, just listing the direct + # dependencies is enough. However to support creating circular + # dependency situations, traversing it by DFS and tracking processing + # status is the natural way to do it. + obj_processed = {} + fake_created = {} + def gen_link_deps(obj): + if obj in test_descr.deps: + |
