diff --git a/gold/script-c.h b/gold/script-c.h index 772c76c..473add0 100644 --- a/gold/script-c.h +++ b/gold/script-c.h @@ -432,10 +432,13 @@ script_parse_memory_attr(void*, const char*, size_t, int); extern void script_set_section_region(void*, const char*, size_t, int); extern void +script_record_include_start(void*); + +extern void script_include_directive(void *, const char*, size_t); /* Called by the bison parser for expressions. */ extern Expression_ptr diff --git a/gold/script.cc b/gold/script.cc index 5350afc..5314ef6 100644 --- a/gold/script.cc +++ b/gold/script.cc @@ -24,10 +24,11 @@ #include #include #include #include +#include <queue> #include #include #include "filenames.h" #include "elfcpp.h" @@ -1230,10 +1231,25 @@ Script_options::set_section_addresses(Symbol_table* symtab, Layout* layout) (*p)->set_if_absolute(symtab, layout, false, 0, NULL); return this->script_sections_.set_section_addresses(symtab, layout); } +// This struct holds the position, file name and contents of one INCLUDE. + +struct Include_info +{ + int start_lineno, start_charpos; + int end_lineno, end_charpos; + std::string filename; + std::string content; + + Include_info() + : start_lineno(-1), start_charpos(-1), end_lineno(-1), end_charpos(-1), + filename(""), content("") + {} +}; + // This class holds data passed through the parser to the lexer and to // the parser support functions. This avoids global variables. We // can't use global variables because we need not be called by a // singleton thread. @@ -1410,10 +1426,41 @@ class Parser_closure // Return a pointer to the incremental info. Script_info* script_info() { return this->script_info_; } + // Enqueue an "Include_info" object.
+ void + enqueue_include_info(const Include_info& ii) + { + gold_assert(include_info_queue_.empty() || + (include_info_queue_.back().start_lineno < ii.start_lineno + || (include_info_queue_.back().start_lineno == ii.start_lineno + && include_info_queue_.back().start_charpos < + ii.start_charpos))); + this->include_info_queue_.push(ii); + } + + // Dequeue the oldest "Include_info" object; the queue must not be empty. + Include_info + dequeue_include_info() + { + Include_info ii = this->include_info_queue_.front(); + this->include_info_queue_.pop(); + return ii; + } + + // Return a reference to the last "Include_info" object in the queue. + Include_info& + last_include_info() + { return this->include_info_queue_.back(); } + + // Return whether we have unresolved "INCLUDE"s in this parse. + bool + has_includes_to_resolve() const + { return !this->include_info_queue_.empty(); } + private: // The name of the file we are reading. const char* filename_; // The position dependent options. Position_dependent_options posdep_options_; @@ -1448,10 +1495,12 @@ class Parser_closure std::vector language_stack_; // New input files found to add to the link. Input_arguments* inputs_; // Pointer to incremental linking info. Script_info* script_info_; + // INCLUDE directives recorded during this parse, in source order. + std::queue<Include_info> include_info_queue_; }; // FILE was found as an argument on the command line. Try to read it // as a script. Return true if the file was handled. @@ -1540,28 +1589,119 @@ read_input_script(Workqueue* workqueue, Symbol_table* symtab, Layout* layout, *used_next_blocker = true; return true; } + +// Resolve all includes. +// Input - +// origin_content - the script content that contains "INCLUDE"s. +// Example >>> +// SECTIONS { +// INCLUDE "inc1" +// } +// Include_infos (wrapped in closure) - a queue of Include_info. +// Example >>> +// queue[0] = Include_info{content="aaa INCLUDE \"BBB\" ccc", +// start_pos="1:3", end_pos="1:17"} +// Output - +// new_content - origin_content with each "INCLUDE" expanded.
+// Example >>> +// SECTIONS { +// aaa INCLUDE "BBB" ccc +// } +// Note - the expanded text may itself contain "INCLUDE"s (nested includes); +// the caller then parses it and calls this function again. + +static void +script_resolve_includes(Parser_closure& closure, + const std::string& origin_content, + std::string& new_content) +{ + new_content = ""; + const size_t content_length = origin_content.length(); + int lineno = 1; + size_t last_cursor = 0; + size_t cursor = 0; + // Offset of the beginning of the line being processed. + size_t bol = 0; + Include_info ii(closure.dequeue_include_info()); + while (cursor < content_length) + { + if (ii.start_lineno == lineno) + { + // Move cursor to the first "INCLUDE" at or after it (start_charpos is unused). + cursor = origin_content.find("INCLUDE", cursor); + gold_assert(cursor != std::string::npos); + new_content.append(std::string(origin_content, + last_cursor, + cursor - last_cursor)); + // Append the included file's content in place of the directive. + new_content.append(ii.content); + + // Move cursor just past the include statement. NOTE(review): end_lineno and end_charpos stay -1 if the parse stopped before the file name -- confirm that cannot reach here. + while (lineno < ii.end_lineno) + { + // Handle the rare case where a single INCLUDE "f" spans multiple + // lines. + cursor = origin_content.find("\n", cursor); + gold_assert(cursor != std::string::npos); + bol = ++cursor; + ++lineno; + } + cursor = bol + ii.end_charpos - 1; + last_cursor = cursor; + + // No more includes to resolve. + if (!closure.has_includes_to_resolve()) + break; + + // Get the next include to resolve. + ii = closure.dequeue_include_info(); + // Do not advance to the next line; multiple "INCLUDE"s may appear on + // a single line.
+ } + else + { + cursor = origin_content.find("\n", cursor); + if (cursor == std::string::npos) + { + // The file may end without "\n". + cursor = origin_content.length(); + break; + } + ++lineno; + bol = ++cursor; + } + } + + // Append everything after the last resolved INCLUDE. + new_content.append(std::string(origin_content, last_cursor)); +} // End of script_resolve_includes + + // Helper function for read_version_script(), read_commandline_script() and // script_include_directive(). Processes the given file in the mode indicated -// by first_token and lex_mode. +// by first_token and lex_mode. When the "p_read_into_string" argument is not +// NULL, this function just reads the file content into "*p_read_into_string" +// and returns without parsing. static bool read_script_file(const char* filename, Command_line* cmdline, - Script_options* script_options, - int first_token, Lex::Mode lex_mode) + Script_options* script_options, + int first_token, Lex::Mode lex_mode, + std::string* p_read_into_string = NULL) { Dirsearch dirsearch; std::string name = filename; // If filename is a relative filename, search for it manually using "." + // cmdline->options()->library_path() -- not dirsearch. if (!IS_ABSOLUTE_PATH(filename)) { const General_options::Dir_list& search_path = - cmdline->options().library_path(); + cmdline->options().library_path(); name = Dirsearch::find_file_in_dir_list(name, search_path, "."); } // The file locking code wants to record a Task, but we haven't // started the workqueue yet.
This is only for debugging purposes, @@ -1578,37 +1718,80 @@ read_script_file(const char* filename, Command_line* cmdline, Input_file input_file(&input_argument); int dummy = 0; if (!input_file.open(dirsearch, task, &dummy)) return false; - std::string input_string; - Lex::read_file(&input_file, &input_string); - - Lex lex(input_string.c_str(), input_string.length(), first_token); - lex.set_mode(lex_mode); - - Parser_closure closure(filename, - cmdline->position_dependent_options(), - first_token == Lex::DYNAMIC_LIST, - false, - input_file.is_in_sysroot(), - cmdline, - script_options, - &lex, - false, - NULL); - if (yyparse(&closure) != 0) + // Read the file content into the caller's string and return without parsing. + if (p_read_into_string != NULL) { + Lex::read_file(&input_file, p_read_into_string); input_file.file().unlock(task); - return false; + return true; } + const bool is_in_sysroot = input_file.is_in_sysroot(); + std::string content; + std::string new_content; + Lex::read_file(&input_file, &content); + // The content is in memory now; the file is no longer needed. input_file.file().unlock(task); - gold_assert(!closure.saw_inputs()); - - return true; + std::string* input_string = &content; + std::string* new_string = &new_content; + int include_level = 0; + + // Instead of dealing with the intricacies of lex (that is - after lex + // consumes "INCLUDE xxx", switching the input stream to the include file on + // the fly, restoring the state to just before INCLUDE, and un-consuming any + // tokens lex may have read ahead), we repeat a "parse->resolve_includes" + // cycle until no INCLUDE is left (assuming linker scripts are not huge). + // NOTE(review): every pass calls yyparse with the same script_options -- confirm repeated passes are harmless.
+ do + { + Lex lex(input_string->c_str(), input_string->length(), first_token); + lex.set_mode(lex_mode); + + Parser_closure closure(filename, + cmdline->position_dependent_options(), + first_token == Lex::DYNAMIC_LIST, + false, + is_in_sysroot, + cmdline, + script_options, + &lex, + false, + NULL); + const int rv = yyparse(&closure); + if (closure.has_includes_to_resolve()) + { + // Cyclic or too-deep INCLUDE: give up at the 16th pass with includes. + if (++include_level >= 16) + { + gold_error(_("Cyclic or too-deep INCLUDE.")); + return false; + } + // Expand the recorded includes into *new_string. + script_resolve_includes(closure, *input_string, *new_string); + // Swap pointers so that input_string points to the expanded text, + // ready for the next parse. + std::string* t = input_string; + input_string = new_string; + // "new_string" now points to the scratch string for the next + // "script_resolve_includes". + new_string = t; + *new_string = ""; + } + else + { + // Check the parser's return value only once no INCLUDE is left. + if (rv != 0) + return false; + // Successfully finished parsing script. + gold_assert(!closure.saw_inputs()); + return true; + } + } while (true); } // FILENAME was found as an argument to --script (-T). // Read it as a script, and execute its contents immediately. @@ -3363,18 +3546,50 @@ script_parse_memory_attr(void* closurev, const char* attrs, size_t attrlen, attributes = (~ attributes) & MEM_ATTR_MASK; return attributes; } + +// Queue a new Include_info holding the current line/charpos for "INCLUDE". + +extern "C" void +script_record_include_start(void* closurev) +{ + Parser_closure* closure = static_cast<Parser_closure*>(closurev); + Include_info ii; + ii.start_lineno = closure->lineno(); + ii.start_charpos = closure->charpos(); + closure->enqueue_include_info(ii); +} + + +// For "INCLUDE"s, we read the file contents (no parse) and store them in the +// last entry of closure->include_info_queue_.
+ extern "C" void -script_include_directive(void* closurev, const char* filename, size_t length) +script_include_directive(void* closurev, const char* fn, size_t length) { Parser_closure* closure = static_cast<Parser_closure*>(closurev); - std::string name(filename, length); + Include_info& ii(closure->last_include_info()); + ii.end_lineno = closure->lineno(); + // "2" is for the opening and closing quotes. NOTE(review): assumes the name was quoted -- confirm for unquoted names. + ii.end_charpos = closure->charpos() + length + 2; + ii.filename = std::string(fn, length); + gold_assert(ii.start_lineno >= 0 && ii.start_charpos >= 0 + && ((ii.start_lineno == ii.end_lineno + && ii.start_charpos < ii.end_charpos) + || ii.start_lineno < ii.end_lineno)); + + // Read the file content into ii.content, without parsing. Command_line* cmdline = closure->command_line(); - read_script_file(name.c_str(), cmdline, &cmdline->script_options(), - PARSING_LINKER_SCRIPT, Lex::LINKER_SCRIPT); + if (!read_script_file(ii.filename.c_str(), + cmdline, &cmdline->script_options(), + PARSING_LINKER_SCRIPT, + Lex::LINKER_SCRIPT, &ii.content)) + { + yyerror(closurev, "failed to read INCLUDE script."); + } } // Functions for memory regions. extern "C" Expression* diff --git a/gold/yyscript.y b/gold/yyscript.y index 87aab58..2821a53 100644 --- a/gold/yyscript.y +++ b/gold/yyscript.y @@ -526,12 +526,14 @@ section_cmd: file format. It does nothing when using ELF. Since some ELF linker scripts use it although it does nothing, we accept it and ignore it. */ } | SORT_BY_NAME '(' CONSTRUCTORS ')' - | INCLUDE string - { script_include_directive(closure, $2.value, $2.length); } + | INCLUDE + { script_record_include_start(closure); } + string + { script_include_directive(closure, $3.value, $3.length); } | ';' ; /* The length of data which may appear within the description of an output section in a SECTIONS block.
*/ @@ -689,12 +691,14 @@ file_or_sections_cmd: ENTRY '(' string ')' { script_set_entry(closure, $3.value, $3.length); } | assignment end | ASSERT_K '(' parse_exp ',' string ')' { script_add_assertion(closure, $3, $5.value, $5.length); } - | INCLUDE string - { script_include_directive(closure, $2.value, $2.length); } + | INCLUDE + { script_record_include_start(closure); } + string + { script_include_directive(closure, $3.value, $3.length); } ; /* A list of MEMORY definitions. */ memory_defs: memory_defs opt_comma memory_def @@ -705,12 +709,14 @@ memory_defs: memory_def: string memory_attr ':' memory_origin '=' parse_exp opt_comma memory_length '=' parse_exp { script_add_memory(closure, $1.value, $1.length, $2, $6, $10); } | /* LD supports an INCLUDE directive here, currently GOLD does not. */ - INCLUDE string - { script_include_directive(closure, $2.value, $2.length); } + INCLUDE + { script_record_include_start(closure); } + string + { script_include_directive(closure, $3.value, $3.length); } | ; /* The (optional) attributes of a MEMORY region. */ memory_attr: