/* IELR main implementation. Copyright (C) 2009-2015, 2018-2019 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include "system.h" #include "ielr.h" #include #include #include "AnnotationList.h" #include "derives.h" #include "getargs.h" #include "lalr.h" #include "muscle-tab.h" #include "nullable.h" #include "relation.h" #include "state.h" #include "symtab.h" /** Records the value of the \%define variable lr.type. */ typedef enum { LR_TYPE__LR0, LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType; /* The user's requested LR type. */ static LrType lr_type_get (void) { char *type = muscle_percent_define_get ("lr.type"); LrType res; if (STREQ (type, "lr""(0)")) res = LR_TYPE__LR0; else if (STREQ (type, "lalr")) res = LR_TYPE__LALR; else if (STREQ (type, "ielr")) res = LR_TYPE__IELR; else if (STREQ (type, "canonical-lr")) res = LR_TYPE__CANONICAL_LR; else { aver (false); abort (); } free (type); return res; } /** * \post: * - \c result = a new \c bitset of size \c ::nritems such that any bit \c i * is set iff ritem[i] is a nonterminal after which all ritems in * the same RHS are nullable nonterminals. In other words, the follows of * a goto on ritem[i] include the lookahead set of the item. */ static bitset ielr_compute_ritem_sees_lookahead_set (void) { bitset result = bitset_create (nritems, BITSET_FIXED); unsigned i = nritems-1; while (0 < i) { --i; while (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]) && nullable [item_number_as_symbol_number (ritem[i]) - ntokens]) bitset_set (result, i--); if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i])) bitset_set (result, i--); while (!item_number_is_rule_number (ritem[i]) && 0 < i) --i; } if (trace_flag & trace_ielr) { fprintf (stderr, "ritem_sees_lookahead_set:\n"); debug_bitset (result); fprintf (stderr, "\n"); } return result; } /** * \pre: * - \c ritem_sees_lookahead_set was computed by * \c ielr_compute_ritem_sees_lookahead_set. * \post: * - Each of \c *edgesp and \c *edge_countsp is a new array of size * \c ::ngotos. * - (*edgesp)[i] points to a \c goto_number array of size * (*edge_countsp)[i]+1. * - In such a \c goto_number array, the last element is \c ::END_NODE. * - All remaining elements are the indices of the gotos to which there is an * internal follow edge from goto \c i. * - There is an internal follow edge from goto \c i to goto \c j iff both: * - The from states of gotos \c i and \c j are the same. * - The transition nonterminal for goto \c i appears as the first RHS * symbol of at least one production for which both: * - The LHS is the transition symbol of goto \c j. * - All other RHS symbols are nullable nonterminals. * - In other words, the follows of goto \c i include the follows of * goto \c j and it's an internal edge because the from states are the * same. */ static void ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set, goto_number ***edgesp, int **edge_countsp) { *edgesp = xnmalloc (ngotos, sizeof **edgesp); *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp); { bitset sources = bitset_create (ngotos, BITSET_FIXED); for (goto_number i = 0; i < ngotos; ++i) (*edge_countsp)[i] = 0; for (goto_number i = 0; i < ngotos; ++i) { int nsources = 0; { for (rule **rulep = derives[states[to_state[i]]->accessing_symbol - ntokens]; *rulep; ++rulep) { /* If there is at least one RHS symbol, if the first RHS symbol is a nonterminal, and if all remaining RHS symbols (if any) are nullable nonterminals, create an edge from the LHS symbol's goto to the first RHS symbol's goto such that the RHS symbol's goto will be the source of the edge after the eventual relation_transpose below. Unlike in ielr_compute_always_follows, I use a bitset for edges rather than an array because it is possible that multiple RHS's with the same first symbol could fit and thus that we could end up with redundant edges. With the possibility of redundant edges, it's hard to know ahead of time how large to make such an array. Another possible redundancy is that source and destination might be the same goto. Eliminating all these possible redundancies now might possibly help performance a little. I have not proven any of this, but I'm guessing the bitset shouldn't entail much of a performance penalty, if any. */ if (bitset_test (ritem_sees_lookahead_set, (*rulep)->rhs - ritem)) { goto_number source = map_goto (from_state[i], item_number_as_symbol_number (*(*rulep)->rhs)); if (i != source && !bitset_test (sources, source)) { bitset_set (sources, source); ++nsources; ++(*edge_countsp)[source]; } } } } if (nsources == 0) (*edgesp)[i] = NULL; else { (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]); { bitset_iterator biter_source; bitset_bindex source; int j = 0; BITSET_FOR_EACH (biter_source, sources, source, 0) (*edgesp)[i][j++] = source; } (*edgesp)[i][nsources] = END_NODE; } bitset_zero (sources); } bitset_free (sources); } relation_transpose (edgesp, ngotos); if (trace_flag & trace_ielr) relation_print ("internal_follow_edges", *edgesp, ngotos, NULL, stderr); } /** * \pre: * - \c ritem_sees_lookahead_set was computed by * \c ielr_compute_ritem_sees_lookahead_set. * - \c internal_follow_edges was computed by * \c ielr_compute_internal_follow_edges. * \post: * - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows * is \c ngotos and the number of columns is maximum number of kernel items * in any state. * - (*follow_kernel_itemsp)[i][j] is set iff the follows of goto * \c i include the lookahead set of item \c j in the from state of goto * \c i. * - Thus, (*follow_kernel_itemsp)[i][j] is always unset if there is * no item \c j in the from state of goto \c i. */ static void ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set, goto_number **internal_follow_edges, bitsetv *follow_kernel_itemsp) { { size_t max_nitems = 0; for (state_number i = 0; i < nstates; ++i) if (states[i]->nitems > max_nitems) max_nitems = states[i]->nitems; *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED); } for (goto_number i = 0; i < ngotos; ++i) { size_t nitems = states[from_state[i]]->nitems; item_number *items = states[from_state[i]]->items; for (size_t j = 0; j < nitems; ++j) /* If this item has this goto and if all subsequent symbols in this RHS (if any) are nullable nonterminals, then record this item as one whose lookahead set is included in this goto's follows. */ if (item_number_is_symbol_number (ritem[items[j]]) && item_number_as_symbol_number (ritem[items[j]]) == states[to_state[i]]->accessing_symbol && bitset_test (ritem_sees_lookahead_set, items[j])) bitset_set ((*follow_kernel_itemsp)[i], j); } relation_digraph (internal_follow_edges, ngotos, *follow_kernel_itemsp); if (trace_flag & trace_ielr) { fprintf (stderr, "follow_kernel_items:\n"); debug_bitsetv (*follow_kernel_itemsp); } } /** * \pre * - \c *edgesp and \c edge_counts were computed by * \c ielr_compute_internal_follow_edges. * \post * - \c *always_followsp is a new \c bitsetv with \c ngotos rows and * \c ntokens columns. * - (*always_followsp)[i][j] is set iff token \c j is an always * follow (that is, it's computed by internal and successor edges) of goto * \c i. * - Rows of \c *edgesp have been realloc'ed and extended to include * successor follow edges. \c edge_counts has not been updated. */ static void ielr_compute_always_follows (goto_number ***edgesp, int const edge_counts[], bitsetv *always_followsp) { *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED); { goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array); for (goto_number i = 0; i < ngotos; ++i) { goto_number nedges = edge_counts[i]; { int j; transitions *trans = states[to_state[i]]->transitions; FOR_EACH_SHIFT (trans, j) bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j)); for (; j < trans->num; ++j) { symbol_number sym = TRANSITION_SYMBOL (trans, j); if (nullable[sym - ntokens]) edge_array[nedges++] = map_goto (to_state[i], sym); } } if (nedges - edge_counts[i]) { (*edgesp)[i] = xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]); memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i], (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]); (*edgesp)[i][nedges] = END_NODE; } } free (edge_array); } relation_digraph (*edgesp, ngotos, *always_followsp); if (trace_flag & trace_ielr) { relation_print ("always follow edges", *edgesp, ngotos, NULL, stderr); fprintf (stderr, "always_follows:\n"); debug_bitsetv (*always_followsp); } } /** * \post * - \c result is a new array of size \c ::nstates. * - result[i] is an array of pointers to the predecessor * state's of state \c i. * - The last element of such an array is \c NULL. */ static state *** ielr_compute_predecessors (void) { int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts); state ***result = xnmalloc (nstates, sizeof *result); for (state_number i = 0; i < nstates; ++i) predecessor_counts[i] = 0; for (state_number i = 0; i < nstates; ++i) for (int j = 0; j < states[i]->transitions->num; ++j) ++predecessor_counts[states[i]->transitions->states[j]->number]; for (state_number i = 0; i < nstates; ++i) { result[i] = xnmalloc (predecessor_counts[i]+1, sizeof *result[i]); result[i][predecessor_counts[i]] = NULL; predecessor_counts[i] = 0; } for (state_number i = 0; i < nstates; ++i) for (int j = 0; j < states[i]->transitions->num; ++j) { state_number k = states[i]->transitions->states[j]->number; result[k][predecessor_counts[k]++] = states[i]; } free (predecessor_counts); return result; } /** * \post * - \c *follow_kernel_itemsp and \c *always_followsp were computed by * \c ielr_compute_follow_kernel_items and * \c ielr_compute_always_follows. * - Iff predecessorsp != NULL, then \c *predecessorsp was computed * by \c ielr_compute_predecessors. */ static void ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp, bitsetv *always_followsp, state ****predecessorsp) { goto_number **edges; int *edge_counts; { bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set (); ielr_compute_internal_follow_edges (ritem_sees_lookahead_set, &edges, &edge_counts); ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges, follow_kernel_itemsp); bitset_free (ritem_sees_lookahead_set); } ielr_compute_always_follows (&edges, edge_counts, always_followsp); for (int i = 0; i < ngotos; ++i) free (edges[i]); free (edges); free (edge_counts); if (predecessorsp) *predecessorsp = ielr_compute_predecessors (); } /** * \note * - FIXME: It might be an interesting experiment to compare the space and * time efficiency of computing \c item_lookahead_sets either: * - Fully up front. * - Just-in-time, as implemented below. * - Not at all. That is, just let annotations continue even when * unnecessary. */ bool ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item, symbol_number lookahead, state ***predecessors, bitset **item_lookahead_sets) { if (!item_lookahead_sets[s->number]) { item_lookahead_sets[s->number] = xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]); for (size_t i = 0; i < s->nitems; ++i) item_lookahead_sets[s->number][i] = NULL; } if (!item_lookahead_sets[s->number][item]) { item_lookahead_sets[s->number][item] = bitset_create (ntokens, BITSET_FIXED); /* If this kernel item is the beginning of a RHS, it must be the kernel item in the start state, and so its LHS has no follows and no goto to check. If, instead, this kernel item is the successor of the start state's kernel item, there are still no follows and no goto. This situation is fortunate because we want to avoid the - 2 below in both cases. Actually, IELR(1) should never invoke this function for either of those cases because (1) follow_kernel_items will never reference a kernel item for this RHS because the end token blocks sight of the lookahead set from the RHS's only nonterminal, and (2) no reduction has a lookback dependency on this lookahead set. Nevertheless, I didn't change this test to an aver just in case the usage of this function evolves to need those two cases. In both cases, the current implementation returns the right result. */ if (s->items[item] > 1) { /* If the LHS symbol of this item isn't known (because this is a top-level invocation), go get it. */ if (!lhs) { unsigned i; for (i = s->items[item]; !item_number_is_rule_number (ritem[i]); ++i) continue; lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number; } /* If this kernel item is next to the beginning of the RHS, then check all predecessors' goto follows for the LHS. */ if (item_number_is_rule_number (ritem[s->items[item] - 2])) { aver (lhs != accept->content->number); for (state **predecessor = predecessors[s->number]; *predecessor; ++predecessor) bitset_or (item_lookahead_sets[s->number][item], item_lookahead_sets[s->number][item], goto_follows[map_goto ((*predecessor)->number, lhs)]); } /* If this kernel item is later in the RHS, then check all predecessor items' lookahead sets. */ else { for (state **predecessor = predecessors[s->number]; *predecessor; ++predecessor) { size_t predecessor_item; for (predecessor_item = 0; predecessor_item < (*predecessor)->nitems; ++predecessor_item) if ((*predecessor)->items[predecessor_item] == s->items[item] - 1) break; aver (predecessor_item != (*predecessor)->nitems); ielr_item_has_lookahead (*predecessor, lhs, predecessor_item, 0 /*irrelevant*/, predecessors, item_lookahead_sets); bitset_or (item_lookahead_sets[s->number][item], item_lookahead_sets[s->number][item], item_lookahead_sets[(*predecessor)->number] [predecessor_item]); } } } } return bitset_test (item_lookahead_sets[s->number][item], lookahead); } /** * \pre * - \c follow_kernel_items, \c always_follows, and \c predecessors * were computed by \c ielr_compute_auxiliary_tables. * \post * - Each of *inadequacy_listsp and *annotation_listsp * points to a new array of size \c ::nstates. * - For 0 <= i < ::nstates: * - (*inadequacy_listsp)[i] contains the \c InadequacyList head * node for states[i]. * - (*annotation_listsp)[i] contains the \c AnnotationList head * node for states[i]. * - *max_annotationsp is the maximum number of annotations per * state. */ static void ielr_compute_annotation_lists (bitsetv follow_kernel_items, bitsetv always_follows, state ***predecessors, AnnotationIndex *max_annotationsp, InadequacyList ***inadequacy_listsp, AnnotationList ***annotation_listsp, struct obstack *annotations_obstackp) { bitset **item_lookahead_sets = xnmalloc (nstates, sizeof *item_lookahead_sets); AnnotationIndex *annotation_counts = xnmalloc (nstates, sizeof *annotation_counts); ContributionIndex max_contributions = 0; unsigned total_annotations = 0; *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp); *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp); for (state_number i = 0; i < nstates; ++i) { item_lookahead_sets[i] = NULL; (*inadequacy_listsp)[i] = NULL; (*annotation_listsp)[i] = NULL; annotation_counts[i] = 0; } { InadequacyListNodeCount inadequacy_list_node_count = 0; for (state_number i = 0; i < nstates; ++i) AnnotationList__compute_from_inadequacies ( states[i], follow_kernel_items, always_follows, predecessors, item_lookahead_sets, *inadequacy_listsp, *annotation_listsp, annotation_counts, &max_contributions, annotations_obstackp, &inadequacy_list_node_count); } *max_annotationsp = 0; for (state_number i = 0; i < nstates; ++i) { if (annotation_counts[i] > *max_annotationsp) *max_annotationsp = annotation_counts[i]; total_annotations += annotation_counts[i]; } if (trace_flag & trace_ielr) { for (state_number i = 0; i < nstates; ++i) { fprintf (stderr, "Inadequacy annotations for state %d:\n", i); AnnotationList__debug ((*annotation_listsp)[i], states[i]->nitems, 2); } fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates); fprintf (stderr, "Average number of annotations per state: %f\n", (float)total_annotations/nstates); fprintf (stderr, "Max number of annotations per state: %d\n", *max_annotationsp); fprintf (stderr, "Max number of contributions per annotation: %d\n", max_contributions); } for (state_number i = 0; i < nstates; ++i) if (item_lookahead_sets[i]) { for (size_t j = 0; j < states[i]->nitems; ++j) if (item_lookahead_sets[i][j]) bitset_free (item_lookahead_sets[i][j]); free (item_lookahead_sets[i]); } free (item_lookahead_sets); free (annotation_counts); } typedef struct state_list { struct state_list *next; state *state; /** Has this state been recomputed as a successor of another state? */ bool recomputedAsSuccessor; /** * \c NULL iff all lookahead sets are empty. lookaheads[i] = NULL * iff the lookahead set on item \c i is empty. */ bitset *lookaheads; /** * nextIsocore is the next state in a circularly linked-list of all states * with the same core. The one originally computed by generate_states in * lr0.c is lr0Isocore. */ struct state_list *lr0Isocore; struct state_list *nextIsocore; } state_list; /** * \pre * - \c follow_kernel_items and \c always_follows were computed by * \c ielr_compute_auxiliary_tables. * - n->class = nterm_sym. * \post * - \c follow_set contains the follow set for the goto on nonterminal \c n * in state \c s based on the lookaheads stored in s->lookaheads. */ static void ielr_compute_goto_follow_set (bitsetv follow_kernel_items, bitsetv always_follows, state_list *s, sym_content *n, bitset follow_set) { goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number); bitset_copy (follow_set, always_follows[n_goto]); if (s->lookaheads) { bitset_iterator biter_item; bitset_bindex item; BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0) if (s->lookaheads[item]) bitset_or (follow_set, follow_set, s->lookaheads[item]); } } /** * \pre * - \c follow_kernel_items and \c always_follows were computed by * \c ielr_compute_auxiliary_tables. * - \c lookahead_filter was computed by * \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore * of \c t. * - The number of rows in \c lookaheads is at least the number of items in * \c t, and the number of columns is \c ::ntokens. * \post * - lookaheads[i][j] is set iff both: * - lookahead_filter[i][j] is set. * - The isocore of \c t that will be the transition successor of \c s will * inherit from \c s token \c j into the lookahead set of item \c i. */ static void ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows, state_list *s, state *t, bitsetv lookahead_filter, bitsetv lookaheads) { size_t s_item = 0; bitsetv_zero (lookaheads); for (size_t t_item = 0; t_item < t->nitems; ++t_item) { /* If this kernel item is the beginning of a RHS, it must be the kernel item in the start state, but t is supposed to be a successor state. If, instead, this kernel item is the successor of the start state's kernel item, the lookahead set is empty. This second case is a special case to avoid the - 2 below, but the next successor can be handled fine without special casing it. */ aver (t->items[t_item] != 0); if (t->items[t_item] > 1 && !bitset_empty_p (lookahead_filter[t_item])) { if (item_number_is_rule_number (ritem[t->items[t_item] - 2])) { unsigned rule_item; for (rule_item = t->items[t_item]; !item_number_is_rule_number (ritem[rule_item]); ++rule_item) ; ielr_compute_goto_follow_set ( follow_kernel_items, always_follows, s, rules[item_number_as_rule_number (ritem[rule_item])].lhs, lookaheads[t_item]); } else if (s->lookaheads) { /* We don't have to start the s item search at the beginning every time because items from both states are sorted by their indices in ritem. */ for (; s_item < s->state->nitems; ++s_item) if (s->state->items[s_item] == t->items[t_item] - 1) break; aver (s_item != s->state->nitems); if (s->lookaheads[s_item]) bitset_copy (lookaheads[t_item], s->lookaheads[s_item]); } bitset_and (lookaheads[t_item], lookaheads[t_item], lookahead_filter[t_item]); } } } /** * \pre * - \c follow_kernel_items and \c always_follows were computed by * \c ielr_compute_auxiliary_tables. * - Either: * - annotation_lists = NULL and all bits in work2 are set. * - \c annotation_lists was computed by \c ielr_compute_annotation_lists. * - The number of rows in each of \c lookaheads and \c work2 is the maximum * number of items in any state. The number of columns in each is * \c ::ntokens. * - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t. * - \c ::nstates is the total number of states, some not yet fully computed, * in the list ending at \c *last_statep. It is at least the number of * original LR(0) states. * - The size of \c work1 is at least the number of annotations for the LR(0) * isocore of \c t. * \post * - Either: * - In the case that annotation_lists != NULL, * lookaheads \@pre was merged with some isocore of \c t if * permitted by the annotations for the original LR(0) isocore of \c t. * If this changed the lookaheads in that isocore, those changes were * propagated to all already computed transition successors recursively * possibly resulting in the splitting of some of those successors. * - In the case that annotation_lists = NULL, * lookaheads \@pre was merged with some isocore of \c t if the * isocore's lookahead sets were identical to those specified by * lookaheads \@pre. * - If no such merge was permitted, a new isocore of \c t containing * lookaheads \@pre was appended to the state list whose * previous tail was *last_statep \@pre and \c ::nstates was * incremented. It was also appended to \c t's isocore list. * - *tp = the isocore of \c t into which * lookaheads \@pre was placed/merged. * - \c lookaheads, \c work1, and \c work2 may have been altered. */ static void ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows, AnnotationList **annotation_lists, state *t, bitsetv lookaheads, state_list **last_statep, ContributionIndex work1[], bitsetv work2, state **tp) { state_list *lr0_isocore = t->state_list->lr0Isocore; state_list **this_isocorep; /* Determine whether there's an isocore of t with which these lookaheads can be merged. */ { AnnotationIndex ai; AnnotationList *a; if (annotation_lists) for (ai = 0, a = annotation_lists[lr0_isocore->state->number]; a; ++ai, a = a->next) work1[ai] = AnnotationList__computeDominantContribution ( a, lr0_isocore->state->nitems, lookaheads, false); for (this_isocorep = &t->state_list; this_isocorep == &t->state_list || *this_isocorep != t->state_list; this_isocorep = &(*this_isocorep)->nextIsocore) { if (!(*this_isocorep)->recomputedAsSuccessor) break; if (annotation_lists) { for (ai = 0, a = annotation_lists[lr0_isocore->state->number]; a; ++ai, a = a->next) { if (work1[ai] != ContributionIndex__none) { /* This isocore compatibility test depends on the fact that, if the dominant contributions are the same for the two isocores, then merging their lookahead sets will not produce a state with a different dominant contribution. */ ContributionIndex ci = AnnotationList__computeDominantContribution ( a, lr0_isocore->state->nitems, (*this_isocorep)->lookaheads, false); if (ci != ContributionIndex__none && work1[ai] != ci) break; } } if (!a) break; } else { size_t i; for (i = 0; i < t->nitems; ++i) { if (!(*this_isocorep)->lookaheads || !(*this_isocorep)->lookaheads[i]) { if (!bitset_empty_p (lookaheads[i])) break; } /* bitset_equal_p uses the size of the first argument, so lookaheads[i] must be the second argument. */ else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i], lookaheads[i])) break; } if (i == t->nitems) break; } } } bool has_lookaheads = false; for (size_t i = 0; i < lr0_isocore->state->nitems; ++i) if (!bitset_empty_p (lookaheads[i])) { has_lookaheads = true; break; } /* Merge with an existing isocore. */ if (this_isocorep == &t->state_list || *this_isocorep != t->state_list) { bool new_lookaheads = false; *tp = (*this_isocorep)->state; /* Merge lookaheads into the state and record whether any of them are actually new. */ if (has_lookaheads) { size_t i; if (!(*this_isocorep)->lookaheads) { (*this_isocorep)->lookaheads = xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads); for (i = 0; i < t->nitems; ++i) (*this_isocorep)->lookaheads[i] = NULL; } for (i = 0; i < t->nitems; ++i) if (!bitset_empty_p (lookaheads[i])) { if (!(*this_isocorep)->lookaheads[i]) (*this_isocorep)->lookaheads[i] = bitset_create (ntokens, BITSET_FIXED); bitset_andn (lookaheads[i], lookaheads[i], (*this_isocorep)->lookaheads[i]); bitset_or ((*this_isocorep)->lookaheads[i], lookaheads[i], (*this_isocorep)->lookaheads[i]); if (!bitset_empty_p (lookaheads[i])) new_lookaheads = true; } } /* If new lookaheads were merged, propagate those lookaheads to the successors, possibly splitting them. If *tp is being recomputed for the first time, this isn't necessary because the main ielr_split_states loop will handle the successors later. */ if (!(*this_isocorep)->recomputedAsSuccessor) (*this_isocorep)->recomputedAsSuccessor = true; else if (new_lookaheads) { /* When merging demands identical lookahead sets, it is impossible to merge new lookaheads. */ aver (annotation_lists); for (int i = 0; i < (*tp)->transitions->num; ++i) { state *t2 = (*tp)->transitions->states[i]; /* At any time, there's at most one state for which we have so far initially recomputed only some of its successors in the main ielr_split_states loop. Because we recompute successors in order, we can just stop at the first such successor. Of course, *tp might be a state some of whose successors have been recomputed as successors of other states rather than as successors of *tp. It's fine if we go ahead and propagate to some of those. We'll propagate to them again (but stop when we see nothing changes) and to the others when we reach *tp in the main ielr_split_states loop later. */ if (!t2->state_list->recomputedAsSuccessor) break; AnnotationList__computeLookaheadFilter ( annotation_lists[t2->state_list->lr0Isocore->state->number], t2->nitems, work2); ielr_compute_lookaheads (follow_kernel_items, always_follows, (*this_isocorep), t2, work2, lookaheads); /* FIXME: If splitting t2 here, it's possible that lookaheads that had already propagated from *tp to t2 will be left in t2 after *tp has been removed as t2's predecessor: - We're going to recompute all lookaheads in phase 4, so these extra lookaheads won't appear in the final parser table. - If t2 has just one annotation, then these extra lookaheads cannot alter the dominating contribution to the associated inadequacy and thus cannot needlessly prevent a future merge of some new state with t2. We can be sure of this because: - The fact that we're splitting t2 now means that some predecessors (at least one) other than *tp must be propagating contributions to t2. - The fact that t2 was merged in the first place means that, if *tp propagated any contributions, the dominating contribution must be the same as that from those other predecessors. - Thus, if some new state to be merged with t2 in the future proves to be compatible with the contributions propagated by the other predecessors, it will also be compatible with the contributions made by the extra lookaheads left behind by *tp. - However, if t2 has more than one annotation and these extra lookaheads contribute to one of their inadequacies, it's possible these extra lookaheads may needlessly prevent a future merge with t2. For example: - Let's say there's an inadequacy A that makes the split necessary as follows: - There's currently just one other predecessor and it propagates to t2 some contributions to inadequacy A. - The new lookaheads that we were attempting to propagate from *tp to t2 made contributions to inadequacy A with a different dominating contribution than those from that other predecessor. - The extra lookaheads either make no contribution to inadequacy A or have the same dominating contribution as the contributions from the other predecessor. Either way, as explained above, they can't prevent a future merge. - Let's say there's an inadequacy B that causes the trouble with future merges as follows: - The extra lookaheads make contributions to inadequacy B. - Those extra contributions did not prevent the original merge to create t2 because the other predecessor propagates to t2 no contributions to inadequacy B. - Thus, those extra contributions may prevent a future merge with t2 even though the merge would be fine if *tp had not left them behind. - Is the latter case common enough to worry about? - Perhaps we should track all predecessors and iterate them now to recreate t2 without those extra lookaheads. */ ielr_compute_state (follow_kernel_items, always_follows, annotation_lists, t2, lookaheads, last_statep, work1, work2, &(*tp)->transitions->states[i]); } } } /* Create a new isocore. */ else { state_list *old_isocore = *this_isocorep; (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep); *last_statep = *this_isocorep; (*last_statep)->state = *tp = state_new_isocore (t); (*tp)->state_list = *last_statep; (*last_statep)->recomputedAsSuccessor = true; (*last_statep)->next = NULL; (*last_statep)->lookaheads = NULL; if (has_lookaheads) { (*last_statep)->lookaheads = xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads); for (size_t i = 0; i < t->nitems; ++i) { if (bitset_empty_p (lookaheads[i])) (*last_statep)->lookaheads[i] = NULL; else { (*last_statep)->lookaheads[i] = bitset_create (ntokens, BITSET_FIXED); bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]); } } } (*last_statep)->lr0Isocore = lr0_isocore; (*last_statep)->nextIsocore = old_isocore; } } /** * \pre * - \c follow_kernel_items and \c always_follows were computed by * \c ielr_compute_auxiliary_tables. * - Either: * - annotation_lists = NULL and max_annotations=0. * - \c annotation_lists and \c max_annotations were computed by * \c ielr_compute_annotation_lists. * \post * - \c ::states is of size \c ::nstates (which might be greater than * ::nstates \@pre) and no longer contains any LR(1)-relative * inadequacy. \c annotation_lists was used to determine state * compatibility or, if annotation_lists = NULL, the canonical * LR(1) state compatibility test was used. * - If annotation_lists = NULL, reduction lookahead sets were * computed in all states. tv_ielr_phase4 was pushed while they were * computed from item lookahead sets. */ static void ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows, AnnotationList **annotation_lists, AnnotationIndex max_annotations) { state_list *first_state; state_list *last_state; bitsetv lookahead_filter = NULL; bitsetv lookaheads; /* Set up state list and some reusable bitsets. */ { size_t max_nitems = 0; state_list **nodep = &first_state; for (state_number i = 0; i < nstates; ++i) { *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep); (*nodep)->state = states[i]; (*nodep)->recomputedAsSuccessor = false; (*nodep)->lookaheads = NULL; (*nodep)->lr0Isocore = *nodep; (*nodep)->nextIsocore = *nodep; nodep = &(*nodep)->next; if (states[i]->nitems > max_nitems) max_nitems = states[i]->nitems; } *nodep = NULL; lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED); if (!annotation_lists) bitsetv_ones (lookahead_filter); lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED); } /* Recompute states. */ { ContributionIndex *work = xnmalloc (max_annotations, sizeof *work); for (state_list *this_state = first_state; this_state; this_state = this_state->next) { state *s = this_state->state; for (int i = 0; i < s->transitions->num; ++i) { state *t = s->transitions->states[i]; if (annotation_lists) AnnotationList__computeLookaheadFilter ( annotation_lists[t->state_list->lr0Isocore->state->number], t->nitems, lookahead_filter); ielr_compute_lookaheads (follow_kernel_items, always_follows, this_state, t, lookahead_filter, lookaheads); ielr_compute_state (follow_kernel_items, always_follows, annotation_lists, t, lookaheads, &last_state, work, lookahead_filter, &s->transitions->states[i]); } } free (work); } bitsetv_free (lookahead_filter); bitsetv_free (lookaheads); /* Store states back in the states array. */ states = xnrealloc (states, nstates, sizeof *states); for (state_list *node = first_state; node; node = node->next) states[node->state->number] = node->state; /* In the case of canonical LR(1), copy item lookahead sets to reduction lookahead sets. */ if (!annotation_lists) { timevar_push (tv_ielr_phase4); initialize_LA (); for (state_list *node = first_state; node; node = node->next) if (!node->state->consistent) { size_t i = 0; item_number *itemset = node->state->items; for (size_t r = 0; r < node->state->reductions->num; ++r) { rule *this_rule = node->state->reductions->rules[r]; bitset lookahead_set = node->state->reductions->lookahead_tokens[r]; if (item_number_is_rule_number (*this_rule->rhs)) ielr_compute_goto_follow_set (follow_kernel_items, always_follows, node, this_rule->lhs, lookahead_set); else if (node->lookaheads) { /* We don't need to start the kernel item search back at i=0 because both items and reductions are sorted on rule number. */ while (!item_number_is_rule_number (ritem[itemset[i]]) || item_number_as_rule_number (ritem[itemset[i]]) != this_rule->number) { ++i; aver (i < node->state->nitems); } if (node->lookaheads[i]) bitset_copy (lookahead_set, node->lookaheads[i]); } } } timevar_pop (tv_ielr_phase4); } /* Free state list. */ while (first_state) { state_list *node = first_state; if (node->lookaheads) { for (size_t i = 0; i < node->state->nitems; ++i) if (node->lookaheads[i]) bitset_free (node->lookaheads[i]); free (node->lookaheads); } first_state = node->next; free (node); } } void ielr (void) { LrType lr_type = lr_type_get (); /* Phase 0: LALR(1). */ switch (lr_type) { case LR_TYPE__LR0: timevar_push (tv_lalr); set_goto_map (); timevar_pop (tv_lalr); return; case LR_TYPE__CANONICAL_LR: timevar_push (tv_lalr); set_goto_map (); timevar_pop (tv_lalr); break; case LR_TYPE__LALR: timevar_push (tv_lalr); lalr (); bitsetv_free (goto_follows); timevar_pop (tv_lalr); return; case LR_TYPE__IELR: timevar_push (tv_lalr); lalr (); timevar_pop (tv_lalr); break; } { bitsetv follow_kernel_items; bitsetv always_follows; InadequacyList **inadequacy_lists = NULL; AnnotationList **annotation_lists = NULL; struct obstack annotations_obstack; AnnotationIndex max_annotations = 0; { /* Phase 1: Compute Auxiliary Tables. */ state ***predecessors; timevar_push (tv_ielr_phase1); ielr_compute_auxiliary_tables ( &follow_kernel_items, &always_follows, lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors); timevar_pop (tv_ielr_phase1); /* Phase 2: Compute Annotations. */ timevar_push (tv_ielr_phase2); if (lr_type != LR_TYPE__CANONICAL_LR) { obstack_init (&annotations_obstack); ielr_compute_annotation_lists (follow_kernel_items, always_follows, predecessors, &max_annotations, &inadequacy_lists, &annotation_lists, &annotations_obstack); for (state_number i = 0; i < nstates; ++i) free (predecessors[i]); free (predecessors); bitsetv_free (goto_follows); lalr_free (); } timevar_pop (tv_ielr_phase2); } /* Phase 3: Split States. */ timevar_push (tv_ielr_phase3); { state_number nstates_lr0 = nstates; ielr_split_states (follow_kernel_items, always_follows, annotation_lists, max_annotations); if (inadequacy_lists) for (state_number i = 0; i < nstates_lr0; ++i) InadequacyList__delete (inadequacy_lists[i]); } free (inadequacy_lists); if (annotation_lists) obstack_free (&annotations_obstack, NULL); free (annotation_lists); bitsetv_free (follow_kernel_items); bitsetv_free (always_follows); timevar_pop (tv_ielr_phase3); } /* Phase 4: Compute Reduction Lookaheads. */ timevar_push (tv_ielr_phase4); free (goto_map); free (from_state); free (to_state); if (lr_type == LR_TYPE__CANONICAL_LR) { /* Reduction lookaheads are computed in ielr_split_states above but are timed as part of phase 4. */ set_goto_map (); } else { lalr (); bitsetv_free (goto_follows); } timevar_pop (tv_ielr_phase4); }