/* GNU SED, a batch stream editor. Copyright (C) 1989-2022 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; If not, see . */ #include #include "basicdefs.h" #include "dfa.h" #include "localeinfo.h" #include "regex.h" #include #include "unlocked-io.h" #include "utils.h" /* Struct vector is used to describe a compiled sed program. */ struct vector { struct sed_cmd *v; /* a dynamically allocated array */ size_t v_allocated; /* ... number of slots allocated */ size_t v_length; /* ... number of slots in use */ }; /* This structure tracks files used by sed so that they may all be closed cleanly at normal program termination. A flag is kept that tells if a missing newline was encountered, so that it is added on the next line and the two lines are not concatenated. */ struct output { char *name; bool missing_newline; FILE *fp; struct output *link; }; struct text_buf { char *text; size_t text_length; }; struct regex { regex_t pattern; int flags; size_t sz; struct dfa *dfa; bool begline; bool endline; char re[1]; }; struct readcmd { char *fname; bool append; /* true: append (default); false: prepend (gnu extension) */ }; enum replacement_types { REPL_ASIS = 0, REPL_UPPERCASE = 1, REPL_LOWERCASE = 2, REPL_UPPERCASE_FIRST = 4, REPL_LOWERCASE_FIRST = 8, REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST, /* These are given to aid in debugging */ REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE, REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE, REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE, REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE }; enum text_types { TEXT_BUFFER, TEXT_REPLACEMENT, TEXT_REGEX }; enum posixicity_types { POSIXLY_EXTENDED, /* with GNU extensions */ POSIXLY_CORRECT, /* with POSIX-compatible GNU extensions */ POSIXLY_BASIC /* pedantically POSIX */ }; enum addr_state { RANGE_INACTIVE, /* never been active */ RANGE_ACTIVE, /* between first and second address */ RANGE_CLOSED /* like RANGE_INACTIVE, but range has ended once */ }; enum addr_types { ADDR_IS_NULL, /* null address */ ADDR_IS_REGEX, /* a.addr_regex is valid */ ADDR_IS_NUM, /* a.addr_number is valid */ ADDR_IS_NUM_MOD, /* a.addr_number is valid, addr_step is modulo */ ADDR_IS_STEP, /* address is +N (only valid for addr2) */ ADDR_IS_STEP_MOD, /* address is ~N (only valid for addr2) */ ADDR_IS_LAST /* address is $ */ }; struct addr { enum addr_types addr_type; countT addr_number; countT addr_step; struct regex *addr_regex; }; struct replacement { char *prefix; size_t prefix_length; int subst_id; enum replacement_types repl_type; struct replacement *next; }; struct subst { struct regex *regx; struct replacement *replacement; countT numb; /* if >0, only substitute for match number "numb" */ struct output *outf; /* 'w' option given */ unsigned global : 1; /* 'g' option given */ unsigned print : 2; /* 'p' option given (before/after eval) */ unsigned eval : 1; /* 'e' option given */ unsigned max_id : 4; /* maximum backreference on the RHS */ #ifdef lint char* replacement_buffer; #endif }; struct sed_cmd { struct addr *a1; /* save space: usually is NULL */ struct addr *a2; /* See description the enum, above. */ enum addr_state range_state; /* Non-zero if command is to be applied to non-matches. */ char addr_bang; /* The actual command character. */ char cmd; /* auxiliary data for various commands */ union { /* This structure is used for a, i, and c commands. */ struct text_buf cmd_txt; /* This is used for the l, q and Q commands. */ int int_arg; /* This is used for the {}, b, and t commands. */ countT jump_index; /* This is used for the r command. */ struct readcmd readcmd; /* This is used for the hairy s command. */ struct subst *cmd_subst; /* This is used for the w command. */ struct output *outf; /* This is used for the R command. (despite the struct name, it is used for both in and out files). */ struct output *inf; /* This is used for the y command. */ unsigned char *translate; char **translatemb; /* This is used for the ':' command (debug only). */ char* label_name; } x; }; _Noreturn void bad_prog (const char *why); size_t normalize_text (char *text, size_t len, enum text_types buftype); struct vector *compile_string (struct vector *, char *str, size_t len); struct vector *compile_file (struct vector *, const char *cmdfile); void check_final_program (struct vector *); void rewind_read_files (void); void finish_program (struct vector *); struct regex *compile_regex (struct buffer *b, int flags, int needed_sub); int match_regex (struct regex *regex, char *buf, size_t buflen, size_t buf_start_offset, struct re_registers *regarray, int regsize); #ifdef lint void release_regex (struct regex *); #endif void debug_print_command (const struct vector *program, const struct sed_cmd *sc); void debug_print_program (const struct vector *program); void debug_print_char (char c); int process_files (struct vector *, char **argv); int main (int, char **); extern struct localeinfo localeinfo; extern int extended_regexp_flags; /* one-byte buffer delimiter */ extern char buffer_delimiter; /* If set, fflush(stdout) on every line output, and turn off stream buffering on inputs. */ extern bool unbuffered; /* If set, don't write out the line unless explicitly told to. */ extern bool no_default_output; /* If set, reset line counts on every new file. */ extern bool separate_files; /* If set, follow symlinks when invoked with -i option */ extern bool follow_symlinks; /* Do we need to be pedantically POSIX compliant? */ extern enum posixicity_types posixicity; /* How long should the `l' command's output line be? */ extern countT lcmd_out_line_len; /* How do we edit files in-place? (we don't if NULL) */ extern char *in_place_extension; /* The mode to use to read and write files, either "rt"/"w" or "rb"/"wb". */ extern char const *read_mode; extern char const *write_mode; /* Should we use EREs? */ extern bool use_extended_syntax_p; /* Declarations for multibyte character sets. */ extern int mb_cur_max; extern bool is_utf8; /* If set, operate in 'sandbox' mode - disable e/r/w commands */ extern bool sandbox; /* If set, print debugging information. */ extern bool debug; #define MBRTOWC(pwc, s, n, ps) \ (mb_cur_max == 1 ? \ (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \ mbrtowc ((pwc), (s), (n), (ps))) #define WCRTOMB(s, wc, ps) \ (mb_cur_max == 1 ? \ (*(s) = wctob ((wint_t) (wc)), 1) : \ wcrtomb ((s), (wc), (ps))) #define MBSINIT(s) \ (mb_cur_max == 1 ? 1 : mbsinit ((s))) #define MBRLEN(s, n, ps) \ (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps)) #define IS_MB_CHAR(ch, ps) \ (mb_cur_max == 1 ? 0 : is_mb_char (ch, ps)) extern int is_mb_char (int ch, mbstate_t *ps); extern void initialize_mbcs (void); /* Use this to suppress gcc's '...may be used before initialized' warnings. */ #ifdef lint # define IF_LINT(Code) Code #else # define IF_LINT(Code) /* empty */ #endif #ifndef FALLTHROUGH # if __GNUC__ < 7 # define FALLTHROUGH ((void) 0) # else # define FALLTHROUGH __attribute__ ((__fallthrough__)) # endif #endif