Skip to content

Commit

Permalink
Reintroduce Regexp mutations
Browse files Browse the repository at this point in the history
- Reintroduces regular expression mutations to `mutant` by reverting commit 21d3fef with various improvements and adjustments.
  • Loading branch information
dgollahon committed Dec 30, 2020
1 parent 0fbc345 commit a848e48
Show file tree
Hide file tree
Showing 43 changed files with 2,004 additions and 20 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Unreleased

* Reintroduce regexp mutation support [#1166](https://github.com/mbj/mutant/pull/1166)

# v0.10.23 2020-12-30

* [#1179](https://github.com/mbj/mutant/pull/1181)
Expand Down
11 changes: 6 additions & 5 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ PATH
mprelude (~> 0.1.0)
parser (~> 3.0.0)
procto (~> 0.0.2)
regexp_parser (~> 2.0, >= 2.0.3)
unparser (~> 0.5.6)
variable (~> 0.0.1)

Expand Down Expand Up @@ -51,24 +52,24 @@ GEM
ast (~> 2.4.1)
procto (0.0.3)
rainbow (3.0.0)
regexp_parser (2.0.2)
regexp_parser (2.0.3)
rexml (3.2.4)
rspec (3.10.0)
rspec-core (~> 3.10.0)
rspec-expectations (~> 3.10.0)
rspec-mocks (~> 3.10.0)
rspec-core (3.10.0)
rspec-core (3.10.1)
rspec-support (~> 3.10.0)
rspec-expectations (3.10.0)
rspec-expectations (3.10.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.10.0)
rspec-its (1.3.0)
rspec-core (>= 3.0.0)
rspec-expectations (>= 3.0.0)
rspec-mocks (3.10.0)
rspec-mocks (3.10.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.10.0)
rspec-support (3.10.0)
rspec-support (3.10.1)
rubocop (1.7.0)
parallel (~> 1.10)
parser (>= 2.7.1.5)
Expand Down
17 changes: 17 additions & 0 deletions lib/mutant.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
require 'parser'
require 'parser/current'
require 'pathname'
require 'regexp_parser'
require 'set'
require 'singleton'
require 'stringio'
Expand Down Expand Up @@ -53,6 +54,15 @@ module Mutant
require 'mutant/ast/named_children'
require 'mutant/ast/node_predicates'
require 'mutant/ast/find_metaclass_containing'
require 'mutant/ast/regexp'
require 'mutant/ast/regexp/transformer'
require 'mutant/ast/regexp/transformer/direct'
require 'mutant/ast/regexp/transformer/named_group'
require 'mutant/ast/regexp/transformer/options_group'
require 'mutant/ast/regexp/transformer/quantifier'
require 'mutant/ast/regexp/transformer/recursive'
require 'mutant/ast/regexp/transformer/root'
require 'mutant/ast/regexp/transformer/text'
require 'mutant/ast/meta'
require 'mutant/ast/meta/send'
require 'mutant/ast/meta/const'
Expand All @@ -75,6 +85,13 @@ module Mutant
require 'mutant/mutator/util/symbol'
require 'mutant/mutator/node'
require 'mutant/mutator/node/generic'
require 'mutant/mutator/node/regexp'
require 'mutant/mutator/node/regexp/alternation_meta'
require 'mutant/mutator/node/regexp/capture_group'
require 'mutant/mutator/node/regexp/character_type'
require 'mutant/mutator/node/regexp/end_of_line_anchor'
require 'mutant/mutator/node/regexp/end_of_string_or_before_end_of_line_anchor'
require 'mutant/mutator/node/regexp/greedy_zero_or_more'
require 'mutant/mutator/node/literal'
require 'mutant/mutator/node/literal/boolean'
require 'mutant/mutator/node/literal/range'
Expand Down
37 changes: 37 additions & 0 deletions lib/mutant/ast/regexp.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true

module Mutant
  module AST
    # Entry points for moving between regular expression source,
    # `Regexp::Expression` trees and `Parser::AST::Node` trees
    module Regexp
      class << self
        # Parse regular expression source
        #
        # Delegates to `::Regexp::Parser.parse` and returns its result
        #
        # @param regexp [String]
        #
        # @return [Regexp::Expression]
        def parse(regexp)
          ::Regexp::Parser.parse(regexp)
        end

        # Transform an expression into its ast node
        #
        # The responsible transformer is looked up by the node type
        # `regexp_<token>_<type>` derived from the expression
        #
        # @param expression [Regexp::Expression]
        #
        # @return [Parser::AST::Node]
        def to_ast(expression)
          Transformer
            .lookup(:"regexp_#{expression.token}_#{expression.type}")
            .to_ast(expression)
        end

        # Transform an ast node back into its expression
        #
        # The responsible transformer is looked up by the node type
        #
        # @param node [Parser::AST::Node]
        #
        # @return [Regexp::Expression]
        def to_expression(node)
          transformer = Transformer.lookup(node.type)

          transformer.to_expression(node)
        end
      end
    end # Regexp
  end # AST
end # Mutant
150 changes: 150 additions & 0 deletions lib/mutant/ast/regexp/transformer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# frozen_string_literal: true

module Mutant
module AST
module Regexp
# Regexp bijective mapper
#
# Transforms parsed regular expression representation from
# `Regexp::Expression` instances (provided by `regexp_parser`) into
# equivalent representations using `Parser::AST::Node`
class Transformer
  include AbstractType

  REGISTRY = Registry.new

  # Find the transformer class registered for a node type
  #
  # @param type [Symbol]
  #
  # @return [Class<Transformer>]
  def self.lookup(type)
    REGISTRY.lookup(type)
  end

  # Register the receiving transformer class for a node type
  #
  # @param type [Symbol]
  def self.register(type)
    REGISTRY.register(type, self)
  end
  private_class_method :register

  # Transform expression via the receiver's `ExpressionToAST`
  #
  # @param expression [Regexp::Expression]
  #
  # @return [Parser::AST::Node]
  def self.to_ast(expression)
    self::ExpressionToAST.call(expression)
  end

  # Transform node via the receiver's `ASTToExpression`
  #
  # @param node [Parser::AST::Node]
  #
  # @return [Regexp::Expression]
  def self.to_expression(node)
    self::ASTToExpression.call(node)
  end

  # Abstract base for expression -> node mappers
  class ExpressionToAST
    PREFIX = :regexp

    include Concord.new(:expression), Procto.call, AST::Sexp, AbstractType, Adamantium

  private

    # Build a node of the expression's type with the given children
    def ast(*children)
      s(type, *children)
    end

    # Wrap node into a quantifier node when the expression is quantified,
    # otherwise hand the node back untouched
    def quantify(node)
      if expression.quantified?
        Quantifier.to_ast(expression.quantifier).append(node)
      else
        node
      end
    end

    # Nodes for every subexpression
    def children
      expression.map { |subexpression| Regexp.to_ast(subexpression) }
    end

    # Node type in the form `regexp_<token>_<type>`
    def type
      "#{PREFIX}_#{expression.token}_#{expression.type}".to_sym
    end
  end # ExpressionToAST

  # Abstract base for node -> expression mappers
  class ASTToExpression
    include Concord.new(:node), Procto.call, AbstractType, Adamantium

    # Run the subclass specific transform and freeze what it returns
    #
    # @return [Regexp::Expression]
    def call
      expression = transform
      expression.freeze
    end

  private

    abstract_method :transform

    # Expressions for every child node
    def subexpressions
      node.children.map { |child| Regexp.to_expression(child) }
    end
  end # ASTToExpression

  # Mixin for node transformers
  #
  # Gives the including mapper access to the token and expression class
  # its own `TABLE` constant associates with the node type
  module LookupTable
    Mapping = Class.new do
      include Concord::Public.new(:token, :regexp_class)
    end

    # Table mapping ast types to object information for regexp domain
    class Table
      include Concord.new(:table), Adamantium

      # Build a table from rows of mapping information
      #
      # Each row is `[ast_type, token_arguments, expression_class]`; the
      # token arguments are splatted into `::Regexp::Token.new`. A row
      # repeating an earlier ast type replaces the earlier entry.
      #
      # @param rows [Array<Array(Symbol, Array, Class<Regexp::Expression>)>]
      #
      # @return [Table]
      def self.create(*rows)
        mappings = rows.each_with_object({}) do |(ast_type, token, klass), index|
          index[ast_type] = Mapping.new(::Regexp::Token.new(*token), klass)
        end

        new(mappings)
      end

      # Ast types the table has an entry for
      #
      # @return [Array<Symbol>]
      def types
        table.keys
      end

      # Mapping information for an ast node type
      #
      # Uses `fetch`, so an unknown type raises instead of returning nil
      #
      # @param type [Symbol]
      #
      # @return [Mapping]
      def lookup(type)
        table.fetch(type)
      end
    end # Table

  private

    # Token the including class's TABLE associates with the node type
    def expression_token
      lookup_table = self.class::TABLE
      lookup_table.lookup(node.type).token
    end

    # Expression class the including class's TABLE associates with the node type
    def expression_class
      lookup_table = self.class::TABLE
      lookup_table.lookup(node.type).regexp_class
    end
  end # LookupTable
end # Transformer
end # Regexp
end # AST
end # Mutant
121 changes: 121 additions & 0 deletions lib/mutant/ast/regexp/transformer/direct.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# frozen_string_literal: true

module Mutant
module AST
module Regexp
class Transformer
# Transformer for nodes which map directly to other domain
#
# A node maps "directly" to another domain if the node never
# has children or text which needs to be preserved for a mapping
#
# @example direct mapping
#
# input = /\d/
# expression = Regexp::Parser.parse(input).first
# node = Transformer::Direct.to_ast(expression)
#
# # the digit type always has the same text and no children
# expression.text # => "\\d"
# expression.terminal? # => true
#
# # therefore the `Parser::AST::Node` is always the same
# node # => s(:regexp_digit_type)
class Direct < self
  # Mapper from `Regexp::Expression` to `Parser::AST::Node`
  class ExpressionToAST < Transformer::ExpressionToAST
    # Transform expression into node
    #
    # Builds a childless node of the expression's type and passes it
    # through `quantify`
    #
    # @return [Parser::AST::Node]
    def call
      quantify(ast)
    end
  end # ExpressionToAST

  # Mapper from `Parser::AST::Node` to `Regexp::Expression`
  class ASTToExpression < Transformer::ASTToExpression
    include LookupTable

    # One row per directly mapped node type:
    # `[ast_type, [token_type, token_name, text], expression_class]`
    #
    # Every ast type must appear exactly once; a repeated row would be
    # collapsed silently when the table is built.
    #
    # rubocop:disable Layout/LineLength
    TABLE = Table.create(
      [:regexp_alnum_posixclass, [:posixclass, :alnum, '[:alnum:]'], ::Regexp::Expression::PosixClass],
      [:regexp_alpha_posixclass, [:posixclass, :alpha, '[:alpha:]'], ::Regexp::Expression::PosixClass],
      [:regexp_alpha_property, [:property, :alpha, '\p{Alpha}'], ::Regexp::Expression::UnicodeProperty::Alpha],
      [:regexp_alternation_escape, [:escape, :alternation, '\|'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_arabic_property, [:property, :arabic, '\p{Arabic}'], ::Regexp::Expression::UnicodeProperty::Script],
      [:regexp_ascii_posixclass, [:posixclass, :ascii, '[:ascii:]'], ::Regexp::Expression::PosixClass],
      [:regexp_backspace_escape, [:escape, :backspace, '\b'], ::Regexp::Expression::EscapeSequence::Backspace],
      [:regexp_bell_escape, [:escape, :bell, '\a'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_blank_posixclass, [:posixclass, :blank, '[:blank:]'], ::Regexp::Expression::PosixClass],
      [:regexp_bol_anchor, [:anchor, :bol, '^'], ::Regexp::Expression::Anchor::BeginningOfLine],
      [:regexp_bol_escape, [:escape, :bol, '\^'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_bos_anchor, [:anchor, :bos, '\\A'], ::Regexp::Expression::Anchor::BeginningOfString],
      [:regexp_carriage_escape, [:escape, :carriage, '\r'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_cntrl_posixclass, [:posixclass, :cntrl, '[:cntrl:]'], ::Regexp::Expression::PosixClass],
      [:regexp_digit_posixclass, [:posixclass, :digit, '[:digit:]'], ::Regexp::Expression::PosixClass],
      [:regexp_digit_type, [:type, :digit, '\d'], ::Regexp::Expression::CharacterType::Digit],
      [:regexp_dot_escape, [:escape, :dot, '\.'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_dot_meta, [:meta, :dot, '.'], ::Regexp::Expression::CharacterType::Any],
      [:regexp_eol_anchor, [:anchor, :eol, '$'], ::Regexp::Expression::Anchor::EndOfLine],
      [:regexp_eol_escape, [:escape, :eol, '\$'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_eos_anchor, [:anchor, :eos, '\\z'], ::Regexp::Expression::Anchor::EndOfString],
      [:regexp_eos_ob_eol_anchor, [:anchor, :eos_ob_eol, '\\Z'], ::Regexp::Expression::Anchor::EndOfStringOrBeforeEndOfLine],
      [:regexp_escape_escape, [:escape, :escape, '\e'], ::Regexp::Expression::EscapeSequence::AsciiEscape],
      [:regexp_form_feed_escape, [:escape, :form_feed, '\f'], ::Regexp::Expression::EscapeSequence::FormFeed],
      [:regexp_graph_posixclass, [:posixclass, :graph, '[:graph:]'], ::Regexp::Expression::PosixClass],
      [:regexp_group_close_escape, [:escape, :group_close, '\)'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_group_open_escape, [:escape, :group_open, '\('], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_han_property, [:property, :han, '\p{Han}'], ::Regexp::Expression::UnicodeProperty::Script],
      [:regexp_hangul_property, [:property, :hangul, '\p{Hangul}'], ::Regexp::Expression::UnicodeProperty::Script],
      [:regexp_hex_type, [:type, :hex, '\h'], ::Regexp::Expression::CharacterType::Hex],
      [:regexp_hiragana_property, [:property, :hiragana, '\p{Hiragana}'], ::Regexp::Expression::UnicodeProperty::Script],
      [:regexp_interval_close_escape, [:escape, :interval_close, '\}'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_interval_open_escape, [:escape, :interval_open, '\{'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_katakana_property, [:property, :katakana, '\p{Katakana}'], ::Regexp::Expression::UnicodeProperty::Script],
      [:regexp_letter_property, [:property, :letter, '\p{L}'], ::Regexp::Expression::UnicodeProperty::Letter::Any],
      [:regexp_linebreak_type, [:type, :linebreak, '\R'], ::Regexp::Expression::CharacterType::Linebreak],
      [:regexp_lower_posixclass, [:posixclass, :lower, '[:lower:]'], ::Regexp::Expression::PosixClass],
      [:regexp_mark_keep, [:keep, :mark, '\K'], ::Regexp::Expression::Keep::Mark],
      [:regexp_match_start_anchor, [:anchor, :match_start, '\\G'], ::Regexp::Expression::Anchor::MatchStart],
      [:regexp_newline_escape, [:escape, :newline, '\n'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_nondigit_type, [:type, :nondigit, '\D'], ::Regexp::Expression::CharacterType::NonDigit],
      [:regexp_nonhex_type, [:type, :nonhex, '\H'], ::Regexp::Expression::CharacterType::NonHex],
      [:regexp_nonspace_type, [:type, :nonspace, '\S'], ::Regexp::Expression::CharacterType::NonSpace],
      [:regexp_nonword_boundary_anchor, [:anchor, :nonword_boundary, '\\B'], ::Regexp::Expression::Anchor::NonWordBoundary],
      [:regexp_nonword_type, [:type, :nonword, '\W'], ::Regexp::Expression::CharacterType::NonWord],
      [:regexp_one_or_more_escape, [:escape, :one_or_more, '\+'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_print_nonposixclass, [:nonposixclass, :print, '[:^print:]'], ::Regexp::Expression::PosixClass],
      [:regexp_print_nonproperty, [:nonproperty, :print, '\P{Print}'], ::Regexp::Expression::UnicodeProperty::Print],
      [:regexp_print_posixclass, [:posixclass, :print, '[:print:]'], ::Regexp::Expression::PosixClass],
      [:regexp_print_property, [:property, :print, '\p{Print}'], ::Regexp::Expression::UnicodeProperty::Print],
      [:regexp_punct_posixclass, [:posixclass, :punct, '[:punct:]'], ::Regexp::Expression::PosixClass],
      [:regexp_set_close_escape, [:escape, :set_close, '\]'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_set_open_escape, [:escape, :set_open, '\['], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_space_posixclass, [:posixclass, :space, '[:space:]'], ::Regexp::Expression::PosixClass],
      [:regexp_space_type, [:type, :space, '\s'], ::Regexp::Expression::CharacterType::Space],
      [:regexp_upper_posixclass, [:posixclass, :upper, '[:upper:]'], ::Regexp::Expression::PosixClass],
      [:regexp_vertical_tab_escape, [:escape, :vertical_tab, '\v'], ::Regexp::Expression::EscapeSequence::VerticalTab],
      [:regexp_word_boundary_anchor, [:anchor, :word_boundary, '\b'], ::Regexp::Expression::Anchor::WordBoundary],
      [:regexp_word_posixclass, [:posixclass, :word, '[:word:]'], ::Regexp::Expression::PosixClass],
      [:regexp_word_type, [:type, :word, '\w'], ::Regexp::Expression::CharacterType::Word],
      [:regexp_xdigit_posixclass, [:posixclass, :xdigit, '[:xdigit:]'], ::Regexp::Expression::PosixClass],
      [:regexp_xgrapheme_type, [:type, :xgrapheme, '\X'], ::Regexp::Expression::CharacterType::ExtendedGrapheme],
      [:regexp_zero_or_more_escape, [:escape, :zero_or_more, '\*'], ::Regexp::Expression::EscapeSequence::Literal],
      [:regexp_zero_or_one_escape, [:escape, :zero_or_one, '\?'], ::Regexp::Expression::EscapeSequence::Literal]
    )
    # rubocop:enable Layout/LineLength

  private

    # Instantiate the expression class mapped to the node type, passing
    # the mapped token as its only argument
    #
    # @return [Regexp::Expression]
    def transform
      expression_class.new(expression_token)
    end
  end # ASTToExpression

  # Register this transformer for every node type the table defines
  ASTToExpression::TABLE.types.each(&method(:register))
end # Direct
end # Transformer
end # Regexp
end # AST
end # Mutant
Loading

0 comments on commit a848e48

Please sign in to comment.