Add regular expression matching algorithm.
This commit is contained in:
parent
c96bbdf00e
commit
d69199e658
@ -76,6 +76,7 @@ a set of rules that precisely define a sequence of operations.
|
||||
* `A` [Z Algorithm](src/algorithms/string/z-algorithm) - substring search (pattern matching)
|
||||
* `A` [Rabin Karp Algorithm](src/algorithms/string/rabin-karp) - substring search
|
||||
* `A` [Longest Common Substring](src/algorithms/string/longest-common-substring)
|
||||
* `A` [Regular Expression Matching](src/algorithms/string/regular-expression-matching)
|
||||
* **Searches**
|
||||
* `B` [Linear Search](src/algorithms/search/linear-search)
|
||||
* `B` [Binary Search](src/algorithms/search/binary-search)
|
||||
@ -147,6 +148,7 @@ algorithm is an abstraction higher than a computer program.
|
||||
* `A` [Integer Partition](src/algorithms/math/integer-partition)
|
||||
* `A` [Maximum Subarray](src/algorithms/sets/maximum-subarray)
|
||||
* `A` [Bellman-Ford Algorithm](src/algorithms/graph/bellman-ford) - finding shortest path to all graph vertices
|
||||
* `A` [Regular Expression Matching](src/algorithms/string/regular-expression-matching)
|
||||
* **Backtracking** - similarly to brute force, try to generate all possible solutions, but each time you generate next solution you test
|
||||
if it satisfies all conditions, and only then continue generating subsequent solutions. Otherwise, backtrack, and go on a
|
||||
different path of finding a solution. Normally the DFS traversal of state-space is being used.
|
||||
|
73
src/algorithms/string/regular-expression-matching/README.md
Normal file
73
src/algorithms/string/regular-expression-matching/README.md
Normal file
@ -0,0 +1,73 @@
|
||||
# Regular Expression Matching
|
||||
|
||||
Given an input string `s` and a pattern `p`, implement regular
|
||||
expression matching with support for `.` and `*`.
|
||||
|
||||
- `.` Matches any single character.
|
||||
- `*` Matches zero or more of the preceding element.
|
||||
|
||||
The matching should cover the **entire** input string (not partial).
|
||||
|
||||
**Note**
|
||||
|
||||
- `s` may be empty; otherwise it contains only lowercase letters `a-z`.
|
||||
- `p` may be empty; otherwise it contains only lowercase letters `a-z` and the special characters `.` and `*`.
|
||||
|
||||
## Examples
|
||||
|
||||
**Example #1**
|
||||
|
||||
Input:
|
||||
```
|
||||
s = 'aa'
|
||||
p = 'a'
|
||||
```
|
||||
|
||||
Output: `false`
|
||||
|
||||
Explanation: `a` does not match the entire string `aa`.
|
||||
|
||||
**Example #2**
|
||||
|
||||
Input:
|
||||
```
|
||||
s = 'aa'
|
||||
p = 'a*'
|
||||
```
|
||||
|
||||
Output: `true`
|
||||
|
||||
Explanation: `*` means zero or more of the preceding element, `a`.
|
||||
Therefore, by repeating `a` once, it becomes `aa`.
|
||||
|
||||
**Example #3**
|
||||
|
||||
Input:
|
||||
|
||||
```
|
||||
s = 'ab'
|
||||
p = '.*'
|
||||
```
|
||||
|
||||
Output: `true`
|
||||
|
||||
Explanation: `.*` means "zero or more (`*`) of any character (`.`)".
|
||||
|
||||
**Example #4**
|
||||
|
||||
Input:
|
||||
|
||||
```
|
||||
s = 'aab'
|
||||
p = 'c*a*b'
|
||||
```
|
||||
|
||||
Output: `true`
|
||||
|
||||
Explanation: `c` can be repeated 0 times, `a` can be repeated
|
||||
1 time. Therefore it matches `aab`.
|
||||
|
||||
## References
|
||||
|
||||
- [YouTube](https://www.youtube.com/watch?v=l3hda49XcDE&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8&index=71&t=0s)
|
||||
- [LeetCode](https://leetcode.com/problems/regular-expression-matching/description/)
|
@ -0,0 +1,34 @@
|
||||
import regularExpressionMatching from '../regularExpressionMatching';
|
||||
|
||||
describe('regularExpressionMatching', () => {
  it('should match regular expressions in a string', () => {
    // [string, pattern] pairs where the pattern must cover the entire string.
    const matchingCases = [
      ['', ''],
      ['a', 'a'],
      ['aa', 'aa'],
      ['aab', 'aab'],
      ['aab', 'aa.'],
      ['aab', '.a.'],
      ['aab', '...'],
      ['a', 'a*'],
      ['aaa', 'a*'],
      ['aaab', 'a*b'],
      ['aaabb', 'a*b*'],
      ['aaabb', 'a*b*c*'],
      ['', 'a*'],
      ['xaabyc', 'xa*b.c'],
      ['aab', 'c*a*b*'],
      ['mississippi', 'mis*is*.p*.'],
      ['ab', '.*'],
    ];

    // [string, pattern] pairs where the pattern must NOT match the string.
    const nonMatchingCases = [
      ['', 'a'],
      ['a', ''],
      ['aab', 'aa'],
      ['aab', 'baa'],
      ['aabc', '...'],
      ['aaabbdd', 'a*b*c*'],
      ['mississippi', 'mis*is*p*.'],
      ['ab', 'a*'],
      ['abba', 'a*b*.c'],
      ['abba', '.*c'],
    ];

    matchingCases.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeTruthy();
    });

    nonMatchingCases.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeFalsy();
    });
  });
});
|
@ -0,0 +1,135 @@
|
||||
const ZERO_OR_MORE_CHARS = '*';
const ANY_CHAR = '.';

/**
 * Checks whether the WHOLE string matches the pattern (dynamic programming approach).
 *
 * Supported pattern syntax:
 * - `.` matches any single character;
 * - `*` matches zero or more of the preceding pattern element.
 *
 * A `*` placed at the very first pattern position has no preceding element.
 * The corresponding matrix cells are treated as "no match" (false) instead of
 * reading outside of the matrix, so the function always returns a strict boolean.
 *
 * Time and space complexity: O(string.length * pattern.length).
 *
 * @param {string} string
 * @param {string} pattern
 * @return {boolean}
 */
export default function regularExpressionMatching(string, pattern) {
  /*
   * Let's initiate dynamic programming matrix for this string and pattern.
   * We will have pattern characters on top (as columns) and string characters
   * will be placed to the left of the table (as rows). The matrix has one extra
   * row and one extra column that stand for the empty string and empty pattern.
   *
   * Example:
   *
   *       a * b . b
   *     - - - - - -
   *   a - - - - - -
   *   a - - - - - -
   *   b - - - - - -
   *   y - - - - - -
   *   b - - - - - -
   *
   * Every cell starts as false ("no match"). This already covers:
   * - the first column: an empty pattern can't match any non-empty string;
   * - the first row: an empty string can't match a non-empty pattern
   *   (except for patterns like a*b*, handled below).
   */
  const matchMatrix = Array.from(
    { length: string.length + 1 },
    () => Array(pattern.length + 1).fill(false),
  );

  // Empty string '' matches empty pattern ''.
  matchMatrix[0][0] = true;

  // First row: the one exception for the empty string are patterns like a*b*
  // where every element may be repeated zero times. In that case the answer is
  // the same as for the pattern without the '*' and the char that precedes it
  // (two positions to the left). A '*' in the first position has nothing to
  // repeat (columnIndex - 2 would be out of the matrix), so it stays false.
  for (let columnIndex = 2; columnIndex <= pattern.length; columnIndex += 1) {
    if (pattern[columnIndex - 1] === ZERO_OR_MORE_CHARS) {
      matchMatrix[0][columnIndex] = matchMatrix[0][columnIndex - 2];
    }
  }

  // Now let's go through every letter of the pattern and every letter of
  // the string and compare them one by one.
  for (let rowIndex = 1; rowIndex <= string.length; rowIndex += 1) {
    for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
      // Take into account the fact that matrix contains one extra column and row.
      const stringChar = string[rowIndex - 1];
      const patternChar = pattern[columnIndex - 1];

      if (patternChar === ZERO_OR_MORE_CHARS) {
        /*
         * In case if current pattern character is special '*' character we have
         * two options:
         *
         * 1. Since '*' allows its previous char to be absent from the string we
         * need to check if the string matches the pattern without '*' and without
         * the char that goes before '*'. That would mean to go two positions left
         * on the same row.
         *
         * 2. Since '*' allows its previous char to be present in the string many
         * times we need to check if the char before '*' is the same as the current
         * string char (or is '.'). If so, the current string matches the current
         * pattern in case if the string WITHOUT its current char matches the same
         * pattern. This would mean to go one position up in the same column.
         *
         * A '*' in the first pattern position (columnIndex === 1) has no preceding
         * char, so neither option applies and the cell keeps its false value.
         */
        if (columnIndex >= 2) {
          const repeatedChar = pattern[columnIndex - 2];
          const matchesZeroTimes = matchMatrix[rowIndex][columnIndex - 2];
          const matchesOnceMore = (
            (repeatedChar === stringChar || repeatedChar === ANY_CHAR)
            && matchMatrix[rowIndex - 1][columnIndex]
          );

          matchMatrix[rowIndex][columnIndex] = matchesZeroTimes || matchesOnceMore;
        }
      } else if (patternChar === stringChar || patternChar === ANY_CHAR) {
        /*
         * In case if current pattern char is the same as current string char
         * or it may be any character (in case if pattern contains '.' char)
         * we need to check if there was a match for the pattern and for the
         * string WITHOUT their current chars. This would mean that we may copy
         * left-top diagonal value.
         *
         * Example:
         *
         *   a b
         * a 1 -
         * b - 1
         */
        matchMatrix[rowIndex][columnIndex] = matchMatrix[rowIndex - 1][columnIndex - 1];
      }
      /*
       * Otherwise pattern char and string char are different and the cell keeps
       * its initial false value ("no-match").
       *
       * Example:
       *
       *   a b
       * a - -
       * c - 0
       */
    }
  }

  // Bottom-right cell tells whether the whole string matches the whole pattern.
  return matchMatrix[string.length][pattern.length];
}
|
Loading…
x
Reference in New Issue
Block a user