Regex

ref:

MDN
ECMAScript 6入门 CH5

let regex = new RegExp('xyz', 'i')

let regex = /xyz/i

let regex = new RegExp(/xyz/i)

若第二個參數有指定修飾符，則會取代原有正規表達式中的修飾符。
new RegExp(/abc/ig, 'i').flags // "i"

s 修饰符：dotAll 模式

正则表达式中，点（.）是一个特殊字符，代表任意的单个字符，但是有两个例外。一个是四个字节的 UTF-16 字符，这个可以用u修饰符解决；另一个是行终止符（line terminator character）。

所谓行终止符，就是该字符表示一行的终结。以下四个字符属于“行终止符”。

U+000A 换行符（\n）
U+000D 回车符（\r）
U+2028 行分隔符（line separator）
U+2029 段分隔符（paragraph separator）

/foo.bar/.test('foo\nbar')
// false

上面代码中，因为.不匹配\n，所以正则表达式返回false。

但是，很多时候我们希望匹配的是任意单个字符，这时有一种变通的写法。

/foo[^]bar/.test('foo\nbar')
// true

这种解决方案毕竟不太符合直觉，ES2018 引入s修饰符，使得.可以匹配任意单个字符。

/foo.bar/s.test('foo\nbar') // true

具名组匹配 (實用)

正则表达式使用圆括号进行组匹配。

const RE_DATE = /(\d{4})-(\d{2})-(\d{2})/;

上面代码中，正则表达式里面有三组圆括号。使用exec方法，就可以将这三组匹配结果提取出来。

const RE_DATE = /(\d{4})-(\d{2})-(\d{2})/;

const matchObj = RE_DATE.exec('1999-12-31');
const year = matchObj[1]; // 1999
const month = matchObj[2]; // 12
const day = matchObj[3]; // 31

ES2018 引入了具名组匹配（Named Capture Groups），允许为每一个组匹配指定一个名字，既便于阅读代码，又便于引用。

const RE_DATE = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;

const matchObj = RE_DATE.exec('1999-12-31');
const year = matchObj.groups.year; // 1999
const month = matchObj.groups.month; // 12
const day = matchObj.groups.day; // 31

如果具名组没有匹配，那么对应的groups对象属性会是undefined。

const RE_OPT_A = /^(?<as>a+)?$/;
const matchObj = RE_OPT_A.exec('');

matchObj.groups.as // undefined
'as' in matchObj.groups // true

搭配解构赋值和替换

let {groups: {one, two}} = /^(?<one>.*):(?<two>.*)$/u.exec('foo:bar');
one  // foo
two  // bar

let re = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/u;

'2015-01-02'.replace(re, '$<day>/$<month>/$<year>')
// '02/01/2015'

String.prototype.matchAll

var regex = /t(e)(st(\d?))/g;
var string = 'test1test2test3';

var matches = [];
var match;
while (match = regex.exec(string)) {
  matches.push(match);
}

matches
// [
//   ["test1", "e", "st1", "1", index: 0, input: "test1test2test3"],
//   ["test2", "e", "st2", "2", index: 5, input: "test1test2test3"],
//   ["test3", "e", "st3", "3", index: 10, input: "test1test2test3"]
// ]

const string = 'test1test2test3';

// 传入 matchAll 的正则必须带 g 修饰符，否则会抛出 TypeError
const regex = /t(e)(st(\d?))/g;

for (const match of string.matchAll(regex)) {
  console.log(match);
}
// ["test1", "e", "st1", "1", index: 0, input: "test1test2test3"]
// ["test2", "e", "st2", "2", index: 5, input: "test1test2test3"]
// ["test3", "e", "st3", "3", index: 10, input: "test1test2test3"]

// 转为数组方法一
[...string.matchAll(regex)]

// 转为数组方法二
Array.from(string.matchAll(regex));

RegExp.$1-$9

RegExp.$1 至 RegExp.$9 為靜態屬性，保存最近一次成功匹配中第 1 到第 9 個括號捕獲組的內容。

var re = /(\w+)\s(\w+)/;
var str = 'John Smith';
str.replace(re, '$2, $1'); // "Smith, John"
RegExp.$1; // "John"
RegExp.$2; // "Smith"

RegExp.input ($_)

var re = /hi/g;
re.test('hi there!');
RegExp.input;         // "hi there!"
re.test('foo');       // new test, non-matching
RegExp.$_;            // "hi there!"
re.test('hi world!'); // new test, matching
RegExp.$_;            // "hi world!"

RegExp.lastMatch ($&)

var re = /hi/g;
re.test('hi there!');
RegExp.lastMatch; // "hi"
RegExp['$&'];     // "hi"

RegExp.lastParen ($+)

var re = /(hi)/g;
re.test('hi there!');
RegExp.lastParen; // "hi"
RegExp['$+'];     // "hi"

RegExp.leftContext ($`)

var re = /world/g;
re.test('hello world!');
RegExp.leftContext; // "hello "
RegExp['$`'];       // "hello "

RegExp.prototype

RegExp.prototype.flags

/foo/ig.flags;   // "gi"
/bar/myu.flags;  // "muy"

RegExp.prototype.global

var regex = new RegExp('foo', 'g');

console.log(regex.global); // true

RegExp.prototype.ignoreCase

var regex = new RegExp('foo', 'i');

console.log(regex.ignoreCase); // true

RegExp.prototype.multiline

var regex = new RegExp('foo', 'm');

console.log(regex.multiline); // true

RegExp.prototype.source

new RegExp().source; // "(?:)"

new RegExp('\n').source === '\n';  // true, prior to ES5
new RegExp('\n').source === '\\n'; // true, starting with ES5

RegExp.prototype.sticky

var str = '#foo#';
var regex = /foo/y;

regex.lastIndex = 1;
regex.test(str); // true
regex.lastIndex = 5;
regex.test(str); // false (lastIndex is taken into account with sticky flag)
regex.lastIndex; // 0 (reset after match failure)

// When the y flag is used with a pattern, ^ always matches only at the beginning of the input, or (if multiline is true) at the beginning of a line.

var regex = /^foo/y;
regex.lastIndex = 2;
regex.test('..foo');   // false - index 2 is not the beginning of the string

var regex2 = /^foo/my;
regex2.lastIndex = 2;
regex2.test('..foo');  // false - index 2 is not the beginning of the string or line
regex2.lastIndex = 2;
regex2.test('.\nfoo'); // true - index 2 is the beginning of a line

RegExp.prototype.unicode

var regex = new RegExp('\u{61}', 'u');

console.log(regex.unicode); // true

RegExp.rightContext ($')

var re = /hello/g;
re.test('hello world!');
RegExp.rightContext; // " world!"
RegExp["$'"];       // " world!"

get RegExp[@@species]

class MyRegExp extends RegExp {
  // Overwrite MyRegExp species to the parent RegExp constructor
  static get [Symbol.species]() {
    return RegExp;
  }
}

const regex1 = new MyRegExp('foo','g');

console.log(regex1.test('football'));
// expected output: true

regexp.lastIndex

// The lastIndex is a read/write integer property of regular expression instances that specifies the index at which to start the next match.
var re = /hello/g;
re.test('hello world!');  // true
re.lastIndex;             // 5 (index just past the match "hello")
re.test('hello world!');  // false - the search resumes at index 5
re.lastIndex;             // 0 (reset after match failure)

METHODS

RegExp.prototype.exec()

The exec() method executes a search for a match in a specified string. Returns a result array, or null.

var myRe = /ab*/g;
var str = 'abbcdefabh';
var myArray;
while ((myArray = myRe.exec(str)) !== null) {
  var msg = 'Found ' + myArray[0] + '. ';
  msg += 'Next match starts at ' + myRe.lastIndex;
  console.log(msg);
}

// Found abb. Next match starts at 3
// Found ab. Next match starts at 9

RegExp.prototype.test()

var regex1 = RegExp('foo*');
var regex2 = RegExp('foo*','g');
var str1 = 'table football';

console.log(regex1.test(str1));
// expected output: true

console.log(regex1.test(str1));
// expected output: true

console.log(regex2.test(str1));
// expected output: true

console.log(regex2.test(str1));
// expected output: false

RegExp.prototype.toSource()

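The toSource() method returns a string representing the source code of the object. Note: it is non-standard (Firefox-only) and has been removed from current browsers; use toString() or the source property instead.

regexObj.toSource()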

RegExp.prototype.toString()

The toString() method returns a string representing the regular expression.

console.log(new RegExp('a+b+c'));
// expected output: /a+b+c/

console.log(new RegExp('a+b+c').toString());
// expected output: "/a+b+c/"

console.log(new RegExp('bar', 'g').toString());
// expected output: "/bar/g"

console.log(new RegExp('\n', 'g').toString());
// expected output (if your browser supports escaping): "/\n/g" 

console.log(new RegExp('\\n', 'g').toString());
// expected output: "/\n/g"

RegExp.prototype[@@match]()

var re = /[0-9]+/g;
var str = '2016-01-02';
var result = re[Symbol.match](str);
console.log(result);  // ["2016", "01", "02"]

RegExp.prototype[@@replace]()

var re = /-/g;
var str = '2016-01-01';
var newstr = re[Symbol.replace](str, '.');
console.log(newstr);  // 2016.01.01

RegExp.prototype[@@search]()

var re = /-/g;
var str = '2016-01-02';
var result = re[Symbol.search](str);
console.log(result);  // 4

RegExp.prototype[@@split]()

var re = /-/g;
var str = '2016-01-02';
var result = re[Symbol.split](str);
console.log(result);  // ["2016", "01", "02"]

搜尋此網誌

筆記

Markdown

[ES6]Regex