Markdown

[ES6]Regex

Regex

ref:

let regex = new RegExp('xyz', 'i')

let regex = /xyz/i

let regex = new RegExp(/xyz/i))
若第二個參數有指定修飾符,則會取代原有的正規表達中的修飾
new RegExp(/abc/ig, 'i').flags // "i"  

s 修饰符:dotAll 模式

正则表达式中,点(.)是一个特殊字符,代表任意的单个字符,但是有两个例外。一个是四个字节的 UTF-16 字符,这个可以用u修饰符解决;另一个是行终止符(line terminator character)。
所谓行终止符,就是该字符表示一行的终结。以下四个字符属于”行终止符“。
U+000A 换行符(\n)
U+000D 回车符(\r)
U+2028 行分隔符(line separator)
U+2029 段分隔符(paragraph separator)
/foo.bar/.test('foo\nbar')
// false
上面代码中,因为.不匹配\n,所以正则表达式返回false。
但是,很多时候我们希望匹配的是任意单个字符,这时有一种变通的写法。
/foo[^]bar/.test('foo\nbar')
// true
这种解决方案毕竟不太符合直觉,ES2018 引入s修饰符,使得.可以匹配任意单个字符。
/foo.bar/s.test('foo\nbar') // true

具名组匹配 (實用)

正则表达式使用圆括号进行组匹配。
const RE_DATE = /(\d{4})-(\d{2})-(\d{2})/;
上面代码中,正则表达式里面有三组圆括号。使用exec方法,就可以将这三组匹配结果提取出来。
const RE_DATE = /(\d{4})-(\d{2})-(\d{2})/;

const matchObj = RE_DATE.exec('1999-12-31');
const year = matchObj[1]; // 1999
const month = matchObj[2]; // 12
const day = matchObj[3]; // 31
ES2018 引入了具名组匹配(Named Capture Groups),允许为每一个组匹配指定一个名字,既便于阅读代码,又便于引用。
const RE_DATE = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;

const matchObj = RE_DATE.exec('1999-12-31');
const year = matchObj.groups.year; // 1999
const month = matchObj.groups.month; // 12
const day = matchObj.groups.day; // 31
如果具名组没有匹配,那么对应的groups对象属性会是undefined。
const RE_OPT_A = /^(?<as>a+)?$/;
const matchObj = RE_OPT_A.exec('');

matchObj.groups.as // undefined
'as' in matchObj.groups // true

搭配解构赋值和替换

let {groups: {one, two}} = /^(?<one>.*):(?<two>.*)$/u.exec('foo:bar');
one  // foo
two  // bar
let re = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/u;

'2015-01-02'.replace(re, '$<day>/$<month>/$<year>')
// '02/01/2015'

String.prototype.matchAll

var regex = /t(e)(st(\d?))/g;
var string = 'test1test2test3';

var matches = [];
var match;
while (match = regex.exec(string)) {
  matches.push(match);
}

matches
// [
//   ["test1", "e", "st1", "1", index: 0, input: "test1test2test3"],
//   ["test2", "e", "st2", "2", index: 5, input: "test1test2test3"],
//   ["test3", "e", "st3", "3", index: 10, input: "test1test2test3"]
// ]
const string = 'test1test2test3';

// g 修饰符加不加都可以
const regex = /t(e)(st(\d?))/g;

for (const match of string.matchAll(regex)) {
  console.log(match);
}
// ["test1", "e", "st1", "1", index: 0, input: "test1test2test3"]
// ["test2", "e", "st2", "2", index: 5, input: "test1test2test3"]
// ["test3", "e", "st3", "3", index: 10, input: "test1test2test3"]
// 转为数组方法一
[...string.matchAll(regex)]

// 转为数组方法二
Array.from(string.matchAll(regex));

RegExp.1-9
var re = /(\w+)\s(\w+)/;
var str = 'John Smith';
str.replace(re, '$2, $1'); // "Smith, John"
RegExp.$1; // "John"
RegExp.$2; // "Smith"
RegExp.input ($_)
var re = /hi/g;
re.test('hi there!');
RegExp.input;         // "hi there!"
re.test('foo');       // new test, non-matching
RegExp.$_;            // "hi there!"
re.test('hi world!'); // new test, matching
RegExp.$_;            // "hi world!"
RegExp.lastMatch ($&)
var re = /hi/g;
re.test('hi there!');
RegExp.lastMatch; // "hi"
RegExp['$&'];     // "hi"
RegExp.lastParen ($+)
var re = /(hi)/g;
re.test('hi there!');
RegExp.lastParen; // "hi"
RegExp['$+'];     // "hi"
RegExp.leftContext ($`)
var re = /world/g;
re.test('hello world!');
RegExp.leftContext; // "hello "
RegExp['$`'];       // "hello "
RegExp.prototype
RegExp.prototype.flags
/foo/ig.flags;   // "gi"
/bar/myu.flags;  // "muy"
RegExp.prototype.global
var regex = new RegExp('foo', 'g');

console.log(regex.global); // true
RegExp.prototype.ignoreCase
var regex = new RegExp('foo', 'i');

console.log(regex.ignoreCase); // true
RegExp.prototype.multiline
var regex = new RegExp('foo', 'm');

console.log(regex.multiline); // true
RegExp.prototype.source
new RegExp().source; // "(?:)"

new RegExp('\n').source === '\n';  // true, prior to ES5
new RegExp('\n').source === '\\n'; // true, starting with ES5
RegExp.prototype.sticky
var str = '#foo#';
var regex = /foo/y;

regex.lastIndex = 1;
regex.test(str); // true
regex.lastIndex = 5;
regex.test(str); // false (lastIndex is taken into account with sticky flag)
regex.lastIndex; // 0 (reset after match failure)

// When the y flag is used with a pattern, ^ always matches only at the beginning of the input, or (if multiline is true) at the beginning of a line.

var regex = /^foo/y;
regex.lastIndex = 2;
regex.test('..foo');   // false - index 2 is not the beginning of the string

var regex2 = /^foo/my;
regex2.lastIndex = 2;
regex2.test('..foo');  // false - index 2 is not the beginning of the string or line
regex2.lastIndex = 2;
regex2.test('.\nfoo'); // true - index 2 is the beginning of a line

RegExp.prototype.unicode
var regex = new RegExp('\u{61}', 'u');

console.log(regex.unicode); // true
RegExp.rightContext ($’)
var re = /hello/g;
re.test('hello world!');
RegExp.rightContext; // " world!"
RegExp["$'"];       // " world!"
get RegExp[@@species]
class MyRegExp extends RegExp {
  // Overwrite MyRegExp species to the parent RegExp constructor
  static get [Symbol.species]() {
    return RegExp;
  }
}

const regex1 = new MyRegExp('foo','g');

console.log(regex1.test('football'));
// expected output: true

regexp.lastIndex
// The lastIndex is a read/write integer property of regular expression instances that specifies the index at which to start the next match.
var re = /hello/g;
re.test('hello world!');
RegExp.rightContext; // " world!"
RegExp["$'"];       // " world!"

METHODS

RegExp.prototype.exec()
The exec() method executes a search for a match in a specified string. Returns a result array, or null.
var myRe = /ab*/g;
var str = 'abbcdefabh';
var myArray;
while ((myArray = myRe.exec(str)) !== null) {
  var msg = 'Found ' + myArray[0] + '. ';
  msg += 'Next match starts at ' + myRe.lastIndex;
  console.log(msg);
}

// Found abb. Next match starts at 3
// Found ab. Next match starts at 9

RegExp.prototype.test()
var regex1 = RegExp('foo*');
var regex2 = RegExp('foo*','g');
var str1 = 'table football';

console.log(regex1.test(str1));
// expected output: true

console.log(regex1.test(str1));
// expected output: true

console.log(regex2.test(str1));
// expected output: true

console.log(regex2.test(str1));
// expected output: false

RegExp.prototype.toSource()
The toSource() method returns a string representing the source code of the object.
regexObj.toSource()
RegExp.prototype.toString()
The toString() method returns a string representing the regular expression.
console.log(new RegExp('a+b+c'));
// expected output: /a+b+c/

console.log(new RegExp('a+b+c').toString());
// expected output: "/a+b+c/"

console.log(new RegExp('bar', 'g').toString());
// expected output: "/bar/g"

console.log(new RegExp('\n', 'g').toString());
// expected output (if your browser supports escaping): "/\n/g" 

console.log(new RegExp('\\n', 'g').toString());
// expected output: "/\n/g"

RegExp.prototype@@match
var re = /[0-9]+/g;
var str = '2016-01-02';
var result = re[Symbol.match](str);
console.log(result);  // ["2016", "01", "02"]

RegExp.prototype@@replace
var re = /-/g; 
var str = '2016-01-01';
var newstr = re[Symbol.replace](str, '.');
console.log(newstr);  // 2016.01.01
RegExp.prototype@@search
var re = /-/g;
var str = '2016-01-02';
var result = re[Symbol.search](str);
console.log(result);  // 4
RegExp.prototype@@split
var re = /-/g;
var str = '2016-01-02';
var result = re[Symbol.split](str);
console.log(result);  // ["2016", "01", "02"]

留言