JavaScript版正则(RegExp)实现 - 《写给你我的知识》

发现
移植
结果
代码
发现

一直在用正则，但是如何写一个正则，没有什么头绪，在网上搜索时发现MuJS，作者发布了一版正则实现，不过是C语言写的，里面使用了不少操作内存地址的用法，调试起来不是很舒服，我想能不能移植到JavaScript上面呢？
移植

这样调试起来会方便许多，随即开始移植，经历了两天左右初版就移植完成了，肯定还有Bug，后续再说吧，总算调试器来不是那么不舒服了。
结果

算是移植成功了吧，大多是按照C语言原写法做的处理，有的地方做了符合JavaScript的处理，但是没有完全按照JavaScript来进行处理，还是有遗憾！仓库在这里。希望对大家有帮助！
代码

/* regcomp flags */
const REG_ICASE = 1
const REG_NEWLINE = 2
/* regexec flags */
const REG_NOTBOL = 4
/* limits */
const REG_MAXSUB = 10
const ESCAPES = "BbDdSsWw^$\\.*+?()[]{}|0123456789"
let REPINF = 255
let MAXSUB = REG_MAXSUB
let MAXPROG = (32 << 10)
L_CHAR = 256
L_CCLASS = 257 /* character class */
L_NCCLASS = 258 /* negative character class */
L_NC = 259 /* "(?:" no capture */
L_PLA = 300 /* "(?=" positive lookahead */
L_NLA = 301 /* "(?!" negative lookahead */
L_WORD = 302 /* "\b" word boundary */
L_NWORD = 303 /* "\B" non-word boundary */
L_REF = 304 /* "\1" back-reference */
L_COUNT = 305 /* {M,N} */
/* Parse */
P_CAT = 0
P_ALT = 1
P_REP = 2
P_BOL = 3
P_EOL = 4
P_WORD = 5
P_NWORD = 6
P_PAR = 7
P_PLA = 8
P_NLA = 9
P_ANY = 10
P_CHAR = 11
P_CCLASS = 12
P_NCCLASS = 13
P_REF = 14
/* Compile */
I_END = 0;
I_JUMP = 1;
I_SPLIT = 2;
I_PLA = 3;
I_NLA = 4;
I_ANYNL = 5;
I_ANY = 6;
I_CHAR = 7;
I_CCLASS = 8;
I_NCCLASS = 9;
I_REF = 10;
I_BOL = 11;
I_EOL = 12;
I_WORD = 13;
I_NWORD = 14;
I_LPAR = 15;
I_RPAR = 16
let ccclass_memory = []
for (let i = 0; i < 16; i++) {
    ccclass_memory.push({
        end: 3452816845, // 参考C语言
        spans: Array(64).fill(3452816845)
    })
}
let g = {
    sub: [],
    prog: {
        cclass: ccclass_memory
    }
}
// 由于JavaScript中无法使用指针访问内存地址，在移植C语言程序时，使用数组中放置空对象模拟一片内存空间，暂时先放100个内存单元
let memory = []
for (let i = 0; i < 100; i++) {
    memory.push({})
}
function recomp(pattern, cflags) {
    console.log(pattern)
    let node;
    let split;
    let jump;
    let i = 0;
    let j = 0;
    g.pstart = null;
    // g.prog = {}; // 分配内存
    n = pattern.length * 2;
    if (n > 0) {
        // 分配内存
        g.pstart = g.pend = {}
    }
    g.source = pattern;
    g.ncclass = 0;
    g.nsub = 1;
    for (i = 1; i < MAXSUB; ++i) {
        g.sub[i] = 0;
    }
    g.prog.flags = cflags;
    next()
    node = parsealt();
    if (g.lookahead === ')')
        die("unmatched ')'");
    // if (g.lookahead != 0) // c语言最后为0
    if (g.lookahead != 0) // c语言和JavaScript非严格等号相同 "" == 0 为true
        die("syntax error");
    n = 6 + count(node);
    if (n < 0 || n > MAXPROG)
        die("program too large");
    g.prog.nsub = g.nsub;
    g.prog.start = g.prog.end = memory[0];
    split = emit(g.prog, I_SPLIT);
    let splitIndex = memory.indexOf(split);
    split.x = memory[splitIndex + 3];
    split.y = memory[splitIndex + 1];
    emit(g.prog, I_ANYNL);
    jump = emit(g.prog, I_JUMP);
    jump.x = split;
    emit(g.prog, I_LPAR);
    compile(g.prog, node);
    emit(g.prog, I_RPAR);
    emit(g.prog, I_END);
    // free(g.pstart);
    // if (errorp) *errorp = NULL;
    return g.prog;
}
function toupperrune(c) {
    /* TODO: Add unicode support */
    if (c >= 'a' && c <= 'z')
        return c - 'a' + 'A';
    return c;
}
function canon(c) {
    let u = toupperrune(c);
    if (c >= 128 && u < 128)
        return c;
    return u;
}
function compile(prog, node) {
    let inst, split, jump;
    let i;
    let flag = true
    if (!node)
        return;
    loop:
        while (flag) {
            switch (node.type) {
                case P_CAT:
                    compile(prog, node.x);
                    node = node.y;
                    continue loop;
                case P_ALT:
                    split = emit(prog, I_SPLIT);
                    compile(prog, node.x);
                    jump = emit(prog, I_JUMP);
                    compile(prog, node.y);
                    let splitIndex = memory.indexOf(split);
                    let jumpIndex = memory.indexOf(jump);
                    split.x = memory[splitIndex + 1];
                    split.y = memory[jumpIndex + 1];
                    jump.x = prog.end;
                    flag = false;
                    break;
                case P_REP:
                    for (i = 0; i < node.m; ++i) {
                        inst = prog.end;
                        compile(prog, node.x);
                    }
                    if (node.m == node.n) {
                        flag = false;
                        break;
                    }
                    if (node.n < REPINF) {
                        for (i = node.m; i < node.n; ++i) {
                            split = emit(prog, I_SPLIT);
                            compile(prog, node.x);
                            if (node.ng) {
                                let splitIndex = memory.indexOf(split);
                                split.y = memory[splitIndex + 1];
                                split.x = prog.end;
                            } else {
                                let splitIndex = memory.indexOf(split);
                                split.x = memory[splitIndex + 1];
                                split.y = prog.end;
                            }
                        }
                    } else if (node.m == 0) {
                        split = emit(prog, I_SPLIT);
                        compile(prog, node.x);
                        jump = emit(prog, I_JUMP);
                        if (node.ng) {
                            let splitIndex = memory.indexOf(split);
                            split.y = memory[splitIndex + 1];
                            split.x = prog.end;
                        } else {
                            let splitIndex = memory.indexOf(split);
                            split.x = memory[splitIndex + 1];
                            split.y = prog.end;
                        }
                        jump.x = split;
                    } else {
                        split = emit(prog, I_SPLIT);
                        if (node.ng) {
                            split.y = inst;
                            split.x = prog.end;
                        } else {
                            split.x = inst;
                            split.y = prog.end;
                        }
                    }
                    flag = false;
                    break;
                case P_BOL:
                    emit(prog, I_BOL);
                    flag = false;
                    break;
                case P_EOL:
                    emit(prog, I_EOL);
                    flag = false;
                    break;
                case P_WORD:
                    emit(prog, I_WORD);
                    flag = false;
                    break;
                case P_NWORD:
                    emit(prog, I_NWORD);
                    flag = false;
                    break;
                case P_PAR:
                    inst = emit(prog, I_LPAR);
                    inst.n = node.n;
                    compile(prog, node.x);
                    inst = emit(prog, I_RPAR);
                    inst.n = node.n;
                    flag = false;
                    break;
                case P_PLA:
                    split = emit(prog, I_PLA);
                    compile(prog, node.x);
                    emit(prog, I_END);
                    splitIndex = memory.indexOf(split);
                    split.x = memory[splitIndex + 1];
                    split.y = prog.end;
                    flag = false;
                    break;
                case P_NLA:
                    split = emit(prog, I_NLA);
                    compile(prog, node.x);
                    emit(prog, I_END);
                    splitIndex = memory.indexOf(split);
                    split.x = memory[splitIndex + 1];
                    split.y = prog.end;
                    flag = false;
                    break;
                case P_ANY:
                    emit(prog, I_ANY);
                    flag = false;
                    break;
                case P_CHAR:
                    inst = emit(prog, I_CHAR);
                    inst.c = (prog.flags & REG_ICASE) ? canon(node.c) : node.c;
                    flag = false;
                    break;
                case P_CCLASS:
                    inst = emit(prog, I_CCLASS);
                    inst.cc = node.cc;
                    flag = false;
                    break;
                case P_NCCLASS:
                    inst = emit(prog, I_NCCLASS);
                    inst.cc = node.cc;
                    flag = false;
                    break;
                case P_REF:
                    inst = emit(prog, I_REF);
                    inst.n = node.n;
                    flag = false;
                    break;
            }
        }
}
function count(node) {
    let min, max, n;
    if (!node) return 0;
    switch (node.type) {
        default:
            return 1;
        case P_CAT:
            return count(node.x) + count(node.y);
        case P_ALT:
            return count(node.x) + count(node.y) + 2;
        case P_REP:
            min = node.m;
            max = node.n;
            if (min == max) n = count(node.x) * min;
            else if (max < REPINF) n = count(node.x) * max + (max - min);
            else n = count(node.x) * (min + 1) + 2;
            if (n > MAXPROG) die("program too large");
            return n;
        case P_PAR:
            return count(node.x) + 2;
        case P_PLA:
            return count(node.x) + 2;
        case P_NLA:
            return count(node.x) + 2;
    }
}
function emit(prog, opcode) {
    // Reinst *inst = prog.end++;
    let inst = prog.end; // 先将地址赋值给inst
    let index = memory.indexOf(prog.end); // 在模拟内存中寻找索引
    prog.end = memory[index + 1]; // 将模拟内存中的下一个地址赋值给end
    inst.opcode = opcode;
    inst.n = 0;
    inst.c = 0;
    inst.cc = null;
    inst.x = inst.y = null;
    return inst;
}
function accept(t) {
    if (g.lookahead == t) {
        next();
        return 1;
    }
    return 0;
}
function newnode(type) {
    // let node = g.pend++;
    let node = {}
    node.type = type;
    node.cc = null;
    node.c = 0;
    node.ng = 0;
    node.m = 0;
    node.n = 0;
    node.x = node.y = null;
    return node;
}
function parseatom() {
    let atom;
    if (g.lookahead == L_CHAR) {
        atom = newnode(P_CHAR);
        atom.c = g.yychar;
        next();
        return atom;
    }
    if (g.lookahead == L_CCLASS) {
        atom = newnode(P_CCLASS);
        atom.cc = g.yycc;
        next();
        return atom;
    }
    if (g.lookahead == L_NCCLASS) {
        atom = newnode(P_NCCLASS);
        atom.cc = g.yycc;
        next();
        return atom;
    }
    if (g.lookahead == L_REF) {
        atom = newnode(P_REF);
        if (g.yychar == '' || g.yychar > g.nsub || !g.sub[g.yychar])
            die("invalid back-reference");
        atom.n = g.yychar;
        atom.x = g.sub[g.yychar];
        next();
        return atom;
    }
    if (accept('.'))
        return newnode(P_ANY);
    if (accept('(')) {
        atom = newnode(P_PAR);
        if (g.nsub == MAXSUB)
            die("too many captures");
        atom.n = g.nsub++;
        atom.x = parsealt();
        g.sub[atom.n] = atom;
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    if (accept(L_NC)) {
        atom = parsealt();
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    if (accept(L_PLA)) {
        atom = newnode(P_PLA);
        atom.x = parsealt();
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    if (accept(L_NLA)) {
        atom = newnode(P_NLA);
        atom.x = parsealt();
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    die("syntax error");
    return null;
}
function parserep() {
    let atom;
    if (accept('^')) return newnode(P_BOL);
    if (accept('$')) return newnode(P_EOL);
    if (accept(L_WORD)) return newnode(P_WORD);
    if (accept(L_NWORD)) return newnode(P_NWORD);
    atom = parseatom();
    if (g.lookahead == L_COUNT) {
        let min = g.yymin,
            max = g.yymax;
        next();
        if (max < min)
            die("invalid quantifier");
        return newrep(atom, accept('?'), min, max);
    }
    if (accept('*')) return newrep(atom, accept('?'), 0, REPINF);
    if (accept('+')) return newrep(atom, accept('?'), 1, REPINF);
    if (accept('?')) return newrep(atom, accept('?'), 0, 1);
    return atom;
}
function parsecat() {
    let cat, head, tail = {};
    if (g.lookahead && g.lookahead != '|' && g.lookahead != ')') {
        /* Build a right-leaning tree by splicing in new 'cat' at the tail. */
        head = parserep();
        tail.point = head; // ??
        let prev = head;
        while (g.lookahead && g.lookahead != '|' && g.lookahead != ')') {
            cat = newnode(P_CAT);
            cat.x = tail.point;
            cat.y = parserep();
            if (prev.x === tail.point) {
                tail.all.x = cat
                prev = cat
            } else if (prev.y === tail.point) {
                tail.all.y = cat
                prev = cat
            } else if (prev === tail.point) {
                head = cat
                prev = cat
            }
            tail.point = cat.y;
            tail.all = cat;
        }
        return head;
    }
    return null;
}
function empty(node) {
    if (!node) return 1;
    switch (node.type) {
        default:
            return 1;
        case P_CAT:
            return empty(node.x) && empty(node.y);
        case P_ALT:
            return empty(node.x) || empty(node.y);
        case P_REP:
            return empty(node.x) || node.m == 0;
        case P_PAR:
            return empty(node.x);
        case P_REF:
            return empty(node.x);
        case P_ANY:
        case P_CHAR:
        case P_CCLASS:
        case P_NCCLASS:
            return 0;
    }
}
function newrep(atom, ng, min, max) {
    let rep = newnode(P_REP);
    if (max == REPINF && empty(atom))
        die("infinite loop matching the empty string");
    rep.ng = ng;
    rep.m = min;
    rep.n = max;
    rep.x = atom;
    return rep;
}
function parsealt() {
    let alt, x;
    alt = parsecat();
    while (accept('|')) {
        x = alt;
        alt = newnode(P_ALT);
        alt.x = x;
        alt.y = parsecat();
    }
    return alt;
}
function next() {
    g.lookahead = lex();
}
function chartorune(r, s, key) {
    /* TODO: Add UTF-8 decoding */
    r[key] = s.slice(0, 1);
    return 1;
}
function incclasscanon(cc, c) {
    let p, r;
    for (p = cc.spans; p < cc.end; p += 2)
        for (r = p[0]; r <= p[1]; ++r)
            if (c == canon(r))
                return 1;
    return 0;
}
function incclass(cc, c) {
    let p;
    // for (p = cc.spans; p < cc.end; p += 2)
    for (p = 0; p < cc.end; p += 2)
        if (cc.spans[p] <= c && c <= cc.spans[p + 1])
            return 1;
    return 0;
}
function isnewline(c) {
    return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}
function iswordchar(c) {
    return c == '_' ||
        (c >= 'a' && c <= 'z') ||
        (c >= 'A' && c <= 'Z') ||
        (c >= '0' && c <= '9');
}
function die(message) {
    g.error = message;
    throw new Error(message);
    // longjmp(g.kaboom, 1);
}
function hex(c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
    if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
    die("invalid escape sequence");
    return 0;
}
function nextrune() {
    g.source = g.source.slice(chartorune(g, g.source, "yychar"), g.source.length);
    if (g.yychar == '\\') {
        g.source = g.source.slice(chartorune(g, g.source, "yychar"), g.source.length);
        switch (g.yychar) {
            case 0:
                die("unterminated escape sequence");
                break;
            case 'f':
                g.yychar = '\f';
                return 0;
            case 'n':
                g.yychar = '\n';
                return 0;
            case 'r':
                g.yychar = '\r';
                return 0;
            case 't':
                g.yychar = '\t';
                return 0;
            case 'v':
                g.yychar = '\v';
                return 0;
            case 'c':
                g.yychar = (g.source++) & 31;
                return 0;
            case 'x':
                g.yychar = hex(g.source++) << 4;
                g.yychar += hex(g.source++);
                if (g.yychar == '') { // ??
                    g.yychar = '0';
                    return 1;
                }
                return 0;
            case 'u':
                g.yychar = hex(g.source++) << 12;
                g.yychar += hex(g.source++) << 8;
                g.yychar += hex(g.source++) << 4;
                g.yychar += hex(g.source++);
                if (g.yychar == '') { // ??
                    g.yychar = '0';
                    return 1;
                }
                return 0;
        }
        if (ESCAPES.includes(g.yychar)) {
            return 1;
        }
        // if (strchr(ESCAPES, g.yychar)) // strchr 该函数返回在字符串 str 中第一次出现字符 c 的位置，如果未找到该字符则返回 NULL。
        //     return 1;
        if (isunicodeletter(g.yychar) || g.yychar == '_') /* check identity escape */
            die("invalid escape character");
        return 0;
    }
    return 0;
}
function newcclass() {
    // #define nelem(a) (sizeof (a) / sizeof (a)[0]) 求数组长度
    // if (g.ncclass >= nelem(g.prog.cclass))
    if (g.ncclass >= g.prog.cclass.length)
        die("too many character classes");
    // g.yycc = g.prog.cclass + g.ncclass++;
    g.yycc = ccclass_memory[g.ncclass++];
    g.yycc.end = g.yycc.spans[0]; // ??
    rangeIndex = 0; // 新的[]区间，索引清零
}
function addranges_d() {
    addrange('0', '9');
}
function addranges_D() {
    addrange(0, '0' - 1);
    addrange('9' + 1, 0xFFFF);
}
function addranges_W() {
    addrange(0, '0' - 1);
    addrange('9' + 1, 'A' - 1);
    addrange('Z' + 1, '_' - 1);
    addrange('_' + 1, 'a' - 1);
    addrange('z' + 1, 0xFFFF);
}
function addranges_w() {
    addrange('0', '9');
    addrange('A', 'Z');
    addrange('_', '_');
    addrange('a', 'z');
}
function addranges_S() {
    addrange(0, 0x9 - 1);
    addrange(0x9 + 1, 0xA - 1);
    addrange(0xD + 1, 0x20 - 1);
    addrange(0x20 + 1, 0xA0 - 1);
    addrange(0xA0 + 1, 0x2028 - 1);
    addrange(0x2029 + 1, 0xFEFF - 1);
    addrange(0xFEFF + 1, 0xFFFF);
}
function addranges_s() {
    addrange(0x9, 0x9);
    addrange(0xA, 0xD);
    addrange(0x20, 0x20);
    addrange(0xA0, 0xA0);
    addrange(0x2028, 0x2029);
    addrange(0xFEFF, 0xFEFF);
}
let rangeIndex = 0; // 为了避免复杂，以JavaScript的方式实现了相同功能
function addrange(a, b) {
    if (a > b)
        die("invalid character class range");
    // if (g.yycc.end + 2 == g.yycc.spans + nelem(g.yycc.spans))
    //      die("too many character class ranges");
    // g.yycc.end++ = a;
    // g.yycc.end++ = b;
    if (rangeIndex + 2 == 1 + g.yycc.spans.length) { // ??
        die("too many character class ranges");
    }
    // ??
    g.yycc.spans[rangeIndex++] = a;
    g.yycc.spans[rangeIndex++] = b;
    g.yycc.end = rangeIndex; // 无法访问指针，使用JavaScript的方式解决
}
function isalpharune(c) {
    /* TODO: Add unicode support */
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
function isunicodeletter(c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || isalpharune(c);
}
function lexclass() {
    let type = L_CCLASS;
    let quoted, havesave, havedash;
    let save = 0;
    newcclass();
    quoted = nextrune();
    if (!quoted && g.yychar == '^') {
        type = L_NCCLASS;
        quoted = nextrune();
    }
    havesave = havedash = 0;
    for (;;) {
        // if (g.yychar == 0)
        if (g.yychar == '')
            die("unterminated character class");
        if (!quoted && g.yychar == ']')
            break;
        if (!quoted && g.yychar == '-') {
            if (havesave) {
                if (havedash) {
                    addrange(save, '-');
                    havesave = havedash = 0;
                } else {
                    havedash = 1;
                }
            } else {
                save = '-';
                havesave = 1;
            }
        } else if (quoted && "DSWdsw".includes(g.yychar)) {
            if (havesave) {
                addrange(save, save);
                if (havedash)
                    addrange('-', '-');
            }
            switch (g.yychar) {
                case 'd':
                    addranges_d();
                    break;
                case 's':
                    addranges_s();
                    break;
                case 'w':
                    addranges_w();
                    break;
                case 'D':
                    addranges_D();
                    break;
                case 'S':
                    addranges_S();
                    break;
                case 'W':
                    addranges_W();
                    break;
            }
            havesave = havedash = 0;
        } else {
            if (quoted) {
                if (g.yychar == 'b')
                    g.yychar = '\b';
                else if (g.yychar == '0')
                    g.yychar = 0;
                /* else identity escape */
            }
            if (havesave) {
                if (havedash) {
                    addrange(save, g.yychar);
                    havesave = havedash = 0;
                } else {
                    addrange(save, save);
                    save = g.yychar;
                }
            } else {
                save = g.yychar;
                havesave = 1;
            }
        }
        quoted = nextrune();
    }
    if (havesave) {
        addrange(save, save);
        if (havedash)
            addrange('-', '-');
    }
    return type;
}
function lex() {
    let quoted = nextrune();
    if (quoted) {
        switch (g.yychar) {
            case 'b':
                return L_WORD;
            case 'B':
                return L_NWORD;
            case 'd':
                newcclass();
                addranges_d();
                return L_CCLASS;
            case 's':
                newcclass();
                addranges_s();
                return L_CCLASS;
            case 'w':
                newcclass();
                addranges_w();
                return L_CCLASS;
            case 'D':
                newcclass();
                addranges_d();
                return L_NCCLASS;
            case 'S':
                newcclass();
                addranges_s();
                return L_NCCLASS;
            case 'W':
                newcclass();
                addranges_w();
                return L_NCCLASS;
            case '0':
                g.yychar = 0;
                return L_CHAR;
        }
        if (g.yychar >= '0' && g.yychar <= '9') {
            g.yychar -= '0';
            if (g.source >= '0' && g.source <= '9')
                g.yychar = g.yychar * 10 + g.source++ - '0';
            return L_REF;
        }
        return L_CHAR;
    }
    switch (g.yychar) {
        case 0: // C里面最后是0
        case "":
        case '$':
        case ')':
        case '*':
        case '+':
        case '.':
        case '?':
        case '^':
        case '|':
            return g.yychar;
    }
    if (g.yychar == '{')
        return lexcount();
    if (g.yychar == '[')
        return lexclass();
    if (g.yychar == '(') {
        if (g.source[0] == '?') {
            if (g.source[1] == ':') {
                // g.source += 2;
                g.source = g.source.slice(2, g.source.length);
                return L_NC;
            }
            if (g.source[1] == '=') {
                // g.source += 2;
                g.source = g.source.slice(2, g.source.length);
                return L_PLA;
            }
            if (g.source[1] == '!') {
                // g.source += 2;
                g.source = g.source.slice(2, g.source.length);
                return L_NLA;
            }
        }
        return '(';
    }
    return L_CHAR;
}
function strncmpcanon(a, b, n) {
    let ra, rb;
    let c;
    let tempra = {
        ra: ra
    }
    let temprb = {
        rb: rb
    }
    while (n--) {
        if (!a) return -1;
        if (!b) return 1;
        a = a.slice(chartorune(tempra, a, "ra"), a.length);
        ra = tempra.ra;
        b = b.slice(chartorune(temprb, b, "rb"), b.length);
        rb = temprb.rb;
        c = canon(ra) - canon(rb);
        if (c)
            return c;
    }
    return 0;
}
function strncmp(str1, str2, n) {
    str1 = str1.substring(0, n);
    str2 = str2.substring(0, n);
    return ((str1 == str2) ? 0 :
        ((str1 > str2) ? 1 : -1));
}
function match(pc, sp, bol, flags, out) {
    let scratch;
    let i;
    let c;
    let tempc = {
        c: c
    }
    let pcIndex;
    for (;;) {
        switch (pc.opcode) {
            case I_END:
                return 1;
            case I_JUMP:
                pc = pc.x;
                break;
            case I_SPLIT:
                scratch = out;
                if (match(pc.x, sp, bol, flags, scratch)) {
                    out = scratch;
                    return 1;
                }
                pc = pc.y;
                break;
            case I_PLA:
                if (!match(pc.x, sp, bol, flags, out))
                    return 0;
                pc = pc.y;
                break;
            case I_NLA:
                scratch = out;
                if (match(pc.x, sp, bol, flags, scratch))
                    return 0;
                pc = pc.y;
                break;
            case I_ANYNL:
                sp = sp.slice(chartorune(tempc, sp, "c"), sp.length);
                c = tempc.c;
                if (c == '') // JavaScript中不使用严格等号 "" == 0 为true
                    return 0;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_ANY:
                sp = sp.slice(chartorune(tempc, sp, "c"), sp.length);
                c = tempc.c;
                if (c == '')
                    return 0;
                if (isnewline(c))
                    return 0;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_CHAR:
                sp = sp.slice(chartorune(tempc, sp, "c"), sp.length);
                c = tempc.c;
                if (c == '')
                    return 0;
                if (flags & REG_ICASE)
                    c = canon(c);
                if (c != pc.c)
                    return 0;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_CCLASS:
                sp = sp.slice(chartorune(tempc, sp, "c"), sp.length);
                c = tempc.c;
                if (c == '')
                    return 0;
                if (flags & REG_ICASE) {
                    if (!incclasscanon(pc.cc, canon(c)))
                        return 0;
                } else {
                    if (!incclass(pc.cc, c))
                        return 0;
                }
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_NCCLASS:
                sp = sp.slice(chartorune(tempc, sp, "c"), sp.length);
                c = tempc.c;
                if (c == '')
                    return 0;
                if (flags & REG_ICASE) {
                    if (incclasscanon(pc.cc, canon(c)))
                        return 0;
                } else {
                    if (incclass(pc.cc, c))
                        return 0;
                }
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_REF:
                i = out.sub[pc.n].sp.length - out.sub[pc.n].ep.length;
                if (flags & REG_ICASE) {
                    if (strncmpcanon(sp, out.sub[pc.n].sp, i))
                        return 0;
                } else {
                    if (strncmp(sp, out.sub[pc.n].sp, i))
                        return 0;
                }
                if (i > 0)
                    sp = sp.slice(i, sp.length);
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_BOL:
                if (sp == bol && !(flags & REG_NOTBOL)) {
                    pcIndex = memory.indexOf(pc);
                    pc = memory[pcIndex + 1];
                    break;
                }
                if (flags & REG_NEWLINE) {
                    // 如果有Bug，修复建议，sp > bol之间的比较，在C语言中，sp与bol是char*指针，即内存地址，
                    // sp > bol若为true，则表示sp的内存地址（指针）较大，实际存储的有效字符个数（内存地址开头到\0之间的字符）是sp比bol少的
                    // 这和JavaScript里面字符串比较很大不同
                    // if (sp > bol && isnewline(sp[-1])) { // 原C代码
                    if (sp.length < bol.length /*改为字符串长度比较*/ && isnewline(sp[-1])) {
                        pcIndex = memory.indexOf(pc);
                        pc = memory[pcIndex + 1];
                        break;
                    }
                }
                return 0;
            case I_EOL:
                if (sp == '') { // 在C语言和JavaScript非严格等号里面成立
                    pcIndex = memory.indexOf(pc);
                    pc = memory[pcIndex + 1];
                    break;
                }
                if (flags & REG_NEWLINE) {
                    if (isnewline(sp)) {
                        pcIndex = memory.indexOf(pc);
                        pc = memory[pcIndex + 1];
                        break;
                    }
                }
                return 0;
            case I_WORD:
                // i = sp > bol && iswordchar(sp[-1]);
                if (sp.length < bol.length) {
                    let index = bol.indexOf(sp);
                    i = iswordchar(bol[index - 1]);
                }
                i ^= iswordchar(sp[0]);
                if (!i)
                    return 0;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_NWORD:
                // i = sp > bol && iswordchar(sp[-1]);
                if (sp.length < bol.length) {
                    let index = bol.indexOf(sp);
                    i = iswordchar(bol[index - 1]);
                }
                i ^= iswordchar(sp[0]);
                if (i)
                    return 0;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_LPAR:
                out.sub[pc.n].sp = sp;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            case I_RPAR:
                out.sub[pc.n].ep = sp;
                pcIndex = memory.indexOf(pc);
                pc = memory[pcIndex + 1];
                break;
            default:
                return 0;
        }
    }
}
function regexec(prog, sp, sub, eflags) {
    let scratch;
    let i;
    if (!sub)
        sub = scratch;
    sub.nsub = prog.nsub;
    for (i = 0; i < MAXSUB; ++i) {
        if (!sub.sub[i]) {
            sub.sub[i] = {}
        }
        // sub.sub[i].sp = sub.sub[i].ep = null;
        sub.sub[i].sp = sub.sub[i].ep = "";
    }
    return !match(prog.start, sp, sp, prog.flags | eflags, sub);
}
function main() {
    let m = {
        sub: []
    }
    // let p = recomp(String.raw `.+\/(.+\..+)$`, 0);
    // let s = "/root/temp/hello.mp3";
    // let p = recomp(String.raw `\B..`, 0);
    // let s = "noonday";
    let p = recomp(String.raw `^((?:[_a-zA-Z])+(?:[_a-zA-Z\d])*)[ ]*(?:\((.*)\))`, 0);
    let s = "_foo0_  (x,y)";
    console.log("nsub =", p.nsub)
    if (!regexec(p, s, m, 0)) {
        for (i = 0; i < m.nsub; ++i) {
            let n = m.sub[i].sp.length - m.sub[i].ep.length;
            if (n > 0)
                console.log("match %d: s=%d e=%d n=%d '%s'\n", i, (s.length - m.sub[i].sp.length), (s.length - m.sub[i].ep.length), n, m.sub[i].sp.slice(0, n));
            else
                console.log("match %d: n=0 ''\n", i);
        }
    } else {
        console.log("no match\n");
    }
}
main()