ref: 6c37f967b4972c98e0bd55a804f3dab0795cb0b5
parent: 63143d8341e1f233332c7f7a00aa02e1cf5ae6b7
author: Tor Andersson <[email protected]>
date: Thu Jan 30 11:53:00 EST 2014
Use POSIX regcomp/regexec. A temporary measure, since POSIX regexes (a) don't have the same syntax as JavaScript's regexes, (b) don't work on UTF-8 (unless the setlocale crap is configured correctly, and the implementation supports it), and (c) don't work on Windows.
--- a/jsgc.c
+++ b/jsgc.c
@@ -3,6 +3,8 @@
#include "jsvalue.h"
#include "jsrun.h"
+#include <regex.h>
+
static void jsG_markobject(js_State *J, int mark, js_Object *obj);
static void jsG_freeenvironment(js_State *J, js_Environment *env)
@@ -42,6 +44,10 @@
{
if (obj->head)
jsG_freeproperty(J, obj->head);
+ if (obj->type == JS_CREGEXP) {
+ regfree(obj->u.r.prog);
+ free(obj->u.r.prog);
+ }
if (obj->type == JS_CITERATOR)
jsG_freeiterator(J, obj->u.iter.head);
free(obj);
--- a/jsi.h
+++ b/jsi.h
@@ -44,6 +44,8 @@
void js_rot2(js_State *J);
void js_rot3(js_State *J);
+/* Defined in jsregexp.c: runs the regexp at stack index idx against text, pushes an array of matches or null, and returns 1. */
+int js_RegExp_prototype_exec(js_State *J, int idx, const char *text);
+
/* Exception handling */
struct js_Jumpbuf
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -2,12 +2,72 @@
#include "jsvalue.h"
#include "jsbuiltin.h"
+#define nelem(a) (sizeof (a) / sizeof (a)[0])
+
+#include <regex.h>
+
+/*
+ * Run the compiled regexp found at stack index 'idx' against 'text'.
+ *
+ * On a match a new array is pushed: element 0 holds the whole match and
+ * elements 1..9 hold the sub-expressions that took part in the match
+ * (groups that did not participate are left unset). When nothing matches,
+ * null is pushed instead. Always returns 1.
+ */
+int js_RegExp_prototype_exec(js_State *J, int idx, const char *text)
+{
+	int flags;
+	regex_t *prog;
+	regmatch_t m[10];
+	char *s;
+	int i, n;
+
+	prog = js_toregexp(J, idx, &flags);
+
+	/*
+	 * REG_EXTENDED, REG_ICASE and REG_NEWLINE are regcomp() flags and were
+	 * already applied when the pattern was compiled. regexec() only accepts
+	 * REG_NOTBOL / REG_NOTEOL; handing it compile flags can alias those bits
+	 * and silently disable '^' / '$' anchoring, so pass 0 here.
+	 */
+	if (regexec(prog, text, nelem(m), m, 0)) {
+		js_pushnull(J);
+		return 1;
+	}
+
+	js_newarray(J);
+
+	/* Scratch buffer: no substring of text can be longer than text itself. */
+	/* NOTE(review): the malloc result is not checked; report out-of-memory
+	 * through the interpreter's error mechanism once one is available here. */
+	s = malloc(strlen(text) + 1);
+	if (js_try(J)) {
+		/* Something below threw: release the buffer, then re-throw. */
+		free(s);
+		js_throw(J);
+	}
+
+	for (i = 0; i < (int)nelem(m); ++i) {
+		/*
+		 * rm_so is -1 for a group that did not participate in the match
+		 * (and for unused trailing slots). Skip it rather than stopping,
+		 * so that later groups which did match are still reported.
+		 */
+		if (m[i].rm_so < 0)
+			continue;
+		n = m[i].rm_eo - m[i].rm_so;
+		memcpy(s, text + m[i].rm_so, n);
+		s[n] = 0;
+		js_pushstring(J, s);
+		js_setindex(J, -2, i);
+	}
+
+	js_endtry(J);
+	free(s);
+	return 1;
+}
+
+/*
+ * Compile 'pattern' as a POSIX extended regular expression and push a new
+ * RegExp object holding the compiled program and 'flags'.
+ * JS_REGEXP_I maps to REG_ICASE and JS_REGEXP_M to REG_NEWLINE.
+ * A pattern that fails to compile is reported through js_syntaxerror()
+ * with the message produced by regerror().
+ */
void js_newregexp(js_State *J, const char *pattern, int flags)
{
+	char msg[256];
js_Object *obj;
+	regex_t *prog;
+	int opts, status;
+
+	opts = REG_EXTENDED;
+	if (flags & JS_REGEXP_I) opts |= REG_ICASE;
+	if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
+
+	/*
+	 * Compile before the object is created: the collector calls regfree()
+	 * on every JS_CREGEXP object unconditionally, so an object must never
+	 * exist without a successfully compiled program.
+	 */
+	/* NOTE(review): the malloc result is not checked. */
+	prog = malloc(sizeof *prog);
+	status = regcomp(prog, pattern, opts);
+	if (status) {
+		/* regerror() reads prog, so format the message before freeing it. */
+		regerror(status, prog, msg, sizeof msg);
+		free(prog);
+		js_syntaxerror(J, "%s", msg);
+	}
+
+	/* NOTE(review): if jsV_newobject can throw, prog leaks here - confirm. */
obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
- obj->u.r.prog = NULL;
+	obj->u.r.prog = prog;
obj->u.r.flags = flags;
js_pushobject(J, obj);
@@ -106,13 +166,19 @@
+/* Builtin wrapper: run the regexp at stack index 0 against argument 1 converted to a string. */
static int Rp_exec(js_State *J, int argc)
{
- js_pushnull(J);
- return 1;
+	const char *subject = js_tostring(J, 1);
+
+	/* The shared helper pushes the result and supplies the return count. */
+	return js_RegExp_prototype_exec(J, 0, subject);
}
+/* Builtin wrapper: push true when the regexp at stack index 0 matches argument 1 as a string, false otherwise. */
static int Rp_test(js_State *J, int argc)
{
- js_pushboolean(J, 0);
+	regex_t *re;
+	const char *subject;
+	int re_flags;
+	int matched;
+
+	re = js_toregexp(J, 0, &re_flags);
+	subject = js_tostring(J, 1);
+
+	/* regexec() returns 0 on a match; no capture positions are requested. */
+	matched = (regexec(re, subject, 0, NULL, 0) == 0);
+	js_pushboolean(J, matched);
return 1;
}
--- a/jsstring.c
+++ b/jsstring.c
@@ -3,6 +3,8 @@
#include "jsbuiltin.h"
#include "utf.h"
+#include <regex.h>
+
static int jsB_new_String(js_State *J, int argc)
{
js_newstring(J, argc > 0 ? js_tostring(J, 1) : "");
@@ -301,6 +303,50 @@
return 1;
}
+/*
+ * Match the value at stack index 0 (as a string) against argument 1.
+ * A regexp argument is used as is; undefined becomes the empty pattern;
+ * anything else is converted to a string and compiled with no flags.
+ * The result is whatever js_RegExp_prototype_exec() pushes.
+ */
+static int Sp_match(js_State *J, int argc)
+{
+	const char *subject = js_tostring(J, 0);
+
+	if (js_isregexp(J, 1)) {
+		js_copy(J, 1);
+	} else {
+		const char *pattern = js_isundefined(J, 1) ? "" : js_tostring(J, 1);
+		js_newregexp(J, pattern, 0);
+	}
+
+	// TODO: JS_REGEXP_G looping
+
+	/* The regexp to use now sits on top of the stack. */
+	return js_RegExp_prototype_exec(J, -1, subject);
+}
+
+/*
+ * Search the value at stack index 0 (as a string) for argument 1.
+ * Argument 1 is turned into a regexp the same way as in Sp_match.
+ * Pushes the start offset of the first match as reported by regexec(),
+ * or -1 when there is no match.
+ */
+static int Sp_search(js_State *J, int argc)
+{
+	const char *subject;
+	regex_t *re;
+	regmatch_t where;
+	int re_flags;
+
+	subject = js_tostring(J, 0);
+
+	if (js_isregexp(J, 1)) {
+		js_copy(J, 1);
+	} else {
+		const char *pattern = js_isundefined(J, 1) ? "" : js_tostring(J, 1);
+		js_newregexp(J, pattern, 0);
+	}
+
+	/* The regexp to use now sits on top of the stack. */
+	re = js_toregexp(J, -1, &re_flags);
+
+	if (regexec(re, subject, 1, &where, 0) != 0)
+		js_pushnumber(J, -1);
+	else
+		js_pushnumber(J, where.rm_so); // TODO: convert to utf-8 index offset
+
+	return 1;
+}
+
void jsB_initstring(js_State *J)
{
J->String_prototype->u.string = "";
@@ -315,10 +361,10 @@
jsB_propf(J, "indexOf", Sp_indexOf, 1);
jsB_propf(J, "lastIndexOf", Sp_lastIndexOf, 1);
jsB_propf(J, "localeCompare", Sp_localeCompare, 1);
- jsB_propf(J, "slice", Sp_slice, 2);
- // match (uses regexp)
+ jsB_propf(J, "match", Sp_match, 1);
// replace (uses regexp)
- // search (uses regexp)
+ jsB_propf(J, "search", Sp_search, 1);
+ jsB_propf(J, "slice", Sp_slice, 2);
// split (uses regexp)
jsB_propf(J, "substring", Sp_substring, 2);
jsB_propf(J, "toLowerCase", Sp_toLowerCase, 0);