shithub: libmujs

Download patch

ref: 6c37f967b4972c98e0bd55a804f3dab0795cb0b5
parent: 63143d8341e1f233332c7f7a00aa02e1cf5ae6b7
author: Tor Andersson <[email protected]>
date: Thu Jan 30 11:53:00 EST 2014

Use POSIX regcomp/regexec.

A temporary measure, since POSIX regexes (a) don't have the same
syntax as JavaScript's regexes, (b) don't work on UTF-8 (unless
the setlocale crap is configured correctly and the implementation
supports it), and (c) don't work on Windows.

--- a/jsgc.c
+++ b/jsgc.c
@@ -3,6 +3,8 @@
 #include "jsvalue.h"
 #include "jsrun.h"
 
+#include <regex.h>
+
 static void jsG_markobject(js_State *J, int mark, js_Object *obj);
 
 static void jsG_freeenvironment(js_State *J, js_Environment *env)
@@ -42,6 +44,10 @@
 {
 	if (obj->head)
 		jsG_freeproperty(J, obj->head);
+	if (obj->type == JS_CREGEXP) {
+		regfree(obj->u.r.prog);
+		free(obj->u.r.prog);
+	}
 	if (obj->type == JS_CITERATOR)
 		jsG_freeiterator(J, obj->u.iter.head);
 	free(obj);
--- a/jsi.h
+++ b/jsi.h
@@ -44,6 +44,8 @@
 void js_rot2(js_State *J);
 void js_rot3(js_State *J);
 
+int js_RegExp_prototype_exec(js_State *J, int idx, const char *text);
+
 /* Exception handling */
 
 struct js_Jumpbuf
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -2,12 +2,72 @@
 #include "jsvalue.h"
 #include "jsbuiltin.h"
 
+#define nelem(a) (sizeof (a) / sizeof (a)[0])
+
+#include <regex.h>
+
+int js_RegExp_prototype_exec(js_State *J, int idx, const char *text)
+{
+	/* Match text against the regexp at stack index idx. On a match, push an
+	 * array of the matched substrings (entry 0 is the whole match; the loop
+	 * stops at the first unset group). Otherwise push null. Returns 1. */
+	regmatch_t m[10];
+	regex_t *prog;
+	char *s;
+	int flags, i, n;
+
+	prog = js_toregexp(J, idx, &flags);
+
+	/* REG_EXTENDED, REG_ICASE and REG_NEWLINE are regcomp() options;
+	 * regexec() only accepts REG_NOTBOL/REG_NOTEOL, so pass no eflags. */
+	if (!regexec(prog, text, nelem(m), m, 0)) {
+		js_newarray(J);
+
+		s = malloc(strlen(text) + 1); /* NOTE(review): result is not checked for NULL */
+		if (js_try(J)) {
+			free(s);
+			js_throw(J);
+		}
+
+		for (i = 0; i < nelem(m) && m[i].rm_so >= 0; ++i) {
+			n = m[i].rm_eo - m[i].rm_so;
+			memcpy(s, text + m[i].rm_so, n);
+			s[n] = 0;
+			js_pushstring(J, s);
+			js_setindex(J, -2, i);
+		}
+
+		js_endtry(J);
+		free(s);
+		return 1;
+	}
+
+	js_pushnull(J);
+	return 1;
+}
+
 void js_newregexp(js_State *J, const char *pattern, int flags)
 {
+	char msg[256];
 	js_Object *obj;
+	regex_t *prog;
+	int opts, status;
 
 	obj = jsV_newobject(J, JS_CREGEXP, J->RegExp_prototype);
-	obj->u.r.prog = NULL;
+
+	opts = REG_EXTENDED;
+	if (flags & JS_REGEXP_I) opts |= REG_ICASE;
+	if (flags & JS_REGEXP_M) opts |= REG_NEWLINE;
+
+	prog = malloc(sizeof (regex_t));
+	status = regcomp(prog, pattern, opts);
+	if (status) {
+		regerror(status, prog, msg, sizeof msg); /* format the message before prog is released */
+		free(prog);
+		js_syntaxerror(J, "%s", msg);
+	}
+
+	obj->u.r.prog = prog;
 	obj->u.r.flags = flags;
 	js_pushobject(J, obj);
 
@@ -106,13 +166,19 @@
 
 static int Rp_exec(js_State *J, int argc)
 {
-	js_pushnull(J);
-	return 1;
+	return js_RegExp_prototype_exec(J, 0, js_tostring(J, 1)); // regexp at stack index 0, subject is index 1 converted with js_tostring
 }
 
 static int Rp_test(js_State *J, int argc)
 {
-	js_pushboolean(J, 0);
+	int flags; // filled in by js_toregexp; not otherwise used here
+	regex_t *prog;
+	const char *text;
+
+	prog = js_toregexp(J, 0, &flags);
+	text = js_tostring(J, 1);
+
+	js_pushboolean(J, !regexec(prog, text, 0, NULL, 0)); // regexec returns 0 on a match, so push true when the text matches
 	return 1;
 }
 
--- a/jsstring.c
+++ b/jsstring.c
@@ -3,6 +3,8 @@
 #include "jsbuiltin.h"
 #include "utf.h"
 
+#include <regex.h>
+
 static int jsB_new_String(js_State *J, int argc)
 {
 	js_newstring(J, argc > 0 ? js_tostring(J, 1) : "");
@@ -301,6 +303,50 @@
 	return 1;
 }
 
+static int Sp_match(js_State *J, int argc)
+{
+	/* Run the regexp taken (or built) from stack index 1 on the string at index 0. */
+	const char *subject = js_tostring(J, 0);
+
+	if (js_isregexp(J, 1)) {
+		js_copy(J, 1);
+	} else {
+		/* An undefined argument is compiled as the empty pattern. */
+		const char *source = js_isundefined(J, 1) ? "" : js_tostring(J, 1);
+		js_newregexp(J, source, 0);
+	}
+
+	// TODO: JS_REGEXP_G looping
+
+	return js_RegExp_prototype_exec(J, -1, subject);
+}
+
+static int Sp_search(js_State *J, int argc)
+{
+	/* Push the offset reported by regexec for the first match of the regexp
+	 * taken (or built) from index 1 in the string at index 0, or -1 if none. */
+	const char *subject = js_tostring(J, 0);
+	regmatch_t first;
+	regex_t *re;
+	int flags;
+	int found;
+
+	if (js_isregexp(J, 1)) {
+		js_copy(J, 1);
+	} else {
+		/* An undefined argument is compiled as the empty pattern. */
+		const char *source = js_isundefined(J, 1) ? "" : js_tostring(J, 1);
+		js_newregexp(J, source, 0);
+	}
+
+	re = js_toregexp(J, -1, &flags);
+	found = regexec(re, subject, 1, &first, 0) == 0;
+
+	// TODO: convert to utf-8 index offset
+	js_pushnumber(J, found ? first.rm_so : -1);
+	return 1;
+}
+
 void jsB_initstring(js_State *J)
 {
 	J->String_prototype->u.string = "";
@@ -315,10 +361,10 @@
 		jsB_propf(J, "indexOf", Sp_indexOf, 1);
 		jsB_propf(J, "lastIndexOf", Sp_lastIndexOf, 1);
 		jsB_propf(J, "localeCompare", Sp_localeCompare, 1);
-		jsB_propf(J, "slice", Sp_slice, 2);
-		// match (uses regexp)
+		jsB_propf(J, "match", Sp_match, 1);
 		// replace (uses regexp)
-		// search (uses regexp)
+		jsB_propf(J, "search", Sp_search, 1);
+		jsB_propf(J, "slice", Sp_slice, 2);
 		// split (uses regexp)
 		jsB_propf(J, "substring", Sp_substring, 2);
 		jsB_propf(J, "toLowerCase", Sp_toLowerCase, 0);