457 lines
15 KiB
C
457 lines
15 KiB
C
/*
 * Copyright 2011 - 2014
 * Andr\xe9 Malo or his licensors, as applicable
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#include "cext.h"
|
|
/* Forward declaration of the module init function. EXT_INIT_FUNC is a macro
 * (presumably supplied by cext.h); the definition is at the end of the file. */
EXT_INIT_FUNC;
|
|
|
|
/*
 * Character class flags. Each entry of rjsmin_charmask is a bitwise OR of
 * these; the RJSMIN_IS_* macros below test one flag each.
 */
#define RJSMIN_DULL_BIT           (1 << 0)
#define RJSMIN_PRE_REGEX_BIT      (1 << 1)
#define RJSMIN_REGEX_DULL_BIT     (1 << 2)
#define RJSMIN_REGEX_CC_DULL_BIT  (1 << 3)
#define RJSMIN_ID_LIT_BIT         (1 << 4)
#define RJSMIN_ID_LIT_O_BIT       (1 << 5)
#define RJSMIN_ID_LIT_C_BIT       (1 << 6)
#define RJSMIN_STRING_DULL_BIT    (1 << 7)
#define RJSMIN_SPACE_BIT          (1 << 8)

/* Element type of the buffers processed by rjsmin(). */
#ifdef EXT3
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif

/* Convert a character constant or value to rchar. */
#define U(c) ((rchar)(c))

/*
 * Classification macros. The table only covers 0..127, so the value is
 * range-checked first; `& 0x7F` merely keeps the (then unused) index in
 * bounds. Note that the argument is evaluated more than once.
 *
 * The first group treats every character above 127 as a member of the class,
 * the second group (SPACE, PRE_REGEX_1) treats it as a non-member.
 */
#define RJSMIN_IS_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_DULL_BIT))

#define RJSMIN_IS_REGEX_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_DULL_BIT))

#define RJSMIN_IS_REGEX_CC_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_CC_DULL_BIT))

#define RJSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_STRING_DULL_BIT))

#define RJSMIN_IS_ID_LITERAL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_BIT))

#define RJSMIN_IS_ID_LITERAL_OPEN(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_O_BIT))

#define RJSMIN_IS_ID_LITERAL_CLOSE(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_C_BIT))

#define RJSMIN_IS_SPACE(c) ((U(c) <= 127) && \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_SPACE_BIT))

#define RJSMIN_IS_PRE_REGEX_1(c) ((U(c) <= 127) && \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_PRE_REGEX_BIT))


/*
 * Flag set for each of the 128 ASCII code points, indexed by character value
 * (eight entries per row, so row k covers 8*k .. 8*k+7).
 */
static const unsigned short rjsmin_charmask[128] = {
    396, 396, 396, 396, 396, 396, 396, 396,
    396, 396,   2, 396, 396,   2, 396, 396,
    396, 396, 396, 396, 396, 396, 396, 396,
    396, 396, 396, 396, 396, 396, 396, 396,
    396, 175,  76, 141, 253, 141, 143,  76,
    175, 205, 141, 237, 143, 237, 141, 136,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 143, 143, 141, 143, 141, 143,
    141, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 171,   1, 197, 141, 253,
    141, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 175, 143, 207, 141, 253
};
|
|
|
|
static Py_ssize_t
|
|
rjsmin(const rchar *source, rchar *target, Py_ssize_t length,
|
|
int keep_bang_comments)
|
|
{
|
|
const rchar *reset, *sentinel = source + length;
|
|
rchar *tstart = target;
|
|
rchar c, quote;
|
|
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
if (RJSMIN_IS_DULL(c)) {
|
|
*target++ = c;
|
|
continue;
|
|
}
|
|
switch (c) {
|
|
|
|
/* String */
|
|
case U('\''): case U('"'):
|
|
reset = source;
|
|
*target++ = quote = c;
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (RJSMIN_IS_STRING_DULL(c))
|
|
continue;
|
|
switch (c) {
|
|
case U('\''): case U('"'):
|
|
if (c == quote)
|
|
goto cont;
|
|
continue;
|
|
case U('\\'):
|
|
if (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (c == U('\r') && source < sentinel
|
|
&& *source == U('\n'))
|
|
*target++ = *source++;
|
|
}
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
target -= source - reset;
|
|
source = reset;
|
|
continue;
|
|
|
|
/* Comment or Regex or something else entirely */
|
|
case U('/'):
|
|
if (!(source < sentinel)) {
|
|
*target++ = c;
|
|
}
|
|
else {
|
|
switch (*source) {
|
|
/* Comment */
|
|
case U('*'): case U('/'):
|
|
goto skip_or_copy_ws;
|
|
|
|
default:
|
|
if ( target == tstart
|
|
|| RJSMIN_IS_PRE_REGEX_1(*(target - 1))
|
|
|| (
|
|
(target - tstart >= 6)
|
|
&& *(target - 1) == U('n')
|
|
&& *(target - 2) == U('r')
|
|
&& *(target - 3) == U('u')
|
|
&& *(target - 4) == U('t')
|
|
&& *(target - 5) == U('e')
|
|
&& *(target - 6) == U('r')
|
|
&& (
|
|
target - tstart == 6
|
|
|| !RJSMIN_IS_ID_LITERAL(*(target - 7))
|
|
)
|
|
)) {
|
|
|
|
/* Regex */
|
|
reset = source;
|
|
*target++ = U('/');
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (RJSMIN_IS_REGEX_DULL(c))
|
|
continue;
|
|
switch (c) {
|
|
case U('/'):
|
|
goto cont;
|
|
case U('\\'):
|
|
if (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (c == U('\r') || c == U('\n'))
|
|
break;
|
|
}
|
|
continue;
|
|
case U('['):
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (RJSMIN_IS_REGEX_CC_DULL(c))
|
|
continue;
|
|
switch (c) {
|
|
case U('\\'):
|
|
if (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (c == U('\r') || c == U('\n'))
|
|
break;
|
|
}
|
|
continue;
|
|
case U(']'):
|
|
goto cont_regex;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
cont_regex:
|
|
continue;
|
|
}
|
|
target -= source - reset;
|
|
source = reset;
|
|
}
|
|
else {
|
|
/* Just a slash */
|
|
*target++ = c;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
continue;
|
|
|
|
/* Whitespace */
|
|
default:
|
|
skip_or_copy_ws:
|
|
quote = U(' ');
|
|
--source;
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
if (RJSMIN_IS_SPACE(c))
|
|
continue;
|
|
switch (c) {
|
|
case U('\r'): case U('\n'):
|
|
quote = U('\n');
|
|
continue;
|
|
case U('/'):
|
|
if (source < sentinel) {
|
|
switch (*source) {
|
|
case U('*'):
|
|
reset = source++;
|
|
/* copy bang comment, if requested */
|
|
if ( keep_bang_comments && source < sentinel
|
|
&& *source == U('!')) {
|
|
*target++ = U('/');
|
|
*target++ = U('*');
|
|
*target++ = *source++;
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
*target++ = c;
|
|
if (c == U('*') && source < sentinel
|
|
&& *source == U('/')) {
|
|
*target++ = *source++;
|
|
reset = NULL;
|
|
break;
|
|
}
|
|
}
|
|
if (!reset)
|
|
continue;
|
|
target -= source - reset;
|
|
source = reset;
|
|
}
|
|
/* strip regular comment */
|
|
else {
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
if (c == U('*') && source < sentinel
|
|
&& *source == U('/')) {
|
|
++source;
|
|
reset = NULL;
|
|
break;
|
|
}
|
|
}
|
|
if (!reset)
|
|
continue;
|
|
source = reset;
|
|
*target++ = U('/');
|
|
}
|
|
goto cont;
|
|
case U('/'):
|
|
++source;
|
|
while (source < sentinel) {
|
|
c = *source++;
|
|
switch (c) {
|
|
case U('\n'):
|
|
break;
|
|
case U('\r'):
|
|
if (source < sentinel
|
|
&& *source == U('\n'))
|
|
++source;
|
|
break;
|
|
default:
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
quote = U('\n');
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
--source;
|
|
break;
|
|
}
|
|
|
|
if ((tstart < target && source < sentinel)
|
|
&& ((quote == U('\n')
|
|
&& RJSMIN_IS_ID_LITERAL_CLOSE(*(target - 1))
|
|
&& RJSMIN_IS_ID_LITERAL_OPEN(*source))
|
|
||
|
|
(quote == U(' ')
|
|
&& ((RJSMIN_IS_ID_LITERAL(*(target - 1))
|
|
&& RJSMIN_IS_ID_LITERAL(*source))
|
|
|| (source < sentinel
|
|
&& ((*(target - 1) == U('+')
|
|
&& *source == U('+'))
|
|
|| (*(target - 1) == U('-')
|
|
&& *source == U('-'))))))))
|
|
*target++ = quote;
|
|
}
|
|
cont:
|
|
continue;
|
|
}
|
|
return (Py_ssize_t)(target - tstart);
|
|
}
|
|
|
|
|
|
PyDoc_STRVAR(rjsmin_jsmin__doc__,
|
|
"jsmin(script, keep_bang_comments=False)\n\
|
|
\n\
|
|
Minify javascript based on `jsmin.c by Douglas Crockford`_\\.\n\
|
|
\n\
|
|
Instead of parsing the stream char by char, it uses a regular\n\
|
|
expression approach which minifies the whole script with one big\n\
|
|
substitution regex.\n\
|
|
\n\
|
|
.. _jsmin.c by Douglas Crockford:\n\
|
|
http://www.crockford.com/javascript/jsmin.c\n\
|
|
\n\
|
|
:Note: This is a hand crafted C implementation built on the regex\n\
|
|
semantics.\n\
|
|
\n\
|
|
:Parameters:\n\
|
|
`script` : ``str``\n\
|
|
Script to minify\n\
|
|
\n\
|
|
`keep_bang_comments` : ``bool``\n\
|
|
Keep comments starting with an exclamation mark? (``/*!...*/``)\n\
|
|
\n\
|
|
:Return: Minified script\n\
|
|
:Rtype: ``str``");
|
|
|
|
/*
 * Python entry point: jsmin(script, keep_bang_comments=False).
 *
 * Parses the arguments, allocates a result object of the same size as the
 * input, runs rjsmin() on the raw buffers with the GIL released and shrinks
 * the result to the number of characters actually written.
 *
 * Under EXT2 a unicode script is encoded to UTF-8 first and the result is
 * decoded again; any other object is converted with PyObject_Str(). Under
 * EXT3 the script must be a unicode object ("U" format) and the PyString_*
 * names are mapped onto their PyUnicode_* counterparts.
 *
 * Returns a new reference to the minified script, or NULL with an exception
 * set.
 */
static PyObject *
rjsmin_jsmin(PyObject *self, PyObject *args, PyObject *kwds)
{
    PyObject *script, *keep_bang_comments_ = NULL, *result;
    static char *kwlist[] = {"script", "keep_bang_comments", NULL};
    Py_ssize_t slength, length;
    int keep_bang_comments;
#ifdef EXT2
    int uni;
#define UOBJ "O"
#endif
#ifdef EXT3
#define UOBJ "U"
#endif

    if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
                                     &script, &keep_bang_comments_))
        return NULL;

    if (!keep_bang_comments_)
        keep_bang_comments = 0;
    else {
        keep_bang_comments = PyObject_IsTrue(keep_bang_comments_);
        if (keep_bang_comments == -1)
            return NULL;
    }

    /* From here on `script` is an owned reference in both variants. */
#ifdef EXT2
    if (PyUnicode_Check(script)) {
        if (!(script = PyUnicode_AsUTF8String(script)))
            return NULL;
        uni = 1;
    }
    else {
        if (!(script = PyObject_Str(script)))
            return NULL;
        uni = 0;
    }
#endif

#ifdef EXT3
    Py_INCREF(script);
#define PyString_GET_SIZE PyUnicode_GET_SIZE
#define PyString_AS_STRING PyUnicode_AS_UNICODE
#define _PyString_Resize PyUnicode_Resize
#define PyString_FromStringAndSize PyUnicode_FromUnicode
#endif

    slength = PyString_GET_SIZE(script);
    if (!(result = PyString_FromStringAndSize(NULL, slength))) {
        Py_DECREF(script);
        return NULL;
    }
    Py_BEGIN_ALLOW_THREADS
    length = rjsmin((rchar *)PyString_AS_STRING(script),
                    (rchar *)PyString_AS_STRING(result),
                    slength, keep_bang_comments);
    Py_END_ALLOW_THREADS

    Py_DECREF(script);
    if (length < 0) {
        Py_DECREF(result);
        return NULL;
    }
    if (length != slength && _PyString_Resize(&result, length) == -1) {
        /* _PyString_Resize() frees the object and NULLs the pointer on
         * failure, PyUnicode_Resize() leaves the object alive; XDECREF
         * releases it in the latter case and is a no-op in the former. */
        Py_XDECREF(result);
        return NULL;
    }

#ifdef EXT2
    if (uni) {
        script = PyUnicode_DecodeUTF8(PyString_AS_STRING(result),
                                      PyString_GET_SIZE(result), "strict");
        Py_DECREF(result);
        if (!script)
            return NULL;
        result = script;
    }
#endif
    return result;
}
|
|
|
|
/* ------------------------ BEGIN MODULE DEFINITION ------------------------ */
|
|
|
|
EXT_METHODS = {
|
|
{"jsmin",
|
|
(PyCFunction)rjsmin_jsmin, METH_VARARGS | METH_KEYWORDS,
|
|
rjsmin_jsmin__doc__},
|
|
|
|
{NULL} /* Sentinel */
|
|
};
|
|
|
|
PyDoc_STRVAR(EXT_DOCS_VAR,
|
|
"C implementation of rjsmin\n\
|
|
==========================\n\
|
|
\n\
|
|
C implementation of rjsmin.");
|
|
|
|
|
|
EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);
|
|
|
|
/*
 * Module init function: creates the module object and adds the
 * `__author__` and `__docformat__` attributes to it. The EXT_* macros are
 * presumably supplied by cext.h.
 */
EXT_INIT_FUNC {
    PyObject *m;

    /* Create the module and populate stuff */
    if (!(m = EXT_CREATE(&EXT_DEFINE_VAR)))
        EXT_INIT_ERROR(NULL);

    /* the author name is given as latin-1 bytes, hence the \xe9 escape */
    EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1");
    EXT_ADD_STRING(m, "__docformat__", "restructuredtext en");

    EXT_INIT_RETURN(m);
}
|
|
|
|
/* ------------------------- END MODULE DEFINITION ------------------------- */
|