2014-02-18 23:01:48 +01:00

457 lines
15 KiB
C

/*
* Copyright 2011 - 2014
* Andr\xe9 Malo or his licensors, as applicable
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cext.h"
EXT_INIT_FUNC;
/*
 * Per-character flag bits stored in rjsmin_charmask.
 *
 * Each bit marks membership in one character class used by the minifier.
 */
#define RJSMIN_DULL_BIT          0x001  /* copied through by the main loop */
#define RJSMIN_PRE_REGEX_BIT     0x002  /* a following '/' may open a regex */
#define RJSMIN_REGEX_DULL_BIT    0x004  /* nothing special inside a regex */
#define RJSMIN_REGEX_CC_DULL_BIT 0x008  /* nothing special inside a regex [..] */
#define RJSMIN_ID_LIT_BIT        0x010  /* identifier / literal character */
#define RJSMIN_ID_LIT_O_BIT      0x020  /* may follow a line break directly */
#define RJSMIN_ID_LIT_C_BIT      0x040  /* may precede a line break directly */
#define RJSMIN_STRING_DULL_BIT   0x080  /* nothing special inside a string */
#define RJSMIN_SPACE_BIT         0x100  /* whitespace other than CR / LF */

/* Character unit processed by the minifier. */
#ifdef EXT3
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif

/* Convert a character constant or value to the rchar type. */
#define U(c) ((rchar)(c))

/* Flag word for c; only the low seven bits of c select the table slot. */
#define RJSMIN_FLAGS(c) (rjsmin_charmask[U(c) & 0x7F])

/* True when c lies above the 7-bit range covered by the table. */
#define RJSMIN_NON_ASCII(c) (U(c) > 127)

/*
 * "Dull" and identifier classes: every character above 127 is a member.
 */
#define RJSMIN_IS_DULL(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_DULL_BIT))

#define RJSMIN_IS_REGEX_DULL(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_REGEX_DULL_BIT))

#define RJSMIN_IS_REGEX_CC_DULL(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_REGEX_CC_DULL_BIT))

#define RJSMIN_IS_STRING_DULL(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_STRING_DULL_BIT))

#define RJSMIN_IS_ID_LITERAL(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_ID_LIT_BIT))

#define RJSMIN_IS_ID_LITERAL_OPEN(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_ID_LIT_O_BIT))

#define RJSMIN_IS_ID_LITERAL_CLOSE(c) \
    (RJSMIN_NON_ASCII(c) || (RJSMIN_FLAGS(c) & RJSMIN_ID_LIT_C_BIT))

/*
 * Whitespace and pre-regex classes: characters above 127 are never members.
 */
#define RJSMIN_IS_SPACE(c) \
    (!RJSMIN_NON_ASCII(c) && (RJSMIN_FLAGS(c) & RJSMIN_SPACE_BIT))

#define RJSMIN_IS_PRE_REGEX_1(c) \
    (!RJSMIN_NON_ASCII(c) && (RJSMIN_FLAGS(c) & RJSMIN_PRE_REGEX_BIT))
/*
* Character class lookup table.
*
* The RJSMIN_IS_* macros index this table with (character & 0x7F); each
* entry is tested against the RJSMIN_*_BIT flags. Entries need more than
* eight bits because RJSMIN_SPACE_BIT is (1 << 8).
* Rows hold eight consecutive character codes each.
*/
static const unsigned short rjsmin_charmask[128] = {
/* 0x00 - 0x1F: control characters; LF (0x0A) and CR (0x0D) hold 2 */
396, 396, 396, 396, 396, 396, 396, 396,
396, 396, 2, 396, 396, 2, 396, 396,
396, 396, 396, 396, 396, 396, 396, 396,
396, 396, 396, 396, 396, 396, 396, 396,
/* 0x20 - 0x2F: space and punctuation up to the slash */
396, 175, 76, 141, 253, 141, 143, 76,
175, 205, 141, 237, 143, 237, 141, 136,
/* 0x30 - 0x3F: digits, then colon through question mark */
253, 253, 253, 253, 253, 253, 253, 253,
253, 253, 143, 143, 141, 143, 141, 143,
/* 0x40 - 0x5F: at sign, upper case letters, brackets, backslash, caret, underscore */
141, 253, 253, 253, 253, 253, 253, 253,
253, 253, 253, 253, 253, 253, 253, 253,
253, 253, 253, 253, 253, 253, 253, 253,
253, 253, 253, 171, 1, 197, 141, 253,
/* 0x60 - 0x7F: backtick, lower case letters, braces, pipe, tilde, DEL */
141, 253, 253, 253, 253, 253, 253, 253,
253, 253, 253, 253, 253, 253, 253, 253,
253, 253, 253, 253, 253, 253, 253, 253,
253, 253, 253, 175, 143, 207, 141, 253
};
/*
* Minify a JavaScript buffer.
*
* Reads `length` characters from `source` and writes the minified text to
* `target`. Strings and regex literals are copied verbatim, comments are
* dropped and runs of whitespace are removed or collapsed to one separator.
*
* source             - input characters (not required to be terminated)
* target             - output buffer; the caller in this file allocates it
*                      with the same number of characters as the input.
*                      NOTE(review): presumably the output never exceeds the
*                      input length - confirm before reusing elsewhere.
* length             - number of characters in source
* keep_bang_comments - if non-zero, block comments whose first character
*                      after the opener is '!' are copied instead of dropped
*
* Returns the number of characters written to target.
*/
static Py_ssize_t
rjsmin(const rchar *source, rchar *target, Py_ssize_t length,
int keep_bang_comments)
{
/* reset: rewind position in source; sentinel: one past the last input char */
const rchar *reset, *sentinel = source + length;
/* tstart: start of the output, used for look-behind and the final length */
rchar *tstart = target;
/* quote: open string delimiter, or the pending separator in the
* whitespace section */
rchar c, quote;
while (source < sentinel) {
c = *source++;
/* Fast path: "dull" characters are copied through unchanged */
if (RJSMIN_IS_DULL(c)) {
*target++ = c;
continue;
}
switch (c) {
/* String: copy verbatim up to and including the matching quote */
case U('\''): case U('"'):
reset = source;
*target++ = quote = c;
while (source < sentinel) {
c = *source++;
*target++ = c;
if (RJSMIN_IS_STRING_DULL(c))
continue;
switch (c) {
case U('\''): case U('"'):
/* Matching quote ends the string; the other quote kind is content */
if (c == quote)
goto cont;
continue;
case U('\\'):
/* Escape: copy the next character too; a backslash followed by
* CR LF consumes both line break characters */
if (source < sentinel) {
c = *source++;
*target++ = c;
if (c == U('\r') && source < sentinel
&& *source == U('\n'))
*target++ = *source++;
}
continue;
}
/* Any other non-dull character (per the table: a bare CR or LF)
* aborts the string */
break;
}
/* Unterminated string: drop everything copied after the opening quote
* and rescan the input from just behind it */
target -= source - reset;
source = reset;
continue;
/* Comment or Regex or something else entirely */
case U('/'):
/* A slash as the very last input character is copied as is */
if (!(source < sentinel)) {
*target++ = c;
}
else {
switch (*source) {
/* Comment: handled by the whitespace section below */
case U('*'): case U('/'):
goto skip_or_copy_ws;
default:
/* A regex literal is assumed when nothing has been emitted yet,
* when the previous output character carries the PRE_REGEX bit,
* or when the output ends in "return" that is not the tail of a
* longer identifier */
if ( target == tstart
|| RJSMIN_IS_PRE_REGEX_1(*(target - 1))
|| (
(target - tstart >= 6)
&& *(target - 1) == U('n')
&& *(target - 2) == U('r')
&& *(target - 3) == U('u')
&& *(target - 4) == U('t')
&& *(target - 5) == U('e')
&& *(target - 6) == U('r')
&& (
target - tstart == 6
|| !RJSMIN_IS_ID_LITERAL(*(target - 7))
)
)) {
/* Regex */
reset = source;
*target++ = U('/');
while (source < sentinel) {
c = *source++;
*target++ = c;
if (RJSMIN_IS_REGEX_DULL(c))
continue;
switch (c) {
case U('/'):
/* Closing slash: regex copied completely */
goto cont;
case U('\\'):
/* Escape: copy the next character; an escaped CR or LF leaves the
* switch and then hits the loop-level break (regex aborted) */
if (source < sentinel) {
c = *source++;
*target++ = c;
if (c == U('\r') || c == U('\n'))
break;
}
continue;
case U('['):
/* Character class: copy up to the closing bracket */
while (source < sentinel) {
c = *source++;
*target++ = c;
if (RJSMIN_IS_REGEX_CC_DULL(c))
continue;
switch (c) {
case U('\\'):
if (source < sentinel) {
c = *source++;
*target++ = c;
/* NOTE(review): this break only leaves the inner switch, so the
* character class loop keeps running, unlike the same test in the
* outer regex loop. A bare CR or LF matches no case here and is
* skipped over as well. Confirm this is intended. */
if (c == U('\r') || c == U('\n'))
break;
}
continue;
case U(']'):
/* End of the character class: resume the regex loop */
goto cont_regex;
}
}
/* Input ended inside the character class */
break;
}
/* Reached for a bare CR or LF, an escaped line break or an
* unterminated character class: give up on the regex */
break;
cont_regex:
continue;
}
/* Not a complete regex: keep only the slash and rescan what follows */
target -= source - reset;
source = reset;
}
else {
/* Just a slash */
*target++ = c;
}
continue;
}
}
continue;
/* Whitespace */
default:
skip_or_copy_ws:
/* quote now holds the separator to emit, if one turns out to be
* needed: a space, upgraded to a newline once a line break or a
* line comment was seen */
quote = U(' ');
/* Step back so the loop re-reads the character that led here */
--source;
while (source < sentinel) {
c = *source++;
if (RJSMIN_IS_SPACE(c))
continue;
switch (c) {
case U('\r'): case U('\n'):
quote = U('\n');
continue;
case U('/'):
if (source < sentinel) {
switch (*source) {
case U('*'):
/* Block comment; reset marks the star and is set to NULL once the
* terminator has been found */
reset = source++;
/* copy bang comment, if requested */
if ( keep_bang_comments && source < sentinel
&& *source == U('!')) {
*target++ = U('/');
*target++ = U('*');
*target++ = *source++;
while (source < sentinel) {
c = *source++;
*target++ = c;
if (c == U('*') && source < sentinel
&& *source == U('/')) {
*target++ = *source++;
reset = NULL;
break;
}
}
/* Terminated: keep skipping whitespace behind the comment */
if (!reset)
continue;
/* Unterminated: discard the copy except for the leading slash and
* rescan from the star */
target -= source - reset;
source = reset;
}
/* strip regular comment */
else {
while (source < sentinel) {
c = *source++;
if (c == U('*') && source < sentinel
&& *source == U('/')) {
++source;
reset = NULL;
break;
}
}
/* Terminated: keep skipping whitespace behind the comment */
if (!reset)
continue;
/* Unterminated: emit the slash and rescan from the star */
source = reset;
*target++ = U('/');
}
goto cont;
case U('/'):
/* Line comment: skip up to and including LF, CR or CR LF */
++source;
while (source < sentinel) {
c = *source++;
switch (c) {
case U('\n'):
break;
case U('\r'):
if (source < sentinel
&& *source == U('\n'))
++source;
break;
default:
continue;
}
break;
}
quote = U('\n');
continue;
}
}
}
/* Neither whitespace nor a comment: un-read the character and stop */
--source;
break;
}
/* Emit the separator only if output precedes it, input follows it and
* dropping it would join two tokens: a newline between a closing and
* an opening identifier/literal character, or a space between two
* identifier/literal characters or between two '+' or two '-' */
if ((tstart < target && source < sentinel)
&& ((quote == U('\n')
&& RJSMIN_IS_ID_LITERAL_CLOSE(*(target - 1))
&& RJSMIN_IS_ID_LITERAL_OPEN(*source))
||
(quote == U(' ')
&& ((RJSMIN_IS_ID_LITERAL(*(target - 1))
&& RJSMIN_IS_ID_LITERAL(*source))
|| (source < sentinel
&& ((*(target - 1) == U('+')
&& *source == U('+'))
|| (*(target - 1) == U('-')
&& *source == U('-'))))))))
*target++ = quote;
}
/* Shared jump target to resume the main loop from nested loops */
cont:
continue;
}
return (Py_ssize_t)(target - tstart);
}
/*
* Docstring of the Python-level jsmin() function; it is attached to the
* function through the method table further down in this file.
*/
PyDoc_STRVAR(rjsmin_jsmin__doc__,
"jsmin(script, keep_bang_comments=False)\n\
\n\
Minify javascript based on `jsmin.c by Douglas Crockford`_\\.\n\
\n\
Instead of parsing the stream char by char, it uses a regular\n\
expression approach which minifies the whole script with one big\n\
substitution regex.\n\
\n\
.. _jsmin.c by Douglas Crockford:\n\
http://www.crockford.com/javascript/jsmin.c\n\
\n\
:Note: This is a hand crafted C implementation built on the regex\n\
semantics.\n\
\n\
:Parameters:\n\
`script` : ``str``\n\
Script to minify\n\
\n\
`keep_bang_comments` : ``bool``\n\
Keep comments starting with an exclamation mark? (``/*!...*/``)\n\
\n\
:Return: Minified script\n\
:Rtype: ``str``");
/*
 * Python entry point: jsmin(script, keep_bang_comments=False).
 *
 * Parses the arguments, allocates a result object as large as the input,
 * runs rjsmin() over the raw character buffers with the GIL released and
 * finally shrinks the result to the number of characters written.
 *
 * Under EXT2 a unicode argument is encoded to UTF-8 before minifying and
 * the result is decoded back; any other object is converted with
 * PyObject_Str(). Under EXT3 the unicode buffers are processed directly.
 * (EXT2 / EXT3 are defined outside this file - presumably they select the
 * Python 2 / Python 3 build.)
 *
 * Returns a new reference to the minified script, or NULL with an
 * exception set.
 */
static PyObject *
rjsmin_jsmin(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"script", "keep_bang_comments", NULL};
    PyObject *script;
    PyObject *bang_arg = NULL;
    PyObject *minified;
    Py_ssize_t in_len, out_len;
    int keep_bang_comments = 0;
#ifdef EXT2
    int was_unicode;
#define UOBJ "O"
#endif
#ifdef EXT3
#define UOBJ "U"
#endif

    if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
                                     &script, &bang_arg)) {
        return NULL;
    }

    /* The flag stays 0 unless the optional argument was passed */
    if (bang_arg) {
        keep_bang_comments = PyObject_IsTrue(bang_arg);
        if (keep_bang_comments == -1) {
            return NULL;
        }
    }

#ifdef EXT2
    /* Obtain an owned byte string to work on */
    was_unicode = PyUnicode_Check(script) ? 1 : 0;
    script = was_unicode ? PyUnicode_AsUTF8String(script)
                         : PyObject_Str(script);
    if (!script) {
        return NULL;
    }
#endif
#ifdef EXT3
    /* Own a reference, so the cleanup below is the same for both builds */
    Py_INCREF(script);
#define PyString_GET_SIZE PyUnicode_GET_SIZE
#define PyString_AS_STRING PyUnicode_AS_UNICODE
#define _PyString_Resize PyUnicode_Resize
#define PyString_FromStringAndSize PyUnicode_FromUnicode
#endif

    in_len = PyString_GET_SIZE(script);
    minified = PyString_FromStringAndSize(NULL, in_len);
    if (!minified) {
        Py_DECREF(script);
        return NULL;
    }

    Py_BEGIN_ALLOW_THREADS
    out_len = rjsmin((rchar *)PyString_AS_STRING(script),
                     (rchar *)PyString_AS_STRING(minified),
                     in_len, keep_bang_comments);
    Py_END_ALLOW_THREADS

    Py_DECREF(script);
    if (out_len < 0) {
        Py_DECREF(minified);
        return NULL;
    }

    /* Trim the result to what was actually written */
    if (out_len != in_len) {
        if (_PyString_Resize(&minified, out_len) == -1) {
            return NULL;
        }
    }

#ifdef EXT2
    /* Unicode in, unicode out: decode the minified UTF-8 bytes again */
    if (was_unicode) {
        PyObject *decoded = PyUnicode_DecodeUTF8(PyString_AS_STRING(minified),
                                                 PyString_GET_SIZE(minified),
                                                 "strict");
        Py_DECREF(minified);
        return decoded;
    }
#endif
    return minified;
}
/* ------------------------ BEGIN MODULE DEFINITION ------------------------ */
/*
* Method table of the module: exposes rjsmin_jsmin() under the Python name
* "jsmin", callable with positional and keyword arguments.
* EXT_METHODS is a macro defined outside this file (presumably in cext.h).
*/
EXT_METHODS = {
{"jsmin",
(PyCFunction)rjsmin_jsmin, METH_VARARGS | METH_KEYWORDS,
rjsmin_jsmin__doc__},
{NULL} /* Sentinel */
};
/* Module level docstring */
PyDoc_STRVAR(EXT_DOCS_VAR,
"C implementation of rjsmin\n\
==========================\n\
\n\
C implementation of rjsmin.");
/*
* Module definition built from name, method table and docstring.
* EXT_DEFINE is a macro defined outside this file; presumably it produces
* the EXT_DEFINE_VAR object handed to EXT_CREATE in the init function.
*/
EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);
/*
 * Module initialisation: create the module object and attach the
 * __author__ and __docformat__ attributes to it.
 * All EXT_* helpers are macros defined outside this file.
 */
EXT_INIT_FUNC {
    PyObject *mod;

    /* Create the module; bail out through the init error path on failure */
    mod = EXT_CREATE(&EXT_DEFINE_VAR);
    if (!mod) {
        EXT_INIT_ERROR(NULL);
    }

    /* Populate module level metadata */
    EXT_ADD_UNICODE(mod, "__author__", "Andr\xe9 Malo", "latin-1");
    EXT_ADD_STRING(mod, "__docformat__", "restructuredtext en");

    EXT_INIT_RETURN(mod);
}
/* ------------------------- END MODULE DEFINITION ------------------------- */