Sep 7th, 2012
421
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
(function () {
"use strict";
// Load cheerio and fs so the cleaner can be tried against a sample file
var fs = require("fs"),
cheerio = require('cheerio'),
file = 'tags.html';
/**
 * Strip the JavaScript hooks that this function knows about from an HTML
 * document and return the resulting markup.
 *
 * Two things are removed:
 *   1. every <script> element;
 *   2. every attribute whose name starts with "on" (case-insensitive),
 *      e.g. onclick, onload, onmouseover.
 *
 * NOTE: attribute *values* are never inspected, so this is not a complete
 * sanitizer -- for example a `javascript:` URL in an href is left untouched.
 *
 * @param {string|Buffer} rawHTML Markup handed straight to cheerio.load().
 * @returns {string} The document serialized by cheerio after the removals.
 */
function clearMarkup(rawHTML) {
    var $ = cheerio.load(rawHTML);

    // Drop the <script> elements first.
    $("script").remove();

    // Then remove the event-handler attributes from every remaining element.
    $("*").each(function (index, element) {
        var node = $(element);

        // Snapshot the attribute names up front: removeAttr() mutates
        // element.attribs, so we must not enumerate that object live.
        Object.keys(element.attribs).forEach(function (name) {
            if (name.toLowerCase().slice(0, 2) === "on") {
                node.removeAttr(name);
            }
        });
    });

    return $.html();
}
// Demo run: read the sample file, clean it and print the result.
fs.readFile(file, function (error, data) {
    if (error) {
        // Without this guard an unreadable file would pass `undefined`
        // on to clearMarkup(); report the failure and stop instead.
        console.error("Could not read " + file + ": " + error.message);
        return;
    }
    var cleanHTML = clearMarkup(data);
    console.log(cleanHTML);
});
}());
/* content of "tags.html" */
/*
<!doctype html>
<html>
<head>
<title>test</title>
</head>
<body onload="alert('evil xss')">
<ul>
<li onmouseover="alert('evil XSS')">a</li>
<li>b</li>
<li>c</li>
</ul>
<script>alert("xss")</script>
</body>
</html>
*/
/* console output */
/*
<!doctype html>
<html>
<head>
<title>test</title>
</head>
<body>
<ul>
<li>a</li>
<li>b</li>
<li>c</li>
</ul>
</body>
</html>
*/