gravatar

Convertir código HTML a caracteres ASCII y viceversa

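En Java puede utilizar este programa para convertir texto cuyos caracteres propios del español (á, ñ, ¿, etc.) están escritos como entidades HTML (por ejemplo, &aacute; o &ntilde;) a los caracteres correspondientes, conocidos habitualmente como ASCII extendido (ISO-8859-1), y también para hacer la conversión inversa.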

Use el siguiente programa:


import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map;

/**
 * Converts between HTML character references and the characters they denote.
 *
 * <p>The supported named entities are {@code quot}, {@code amp}, {@code lt},
 * {@code gt} and the ISO-8859-1 (Latin-1) set from {@code nbsp} (U+00A0) to
 * {@code yuml} (U+00FF). Decimal ({@code &#65;}) and hexadecimal
 * ({@code &#x41;}) numeric references are supported when decoding.
 */
public class Convert {

    /** Text emitted in place of a reference that cannot be resolved. */
    private static final String UNKNOWN = "?";

    /** Entity name (without the ampersand and semicolon) to replacement text. */
    private static final Map<String, String> ENTITIES = new HashMap<String, String>();

    /** Reverse index of {@link #ENTITIES}: character to entity name. */
    private static final Map<Character, String> NAMES_BY_CHAR = new HashMap<Character, String>();

    static {
        ENTITIES.put("quot", "\"");
        ENTITIES.put("amp", "\u0026");
        ENTITIES.put("lt", "\u003C");
        ENTITIES.put("gt", "\u003E");
        ENTITIES.put("nbsp", "\u00A0");
        ENTITIES.put("iexcl", "\u00A1");
        ENTITIES.put("cent", "\u00A2");
        ENTITIES.put("pound", "\u00A3");
        ENTITIES.put("curren", "\u00A4");
        ENTITIES.put("yen", "\u00A5");
        ENTITIES.put("brvbar", "\u00A6");
        ENTITIES.put("sect", "\u00A7");
        ENTITIES.put("uml", "\u00A8");
        ENTITIES.put("copy", "\u00A9");
        ENTITIES.put("ordf", "\u00AA");
        ENTITIES.put("laquo", "\u00AB");
        ENTITIES.put("not", "\u00AC");
        ENTITIES.put("shy", "\u00AD");
        ENTITIES.put("reg", "\u00AE");
        ENTITIES.put("macr", "\u00AF");
        ENTITIES.put("deg", "\u00B0");
        ENTITIES.put("plusmn", "\u00B1");
        ENTITIES.put("sup2", "\u00B2");
        ENTITIES.put("sup3", "\u00B3");
        ENTITIES.put("acute", "\u00B4");
        ENTITIES.put("micro", "\u00B5");
        ENTITIES.put("para", "\u00B6");
        ENTITIES.put("middot", "\u00B7");
        ENTITIES.put("cedil", "\u00B8");
        ENTITIES.put("sup1", "\u00B9");
        ENTITIES.put("ordm", "\u00BA");
        ENTITIES.put("raquo", "\u00BB");
        ENTITIES.put("frac14", "\u00BC");
        ENTITIES.put("frac12", "\u00BD");
        ENTITIES.put("frac34", "\u00BE");
        ENTITIES.put("iquest", "\u00BF");
        ENTITIES.put("Agrave", "\u00C0");
        ENTITIES.put("Aacute", "\u00C1");
        ENTITIES.put("Acirc", "\u00C2");
        ENTITIES.put("Atilde", "\u00C3");
        ENTITIES.put("Auml", "\u00C4");
        ENTITIES.put("Aring", "\u00C5");
        ENTITIES.put("AElig", "\u00C6");
        ENTITIES.put("Ccedil", "\u00C7");
        ENTITIES.put("Egrave", "\u00C8");
        ENTITIES.put("Eacute", "\u00C9");
        ENTITIES.put("Ecirc", "\u00CA");
        ENTITIES.put("Euml", "\u00CB");
        ENTITIES.put("Igrave", "\u00CC");
        ENTITIES.put("Iacute", "\u00CD");
        ENTITIES.put("Icirc", "\u00CE");
        ENTITIES.put("Iuml", "\u00CF");
        ENTITIES.put("ETH", "\u00D0");
        ENTITIES.put("Ntilde", "\u00D1");
        ENTITIES.put("Ograve", "\u00D2");
        ENTITIES.put("Oacute", "\u00D3");
        ENTITIES.put("Ocirc", "\u00D4");
        ENTITIES.put("Otilde", "\u00D5");
        ENTITIES.put("Ouml", "\u00D6");
        ENTITIES.put("times", "\u00D7");
        ENTITIES.put("Oslash", "\u00D8");
        ENTITIES.put("Ugrave", "\u00D9");
        ENTITIES.put("Uacute", "\u00DA");
        ENTITIES.put("Ucirc", "\u00DB");
        ENTITIES.put("Uuml", "\u00DC");
        ENTITIES.put("Yacute", "\u00DD");
        ENTITIES.put("THORN", "\u00DE");
        ENTITIES.put("szlig", "\u00DF");
        ENTITIES.put("agrave", "\u00E0");
        ENTITIES.put("aacute", "\u00E1");
        ENTITIES.put("acirc", "\u00E2");
        ENTITIES.put("atilde", "\u00E3");
        ENTITIES.put("auml", "\u00E4");
        ENTITIES.put("aring", "\u00E5");
        ENTITIES.put("aelig", "\u00E6");
        ENTITIES.put("ccedil", "\u00E7");
        ENTITIES.put("egrave", "\u00E8");
        ENTITIES.put("eacute", "\u00E9");
        ENTITIES.put("ecirc", "\u00EA");
        ENTITIES.put("euml", "\u00EB");
        ENTITIES.put("igrave", "\u00EC");
        ENTITIES.put("iacute", "\u00ED");
        ENTITIES.put("icirc", "\u00EE");
        ENTITIES.put("iuml", "\u00EF");
        ENTITIES.put("eth", "\u00F0");
        ENTITIES.put("ntilde", "\u00F1");
        ENTITIES.put("ograve", "\u00F2");
        ENTITIES.put("oacute", "\u00F3");
        ENTITIES.put("ocirc", "\u00F4");
        ENTITIES.put("otilde", "\u00F5");
        ENTITIES.put("ouml", "\u00F6");
        ENTITIES.put("divide", "\u00F7");
        ENTITIES.put("oslash", "\u00F8");
        ENTITIES.put("ugrave", "\u00F9");
        ENTITIES.put("uacute", "\u00FA");
        ENTITIES.put("ucirc", "\u00FB");
        ENTITIES.put("uuml", "\u00FC");
        ENTITIES.put("yacute", "\u00FD");
        ENTITIES.put("thorn", "\u00FE");
        ENTITIES.put("yuml", "\u00FF");

        // Every replacement above is a single, distinct character, so the
        // table can be inverted once here instead of being scanned for each
        // character that encode() processes.
        for (Map.Entry<String, String> entry : ENTITIES.entrySet()) {
            NAMES_BY_CHAR.put(entry.getValue().charAt(0), entry.getKey());
        }
    }

    /**
     * Replaces HTML character references in {@code str} with their characters.
     *
     * <p>A reference is the text between an ampersand and the next semicolon.
     * Named references are looked up in the entity table; references starting
     * with {@code #} are parsed as decimal code points, or hexadecimal when
     * followed by {@code x} or {@code X}. A reference that cannot be resolved
     * (unknown name, empty or malformed number, invalid code point) is
     * replaced by {@code ?}. An ampersand with no following semicolon is
     * copied through unchanged together with the rest of the input.
     *
     * @param str the text to decode; must not be {@code null}
     * @return the decoded text
     */
    public static String decode(String str) {
        StringBuilder out = new StringBuilder(str.length());
        int pos = 0;

        while (pos < str.length()) {
            int amp = str.indexOf('&', pos);
            if (amp == -1) {
                out.append(str, pos, str.length());
                break;
            }
            out.append(str, pos, amp);

            int semi = str.indexOf(';', amp);
            if (semi == -1) {
                // Unterminated reference: keep the remaining text verbatim.
                out.append(str, amp, str.length());
                break;
            }

            out.append(resolveReference(str.substring(amp + 1, semi)));
            pos = semi + 1;
        }
        return out.toString();
    }

    /**
     * Resolves the text found between an ampersand and a semicolon.
     *
     * @param token the reference body, possibly empty
     * @return the replacement text, or {@code ?} when it cannot be resolved
     */
    private static String resolveReference(String token) {
        // startsWith is safe on an empty token, unlike charAt(0).
        if (token.startsWith("#")) {
            return resolveNumeric(token.substring(1));
        }
        String replacement = ENTITIES.get(token);
        return replacement != null ? replacement : UNKNOWN;
    }

    /**
     * Resolves the body of a numeric reference (the part after {@code #}).
     *
     * @param digits decimal digits, or hexadecimal digits prefixed by x or X
     * @return the character(s) for that code point, or {@code ?} if invalid
     */
    private static String resolveNumeric(String digits) {
        int radix = 10;
        if (digits.startsWith("x") || digits.startsWith("X")) {
            radix = 16;
            digits = digits.substring(1);
        }
        try {
            int codePoint = Integer.parseInt(digits, radix);
            // Code points above U+FFFF need a surrogate pair; a plain cast
            // to char would silently truncate them.
            if (Character.isValidCodePoint(codePoint)) {
                return new String(Character.toChars(codePoint));
            }
        } catch (NumberFormatException ignored) {
            // Malformed number: fall through to the unknown marker.
        }
        return UNKNOWN;
    }

    /**
     * Replaces every character that has a named entity in the table with its
     * {@code &name;} form; all other characters are copied unchanged.
     *
     * @param str the text to encode; must not be {@code null}
     * @return the encoded text
     */
    public static String encode(String str) {
        StringBuilder out = new StringBuilder(str.length());

        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            String name = NAMES_BY_CHAR.get(current);
            if (name != null) {
                out.append('&').append(name).append(';');
            } else {
                out.append(current);
            }
        }
        return out.toString();
    }

    /**
     * Demonstrates both conversions by printing each sample next to its result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String htmlCode = "&lt;p&gt;Es un p&aacute;rrafo&lt;/p&gt;";
        System.out.println(htmlCode);
        System.out.println(decode(htmlCode));
        // Unicode escapes keep this source independent of the file encoding;
        // the runtime string is the same as with the accented letters typed in.
        String html = "<b>A\u00F1o del \u00E1rbol</b>";
        System.out.println(html);
        System.out.println(encode(html));
    }

}

Con lo cual resulta:


&lt;p&gt;Es un p&aacute;rrafo&lt;/p&gt;
<p>Es un párrafo</p>

<b>Año del árbol</b>
&lt;b&gt;A&ntilde;o del &aacute;rbol&lt;/b&gt;