Friday, October 28, 2011

Sample program: parsing XML with the DOM API


DomSample.java :-
import javax.xml.parsers.*;
import org.w3c.dom.*;

/**
 * Static helpers for locating elements and collecting their text content
 * from a W3C DOM tree.
 */
class DOMUtil
{
    /**
     * Returns the first descendant of {@code element} whose tag name is {@code name}.
     *
     * @param element the element whose descendants are searched
     * @param name    the tag name to look for
     * @return the first matching descendant element
     * @throws RuntimeException if {@code element} has no descendant with that tag name
     */
    public static Element getFirstElement(Element element, String name) {
        NodeList nl = element.getElementsByTagName(name);
        if (nl.getLength() < 1) {
            throw new RuntimeException(
                "Element: " + element + " does not contain: " + name);
        }
        return (Element) nl.item(0);
    }

    /**
     * Concatenates the text content of every descendant of {@code node}
     * whose tag name is {@code name}.
     *
     * <p>The text of each match is trimmed and the pieces are joined with no
     * separator, so two {@code COUNTRY} elements holding "FR" and "ES" yield "FRES".
     *
     * @param node the element whose descendants are searched
     * @param name the tag name to look for
     * @return the concatenated text, or an empty string when nothing matches
     */
    public static String getSimpleElementText(Element node, String name) {
        NodeList nl = node.getElementsByTagName(name);
        StringBuilder sb = new StringBuilder();
        for (int n = 0; n < nl.getLength(); n++) {
            // The single-argument overload already returns a trimmed string.
            sb.append(getSimpleElementText((Element) nl.item(n)));
        }
        return sb.toString();
    }

    /**
     * Collects the text found beneath {@code node}, recursing into child elements.
     *
     * <p>Each text node is trimmed individually before being appended, so
     * whitespace used only for indentation does not appear in the result.
     * Children that are neither text nor elements (comments, processing
     * instructions, ...) are skipped.
     *
     * @param node the element whose text is collected
     * @return the concatenated, trimmed text; empty when there is none
     */
    public static String getSimpleElementText(Element node) {
        StringBuilder sb = new StringBuilder();
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child instanceof Text) {
                sb.append(child.getNodeValue().trim());
            } else if (child instanceof Element) {
                sb.append(getSimpleElementText((Element) child));
            }
            // Any other node type carries no element text; casting it to
            // Element would throw ClassCastException, so it is ignored.
        }
        return sb.toString().trim();
    }
}
/**
 * Command-line sample: loads DomSample.xml into a DOM tree and prints one
 * tab-separated line (id, status, geotarget) for each CAMPAIGN element.
 */
public class DomSample {

    /**
     * Parses DomSample.xml and writes the campaign summary to standard output.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    public static void main(String args[]) throws Exception {
        // Build a parser and load the whole document into memory.
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document document = builder.parse("DomSample.xml");

        NodeList campaigns = document.getDocumentElement().getElementsByTagName("CAMPAIGN");
        int total = campaigns.getLength();
        for (int index = 0; index < total; index++) {
            Element campaign = (Element) campaigns.item(index);

            String id = DOMUtil.getSimpleElementText(campaign, "ID");
            String geotarget = DOMUtil.getSimpleElementText(campaign, "GEOTARGET");
            String status = DOMUtil.getSimpleElementText(campaign, "STATUS");

            String row = id + "\t" + status + "\t" + geotarget;
            System.out.println(row);
        }
    }
}
DomSample.xml :-
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Sample input for DomSample: each CAMPAIGN holds an ID, a GEOTARGET
     containing COUNTRY entries, and a STATUS. -->
<GOOGLEADWORDS>
 <CAMPAIGN>
  <ID>9677460</ID>
  <GEOTARGET>
   <COUNTRY>FR</COUNTRY>
   <COUNTRY>ES</COUNTRY>
  </GEOTARGET>
  <STATUS>PAUSED</STATUS>
 </CAMPAIGN>
 <CAMPAIGN>
  <ID>9677461</ID>
  <GEOTARGET>
   <COUNTRY>NL</COUNTRY>
   <COUNTRY>UK</COUNTRY>
  </GEOTARGET>
  <STATUS>PAUSED</STATUS>
 </CAMPAIGN>
</GOOGLEADWORDS>