Skip to content

Instantly share code, notes, and snippets.

@lachlan
Created March 26, 2015 00:38
Show Gist options
  • Select an option

  • Save lachlan/709ead50f1401ee3afa4 to your computer and use it in GitHub Desktop.

Select an option

Save lachlan/709ead50f1401ee3afa4 to your computer and use it in GitHub Desktop.
Split a large XML file into smaller XML files by a given element name
package com.github.lachlan.xml.splitter;
/**
 * Command-line tool that splits a large XML file into smaller files, one per
 * occurrence of a given element.
 *
 * <p>The input is streamed with StAX, so it is never loaded into memory as a
 * whole. Every top-level occurrence of the split element, together with all of
 * its descendants, is written to {@code <input_file>.split.NNN} next to the
 * input file, where {@code NNN} is a 1-based, zero-padded sequence number.
 */
public class Main {
    /** Buffer size, in bytes, used for the input stream and every output stream. */
    public static final int BUFFER_SIZE = 8192;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the XML file to split; {@code args[1]} is the
     *             qualified name of the element to split on, in the
     *             {@code {namespace_uri}element_name} format understood by
     *             {@link javax.xml.namespace.QName#valueOf(String)}. Usage is
     *             printed when fewer than two arguments are supplied.
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out.println("Usage: ");
            System.out.println("\tjava " + Main.class.getName() + " input_file qualified_element_name\n");
            System.out.println("\tinput_file \tfile name of the XML file to be split, for example C:\\example.xml");
            System.out.println("\tqualified_element_name\tqualified name of the element in the XML to split, in the format {namespace_uri}element_name");
            System.out.println("\t \trefer: <http://docs.oracle.com/javase/8/docs/api/javax/xml/namespace/QName.html#valueOf-java.lang.String->");
            return;
        }

        java.io.File inputFile = new java.io.File(args[0]);
        javax.xml.namespace.QName elementToSplitOn = javax.xml.namespace.QName.valueOf(args[1]);

        try {
            System.out.println("Splitting '" + inputFile.toURI() + "' on element '" + elementToSplitOn + "'");
            split(inputFile, elementToSplitOn);
        } catch (Exception ex) {
            // Top-level boundary of a CLI tool: report the failure and return.
            ex.printStackTrace();
        }
    }

    /**
     * Streams {@code inputFile} and writes each outermost {@code elementToSplitOn}
     * subtree to its own output file.
     *
     * @param inputFile        the XML file to read
     * @param elementToSplitOn qualified name of the element that starts a new output file
     * @throws java.io.IOException                 if a file cannot be opened, written or closed
     * @throws javax.xml.stream.XMLStreamException if the input is not well-formed or cannot be written
     */
    private static void split(java.io.File inputFile, javax.xml.namespace.QName elementToSplitOn)
            throws java.io.IOException, javax.xml.stream.XMLStreamException {
        javax.xml.stream.XMLInputFactory inputFactory = javax.xml.stream.XMLInputFactory.newInstance();
        javax.xml.stream.XMLOutputFactory outputFactory = javax.xml.stream.XMLOutputFactory.newInstance();

        try (java.io.InputStream inputStream =
                new java.io.BufferedInputStream(new java.io.FileInputStream(inputFile), BUFFER_SIZE)) {
            javax.xml.stream.XMLEventReader reader = inputFactory.createXMLEventReader(inputStream);
            java.io.OutputStream outputStream = null;
            javax.xml.stream.XMLEventWriter writer = null;
            int fileCount = 0;
            // Number of currently open split elements; > 1 means the split element is
            // nested inside itself, in which case the inner one stays in the outer file.
            int depth = 0;
            try {
                while (reader.hasNext()) {
                    javax.xml.stream.events.XMLEvent event = reader.nextEvent();

                    if (event.isStartElement()
                            && event.asStartElement().getName().equals(elementToSplitOn)) {
                        if (depth == 0) {
                            java.io.File outputFile = new java.io.File(inputFile.getParent(),
                                    String.format("%s.split.%03d", inputFile.getName(), ++fileCount));
                            System.out.println(String.format("Element '%s' found, splitting to file: '%s'",
                                    elementToSplitOn, outputFile.toURI()));
                            outputStream = new java.io.BufferedOutputStream(
                                    new java.io.FileOutputStream(outputFile), BUFFER_SIZE);
                            writer = outputFactory.createXMLEventWriter(outputStream);
                        }
                        depth++;
                    }

                    // While a split element is open, copy every event (including the
                    // start/end tags of descendants) to the current output file.
                    if (writer != null) {
                        writer.add(event);
                    }

                    if (depth > 0 && event.isEndElement()
                            && event.asEndElement().getName().equals(elementToSplitOn)
                            && --depth == 0) {
                        writer.close();
                        writer = null;
                        // The XML writer does not close the stream it wraps, so the
                        // file handle has to be released explicitly.
                        outputStream.close();
                        outputStream = null;
                    }
                }
            } finally {
                // Only reached with open resources when parsing failed part-way;
                // close failures are ignored so they do not mask the original error.
                if (writer != null) {
                    try {
                        writer.close();
                    } catch (javax.xml.stream.XMLStreamException ignored) {
                        // best-effort cleanup
                    }
                }
                if (outputStream != null) {
                    try {
                        outputStream.close();
                    } catch (java.io.IOException ignored) {
                        // best-effort cleanup
                    }
                }
                try {
                    reader.close();
                } catch (javax.xml.stream.XMLStreamException ignored) {
                    // best-effort cleanup; the input stream itself is closed by try-with-resources
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment