Shuffle XML Elements with XPath and VTD-XML

This is a simple app that shuffles elements in an XML file. It uses XPath to address individual element then re-arrange and re-combine the fragments.  Those fragments are identified by their offsets and lengths, both of which are obtained by calling VTDNav’s getElementFragment().

Why not using SAX and STaX?

Simply speaking, the lack of XPath support makes it very tedious, almost impossible, to re-arrange XML element fragments.

Why not using DOM?

Aside from performance and memory usage, the redundant, wasteful de-serialization/serialization contributes nothing but overhead to the task.

The Code

Input XML:

   <root>
     <a> text </a>
     <b> text </b>
     <c> text </c>
     <a> text </a>
     <b> text </b>
     <c> text </c>
     <a> text </a>
     <b> text </b>
     <c> text </c>
   </root>

Output.xml

   <root>
     <a> text </a>
     <a> text </a>
     <a> text </a>
     <b> text </b>
     <b> text </b>
     <b> text </b>
     <c> text </c>
     <c> text </c>
     <c> text </c>
   </root>

Java Code:

import com.ximpleware.*;
import java.io.*;
public class shuffle {
    public static void main(String[] args) throws Exception {
        VTDGen vg = new VTDGen();
        AutoPilot ap0 = new AutoPilot();
        AutoPilot ap1 = new AutoPilot();
        AutoPilot ap2 = new AutoPilot();
        ap0.selectXPath("/root/a");
        ap1.selectXPath("/root/b");
        ap2.selectXPath("/root/c");

        if (vg.parseFile("old.xml",false)){
            VTDNav vn = vg.getNav();
            ap0.bind(vn);
            ap1.bind(vn);
            ap2.bind(vn);
            FileOutputStream fos = new FileOutputStream("new.xml");
            fos.write("<root>".getBytes());
            byte[] ba = vn.getXML().getBytes();
            while(ap0.evalXPath()!=-1){
                long l= vn.getElementFragment();
                int offset = (int)l;
                int len = (int)(l>>32);
                fos.write('\n');
                fos.write(ba,offset, len);
            }
            ap0.resetXPath();
            while(ap1.evalXPath()!=-1){
                long l= vn.getElementFragment();
                int offset = (int)l;
                int len = (int)(l>>32);
                fos.write('\n');
                fos.write(ba,offset, len);
            }
            ap1.resetXPath();
            while(ap2.evalXPath()!=-1){
                long l= vn.getElementFragment();
                int offset = (int)l;
                int len = (int)(l>>32);
                fos.write('\n');
                fos.write(ba,offset, len);
            }
            ap2.resetXPath();
            fos.write('\n');
            fos.write("</root>".getBytes());
        }
    }
}

C# code:

using System;
using com.ximpleware;

namespace shuffle
{
    public class shuffle
    {
        public static void Main(String[] args)
        {
            VTDGen vg = new VTDGen();
            AutoPilot ap0 = new AutoPilot();
            AutoPilot ap1 = new AutoPilot();
            AutoPilot ap2 = new AutoPilot();
            ap0.selectXPath("/root/a");
            ap1.selectXPath("/root/b");
            ap2.selectXPath("/root/c");
            Encoding eg = System.Text.Encoding.GetEncoding("utf-8");
            if (vg.parseFile("old.xml", false))
            {
                VTDNav vn = vg.getNav();
                ap0.bind(vn);
                ap1.bind(vn);
                ap2.bind(vn);
                FileStream fos = new FileStream("new.xml", System.IO.FileMode.OpenOrCreate);
                //fos.Write("<root>".getBytes());
                byte[] ba0,ba1, ba2, ba3, ba4;
                //ba0 = eg.GetBytes("
                ba1 = eg.GetBytes("<root>");
                ba2 = eg.GetBytes("</root>");
                ba3 = eg.GetBytes("\n");
                fos.Write(ba1, 0, ba1.Length);
                byte[] ba = vn.getXML().getBytes();
                while (ap0.evalXPath() != -1)
                {
                    long l = vn.getElementFragment();
                    int offset = (int)l;
                    int len = (int)(l >> 32);
                    fos.Write(ba3,0,ba3.Length);
                    fos.Write(ba, offset, len);
                }
                ap0.resetXPath();
                while (ap1.evalXPath() != -1)
                {
                    long l = vn.getElementFragment();
                    int offset = (int)l;
                    int len = (int)(l >> 32);
                    fos.Write(ba3,0,ba3.Length);
                    fos.Write(ba, offset, len);
                }
                ap1.resetXPath();
                while (ap2.evalXPath() != -1)
                {
                    long l = vn.getElementFragment();
                    int offset = (int)l;
                    int len = (int)(l >> 32);
                    fos.Write(ba3,0,ba3.Length);
                    fos.Write(ba, offset, len);
                }
                ap2.resetXPath();
                fos.Write(ba3,0,ba3.Length);
                fos.Write(ba2,0,ba2.Length);
            }
        }
    }
}
Advertisements

No comments yet

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: