<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Andrew E. Bruno &#187; XML</title>
	<atom:link href="http://left.subtree.org/category/xml/feed/" rel="self" type="application/rss+xml" />
	<link>http://left.subtree.org</link>
	<description>A sourceful of secrets</description>
	<lastBuildDate>Mon, 10 May 2010 03:56:37 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='left.subtree.org' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://0.gravatar.com/blavatar/e14c799c6e8030a8abefcb495c0b0e17?s=96&#038;d=http://s2.wp.com/i/buttonw-com.png</url>
		<title>Andrew E. Bruno &#187; XML</title>
		<link>http://left.subtree.org</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://left.subtree.org/osd.xml" title="Andrew E. Bruno" />
	<atom:link rel='hub' href='http://left.subtree.org/?pushpress=hub'/>
		<item>
		<title>MIF XML at O&#039;Reilly</title>
		<link>http://left.subtree.org/2007/02/04/mif-xml-at-oreilly/</link>
		<comments>http://left.subtree.org/2007/02/04/mif-xml-at-oreilly/#comments</comments>
		<pubDate>Mon, 05 Feb 2007 05:08:51 +0000</pubDate>
		<dc:creator>Andrew</dc:creator>
				<category><![CDATA[XML]]></category>

		<guid isPermaLink="false">http://left.subtree.org/2007/02/04/mif-xml-at-oreilly/</guid>
		<description><![CDATA[Keith, a fellow O&#8217;Reillyer, has written a few posts on how O&#8217;Reilly has been making use of MIF XML (MX). Keith gives some background and example uses as a follow up to my previous posts on converting MIF to XML. He also posted some XSLT for round-tripping the XML back into MIF.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=left.subtree.org&amp;blog=13566420&amp;post=9&amp;subd=qnot&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://kfahlgren.com/blog/">Keith</a>, a fellow <a href="http://www.oreilly.com">O&#8217;Reillyer</a>, has written a few <a href="http://kfahlgren.com/blog/?p=34">posts</a> on how O&#8217;Reilly has been making use of MIF XML (MX). Keith gives some background and example uses as a follow up to my previous posts on <a href="http://left.subtree.org/2007/01/25/converting-mif-to-xml/">converting MIF to XML</a>. He also posted some <a href="http://kfahlgren.com/blog/?p=35">XSLT</a> for round-tripping the XML back into MIF.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/qnot.wordpress.com/9/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/qnot.wordpress.com/9/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/qnot.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/qnot.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/qnot.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/qnot.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/qnot.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/qnot.wordpress.com/9/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/qnot.wordpress.com/9/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/qnot.wordpress.com/9/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=left.subtree.org&amp;blog=13566420&amp;post=9&amp;subd=qnot&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://left.subtree.org/2007/02/04/mif-xml-at-oreilly/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">sigma110</media:title>
		</media:content>
	</item>
		<item>
		<title>Converting MIF to XML &#8211; Java Version</title>
		<link>http://left.subtree.org/2007/01/31/converting-mif-to-xml-java-version/</link>
		<comments>http://left.subtree.org/2007/01/31/converting-mif-to-xml-java-version/#comments</comments>
		<pubDate>Thu, 01 Feb 2007 03:57:01 +0000</pubDate>
		<dc:creator>Andrew</dc:creator>
				<category><![CDATA[Java]]></category>
		<category><![CDATA[XML]]></category>

		<guid isPermaLink="false">http://left.subtree.org/2007/01/31/converting-mif-to-xml-java-version/</guid>
		<description><![CDATA[In my previous post I discussed a tool called mif2xml for converting MIF files to an intermediate XML dialect. In this post I&#8217;ll talk about the Java port of mif2xml called mif2xml-j which you can download here including just the executable jar or browse the source online via svn. JFlex is a lexical analyzer generator [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=left.subtree.org&amp;blog=13566420&amp;post=7&amp;subd=qnot&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>In my <a href="http://left.subtree.org/2007/01/25/converting-mif-to-xml/">previous post</a> I discussed a tool called <code>mif2xml</code> for converting MIF files to an intermediate XML dialect. In this post I&#8217;ll talk about the Java port of <code>mif2xml</code> called <code>mif2xml-j</code> which you can download <a href="http://code.qnot.org/svn/projects/mif2xml-j/releases/">here</a> including just the <a href="http://code.qnot.org/svn/projects/mif2xml-j/releases/mif2xml-0.2.jar">executable jar</a> or browse the <a href="http://code.qnot.org/svn/projects/mif2xml-j/trunk/">source online</a> via svn.</p>
<p><a href="http://www.jflex.de/">JFlex</a> is a lexical analyzer generator for Java and is the library I chose to use for creating the MIF lexer. The first step was to get JFlex integrated into my build environment. For this project I decided to use <a href="http://ant.apache.org/">ant</a> but integrating JFlex into another build environment <span id="more-7"></span>should be straightforward. I created the following directory structure:</p>
<pre class="brush: plain; light: true;">
--/
  |-- src/main/jflex/               - JFlex lexical specifications
  |-- src/main/resources/MANIFEST   - Defines main class for executable jar
  |-- src/main/java/                - Java source
  |-- lib/                          - 3rd party libraries (JFlex.jar)
  |-- build.xml                     - Ant build file
</pre>
<p>JFlex comes bundled with a <code>JFlexAntTask</code> which provides a very convenient <code>&lt;jflex/&gt;</code> task. Here&#8217;s a snippet of the ant build file I created which shows how to set it up:</p>
<pre class="brush: xml;">
&lt;property name=&quot;src&quot;   location=&quot;${basedir}/src/main/java&quot; /&gt;
&lt;property name=&quot;lib&quot; location=&quot;${basedir}/lib&quot; /&gt;
&lt;property name=&quot;scanner-file&quot; value=&quot;${basedir}/src/main/jflex/mif.jflex&quot; /&gt;

&lt;path id=&quot;classpath&quot;&gt;
    &lt;pathelement location=&quot;${build}&quot; /&gt;
    &lt;fileset dir=&quot;${lib}&quot;&gt;
        &lt;include name=&quot;*.jar&quot; /&gt;
    &lt;/fileset&gt;
&lt;/path&gt;

&lt;taskdef classpathref=&quot;classpath&quot; classname=&quot;JFlex.anttask.JFlexTask&quot; name=&quot;jflex&quot; /&gt;

&lt;target name=&quot;jflex&quot; description=&quot;Generate the MIF lexer&quot;&gt;
    &lt;echo message=&quot;Generating the MIF Lexer&quot; /&gt;
    &lt;jflex file=&quot;${scanner-file}&quot; destdir=&quot;${src}&quot; /&gt;
&lt;/target&gt;
</pre>
<p>I found writing the lexical specification in JFlex and flex to be very similar. JFlex has a great <a href="http://www.jflex.de/manual.html">user manual</a> which contains a lot of useful info. Here&#8217;s the <code>mif.jflex</code> file:</p>
<pre class="brush: cpp;">
/*
 * Copyright 2007 Andrew Bruno &lt;aeb@qnot.org&gt;
 * Licensed under the Apache License, Version 2.0
 */

package org.qnot.mif2xml;
import java.util.Stack;

%%

%{
  private Stack&lt;Tag&gt; tags = new Stack&lt;Tag&gt;();
  private StringBuffer data = new StringBuffer();
  private StringBuffer facet = new StringBuffer();
%}

%line
%char
%standalone
%class  MifLexer
%xstate DATA
%xstate STR
%xstate FACET

ID=[A-Za-z][A-Za-z0-9]*
TAG=&quot;&lt;&quot;{ID}&quot; &quot;
TAG_END=&quot;&gt;&quot;
NONNEWLINE=[^\r|\n|\r\n]
NEWLINE=[\r|\n|\r\n]
WHITE_SPACE_CHAR=[ \n\t]

%%

&lt;YYINITIAL&gt; {
   {TAG}   {
        Tag tag = new Tag();
        tag.setName(yytext().substring(1, yytext().length()-1));
        tags.push(tag);
        tag.writeStart();
        data = new StringBuffer();
        yybegin(DATA);
    }

    {TAG_END}   {
        if(!tags.empty()) {
            Tag tag = (Tag)tags.pop();
            tag.writeEnd();
        }
    }

    ^&quot;=&quot;[a-zA-Z][a-zA-Z0-9]*{NEWLINE} {
        facet = new StringBuffer();
        facet.append(yytext());
        yybegin(FACET);
    }

    {WHITE_SPACE_CHAR}+   {  /* eat up whitespace */ }
    {NONNEWLINE}          {  /* eat up everything else  */ }
}

&lt;DATA&gt; {
    {NEWLINE}  {
        if(!tags.empty()) {
            Tag tag = (Tag)tags.pop();
            tag.setValue(data.toString());
            tags.push(tag);
        }
        yybegin(YYINITIAL);
    }
    &quot;`&quot;  {  yybegin(STR); }
    {TAG_END}  {
        if(!tags.empty()) {
            Tag tag = (Tag)tags.pop();
            String value = tag.getValue();

            String dataStr = data.toString();
            if(dataStr != null &amp;&amp; dataStr.length() &gt; 0) {
                value = dataStr;
            }

            if(value != null) {
                value = value.replaceAll(&quot;^\\s+&quot;, &quot;&quot;);
                value = value.replaceAll(&quot;\\s+$&quot;, &quot;&quot;);
            }

            tag.setValue(value);
            tag.writeEnd();
        }
        yybegin(YYINITIAL);
    }
    [^\n|\r|\r\n|`|&gt;] {
        data.append(yytext());
    }
}

&lt;STR&gt; {
    &quot;'&quot;  {
        if(!tags.empty()) {
            Tag tag = (Tag)tags.pop();
            if(tag.getValue() == null || tag.getValue().length() == 0) {
                tag.setValue(&quot;`'&quot;);
            }
            tags.push(tag);
        }
        yybegin(YYINITIAL);
    }
    [^']*  {
        if(!tags.empty()) {
            Tag tag = (Tag)tags.pop();
            StringBuffer buf = new StringBuffer();
            buf.append(&quot;`&quot;);
            buf.append(yytext());
            buf.append(&quot;'&quot;);
            tag.setValue(buf.toString());
            tags.push(tag);
        }
    }
}

&lt;FACET&gt; {
    ^&quot;=EndInset&quot;{NEWLINE} {
        facet.append(yytext());
        Tag.writeFacet(facet.toString());
        yybegin(YYINITIAL);
    }

    .*{NEWLINE} {
        facet.append(yytext());
    }
}
</pre>
<p>I created a simple <code>Tag</code> class to encapsulate a MIF XML tag and handle writing out each tag. The <code>MifLexer</code> keeps a stack of <code>Tag</code> instances while it&#8217;s processing the input file:</p>
<pre class="brush: java;">
/*
 * Copyright 2007 Andrew Bruno &lt;aeb@qnot.org&gt;
 * Licensed under the Apache License, Version 2.0
 */

package org.qnot.mif2xml;

public class Tag {
    private String name;
    private String value;

    public String getName() {
        return this.name;
    }

    public String getValue() {
        return this.value;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setValue(String value) {
        this.value = value;
    }

    public void writeEnd() {
        if(value != null &amp;&amp; value.length() &gt; 0) {
            System.out.print(escape(value) + &quot;&lt;/&quot; + name + &quot;&gt;&quot;);
        } else {
            System.out.print(&quot;&lt;/&quot; + name + &quot;&gt;&quot;);
        }
    }

    public void writeStart() {
        System.out.print(&quot;&lt;&quot; + name + &quot;&gt;&quot; );
    }

    public static void writeFacet(String facet) {
        System.out.print(&quot;&lt;_facet&gt;&lt;![CDATA[&quot;);
        System.out.print(facet);
        System.out.print(&quot;]]&gt;&lt;/_facet&gt;&quot;);
    }

    private String escape(String str) {
        str = str.replaceAll(&quot;&amp;&quot;, &quot;&amp;amp;&quot;);
        str = str.replaceAll(&quot;\&quot;&quot;, &quot;&amp;quot;&quot;);
        str = str.replaceAll(&quot;&gt;&quot;, &quot;&amp;gt;&quot;);
        str = str.replaceAll(&quot;&lt;&quot;, &quot;&amp;lt;&quot;);
        str = str.replaceAll(&quot;^\\s+&quot;, &quot;&quot;);
        str = str.replaceAll(&quot;\\s+$&quot;, &quot;&quot;);

        return str;
    }
}
</pre>
<p>There&#8217;s a separate <code>Main</code> class which creates a new instance of the <code>MifLexer</code> class for processing the file passed in on the command line. I&#8217;d like to eventually extend this class so that it handles command line options and possibly even runs some XSLT&#8217;s over the generated MIF XML.</p>
<pre class="brush: java;">
/*
 * Copyright 2007 Andrew Bruno &lt;aeb@qnot.org&gt;
 * Licensed under the Apache License, Version 2.0
 */

package org.qnot.mif2xml;

import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.FileReader;

public class Main {
    public static void main(String[] args) {
        if(args.length != 1) {
            System.err.println(&quot;Usage : mif2xml &lt;inputfile&gt;&quot;);
            System.exit(1);
        }

        try {
            MifLexer scanner = new MifLexer(new FileReader(args[0]));
            System.out.print(&quot;&lt;?xml version=\&quot;1.0\&quot;?&gt;&lt;mif&gt;&quot;);
            scanner.yylex();
            System.out.print(&quot;&lt;/mif&gt;&quot;);
        } catch(FileNotFoundException e) {
            System.out.println(&quot;File not found : &quot;+args[0]);
        } catch(IOException e) {
            System.out.println(&quot;I/O error scanning file '&quot;+args[0]+&quot;': &quot;+e.getMessage());
        } catch(Exception e) {
            System.out.println(&quot;Unexpected exception: &quot; + e.getMessage());
            e.printStackTrace();
        }
    }
}
</pre>
<p>To run the code download the <a href="http://code.qnot.org/svn/projects/mif2xml-j/releases/mif2xml-0.1.jar">executable jar</a> and run</p>
<pre class="brush: plain; light: true;">
$ java -jar mif2xml-0.1.jar myfile.mif
</pre>
<p>The MIF XML will be printed to stdout.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/qnot.wordpress.com/7/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/qnot.wordpress.com/7/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/qnot.wordpress.com/7/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/qnot.wordpress.com/7/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/qnot.wordpress.com/7/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/qnot.wordpress.com/7/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/qnot.wordpress.com/7/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/qnot.wordpress.com/7/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/qnot.wordpress.com/7/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/qnot.wordpress.com/7/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=left.subtree.org&amp;blog=13566420&amp;post=7&amp;subd=qnot&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://left.subtree.org/2007/01/31/converting-mif-to-xml-java-version/feed/</wfw:commentRss>
		<slash:comments>16</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">sigma110</media:title>
		</media:content>
	</item>
		<item>
		<title>Converting MIF to XML</title>
		<link>http://left.subtree.org/2007/01/25/converting-mif-to-xml/</link>
		<comments>http://left.subtree.org/2007/01/25/converting-mif-to-xml/#comments</comments>
		<pubDate>Thu, 25 Jan 2007 08:39:43 +0000</pubDate>
		<dc:creator>Andrew</dc:creator>
				<category><![CDATA[XML]]></category>

		<guid isPermaLink="false">http://left.subtree.org/2007/01/25/converting-mif-to-xml/</guid>
		<description><![CDATA[MIF (Maker Interchange Format) is an ASCII text representation of a FrameMaker document. You can export your FrameMaker documents into this text-based representation to allow for parsing and manipulation by external tools outside of FrameMaker. You can also import MIF files back into FrameMaker. If you&#8217;re interested in reading more about MIF you can [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=left.subtree.org&amp;blog=13566420&amp;post=6&amp;subd=qnot&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>MIF (Maker Interchange Format) is an ASCII text representation of a <a href="http://en.wikipedia.org/wiki/FrameMaker">FrameMaker</a> document. You can export your FrameMaker documents into this text-based representation to allow for parsing and manipulation by external tools outside of FrameMaker. You can also import MIF files back into FrameMaker. If you&#8217;re interested in reading more about MIF you can check out the <a href="http://partners.adobe.com/public/developer/en/framemaker/MIF_Reference.pdf">MIF Reference</a> from Adobe (link may be out of date).</p>
<p>There&#8217;s a great perl module on CPAN for working with MIF files called <a href="http://search.cpan.org/perldoc?FrameMaker%3A%3AMifTree">FrameMaker::MifTree</a>. It&#8217;s a subclass of <a href="http://search.cpan.org/perldoc?Tree%3A%3ADAG_Node">Tree::DAG_Node</a> and provides a nice interface for modifying the in-memory tree structure and dumping back out into MIF. The only downside to this module is that it&#8217;s very slow especially with larger MIF files.</p>
<p>At <a href="http://www.oreilly.com">O&#8217;Reilly</a> we&#8217;ve had to work with MIF files quite a bit and have taken several different approaches for processing MIF, most of which turn out to be unmaintainable scripts that are not very pleasant to work with. One of the ideas <a href="http://www.oreillynet.com/pub/au/1848">Andrew S.</a> and <a href="http://kfahlgren.com/blog/">Keith</a> came up with was to convert MIF to an intermediate XML format which would allow us to process MIF using XML tools such as XSLT and XQuery. From this intermediate XML format we can transform to DocBook, WordML, or even convert back to MIF again for later importing into FrameMaker. This approach was very appealing as it can greatly reduce the number of one-off scripts and allow us to benefit from the wide variety of libraries for parsing and transforming XML.<span id="more-6"></span></p>
<p>For example, the following snippet from a MIF file:</p>
<pre class="brush: plain;">
#
# Example of MIF
#
&lt;FontCatalog
 &lt;Font
  &lt;FTag `Acronym'&gt;
  &lt;FPosition FSubscript&gt;
  &lt;FLocked No&gt;
 &gt; # end of Font
&gt; # end of FontCatalog
</pre>
<p>Would get converted to this XML:</p>
<pre class="brush: xml;">
&lt;?xml version=&quot;1.0&quot;?&gt;
&lt;!--
 Example XML from MIF
--&gt;
&lt;mif&gt;
  &lt;FontCatalog&gt;
    &lt;Font&gt;
      &lt;FTag&gt;`Acronym'&lt;/FTag&gt;
      &lt;FPosition&gt;FSubscript&lt;/FPosition&gt;
      &lt;FLocked&gt;No&lt;/FLocked&gt;
    &lt;/Font&gt;
  &lt;/FontCatalog&gt;
&lt;/mif&gt;
</pre>
<p>This is not a new idea and one tool I know of which seems to do a similar task is called <a href="http://www.leximation.com/tools/mifml/">MIFML</a> written by Leximation which converts MIF to MIFML (an intermediate XML dialect they created). Unfortunately, it only runs on Windows and is not open source. They have, however, released the <a href="http://www.leximation.com/tools/mifml/mifml.dtd.txt">DTD</a> they are using for MIFML.</p>
<p>I thought this would be a fun problem to take a stab at, so I built a tool called <code>mif2xml</code> that produces output that looks a lot like the example above. You can download a <a href="http://code.qnot.org/svn/projects/mif2xml/releases/mif2xml-0.2.tar.gz">copy here</a> or browse the <a href="http://code.qnot.org/svn/projects/mif2xml/trunk/">source code</a> online via svn.</p>
<p>The guts of <code>mif2xml</code> include a lexer <code>mif.ll</code> and a helper class for writing out MIF XML tags.  I chose to create a <code>c++</code> lexer so I could make use of the STL <code>stack</code> and <code>string</code> classes. Here&#8217;s the <code>mif.ll</code> file which gets run through flex to generate the lexer:</p>
<pre class="brush: cpp;">
/**
 * Copyright (c) 2007 Andrew Bruno &lt;aeb@qnot.org&gt;
 * Licensed under the GNU General Public License version 2
 */

%{
#include &lt;iostream&gt;
#include &lt;stack&gt;
#include &lt;string&gt;
#include &lt;miftag.h&gt;
using namespace std;

stack&lt;Tag&gt; tags;
string data;
string facet;
%}

%option  noyywrap
%option  c++
%x DATA
%x STR
%x FACET

ID                [A-Za-z][A-Za-z0-9]*
TAG               &quot;&lt;&quot;{ID}&quot; &quot;
TAG_END           &quot;&gt;&quot;
NONNEWLINE        [^\r|\n|\r\n]
NEWLINE           [\r|\n|\r\n]
WHITE_SPACE_CHAR  [ \n\t]

%%

&lt;INITIAL&gt;{TAG}  {
    Tag tag;
    string name = YYText();
    tag.name = name.substr(1, name.length()-2);
    tags.push(tag);
    tag.writeStart();
    data = string(&quot;&quot;);
    BEGIN(DATA);
}

&lt;INITIAL&gt;{TAG_END} {
    if(!tags.empty()) {
        Tag tag = tags.top();
        tag.writeEnd();
        tags.pop();
    }
}

&lt;INITIAL&gt;^&quot;=&quot;[a-zA-Z][a-zA-Z0-9]*{NEWLINE} {
    facet = string(&quot;&quot;);
    string str = string(YYText());
    facet.append(str);
    BEGIN(FACET);
}

&lt;INITIAL&gt;{WHITE_SPACE_CHAR}+   {  /* eat up whitespace */ }
&lt;INITIAL&gt;{NONNEWLINE}          {  /* eat up everything else  */ }

&lt;DATA&gt;{NEWLINE}  {
    if(!tags.empty()) {
        Tag tag = tags.top();
        tag.value = data;
    }
    BEGIN(INITIAL);
}
&lt;DATA&gt;&quot;`&quot;  {  BEGIN(STR); }
&lt;DATA&gt;{TAG_END}  {
    if(!tags.empty()) {
        Tag tag = tags.top();

        if(data.length() &gt; 0) {
            tag.value = data;
        }
        tag.writeEnd();
        tags.pop();
    }
    BEGIN(INITIAL);
}
&lt;DATA&gt;[^\n|\r|\r\n|`|&gt;] {
    string str = string(YYText());
    data.append(str);
}

&lt;STR&gt;&quot;'&quot;  {
    if(!tags.empty()) {
        Tag &amp;tag = tags.top();
        if(tag.value.length() == 0) {
            tag.value = &quot;`'&quot;;
        }
    }
    BEGIN(INITIAL);
}
&lt;STR&gt;[^']*  {
    if(!tags.empty()) {
        Tag &amp;tag = tags.top();
        string str = string(YYText());
        string buf = &quot;`&quot;;
        buf.append(str);
        buf.append(&quot;'&quot;);
        tag.value = buf;
    }
}

&lt;FACET&gt;^&quot;=EndInset&quot;{NEWLINE} {
    string str = string(YYText());
    facet.append(str);
    writeFacet(facet);
    BEGIN(INITIAL);
}

&lt;FACET&gt;.*{NEWLINE} {
    string str = string(YYText());
    facet.append(str);
}

%%

int main(int argc, char **argv) {
    cout &lt;&lt; &quot;&lt;?xml version=\&quot;1.0\&quot;?&gt;&lt;mif&gt;&quot;;
    FlexLexer* lexer = new yyFlexLexer;
    while(lexer-&gt;yylex() != 0);
    cout &lt;&lt; &quot;&lt;/mif&gt;&quot;;
    return 0;
}
</pre>
<p>Here&#8217;s the <code>miftag.h</code> file which contains a helper class for writing out MIF XML tags. Rather than having a dependency on libxml or some other XML processing library I chose to just implement the XML output by hand. It&#8217;s not nearly as robust but it worked out OK for a first pass.</p>
<pre class="brush: cpp;">
/**
 * Copyright (c) 2007 Andrew Bruno &lt;aeb@qnot.org&gt;
 * Licensed under the GNU General Public License version 2
 */

#ifndef __MIFTAG__
#define __MIFTAG__

#include &lt;string&gt;
using namespace std;

class Tag {
    public:
        string name;
        string value;

        void writeEnd();
        void writeStart();
};

void Tag::writeEnd() {
    if(!this-&gt;value.empty()) {
        /* escape xml special chars */
        string::size_type size = this-&gt;value.size();
        for(string::size_type i = 0; i &lt; size;) {
            if(this-&gt;value[i] == '&amp;') {
                this-&gt;value.replace(i, 1, &quot;&amp;amp;&quot;);
                i += 4;
                size += 4;
            } else if(this-&gt;value[i] == '&lt;') {
                this-&gt;value.replace(i, 1, &quot;&amp;lt;&quot;);
                i += 3;
                size += 3;
            } else if(this-&gt;value[i] == '&gt;') {
                this-&gt;value.replace(i, 1, &quot;&amp;gt;&quot;);
                i += 3;
                size += 3;
            } else if(this-&gt;value[i] == '&quot;') {
                this-&gt;value.replace(i, 1, &quot;&amp;quot;&quot;);
                i += 5;
                size += 5;
            } else {
                i++;
            }
        }

        /* Trim leading spaces */
        while(this-&gt;value[0] == ' ') {
            this-&gt;value.erase(0, 1);
        }

        /* Trim trailing spaces */
        while(this-&gt;value[this-&gt;value.size()-1] == ' ') {
            this-&gt;value.erase(this-&gt;value.size()-1, 1);
        }

        cout &lt;&lt; value &lt;&lt; &quot;&lt;/&quot; &lt;&lt; this-&gt;name &lt;&lt; &quot;&gt;&quot;;
    } else {
        cout &lt;&lt; &quot;&lt;/&quot; &lt;&lt; this-&gt;name &lt;&lt; &quot;&gt;&quot;;
    }
}

void Tag::writeStart() {
    cout &lt;&lt; &quot;&lt;&quot; &lt;&lt; this-&gt;name &lt;&lt; &quot;&gt;&quot;;
}

void writeFacet(string facet) {
    cout &lt;&lt; &quot;&lt;_facet&gt;&lt;![CDATA[&quot; &lt;&lt; facet &lt;&lt; &quot;]]&gt;&lt;/_facet&gt;&quot;;
}

#endif
</pre>
<p>Finally a quick and dirty Makefile:</p>
<pre class="brush: plain;">
all:
	flex++ mif.ll
	g++ -I. -o mif2xml lex.yy.cc -lfl

clean:
	rm -f lex.yy.cc *.o mif2xml
</pre>
<p>The code above has not been thoroughly tested on all possible MIF files so your mileage may vary. We currently use a version of <code>mif2xml</code> at O&#8217;Reilly on the occasions we need to process MIF and it has been working out quite well. The XML generated from <code>mif2xml</code> is then run through a set of custom transforms written in XSLT 2.0 which transform the MIF XML to DocBook, WordML, and various other formats.</p>
<p>In my <a href="http://left.subtree.org/2007/01/31/converting-mif-to-xml-java-version/">next post</a> I&#8217;ll discuss a pure Java version of <code>mif2xml</code> which uses a great library called <a href="http://www.jflex.de/">JFlex</a> for generating the MIF lexer.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/qnot.wordpress.com/6/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/qnot.wordpress.com/6/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/qnot.wordpress.com/6/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/qnot.wordpress.com/6/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/qnot.wordpress.com/6/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/qnot.wordpress.com/6/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/qnot.wordpress.com/6/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/qnot.wordpress.com/6/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/qnot.wordpress.com/6/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/qnot.wordpress.com/6/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=left.subtree.org&amp;blog=13566420&amp;post=6&amp;subd=qnot&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://left.subtree.org/2007/01/25/converting-mif-to-xml/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">sigma110</media:title>
		</media:content>
	</item>
	</channel>
</rss>