2009-03-08 01:46:58 +01:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Xml;
|
|
|
|
|
using System.Xml.Xsl;
|
|
|
|
|
using System.Xml.XPath;
|
|
|
|
|
|
|
|
|
|
namespace Bind
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Reads an XML documentation file, strips its MathML blocks and runs the result
/// through the XSLT stylesheet given at construction time.
/// </summary>
class DocProcessor
{
    // Matches one complete <mml:math ...> ... </mml:math> element, line breaks included.
    static readonly Regex remove_mathml = new Regex(@"<(mml:math)[^>]*?>(?:.|\n)*?</\s*\1\s*>",
        RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

    // Markers of the "<!-- eqn: ... :-->" comment that carries the plain-text equation.
    const string equation_start = "eqn";
    const string equation_end = ":-->";

    // Per-instance state: the constructor loads a stylesheet into these objects, so sharing
    // them statically would let a later instance replace the stylesheet of an earlier one.
    readonly XslCompiledTransform xslt = new XslCompiledTransform();
    readonly XmlReaderSettings settings = new XmlReaderSettings();

    /// <summary>
    /// Creates a processor that applies the stylesheet stored in <paramref name="transform_file"/>.
    /// </summary>
    /// <param name="transform_file">Path of the XSLT stylesheet to load.</param>
    public DocProcessor(string transform_file)
    {
        xslt.Load(transform_file);

        // Accept documents that carry a DTD, but never resolve external resources for them.
        settings.ProhibitDtd = false;
        settings.XmlResolver = null;
    }

    // Strips MathML tags from the source and replaces the equations with the content
    // found in the <!-- eqn: :--> comments in the docs.
    // Todo: Some simple MathML tags do not include comments, find a solution.
    // Todo: Some files include more than 1 function - find a way to map these extra functions.
    /// <summary>
    /// Processes a single documentation file.
    /// </summary>
    /// <param name="file">Path of the XML documentation file to read.</param>
    /// <returns>
    /// The text produced by the stylesheet, or <see cref="String.Empty"/> when the
    /// document cannot be parsed as XML (the error is written to the console).
    /// </returns>
    public string ProcessFile(string file)
    {
        // A single pass over the text; every MathML block is swapped for its replacement.
        string text = remove_mathml.Replace(File.ReadAllText(file), new MatchEvaluator(ReplaceMathML));

        try
        {
            // The pure XmlReader is ~20x faster than the XmlTextReader.
            using (StringReader input = new StringReader(text))
            using (XmlReader doc = XmlReader.Create(input, settings))
            using (StringWriter sw = new StringWriter())
            {
                xslt.Transform(doc, null, sw);
                return sw.ToString();
            }
        }
        catch (XmlException e)
        {
            Console.WriteLine(e.ToString());
            Console.WriteLine("Failed to process documentation file: " + file);
            return String.Empty;
        }
    }

    /// <summary>
    /// Computes the replacement for one matched MathML block: the equation text wrapped
    /// in a CDATA section, or nothing when the block has no complete equation comment.
    /// </summary>
    static string ReplaceMathML(Match match)
    {
        string removed = match.Value;

        int equation = removed.IndexOf(equation_start, StringComparison.Ordinal);
        if (equation < 0)
        {
            return String.Empty;
        }

        // Skip the marker and the ':' that follows it.
        int start = equation + equation_start.Length + 1;
        if (start > removed.Length)
        {
            return String.Empty;
        }

        // Look for the terminator only after the marker, so the length can never be negative.
        int end = removed.IndexOf(equation_end, start, StringComparison.Ordinal);
        if (end < 0)
        {
            return String.Empty;
        }

        return "<![CDATA[" + removed.Substring(start, end - start) + "]]>";
    }
}
|
|
|
|
|
}
|