2009-11-03 09:48:49 +01:00
|
|
|
using System;
|
2009-03-08 01:46:58 +01:00
|
|
|
using System.IO;
|
2009-08-17 14:28:22 +02:00
|
|
|
using System.Text.RegularExpressions;
|
2009-03-08 01:46:58 +01:00
|
|
|
using System.Xml;
|
|
|
|
using System.Xml.Xsl;
|
|
|
|
|
|
|
|
namespace Bind
|
|
|
|
{
|
|
|
|
/// <summary>
/// Turns a documentation file into text: MathML blocks are stripped from the
/// raw file (and replaced by the plain-text equation found in their
/// "eqn" comment, when there is one), then the result is run through the XSL
/// transform loaded by the constructor.
/// </summary>
/// <remarks>
/// The transform and the reader settings are static, so they are shared by
/// every instance; constructing a new instance reloads the shared transform.
/// The result of the most recently processed file is cached per instance.
/// </remarks>
class DocProcessor
{
    // Matches a whole <mml:math>...</mml:math> or <inlineequation>...</inlineequation> block.
    static readonly Regex remove_mathml = new Regex(
        @"<(mml:math|inlineequation)[^>]*?>(?:.|\n)*?</\s*\1\s*>",
        RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

    static readonly XslCompiledTransform xslt = new XslCompiledTransform();
    static readonly XmlReaderSettings settings = new XmlReaderSettings();

    // Single-entry cache: the result produced for LastFile.
    string Text;
    string LastFile;

    /// <summary>
    /// Loads the XSL transform and configures the shared reader settings
    /// (DTDs are allowed, but no resolver is set so external resources are not fetched).
    /// </summary>
    /// <param name="transform_file">Path of the XSLT stylesheet to load.</param>
    public DocProcessor(string transform_file)
    {
        xslt.Load(transform_file);
        settings.ProhibitDtd = false;
        settings.XmlResolver = null;
    }

    /// <summary>
    /// Strips MathML tags from the file, replaces the equations with the content
    /// found in the <c>&lt;!-- eqn: :--&gt;</c> comments in the docs, and applies
    /// the XSL transform to the result.
    /// </summary>
    /// <param name="file">Path of the documentation file to process.</param>
    /// <returns>
    /// The transformed text with trailing newlines trimmed, or
    /// <see cref="String.Empty"/> when the file is not well-formed XML.
    /// Calling again with the same path returns the cached result.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
    // Todo: Some simple MathML tags do not include comments, find a solution.
    // Todo: Some files include more than 1 function - find a way to map these extra functions.
    public string ProcessFile(string file)
    {
        if (file == null)
        {
            throw new ArgumentNullException("file");
        }

        if (LastFile == file)
        {
            return Text;
        }

        // Work on locals and only commit to the cache once an outcome is known,
        // so an exception while reading cannot leave LastFile pointing at stale Text.
        string source = ReplaceMathML(File.ReadAllText(file));
        string result;

        try
        {
            // The pure XmlReader is ~20x faster than the XmlTextReader.
            using (XmlReader doc = XmlReader.Create(new StringReader(source), settings))
            using (StringWriter sw = new StringWriter())
            {
                xslt.Transform(doc, null, sw);
                result = sw.ToString().TrimEnd('\n');
            }
        }
        catch (XmlException e)
        {
            Console.WriteLine(e.ToString());
            Console.WriteLine("[Warning] Failed to process documentation file '{0}'.", file);
            // Cache the failure too, so repeated calls for this file agree with the first one.
            result = String.Empty;
        }

        LastFile = file;
        Text = result;
        return result;
    }

    // Removes every MathML block from the text, substituting a CDATA section that
    // holds the block's plain-text equation whenever one can be extracted.
    static string ReplaceMathML(string text)
    {
        // The text changes on every iteration, so the search restarts each time.
        for (Match m = remove_mathml.Match(text); m.Success; m = remove_mathml.Match(text))
        {
            string equation = ExtractEquation(m.Value);
            text = text.Remove(m.Index, m.Length);
            if (equation != null)
            {
                text = text.Insert(m.Index, "<![CDATA[" + equation + "]]>");
            }
        }

        return text;
    }

    // Returns the text between "eqn:" and the closing ":-->" of the block's equation
    // comment, or null when the block carries no usable equation comment.
    static string ExtractEquation(string removed)
    {
        int equation = removed.IndexOf("eqn", StringComparison.Ordinal);
        if (equation <= 0)
        {
            return null;
        }

        // Skip "eqn" plus the separator character that follows it.
        int eqn_start = equation + 4;
        int eqn_end = -1;
        if (eqn_start <= removed.Length)
        {
            // Look for the terminator after the equation start only, so an earlier
            // comment terminator in the block cannot be mistaken for this one.
            eqn_end = removed.IndexOf(":-->", eqn_start, StringComparison.Ordinal);
            if (eqn_end < 0)
            {
                // Note: a few docs from man4 delimit eqn end with ": -->"
                eqn_end = removed.IndexOf(": -->", eqn_start, StringComparison.Ordinal);
            }
        }

        if (eqn_end < 0)
        {
            Console.WriteLine("[Warning] Failed to find equation for mml.");
            return null;
        }

        return removed.Substring(eqn_start, eqn_end - eqn_start);
    }
}
|
|
|
|
}
|