/*************************************************************************** Copyright (c) Microsoft Corporation 2011. This code is licensed using the Microsoft Public License (Ms-PL). The text of the license can be found here: http://www.microsoft.com/resources/sharedsource/licensingbasics/publiclicense.mspx ***************************************************************************/ using System; using System.Collections.Generic; using System.Linq; using System.Xml; using System.Xml.Linq; using System.IO; using System.IO.Packaging; using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Validation; using System.Text; using System.Text.RegularExpressions; namespace OpenXmlPowerTools { /// /// Contains extension methods to modify Open XML Documents /// public static class PowerToolsExtensions { public static void RemovePart(this OpenXmlPart part) { var parentParts = part.GetParentParts().ToList(); foreach (var parentPart in parentParts) parentPart.DeletePart(part); } /// /// Removes personal information from the document. /// /// public static OpenXmlPowerToolsDocument RemovePersonalInformation(WmlDocument doc) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(doc)) { using (WordprocessingDocument document = streamDoc.GetWordprocessingDocument()) { XNamespace x = "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"; XDocument extendedFileProperties = document.ExtendedFilePropertiesPart.GetXDocument(); extendedFileProperties.Elements(x + "Properties").Elements(x + "Company").Remove(); XElement totalTime = extendedFileProperties.Elements(x + "Properties").Elements(x + "TotalTime").FirstOrDefault(); if (totalTime != null) totalTime.Value = "0"; document.ExtendedFilePropertiesPart.PutXDocument(); XNamespace dc = "http://purl.org/dc/elements/1.1/"; XNamespace cp = "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"; XDocument coreFileProperties = document.CoreFilePropertiesPart.GetXDocument(); foreach (var textNode in coreFileProperties.Elements(cp + "coreProperties") .Elements(dc + "creator") .Nodes() .OfType()) textNode.Value = ""; foreach (var textNode in coreFileProperties.Elements(cp + "coreProperties") .Elements(cp + "lastModifiedBy") .Nodes() .OfType()) textNode.Value = ""; foreach (var textNode in coreFileProperties.Elements(cp + "coreProperties") .Elements(dc + "title") .Nodes() .OfType()) textNode.Value = ""; XElement revision = coreFileProperties.Elements(cp + "coreProperties").Elements(cp + "revision").FirstOrDefault(); if (revision != null) revision.Value = "1"; document.CoreFilePropertiesPart.PutXDocument(); // add w:removePersonalInformation, w:removeDateAndTime to DocumentSettingsPart XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"; XDocument documentSettings = document.MainDocumentPart.DocumentSettingsPart.GetXDocument(); // add the new elements in the right position. Add them after the following three elements // (which may or may not exist in the xml document). XElement settings = documentSettings.Root; XElement lastOfTop3 = settings.Elements() .Where(e => e.Name == w + "writeProtection" || e.Name == w + "view" || e.Name == w + "zoom") .InDocumentOrder() .LastOrDefault(); if (lastOfTop3 == null) { // none of those three exist, so add as first children of the root element settings.AddFirst( settings.Elements(w + "removePersonalInformation").Any() ? null : new XElement(w + "removePersonalInformation"), settings.Elements(w + "removeDateAndTime").Any() ? null : new XElement(w + "removeDateAndTime") ); } else { // one of those three exist, so add after the last one lastOfTop3.AddAfterSelf( settings.Elements(w + "removePersonalInformation").Any() ? null : new XElement(w + "removePersonalInformation"), settings.Elements(w + "removeDateAndTime").Any() ? null : new XElement(w + "removeDateAndTime") ); } document.MainDocumentPart.DocumentSettingsPart.PutXDocument(); } return streamDoc.GetModifiedDocument(); } } private static string StringConcatenate(this IEnumerable source, Func func, string separator) { StringBuilder sb = new StringBuilder(); foreach (T item in source) sb.Append(func(item)).Append(separator); if (sb.Length > separator.Length) sb.Length -= separator.Length; return sb.ToString(); } static string ContainsAnyStyles(IEnumerable stylesToSearch, IEnumerable searchStrings) { if (searchStrings == null) return null; foreach (var style in stylesToSearch) foreach (var s in searchStrings) if (String.Compare(style, s, true) == 0) return s; return null; } static string ContainsAnyContent(string stringToSearch, IEnumerable searchStrings, IEnumerable regularExpressions, bool isRegularExpression, bool caseInsensitive) { if (searchStrings == null) return null; if (isRegularExpression) { foreach (var r in regularExpressions) if (r.IsMatch(stringToSearch)) return r.ToString(); } else if (caseInsensitive) { foreach (var s in searchStrings) if (stringToSearch.ToLower().Contains(s.ToLower())) return s; } else { foreach (var s in searchStrings) if (stringToSearch.Contains(s)) return s; } return null; } static IEnumerable GetAllStyleIdsAndNames(WordprocessingDocument doc, string styleId) { string localStyleId = styleId; XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"; yield return styleId; string styleNameForFirstStyle = (string)doc .MainDocumentPart .StyleDefinitionsPart .GetXDocument() .Root .Elements(w + "style") .Where(e => (string)e.Attribute(w + "type") == "paragraph" && (string)e.Attribute(w + "styleId") == styleId) .Elements(w + "name") .Attributes(w + "val") .FirstOrDefault(); if (styleNameForFirstStyle != null) yield return styleNameForFirstStyle; while (true) { XElement style = doc .MainDocumentPart .StyleDefinitionsPart .GetXDocument() .Root .Elements(w + "style") .Where(e => (string)e.Attribute(w + "type") == "paragraph" && (string)e.Attribute(w + "styleId") == localStyleId) .FirstOrDefault(); if (style == null) yield break; var basedOn = (string)style .Elements(w + "basedOn") .Attributes(w + "val") .FirstOrDefault(); if (basedOn == null) yield break; yield return basedOn; XElement basedOnStyle = doc .MainDocumentPart .StyleDefinitionsPart .GetXDocument() .Root .Elements(w + "style") .Where(e => (string)e.Attribute(w + "type") == "paragraph" && (string)e.Attribute(w + "styleId") == basedOn) .FirstOrDefault(); string basedOnStyleName = (string)basedOnStyle .Elements(w + "name") .Attributes(w + "val") .FirstOrDefault(); if (basedOnStyleName != null) yield return basedOnStyleName; localStyleId = basedOn; } } private static IEnumerable GetInheritedStyles(WordprocessingDocument doc, string styleName) { string localStyleName = styleName; XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"; yield return styleName; while (true) { XElement style = doc .MainDocumentPart .StyleDefinitionsPart .GetXDocument() .Root .Elements(w + "style") .Where(e => (string)e.Attribute(w + "type") == "paragraph" && (string)e.Element(w + "name").Attribute(w + "val") == localStyleName) .FirstOrDefault(); if (style == null) yield break; var basedOn = (string)style .Elements(w + "basedOn") .Attributes(w + "val") .FirstOrDefault(); if (basedOn == null) yield break; yield return basedOn; localStyleName = basedOn; } } static public Commands.MatchInfo[] SearchInDocument(WmlDocument doc, IEnumerable styleSearchString, IEnumerable contentSearchString, bool isRegularExpression, bool caseInsensitive) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(doc)) using (WordprocessingDocument document = streamDoc.GetWordprocessingDocument()) { XNamespace w = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"; XName r = w + "r"; XName ins = w + "ins"; RegexOptions options; Regex[] regularExpressions = null; if (isRegularExpression && contentSearchString != null) { if (caseInsensitive) options = RegexOptions.IgnoreCase | RegexOptions.Compiled; else options = RegexOptions.Compiled; regularExpressions = contentSearchString .Select(s => new Regex(s, options)).ToArray(); } var defaultStyleName = (string)document .MainDocumentPart .StyleDefinitionsPart .GetXDocument() .Root .Elements(w + "style") .Where(style => (string)style.Attribute(w + "type") == "paragraph" && (string)style.Attribute(w + "default") == "1") .First() .Attribute(w + "styleId"); var q1 = document .MainDocumentPart .GetXDocument() .Root .Element(w + "body") .Elements() .Select((p, i) => { var styleNode = p .Elements(w + "pPr") .Elements(w + "pStyle") .FirstOrDefault(); var styleName = styleNode != null ? (string)styleNode.Attribute(w + "val") : defaultStyleName; return new { Element = p, Index = i, StyleName = styleName }; } ); var q2 = q1 .Select(i => { string text = null; if (i.Element.Name == w + "p") text = i.Element.Elements() .Where(z => z.Name == r || z.Name == ins) .Descendants(w + "t") .StringConcatenate(element => (string)element); else text = i.Element .Descendants(w + "p") .StringConcatenate(p => p .Elements() .Where(z => z.Name == r || z.Name == ins) .Descendants(w + "t") .StringConcatenate(element => (string)element), Environment.NewLine ); return new { Element = i.Element, StyleName = i.StyleName, Index = i.Index, Text = text }; } ); var q3 = q2 .Select(i => new Commands.MatchInfo { ElementNumber = i.Index + 1, Content = i.Text, Style = ContainsAnyStyles(GetAllStyleIdsAndNames(document, i.StyleName).Distinct(), styleSearchString), Pattern = ContainsAnyContent(i.Text, contentSearchString, regularExpressions, isRegularExpression, caseInsensitive), IgnoreCase = caseInsensitive } ) .Where(i => (styleSearchString == null || i.Style != null) && (contentSearchString == null || i.Pattern != null)); return q3.ToArray(); } } public static OpenXmlPowerToolsDocument InsertXml(OpenXmlPowerToolsDocument doc, string[] partPaths, string insertionXpath, string content) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(doc)) { using (Package package = streamDoc.GetPackage()) { foreach (string partPath in partPaths) { Uri xmlPartUri; XmlDocument xmlDocument; PackagePart xmlPart = null; // Searches for the given part xmlPartUri = new Uri(partPath, UriKind.Relative); //if (!document.Package.PartExists(xmlPartUri)) //throw new ArgumentException("Specified part does not exist."); xmlPart = package.GetPart(xmlPartUri); using (XmlReader xmlReader = XmlReader.Create(xmlPart.GetStream(FileMode.Open, FileAccess.Read))) { try { xmlDocument = new XmlDocument(); xmlDocument.Load(xmlReader); } catch (XmlException) { xmlDocument = new XmlDocument(); } } // Looks into the XmlDocument for nodes at the specified path XmlNamespaceManager namespaceManager = new XmlNamespaceManager(xmlDocument.NameTable); namespaceManager.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"); XmlNode insertionPoint = xmlDocument.SelectSingleNode(insertionXpath, namespaceManager); if (insertionPoint == null) throw new ArgumentException("Insertion point does not exist."); StringReader r = new StringReader("" + content + ""); XmlNode nodoid = xmlDocument.ReadNode(XmlReader.Create(r)); //doc.LoadXml("" + xmlContent + ""); //// Inserts new contents into the given part XmlNode xmlNodeToInsert = nodoid;// doc.FirstChild; // xmlDocument.CreateElement("w","node", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"); //xmlNodeToInsert. .InnerXml = xmlContent; XmlNodeList nodes = xmlNodeToInsert.ChildNodes; if (nodes.Count > 0) for (int i = nodes.Count - 1; i >= 0; i--) { XmlNode node = nodes[i]; insertionPoint.ParentNode.InsertAfter(node, insertionPoint); } // Writes the XmlDocument back to the part using (XmlWriter writer = XmlWriter.Create(xmlPart.GetStream(FileMode.Create, FileAccess.Write))) { xmlDocument.WriteTo(writer); } } } return streamDoc.GetModifiedDocument(); } } public static OpenXmlPowerToolsDocument Lock(WmlDocument doc) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(doc)) { using (WordprocessingDocument document = streamDoc.GetWordprocessingDocument()) { //Finds the settings part XDocument settingsDocument; XElement documentProtectionElement = null; if (document.MainDocumentPart.DocumentSettingsPart == null) { //If settings part does not exist creates a new one DocumentSettingsPart settingsPart = document.MainDocumentPart.AddNewPart(); settingsDocument = settingsPart.GetXDocument(); settingsDocument.Add(new XElement(W.settings, new XAttribute(XNamespace.Xmlns + "w", W.w), new XAttribute(XNamespace.Xmlns + "r", R.r))); } else { //If the settings part does exist looks if documentProtection has been included settingsDocument = document.MainDocumentPart.DocumentSettingsPart.GetXDocument(); documentProtectionElement = settingsDocument.Element(W.settings).Element(W.documentProtection); } //Creates the documentProtection element, or edits it if it exists if (documentProtectionElement == null) { settingsDocument .Element(W.settings) .Add( new XElement(W.documentProtection, new XAttribute(W.edit, "readOnly") ) ); } else documentProtectionElement.SetAttributeValue(W.edit, "readOnly"); document.MainDocumentPart.DocumentSettingsPart.PutXDocument(); } return streamDoc.GetModifiedDocument(); } } public static void SetContent(WordprocessingDocument document, IEnumerable contents) { MainDocumentPart mainDocumentPart = document.MainDocumentPart; if (mainDocumentPart == null) mainDocumentPart = document.AddMainDocumentPart(); XDocument mainDocumentPartContentToInsert = new XDocument( new XElement(W.document, new XAttribute(XNamespace.Xmlns + "w", W.w), new XAttribute(XNamespace.Xmlns + "r", R.r), new XElement(W.body, contents))); XDocument mainDocumentPartContent = mainDocumentPart.GetXDocument(); if (mainDocumentPartContent.Root == null) mainDocumentPartContent.Add(mainDocumentPartContentToInsert.Root); else mainDocumentPartContent.Root.ReplaceWith(mainDocumentPartContentToInsert.Root); mainDocumentPart.PutXDocument(); } public static IEnumerable ValidateXml(OpenXmlPowerToolsDocument document) { if (document is WmlDocument) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(document)) using (WordprocessingDocument doc = streamDoc.GetWordprocessingDocument()) { OpenXmlValidator validator = new OpenXmlValidator(); return validator.Validate(doc); } } else if (document is SmlDocument) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(document)) using (SpreadsheetDocument doc = streamDoc.GetSpreadsheetDocument()) { OpenXmlValidator validator = new OpenXmlValidator(); return validator.Validate(doc); } } else if (document is PmlDocument) { using (OpenXmlMemoryStreamDocument streamDoc = new OpenXmlMemoryStreamDocument(document)) using (PresentationDocument doc = streamDoc.GetPresentationDocument()) { OpenXmlValidator validator = new OpenXmlValidator(); return validator.Validate(doc); } } throw new PowerToolsDocumentException("Not an Open XML document."); } } }