using System ;
using System. IO ;
using System. Linq ;
using System. Text ;
using DocumentFormat. OpenXml ;
using DocumentFormat. OpenXml. Packaging ;
using DocumentFormat. OpenXml. Wordprocessing ;
/// <summary>
/// Opens a Word document read-only and returns its body text as one
/// whitespace-normalized string.
/// </summary>
/// <remarks>
/// Text is collected by <c>ExtractBodyContent</c> and normalized by <c>CleanContent</c>.
/// When the normalized text contains "限公司第" at an index greater than zero,
/// everything from the last occurrence onward is discarded and the remainder is trimmed.
/// </remarks>
/// <param name="path">Path of the document to open.</param>
/// <param name="password">Value assigned to <c>OpenSettings.Password</c> when opening the document.</param>
/// <returns>
/// The extracted text, or <c>null</c> when the document has no body or when any
/// exception is thrown (the exception is logged through <c>LogManager.WriteError</c>).
/// </returns>
public static string GetWordContentByOpenXml(string path, string password)
{
    // Text from the last occurrence of this marker to the end is cut off.
    const string tailMarker = "限公司第";

    try
    {
        // NOTE(review): confirm the referenced SDK build really exposes OpenSettings.Password.
        using (var document = WordprocessingDocument.Open(path, false, new OpenSettings() { Password = password }))
        {
            // Resolve the body once instead of walking the part/document chain twice.
            var body = document.MainDocumentPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var contentBuilder = new StringBuilder();
            ExtractBodyContent(body, contentBuilder);
            string content = CleanContent(contentBuilder.ToString());

            // Ordinal comparison: the marker is a fixed literal, so the match must not
            // depend on the current culture's collation rules.
            int index = content.LastIndexOf(tailMarker, StringComparison.Ordinal);
            return index > 0 ? content.Substring(0, index).Trim() : content;
        }
    }
    catch (Exception ex)
    {
        // ex.ToString() includes the exception type, message, inner exceptions and stack
        // trace; logging only StackTrace dropped the actual failure reason.
        LogManager.WriteError("GetWordContentByOpenXml()", ex.ToString());
        return null;
    }
}
/// <summary>
/// Passes each direct child of <paramref name="body"/>, in order, to
/// <c>ExtractElementContent</c>.
/// </summary>
/// <param name="body">Document body whose children are processed.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractBodyContent(Body body, StringBuilder contentBuilder)
{
    foreach (OpenXmlElement child in body.Elements())
    {
        ExtractElementContent(child, contentBuilder);
    }
}
/// <summary>
/// Appends the text of a single element to <paramref name="contentBuilder"/>.
/// </summary>
/// <remarks>
/// Paragraphs are extracted and followed by a line break, tables are handed to
/// <c>ExtractTableContent</c>, section properties are skipped, and any other
/// element is searched recursively through its children.
/// </remarks>
/// <param name="element">Element to process.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractElementContent(OpenXmlElement element, StringBuilder contentBuilder)
{
    if (element is Paragraph para)
    {
        ExtractParagraphContent(para, contentBuilder);
        contentBuilder.AppendLine();
        return;
    }

    if (element is Table tbl)
    {
        ExtractTableContent(tbl, contentBuilder);
        return;
    }

    if (element is SectionProperties)
    {
        // Section properties contribute no text.
        return;
    }

    // Unknown container: descend into its children.
    foreach (OpenXmlElement nested in element.Elements())
    {
        ExtractElementContent(nested, contentBuilder);
    }
}
/// <summary>
/// Appends the content of every run that is a direct child of
/// <paramref name="paragraph"/> to <paramref name="contentBuilder"/>.
/// </summary>
/// <remarks>
/// Within each run, text, tab and break children are emitted in document order:
/// text as-is, a tab as "\t" and a break as a line terminator. Other run children
/// are ignored. Runs nested in other elements (for example hyperlinks) are not visited.
/// </remarks>
/// <param name="paragraph">Paragraph to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractParagraphContent(Paragraph paragraph, StringBuilder contentBuilder)
{
    foreach (var run in paragraph.Elements<Run>())
    {
        // Walk the children once, in order. Handling all texts, then all tabs, then all
        // breaks would move separators to the end of the run ("a<tab/>b" -> "ab\t").
        foreach (var child in run.Elements())
        {
            switch (child)
            {
                case Text text:
                    contentBuilder.Append(text.Text);
                    break;
                case TabChar _:
                    contentBuilder.Append("\t");
                    break;
                case Break _:
                    contentBuilder.AppendLine();
                    break;
            }
        }
    }
}
/// <summary>
/// Appends the text of <paramref name="table"/> to <paramref name="contentBuilder"/>:
/// each cell is followed by a tab and each row by a line terminator.
/// </summary>
/// <remarks>
/// Only rows that are direct children of the table, cells that are direct children
/// of a row, and paragraphs that are direct children of a cell are visited.
/// Paragraphs inside the same cell are separated by a line terminator.
/// </remarks>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractTableContent(Table table, StringBuilder contentBuilder)
{
    foreach (var row in table.Elements<TableRow>())
    {
        foreach (var cell in row.Elements<TableCell>())
        {
            bool firstParagraph = true;
            foreach (var paragraph in cell.Elements<Paragraph>())
            {
                // Separate paragraphs within a cell; without this, the last word of one
                // paragraph fuses with the first word of the next.
                if (!firstParagraph)
                {
                    contentBuilder.AppendLine();
                }

                firstParagraph = false;
                ExtractParagraphContent(paragraph, contentBuilder);
            }

            contentBuilder.Append("\t");
        }

        contentBuilder.AppendLine();
    }
}
/// <summary>
/// Normalizes extracted text: removes control characters and a fixed evaluation
/// watermark string, collapses every run of whitespace (including line breaks)
/// into a single space, and trims the result.
/// </summary>
/// <param name="content">Raw extracted text; may be <c>null</c> or empty.</param>
/// <returns>The normalized text, or <see cref="string.Empty"/> for null/empty input.</returns>
private static string CleanContent(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // Strip control characters first. \x0B and \x0C are deliberately left out of this
    // class: they are whitespace to the regex engine and are turned into a space by the
    // collapse step below, which is what happened to them before as well.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"[\x00-\x08\x0E-\x1F\x7F]", "");

    // NOTE(review): this literal contains no spaces, so it only matches text in which
    // the watermark's words are already run together.
    content = content.Replace("EvaluationOnly.CreatedwithAspose.Words.Copyright2003-2024AsposePtyLtd.", "");

    // Collapse whitespace last so the removals above cannot leave doubled spaces behind.
    // Once this has run no newline remains, so a separate blank-line pass would be dead code.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"\s+", " ");

    return content.Trim();
}
/// <summary>
/// Probes whether a document can be opened without a password.
/// </summary>
/// <param name="path">Path of the document to probe.</param>
/// <returns>
/// <c>false</c> when the document opens read-only without error. When an
/// <c>OpenXmlPackageException</c> is thrown, <c>true</c> only if its message mentions
/// "password", "encrypted" or "protected" (case-insensitively). <c>true</c> for any
/// other exception.
/// </returns>
public static bool IsPasswordRequired(string path)
{
    try
    {
        // The document itself is not needed; opening it successfully is the whole test.
        using (WordprocessingDocument.Open(path, false))
        {
            return false;
        }
    }
    catch (OpenXmlPackageException ex)
    {
        string message = ex.Message ?? string.Empty;

        // Case-insensitive so that "Password", "Encrypted" and similar spellings count too.
        return new[] { "password", "encrypted", "protected" }
            .Any(keyword => message.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0);
    }
    catch
    {
        // NOTE(review): every other failure is reported as "password required" —
        // presumably this includes missing or unreadable files; confirm callers expect that.
        return true;
    }
}
/// <summary>
/// Opens a Word document read-only and returns its body text as one
/// whitespace-normalized string, optionally with hyperlink text and footnotes.
/// </summary>
/// <remarks>
/// Body text is collected by <c>ExtractBodyContentAdvanced</c> and normalized by
/// <c>CleanContent</c>. When the normalized body contains "公司第" at an index greater
/// than zero, everything from the last occurrence onward is discarded. Footnotes, when
/// requested and present, are normalized separately and appended after that
/// truncation, separated from the body by a single space.
/// </remarks>
/// <param name="path">Path of the document to open.</param>
/// <param name="password">Value assigned to <c>OpenSettings.Password</c> when opening the document.</param>
/// <param name="includeHyperlinks">Forwarded to the body extractor to include hyperlink text.</param>
/// <param name="includeFootnotes">When <c>true</c>, footnote text is appended if the document has a footnotes part.</param>
/// <returns>
/// The extracted text, or <c>null</c> when the document has no body or when any
/// exception is thrown (the exception is logged through <c>LogManager.WriteError</c>).
/// </returns>
public static string GetWordContentByOpenXmlAdvanced(string path, string password, bool includeHyperlinks = false, bool includeFootnotes = false)
{
    // Text from the last occurrence of this marker to the end of the body is cut off.
    // NOTE(review): GetWordContentByOpenXml uses the longer marker "限公司第" — confirm
    // the difference is intentional.
    const string tailMarker = "公司第";

    try
    {
        // NOTE(review): confirm the referenced SDK build really exposes OpenSettings.Password.
        using (var document = WordprocessingDocument.Open(path, false, new OpenSettings() { Password = password }))
        {
            var mainPart = document.MainDocumentPart;
            var body = mainPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var bodyBuilder = new StringBuilder();
            ExtractBodyContentAdvanced(body, bodyBuilder, includeHyperlinks);
            string content = CleanContent(bodyBuilder.ToString());

            // Ordinal comparison: the marker is a fixed literal, so the match must not
            // depend on the current culture's collation rules.
            int index = content.LastIndexOf(tailMarker, StringComparison.Ordinal);
            if (index > 0)
            {
                content = content.Substring(0, index).Trim();
            }

            // Footnotes are added only after the body has been truncated. Appending them
            // first meant the truncation removed them whenever the marker occurred in the body.
            if (includeFootnotes && mainPart.FootnotesPart != null)
            {
                var footnoteBuilder = new StringBuilder();
                ExtractFootnotesContent(mainPart.FootnotesPart, footnoteBuilder);
                string footnotes = CleanContent(footnoteBuilder.ToString());

                if (footnotes.Length > 0)
                {
                    content = content.Length > 0 ? content + " " + footnotes : footnotes;
                }
            }

            return content;
        }
    }
    catch (Exception ex)
    {
        // ex.ToString() includes the exception type, message, inner exceptions and stack
        // trace; logging only StackTrace dropped the actual failure reason.
        LogManager.WriteError("GetWordContentByOpenXmlAdvanced()", ex.ToString());
        return null;
    }
}
/// <summary>
/// Passes each direct child of <paramref name="body"/>, in order, to
/// <c>ExtractElementContentAdvanced</c>.
/// </summary>
/// <param name="body">Document body whose children are processed.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Forwarded to the paragraph and table extractors.</param>
private static void ExtractBodyContentAdvanced(Body body, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in body.Elements())
    {
        ExtractElementContentAdvanced(element, contentBuilder, includeHyperlinks);
    }
}

/// <summary>
/// Appends the text of a single element: paragraphs are extracted and followed by a
/// line terminator, tables are handed to <c>ExtractTableContentAdvanced</c>, section
/// properties are skipped, and any other element is searched through its children.
/// </summary>
private static void ExtractElementContentAdvanced(OpenXmlElement element, StringBuilder contentBuilder, bool includeHyperlinks)
{
    switch (element)
    {
        case Paragraph paragraph:
            ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
            contentBuilder.AppendLine();
            break;

        case Table table:
            ExtractTableContentAdvanced(table, contentBuilder, includeHyperlinks);
            break;

        case SectionProperties _:
            break;

        default:
            // Recurse on the child directly. Wrapping it in a new Body would try to
            // append an element that already has a parent, which the SDK rejects with
            // an InvalidOperationException and which aborted the whole extraction.
            foreach (var childElement in element.Elements())
            {
                ExtractElementContentAdvanced(childElement, contentBuilder, includeHyperlinks);
            }

            break;
    }
}
/// <summary>
/// Appends the content of <paramref name="paragraph"/> to
/// <paramref name="contentBuilder"/>, visiting its direct children in order.
/// </summary>
/// <remarks>
/// Runs are always extracted. Runs that are direct children of a hyperlink are
/// extracted only when <paramref name="includeHyperlinks"/> is <c>true</c>. Other
/// children are ignored.
/// </remarks>
/// <param name="paragraph">Paragraph to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Whether hyperlink text is included.</param>
private static void ExtractParagraphContentAdvanced(Paragraph paragraph, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in paragraph.Elements())
    {
        switch (element)
        {
            case Run run:
                AppendRunContentAdvanced(run, contentBuilder);
                break;

            case Hyperlink hyperlink when includeHyperlinks:
                foreach (var linkRun in hyperlink.Elements<Run>())
                {
                    AppendRunContentAdvanced(linkRun, contentBuilder);
                }

                break;
        }
    }
}

/// <summary>
/// Appends a run's text, tab and break children in document order: text as-is, a tab
/// as "\t" and a break as a line terminator.
/// </summary>
private static void AppendRunContentAdvanced(Run run, StringBuilder contentBuilder)
{
    foreach (var child in run.Elements())
    {
        switch (child)
        {
            case Text text:
                contentBuilder.Append(text.Text);
                break;

            // Tabs and breaks are kept, as ExtractParagraphContent does; dropping them
            // fuses the words on either side of the separator.
            case TabChar _:
                contentBuilder.Append("\t");
                break;
            case Break _:
                contentBuilder.AppendLine();
                break;
        }
    }
}
/// <summary>
/// Appends the text of <paramref name="table"/> to <paramref name="contentBuilder"/>:
/// each cell is followed by a tab and each row by a line terminator.
/// </summary>
/// <remarks>
/// Only rows that are direct children of the table, cells that are direct children
/// of a row, and paragraphs that are direct children of a cell are visited.
/// Paragraphs inside the same cell are separated by a line terminator.
/// </remarks>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Forwarded to <c>ExtractParagraphContentAdvanced</c>.</param>
private static void ExtractTableContentAdvanced(Table table, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var row in table.Elements<TableRow>())
    {
        foreach (var cell in row.Elements<TableCell>())
        {
            bool firstParagraph = true;
            foreach (var paragraph in cell.Elements<Paragraph>())
            {
                // Separate paragraphs within a cell; without this, the last word of one
                // paragraph fuses with the first word of the next.
                if (!firstParagraph)
                {
                    contentBuilder.AppendLine();
                }

                firstParagraph = false;
                ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
            }

            contentBuilder.Append("\t");
        }

        contentBuilder.AppendLine();
    }
}
/// <summary>
/// Appends a "--- 脚注 ---" header followed by the text of every footnote paragraph,
/// one paragraph per line, to <paramref name="contentBuilder"/>.
/// </summary>
/// <remarks>
/// Nothing is appended when the part has no <c>Footnotes</c> root. Only paragraphs
/// that are direct children of a footnote are read, via <c>ExtractParagraphContent</c>.
/// </remarks>
/// <param name="footnotesPart">Footnotes part of the document.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractFootnotesContent(FootnotesPart footnotesPart, StringBuilder contentBuilder)
{
    var footnotes = footnotesPart.Footnotes;
    if (footnotes == null)
    {
        return;
    }

    contentBuilder.AppendLine("\n--- 脚注 ---");

    // Flatten footnote -> paragraph while keeping document order.
    var footnoteParagraphs = footnotes
        .Elements<Footnote>()
        .SelectMany(note => note.Elements<Paragraph>());

    foreach (var para in footnoteParagraphs)
    {
        ExtractParagraphContent(para, contentBuilder);
        contentBuilder.AppendLine();
    }
}