/// <summary>
/// Reads every Word document (.doc / .docx) found directly in the folder E:/20190917
/// and collects its content into a <see cref="DataTable"/>.
/// </summary>
/// <remarks>
/// One row is added per document whose text contains at least two paragraphs:
/// column "One" holds the first paragraph, "TwoText" the plain-text body and
/// "TwoHtml" the HTML body, both produced by GetContent from the text that follows
/// the "表格显示:" marker.
/// Requires: using System.Data; using System.IO; using Microsoft.Office.Interop.Word;
/// </remarks>
/// <returns>
/// The filled table, or null when any exception occurs (the message is written
/// to the console through WriteErrorInformation).
/// </returns>
public static DataTable Run()
{
    // A single Word instance is shared by all documents and is created lazily,
    // so no Word process is started when the folder contains no Word files.
    Application app = null;
    try
    {
        // Result table that receives the data extracted from the documents.
        DataTable dt = new DataTable();
        dt.Columns.Add(new DataColumn("One", typeof(string)));
        dt.Columns.Add(new DataColumn("TwoText", typeof(string)));
        dt.Columns.Add(new DataColumn("TwoHtml", typeof(string)));

        int number = 0;        // documents that produced a row
        int troublecount = 0;  // documents that had no usable body

        DirectoryInfo dir = new DirectoryInfo("E:/20190917");
        foreach (FileInfo item in dir.GetFiles())
        {
            // Only Word documents are processed; every other file is skipped silently.
            bool isWordDocument =
                string.Equals(item.Extension, ".doc", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(item.Extension, ".docx", StringComparison.OrdinalIgnoreCase);
            if (!isWordDocument)
            {
                continue;
            }

            object fileName = item.FullName;
            object confirmConversions = false;
            Document doc = null;
            try
            {
                if (app == null)
                {
                    app = new Application();
                    app.Visible = false;
                }

                doc = app.Documents.Open(ref fileName, ref confirmConversions);
                string content = doc.Content.Text;

                // Word separates paragraphs with '\r'; fewer than two parts means
                // there is nothing after the first paragraph.
                string[] paragraphs = content.Split('\r');
                if (paragraphs.Length < 2)
                {
                    troublecount++;
                    Console.WriteLine("文件{0}中没有正文!!!!!!!!。{1}", fileName, troublecount);
                    continue; // the finally block below still closes the document
                }

                DataRow dr = dt.NewRow();
                dr["One"] = paragraphs[0];

                int contentIndex = content.IndexOf("表格显示:", StringComparison.Ordinal);
                List<string> lst = GetContent(doc, content, contentIndex);
                dr["TwoText"] = lst[0];
                dr["TwoHtml"] = lst[1];
                dt.Rows.Add(dr);

                number++;
                WriteOuputInformation(string.Format("{0}:文档已经存入数据库。{1}", fileName, number));
            }
            finally
            {
                // Always close the document, even on the early 'continue' or on an
                // exception; the document is only read, so nothing is saved.
                if (doc != null)
                {
                    object saveChanges = false;
                    doc.Close(ref saveChanges);
                }
            }
        }

        Console.WriteLine("所有文件已读取完毕,共读取了{0}条数据,没有数据的Word文档总条数为{1}", number, troublecount);
        return dt;
    }
    catch (Exception exp)
    {
        WriteErrorInformation(string.Format("Exception: {0}", exp.Message));
        return null;
    }
    finally
    {
        // Shut Word down on every exit path so no hidden WINWORD process is left behind.
        if (app != null)
        {
            app.Quit();
        }
    }
}
#region[获取表格纯文本内容和富文本内容]
/// <summary>
/// Extracts the document body that follows the marker position and returns it
/// both as plain text and as HTML.
/// </summary>
/// <param name="doc">The open Word document; each of its tables is rendered as an HTML table.</param>
/// <param name="Content">The full text of the document.</param>
/// <param name="contentIndex">
/// Index in <paramref name="Content"/> where the body marker starts; the body begins
/// 6 characters after it. A negative value (marker not found) selects the whole text.
/// </param>
/// <returns>
/// A two-element list: [0] the plain-text body, [1] the HTML body in which every
/// paragraph is wrapped in &lt;p&gt; and every table's text is replaced by table markup.
/// </returns>
static List<string> GetContent(Document doc, string Content, int contentIndex)
{
    // Clamp the start offset: a missing marker (-1) or a marker too close to the end
    // of the text must neither cut off unrelated leading characters nor throw.
    int bodyStart = contentIndex < 0 ? 0 : Math.Min(contentIndex + 6, Content.Length);
    string contentText = Content.Substring(bodyStart);
    string contentHtml = contentText;

    int cellSlots = 0; // number of (row, column) positions visited over all tables
    int tableCount = doc.Tables.Count;
    for (int i = 1; i <= tableCount; i++) // Word collections are 1-based
    {
        // Fetch the table and its dimensions once instead of on every loop iteration.
        var table = doc.Tables[i];
        string wordTable = table.Range.Text;
        int rowCount = table.Rows.Count;
        int columnCount = table.Columns.Count;

        var html = new System.Text.StringBuilder();
        html.Append("<table cellspacing='0' bordercolor='black' border='1' cellpadding='5' text-align='center'>");
        for (int row = 1; row <= rowCount; row++)
        {
            html.Append("<tr>");
            for (int column = 1; column <= columnCount; column++)
            {
                html.Append("<td>");
                // Strip the paragraph mark and the end-of-cell mark from the cell text.
                html.Append(table.Cell(row, column).Range.Text.Replace("\r", "").Replace("\a", ""));
                html.Append("</td>");
                cellSlots++;
            }
            html.Append("</tr>");
        }
        html.Append("</table>");

        // Swap the table's raw text inside the body for the generated markup.
        contentHtml = contentHtml.Replace(wordTable, html.ToString());
    }

    // The plain text is padded with one space per visited table cell.
    // NOTE(review): kept from the previous implementation; confirm whether callers need it.
    if (cellSlots > 0)
    {
        contentText += new string(' ', cellSlots);
    }

    // Every '\r' ends a paragraph; wrap the paragraphs in <p> elements.
    contentHtml = "<p>" + contentHtml.Replace("\r", "</p><p>") + "</p>";

    return new List<string> { contentText, contentHtml };
}
#endregion
#region[操作后给出提示信息]
/// <summary>
/// Writes an error message to the console in red, prefixed with "Error: ".
/// </summary>
/// <param name="errorInformation">The text to print after the prefix.</param>
static void WriteErrorInformation(string errorInformation)
{
    // Remember the caller's colour so it can be restored instead of forcing Gray.
    ConsoleColor previousColor = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Red;
    try
    {
        Console.WriteLine("Error: " + errorInformation);
    }
    finally
    {
        Console.ForegroundColor = previousColor;
    }
}
/// <summary>
/// Writes a progress message to the console in dark green, prefixed with "-->>".
/// </summary>
/// <param name="outputInformation">The text to print after the prefix.</param>
static void WriteOuputInformation(string outputInformation)
{
    // Remember the caller's colour so it can be restored instead of forcing Gray.
    ConsoleColor previousColor = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.DarkGreen;
    try
    {
        Console.WriteLine("-->>" + outputInformation);
    }
    finally
    {
        Console.ForegroundColor = previousColor;
    }
}
#endregion