WordAnalyze.cs 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. using Word = Microsoft.Office.Interop.Word;
  2. using System.Diagnostics;
  3. using Logger = Log.Log;
  4. using System.Text.RegularExpressions;
  5. using System.Collections;
  6. namespace WordAnalyze
  7. {
  /// <summary>
  /// Splits a Word document into several smaller Word documents by walking its
  /// paragraphs and starting a new output file at section headings ("一、" … "九、")
  /// and numbered questions ("1." … "199.").
  /// </summary>
  /// <remarks>
  /// Typical call order: <see cref="Application"/> → optional <see cref="SetPath"/> →
  /// <see cref="AnalyzeFile"/> (any number of times) → <see cref="Close"/>.
  /// Methods report failures by returning the error message; an empty string means success.
  /// </remarks>
  public class Analyze
  {
      /// <summary>Prefixes that mark the start of a top-level ("big") question section.</summary>
      private static readonly string[] BigQuestionMarkers = { "一、", "二、", "三、", "四、", "五、", "六、", "七、", "八、", "九、" };

      /// <summary>Exclusive upper bound of the question numbers recognised as "N." prefixes.</summary>
      private const int SmallQuestionLimit = 200;

      /// <summary>Matches any whitespace character; cached so it is not rebuilt per paragraph.</summary>
      private static readonly Regex WhitespacePattern = new Regex(@"\s");

      /// <summary>Prefixes "1." … "199." that mark the start of a numbered ("small") question.</summary>
      // NOTE(review): this class used to keep two lists with identical "N." content and scan
      // both. The second list was presumably meant for a different delimiter — confirm with
      // the author and add the extra prefixes here if so.
      private readonly string[] smallQuestionMarkers;

      /// <summary>The Word automation instance; null until <see cref="Application"/> succeeds.</summary>
      private Word.ApplicationClass app;

      /// <summary>Separator string "---". Not read anywhere inside this class.</summary>
      public static string splitChar = "---";

      /// <summary>
      /// Output directory for the generated documents. While empty, <see cref="Rename"/>
      /// fills it with the directory of the first file it is asked to rename.
      /// </summary>
      public string path = "";

      /// <summary>Builds the table of numbered-question prefixes.</summary>
      public Analyze()
      {
          smallQuestionMarkers = new string[SmallQuestionLimit - 1];
          for (int number = 1; number < SmallQuestionLimit; number++)
          {
              smallQuestionMarkers[number - 1] = number.ToString() + ".";
          }
      }

      /// <summary>
      /// Starts the Word automation instance used by the other methods. Calling it again
      /// while an instance is already held reuses that instance instead of orphaning it.
      /// </summary>
      /// <returns>An empty string on success, otherwise the exception message.</returns>
      public string Application()
      {
          try
          {
              if (app == null)
              {
                  app = new Word.ApplicationClass();
              }
              return "";
          }
          catch (System.Exception e)
          {
              return e.Message;
          }
      }

      /// <summary>Quits the Word instance, if one was started, and forgets the reference.</summary>
      public void Close()
      {
          if (app == null)
          {
              return;
          }
          try
          {
              app.Quit();
          }
          finally
          {
              // Drop the reference even if Quit throws so a later Close() does not
              // touch a dead COM object and Application() can start a fresh one.
              app = null;
          }
      }

      /// <summary>Sets the directory the generated documents are written to.</summary>
      /// <param name="path">Target directory, without a trailing backslash.</param>
      public void SetPath(string path)
      {
          this.path = path;
      }

      /// <summary>
      /// Opens <paramref name="fileName"/> in Word and splits it via <see cref="dot"/>.
      /// </summary>
      /// <param name="fileName">Path of a .doc or .docx file.</param>
      /// <returns>An empty string on success, otherwise an error message.</returns>
      public string AnalyzeFile(string fileName)
      {
          if (!ValidFileName(fileName))
          {
              Logger.D("AnalyzeFile with invalid file name {0}", fileName);
              return "无效的文件名";
          }
          if (app == null)
          {
              // Application() was never called (or Close() already ran); report that
              // explicitly instead of surfacing a NullReferenceException message.
              const string notStarted = "Word 应用尚未启动,请先调用 Application()";
              Logger.E("analyze with file name({0}) error ->({1})", fileName, notStarted);
              return notStarted;
          }

          object fn = fileName;
          Word.Document doc = null; // the source Word document
          bool handedToDot = false;
          try
          {
              doc = app.Documents.Open(ref fn);
              // dot() iterates with "i < Count", i.e. it skips the last paragraph; this
              // extra paragraph is presumably what keeps the real last one in range.
              // dot() undoes the edit and closes the document without saving.
              doc.Paragraphs.Add();
              handedToDot = true;
              return dot(doc, fileName);
          }
          catch (System.Exception e)
          {
              Logger.E("analyze with file name({0}) error ->({1})", fileName, e.Message);
              if (!handedToDot)
              {
                  // dot() never ran, so nobody else will close the source document.
                  TryCloseWithoutSaving(doc, fileName);
              }
              return e.Message;
          }
      }

      /// <summary>
      /// Checks that the file name ends with ".doc" or ".docx" (case-insensitive).
      /// </summary>
      /// <param name="fileName">File name or path to check; may be null.</param>
      /// <returns>True when the name is non-empty and has one of the two extensions.</returns>
      public bool ValidFileName(string fileName)
      {
          if (string.IsNullOrEmpty(fileName))
          {
              return false;
          }
          return fileName.EndsWith(".doc", System.StringComparison.OrdinalIgnoreCase)
              || fileName.EndsWith(".docx", System.StringComparison.OrdinalIgnoreCase);
      }

      /// <summary>
      /// Builds the path of the <paramref name="index"/>-th output document: the file name
      /// of <paramref name="filename"/> with the index inserted before its extension,
      /// placed in <see cref="path"/> (which defaults to the directory of
      /// <paramref name="filename"/>).
      /// </summary>
      /// <param name="filename">Source file path using '\' as directory separator.</param>
      /// <param name="index">Sequence number of the output document.</param>
      /// <returns>For example "C:\docs\test.doc" with index 2 gives "C:\docs\test2.doc".</returns>
      public string Rename(string filename, int index)
      {
          int separatorIndex = filename.LastIndexOf('\\');
          bool hasDirectory = separatorIndex != -1;
          // A bare file name (no '\') is the name itself; previously it was dropped entirely.
          string name = hasDirectory ? filename.Substring(separatorIndex + 1) : filename;
          string sourceDirectory = hasDirectory ? filename.Substring(0, separatorIndex) : "";

          if (string.IsNullOrEmpty(path))
          {
              // NOTE(review): this default is stored in the public field, so it also applies
              // to later files from other directories. Kept because callers may read `path`.
              path = sourceDirectory;
          }

          string prefix = (hasDirectory || path.Length > 0) ? path + "\\" : "";

          // Look for the extension in the file name only, so a '.' inside a directory
          // name can never be mistaken for the extension separator.
          int extensionIndex = name.LastIndexOf('.');
          string numberedName = extensionIndex != -1
              ? name.Substring(0, extensionIndex) + index + name.Substring(extensionIndex)
              : name + index;

          return prefix + numberedName;
      }

      /// <summary>
      /// Walks the paragraphs of <paramref name="doc"/>, copies them into new documents and
      /// saves each one under a name produced by <see cref="Rename"/>. Always closes
      /// <paramref name="doc"/> without saving when done.
      /// </summary>
      /// <param name="doc">The opened source document.</param>
      /// <param name="filename">Path of the source document; basis for the output names.</param>
      /// <returns>An empty string on success, otherwise the exception message.</returns>
      public string dot(Word.Document doc, string filename)
      {
          Word.Document newDoc = null;
          try
          {
              Word.Paragraphs paragraphs = doc.Paragraphs;
              newDoc = app.Documents.Add();

              bool insert = false;   // current output document holds content worth saving
              bool bigMark = false;  // a section heading was the most recent marker seen
              bool firstBig = false; // at least one numbered question has been seen
              int fileIndex = 1;

              // Strict "<": the final paragraph is not processed (see AnalyzeFile).
              for (int i = 1; i < paragraphs.Count; i++)
              {
                  Word.Paragraph paragraph = paragraphs[i];
                  string rangeText = paragraph.Range.Text ?? "";
                  if (rangeText.Trim().Length < 1)
                  {
                      continue;
                  }
                  string listNum = paragraph.Range.ListFormat.ListString;
                  Logger.D("dot get index:{0}, message: {1}", i, rangeText);

                  if (checkTextStartWithBig(rangeText))
                  {
                      Logger.D("dot get index: {0}, firstBig: {1}", i, firstBig);
                      if (firstBig)
                      {
                          newDoc = SaveAndStartNew(newDoc, filename, fileIndex++);
                      }
                      AppendParagraph(paragraph, newDoc);
                      // A heading alone is not worth saving; wait for content below it.
                      insert = false;
                      bigMark = true;
                      continue;
                  }

                  if (checkTextStartWithNum(listNum) || checkTextStartWithNum(rangeText))
                  {
                      bool startNewFile = !bigMark && insert;
                      Logger.D("dot get index: {0}, insert: {1}, bigMark: {2}, condition: {3}, condition2:{4}", i, insert, bigMark, startNewFile, startNewFile);
                      if (startNewFile)
                      {
                          newDoc = SaveAndStartNew(newDoc, filename, fileIndex++);
                      }
                      AppendParagraph(paragraph, newDoc);
                      insert = true;
                      bigMark = false;
                      firstBig = true;
                      continue;
                  }

                  // Ordinary paragraph: belongs to whatever question is currently open.
                  AppendParagraph(paragraph, newDoc);
                  insert = true;
              }

              if (insert)
              {
                  object file = Rename(filename, fileIndex++);
                  newDoc.SaveAs2(file);
              }
              CloseWithoutSaving(newDoc);
              newDoc = null;
          }
          catch (System.Exception e)
          {
              Logger.E("dot with filename({0}) error ->({1})", filename, e.Message);
              // Do not leave a half-filled output document open inside Word.
              TryCloseWithoutSaving(newDoc, filename);
              return e.Message;
          }
          finally
          {
              if (doc != null)
              {
                  doc.Undo();
                  // The source must never be modified, so discard any pending change
                  // rather than letting Word ask whether to save it.
                  CloseWithoutSaving(doc);
              }
          }
          Logger.D("dot with filename({0}) success", filename);
          return "";
      }

      /// <summary>
      /// Checks whether the text, ignoring whitespace, starts with a section marker
      /// such as "一、" or "二、".
      /// </summary>
      /// <param name="text">Paragraph text; may be null or empty.</param>
      /// <returns>True when the text starts with one of the section markers.</returns>
      public bool checkTextStartWithBig(string text)
      {
          return StartsWithAnyMarker(text, BigQuestionMarkers);
      }

      /// <summary>
      /// Checks whether the text, ignoring whitespace, starts with a question number
      /// such as "1." or "2." (numbers 1 to 199).
      /// </summary>
      /// <param name="text">Paragraph text or list string; may be null or empty.</param>
      /// <returns>True when the text starts with one of the numbered prefixes.</returns>
      public bool checkTextStartWithNum(string text)
      {
          return StartsWithAnyMarker(text, smallQuestionMarkers);
      }

      /// <summary>Strips all whitespace from the text and tests it against each marker prefix.</summary>
      private static bool StartsWithAnyMarker(string text, string[] markers)
      {
          if (string.IsNullOrEmpty(text))
          {
              return false;
          }
          string compact = WhitespacePattern.Replace(text, "");
          foreach (string marker in markers)
          {
              // Markers are fixed symbols, so compare ordinally rather than by culture.
              if (compact.StartsWith(marker, System.StringComparison.Ordinal))
              {
                  return true;
              }
          }
          return false;
      }

      /// <summary>Copies one source paragraph to the clipboard and pastes it into the target document.</summary>
      private void AppendParagraph(Word.Paragraph paragraph, Word.Document target)
      {
          paragraph.Range.Select();
          app.Selection.Copy();
          // Activate the output document explicitly instead of relying on its
          // position in the Documents collection.
          target.Activate();
          app.Selection.Paste();
      }

      /// <summary>Saves the current output document under its numbered name, closes it and opens a blank one.</summary>
      private Word.Document SaveAndStartNew(Word.Document current, string filename, int index)
      {
          object file = Rename(filename, index);
          current.SaveAs2(file);
          CloseWithoutSaving(current);
          return app.Documents.Add();
      }

      /// <summary>Closes a document, discarding unsaved changes so Word never shows a save prompt.</summary>
      private static void CloseWithoutSaving(Word.Document document)
      {
          object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
          document.Close(ref doNotSave);
      }

      /// <summary>Best-effort variant of <see cref="CloseWithoutSaving"/> for error paths: logs instead of throwing.</summary>
      private static void TryCloseWithoutSaving(Word.Document document, string filename)
      {
          if (document == null)
          {
              return;
          }
          try
          {
              CloseWithoutSaving(document);
          }
          catch (System.Exception e)
          {
              // Already on a failure path; the original error is what the caller reports.
              Logger.E("close document for filename({0}) error ->({1})", filename, e.Message);
          }
      }
  }
  258. }