2
0
Эх сурвалжийг харах

ADD: Find files - search text in Office XML (*.odt)

Alexander Koblov 4 жил өмнө
parent
commit
582d3ed903

+ 0 - 1
src/fFindDlg.lfm

@@ -526,7 +526,6 @@ object frmFindDlg: TfrmFindDlg
             AnchorSideTop.Control = cbTextRegExp
             Left = 592
             Height = 24
-            Hint = 'Office XML (*.docx)'
             Top = 67
             Width = 93
             BorderSpacing.Left = 15

+ 0 - 1
src/fFindDlg.lrj

@@ -25,7 +25,6 @@
 {"hash":35720169,"name":"tfrmfinddlg.cbreplacetext.caption","sourcebytes":[82,101,38,112,108,97,99,101,32,98,121],"value":"Re&place by"},
 {"hash":137727326,"name":"tfrmfinddlg.cbtextregexp.caption","sourcebytes":[82,101,103,38,117,108,97,114,32,101,120,112,114,101,115,115,105,111,110],"value":"Reg&ular expression"},
 {"hash":259470556,"name":"tfrmfinddlg.chkhex.caption","sourcebytes":[72,101,120,97,100,101,99,105,38,109,97,108],"value":"Hexadeci&mal"},
-{"hash":223887657,"name":"tfrmfinddlg.cbofficexml.hint","sourcebytes":[79,102,102,105,99,101,32,88,77,76,32,40,42,46,100,111,99,120,41],"value":"Office XML (*.docx)"},
 {"hash":214077868,"name":"tfrmfinddlg.cbofficexml.caption","sourcebytes":[79,102,102,105,38,99,101,32,88,77,76],"value":"Offi&ce XML"},
 {"hash":197676484,"name":"tfrmfinddlg.tsadvanced.caption","sourcebytes":[65,100,118,97,110,99,101,100],"value":"Advanced"},
 {"hash":122109610,"name":"tfrmfinddlg.cbdatefrom.caption","sourcebytes":[38,68,97,116,101,32,102,114,111,109,58],"value":"&Date from:"},

+ 2 - 0
src/fFindDlg.pas

@@ -667,6 +667,8 @@ begin
   cmbFileSizeUnit.Items.Add(rsSizeUnitGBytes);
   cmbFileSizeUnit.Items.Add(rsSizeUnitTBytes);
 
+  cbOfficeXML.Hint := StripHotkey(cbOfficeXML.Caption) + ' (*.docx, *.odt)';
+
   // fill search depth combobox
   cmbSearchDepth.Items.Add(rsFindDepthAll);
   cmbSearchDepth.Items.Add(rsFindDepthCurDir);

+ 1 - 1
src/ufindthread.pas

@@ -355,7 +355,7 @@ begin
   Result := False;
   if sData = '' then Exit;
 
-  if FSearchTemplate.OfficeXML and MatchesMask(sFileName, '*.docx') then
+  if FSearchTemplate.OfficeXML and MatchesMaskList(sFileName, '*.docx;*.odt') then
   begin
     if LoadFromOffice(sFileName, S) then
     begin

+ 121 - 24
src/uofficexml.pas

@@ -1,7 +1,7 @@
 {
    Double commander
    -------------------------------------------------------------------------
-   Load text from office xml (*.docx)
+   Load text from office xml (*.docx, *.odt)
 
    Copyright (C) 2021 Alexander Koblov ([email protected])
 
@@ -35,7 +35,27 @@ implementation
 uses
   Unzip, ZipUtils, Laz2_DOM, laz2_XMLRead;
 
-procedure ProcessNodes(var S: String; ANode: TDOMNode);
+{
+  Extracts the current file of an opened zip archive into MemoryStream.
+
+  ZipFile      - archive handle, already positioned on the wanted entry
+  MemoryStream - destination; it is resized to the uncompressed size
+
+  Returns True only when the entry info was obtained, the entry was opened
+  and exactly "uncompressed_size" bytes were read into the stream.
+}
+function ExtractFile(ZipFile: unzFile; MemoryStream: TMemoryStream): Boolean;
+var
+  ASize: LongInt;
+  FileInfo: unz_file_info;
+begin
+  Result:= unzGetCurrentFileInfo(ZipFile, @FileInfo, nil, 0, nil, 0, nil, 0) = UNZ_OK;
+  if Result then
+  begin
+    MemoryStream.SetSize(FileInfo.uncompressed_size);
+    // A failed open must be reported as a failure, otherwise the caller
+    // would parse a buffer that was sized but never filled
+    Result:= unzOpenCurrentFile(ZipFile) = UNZ_OK;
+    if Result then
+    begin
+      // Read straight into the stream buffer, the stream position is untouched
+      ASize:= unzReadCurrentFile(ZipFile, MemoryStream.Memory, FileInfo.uncompressed_size);
+      Result:= (ASize = FileInfo.uncompressed_size);
+      unzCloseCurrentFile(ZipFile);
+    end;
+  end;
+end;
+
+{ Office Open XML }
+
+procedure ProcessOfficeOpenNodes(var S: String; ANode: TDOMNode);
 var
   I: Integer;
   ASubNode: TDOMNode;
@@ -59,25 +79,7 @@ begin
       S += #9;
 
     if ASubNode.ChildNodes.Count > 0 then
-      ProcessNodes(S, ASubNode);
-  end;
-end;
-
-function ExtractFile(ZipFile: unzFile; FileName: PAnsiChar; MemoryStream: TMemoryStream): Boolean;
-var
-  ASize: LongInt;
-  FileInfo: unz_file_info;
-begin
-  Result:= unzGetCurrentFileInfo(ZipFile, @FileInfo, nil, 0, nil, 0, nil, 0) = UNZ_OK;
-  if Result then
-  begin
-    MemoryStream.SetSize(FileInfo.uncompressed_size);
-    if unzOpenCurrentFile(ZipFile) = UNZ_OK then
-    begin
-      ASize:= unzReadCurrentFile(ZipFile, MemoryStream.Memory, FileInfo.uncompressed_size);
-      Result:= (ASize = FileInfo.uncompressed_size);
-      unzCloseCurrentFile(ZipFile);
-    end;
+      ProcessOfficeOpenNodes(S, ASubNode);
   end;
 end;
 
@@ -90,12 +92,12 @@ begin
   begin
     AStream:= TMemoryStream.Create;
     try
-      if ExtractFile(ZipFile, PAnsiChar(FileName), AStream) then
+      if ExtractFile(ZipFile, AStream) then
       begin
         ReadXMLFile(ADoc, AStream, [xrfPreserveWhiteSpace]);
         if Assigned (ADoc) then
         begin
-          ProcessNodes(AText, ADoc.DocumentElement);
+          ProcessOfficeOpenNodes(AText, ADoc.DocumentElement);
           ADoc.Free;
         end;
       end;
@@ -105,7 +107,7 @@ begin
   end;
 end;
 
-function LoadFromOffice(const FileName: String; out AText: String): Boolean;
+function LoadFromOfficeOpen(const FileName: String; out AText: String): Boolean;
 const
   HEADER_XML = 'word/header%d.xml';
   FOOTER_XML = 'word/footer%d.xml';
@@ -136,5 +138,100 @@ begin
   end;
 end;
 
+{ Open Document Format }
+
+{
+  Appends the text found below ANode to S.
+
+  Every non-empty 'text:p' / 'text:h' child is flattened by ParseSubNode and
+  terminated with LineEnding; any other child that has children of its own
+  is searched recursively.
+}
+procedure ProcessOpenOfficeNodes(var S: String; ANode: TDOMNode);
+var
+  I: Integer;
+  ASubNode: TDOMNode;
+  ANodeName: DOMString;
+
+  // Appends the inline content (text, spaces, tabs, line breaks) of ANode to S
+  procedure ParseSubNode(ANode: TDOMNode);
+  var
+    J: Integer;
+    ACount: Integer;
+    ASubNode: TDOMNode;
+  begin
+    for J:= 0 to ANode.ChildNodes.Count - 1 do
+    begin
+      ASubNode := ANode.ChildNodes.Item[J];
+      ANodeName := ASubNode.NodeName;
+
+      if ANodeName = 'text:s' then
+      begin
+        // 'text:s' stands for a run of spaces; the optional 'text:c'
+        // attribute carries the run length (a single space when absent)
+        ACount := 1;
+        if ASubNode is TDOMElement then
+          ACount := StrToIntDef(TDOMElement(ASubNode).GetAttribute('text:c'), 1);
+        // Ignore a malformed (zero or negative) count
+        if ACount < 1 then ACount := 1;
+        S += StringOfChar(' ', ACount);
+      end
+      else if ANodeName = 'text:tab' then
+        S += #9
+      else if ANodeName = 'text:line-break' then
+        S += LineEnding
+      else if (ASubNode.NodeType = TEXT_NODE) then
+        S += ASubNode.NodeValue
+      else begin
+        // Any other element (e.g. a span) is flattened recursively
+        ParseSubNode(ASubNode);
+      end;
+    end;
+  end;
+
+begin
+  for I:= 0 to ANode.ChildNodes.Count - 1 do
+  begin
+    ASubNode := ANode.ChildNodes.Item[I];
+    ANodeName := ASubNode.NodeName;
+
+    if (ANodeName = 'text:p') or (ANodeName = 'text:h') then
+    begin
+      // Empty paragraphs and headings contribute nothing, not even a line end
+      if ASubNode.ChildNodes.Count > 0 then
+      begin
+        ParseSubNode(ASubNode);
+        S += LineEnding;
+      end;
+    end
+    else if ASubNode.ChildNodes.Count > 0 then
+      ProcessOpenOfficeNodes(S, ASubNode);
+  end;
+end;
+
+{
+  Loads the text of an Open Document file.
+
+  FileName - path of the archive to open
+  AText    - receives the text extracted from 'content.xml',
+             empty when nothing could be extracted
+
+  Returns True when at least one character of text was extracted.
+}
+function LoadFromOpenOffice(const FileName: String; out AText: String): Boolean;
+const
+  CONTENT_XML = 'content.xml';
+var
+  ZipFile: unzFile;
+  ADoc: TXMLDocument;
+  AStream: TMemoryStream;
+begin
+  Result:= False;
+  AText:= EmptyStr;
+  ZipFile:= unzOpen(PAnsiChar(FileName));
+  if Assigned(ZipFile) then
+  try
+    if unzLocateFile(ZipFile, CONTENT_XML, 0) = UNZ_OK then
+    begin
+      AStream:= TMemoryStream.Create;
+      try
+        if ExtractFile(ZipFile, AStream) then
+        begin
+          ReadXMLFile(ADoc, AStream, [xrfPreserveWhiteSpace]);
+          if Assigned(ADoc) then
+          try
+            ProcessOpenOfficeNodes(AText, ADoc.DocumentElement);
+          finally
+            // Release the document even when node processing raises
+            ADoc.Free;
+          end;
+        end;
+      finally
+        AStream.Free;
+      end;
+    end;
+    Result:= Length(AText) > 0;
+  finally
+    unzClose(ZipFile);
+  end;
+end;
+
+{
+  Loads the text of an office document, choosing the loader by extension:
+  '.docx' (compared case-insensitively) goes to LoadFromOfficeOpen,
+  every other extension goes to LoadFromOpenOffice.
+}
+function LoadFromOffice(const FileName: String; out AText: String): Boolean;
+var
+  AExt: String;
+begin
+  AExt:= ExtractFileExt(FileName);
+  // Guard clause: everything that is not *.docx is handled as Open Document
+  if not SameText(AExt, '.docx') then
+    Exit(LoadFromOpenOffice(FileName, AText));
+  Result:= LoadFromOfficeOpen(FileName, AText);
+end;
+
 end.