2012-11-30 3 views
2

XPath 또는 XQuery로 HTML을 구문 분석할 수 있는 델파이 라이브러리가 있습니까? PHP가 기본적으로 구현한 것과 유사한 것, 예를 들어 FLWOR 같은 기능을 지원하는 라이브러리를 찾고 있습니다. (제목: Delphi용 XQuery/XPath 2.0)

답변

2

저는 그런 라이브러리는 모르지만, 제가 XPath를 '수행'하는 데 사용하는 SelectNode(s) 함수가 들어 있는 XE2용 도우미 클래스 유닛을 아래에 올립니다. RemoveNameSpaces와 XMLToTree 함수는 이 질문과 직접적인 관련은 없지만 유용하게 쓸 수 있을 것입니다 ;-)

unit uXMLHelper; 

interface 

Uses 
    System.SysUtils, System.Classes, System.TypInfo, Vcl.ComCtrls, 
    XML.XMLDoc, XMLDom, XML.XMLIntf; 

type 
    // Namespace-style holder for XML convenience routines. Every member is a
    // class method, so the helpers are called as TXMLHelper.Xxx(...) without
    // creating an instance.
    TXMLHelper = class 
    public 
     // Evaluates NodeXPath starting at StartNode and returns the selected node.
     // Returns nil when StartNode is unassigned, when its DOM node does not
     // support IDomNodeSelect, or when the selection yields no node.
     class function SelectNode(StartNode: IXmlNode; const NodeXPath: WideString): IXmlNode; 
     // Evaluates NodeXPath starting at StartNode and returns the selected nodes
     // wrapped in an IXMLNodeList. Returns nil when StartNode is unassigned or
     // lacks the required interfaces, or when the selection returns no list.
     class function SelectNodes(StartNode: IXmlNode; const NodeXPath: WideString): IXMLNodeList; 
     // Runs an XSLT transform over XMLString that rebuilds every element and
     // attribute under its local name. If an exception occurs, the exception
     // message is returned instead of XML.
     class function RemoveNameSpaces(XMLString: String): String; 
     // Activates XmlDoc, clears TV and fills it with the element/attribute
     // structure found under XmlDoc.DocumentElement.
     class procedure XMLToTree(XmlDoc: IXMLDocument; TV: TTreeView); 
    end; 

function ConcatNodeNames(NodeNames: Array of String): String; 
// Concatenates the strings in NodeNames, each preceded by '/', giving
// /name1/name2/.../namex. Example: ['Items', 'CalendarItem'] yields
// '/Items/CalendarItem'.

implementation 

Uses 
    MSXML2_TLB; // IXMLDOMdocument 

class function TXMLHelper.RemoveNameSpaces(XMLString: String): String; 
{ Returns XMLString with every element and attribute rebuilt under its local
  name, i.e. with namespace declarations and prefixes removed.

  XMLString : the XML document text to transform.
  Result    : the transformed XML without a leading XML declaration.
              On failure (XMLString cannot be parsed, or any exception during
              the transform) the error text is returned instead of XML;
              no exception escapes the try block. }
const 
    // An XSLT script for removing the namespaces from any document. It will remove the prefix as well. 
    // From http://wiki.tei-c.org/index.php/Remove-Namespaces.xsl 
    cRemoveNSTransform = 
    '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">' + 
    '<xsl:output method="xml" indent="no"/>' + 

    '<xsl:template match="/|comment()|processing-instruction()">' + 
    ' <xsl:copy>' + 
    '  <xsl:apply-templates/>' + 
    ' </xsl:copy>' + 
    '</xsl:template>' + 

    '<xsl:template match="*">' + 
    ' <xsl:element name="{local-name()}">' + 
    '  <xsl:apply-templates select="@*|node()"/>' + 
    ' </xsl:element>' + 
    '</xsl:template>' + 

    '<xsl:template match="@*">' + 
    ' <xsl:attribute name="{local-name()}">' + 
    '  <xsl:value-of select="."/>' + 
    ' </xsl:attribute>' + 
    '</xsl:template>' + 

    '</xsl:stylesheet>'; 

    // Markers used to recognise the XML declaration that the transform prepends.
    cXmlDeclStart = '<?xml ';
    cXmlDeclEnd   = '?>';

var 
    Doc, XSL : IXMLDOMdocument2; 
    Res      : string; 
    DeclEnd  : Integer; 
begin 
    Doc := ComsDOMDocument.Create; 
    Doc.ASync := false; 
    XSL := ComsDOMDocument.Create; 
    XSL.ASync := false; 
    try 
        // loadXML reports malformed input through its return value rather than
        // an exception. Route that into the existing error path so a parse
        // failure is not silently transformed as an empty document.
        if not Doc.loadXML(XMLString) then
            raise Exception.Create(Doc.parseError.reason);
        XSL.loadXML(cRemoveNSTransform); 
        Res := Doc.TransFormNode(XSL); 

        // The transform output normally starts with
        // <?xml version="1.0" encoding="UTF-16"?>; remove it. Only strip when
        // the declaration is really there: an unconditional Copy(Res, P+2)
        // would drop the first character when P = 0, or cut at a later
        // processing instruction.
        Result := Res;
        if Copy(Res, 1, Length(cXmlDeclStart)) = cXmlDeclStart then
        begin
            DeclEnd := Pos(cXmlDeclEnd, Res);
            if DeclEnd > 0 then
                Result := Copy(Res, DeclEnd + Length(cXmlDeclEnd), MaxInt);
        end;
    except 
        on E: Exception do Result := E.Message; 
    end; 
end; { RemoveNameSpaces } 


class function TXMLHelper.SelectNode(StartNode: IXmlNode; const NodeXPath: WideString): IXmlNode; 
// Returns the node found at path NodeXPath below StartNode, or nil when
// StartNode is unassigned, its DOM node offers no IDomNodeSelect, or the
// selection yields nothing.
// http://delphi.about.com/od/delphi-tips-2011/qt/select-single-node-ixmlnode-txmlnode-xpath-delphi-xmldom.htm 
var 
    Selector  : IDomNodeSelect; 
    Found     : IDomNode; 
    DocAccess : IXmlDocumentAccess; 
    OwnerDoc  : TXmlDocument; 
begin 
    Result := nil; 

    // Guard clauses: nothing to search, or the DOM vendor cannot do XPath.
    if not Assigned(StartNode) then 
        Exit; 
    if not Supports(StartNode.DOMNode, IDomNodeSelect, Selector) then 
        Exit; 

    Found := Selector.selectNode(NodeXPath); 
    if not Assigned(Found) then 
        Exit; 

    // Hand the owning TXmlDocument (if reachable) to the wrapper node.
    OwnerDoc := nil; 
    if Supports(StartNode.OwnerDocument, IXmlDocumentAccess, DocAccess) then 
        OwnerDoc := DocAccess.DocumentObject; 

    Result := TXmlNode.Create(Found, nil, OwnerDoc); 
end; { SelectNode } 


class function TXMLHelper.SelectNodes(StartNode: IXmlNode; const NodeXPath: WideString): IXMLNodeList; 
(* Returns a list of all nodes selected by NodeXPath, evaluated from StartNode. 
 * Returns nil when StartNode is unassigned, when it lacks IXmlNodeAccess, when 
 * its DOM node lacks IDomNodeSelect, or when the selection returns no list. 
 * 
 * Example document from the original author: 
 * 
 * <Envelope>       <= DocumentElement root 
 * <Body> 
 *  <FindItemResponse> 
 *   <ResponseMessages> 
 *    <FindItemResponseMessage> 
 *    <RootFolder>   <= IRootNode 
 *     <Items> 
 *      <CalendarItem> 
 * 
 * for which the original author states these calls are identical 
 * (NOTE(review): not verified here): 
 * SelectNodes(DocumentElement,'Envelope/Body/FindItemResponse/ResponseMessages/FindItemResponseMessage/RootFolder/Items/CalendarItem') 
 * SelectNodes(DocumentElement,'/Envelope/Body/FindItemResponse/ResponseMessages/FindItemResponseMessage/RootFolder/Items/CalendarItem') 
 * SelectNodes(IRootNode,'Items/CalendarItem') 
 * 
 * http://delphi.about.com/od/vclusing/qt/delphi-select-xml-nodes-ixmlnodelist-selectnodes-xpath-xmldom.htm 
 *) 
var 
    Selector   : IDomNodeSelect; 
    NodeAccess : IXmlNodeAccess; 
    Matches    : IDomNodeList; 
    DocAccess  : IXmlDocumentAccess; 
    OwnerDoc   : TXmlDocument; 
    Idx        : Integer; 
begin 
    Result := nil; 

    // Guard clauses, checked in the same order as the interfaces are needed.
    if not Assigned(StartNode) then 
        Exit; 
    if not Supports(StartNode, IXmlNodeAccess, NodeAccess) then 
        Exit; 
    if not Supports(StartNode.DOMNode, IDomNodeSelect, Selector) then 
        Exit; 

    Matches := Selector.selectNodes(NodeXPath); 
    if not Assigned(Matches) then 
        Exit; 

    // The selection yields an IDomNodeList while callers expect an IXMLNodeList,
    // so each DOM node is wrapped in a TXmlNode and collected in a new list.
    Result := TXmlNodeList.Create(NodeAccess.GetNodeObject, '', nil); 

    OwnerDoc := nil; 
    if Supports(StartNode.OwnerDocument, IXmlDocumentAccess, DocAccess) then 
        OwnerDoc := DocAccess.DocumentObject; 

    for Idx := 0 to Matches.length - 1 do 
        Result.Add(TXmlNode.Create(Matches.item[Idx], nil, OwnerDoc)); 
end; { SelectNodes } 


procedure DomToTree(XmlNode: IXMLNode; TV: TTreeView; TreeNode: TTreeNode); 
{ Recursively mirrors XmlNode and its descendants into TV.

  XmlNode  : the XML node to render; nil and non-element nodes are ignored.
  TV       : the tree view receiving the new items.
  TreeNode : the tree item under which XmlNode is added (nil is what
             XMLToTree passes for the document element).

  Each element becomes one tree item labelled with its node name, followed by
  ' = value' for text elements. Attributes are added as '[name = "value"]'
  children, then child nodes are processed recursively. If an exception occurs
  while rendering a node, its message is added as an item under TreeNode
  instead of propagating. }
var 
    I: Integer; 
    NewTreeNode: TTreeNode; 
    NodeText: string; 
    AttrNode: IXMLNode; 
begin 
    // A document without a root element passes nil here; without this guard
    // the NodeType access below would dereference a nil interface.
    if not Assigned(XmlNode) then Exit; 
    // Skip text nodes and other special cases 
    if XmlNode.NodeType <> ntElement then Exit; 
    try 
        // Add the node itself 
        NodeText := XmlNode.NodeName; 
        if XmlNode.IsTextElement then 
            NodeText := NodeText + ' = ' + XmlNode.NodeValue; 
        NewTreeNode := TV.Items.AddChild(TreeNode, NodeText); 
        // Add attributes 
        for I := 0 to XmlNode.AttributeNodes.Count - 1 do 
        begin 
            AttrNode := XmlNode.AttributeNodes.Nodes[I]; 
            TV.Items.AddChild(NewTreeNode, 
                '[' + AttrNode.NodeName + ' = "' + AttrNode.Text + '"]'); 
        end; 
        // Add each child node 
        if XmlNode.HasChildNodes then 
            for I := 0 to XmlNode.ChildNodes.Count - 1 do 
                DomToTree(XmlNode.ChildNodes.Nodes[I], TV, NewTreeNode); 
    except 
        // Best effort: show the problem in the tree rather than aborting.
        on E: Exception do 
            TV.Items.AddChild(TreeNode, E.Message); 
    end; 
end; { DomToTree } 


class procedure TXMLHelper.XMLToTree(XmlDoc: IXMLDocument; TV: TTreeView); 
{ Replaces the contents of TV with a tree view of XmlDoc.

  XmlDoc : the document to display; it is activated before use.
  TV     : the tree view to clear and refill.

  The rebuild is wrapped in BeginUpdate/EndUpdate so the control is not
  repainted for every item that is added. The finally block makes sure
  updates are re-enabled even if building the tree raises. }
begin 
    XmlDoc.Active := True; 
    TV.Items.BeginUpdate; 
    try 
        TV.Items.Clear; 
        DomToTree(XmlDoc.DocumentElement, TV, nil); 
    finally 
        TV.Items.EndUpdate; 
    end; 
end; { XMLToTree } 


function ConcatNodeNames(NodeNames: Array of String): String; 
// Joins NodeNames into a single path, putting '/' in front of every name:
// ['name1', 'name2'] becomes '/name1/name2'. An empty array gives ''.
var 
    Idx: Integer; 
begin 
    Result := ''; 
    for Idx := Low(NodeNames) to High(NodeNames) do 
        Result := Result + '/' + NodeNames[Idx]; 
end; 

end. 
+0

MSXML은 XPATH 2.0 버전을 지원합니까? –

+0

또한 html 문서와 함께 어떻게 사용할 수 있는지 지정하십시오. –

+0

타당한 질문입니다 ;-) HTML이 반드시 유효한 XML은 아니라는 점을 제가 간과했습니다. 이 방법은 HTML 문서가 유효한 XHTML(http://en.wikipedia.org/wiki/XHTML)일 경우에만 사용할 수 있습니다. 그리고 그것은 아마도 당신이 통제할 수 없는 부분일 것입니다. –

1

혹시 모르니 OmniXML이 유용할 수도 있습니다(XPath 데모 참조). 그리고 HTML 구문 분석을 위한 또 다른(another) 라이브러리는 XPath 2 지원을 포함합니다.