Files
FileServices/Helper/XmlLinqExtensionMethods.cs
2019-08-19 19:00:14 +08:00

388 lines
14 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;
using System.Xml.Linq;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace System.Xml.Linq
{
public static class XmlLinqExtensionMethods
{
/// <summary>
/// Adds, overwrites or removes the attribute <paramref name="name"/> on <paramref name="e"/>.
/// </summary>
/// <param name="e">Element to modify. When null the call is a no-op.</param>
/// <param name="name">Attribute name. When null or empty the call is a no-op.</param>
/// <param name="value">
/// Value to store. A null value removes the attribute if it exists and does nothing otherwise.
/// </param>
/// <returns>The element passed in as <paramref name="e"/> (possibly null), so calls can be chained.</returns>
public static XElement Attr(this XElement e, string name, object value)
{
    if (e == null || string.IsNullOrEmpty(name))
    {
        return e;
    }

    // SetAttributeValue handles all three cases (add, overwrite, remove on null) and
    // formats the value the same way the XAttribute constructor does. The previous
    // code formatted updates with Convert.ToString, so adding `true` stored "true"
    // while updating to `true` stored "True" (and numbers/dates followed the current culture).
    e.SetAttributeValue(name, value);
    return e;
}
/// <summary>
/// Reads the attribute <paramref name="name"/> from <paramref name="e"/> and HTML-decodes its value.
/// </summary>
/// <param name="e">Element to read from; may be null.</param>
/// <param name="name">Attribute name; may be null or empty.</param>
/// <returns>
/// The HTML-decoded attribute value, or an empty string when the element is null,
/// the name is null/empty, or the attribute does not exist.
/// </returns>
public static string Attr(this XElement e, string name)
{
    if (e == null || string.IsNullOrEmpty(name))
    {
        return string.Empty;
    }

    XAttribute found = e.Attribute(name);
    if (found == null)
    {
        return string.Empty;
    }

    return HttpUtility.HtmlDecode(found.Value);
}
/// <summary>
/// Reads the attribute <paramref name="name"/> as a value of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Value type to convert the attribute text to.</typeparam>
/// <param name="e">Element to read from.</param>
/// <param name="name">Attribute name.</param>
/// <returns>The result of <c>AttrStruct</c> called with <c>default(T)</c> as the fallback value.</returns>
public static T Attr<T>(this XElement e, string name) where T : struct
{
    T fallback = default(T);
    return AttrStruct<T>(e, name, fallback);
}
/// <summary>
/// Reads the attribute <paramref name="name"/> and converts its text to <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Target value type: an enum, <see cref="Guid"/>, or a type supported by <see cref="Convert.ChangeType(object, Type)"/>.</typeparam>
/// <param name="e">Element to read from.</param>
/// <param name="name">Attribute name.</param>
/// <param name="defaultValue">Value returned when the attribute is missing, blank, or cannot be converted.</param>
/// <returns>The converted value, or <paramref name="defaultValue"/>.</returns>
/// <remarks>
/// Malformed Guid text now yields <paramref name="defaultValue"/>; previously the
/// <see cref="FormatException"/> raised inside the catch block escaped to the caller.
/// </remarks>
public static T AttrStruct<T>(this XElement e, string name, T defaultValue = default(T)) where T : struct
{
    string raw = e.Attr(name);
    if (string.IsNullOrWhiteSpace(raw))
    {
        return defaultValue;
    }

    Type target = typeof(T);

    // Convert.ChangeType cannot turn a string into a Guid, so handle it explicitly
    // instead of relying on an exception to reach this path.
    if (target == typeof(Guid))
    {
        Guid guid;
        return Guid.TryParse(raw, out guid) ? (T)(object)guid : defaultValue;
    }

    if (target.IsEnum)
    {
        // Same parsing rules as Enum.Parse(type, text) (case-sensitive), without throwing.
        T member;
        return Enum.TryParse<T>(raw, out member) ? member : defaultValue;
    }

    try
    {
        return (T)Convert.ChangeType(raw, target);
    }
    catch (FormatException)
    {
        // Text is not in a recognisable format for T: fall through to the default.
    }
    catch (InvalidCastException)
    {
        // T is not a type Convert.ChangeType can produce from a string.
    }
    catch (OverflowException)
    {
        // Number is outside the range of T.
    }

    return defaultValue;
}
/// <summary>
/// Reads the attribute <paramref name="name"/> from an HTML node and HTML-decodes its value.
/// </summary>
/// <param name="e">Node to read from; may be null.</param>
/// <param name="name">Attribute name; may be null or empty.</param>
/// <returns>
/// The HTML-decoded attribute value, or an empty string when the node is null, has no
/// attributes, the name is null/empty, or the attribute lookup yields null.
/// </returns>
public static string Attr(this HtmlNode e, string name)
{
    if (e == null || !e.HasAttributes || string.IsNullOrEmpty(name))
    {
        return string.Empty;
    }

    var found = e.Attributes[name];
    if (found == null)
    {
        return string.Empty;
    }

    return HttpUtility.HtmlDecode(found.Value);
}
/// <summary>
/// Reads the attribute <paramref name="name"/> of an HTML node as a value of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Value type to convert the attribute text to.</typeparam>
/// <param name="e">Node to read from.</param>
/// <param name="name">Attribute name.</param>
/// <returns>The result of <c>AttrStruct</c> called with <c>default(T)</c> as the fallback value.</returns>
public static T Attr<T>(this HtmlNode e, string name) where T : struct
{
    T fallback = default(T);
    return AttrStruct<T>(e, name, fallback);
}
/// <summary>
/// Reads the attribute <paramref name="name"/> of an HTML node and converts its text to <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Target value type: an enum, <see cref="Guid"/>, or a type supported by <see cref="Convert.ChangeType(object, Type)"/>.</typeparam>
/// <param name="e">Node to read from.</param>
/// <param name="name">Attribute name.</param>
/// <param name="defaultValue">Value returned when the attribute is missing, blank, or cannot be converted.</param>
/// <returns>The converted value, or <paramref name="defaultValue"/>.</returns>
/// <remarks>
/// Malformed Guid text now yields <paramref name="defaultValue"/>; previously the
/// <see cref="FormatException"/> raised inside the catch block escaped to the caller.
/// </remarks>
public static T AttrStruct<T>(this HtmlNode e, string name, T defaultValue = default(T)) where T : struct
{
    string raw = e.Attr(name);
    if (string.IsNullOrWhiteSpace(raw))
    {
        return defaultValue;
    }

    Type target = typeof(T);

    // Convert.ChangeType cannot turn a string into a Guid, so handle it explicitly
    // instead of relying on an exception to reach this path.
    if (target == typeof(Guid))
    {
        Guid guid;
        return Guid.TryParse(raw, out guid) ? (T)(object)guid : defaultValue;
    }

    if (target.IsEnum)
    {
        // Same parsing rules as Enum.Parse(type, text) (case-sensitive), without throwing.
        T member;
        return Enum.TryParse<T>(raw, out member) ? member : defaultValue;
    }

    try
    {
        return (T)Convert.ChangeType(raw, target);
    }
    catch (FormatException)
    {
        // Text is not in a recognisable format for T: fall through to the default.
    }
    catch (InvalidCastException)
    {
        // T is not a type Convert.ChangeType can produce from a string.
    }
    catch (OverflowException)
    {
        // Number is outside the range of T.
    }

    return defaultValue;
}
/// <summary>
/// Null-safe accessor for <c>InnerText</c>.
/// </summary>
/// <param name="e">Node to read; may be null.</param>
/// <returns>The node's <c>InnerText</c>, or an empty string when the node is null.</returns>
public static string Text(this HtmlNode e)
{
    if (e == null)
    {
        return string.Empty;
    }

    return e.InnerText;
}
/// <summary>
/// Null-safe accessor for <c>InnerHtml</c>.
/// </summary>
/// <param name="e">Node to read; may be null.</param>
/// <returns>The node's <c>InnerHtml</c>, or an empty string when the node is null.</returns>
public static string Html(this HtmlNode e)
{
    if (e == null)
    {
        return string.Empty;
    }

    return e.InnerHtml;
}
/// <summary>
/// Tests whether the node's <c>class</c> attribute contains <paramref name="className"/>
/// as a whitespace-delimited token.
/// </summary>
/// <param name="node">Node whose <c>class</c> attribute is inspected.</param>
/// <param name="className">Class name to look for (compared case-sensitively).</param>
/// <returns>
/// True when the class is present; false when it is absent, the attribute is missing or blank,
/// or <paramref name="className"/> is null or blank.
/// </returns>
/// <remarks>
/// A null <paramref name="className"/> used to throw from <c>Regex.Escape</c>, and an empty one
/// produced a whitespace-only pattern that reported true for any element with two or more classes.
/// </remarks>
public static bool HasClass(this HtmlNode node, string className)
{
    if (string.IsNullOrWhiteSpace(className))
    {
        return false;
    }

    string classes = node.Attr("class").Trim();
    if (classes.Length == 0)
    {
        return false;
    }

    // The token must be bounded by start-of-string or whitespace on the left and by
    // whitespace or end-of-string on the right. This single pattern covers the same
    // cases as the old three-way alternation plus the exact-equality check.
    string tokenPattern = @"(?:^|\s)" + Regex.Escape(className) + @"(?:\s|$)";
    return Regex.IsMatch(classes, tokenPattern);
}
/// <summary>
/// Depth-first search below <paramref name="node"/> for the first element carrying
/// the class <paramref name="className"/>.
/// </summary>
/// <param name="node">Node whose children are searched; the node itself is not tested.</param>
/// <param name="className">Class name, tested with <c>HasClass</c>.</param>
/// <returns>
/// The first matching element, where each child is tested before its own children,
/// or null when nothing matches or the node has no children.
/// </returns>
public static HtmlNode GetNodeByClasName(this HtmlNode node, string className)
{
    if (!node.HasChildNodes)
    {
        return null;
    }

    foreach (HtmlNode child in node.ChildNodes)
    {
        // Only element nodes are tested or descended into.
        if (child.NodeType == HtmlNodeType.Element)
        {
            HtmlNode hit = child.HasClass(className) ? child : child.GetNodeByClasName(className);
            if (hit != null)
            {
                return hit;
            }
        }
    }

    return null;
}
/// <summary>
/// Depth-first search below <paramref name="node"/> for the first element named
/// <paramref name="tagName"/> (case-insensitive).
/// </summary>
/// <param name="node">Node whose children are searched; the node itself is not tested.</param>
/// <param name="tagName">Tag name to look for.</param>
/// <returns>
/// The first matching element, where each child is tested before its own children,
/// or null when nothing matches, the node has no children, or <paramref name="tagName"/> is blank.
/// </returns>
public static HtmlNode GetNodeByTagName(this HtmlNode node, string tagName)
{
    if (!node.HasChildNodes || string.IsNullOrWhiteSpace(tagName))
    {
        return null;
    }

    foreach (HtmlNode child in node.ChildNodes)
    {
        // Only element nodes are tested or descended into.
        if (child.NodeType == HtmlNodeType.Element)
        {
            bool sameTag = string.Equals(tagName, child.Name, StringComparison.OrdinalIgnoreCase);
            HtmlNode hit = sameTag ? child : child.GetNodeByTagName(tagName);
            if (hit != null)
            {
                return hit;
            }
        }
    }

    return null;
}
/// <summary>
/// Depth-first search below <paramref name="node"/> for the first element whose
/// <c>id</c> attribute equals <paramref name="id"/> (case-insensitive).
/// </summary>
/// <param name="node">Node whose children are searched; the node itself is not tested.</param>
/// <param name="id">Id to look for.</param>
/// <returns>
/// The first matching element, where each child is tested before its own children,
/// or null when nothing matches, the node has no children, or <paramref name="id"/> is blank.
/// </returns>
public static HtmlNode GetNodeById(this HtmlNode node, string id)
{
    if (!node.HasChildNodes || string.IsNullOrWhiteSpace(id))
    {
        return null;
    }

    foreach (HtmlNode child in node.ChildNodes)
    {
        // Only element nodes are tested or descended into.
        if (child.NodeType == HtmlNodeType.Element)
        {
            bool sameId = string.Equals(id, child.Attr("id"), StringComparison.OrdinalIgnoreCase);
            HtmlNode hit = sameId ? child : child.GetNodeById(id);
            if (hit != null)
            {
                return hit;
            }
        }
    }

    return null;
}
/// <summary>
/// Collects every element below <paramref name="node"/> that carries the class
/// <paramref name="className"/>.
/// </summary>
/// <param name="node">Node whose children are searched; the node itself is not tested. May be null.</param>
/// <param name="className">Class name, tested with <c>HasClass</c>.</param>
/// <returns>
/// The matching elements, each listed before the matches found among its own children.
/// Never null: an empty list is returned when there is nothing to search or nothing matches.
/// </returns>
/// <remarks>
/// The earlier version returned null for a childless node and then passed that null to
/// <c>List.AddRange</c> while recursing, so any tree containing a childless element threw
/// <see cref="ArgumentNullException"/>.
/// </remarks>
public static IEnumerable<HtmlNode> GetNodesByClasName(this HtmlNode node, string className)
{
    var matches = new List<HtmlNode>();
    if (node == null || !node.HasChildNodes || string.IsNullOrWhiteSpace(className))
    {
        return matches;
    }

    foreach (HtmlNode child in node.ChildNodes)
    {
        if (child.NodeType != HtmlNodeType.Element)
        {
            continue;
        }

        if (child.HasClass(className))
        {
            matches.Add(child);
        }

        // Safe for childless elements: the recursive call always returns a list.
        matches.AddRange(child.GetNodesByClasName(className));
    }

    return matches;
}
/// <summary>
/// Collects every element below <paramref name="node"/> named <paramref name="tagName"/>
/// (case-insensitive).
/// </summary>
/// <param name="node">Node whose children are searched; the node itself is not tested. May be null.</param>
/// <param name="tagName">Tag name to look for.</param>
/// <returns>
/// The matching elements, each listed before the matches found among its own children.
/// Never null: an empty list is returned when there is nothing to search, nothing matches,
/// or <paramref name="tagName"/> is blank.
/// </returns>
/// <remarks>
/// The earlier version returned null for a childless node and then passed that null to
/// <c>List.AddRange</c> while recursing, so any tree containing a childless element threw
/// <see cref="ArgumentNullException"/>.
/// </remarks>
public static IEnumerable<HtmlNode> GetNodesByTagName(this HtmlNode node, string tagName)
{
    var matches = new List<HtmlNode>();
    if (node == null || !node.HasChildNodes || string.IsNullOrWhiteSpace(tagName))
    {
        return matches;
    }

    foreach (HtmlNode child in node.ChildNodes)
    {
        if (child.NodeType != HtmlNodeType.Element)
        {
            continue;
        }

        if (string.Equals(tagName, child.Name, StringComparison.OrdinalIgnoreCase))
        {
            matches.Add(child);
        }

        // Safe for childless elements: the recursive call always returns a list.
        matches.AddRange(child.GetNodesByTagName(tagName));
    }

    return matches;
}
/// <summary>
/// Lazily walks up the <c>ParentNode</c> chain of <paramref name="node"/>.
/// </summary>
/// <param name="node">Starting node; not included in the result. May be null.</param>
/// <returns>
/// The element-type ancestors, nearest first. Non-element ancestors are skipped
/// but the walk continues past them. Empty when <paramref name="node"/> is null.
/// </returns>
public static IEnumerable<HtmlNode> AncestorNodes(this HtmlNode node)
{
    if (node == null)
    {
        yield break;
    }

    for (HtmlNode up = node.ParentNode; up != null; up = up.ParentNode)
    {
        if (up.NodeType == HtmlNodeType.Element)
        {
            yield return up;
        }
    }
}
/// <summary>
/// Tests whether the node matches at least one of the given selectors.
/// </summary>
/// <param name="node">Node to test; must be an element to match anything.</param>
/// <param name="selectors">Selectors, each evaluated with the single-selector <c>Is</c> overload.</param>
/// <returns>
/// True as soon as one selector matches; false when none does, when the node is null or
/// not an element, or when <paramref name="selectors"/> is null or empty.
/// </returns>
public static bool Is(this HtmlNode node, string[] selectors)
{
    bool testable = node != null
        && node.NodeType == HtmlNodeType.Element
        && selectors != null
        && selectors.Length != 0;
    if (!testable)
    {
        return false;
    }

    foreach (string candidate in selectors)
    {
        if (node.Is(candidate))
        {
            return true;
        }
    }

    return false;
}
// Leading tag name of a compound selector, e.g. "div" in "div.item#main".
private static readonly Regex TagSelectorRegex = new Regex(@"^\w+");

// ".name" tokens of a compound selector. \w also admits digits, which the previous
// pattern [a-zA-Z_\-] cut off (".col-6" was read as ".col-").
private static readonly Regex ClassSelectorRegex = new Regex(@"\.[\w\-]+");

// "#name" token of a compound selector; digits are admitted for the same reason.
private static readonly Regex IdSelectorRegex = new Regex(@"#[\w\-]+");

/// <summary>
/// Tests one node against one compound selector made of an optional tag name,
/// an optional <c>#id</c> and any number of <c>.class</c> tokens (no combinators).
/// </summary>
/// <param name="e">Node to test. A null node never matches.</param>
/// <param name="s">Compound selector such as <c>div.item#main</c>.</param>
/// <returns>
/// True when every part present in <paramref name="s"/> is satisfied: the tag name
/// (case-insensitive), the id (case-sensitive) and all listed classes (case-sensitive).
/// Parts absent from the selector impose no constraint.
/// </returns>
private static bool IsMatch(HtmlNode e, string s)
{
    if (e == null)
    {
        return false;
    }

    Match tag = TagSelectorRegex.Match(s);
    if (tag.Success && !string.Equals(tag.Value, e.Name, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    Match id = IdSelectorRegex.Match(s);
    if (id.Success && !string.Equals(id.Value.Substring(1), e.Attr("id"), StringComparison.Ordinal))
    {
        return false;
    }

    MatchCollection requiredClasses = ClassSelectorRegex.Matches(s);
    if (requiredClasses.Count == 0)
    {
        return true;
    }

    // A set lookup replaces the old join-and-count comparison, which failed when the
    // class attribute repeated a token (class="a a" did not match ".a"). Splitting on
    // any whitespace also handles tabs and newlines between class names.
    var presentClasses = new HashSet<string>(
        e.Attr("class").Split((char[])null, StringSplitOptions.RemoveEmptyEntries),
        StringComparer.Ordinal);

    return requiredClasses.Cast<Match>().All(m => presentClasses.Contains(m.Value.Substring(1)));
}
/// <summary>
/// Tests whether the node matches a selector built from compound selectors
/// (tag, <c>#id</c>, <c>.class</c>) joined by the child combinator <c>&gt;</c> or the
/// descendant combinator (whitespace), e.g. <c>ul.menu &gt; li a</c>.
/// </summary>
/// <param name="node">Node to test; must be an element to match anything.</param>
/// <param name="selector">Selector text. Leading and trailing whitespace is ignored.</param>
/// <returns>
/// True when the node satisfies the right-most compound and its ancestors satisfy the rest
/// of the chain; false otherwise, or when the node is null / not an element or the selector is blank.
/// </returns>
/// <remarks>
/// Differences from the earlier implementation, which this replaces:
/// every combinator in the chain is honoured (previously, after a descendant match the
/// next combinator was discarded, so <c>a b c</c> behaved like <c>a &gt; b c</c>);
/// all candidate ancestors are tried rather than only the nearest matching one;
/// surrounding whitespace no longer introduces an empty compound.
/// </remarks>
public static bool Is(this HtmlNode node, string selector)
{
    if (node == null || node.NodeType != HtmlNodeType.Element || string.IsNullOrWhiteSpace(selector))
    {
        return false;
    }

    // Canonical form: no whitespace around '>', and every remaining whitespace run
    // collapsed to one space, so that ' ' and '>' are the only separators.
    string chain = Regex.Replace(selector.Trim(), @"\s*\>\s*", ">");
    chain = Regex.Replace(chain, @"\s+", " ");

    return MatchesSelectorChain(node, chain);
}

// Matches `candidate` against the right-most compound of `chain`, then recursively
// matches the remaining chain against its parent ('>') or any element ancestor (' ').
private static bool MatchesSelectorChain(HtmlNode candidate, string chain)
{
    if (candidate == null)
    {
        return false;
    }

    int childAt = chain.LastIndexOf('>');
    int descendantAt = chain.LastIndexOf(' ');
    int splitAt = Math.Max(childAt, descendantAt);

    if (splitAt < 0)
    {
        // Single compound left: nothing more to check above this node.
        return IsMatch(candidate, chain);
    }

    string compound = chain.Substring(splitAt + 1);
    string remainder = chain.Substring(0, splitAt);

    if (!IsMatch(candidate, compound))
    {
        return false;
    }

    if (childAt > descendantAt)
    {
        // Child combinator: the immediate parent must satisfy the remainder.
        return MatchesSelectorChain(candidate.ParentNode, remainder);
    }

    // Descendant combinator: any element ancestor may satisfy the remainder, so each
    // one is tried in turn instead of committing to the nearest match.
    foreach (HtmlNode ancestor in candidate.AncestorNodes())
    {
        if (MatchesSelectorChain(ancestor, remainder))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Returns the descendants of <paramref name="node"/> that match any selector in a
/// comma-separated list.
/// </summary>
/// <param name="node">Root of the search. May be null.</param>
/// <param name="selectors">
/// Comma-separated selectors, e.g. <c>"div.item, ul &gt; li"</c>. When null or blank,
/// all descendants are returned unfiltered.
/// </param>
/// <returns>
/// A lazily evaluated sequence of matching descendants; empty when <paramref name="node"/> is null.
/// </returns>
public static IEnumerable<HtmlNode> GetNodes(this HtmlNode node, string selectors)
{
    if (node == null)
    {
        return Enumerable.Empty<HtmlNode>();
    }

    if (string.IsNullOrWhiteSpace(selectors))
    {
        return node.Descendants();
    }

    // Split once up front instead of once per visited node, and trim each alternative
    // so that the space after a comma ("a, b") is not taken for a descendant combinator.
    string[] alternatives = selectors
        .Split(',')
        .Select(part => part.Trim())
        .Where(part => part.Length > 0)
        .ToArray();

    return node.Descendants().Where(candidate => candidate.Is(alternatives));
}
/// <summary>
/// Lazily enumerates the siblings preceding <paramref name="node"/>, nearest first.
/// </summary>
/// <param name="node">Starting node; not included in the result. May be null.</param>
/// <param name="assert">Optional filter; when null every preceding sibling is returned.</param>
/// <returns>The preceding siblings accepted by <paramref name="assert"/>; empty when <paramref name="node"/> is null.</returns>
public static IEnumerable<HtmlNode> Before(this HtmlNode node, Func<HtmlNode, bool> assert = null)
{
    if (node == null)
    {
        yield break;
    }

    HtmlNode sibling = node.PreviousSibling;
    while (sibling != null)
    {
        bool accepted = assert == null || assert(sibling);
        if (accepted)
        {
            yield return sibling;
        }

        sibling = sibling.PreviousSibling;
    }
}
/// <summary>
/// Lazily enumerates the siblings following <paramref name="node"/>, nearest first.
/// </summary>
/// <param name="node">Starting node; not included in the result. May be null.</param>
/// <param name="assert">Optional filter; when null every following sibling is returned.</param>
/// <returns>The following siblings accepted by <paramref name="assert"/>; empty when <paramref name="node"/> is null.</returns>
public static IEnumerable<HtmlNode> Behind(this HtmlNode node, Func<HtmlNode, bool> assert = null)
{
    if (node == null)
    {
        yield break;
    }

    HtmlNode sibling = node.NextSibling;
    while (sibling != null)
    {
        bool accepted = assert == null || assert(sibling);
        if (accepted)
        {
            yield return sibling;
        }

        sibling = sibling.NextSibling;
    }
}
/// <summary>
/// Finds the nearest preceding sibling that is an element, skipping other node types.
/// </summary>
/// <param name="node">Starting node; may be null.</param>
/// <returns>The previous element sibling, or null when there is none or <paramref name="node"/> is null.</returns>
public static HtmlNode Prev(this HtmlNode node)
{
    if (node == null)
    {
        return null;
    }

    HtmlNode cursor = node.PreviousSibling;
    while (cursor != null && cursor.NodeType != HtmlNodeType.Element)
    {
        cursor = cursor.PreviousSibling;
    }

    return cursor;
}
/// <summary>
/// Finds the nearest following sibling that is an element, skipping other node types.
/// </summary>
/// <param name="node">Starting node; may be null.</param>
/// <returns>The next element sibling, or null when there is none or <paramref name="node"/> is null.</returns>
public static HtmlNode Next(this HtmlNode node)
{
    if (node == null)
    {
        return null;
    }

    HtmlNode cursor = node.NextSibling;
    while (cursor != null && cursor.NodeType != HtmlNodeType.Element)
    {
        cursor = cursor.NextSibling;
    }

    return cursor;
}
/// <summary>
/// Returns the first node of the sequence, optionally restricted to elements with a given tag name.
/// </summary>
/// <param name="nodes">Sequence to search; may be null.</param>
/// <param name="tagName">
/// Tag name to match case-insensitively. When null or blank, the first node of any type is returned.
/// </param>
/// <returns>The first qualifying node, or null when there is none or <paramref name="nodes"/> is null.</returns>
public static HtmlNode First(this IEnumerable<HtmlNode> nodes, string tagName = null)
{
    if (nodes == null)
    {
        return null;
    }

    if (string.IsNullOrWhiteSpace(tagName))
    {
        return nodes.FirstOrDefault();
    }

    return nodes.FirstOrDefault(candidate =>
        candidate.NodeType == HtmlNodeType.Element
        && string.Equals(candidate.Name, tagName, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Returns the first node of the collection, optionally restricted to elements with a given tag name.
/// </summary>
/// <param name="nodes">Collection to search; may be null.</param>
/// <param name="tagName">Optional tag name filter, passed through to the sequence overload.</param>
/// <returns>The first qualifying node, or null when the collection is null or empty or nothing qualifies.</returns>
public static HtmlNode First(this HtmlNodeCollection nodes, string tagName = null)
{
    bool nothingToSearch = nodes == null || nodes.Count == 0;
    if (nothingToSearch)
    {
        return null;
    }

    // Cast yields an IEnumerable<HtmlNode>, which selects the sequence overload
    // rather than calling this method again.
    IEnumerable<HtmlNode> sequence = nodes.Cast<HtmlNode>();
    return sequence.First(tagName);
}
/// <summary>
/// Returns the last node of the sequence, optionally restricted to elements with a given tag name.
/// </summary>
/// <param name="nodes">Sequence to search; may be null.</param>
/// <param name="tagName">
/// Tag name to match case-insensitively. When null or blank, the last node of any type is returned.
/// </param>
/// <returns>The last qualifying node, or null when there is none or <paramref name="nodes"/> is null.</returns>
public static HtmlNode Last(this IEnumerable<HtmlNode> nodes, string tagName = null)
{
    if (nodes == null)
    {
        return null;
    }

    if (string.IsNullOrWhiteSpace(tagName))
    {
        return nodes.LastOrDefault();
    }

    return nodes.LastOrDefault(candidate =>
        candidate.NodeType == HtmlNodeType.Element
        && string.Equals(candidate.Name, tagName, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Returns the last node of the collection, optionally restricted to elements with a given tag name.
/// </summary>
/// <param name="nodes">Collection to search; may be null.</param>
/// <param name="tagName">Optional tag name filter, passed through to the sequence overload.</param>
/// <returns>The last qualifying node, or null when the collection is null or empty or nothing qualifies.</returns>
public static HtmlNode Last(this HtmlNodeCollection nodes, string tagName = null)
{
    bool nothingToSearch = nodes == null || nodes.Count == 0;
    if (nothingToSearch)
    {
        return null;
    }

    // Cast yields an IEnumerable<HtmlNode>, which selects the sequence overload
    // rather than calling this method again.
    IEnumerable<HtmlNode> sequence = nodes.Cast<HtmlNode>();
    return sequence.Last(tagName);
}
}
}