Changeset

4254:a4e182d7ff0a

mod_ogp: Improve parsing patterns
author Seve Ferrer <seve@delape.net>
date Wed, 18 Nov 2020 11:16:11 +0100
parents 4253:32b4901a9d8d
children 4255:38da10e4b593
files mod_ogp/mod_ogp.lua mod_ogp/test.lua
diffstat 2 files changed, 32 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mod_ogp/mod_ogp.lua	Tue Nov 17 13:45:33 2020 +0100
+++ b/mod_ogp/mod_ogp.lua	Wed Nov 18 11:16:11 2020 +0100
@@ -2,8 +2,8 @@
 local http = require "net.http"
 local st = require "util.stanza"
 
-local ogp_pattern = [[<meta property=["'](og:.-)["'] content=["'](.-)["'].->]]
-local ogp_pattern2 = [[<meta content=["'](.-)["'] property=["'](og:.-)["'].->]]
+local ogp_pattern = [[<meta property=["']?(og:.-)["']? content=%s*["']?(.-)["']?%s-/?>]]
+local ogp_pattern2 = [[<meta content=%s*["']?(.-)["']? property=["']?(og:.-)["']?%s-/?>]]
 local url_pattern = [[https?://%S+]]
 
 local function ogp_handler(event)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_ogp/test.lua	Wed Nov 18 11:16:11 2020 +0100
@@ -0,0 +1,30 @@
+local html = [[
+<meta property="og:title" content="Example 1">
+<meta property=og:title content="Example 2">
+<meta property="og:title" content="Example 3" >
+<meta property="og:title" content="Example 4" />
+<meta property="og:title" content="Example 5"/>
+<meta property=og:title content=Example 6/>
+<meta property="og:title" content= "Example 7" />
+<meta property="og:title" itemprop="image primaryImageOfPage" content="Example 8" />
+<meta content="Example 9" property="og:title" >
+<meta content="Example 10" property="og:title">
+<meta content="Example 11" property="og:title"/>
+<meta content="Example 12" property="og:title" />
+<meta content="Example 13" property=og:title >
+<meta content=Example 14 property=og:title >
+<meta content= "Example 15" property="og:title" />
+<meta content="Example 16" itemprop="image primaryImageOfPage"  property="og:title" />
+]]
+
+
+local ogp_pattern = [[<meta property=["']?(og:.-)["']? content=%s*["']?(.-)["']?%s-/?>]]
+local ogp_pattern2 = [[<meta content=%s*["']?(.-)["']? property=["']?(og:.-)["']?%s-/?>]]
+
+for property, content in html:gmatch(ogp_pattern) do
+    print("Pattern 1|", property, content, "|Pattern 1")
+end
+print('-------------------------------------------------------------')
+for content, property in html:gmatch(ogp_pattern2) do
+    print("Pattern 2|", property, content, "|Pattern 2")
+end