Skip to content

Instantly share code, notes, and snippets.

@aerovulpe
Created March 13, 2023 02:25
Show Gist options
  • Select an option

  • Save aerovulpe/0bdb9a7e645410c9d37bc11d803de25a to your computer and use it in GitHub Desktop.

Select an option

Save aerovulpe/0bdb9a7e645410c9d37bc11d803de25a to your computer and use it in GitHub Desktop.
/*
* © ChatKitty, Inc. 2022 - All Rights Reserved.
* Unauthorized copying of this file, via any medium is strictly prohibited.
* This source code and all its derivatives are proprietary and confidential.
*/
package com.chatkitty.domain.infrastructure.opengraph
import org.htmlcleaner.HtmlCleaner
import java.io.BufferedReader
import java.io.InputStreamReader
import java.net.URL
import java.net.URLConnection
import java.nio.charset.Charset
import java.util.Hashtable
import java.util.regex.Pattern
/**
 * [OpenGraph] implementation that downloads a page and collects the Open Graph
 * `<meta>` tags found in its `<head>`.
 *
 * All of the work happens in the constructor: it opens a connection to [url],
 * reads the response up to the closing `</head>` tag (or to the end of the
 * stream when there is none), parses that fragment with HtmlCleaner and stores
 * every `<meta>` element whose `property`/`name` attribute starts with a known
 * namespace prefix. The constructor therefore performs blocking network I/O.
 *
 * @param url address of the page to inspect.
 * @param ignoreSpecErrors when `false`, construction fails if any of
 *   [REQUIRED_META] is missing from the page.
 * @throws Exception if [ignoreSpecErrors] is `false` and a required property is
 *   missing; I/O and URL parsing failures propagate from the underlying calls.
 */
class OpenGraphImpl(
    url: String,
    ignoreSpecErrors: Boolean = true
) : OpenGraph {
    companion object {
        /** Properties that must be present when spec errors are not ignored. */
        val REQUIRED_META = arrayOf("title", "type", "image", "url")

        /** Maps each base type to the specific type values that belong to it. */
        val BASE_TYPES = Hashtable<String, Array<String>>()

        private const val HEAD_END_TAG = "</head>"
        private const val CHARSET_PARAM = "charset="

        /** Matches `prefix: http://ogp.me/ns...#` pairs inside the `<head prefix="...">` attribute. */
        private val NAMESPACE_PATTERN: Pattern =
            Pattern.compile("(([A-Za-z0-9_]+):\\s+(http:\\/\\/ogp.me\\/ns(\\/\\w+)*#))\\s*")

        init {
            BASE_TYPES["activity"] = arrayOf("activity", "sport")
            BASE_TYPES["business"] = arrayOf("bar", "company", "cafe", "hotel", "restaurant")
            BASE_TYPES["group"] = arrayOf("cause", "sports_league", "sports_team")
            BASE_TYPES["organization"] = arrayOf("band", "government", "non_profit", "school", "university")
            BASE_TYPES["person"] = arrayOf(
                "actor",
                "athlete",
                "author",
                "director",
                "musician",
                "politician",
                "profile",
                "public_figure"
            )
            BASE_TYPES["place"] = arrayOf("city", "country", "landmark", "state_province")
            BASE_TYPES["product"] = arrayOf(
                "album",
                "book",
                "drink",
                "food",
                "game",
                "movie",
                "product",
                "song",
                "tv_show"
            )
            BASE_TYPES["website"] = arrayOf("blog", "website", "article")
        }

        /**
         * Returns the charset announced in the connection's Content-Type header,
         * or the platform default when the header is absent, carries no charset
         * parameter, or names a charset that cannot be resolved.
         */
        private fun getConnectionCharset(connection: URLConnection): Charset =
            try {
                val charsetName = connection.contentType
                    ?.lowercase()
                    ?.let(::extractCharsetName)
                if (charsetName.isNullOrEmpty()) Charset.defaultCharset() else Charset.forName(charsetName)
            } catch (e: Exception) {
                // Deliberate best effort: an unknown or illegal charset name must not abort the fetch.
                Charset.defaultCharset()
            }

        /**
         * Extracts the value of the `charset=` parameter from a (lower-cased)
         * Content-Type value, or `null` when there is no such parameter.
         * Surrounding whitespace and quotes are removed, so `charset="utf-8"`
         * yields `utf-8`.
         */
        private fun extractCharsetName(contentType: String): String? =
            contentType.substringBefore(':')
                .split(';')
                .map { param -> param.trim { it <= ' ' } }
                .firstOrNull { it.startsWith(CHARSET_PARAM) }
                ?.removePrefix(CHARSET_PARAM)
                ?.trim { it <= ' ' || it == '"' || it == '\'' }

        /**
         * Reads the response body line by line until the closing `</head>` tag
         * (matched case-insensitively) and returns what was read, terminated
         * with an empty body so the fragment is a complete document. When the
         * stream ends without a closing head tag, everything read is returned.
         * The stream is always closed.
         */
        private fun readHead(connection: URLConnection, charset: Charset): String {
            val head = StringBuilder()
            BufferedReader(InputStreamReader(connection.getInputStream(), charset)).use { reader ->
                while (true) {
                    // readLine() returns null at end of stream; stop instead of failing.
                    val line = reader.readLine() ?: break
                    val headEnd = line.indexOf(HEAD_END_TAG, ignoreCase = true)
                    if (headEnd >= 0) {
                        head.append(line, 0, headEnd + HEAD_END_TAG.length)
                            .append("<body></body></html>")
                        break
                    }
                    // Keep the line break so text spanning several lines is not glued together.
                    head.append(line).append('\n')
                }
            }
            return head.toString()
        }

        /** Returns the key of the [BASE_TYPES] entry that lists [type], or `null` when none does. */
        private fun findBaseType(type: String?): String? =
            if (type == null) null else BASE_TYPES.entries.firstOrNull { type in it.value }?.key
    }

    /** Base type derived from the page's `type` property, or `null` when it is absent or unknown. */
    var baseType: String?

    /** URL reported by the connection after the page has been read. */
    private val originalUrl: String
    private val pageNamespaces: MutableList<OpenGraphNamespace> = mutableListOf()
    private val metaAttributes: MutableMap<String, MutableList<MetaElement>> = mutableMapOf()

    init {
        val connection = URL(url).openConnection().apply {
            setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36")
            setRequestProperty("Accept", "*/*")
        }
        val charset = getConnectionCharset(connection)
        val pageData = HtmlCleaner().clean(readHead(connection, charset))

        // Namespaces declared by the page itself: <head prefix="og: http://ogp.me/ns# ...">.
        val headElement = pageData.findElementByName("head", true)
        if (headElement.hasAttribute("prefix")) {
            val matcher = NAMESPACE_PATTERN.matcher(headElement.getAttributeByName("prefix"))
            while (matcher.find()) {
                pageNamespaces.add(OpenGraphNamespace(matcher.group(2), matcher.group(3)))
            }
        }
        // Fall back to the standard "og" namespace when the page did not declare it.
        if (pageNamespaces.none { it.prefix == "og" }) {
            pageNamespaces.add(OpenGraphNamespace("og", "http://ogp.me/ns#"))
        }

        for (metaElement in pageData.getElementsByName("meta", true)) {
            // "property" takes precedence over "name" when both attributes are present.
            val target = listOf("property", "name").firstOrNull { metaElement.hasAttribute(it) } ?: continue
            val propertyName = metaElement.getAttributeByName(target)
            val namespace = pageNamespaces.firstOrNull { propertyName.startsWith("${it.prefix}:") } ?: continue
            // A tag without a content attribute carries no value; skip it rather than fail the whole parse.
            val content = metaElement.getAttributeByName("content") ?: continue
            setProperty(namespace, propertyName, content)
        }

        if (!ignoreSpecErrors && REQUIRED_META.any { it !in metaAttributes }) {
            throw Exception("Does not conform to Open Graph protocol")
        }

        // Strip a leading namespace prefix from the type before looking up its base type.
        val currentType = getContent("type")?.let { type ->
            val owner = pageNamespaces.firstOrNull { type.startsWith("${it.prefix}:") }
            if (owner != null) type.removePrefix("${owner.prefix}:") else type
        }
        baseType = findBaseType(currentType)

        originalUrl = connection.url.toExternalForm()
    }

    /** Returns the content of the first element stored under [property], or `null` when there is none. */
    override fun getContent(property: String): String? =
        metaAttributes[property]?.firstOrNull()?.content

    /**
     * Stores [content] under [property], registering [namespace] if it is not known yet.
     * A leading `prefix:` of the namespace is removed from [property] before it is used as the key.
     */
    override fun setProperty(namespace: OpenGraphNamespace, property: String, content: String?) {
        if (namespace !in pageNamespaces) pageNamespaces.add(namespace)
        val sanitizedProperty = property.removePrefix("${namespace.prefix}:")
        // NOTE(review): null content has always failed here with a NullPointerException;
        // kept as-is so the exception type callers may observe does not change.
        val element = MetaElement(namespace, sanitizedProperty, content!!)
        metaAttributes.getOrPut(sanitizedProperty) { mutableListOf() }.add(element)
    }

    /** Returns every element stored under [property], or `null` when the property is unknown. */
    override fun getProperties(property: String): List<MetaElement>? = metaAttributes[property]

    /** Removes every element stored under [property]. */
    override fun removeProperty(property: String) {
        metaAttributes.remove(property)
    }

    /** Renders the stored elements as `<meta property="prefix:name" content="..." />` tags. */
    override fun toHtml(): List<String> = renderMetaTags("property")

    /** Renders the stored elements as `<meta name="prefix:name" content="..." />` tags. */
    override fun toXhtml(): List<String> = renderMetaTags("name")

    /**
     * Builds one `<meta>` tag per stored element, using [keyAttribute] as the
     * name of the attribute that carries the `prefix:property` key.
     */
    private fun renderMetaTags(keyAttribute: String): List<String> =
        metaAttributes.values.flatMap { elements ->
            elements.map { (namespace, property, content) ->
                // NOTE(review): content comes from a remote page and is written out unescaped;
                // confirm whether callers escape it before embedding the tags in markup.
                "<meta $keyAttribute=\"${namespace.prefix}:$property\" content=\"$content\" />"
            }
        }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment