about | summary | refs | log | tree | commit | diff | stats
path: root/textproc/py-html-text
diff options
context:
space:
mode:
Diffstat (limited to 'textproc/py-html-text')
-rw-r--r-- textproc/py-html-text/Makefile | 26
-rw-r--r-- textproc/py-html-text/distinfo | 3
-rw-r--r-- textproc/py-html-text/pkg-descr | 7
3 files changed, 36 insertions, 0 deletions
diff --git a/textproc/py-html-text/Makefile b/textproc/py-html-text/Makefile
new file mode 100644
index 000000000000..8abdc85e2e2a
--- /dev/null
+++ b/textproc/py-html-text/Makefile
@@ -0,0 +1,26 @@
+PORTNAME= html-text
+DISTVERSION= 0.7.1
+CATEGORIES= textproc python
+MASTER_SITES= PYPI
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+DISTNAME= ${PORTNAME:S/-/_/}-${DISTVERSION}
+
+MAINTAINER= yuri@FreeBSD.org
+COMMENT= Extract text from HTML
+WWW= https://github.com/zytedata/html-text
+
+LICENSE= MIT
+LICENSE_FILE= ${WRKSRC}/LICENSE
+
+BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}hatchling>=1.27.0:devel/py-hatchling@${PY_FLAVOR}
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}lxml-html-clean>0:devel/py-lxml-html-clean@${PY_FLAVOR}
+
+USES= python
+USE_PYTHON= pep517 autoplist pytest
+
+NO_ARCH= yes
+
+# Test results as of 0.7.1 (presumably from the pytest run enabled via USE_PYTHON): 58 passed, 4 skipped, 1 xfailed, 5 errors in 4.68s -- TODO(review): record the cause of the 5 errors
+
+.include <bsd.port.mk>
diff --git a/textproc/py-html-text/distinfo b/textproc/py-html-text/distinfo
new file mode 100644
index 000000000000..6355c5461e17
--- /dev/null
+++ b/textproc/py-html-text/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1760676184
+SHA256 (html_text-0.7.1.tar.gz) = 87fd194310a9f54be32c7b18a70180dfa72a6b4d01cca35ac813c4d2b2b2ed8b
+SIZE (html_text-0.7.1.tar.gz) = 54423
diff --git a/textproc/py-html-text/pkg-descr b/textproc/py-html-text/pkg-descr
new file mode 100644
index 000000000000..3ded2dd0baf6
--- /dev/null
+++ b/textproc/py-html-text/pkg-descr
@@ -0,0 +1,7 @@
+Extract text from HTML.
+
+html_text is a library for extracting text from HTML, with a few handy
+features:
+- It removes leading and trailing whitespace
+- It handles HTML entities
+- It uses lxml for parsing