From d7657811964eac1cb9743bb98649278ad948f0d2 Mon Sep 17 00:00:00 2001
From: Maks Verver <maks@verver.ch>
Date: Tue, 8 Apr 2025 13:13:55 +0200
Subject: [PATCH] [CVE-2025-32414] python: Read at most len/4 characters.

Fixes #889 by reserving space in the buffer for UTF-8 encoding of text.

CVE: CVE-2025-32414
Upstream-Status: Backport [https://gitlab.gnome.org/GNOME/libxml2/-/commit/d7657811964eac1cb9743bb98649278ad948f0d2]
Signed-off-by: Peter Marko <peter.marko@siemens.com>
---
 python/libxml.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/python/libxml.c b/python/libxml.c
index 1fe8d685..2bf14078 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -248,7 +248,9 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
 
     file = (PyObject *) context;
     if (file == NULL) return(-1);
-    ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len);
+    /* When read() returns a string, the length is in characters not bytes, so
+       request at most len / 4 characters to leave space for UTF-8 encoding. */
+    ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len / 4);
     if (ret == NULL) {
 	printf("xmlPythonFileReadRaw: result is NULL\n");
 	return(-1);
@@ -283,10 +285,12 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
 	Py_DECREF(ret);
 	return(-1);
     }
-    if (lenread > len)
-	memcpy(buffer, data, len);
-    else
-	memcpy(buffer, data, lenread);
+    if (lenread < 0 || lenread > len) {
+	printf("xmlPythonFileReadRaw: invalid lenread\n");
+	Py_DECREF(ret);
+	return(-1);
+    }
+    memcpy(buffer, data, lenread);
     Py_DECREF(ret);
     return(lenread);
 }
@@ -310,7 +314,9 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
 
     file = (PyObject *) context;
     if (file == NULL) return(-1);
-    ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len);
+    /* When io_read() returns a string, the length is in characters not bytes, so
+       request at most len / 4 characters to leave space for UTF-8 encoding. */
+    ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len / 4);
     if (ret == NULL) {
 	printf("xmlPythonFileRead: result is NULL\n");
 	return(-1);
@@ -345,10 +351,12 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
 	Py_DECREF(ret);
 	return(-1);
     }
-    if (lenread > len)
-	memcpy(buffer, data, len);
-    else
-	memcpy(buffer, data, lenread);
+    if (lenread < 0 || lenread > len) {
+	printf("xmlPythonFileRead: invalid lenread\n");
+	Py_DECREF(ret);
+	return(-1);
+    }
+    memcpy(buffer, data, lenread);
     Py_DECREF(ret);
     return(lenread);
 }
