From 45c92cb1f4243febc9b5996e71ed2cde7de00825 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gina=20H=C3=A4u=C3=9Fge?= <osd@foosel.net>
Date: Mon, 5 Oct 2015 18:04:05 +0200
Subject: [PATCH] Fix: Open GCODE files as utf-8, replacing encoding errors

Also detect files that contain a BOM and strip it.
Internal handling of GCODE file contents switched to unicode.

 Should take care of #1077
---
 src/octoprint/util/__init__.py         | 23 +++++++++++++++++++++++
 src/octoprint/util/comm.py             | 13 +++++++------
 src/octoprint/util/gcodeInterpreter.py |  4 +++-
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/octoprint/util/__init__.py b/src/octoprint/util/__init__.py
index 07f80b81..58c750a3 100644
--- a/src/octoprint/util/__init__.py
+++ b/src/octoprint/util/__init__.py
@@ -505,6 +505,29 @@ def atomic_write(filename, mode="w+b", prefix="tmp", suffix=""):
 	shutil.move(temp_config.name, filename)
 
 
+def bom_aware_open(filename, encoding="ascii", mode="r", **kwargs):
+	"""
+	Opens ``filename`` via ``codecs.open``, skipping a leading BOM when reading.
+
+	``mode`` and any further ``kwargs`` (e.g. ``errors``) are handed to
+	``codecs.open``. A "-sig" codec is only chosen if it actually exists.
+	"""
+	import codecs
+
+	encoding = codecs.lookup(encoding).name
+	bom = getattr(codecs, "BOM_" + encoding.replace("utf-", "utf").upper(), None)
+	if "r" in mode and bom is not None:
+		# these encodings might have a BOM, so let's see if there is one
+		with open(filename, "rb") as f:
+			has_bom = f.read(4).startswith(bom)
+		try:
+			# only utf-8 has a "-sig" variant, utf-16/32 strip the BOM themselves
+			encoding = codecs.lookup(encoding + "-sig").name if has_bom else encoding
+		except LookupError:
+			pass
+	return codecs.open(filename, mode=mode, encoding=encoding, **kwargs)
+
+
 class RepeatedTimer(threading.Thread):
 	"""
 	This class represents an action that should be run repeatedly in an interval. It is similar to python's
diff --git a/src/octoprint/util/comm.py b/src/octoprint/util/comm.py
index 49c8bc6c..9bc2c702 100644
--- a/src/octoprint/util/comm.py
+++ b/src/octoprint/util/comm.py
@@ -24,7 +24,7 @@ from octoprint.settings import settings, default_settings
 from octoprint.events import eventManager, Events
 from octoprint.filemanager import valid_file_type
 from octoprint.filemanager.destinations import FileDestinations
-from octoprint.util import get_exception_string, sanitize_ascii, filter_non_ascii, CountedEvent, RepeatedTimer
+from octoprint.util import get_exception_string, sanitize_ascii, filter_non_ascii, CountedEvent, RepeatedTimer, to_unicode, bom_aware_open
 
 try:
 	import _winreg
@@ -491,7 +491,7 @@ class MachineCom(object):
 		self._clear_to_send.set()
 
 	def sendCommand(self, cmd, cmd_type=None, processed=False):
-		cmd = cmd.encode('ascii', 'replace')
+		cmd = to_unicode(cmd, errors="replace")
 		if not processed:
 			cmd = process_gcode_line(cmd)
 			if not cmd:
@@ -1549,10 +1549,11 @@ class MachineCom(object):
 						continue
 
 					# now comes the part where we increase line numbers and send stuff - no turning back now
+					command_to_send = command.encode("ascii", errors="replace")
 					if (gcode is not None or self._sendChecksumWithUnknownCommands) and (self.isPrinting() or self._alwaysSendChecksum):
-						self._doIncrementAndSendWithChecksum(command)
+						self._doIncrementAndSendWithChecksum(command_to_send)
 					else:
-						self._doSendWithoutChecksum(command)
+						self._doSendWithoutChecksum(command_to_send)
 
 				# trigger "sent" phase and use up one "ok"
 				self._process_command_phase("sent", command, command_type, gcode=gcode)
@@ -1952,7 +1953,7 @@ class PrintingGcodeFileInformation(PrintingFileInformation):
 		Opens the file for reading and determines the file size.
 		"""
 		PrintingFileInformation.start(self)
-		self._handle = open(self._filename, "r")
+		self._handle = bom_aware_open(self._filename, encoding="utf-8", errors="replace")
 
 	def close(self):
 		"""
@@ -1982,7 +1983,7 @@ class PrintingGcodeFileInformation(PrintingFileInformation):
 				if self._handle is None:
 					# file got closed just now
 					return None
-				line = self._handle.readline()
+				line = to_unicode(self._handle.readline())
 				if not line:
 					self.close()
 				processed = process_gcode_line(line, offsets=offsets, current_tool=current_tool)
diff --git a/src/octoprint/util/gcodeInterpreter.py b/src/octoprint/util/gcodeInterpreter.py
index b0b69b66..853cbfd6 100644
--- a/src/octoprint/util/gcodeInterpreter.py
+++ b/src/octoprint/util/gcodeInterpreter.py
@@ -35,7 +35,9 @@ class gcode(object):
 		if os.path.isfile(filename):
 			self.filename = filename
 			self._fileSize = os.stat(filename).st_size
-			with open(filename, "r") as f:
+
+			import codecs
+			with codecs.open(filename, encoding="utf-8", errors="replace") as f:
 				self._load(f, printer_profile, throttle=throttle)
 
 	def abort(self):