diff --git a/firmware/rust1/src/bin/heizung.rs b/firmware/rust1/src/bin/heizung.rs
index 17156398ca1a8525485408f9cf953d04d7b41fa1..5b9d0d93be967510949ab3ae2d280c473b588ced 100644
--- a/firmware/rust1/src/bin/heizung.rs
+++ b/firmware/rust1/src/bin/heizung.rs
@@ -72,7 +72,6 @@ async fn beeper_task(pwm_channel: embassy_rp::peripherals::PWM_CH3, beeper_pin:
 
 #[embassy_executor::task]
 async fn uart_task(this: RS485) {
-    info!("abc");
     this.run_task().await;
 }
 
@@ -99,8 +98,8 @@ async fn main(spawner: Spawner) {
     let sda = p.PIN_12;
     let scl = p.PIN_13;
     let _ws2811_red_in = Input::new(p.PIN_14, Pull::None);
-    let _tx_en = p.PIN_15;
-    let _tx = p.PIN_16;
+    let tx_en = p.PIN_15;
+    let tx = p.PIN_16;
     let rx = p.PIN_17;
     let _matrix_in2 = Input::new(p.PIN_18, Pull::Up);
     let _reed3 = Input::new(p.PIN_19, Pull::Up);
@@ -124,7 +123,7 @@ async fn main(spawner: Spawner) {
     let mut uart_config = uart::Config::default();
     uart_config.baudrate = 19200;
     uart_config.parity = Parity::ParityEven;
-    let rs485 = RS485::new(p.UART0, rx, interrupt::take!(UART0_IRQ), p.DMA_CH1, uart_config, p.PIO0, p.DMA_CH0);
+    let rs485 = RS485::new(p.UART0, rx, tx, tx_en, interrupt::take!(UART0_IRQ), p.DMA_CH1, uart_config, p.PIO0, p.DMA_CH0, p.DMA_CH2);
     unwrap!(spawner.spawn(uart_task(rs485)));
 
     if false {
diff --git a/firmware/rust1/src/rs485.rs b/firmware/rust1/src/rs485.rs
index e8b9d8a9dd4980129e486a75821b28437c0b2fea..e13565fb08b9869fc7951f189e3abcac751afd51 100644
--- a/firmware/rust1/src/rs485.rs
+++ b/firmware/rust1/src/rs485.rs
@@ -19,7 +19,10 @@ use crate::dont_abort::DontAbort;
 pub struct RS485 {
     pio: peripherals::PIO0,
     rx_pin: peripherals::PIN_17,
+    tx_pin: peripherals::PIN_16,
+    tx_en_pin: peripherals::PIN_15,
     dma_channel: peripherals::DMA_CH0,
+    tx_dma_channel: peripherals::DMA_CH2,
     rx: UartRx<'static, UART0, uart::Async>
 }
 
@@ -47,42 +50,42 @@ async fn debug_print_pio_addr(sm: pac::pio::StateMachine) {
 
 impl RS485 {
     pub fn new(
-            uart: UART0, rx: peripherals::PIN_17, rx_irq: UART0_IRQ, rx_dma_channel: peripherals::DMA_CH1, uart_config: uart::Config,
-            pio: peripherals::PIO0, dma_channel: peripherals::DMA_CH0
+            uart: UART0, rx_pin: peripherals::PIN_17, tx_pin: peripherals::PIN_16, tx_en_pin: peripherals::PIN_15,
+            rx_irq: UART0_IRQ, rx_dma_channel: peripherals::DMA_CH1, uart_config: uart::Config,
+            pio: peripherals::PIO0, dma_channel: peripherals::DMA_CH0, tx_dma_channel: peripherals::DMA_CH2
         ) -> RS485 {
         // SAFETY: The auto-baud will only read from this pin and we will set it back to UART mode after initialising the PIO.
-        let rx_pin_for_autobaud = unsafe { rx.clone_unchecked() };
+        let rx_pin_for_autobaud = unsafe { rx_pin.clone_unchecked() };
 
         let uart_rx = UartRx::new(
             uart,
-            rx,
+            rx_pin,
             rx_irq,
             rx_dma_channel,
             uart_config,
         );
-        RS485 { pio, rx_pin: rx_pin_for_autobaud, dma_channel, rx: uart_rx }
+        RS485 { pio, rx_pin: rx_pin_for_autobaud, tx_pin, tx_en_pin, dma_channel, tx_dma_channel, rx: uart_rx }
     }
 
     pub async fn run_task(mut self: Self) {
         let Pio {
             mut common,
             sm0: mut sm,
+            mut sm1,
             mut irq0,
+            mut irq1,
             ..
         } = Pio::new(self.pio);
 
         sm.set_enable(false);
 
         let prg_set_osr = pio_proc::pio_asm!(
-            ".origin 0",
             "pull",
             "hang:",
             "jmp hang",
         );
 
-        let prg = pio_proc::pio_asm!(
-            ".origin 0",
-
+        let prg_autobaud = pio_proc::pio_asm!(
             // Send zero to RX FIFO to signal start of next measurement.
             ".wrap_target",
             "timeout:",
@@ -101,6 +104,7 @@ impl RS485 {
 
             // We have a long 1, i.e. UART is idle. Wait for start bit.
             "have_long_high:",
+            "irq set 1",          // notify MCU in case they want to know about this (start of Modbus frame)
             "wait 0 pin 0",
             "nop [4]",
             "jmp pin wait_idle",  // abort if zero was just a glitch
@@ -137,6 +141,41 @@ impl RS485 {
             ".wrap",
         );
 
+        // This is based on the UART TX example in the datasheet but we also control the TX_EN pin
+        // and we have 9 data bits because we have parity.
+        /*
+        let prg_tx = pio_proc::pio_asm!(
+            ".side_set 2 opt",
+            // An 8e1 UART transmit program.
+            // OUT pin 0 and side-set pin 1 are both mapped to UART TX pin.
+            "pull side 1",           // disable transmitter, get number of bytes
+            "mov y, osr side 3",     // copy to y reg, enable transmitter
+            "byteloop:",
+            "pull side 3 [6]",       // Assert stop bit, or stall with line in idle state (6 instead of 7 because mov/jmp take one clk)
+            "set x, 8 side 2 [7]",   // Preload bit counter, assert start bit for 8 clocks
+            "bitloop:",              // This loop will run 9 times (8e1 UART, i.e. 8 data bits and one clock bit)
+            "out pins, 1",           // Shift 1 bit from OSR to the first OUT pin
+            "jmp x-- bitloop [6]",   // Each loop iteration is 8 cycles.
+            "jmp y-- byteloop side 3",
+        );
+        */
+        // Let's use 4 cycles for each baud time because we need two bits of side-set.
+        let prg_tx = pio_proc::pio_asm!(
+            ".origin 24",
+            ".side_set 2 opt",
+            // An 8e1 UART transmit program.
+            // OUT pin 0 and side-set pin 1 are both mapped to UART TX pin.
+            "pull side 2",           // disable transmitter, get number of bytes. Side: TX_EN=0, TX=1
+            "mov y, osr side 3",     // copy to y reg, enable transmitter
+            "byteloop:",
+            "pull side 3 [2]",       // Assert stop bit, or stall with line in idle state (6 instead of 7 because mov/jmp take one clk)
+            "set x, 8 side 1 [3]",   // Preload bit counter, assert start bit for 8 clocks. Side: TX_EN=1, TX=0
+            "bitloop:",              // This loop will run 9 times (8e1 UART, i.e. 8 data bits and one clock bit)
+            "out pins, 1",           // Shift 1 bit from OSR to the first OUT pin
+            "jmp x-- bitloop [2]",   // Each loop iteration is 8 cycles.
+            "jmp y-- byteloop side 3",
+        );
+
         let relocated = RelocatedProgram::new(&prg_set_osr.program);
         let mut cfg = Config::default();
         let loaded_program = common.load_program(&relocated);
@@ -162,14 +201,14 @@ impl RS485 {
 
         // Timeout: Modbus wants 1.5 ms between frames so we make this a bit smaller. SM runs at 25 MHz
         // but we need two clocks per loop.
-        let timeout_start_value = (SM_FREQ as f32 * 1.2e-3 / 2.) as u32;
+        let timeout_start_value = (SM_FREQ as f32 * 1.4e-3 / 2.) as u32;
         sm.tx().push(timeout_start_value);
         info!("timeout_start_value: {} = 0x{:08x}", timeout_start_value, timeout_start_value);
 
         // switch to the real program and join FIFOs
         unsafe { common.free_instr(loaded_program.used_memory); };
         sm.set_enable(false);
-        let relocated = RelocatedProgram::new(&prg.program);
+        let relocated = RelocatedProgram::new(&prg_autobaud.program);
         let loaded_program = common.load_program(&relocated);
         cfg.use_program(&loaded_program, &[]);
         cfg.fifo_join = FifoJoin::RxOnly;
@@ -182,18 +221,56 @@ impl RS485 {
             pin_io(&self.rx_pin).ctrl().write(|w| w.set_funcsel(embassy_rp::pac::io::vals::Gpio17ctrlFuncsel::UART0_RX.0));
         }
 
-        info!("Program size for auto-baud: {}", prg.program.code.len());
+        info!("Program size for auto-baud: {}, for tx: {}", prg_autobaud.program.code.len(), prg_tx.program.code.len());
+
+
+        let relocated = RelocatedProgram::new(&prg_tx.program);
+        let mut cfg = Config::default();
+        let tx_pin_pio = common.make_pio_pin(self.tx_pin);
+        let tx_en_pin_pio = common.make_pio_pin(self.tx_en_pin);
+        let loaded_program = common.load_program(&relocated);
+        cfg.use_program(&loaded_program, &[&tx_en_pin_pio, &tx_pin_pio]);
+        const BAUD_RATE: u32 = 19200;
+        cfg.clock_divider = (U56F8!(125_000_000) / U56F8!(76800 /* BAUD_RATE*4 */)).to_fixed();
+        cfg.shift_out = ShiftConfig {
+            auto_fill: false,
+            threshold: 32,
+            direction: ShiftDirection::Right,
+        };
+        cfg.set_out_pins(&[&tx_pin_pio]);
+        cfg.fifo_join = FifoJoin::TxOnly;
+        sm1.set_config(&cfg);
+        sm1.set_pins(gpio::Level::Low, &[&tx_en_pin_pio]);
+        sm1.set_pins(gpio::Level::High, &[&tx_pin_pio]);
+        sm1.set_pin_dirs(embassy_rp::pio::Direction::Out, &[&tx_en_pin_pio, &tx_pin_pio]);
+        sm1.set_enable(true);
+
+        let mut tx_data = [0, 'H' as u32, 'e' as u32, 'l' as u32, 'l' as u32, 'o' as u32, '\r' as u32, '\n' as u32];
+        tx_data[0] = (tx_data.len() - 1) as u32;
+        for i in 1..tx_data.len() {
+            let x = tx_data[i] & 0xff;
+            let mut parity = 0;
+            for j in 0..8 {
+                parity ^= x>>j;
+            }
+            tx_data[i] = x | ((parity&1) << 8);
+        }
+        info!("tx_data: {:?}", tx_data);
+
 
         let mut dma_in_ref = self.dma_channel.into_ref();
+        let mut dma_tx_ref = self.tx_dma_channel.into_ref();
         let mut din = [42u32; 9];
         let mut bit_index = 0;
         let mut rx_buf = [0; 1];
         let mut rx_future = DontAbort::new(self.rx.read(&mut rx_buf));
+        let mut tx_future = DontAbort::new(sm1.tx().dma_push(dma_tx_ref.reborrow(), &tx_data));
         loop {
             let x = select4(
                 irq0.wait(),
                 sm.rx().dma_pull(dma_in_ref.reborrow(), &mut din),
-                debug_print_pio_addr(embassy_rp::pac::PIO0.sm(0)),
+                //debug_print_pio_addr(embassy_rp::pac::PIO0.sm(0)),
+                tx_future.continue_wait(),
                 rx_future.continue_wait(),
             ).await;
             match x {
@@ -279,6 +356,10 @@ impl RS485 {
                         }
                     }
                 },
+                Either4::Third(_) => {
+                    drop(tx_future);
+                    tx_future = DontAbort::new(sm1.tx().dma_push(dma_tx_ref.reborrow(), &tx_data));
+                },
                 Either4::Fourth(x) => {
                     drop(rx_future);
                     match x {