diff --git a/.gitignore b/.gitignore
index b9f3806a22867843e318587d52ca211f261d7ac5..449fc15f746d0797925a51a746d8f7fa0a2b4227 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,7 @@
 .pio
 .vscode
+*.elf
+*.hex
+*.lst
+*.bin
+*.map
diff --git a/README.md b/README.md
index 637d51fca5bf4239dca87e97109a3ba759a09949..13f422fdb7cac9995d8fe4eb3f48da26f9b0d79b 100644
--- a/README.md
+++ b/README.md
@@ -11,16 +11,17 @@ In contrast, blinky is only 500 bytes with ch32v003fun, boots faster, and signif
 As it currently stands it is still designed to use the WCH-Link to do the SDIO programming.  Though I would like to ALSO support an open source programmer.
 
 ch32v003fun contains:
-2. Examples using ch32v003fun, but not as many as using the HAL.
-3. "minichlink" which uses the WCH CH-Link with libusb, for cross-platform use.
-4. An extra copy of libgcc so you can use unusual risc-v build chains, located in the `misc/libgcc.a`.
-5. A folder named "ch32v003fun" containing a single self-contained source file and header file for compling apps for the ch32v003.
-6. On some systems ability to "printf" back through
+1. Examples using ch32v003fun, but not as many as using the HAL.
+2. "minichlink" which uses the WCH CH-Link with libusb, for cross-platform use.
+3. An extra copy of libgcc so you can use unusual risc-v build chains, located in the `misc/libgcc.a`.
+4. A folder named "ch32v003fun" containing a single self-contained source file and header file for compiling apps for the ch32v003.
+5. On some systems, the ability to "printf" back through the debug interface.
+6. A demo bootloader.
 
 In Progress:
 1. Other programmer support (ESP32-S2 works, currently)
 2. OpenOCD-compatible build for `minichlink`.
-3. Improved performance of ESP32-S2 programmer.
+3. Full-chip-write for faster flash.
 4. Support for `NHC-Link042`
 5. Write more demos.
 
diff --git a/attic/external_crystal_run_from_ram_turbo.c b/attic/external_crystal_run_from_ram_turbo.c
new file mode 100644
index 0000000000000000000000000000000000000000..cd2a0bbc47cc7b48969b54d6b6cc430dfeab7fd6
--- /dev/null
+++ b/attic/external_crystal_run_from_ram_turbo.c
@@ -0,0 +1,56 @@
+// Could be defined here, or in the processor defines.
+#define SYSTEM_CORE_CLOCK 24000000
+
+#include "ch32v003fun.h"
+#include <stdio.h>
+
+#define APB_CLOCK SYSTEM_CORE_CLOCK
+
+uint32_t count;
+
+void RamFunction() __attribute__((naked)); // naked: the body is nothing but the asm statement below.
+void RamFunction() // Endless store loop; main() copies 128 bytes starting here into rambuffer and calls the copy.
+{
+	asm volatile("\n\
+		li a0, 1 | (1<<4)\n\
+		li a1, (1<<16) | (1<<(16+4))\n\
+		la a2, 0x40011410\n\
+1:\n\
+		c.sw a0, 0(a2)\n\
+		c.sw a1, 0(a2)\n\
+		c.sw a0, 0(a2)\n\
+		c.sw a1, 0(a2)\n\
+		c.sw a0, 0(a2)\n\
+		c.sw a1, 0(a2)\n\
+		c.sw a0, 0(a2)\n\
+		c.sw a1, 0(a2)\n\
+		j 1b" );
+} // Not reached: the asm ends with an unconditional jump back to label 1.
+
+uint8_t rambuffer[128] __attribute__((aligned(4))); // Executed as code below, so it must not land on an odd address.
+// Sets up the clock and three output pins, then copies RamFunction into rambuffer and runs the copy.
+int main()
+{
+	EXTEN->EXTEN_CTR = EXTEN_LDO_TRIM; // Boost LDO.
+	SystemInitHSEPLL( RCC_HSEBYP );
+	// When running from RAM appears to go up to about 96MHz.
+
+	// Enable GPIOD and GPIOC.
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
+
+	// GPIO D0 Push-Pull, 10MHz Output
+	GPIOD->CFGLR &= ~(0xf<<(4*0));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
+
+	// GPIO D4 Push-Pull, 10MHz Output
+	GPIOD->CFGLR &= ~(0xf<<(4*4));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
+
+	// GPIO C4 Push-Pull, 10MHz Output
+	GPIOC->CFGLR &= ~(0xf<<(4*4));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
+	// Copy sizeof(rambuffer) bytes starting at RamFunction into RAM, then jump to the copy (never returns).
+	memcpy( rambuffer, RamFunction, sizeof( rambuffer ) );
+	void (*fn)() = (void*) rambuffer;
+	fn();
+}
diff --git a/ch32v003fun/ch32v003fun-bootloader.ld b/ch32v003fun/ch32v003fun-bootloader.ld
new file mode 100644
index 0000000000000000000000000000000000000000..39d6580e88a0b572317c6a1821d730df795735a3
--- /dev/null
+++ b/ch32v003fun/ch32v003fun-bootloader.ld
@@ -0,0 +1,148 @@
+ENTRY( InterruptVector )
+
+MEMORY
+{
+	/* Actually at 0x1FFFF000 but the system maps it to 0x00000000 */
+	FLASH (rx) : ORIGIN = 0x00000000, LENGTH = 1920
+	RAM (xrw)  : ORIGIN = 0x20000000, LENGTH = 2K
+}
+
+SECTIONS
+{
+    .init :
+    { 
+      _sinit = .;
+      . = ALIGN(4);
+      KEEP(*(SORT_NONE(.init)))
+      . = ALIGN(4);
+      _einit = .;
+    } >FLASH AT>FLASH
+
+    .text :
+    {
+      . = ALIGN(4);
+      *(.text)
+      *(.text.*)
+      *(.rodata)
+      *(.rodata*)
+      *(.gnu.linkonce.t.*)
+      . = ALIGN(4);
+    } >FLASH AT>FLASH 
+
+    .fini :
+    {
+      KEEP(*(SORT_NONE(.fini)))
+      . = ALIGN(4);
+    } >FLASH AT>FLASH
+
+    PROVIDE( _etext = . );
+    PROVIDE( _eitcm = . );  
+
+    .preinit_array :
+    {
+      PROVIDE_HIDDEN (__preinit_array_start = .);
+      KEEP (*(.preinit_array))
+      PROVIDE_HIDDEN (__preinit_array_end = .);
+    } >FLASH AT>FLASH 
+  
+    .init_array :
+    {
+      PROVIDE_HIDDEN (__init_array_start = .);
+      KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
+      KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
+      PROVIDE_HIDDEN (__init_array_end = .);
+    } >FLASH AT>FLASH 
+  
+    .fini_array :
+    {
+      PROVIDE_HIDDEN (__fini_array_start = .);
+      KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
+      KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
+      PROVIDE_HIDDEN (__fini_array_end = .);
+    } >FLASH AT>FLASH 
+  
+    .ctors :
+    {
+      /* gcc uses crtbegin.o to find the start of
+         the constructors, so we make sure it is
+         first.  Because this is a wildcard, it
+         doesn't matter if the user does not
+         actually link against crtbegin.o; the
+         linker won't look for a file to match a
+         wildcard.  The wildcard also means that it
+         doesn't matter which directory crtbegin.o
+         is in.  */
+      KEEP (*crtbegin.o(.ctors))
+      KEEP (*crtbegin?.o(.ctors))
+      /* We don't want to include the .ctor section from
+         the crtend.o file until after the sorted ctors.
+         The .ctor section from the crtend file contains the
+         end of ctors marker and it must be last */
+      KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
+      KEEP (*(SORT(.ctors.*)))
+      KEEP (*(.ctors))
+    } >FLASH AT>FLASH 
+  
+    .dtors :
+    {
+      KEEP (*crtbegin.o(.dtors))
+      KEEP (*crtbegin?.o(.dtors))
+      KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
+      KEEP (*(SORT(.dtors.*)))
+      KEEP (*(.dtors))
+    } >FLASH AT>FLASH 
+
+    .dalign :
+    {
+      . = ALIGN(4);
+      PROVIDE(_data_vma = .);
+    } >RAM AT>FLASH  
+
+    .dlalign :
+    {
+      . = ALIGN(4); 
+      PROVIDE(_data_lma = .);
+    } >FLASH AT>FLASH
+
+    .data :
+    {
+      . = ALIGN(4);
+      *(.gnu.linkonce.r.*)
+      *(.data .data.*)
+      *(.gnu.linkonce.d.*)
+      . = ALIGN(8);
+      PROVIDE( __global_pointer$ = . + 0x800 );
+      *(.sdata .sdata.*)
+      *(.sdata2*)
+      *(.gnu.linkonce.s.*)
+      . = ALIGN(8);
+      *(.srodata.cst16)
+      *(.srodata.cst8)
+      *(.srodata.cst4)
+      *(.srodata.cst2)
+      *(.srodata .srodata.*)
+      . = ALIGN(4);
+      PROVIDE( _edata = .);
+    } >RAM AT>FLASH
+
+    .bss :
+    {
+      . = ALIGN(4);
+      PROVIDE( _sbss = .);
+      *(.sbss*)
+      *(.gnu.linkonce.sb.*)
+      *(.bss*)
+      *(.gnu.linkonce.b.*)    
+      *(COMMON*)
+      . = ALIGN(4);
+      PROVIDE( _ebss = .);
+    } >RAM AT>FLASH
+
+    PROVIDE( _end = _ebss);
+	PROVIDE( end = . );
+
+	PROVIDE( _eusrstack = ORIGIN(RAM) + LENGTH(RAM));	
+}
+
+
+
diff --git a/ch32v003fun/ch32v003fun.c b/ch32v003fun/ch32v003fun.c
index c800f905c9ef3ffd812bc2f8666a04149aa074c1..72f14f2e9730d67878ed948c61f17a6d707f61ba 100644
--- a/ch32v003fun/ch32v003fun.c
+++ b/ch32v003fun/ch32v003fun.c
@@ -664,10 +664,6 @@ mini_pprintf(int (*puts)(char*s, int len, void* buf), void* buf, const char *fmt
 int main() __attribute__((used));
 void SystemInit( void ) __attribute__((used));
 
-void InterruptVector()         __attribute__((naked)) __attribute((section(".init"))) __attribute__((used)) __attribute((weak));
-void handle_reset()            __attribute__((naked)) __attribute((section(".text.handle_reset"))) __attribute__((used));
-void DefaultIRQHandler( void ) __attribute__((section(".text.vector_handler"))) __attribute__((naked)) __attribute__((used));
-
 extern uint32_t * _sbss;
 extern uint32_t * _ebss;
 extern uint32_t * _data_lma;
@@ -712,7 +708,11 @@ void TIM1_TRG_COM_IRQHandler( void )     __attribute__((section(".text.vector_ha
 void TIM1_CC_IRQHandler( void )          __attribute__((section(".text.vector_handler"))) __attribute((weak,alias("DefaultIRQHandler"))) __attribute__((used));
 void TIM2_IRQHandler( void )             __attribute__((section(".text.vector_handler"))) __attribute((weak,alias("DefaultIRQHandler"))) __attribute__((used));
 
-void InterruptVector()
+void InterruptVector()         __attribute__((naked)) __attribute((section(".init"))) __attribute((weak,alias("InterruptVectorDefault")));
+void InterruptVectorDefault()  __attribute__((naked)) __attribute((section(".init")));
+
+
+void InterruptVectorDefault()
 {
 	asm volatile( "\n\
 	.align  2\n\
@@ -759,7 +759,6 @@ void InterruptVector()
 	.word   TIM2_IRQHandler           /* TIM2 */                           \n");
 }
 
-
 void handle_reset()
 {
 	asm volatile( "\n\
@@ -820,6 +819,38 @@ void SystemInit48HSI( void )
 	while ((RCC->CFGR0 & (uint32_t)RCC_SWS) != (uint32_t)0x08);                // Wait till PLL is used as system clock source
 }
 
+
+void SystemInitHSE( int HSEBYP )
+{
+	// Run SYSCLK straight from HSE (no PLL).  HSEBYP is ORed into RCC->CTLR.  Values lifted from the EVT; little documentation exists.
+	RCC->CTLR  = RCC_HSION | RCC_HSEON | RCC_PLLON | HSEBYP;      // Enable HSE and keep HSI+PLL on.
+	while(!(RCC->CTLR&RCC_HSERDY));                               // Spin (no timeout) until HSERDY is set.
+	// Not using PLL.
+	FLASH->ACTLR = FLASH_ACTLR_LATENCY_0;                         // Latency setting 0.  NOTE(review): presumably zero wait states, not 1 cycle - confirm.
+	RCC->INTR  = 0x009F0000;                                      // Clear PLL, CSSC, HSE, HSI and LSI ready flags.
+	RCC->CFGR0 = RCC_HPRE_DIV1 | RCC_SW_HSE;                      // HCLK = SYSCLK = APB1 and use HSE for System Clock.
+	while ((RCC->CFGR0 & (uint32_t)RCC_SWS) != (uint32_t)0x04);   // Wait till HSE is used as system clock source
+	RCC->CTLR = RCC_HSEON | HSEBYP; // Turn off HSI + PLL.
+}
+
+
+void SystemInitHSEPLL( int HSEBYP )
+{
+	// Run SYSCLK from the PLL fed by HSE.  HSEBYP is ORed into RCC->CTLR.  Values lifted from the EVT; little documentation exists.
+	RCC->CTLR  = RCC_HSION | RCC_HSEON | RCC_PLLON | HSEBYP;       // Enable HSE and keep HSI+PLL on.
+	while(!(RCC->CTLR&RCC_HSERDY));                                // Spin (no timeout) until HSERDY is set.
+	RCC->CFGR0 = RCC_SW_HSE | RCC_HPRE_DIV1;                       // HCLK = SYSCLK = APB1 and use HSE for System Clock.
+	FLASH->ACTLR = FLASH_ACTLR_LATENCY_1;                          // 1 Cycle Latency
+	RCC->CTLR  = RCC_HSEON | HSEBYP;                               // Turn off PLL and HSI.
+	RCC->CFGR0 = RCC_SW_HSE | RCC_HPRE_DIV1 | RCC_PLLSRC_HSE_Mul2; // Use PLL with HSE.
+	RCC->CTLR  = RCC_HSEON | RCC_PLLON | HSEBYP;                   // Turn PLL Back on..
+	while((RCC->CTLR & RCC_PLLRDY) == 0);                          // Wait till PLL is ready
+	RCC->CFGR0 = RCC_SW_PLL | RCC_HPRE_DIV1 | RCC_PLLSRC_HSE_Mul2; // Select PLL as system clock source
+	while ((RCC->CFGR0 & (uint32_t)RCC_SWS) != (uint32_t)0x08);    // Wait till PLL is used as system clock source
+}
+
+
+
 void SetupUART( int uartBRR )
 {
 	// Enable GPIOD and UART.
@@ -855,15 +886,17 @@ int _write(int fd, const char *buf, int size)
 	#define DMDATA0 ((volatile uint32_t*)0xe00000f4)
 	#define DMDATA1 ((volatile uint32_t*)0xe00000f8)
 
-
 	char buffer[4] = { 0 };
 	int place = 0;
+	uint32_t timeout = 160000; // Give up after ~40ms
 	while( place < size )
 	{
 		int tosend = size - place;
 		if( tosend > 7 ) tosend = 7;
 
-		while( ((*DMDATA0) & 0x80) );
+		while( ((*DMDATA0) & 0x80) )
+			if( timeout-- == 0 ) return place;
+		timeout = 160000;
 
 		uint32_t d;
 		int t = 3;
@@ -893,8 +926,13 @@ void SetupDebugPrintf()
 {
 	// Clear out the sending flag.
 	*DMDATA1 = 0x0;
+	*DMDATA0 = 0x80;
 }
 
+void WaitForDebuggerToAttach() // Blocks (no timeout) until bit 0x80 of DMDATA0 reads as zero; presumably the debug host clears it.
+{
+	while( ( *DMDATA0 & 0x80 ) != 0 ) { }
+}
 
 void DelaySysTick( uint32_t n )
 {
diff --git a/ch32v003fun/ch32v003fun.h b/ch32v003fun/ch32v003fun.h
index 9f25a84910c83e962c15c1fb5b860606776e0e24..34ee9b1d174a26e3361b21a9f5ff701fd564b271 100644
--- a/ch32v003fun/ch32v003fun.h
+++ b/ch32v003fun/ch32v003fun.h
@@ -4819,6 +4819,9 @@ extern "C" {
 #define DELAY_US_TIME (SYSTEM_CORE_CLOCK / 8000000)
 #define DELAY_MS_TIME (SYSTEM_CORE_CLOCK / 8000)
 
+void handle_reset()            __attribute__((naked)) __attribute((section(".text.handle_reset"))) __attribute__((used));
+void DefaultIRQHandler( void ) __attribute__((section(".text.vector_handler"))) __attribute__((naked)) __attribute__((used));
+
 void DelaySysTick( uint32_t n );
 
 #define Delay_Us(n) DelaySysTick( n * DELAY_US_TIME )
@@ -4830,6 +4833,11 @@ void SystemInit(void) __attribute__((used));
 
 // Useful functions
 void SystemInit48HSI( void );
+// NOTE: HSEBYP is ORed with RCC_CTLR.  Set it to RCC_HSEBYP or 0.
+// If you are using an external oscillator, set it to RCC_HSEBYP.  Otherwise, if you are using a crystal, it must be 0.
+void SystemInitHSE( int HSEBYP );
+void SystemInitHSEPLL( int HSEBYP );
+
 
 #define UART_BAUD_RATE 115200
 #define OVER8DIV 4
@@ -4843,6 +4851,9 @@ void SetupUART( int uartBRR );
 
 void SetupDebugPrintf();
 
+void WaitForDebuggerToAttach();
+
+
 #ifdef __cplusplus
 };
 #endif
diff --git a/examples/blink/blink.bin b/examples/blink/blink.bin
index b1c36a5c828db912241513fd46ac9f0fae76f4d5..b3a46101b449bc35e39918f5f26d29ececf14ff7 100755
Binary files a/examples/blink/blink.bin and b/examples/blink/blink.bin differ
diff --git a/examples/blink/blink.c b/examples/blink/blink.c
index 408b02f49809ac1de1ce03d4bb50e1317a9cb972..e65639989d63a04cef910d2ae4fd5b238e15407e 100644
--- a/examples/blink/blink.c
+++ b/examples/blink/blink.c
@@ -12,23 +12,30 @@ int main()
 {
 	SystemInit48HSI();
 
-	// Enable GPIOD.
-	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD;
+	// Enable GPIOs
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
 
-	// GPIO D0 Push-Pull, 10MHz Output
+	// GPIO D0 Push-Pull
 	GPIOD->CFGLR &= ~(0xf<<(4*0));
 	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
 
-	// GPIO D0 Push-Pull, 10MHz Output
+	// GPIO D4 Push-Pull
 	GPIOD->CFGLR &= ~(0xf<<(4*4));
 	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
 
+	// GPIO C0 Push-Pull
+	GPIOC->CFGLR &= ~(0xf<<(4*0));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
+
 	while(1)
 	{
-		GPIOD->BSHR = 1 | (1<<4);	 // Turn on GPIOD0
-		Delay_Ms( 200 );
-		GPIOD->BSHR = (1<<16) | (1<<(16+4)); // Turn off GPIOD0
-		Delay_Ms( 200 );
+		GPIOD->BSHR = 1 | (1<<4);	 // Turn on GPIOs
+		GPIOC->BSHR = 1;
+		Delay_Ms( 250 );
+		GPIOD->BSHR = (1<<16) | (1<<(16+4)); // Turn off GPIODs
+		GPIOC->BSHR = (1<<16);
+		Delay_Ms( 250 );
 		count++;
 	}
 }
+
diff --git a/examples/bootload/Makefile b/examples/bootload/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d39497d767c87bed9dd45d735f0758813c72142d
--- /dev/null
+++ b/examples/bootload/Makefile
@@ -0,0 +1,51 @@
+TARGET:=bootload
+
+all : flash
+
+PREFIX:=riscv64-unknown-elf
+
+GPIO_Toggle:=EXAM/GPIO/GPIO_Toggle/User
+
+EVT:=../../ch32v003evt
+
+MINICHLINK:=../../minichlink
+
+ifeq ($(OS),Windows_NT)
+# On Windows, all the major RISC-V GCC installs are missing the -ec libgcc.
+LIB_GCC=../../misc/libgcc.a
+else
+LIB_GCC=-lgcc
+endif
+
+CH32V003FUN:=../../ch32v003fun
+
+CFLAGS:= \
+	-g -Os -flto -ffunction-sections \
+	-static-libgcc $(LIB_GCC) \
+	-march=rv32ec \
+	-mabi=ilp32e \
+	-I/usr/include/newlib \
+	-I$(CH32V003FUN) \
+	-nostdlib \
+	-I. -DCUSTOM_INTERRUPT_VECTOR
+
+LDFLAGS:=-T $(CH32V003FUN)/ch32v003fun-bootloader.ld -Wl,--gc-sections
+
+SYSTEM_C:=$(CH32V003FUN)/ch32v003fun.c
+
+$(TARGET).elf : $(SYSTEM_C) $(TARGET).c
+	$(PREFIX)-gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
+
+$(TARGET).bin : $(TARGET).elf
+	$(PREFIX)-size $^
+	$(PREFIX)-objdump -S $^ > $(TARGET).lst
+	$(PREFIX)-objdump -t $^ > $(TARGET).map
+	$(PREFIX)-objcopy -O binary $< $(TARGET).bin
+	$(PREFIX)-objcopy -O ihex $< $(TARGET).hex
+
+flash : $(TARGET).bin
+	$(MINICHLINK)/minichlink -h -U -w $< bootloader -B
+.PHONY : all flash clean  # these targets name actions, not files
+clean :
+	rm -rf $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).lst $(TARGET).map
+
diff --git a/examples/bootload/bootload.c b/examples/bootload/bootload.c
new file mode 100644
index 0000000000000000000000000000000000000000..a5975940f73027cf267f591b70fca7ff582b05f0
--- /dev/null
+++ b/examples/bootload/bootload.c
@@ -0,0 +1,84 @@
+// Could be defined here, or in the processor defines.
+#define SYSTEM_CORE_CLOCK 48000000
+
+#include "ch32v003fun.h"
+#include <stdio.h>
+
+#define APB_CLOCK SYSTEM_CORE_CLOCK
+
+uint32_t count;
+
+// You can override the interrupt vector this way (define your own InterruptVector in section .init):
+void InterruptVector()         __attribute__((naked)) __attribute((section(".init")));
+void InterruptVector() // Minimal vector: one uncompressed (norvc) jump to handle_reset and nothing else.
+{
+	asm volatile( "\n\
+	.align  2\n\
+	.option   norvc;\n\
+	j handle_reset");
+}
+
+// Bootloader demo: flash a signature on D0/D4/C0, blink five times, then select user code and leave the bootloader.
+
+int main()
+{
+	SystemInit48HSI();
+
+	// From here, you can do whatever you'd like!
+	// This code will live up at 0x1ffff000.
+
+	// Enable GPIOD + C
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
+
+	// GPIO D0 Push-Pull, 10MHz Output
+	GPIOD->CFGLR &= ~(0xf<<(4*0));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
+
+	// GPIO D4 Push-Pull, 10MHz Output
+	GPIOD->CFGLR &= ~(0xf<<(4*4));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
+
+	// GPIO C0 Push-Pull, 10MHz Output
+	GPIOC->CFGLR &= ~(0xf<<(4*0));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0); // Must target the same nibble (pin 0) that was just cleared.
+
+	static const uint32_t marker[] = { 0xaaaaaaaa };
+	count = marker[0];
+
+	int i;
+
+	// Make a clear signature.
+	for( i = 0; i < 10; i++ )
+	{
+		GPIOD->BSHR = 1 | (1<<4);                // Turn on GPIOD0 + D4
+		GPIOC->BSHR = 1;                         // Turn on GPIOC0
+		GPIOD->BSHR = (1<<16) | (1<<(16+4));     // Turn off GPIOD0 + D4
+		GPIOC->BSHR = 1<<16;                     // Turn off GPIOC0
+	}
+
+	for( i = 0; i < 5; i++ )
+	{
+		GPIOD->BSHR = 1 | (1<<4);                // Turn on GPIOD0 + D4
+		GPIOC->BSHR = 1;                         // Turn on GPIOC0
+		Delay_Ms( 250 );
+		GPIOD->BSHR = (1<<16) | (1<<(16+4));     // Turn off GPIOD0 + D4
+		GPIOC->BSHR = 1<<16;                     // Turn off GPIOC0
+		Delay_Ms( 20 );
+		count++;
+	}
+
+	// Exit bootloader after 5 blinks.
+
+	// Note we have to do this if we ended up in the bootloader because
+	// the main system booted us here.  If you don't care, you don't need
+	// to turn OBTKEYR back off.
+	FLASH->KEYR = FLASH_KEY1;
+	FLASH->KEYR = FLASH_KEY2;
+	FLASH->BOOT_MODEKEYR = FLASH_KEY1;
+	FLASH->BOOT_MODEKEYR = FLASH_KEY2;
+	FLASH->STATR = 0; // 1<<14 is zero, so, boot user code.
+	FLASH->CTLR = CR_LOCK_Set;
+
+	PFIC->SCTLR = 1u<<31; // Unsigned shift avoids shifting into the sign bit.  NOTE(review): presumably the system-reset request - confirm.
+	while(1);
+}
diff --git a/examples/debugprintfdemo/debugprintfdemo.c b/examples/debugprintfdemo/debugprintfdemo.c
index f5a801d6ae39a298e64ce9485eb7b4cd74df5f6a..80ed6830718e8c677691c5e3db2d9e5dd65e2d58 100644
--- a/examples/debugprintfdemo/debugprintfdemo.c
+++ b/examples/debugprintfdemo/debugprintfdemo.c
@@ -1,9 +1,5 @@
-// Really basic self-contained demo for the ch32v003
-// Doesn't rely on any of the weird HAL stuff from CH
-// Final executable is ~1/4th the size.
-
-// Could be defined here, or in the processor defines.
-#define SYSTEM_CORE_CLOCK 48000000
+/* Small example showing how to use the SWIO programming pin to 
+   do printf through the debug interface */
 
 #include "ch32v003fun.h"
 #include <stdio.h>
@@ -15,22 +11,30 @@ int main()
 	SystemInit48HSI();
 	SetupDebugPrintf();
 
-	// Enable GPIOD.
-	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD;
+	// Enable GPIOs
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
 
-	// GPIO D0, D4 Push-Pull, 10MHz Output
+	// GPIO D0 Push-Pull
 	GPIOD->CFGLR &= ~(0xf<<(4*0));
 	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
 
+	// GPIO D4 Push-Pull
 	GPIOD->CFGLR &= ~(0xf<<(4*4));
 	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
 
+	// GPIO C0 Push-Pull
+	GPIOC->CFGLR &= ~(0xf<<(4*0));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
+
 	while(1)
 	{
-		GPIOD->BSHR = 1 | (1<<4);	 // Turn on GPIOD0
-		//Delay_Ms( 50 );
-		GPIOD->BSHR = (1<<16) | ( 1<<(4+16) ); // Turn off GPIOD0
-		//Delay_Ms( 50 );
+		GPIOD->BSHR = 1 | (1<<4);	 // Turn on GPIOs
+		GPIOC->BSHR = 1;
 		printf( "+%d\n", count++ );
+		GPIOD->BSHR = (1<<16) | (1<<(16+4)); // Turn off GPIODs
+		GPIOC->BSHR = (1<<16);
+		printf( "-%d\n", count++ );
+		count++;
 	}
 }
+
diff --git a/examples/external_crystal/Makefile b/examples/external_crystal/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..4f4b542e0ff670e4b1ad4cf990adcb7e55d552b0
--- /dev/null
+++ b/examples/external_crystal/Makefile
@@ -0,0 +1,51 @@
+TARGET:=external_crystal
+
+all : flash
+
+PREFIX:=riscv64-unknown-elf
+
+GPIO_Toggle:=EXAM/GPIO/GPIO_Toggle/User
+
+EVT:=../../ch32v003evt
+
+MINICHLINK:=../../minichlink
+
+ifeq ($(OS),Windows_NT)
+# On Windows, all the major RISC-V GCC installs are missing the -ec libgcc.
+LIB_GCC=../../misc/libgcc.a
+else
+LIB_GCC=-lgcc
+endif
+
+CH32V003FUN:=../../ch32v003fun
+
+CFLAGS:= \
+	-g -Os -flto -ffunction-sections \
+	-static-libgcc $(LIB_GCC) \
+	-march=rv32ec \
+	-mabi=ilp32e \
+	-I/usr/include/newlib \
+	-I$(CH32V003FUN) \
+	-nostdlib \
+	-I. -DTINYVECTOR
+
+LDFLAGS:=-T $(CH32V003FUN)/ch32v003fun.ld -Wl,--gc-sections
+
+SYSTEM_C:=$(CH32V003FUN)/ch32v003fun.c
+
+$(TARGET).elf : $(SYSTEM_C) $(TARGET).c
+	$(PREFIX)-gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
+
+$(TARGET).bin : $(TARGET).elf
+	$(PREFIX)-size $^
+	$(PREFIX)-objdump -S $^ > $(TARGET).lst
+	$(PREFIX)-objdump -t $^ > $(TARGET).map
+	$(PREFIX)-objcopy -O binary $< $(TARGET).bin
+	$(PREFIX)-objcopy -O ihex $< $(TARGET).hex
+
+flash : $(TARGET).bin
+	$(MINICHLINK)/minichlink -w $< flash -b
+.PHONY : all flash clean  # these targets name actions, not files
+clean :
+	rm -rf $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).lst $(TARGET).map
+
diff --git a/examples/external_crystal/external_crystal.c b/examples/external_crystal/external_crystal.c
new file mode 100644
index 0000000000000000000000000000000000000000..d568775b3bc9d6d539be16a65dc720af08af8842
--- /dev/null
+++ b/examples/external_crystal/external_crystal.c
@@ -0,0 +1,40 @@
+// Could be defined here, or in the processor defines.
+#define SYSTEM_CORE_CLOCK 24000000
+
+#include "ch32v003fun.h"
+#include <stdio.h>
+
+#define APB_CLOCK SYSTEM_CORE_CLOCK
+
+uint32_t count;
+// Blink demo clocked through SystemInitHSE( 0 ); per the header note, 0 selects a crystal rather than a bypassed external oscillator.
+int main()
+{
+	SystemInitHSE( 0 );
+
+	// Enable GPIOs
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
+
+	// GPIO D0 Push-Pull
+	GPIOD->CFGLR &= ~(0xf<<(4*0)); // Clear the 4-bit config field for pin 0...
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0); // ...then write the new mode into it.
+
+	// GPIO D4 Push-Pull
+	GPIOD->CFGLR &= ~(0xf<<(4*4));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
+
+	// GPIO C0 Push-Pull
+	GPIOC->CFGLR &= ~(0xf<<(4*0));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
+
+	while(1)
+	{
+		GPIOD->BSHR = 1 | (1<<4);	 // Turn on GPIOs
+		GPIOC->BSHR = 1;
+		Delay_Ms( 250 );
+		GPIOD->BSHR = (1<<16) | (1<<(16+4)); // Turn off GPIODs
+		GPIOC->BSHR = (1<<16);
+		Delay_Ms( 250 );
+		count++; // Global counter, bumped once per full on/off cycle.
+	}
+}
diff --git a/examples/optionbytes/Makefile b/examples/optionbytes/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..967b1fb14448928b5b09d24c68d325c5c9a41a31
--- /dev/null
+++ b/examples/optionbytes/Makefile
@@ -0,0 +1,55 @@
+TARGET:=optionbytes
+
+all : flash
+
+PREFIX:=riscv64-unknown-elf
+
+GPIO_Toggle:=EXAM/GPIO/GPIO_Toggle/User
+
+EVT:=../../ch32v003evt
+
+MINICHLINK:=../../minichlink
+
+ifeq ($(OS),Windows_NT)
+# On Windows, all the major RISC-V GCC installs are missing the -ec libgcc.
+LIB_GCC=../../misc/libgcc.a
+else
+LIB_GCC=-lgcc
+endif
+
+CH32V003FUN:=../../ch32v003fun
+
+CFLAGS:= \
+	-g -Os -flto -ffunction-sections \
+	-static-libgcc $(LIB_GCC) \
+	-march=rv32ec \
+	-mabi=ilp32e \
+	-I/usr/include/newlib \
+	-I$(CH32V003FUN) \
+	-nostdlib \
+	-I. -DTINYVECTOR
+
+LDFLAGS:=-T $(CH32V003FUN)/ch32v003fun.ld -Wl,--gc-sections
+
+SYSTEM_C:=$(CH32V003FUN)/ch32v003fun.c
+
+$(TARGET).elf : $(SYSTEM_C) $(TARGET).c
+	$(PREFIX)-gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
+
+$(TARGET).bin : $(TARGET).elf
+	$(PREFIX)-size $^
+	$(PREFIX)-objdump -S $^ > $(TARGET).lst
+	$(PREFIX)-objdump -t $^ > $(TARGET).map
+	$(PREFIX)-objcopy -O binary $< $(TARGET).bin
+	$(PREFIX)-objcopy -O ihex $< $(TARGET).hex
+
+flash : $(TARGET).bin
+	$(MINICHLINK)/minichlink -w $< flash -b
+
+monitor : flash
+	$(MINICHLINK)/minichlink -T
+
+.PHONY : all flash monitor clean  # these targets name actions, not files
+clean :
+	rm -rf $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).lst $(TARGET).map
+
diff --git a/examples/optionbytes/optionbytes.c b/examples/optionbytes/optionbytes.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbec69aa3fadb2217ebacf4d07462e46e626e1ff
--- /dev/null
+++ b/examples/optionbytes/optionbytes.c
@@ -0,0 +1,158 @@
+/* This shows how to use the option bytes.  I.e. how do you disable NRST?
+   WARNING Portions of this code are under the following copyright.
+*/
+/********************************** (C) COPYRIGHT  *******************************
+ * File Name          : ch32v00x_flash.c
+ * Author             : WCH
+ * Version            : V1.0.0
+ * Date               : 2022/08/08
+ * Description        : This file provides all the FLASH firmware functions.
+ *********************************************************************************
+ * Copyright (c) 2021 Nanjing Qinheng Microelectronics Co., Ltd.
+ * Attention: This software (modified or not) and binary are used for 
+ * microcontroller manufactured by Nanjing Qinheng Microelectronics.
+ *******************************************************************************/
+
+// Could be defined here, or in the processor defines.
+#define SYSTEM_CORE_CLOCK 48000000
+
+#include "ch32v003fun.h"
+#include <stdio.h>
+
+uint32_t count;
+
+int FLASH_WaitForLastOperation(uint32_t Timeout);
+// Demo: print OB->USER, erase the option bytes (restoring RDPR), program a new USER value, and print it after each step.
+int main()
+{
+	SystemInit48HSI();
+	SetupDebugPrintf();
+	// Unlock: each key register takes FLASH_KEY1 followed by FLASH_KEY2.
+	FLASH->OBKEYR = FLASH_KEY1;
+	FLASH->OBKEYR = FLASH_KEY2;
+	FLASH->KEYR = FLASH_KEY1;
+	FLASH->KEYR = FLASH_KEY2;
+	FLASH->MODEKEYR = FLASH_KEY1;
+	FLASH->MODEKEYR = FLASH_KEY2;
+
+	printf( "Option bytes started as:%04x\n", OB->USER );
+
+	uint16_t rdptmp = RDP_Key;
+
+	// Erase the option bytes, then re-program RDPR with RDP_Key.
+	int status = FLASH_WaitForLastOperation(EraseTimeout);
+	if(status == FLASH_COMPLETE)
+	{
+		FLASH->OBKEYR = FLASH_KEY1;
+		FLASH->OBKEYR = FLASH_KEY2;
+
+		FLASH->CTLR |= CR_OPTER_Set;
+		FLASH->CTLR |= CR_STRT_Set;
+		status = FLASH_WaitForLastOperation(EraseTimeout);
+
+		if(status == FLASH_COMPLETE)
+		{
+			FLASH->CTLR &= CR_OPTER_Reset;
+			FLASH->CTLR |= CR_OPTPG_Set;
+			OB->RDPR = (uint16_t)rdptmp;
+			status = FLASH_WaitForLastOperation(ProgramTimeout);
+
+			if(status != FLASH_TIMEOUT)
+			{
+				FLASH->CTLR &= CR_OPTPG_Reset;
+			}
+		}
+		else
+		{
+			if(status != FLASH_TIMEOUT)
+			{
+				FLASH->CTLR &= CR_OPTPG_Reset;
+			}
+		}
+	}
+
+
+	printf( "After Clear:%04x\n", OB->USER );
+/* Notes from flash document:
+ * @param   OB_IWDG - Selects the IWDG mode
+ *            OB_IWDG_SW - Software IWDG selected
+ *            OB_IWDG_HW - Hardware IWDG selected
+ *          OB_STOP - Reset event when entering STOP mode.
+ *            OB_STOP_NoRST - No reset generated when entering in STOP
+ *            OB_STOP_RST - Reset generated when entering in STOP
+ *          OB_STDBY - Reset event when entering Standby mode.
+ *            OB_STDBY_NoRST - No reset generated when entering in STANDBY
+ *            OB_STDBY_RST - Reset generated when entering in STANDBY
+ *          OB_RST - Selects the reset IO mode and Ignore delay time
+ *            OB_RST_NoEN - Reset IO disable (PD7)
+ *            OB_RST_EN_DT12ms - Reset IO enable (PD7) and  Ignore delay time 12ms
+ *            OB_RST_EN_DT1ms - Reset IO enable (PD7) and  Ignore delay time 1ms
+ *            OB_RST_EN_DT128ms - Reset IO enable (PD7) and  Ignore delay time 128ms
+*/
+	uint16_t OB_IWDG = OB_IWDG_SW;      // Each local now holds the constant of its own option group;
+	uint16_t OB_STOP = OB_STOP_NoRST;   // the ORed value written to OB->USER below is unchanged.
+	uint16_t OB_STDBY = OB_STDBY_NoRST;
+	uint16_t OB_RST = OB_RST_EN_DT1ms;
+	// Program the USER option byte with the selections above.
+	FLASH->OBKEYR = FLASH_KEY1;
+	FLASH->OBKEYR = FLASH_KEY2;
+	status = FLASH_WaitForLastOperation(10000);
+
+	if(status == FLASH_COMPLETE)
+	{
+		FLASH->CTLR |= CR_OPTPG_Set;
+		OB->USER = OB_IWDG | (uint16_t)(OB_STOP | (uint16_t)(OB_STDBY | (uint16_t)(OB_RST | (uint16_t)0xE0)));
+
+		status = FLASH_WaitForLastOperation(10000);
+		if(status != FLASH_TIMEOUT)
+		{
+			FLASH->CTLR &= CR_OPTPG_Reset;
+		}
+	}
+
+	printf( "After Write:%04x\n", OB->USER );
+
+	while(1);
+}
+
+
+int FLASH_GetBank1Status(void)
+{
+	// Classifies the flash controller state from FLASH->STATR:
+	//   FLASH_BUSY      - the FLASH_FLAG_BANK1_BSY bits equal FLASH_FLAG_BSY
+	//   FLASH_ERROR_WRP - not busy, and a FLASH_FLAG_BANK1_WRPRTERR bit is set
+	//   FLASH_COMPLETE  - neither of the above
+	// STATR is read once for the busy test and, if not busy, once more for the error test.
+
+	const int busy = (FLASH->STATR & FLASH_FLAG_BANK1_BSY) == FLASH_FLAG_BSY;
+	if( busy )
+	{
+		return FLASH_BUSY;
+	}
+
+	if( (FLASH->STATR & FLASH_FLAG_BANK1_WRPRTERR) != 0 )
+	{
+		return FLASH_ERROR_WRP;
+	}
+
+	return FLASH_COMPLETE;
+}
+
+
+int FLASH_WaitForLastOperation(uint32_t Timeout)
+{
+	// Polls FLASH_GetBank1Status() until it stops returning FLASH_BUSY or Timeout
+	// polls are used up (Timeout counts loop iterations, not time).
+	// Returns the last status seen, or FLASH_TIMEOUT if the flash is still busy.
+	int status = FLASH_GetBank1Status();
+	while( status == FLASH_BUSY && Timeout != 0 )
+	{
+		status = FLASH_GetBank1Status();
+		Timeout--;
+	}
+
+	// Only a still-busy flash is a timeout; finishing on the very last poll is a success.
+	if( status == FLASH_BUSY ) { status = FLASH_TIMEOUT; }
+	return status;
+}
+
diff --git a/examples/run_from_ram/Makefile b/examples/run_from_ram/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..41dd42d3616849f0e9271561530dcb6926610b6a
--- /dev/null
+++ b/examples/run_from_ram/Makefile
@@ -0,0 +1,51 @@
+TARGET:=run_from_ram
+
+all : flash
+
+PREFIX:=riscv64-unknown-elf
+
+GPIO_Toggle:=EXAM/GPIO/GPIO_Toggle/User
+
+EVT:=../../ch32v003evt
+
+MINICHLINK:=../../minichlink
+
+ifeq ($(OS),Windows_NT)
+# On Windows, all the major RISC-V GCC installs are missing the -ec libgcc.
+LIB_GCC=../../misc/libgcc.a
+else
+LIB_GCC=-lgcc
+endif
+
+CH32V003FUN:=../../ch32v003fun
+
+CFLAGS:= \
+	-g -Os -flto -ffunction-sections \
+	-static-libgcc $(LIB_GCC) \
+	-march=rv32ec \
+	-mabi=ilp32e \
+	-I/usr/include/newlib \
+	-I$(CH32V003FUN) \
+	-nostdlib \
+	-I. -DTINYVECTOR
+
+LDFLAGS:=-T $(CH32V003FUN)/ch32v003fun.ld -Wl,--gc-sections
+
+SYSTEM_C:=$(CH32V003FUN)/ch32v003fun.c
+
+$(TARGET).elf : $(SYSTEM_C) $(TARGET).c
+	$(PREFIX)-gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
+
+$(TARGET).bin : $(TARGET).elf
+	$(PREFIX)-size $^
+	$(PREFIX)-objdump -S $^ > $(TARGET).lst
+	$(PREFIX)-objdump -t $^ > $(TARGET).map
+	$(PREFIX)-objcopy -O binary $< $(TARGET).bin
+	$(PREFIX)-objcopy -O ihex $< $(TARGET).hex
+
+flash : $(TARGET).bin
+	$(MINICHLINK)/minichlink -w $< flash -b
+.PHONY : all flash clean  # these targets name actions, not files
+clean :
+	rm -rf $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).lst $(TARGET).map
+
diff --git a/examples/run_from_ram/run_from_ram.c b/examples/run_from_ram/run_from_ram.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ece215fa526b39c1f5b3a76a0cfaf20e6873f03
--- /dev/null
+++ b/examples/run_from_ram/run_from_ram.c
@@ -0,0 +1,68 @@
+// Could be defined here, or in the processor defines.
+#define SYSTEM_CORE_CLOCK 48000000
+
+#include "ch32v003fun.h"
+#include <stdio.h>
+
+uint32_t count;
+
+// There's a few reasons you might want to run from RAM, for instance
+// it's faster than running from flash, especially if you're running 
+// on PLL.  Or maybe you want to power down the flash for some reason.
+//
+// Well, no worries!  You can just stick it in the .data segment!
+
+void RamFunction() __attribute__((section(".data"))) __attribute__((used)); // Placed in .data so it executes from RAM (see the note above).
+void RamFunction() // Configures D0, D4 and C0 as outputs, then blinks them forever; never returns.
+{
+	// GPIO D0 Push-Pull
+	GPIOD->CFGLR &= ~(0xf<<(4*0)); // Clear the pin's 4-bit config field first...
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0); // ...then OR in the new mode.
+
+	// GPIO D4 Push-Pull
+	GPIOD->CFGLR &= ~(0xf<<(4*4));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
+
+	// GPIO C0 Push-Pull
+	GPIOC->CFGLR &= ~(0xf<<(4*0));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
+
+	while(1)
+	{
+		GPIOD->BSHR = 1 | (1<<4);	 // Turn on GPIOs
+		GPIOC->BSHR = 1;
+		Delay_Ms( 250 );
+		GPIOD->BSHR = (1<<16) | (1<<(16+4)); // Turn off GPIOs
+		GPIOC->BSHR = (1<<16);
+		Delay_Ms( 250 );
+
+		// But we turbo through twiddling a pin.
+		// The block below stores a0 (the "on" word) and a1 (the "off" word) alternately to the address held in a2.
+		asm volatile("\n\
+			li a0, 1 | (1<<4)\n\
+			li a1, (1<<16) | (1<<(16+4))\n\
+			la a2, 0x40011410 /* GPIO D*/ \n\
+			c.nop\n\
+			c.sw a0, 0(a2)\n\
+			c.sw a1, 0(a2)\n\
+			c.sw a0, 0(a2)       /* Writing out takes 2 cycles from what I can tell*/ \n\
+			c.addi %[count], 1   /* Insert this for comparative timing,  it's 1 cycle */ \n\
+			c.sw a1, 0(a2)\n\
+			c.sw a0, 0(a2)\n\
+			c.sw a1, 0(a2)\n\
+			c.sw a0, 0(a2)\n\
+			c.sw a1, 0(a2)\n\
+			" : [count]"+r"(count) : : "a0", "a1", "a2" ); // count is read and written; a0-a2 are scratch registers.
+	}
+}
+
+int main() // Entry point: runs the clock setup, enables the GPIO port clocks, then hands off to RamFunction.
+{
+	SystemInit48HSI();
+
+	// Enable GPIOs
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
+
+	RamFunction(); // Loops forever, so main never reaches its closing brace.
+}
+
diff --git a/examples/sandbox/Makefile b/examples/sandbox/Makefile
index d626a3809d6818e66b6801b24f555d89a1245ea6..d2c3451040d0ad87ecbd1bef27a9780bd1c19d44 100644
--- a/examples/sandbox/Makefile
+++ b/examples/sandbox/Makefile
@@ -6,7 +6,7 @@ PREFIX:=riscv64-unknown-elf
 
 GPIO_Toggle:=EXAM/GPIO/GPIO_Toggle/User
 
-CH32V003FUN:=../../ch32v003CH32V003FUN
+CH32V003FUN:=../../ch32v003fun
 MINICHLINK:=../../minichlink
 
 CFLAGS:= \
@@ -35,8 +35,11 @@ $(TARGET).bin : $(TARGET).elf
 
 flash : $(TARGET).bin
 	make -C $(MINICHLINK) all
-	$(MINICHLINK)/minichlink -w $< -r
+	$(MINICHLINK)/minichlink -w $< flash -b
+
+monitor : flash
+	$(MINICHLINK)/minichlink -T
+	
 
 clean :
 	rm -rf $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).lst $(TARGET).map $(TARGET).hex
-
diff --git a/examples/sandbox/sandbox.c b/examples/sandbox/sandbox.c
index 05365be064812c3b7366d21e686c4742e3f78b5d..742254cd6ac53cd22462a0c758a4a517a0c89634 100644
--- a/examples/sandbox/sandbox.c
+++ b/examples/sandbox/sandbox.c
@@ -1,50 +1,51 @@
-// Could be defined here, or in the processor defines.
-#define SYSTEM_CORE_CLOCK 48000000
-#define APB_CLOCK SYSTEM_CORE_CLOCK
+/* Small example showing how to use the SWIO programming pin to 
+   do printf through the debug interface */
 
-#include "ch32v00x.h"
+#include "ch32v003fun.h"
 #include <stdio.h>
-#include <string.h>
 
-// Working on WS2812 driving.
+uint32_t count;
+
+
+
+// Tell the compiler to put this code in the .data section.  That
+// will cause the startup code to copy it from flash into RAM where
+// it can be easily modified at runtime.
+void SRAMCode( ) __attribute__(( section(".data"))) __attribute__((noinline)) __attribute__((noreturn));
+void SRAMCode( ) // Endless loop that alternately stores a1 and a2 to address 0x40011410.
+{
+	asm volatile( 
+"li a0, 0x40011410\n" // Address that both stores below write to.
+"li a1, (1 | (1<<4))\n" // Bits 0 and 4.
+"li a2, (1 | (1<<4))<<16\n" // The same two bits shifted up by 16.
+"1: c.sw a1, 0(a0)\n"
+"   c.sw a2, 0(a0)\n"
+"   j 1b\n" ); // Jumps back to label 1, so control never leaves the asm (matches noreturn).
+}
 
 int main()
 {
 	SystemInit48HSI();
-	SetupUART( UART_BRR );
+	SetupDebugPrintf();
 
-	int k;
+	// Boost CPU supply.
+	EXTEN->EXTEN_CTR = EXTEN_LDO_TRIM;
 
-	// Enable GPIOD (for debugging)
-	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD;
+	// Enable GPIOs
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
+
+	// GPIO D0 Push-Pull
 	GPIOD->CFGLR &= ~(0xf<<(4*0));
 	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
-	GPIOD->BSHR = 1; // Turn on GPIOD0
-	GPIOD->BSHR = 1<<16; // Turn off GPIOD0
-
-
-		//DCSR
-		asm volatile("\n\
-			li t0, 0x4\n\
-			csrw 0x7B0, t0\n\
-		");
-
-	while(1)
-	{
-		Delay_Ms( 2 );
-	    *(uint32_t*)(0xe0000100) = 2;  //Hopefully enable debug (dmcontrol .0) --> Doesn't work.
 
-		uint32_t val = *(uint32_t*)0xe00000f4;
-		*(uint32_t*)0xe00000f4 = 0xaabbccdd;
+	// GPIO D4 Push-Pull
+	GPIOD->CFGLR &= ~(0xf<<(4*4));
+	GPIOD->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*4);
 
-		printf( "0xe00000f4: %08x %08x\n", val, __get_dscratch0() );
+	// GPIO C0 Push-Pull
+	GPIOC->CFGLR &= ~(0xf<<(4*0));
+	GPIOC->CFGLR |= (GPIO_Speed_10MHz | GPIO_CNF_OUT_PP)<<(4*0);
 
-		// Write to dscratch0
-		asm volatile("\n\
-			li t0, 0xa8b8c8d8\n\
-			csrw 0x7B2, t0\n\
-			csrw 0x7B3, t0\n\
-		");
-	}
+	SRAMCode();
 }
 
diff --git a/examples/self_modify_code/Makefile b/examples/self_modify_code/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..ebe3d0b89b3d4b8a8de1aac99def670d88fd6492
--- /dev/null
+++ b/examples/self_modify_code/Makefile
@@ -0,0 +1,46 @@
+TARGET:=self_modify_code
+
+all : flash
+
+PREFIX:=riscv64-unknown-elf
+
+GPIO_Toggle:=EXAM/GPIO/GPIO_Toggle/User
+
+CH32V003FUN:=../../ch32v003fun
+MINICHLINK:=../../minichlink
+
+CFLAGS:= \
+	-g -Os -flto -ffunction-sections \
+	-static-libgcc -lgcc \
+	-march=rv32ec \
+	-mabi=ilp32e \
+	-I/usr/include/newlib \
+	-I$(CH32V003FUN) \
+	-nostdlib \
+	-I.
+
+LDFLAGS:=-T $(CH32V003FUN)/ch32v003fun.ld -Wl,--gc-sections
+
+SYSTEM_C:=$(CH32V003FUN)/ch32v003fun.c
+
+$(TARGET).elf : $(SYSTEM_C) $(TARGET).c
+	$(PREFIX)-gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
+
+$(TARGET).bin : $(TARGET).elf
+	$(PREFIX)-size $^
+	$(PREFIX)-objdump -S $^ > $(TARGET).lst
+	$(PREFIX)-objdump -t $^ > $(TARGET).map
+	$(PREFIX)-objcopy -O binary $< $(TARGET).bin
+	$(PREFIX)-objcopy -O ihex $< $(TARGET).hex
+
+flash : $(TARGET).bin
+	make -C $(MINICHLINK) all
+	$(MINICHLINK)/minichlink -w $< flash -b
+
+monitor : flash
+	$(MINICHLINK)/minichlink -T
+
+# Remove every artifact produced by the rules above.
+clean :
+	rm -rf $(TARGET).elf $(TARGET).bin $(TARGET).hex $(TARGET).lst $(TARGET).map
+
diff --git a/examples/self_modify_code/self_modify_code.c b/examples/self_modify_code/self_modify_code.c
new file mode 100644
index 0000000000000000000000000000000000000000..bf33dc5b56cbd76408260709067db4aaf65bcaa6
--- /dev/null
+++ b/examples/self_modify_code/self_modify_code.c
@@ -0,0 +1,99 @@
+/* Small example showing self-modifying code running from RAM: it rewrites
+   a csrrs opcode to read every CSR and prints the non-zero ones */
+
+#define SYSTEM_CORE_CLOCK 24000000
+#include "ch32v003fun.h"
+#include <stdio.h>
+
+uint32_t count;
+
+
+// This is a complicated way to do it from C land, as a demonstration
+
+// Tell the compiler to put this code in the .data section.  That
+// will cause the startup code to copy it from flash into RAM where
+// it can be easily modified at runtime.
+// Reads CSR number `whichcsr` by rewriting the csrrs opcode below just before it executes.
+uint32_t ReadCSRSelfModify( uint16_t whichcsr ) __attribute__(( section(".data"))) __attribute__((noinline));
+uint32_t ReadCSRSelfModify( uint16_t whichcsr )
+{
+	uint32_t ret;
+
+	// Tricky: GCC will make this variable "point to" the opcode
+	// of the csrr instruction below.
+	volatile extern uint32_t readCSRLabel;
+
+	// We have to put this here to "force" the compiler to order the
+	// instructions in this way.  Otherwise, the compiler will try
+	// to optimize the code and inline the assembly into something where
+	// our global handle into assembly code becomes meaningless.
+	// Annoyingly, it has to contain at least one instruction :(
+	asm volatile( "nop" );
+
+	// 000026f3 is csrrs a3, 0x000, x0; So, we modify it, placing the
+	// CSR we want to read in the top 12 bits of the instruction.
+	// Shift as unsigned: CSR numbers >= 0x800 would overflow the promoted signed int.
+	readCSRLabel = 0x000026f3 | ((uint32_t)whichcsr << 20);
+
+	// The actual assembly block inserted into the C function.  This
+	// defines the local label, globally, so the linker will be able to
+	// pick it up.  We also need to use a fixed register, a3, so we
+	// can know what opcode we want to use, then we can let C tell us
+	// what register it would like the value in.
+	//
+	// The fence is needed to make sure the CPU knows to not use
+	// cached instructions.
+	//
+	// The constraints are "ret" is a "write" register, and register a3
+	// is going to be clobbered by the assembly code.
+	asm volatile( 
+		".global readCSRLabel   \n"
+		"	fence               \n"
+		"readCSRLabel:          \n"
+		"	csrrs a3, 0x000, x0 \n"
+		"	addi %[ret], a3, 0  \n"
+		 : [ret]"=r"(ret) : : "a3" );
+	return ret;
+}
+
+
+uint32_t ReadCSRSelfModifySimple( uint16_t whichcsr ) __attribute__(( section(".data"))) __attribute__((noinline));
+uint32_t ReadCSRSelfModifySimple( uint16_t whichcsr ) // Reads CSR `whichcsr` by patching the csrrs opcode below, entirely from assembly.
+{
+	uint32_t ret;
+	uint32_t csrcmd = 0x000026f3 | ( (uint32_t)whichcsr << 20 ); // Unsigned shift: CSR numbers >= 0x800 would overflow a signed int.
+	asm volatile( 
+		".global readCSRLabelSimple   \n" // Own label: ReadCSRSelfModify already defines readCSRLabel in this file.
+		"   la a3, readCSRLabelSimple \n"
+		"   sw %[csrcmd], 0(a3) \n" // Overwrite the csrrs opcode that follows the fence.
+		"   fence               \n"
+		"readCSRLabelSimple:          \n"
+		"	csrrs a3, 0x000, x0 \n"
+		"	addi %[ret], a3, 0  \n"
+		 : [ret]"=r"(ret) : [csrcmd]"r"(csrcmd) : "a3", "memory" ); // "memory": the sw above writes to RAM.
+
+	return ret;
+}
+
+
+int main()
+{
+	// Clock setup, debug printf setup, then WaitForDebuggerToAttach before any output.
+	SystemInit48HSI();
+	SetupDebugPrintf();
+	WaitForDebuggerToAttach();
+
+	// Enable GPIOs
+	RCC->APB2PCENR |= RCC_APB2Periph_GPIOD | RCC_APB2Periph_GPIOC;
+	puts( "Print all non-zero CSRs:" );
+
+	// Walk the whole 12-bit CSR number space and report every non-zero value.
+	for( int csr = 0x000; csr < 0x1000; csr++ )
+	{
+		uint32_t value = ReadCSRSelfModifySimple( csr );
+		if( !value ) continue;
+		printf( "%03x = %08x\n", csr, value );
+	}
+	printf( "Done\n" );
+	while( 1 );
+}
+
diff --git a/minichlink/minichlink.c b/minichlink/minichlink.c
index 3160afaf05fbcdb4dc03e2f1dcb7f8eb717bef33..5f6b09428db3726254a4cc5473ce177eca07f01a 100644
--- a/minichlink/minichlink.c
+++ b/minichlink/minichlink.c
@@ -11,9 +11,9 @@
 #include "minichlink.h"
 #include "../ch32v003fun/ch32v003fun.h"
 
-static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber );
 static int64_t StringToMemoryAddress( const char * number );
 static void StaticUpdatePROGBUFRegs( void * dev );
+static int InternalUnlockBootloader( void * dev );
 
 void TestFunction(void * v );
 struct MiniChlinkFunctions MCF;
@@ -40,9 +40,11 @@ int main( int argc, char ** argv )
 	int status;
 	int must_be_end = 0;
 
-	int doing_unblock = (argc > 1 && argv[1][0] == '-' && argv[1][1] == 'u' );
+	int skip_startup = 
+		(argc > 1 && argv[1][0] == '-' && argv[1][1] == 'u' ) |
+		(argc > 1 && argv[1][0] == '-' && argv[1][1] == 'X' );
 
-	if( !doing_unblock && MCF.SetupInterface )
+	if( !skip_startup && MCF.SetupInterface )
 	{
 		if( MCF.SetupInterface( dev ) < 0 )
 		{
@@ -108,10 +110,19 @@ keep_going:
 				else
 					goto unimplemented;
 				break;
+			case 'U':
+				// Unlock Bootloader
+				if( InternalUnlockBootloader( dev ) )
+					goto unimplemented;
+				break;
 			case 'b':  //reBoot
 				if( !MCF.HaltMode || MCF.HaltMode( dev, 1 ) )
 					goto unimplemented;
 				break;
+			case 'B':  //reBoot into Bootloader
+				if( !MCF.HaltMode || MCF.HaltMode( dev, 3 ) )
+					goto unimplemented;
+				break;
 			case 'e':  //rEsume
 				if( !MCF.HaltMode || MCF.HaltMode( dev, 2 ) )
 					goto unimplemented;
@@ -168,6 +179,19 @@ keep_going:
 					goto unimplemented;
 				break;
 			}
+			case 'X':
+			{
+				iarg++;
+				if( iarg >= argc )
+				{
+					fprintf( stderr, "Vendor command requires an actual command\n" );
+					goto unimplemented;
+				}
+				if( MCF.VendorCommand )
+					if( MCF.VendorCommand( dev, argv[iarg++] ) )
+						goto unimplemented;
+				break;
+			}
 			case 'r':
 			{
 				if( MCF.HaltMode ) MCF.HaltMode( dev, 0 );
@@ -345,7 +369,7 @@ keep_going:
 					goto unimplemented;
 				}
 
-				printf( "Image written successfully\n" );
+				printf( "Image written.\n" );
 
 				free( image );
 				break;
@@ -375,8 +399,7 @@ help:
 	fprintf( stderr, " -b Reboot out of Halt\n" );
 	fprintf( stderr, " -e Resume from halt\n" );
 	fprintf( stderr, " -h Place into Halt\n" );
-	fprintf( stderr, " -D Configure NRST as GPIO **WARNING** If you do this and you reconfig\n" );
-	fprintf( stderr, "      the SWIO pin (PD1) on boot, your part can never again be programmed!\n" );
+	fprintf( stderr, " -D Configure NRST as GPIO\n" );
 	fprintf( stderr, " -d Configure NRST as NRST\n" );
 //	fprintf( stderr, " -P Enable Read Protection (UNTESTED)\n" );
 //	fprintf( stderr, " -p Disable Read Protection (UNTESTED)\n" );
@@ -400,7 +423,7 @@ unimplemented:
 
 static int StaticUnlockFlash( void * dev, struct InternalState * iss );
 
-static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber )
+int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber )
 {
 	if( !number || !number[0] ) return defaultNumber;
 	int radix = 10;
@@ -528,6 +551,85 @@ static void StaticUpdatePROGBUFRegs( void * dev )
 	MCF.WriteReg32( dev, DMCOMMAND, 0x0023100d ); // Copy data to x13
 }
 
+// Writes the boot-mode keys and sets the boot-to-bootloader bit; returns 0 on success, nonzero on any failure.
+static int InternalUnlockBootloader( void * dev )
+{
+	if( !MCF.WriteWord || !MCF.ReadWord ) return -99; // Both accessors are used below.
+	int ret = 0;
+	uint32_t OBTKEYR = 0; // Stays defined even if the read below fails.
+	ret |= MCF.WriteWord( dev, 0x40022028, 0x45670123 ); //(FLASH_BOOT_MODEKEYP)
+	ret |= MCF.WriteWord( dev, 0x40022028, 0xCDEF89AB ); //(FLASH_BOOT_MODEKEYP)
+	ret |= MCF.ReadWord( dev, 0x40022008, &OBTKEYR ); //(FLASH_OBTKEYR) NOTE(review): confirm this address against the FLASH register map.
+	if( ret )
+	{
+		fprintf( stderr, "Error operating with OBTKEYR\n" );
+		return -1;
+	}
+	if( OBTKEYR & (1<<15) )
+	{
+		fprintf( stderr, "Error: Could not unlock boot section (%08x)\n", OBTKEYR );
+		return -2; // Unlock failed: report it instead of carrying on and possibly returning 0.
+	}
+	OBTKEYR |= (1<<14); // Configure for boot-to-bootload.
+	ret |= MCF.WriteWord( dev, 0x40022008, OBTKEYR );
+	ret |= MCF.ReadWord( dev, 0x40022008, &OBTKEYR ); //(FLASH_OBTKEYR)
+	printf( "FLASH_OBTKEYR = %08x (%d)\n", OBTKEYR, ret );
+	return ret;
+}
+
+static int DefaultWriteHalfWord( void * dev, uint32_t address_to_write, uint32_t data ) // Stores the low 16 bits of `data` at `address_to_write` via a two-instruction debug program; returns the WaitForDoneOp result.
+{
+	int ret = 0;
+	struct InternalState * iss = (struct InternalState*)(((struct ProgrammerStructBase*)dev)->internal);
+	if( MCF.VoidHighLevelState ) MCF.VoidHighLevelState( dev ); // Optional hook; only called when the programmer provides it.
+	iss->statetag = STTAG( "XXXX" ); // Tag the state so it matches no cached sequence (DefaultReadWord compares against "RDSQ").
+
+	MCF.WriteReg32( dev, DMABSTRACTAUTO, 0x00000000 ); // Disable Autoexec.
+
+	// Different address, so we don't need to re-write all the program regs.
+	// sh x8,0(x9)  // Write to the address.
+	MCF.WriteReg32( dev, DMPROGBUF0, 0x00849023 );
+	MCF.WriteReg32( dev, DMPROGBUF1, 0x00100073 ); // ebreak (32-bit encoding, not c.ebreak)
+
+	MCF.WriteReg32( dev, DMDATA0, address_to_write );
+	MCF.WriteReg32( dev, DMCOMMAND, 0x00231009 ); // Copy data to x9
+	MCF.WriteReg32( dev, DMDATA0, data );
+	MCF.WriteReg32( dev, DMCOMMAND, 0x00271008 ); // Copy data to x8, and execute program.
+
+	ret |= MCF.WaitForDoneOp( dev );
+	iss->currentstateval = -1; // Forget the cached address value, since this program changed x8/x9.
+
+
+	return ret;
+}
+
+static int DefaultReadHalfWord( void * dev, uint32_t address_to_write, uint32_t * data ) // Loads a half word from `address_to_write` (the address to READ, despite the name) into *data via a two-instruction debug program.
+{
+	int ret = 0;
+	struct InternalState * iss = (struct InternalState*)(((struct ProgrammerStructBase*)dev)->internal);
+	if( MCF.VoidHighLevelState ) MCF.VoidHighLevelState( dev ); // Optional hook; only called when the programmer provides it.
+	iss->statetag = STTAG( "XXXX" ); // Tag the state so it matches no cached sequence (DefaultReadWord compares against "RDSQ").
+
+	MCF.WriteReg32( dev, DMABSTRACTAUTO, 0x00000000 ); // Disable Autoexec.
+
+	// Different address, so we don't need to re-write all the program regs.
+	// lh x8,0(x9)  // Read from the address (lh sign-extends the 16-bit value).
+	MCF.WriteReg32( dev, DMPROGBUF0, 0x00049403 );
+	MCF.WriteReg32( dev, DMPROGBUF1, 0x00100073 ); // ebreak (32-bit encoding, not c.ebreak)
+
+	MCF.WriteReg32( dev, DMDATA0, address_to_write );
+	MCF.WriteReg32( dev, DMCOMMAND, 0x00231009 ); // Copy data to x9
+	MCF.WriteReg32( dev, DMCOMMAND, 0x00241000 ); // Only execute.
+	MCF.WriteReg32( dev, DMCOMMAND, 0x00221008 ); // Read x8 into DATA0.
+
+	ret |= MCF.WaitForDoneOp( dev );
+	iss->currentstateval = -1; // Forget the cached address value, since this program changed x8/x9.
+
+
+	return ret | MCF.ReadReg32( dev, DMDATA0, data ); // Combine the wait status with the status of fetching DATA0.
+}
+
+
 static int DefaultWriteWord( void * dev, uint32_t address_to_write, uint32_t data )
 {
 	struct InternalState * iss = (struct InternalState*)(((struct ProgrammerStructBase*)dev)->internal);
@@ -633,7 +735,7 @@ int DefaultWriteBinaryBlob( void * dev, uint32_t address_to_write, uint32_t blob
 
 	if( blob_size == 0 ) return 0;
 
-	if( (address_to_write & 0xff000000) == 0x08000000 || (address_to_write & 0xff000000) == 0x00000000 ) 
+	if( (address_to_write & 0xff000000) == 0x08000000 || (address_to_write & 0xff000000) == 0x00000000 || (address_to_write & 0x1FFFF800) == 0x1FFFF000 ) 
 		is_flash = 1;
 
 	if( is_flash && MCF.BlockWrite64 && ( address_to_write & 0x3f ) == 0 )
@@ -730,13 +832,11 @@ timedout:
 static int DefaultReadWord( void * dev, uint32_t address_to_read, uint32_t * data )
 {
 	struct InternalState * iss = (struct InternalState*)(((struct ProgrammerStructBase*)dev)->internal);
-
 	if( iss->statetag != STTAG( "RDSQ" ) || address_to_read != iss->currentstateval )
 	{
 		if( iss->statetag != STTAG( "RDSQ" ) )
 		{
 			MCF.WriteReg32( dev, DMABSTRACTAUTO, 0 ); // Disable Autoexec.
-
 			// c.lw x8,0(x11) // Pull the address from DATA1
 			// c.lw x9,0(x8)  // Read the data at that location.
 			MCF.WriteReg32( dev, DMPROGBUF0, 0x40044180 );
@@ -880,6 +980,21 @@ static int DefaultHaltMode( void * dev, int mode )
 		MCF.WriteReg32( dev, DMCONTROL, 0x40000001 ); // resumereq
 		MCF.FlushLLCommands( dev );
 		break;
+	case 3:
+		MCF.WriteReg32( dev, DMCONTROL, 0x80000001 ); // Make the debug module work properly.
+		MCF.WriteReg32( dev, DMCONTROL, 0x80000001 ); // Initiate a halt request.
+
+		MCF.WriteWord( dev, (intptr_t)&FLASH->KEYR, FLASH_KEY1 );
+		MCF.WriteWord( dev, (intptr_t)&FLASH->KEYR, FLASH_KEY2 );
+		MCF.WriteWord( dev, (intptr_t)&FLASH->BOOT_MODEKEYR, FLASH_KEY1 );
+		MCF.WriteWord( dev, (intptr_t)&FLASH->BOOT_MODEKEYR, FLASH_KEY2 );
+		MCF.WriteWord( dev, (intptr_t)&FLASH->STATR, 1<<14 );
+		MCF.WriteWord( dev, (intptr_t)&FLASH->CTLR, CR_LOCK_Set );
+
+		MCF.WriteReg32( dev, DMCONTROL, 0x80000003 ); // Reboot.
+		MCF.WriteReg32( dev, DMCONTROL, 0x40000001 ); // resumereq
+		MCF.FlushLLCommands( dev );
+		break;
 	}
 	iss->processor_in_mode = mode;
 	return 0;
@@ -972,7 +1087,7 @@ int DefaultUnbrick( void * dev )
 
 	if( timeout == max_timeout ) 
 	{
-		printf( "Timed out trying to unbrick\n" );
+		fprintf( stderr, "Timed out trying to unbrick\n" );
 		return -5;
 	}
 	MCF.Erase( dev, 0, 0, 1);
@@ -980,6 +1095,144 @@ int DefaultUnbrick( void * dev )
 	return -5;
 }
 
+int DefaultConfigureNRSTAsGPIO( void * dev, int one_if_yes_gpio  ) // Stub: prints an error and always returns -5; both parameters are unused by the live code.
+{
+	fprintf( stderr, "Error: DefaultConfigureNRSTAsGPIO does not work via the programmer here.  Please see the demo \"optionbytes\"\n" );
+	return -5; // Nothing after this line is ever executed.
+#if 0 // Everything down to the final #endif is compiled out; it records the attempted option-byte sequence.
+	int ret = 0;
+	uint32_t csw;
+
+
+	if( MCF.ReadWord( dev, 0x1FFFF800, &csw ) )
+	{
+		fprintf( stderr, "Error: failed to get user word\n" );
+		return -5;
+	}
+
+	printf( "CSW WAS : %08x\n", csw );
+
+	MCF.WriteWord( dev, 0x40022008, 0x45670123 ); // OBKEYR = 0x40022008
+	MCF.WriteWord( dev, 0x40022008, 0xCDEF89AB );
+	MCF.WriteWord( dev, 0x40022004, 0x45670123 ); // FLASH->KEYR = 0x40022004
+	MCF.WriteWord( dev, 0x40022004, 0xCDEF89AB );
+	MCF.WriteWord( dev, 0x40022024, 0x45670123 ); // MODEKEYR = 0x40022024
+	MCF.WriteWord( dev, 0x40022024, 0xCDEF89AB );
+
+//XXXX THIS DOES NOT WORK IT CANNOT ERASE.
+	uint32_t ctlr;
+	if( MCF.ReadWord( dev, 0x40022010, &ctlr ) ) // FLASH->CTLR = 0x40022010
+	{
+		return -9;
+	}
+	ctlr |= CR_OPTER_Set | CR_STRT_Set; // OBER
+	MCF.WriteWord( dev, 0x40022010, ctlr ); // FLASH->CTLR = 0x40022010
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+
+	MCF.WriteHalfWord( dev, (intptr_t)&OB->RDPR, RDP_Key );
+
+    ctlr &=~CR_OPTER_Reset;
+	MCF.WriteWord( dev, 0x40022010, ctlr ); // FLASH->CTLR = 0x40022010
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+    ctlr |= CR_OPTPG_Set;
+	MCF.WriteWord( dev, 0x40022010, ctlr ); // FLASH->CTLR = 0x40022010
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+    ctlr &=~CR_OPTPG_Reset;
+	MCF.WriteWord( dev, 0x40022010, ctlr ); // FLASH->CTLR = 0x40022010
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+
+
+// This does work to write the option bytes, but does NOT work to erase.
+
+	if( MCF.ReadWord( dev, 0x40022010, &ctlr ) ) // FLASH->CTLR = 0x40022010
+	{
+		return -9;
+	}
+	ctlr |= CR_OPTPG_Set; //OBPG
+	MCF.WriteWord( dev, 0x40022010, ctlr ); // FLASH->CTLR = 0x40022010
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+
+	uint32_t config = OB_IWDG_HW | OB_STOP_NoRST | OB_STDBY_NoRST | (one_if_yes_gpio?OB_RST_NoEN:OB_RST_EN_DT1ms) | (uint16_t)0xE0;
+	printf( "Config (%08x): %08x\n", (intptr_t)&OB->USER, config );
+	MCF.WriteHalfWord( dev,  (intptr_t)&OB->USER, config );
+
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+
+	ctlr &= CR_OPTPG_Reset;
+	MCF.WriteWord( dev, 0x40022010, ctlr ); // FLASH->CTLR = 0x40022010
+
+
+	if( MCF.ReadWord( dev, 0x1FFFF800, &csw ) )
+	{
+		fprintf( stderr, "Error: failed to get user word\n" );
+		return -5;
+	}
+
+	//csw >>= 16; // Only want bottom part of word.
+	printf( "CSW: %08x\n", csw );
+
+#if 0
+	uint32_t prevuser;
+	if( MCF.ReadWord( dev, 0x1FFFF800, &prevuser ) )
+	{
+		fprintf( stderr, "Error: failed to get user word\n" );
+		return -5;
+	}
+
+	ret |= MCF.WaitForFlash( dev );
+
+	// Erase.
+	MCF.ReadWord( dev, 0x40022010, &csw ); // FLASH->CTLR = 0x40022010
+	csw |= 1<<5;//OBER;
+	MCF.WriteWord( dev, 0x40022010, csw ); // FLASH->CTLR = 0x40022010
+	MCF.WriteHalfWord( dev, 0x1FFFF802, 0xffff );
+	ret |= MCF.WaitForDoneOp( dev );
+	ret |= MCF.WaitForFlash( dev );
+
+	MCF.ReadWord( dev, 0x40022010, &csw ); // FLASH->CTLR = 0x40022010
+	printf( "CTLR: %08x\n", csw );
+	csw |= 1<<9;//OBPG, OBWRE
+	MCF.WriteWord( dev, 0x40022010, csw );
+
+	int j;
+	for( j = 0; j < 5; j++ )
+	{
+		if( MCF.ReadWord( dev, 0x1FFFF800, &prevuser ) )
+		{
+			fprintf( stderr, "Error: failed to get user word\n" );
+			return -5;
+		}
+
+		//csw >>= 16; // Only want bottom part of word.
+		printf( "CSW was: %08x\n", prevuser );
+		csw = prevuser >> 16;
+		csw = csw & 0xe7e7;
+		csw |= (one_if_yes_gpio?0b11:0b00)<<(3+0);
+		csw |= (one_if_yes_gpio?0b00:0b11)<<(3+8);
+		printf( "CSW writing: %08x\n", csw );
+		MCF.WriteHalfWord( dev, 0x1FFFF802, csw );
+		ret |= MCF.WaitForDoneOp( dev );
+		ret |= MCF.WaitForFlash( dev );
+	}
+
+
+	MCF.ReadWord( dev, 0x40022010, &csw ); // FLASH->CTLR = 0x40022010
+	printf( "CTLR: %08x\n", csw );
+	csw &= ~(1<<9);//OBPG, OBWRE
+	MCF.WriteWord( dev, 0x40022010, csw );
+
+#endif
+	printf( "RET: %d\n", ret );
+	return 0;
+#endif
+}
+
 int DefaultPrintChipInfo( void * dev )
 {
 	uint32_t reg;
@@ -1023,8 +1276,12 @@ int SetupAutomaticHighLevelFunctions( void * dev )
 		MCF.ReadBinaryBlob = DefaultReadBinaryBlob;
 	if( !MCF.WriteWord )
 		MCF.WriteWord = DefaultWriteWord;
+	if( !MCF.WriteHalfWord )
+		MCF.WriteHalfWord = DefaultWriteHalfWord;
 	if( !MCF.ReadWord )
 		MCF.ReadWord = DefaultReadWord;
+	if( !MCF.ReadHalfWord )
+		MCF.ReadHalfWord = DefaultReadHalfWord;
 	if( !MCF.Erase )
 		MCF.Erase = DefaultErase;
 	if( !MCF.HaltMode )
@@ -1039,6 +1296,8 @@ int SetupAutomaticHighLevelFunctions( void * dev )
 		MCF.PrintChipInfo = DefaultPrintChipInfo;
 	if( !MCF.Unbrick )
 		MCF.Unbrick = DefaultUnbrick;
+	if( !MCF.ConfigureNRSTAsGPIO )
+		MCF.ConfigureNRSTAsGPIO = DefaultConfigureNRSTAsGPIO;
 
 	struct InternalState * iss = malloc( sizeof( struct InternalState ) );
 	iss->statetag = 0;
diff --git a/minichlink/minichlink.h b/minichlink/minichlink.h
index de5302a76dd9847e9ea4c7a8d4b823f4668069a0..100e1270d11d652b69505a4514271dd1a7cc578b 100644
--- a/minichlink/minichlink.h
+++ b/minichlink/minichlink.h
@@ -31,7 +31,8 @@ struct MiniChlinkFunctions
 	int (*Erase)( void * dev, uint32_t address, uint32_t length, int type ); //type = 0 for fast, 1 for whole-chip
 
 	// MUST be 4-byte-aligned.
-	int (*WriteWord)( void * dev, uint32_t address_to_write, uint32_t data ); // Flags = 1 for "doing a fast FLASH write."
+	int (*VoidHighLevelState)( void * dev );
+	int (*WriteWord)( void * dev, uint32_t address_to_write, uint32_t data );
 	int (*ReadWord)( void * dev, uint32_t address_to_read, uint32_t * data );
 
 	int (*WaitForFlash)( void * dev );
@@ -52,6 +53,12 @@ struct MiniChlinkFunctions
 	int (*PollTerminal)( void * dev, uint8_t * buffer, int maxlen );
 
 	int (*PerformSongAndDance)( void * dev );
+
+	int (*VendorCommand)( void * dev, const char * command );
+
+	// Do Not override these.  they are cursed.
+	int (*WriteHalfWord)( void * dev, uint32_t address_to_write, uint32_t data );
+	int (*ReadHalfWord)( void * dev, uint32_t address_to_read, uint32_t * data );
 };
 
 /** If you are writing a driver, the minimal number of functions you can implement are:
@@ -111,5 +118,8 @@ void * TryInit_ESP32S2CHFUN();
 // Returns 0 if ok, populated, 1 if not populated.
 int SetupAutomaticHighLevelFunctions( void * dev );
 
+// Useful for converting numbers like 0x, etc.
+int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber );
+
 #endif
 
diff --git a/minichlink/pgm-esp32s2-ch32xx.c b/minichlink/pgm-esp32s2-ch32xx.c
index 2b3c67d5574832dab892fdf1ef44316b6725be01..77e2f03a3e65b272ed42590f1660054522d5efc7 100644
--- a/minichlink/pgm-esp32s2-ch32xx.c
+++ b/minichlink/pgm-esp32s2-ch32xx.c
@@ -105,7 +105,7 @@ int ESPFlushLLCommands( void * dev )
 	if( r < 0 )
 	{
 		fprintf( stderr, "Error: Got error %d when sending hid feature report.\n", r );
-		return r;
+		exit( -9 );
 	}
 retry:
 	eps->reply[0] = 0xad; // Key report ID
@@ -247,6 +247,80 @@ int ESPPerformSongAndDance( void * dev )
 	return 0;
 }
 
+int ESPVoidHighLevelState( void * dev ) // Queues command word 0x05fe and flushes it; always returns 0.
+{
+	// NOTE(review): 0x05fe presumably makes the programmer drop its cached high-level state -- confirm against the firmware.
+	Write2LE( (struct ESP32ProgrammerStruct *)dev, 0x05fe );
+	ESPFlushLLCommands( dev );
+	return 0;
+}
+
+// Parses "NAME:field0:field1:..." and runs the matching programmer-specific command.
+// Returns 0 on success, and a negative value for bad arguments or an unknown command.
+int ESPVendorCommand( void * dev, const char * cmd )
+{
+	char command[10] = { 0 };
+	char tbuf[10] = { 0 };
+	int fields[10];
+	int i = 0;
+	int f = 0;
+	// Command name: everything up to the first ':' (truncated to fit the buffer).
+	// Stop ON the terminator rather than past it, so a command without fields can never run the parser beyond the NUL.
+	for( ; *cmd && *cmd != ':'; cmd++ )
+	{
+		if( i + 1 < (int)sizeof( command ) ) command[i++] = *cmd;
+	}
+	// Fields exist only when the name was followed by a ':'.
+	int has_fields = ( *cmd == ':' );
+	if( has_fields ) cmd++;
+	i = 0;
+	while( has_fields )
+	{
+		char c = *cmd++;
+		if( c == ':' || c == '\0' )
+		{
+			fields[f++] = SimpleReadNumberInt( tbuf,  0 );
+			puts( tbuf );
+			if( f == 10 ) break; 
+			tbuf[0] = 0;
+			i = 0;
+			if( c == '\0' ) break;
+			continue;
+		}
+		if( i + 1 >= (int)sizeof( tbuf )) break;
+		tbuf[i++] = c;
+		tbuf[i] = 0;
+	}
+	printf( "Got Vendor Command \"%s\"\n", command );
+	ESPFlushLLCommands( dev );
+	if( strcasecmp( command, "ECLK" ) == 0 )
+	{
+		printf( "Setting up external clock on pin.\n" );
+		if( f < 5 )
+		{
+			fprintf( stderr, "Error: Need fields :use_apll:sdm0:sdm1:sdm2:odiv try 1:0:0:8:3 for 24MHz\n" );
+			fprintf( stderr, "Definition:\n\
+	use_apll = Configures APLL = 480 / 4 = 120\n\
+	40 * (SDM2 + SDM1/(2^8) + SDM0/(2^16) + 4) / ( 2 * (ODIV+2) );\n\
+	Datasheet recommends that numerator is between 300 and 500MHz.\n ");
+			return -9;
+		}
+		Write2LE( dev, 0x0cfe );
+		Write1( dev, fields[0] ); 
+		Write1( dev, fields[1] ); 
+		Write1( dev, fields[2] ); 
+		Write1( dev, fields[3] ); 
+		Write1( dev, fields[4] ); 
+		Write1( dev, 0 ); 
+		Write1( dev, 0 ); 
+		Write1( dev, 0 ); 
+		ESPFlushLLCommands( dev );
+		return 0;
+	}
+	// An unrecognised command is a failure, so the caller must not treat it as handled.
+	fprintf( stderr, "Error: Unknown vendor command %s\n", command );
+	return -1;
+}
 
 void * TryInit_ESP32S2CHFUN()
 {
@@ -268,6 +342,7 @@ void * TryInit_ESP32S2CHFUN()
 	MCF.DelayUS = ESPDelayUS;
 	MCF.Control3v3 = ESPControl3v3;
 	MCF.Exit = ESPExit;
+	MCF.VoidHighLevelState = ESPVoidHighLevelState;
 
 	// These are optional. Disabling these is a good mechanismto make sure the core functions still work.
 	MCF.WriteWord = ESPWriteWord;
@@ -279,7 +354,7 @@ void * TryInit_ESP32S2CHFUN()
 	MCF.PerformSongAndDance = ESPPerformSongAndDance;
 
 	MCF.BlockWrite64 = ESPBlockWrite64;
-
+	MCF.VendorCommand = ESPVendorCommand;
 	// Reset internal programmer state.
 	Write2LE( eps, 0x0afe );