diff --git a/src/avrftdi.c b/src/avrftdi.c
index e41d775e..f0c07a6a 100644
--- a/src/avrftdi.c
+++ b/src/avrftdi.c
@@ -917,8 +917,19 @@ static int avrftdi_chip_erase(PROGRAMMER * pgm, AVRPART * p)
 static int
 avrftdi_lext(PROGRAMMER *pgm, AVRPART *p, AVRMEM *m, unsigned int address)
 {
+	/* nothing to do if load extended address command unavailable */
+	if(m->op[AVR_OP_LOAD_EXT_ADDR] == NULL)
+		return 0;
+
+	avrftdi_t *pdata = to_pdata(pgm);
 	unsigned char buf[] = { 0x00, 0x00, 0x00, 0x00 };
 
+	/* only send load extended address command if high byte changed */
+	if(pdata->lext_byte == (uint8_t) (address>>16))
+		return 0;
+
+	pdata->lext_byte = (uint8_t) (address>>16);
+
 	avr_set_bits(m->op[AVR_OP_LOAD_EXT_ADDR], buf);
 	avr_set_addr(m->op[AVR_OP_LOAD_EXT_ADDR], buf, address);
 
@@ -983,8 +994,6 @@ static int avrftdi_eeprom_read(PROGRAMMER *pgm, AVRPART *p, AVRMEM *m,
 static int avrftdi_flash_write(PROGRAMMER * pgm, AVRPART * p, AVRMEM * m,
 		unsigned int page_size, unsigned int addr, unsigned int len)
 {
-	int use_lext_address = m->op[AVR_OP_LOAD_EXT_ADDR] != NULL;
-	
 	unsigned int word;
 	unsigned int poll_index;
 
@@ -1013,22 +1022,12 @@ static int avrftdi_flash_write(PROGRAMMER * pgm, AVRPART * p, AVRMEM * m,
 
 	page_size = m->page_size;
 
-	/* if we do cross a 64k word boundary (or write the
-	 * first page), we need to issue a 'load extended
-	 * address byte' command, which is defined as 0x4d
-	 * 0x00 <address byte> 0x00.  As far as i know, this
-	 * is only available on 256k parts.  64k word is 128k
-	 * bytes.
-	 * write the command only once.
-	 */
-	if(use_lext_address && (((addr/2) & 0xffff0000))) {
-		if (0 > avrftdi_lext(pgm, p, m, addr/2))
-			return -1;
-	}
+	/* on devices with more than 128k of flash, issue the load extended address command if needed (avrftdi_lext decides) */
+	if(avrftdi_lext(pgm, p, m, addr/2) < 0)
+		return -1;
 	
 	/* prepare the command stream for the whole page */
-	/* addr is in bytes, but we program in words. addr/2 should be something
-	 * like addr >> WORD_SHIFT, though */
+	/* addr is in bytes, but we program in words. */
 	for(word = addr/2; word < (len + addr)/2; word++)
 	{
 		log_debug("-< bytes = %d of %d\n", word * 2, len + addr);
@@ -1107,8 +1106,6 @@ static int avrftdi_flash_read(PROGRAMMER * pgm, AVRPART * p, AVRMEM * m,
 {
 	OPCODE * readop;
 	int byte, word;
-	int use_lext_address = m->op[AVR_OP_LOAD_EXT_ADDR] != NULL;
-	unsigned int address = addr/2;
 
 	unsigned int buf_size = 4 * len + 4;
 	unsigned char* o_buf = alloca(buf_size);
@@ -1128,10 +1125,8 @@ static int avrftdi_flash_read(PROGRAMMER * pgm, AVRPART * p, AVRMEM * m,
 		return -1;
 	}
 	
-	if(use_lext_address && ((address & 0xffff0000))) {
-		if (0 > avrftdi_lext(pgm, p, m, address))
-			return -1;
-	}
+	if(avrftdi_lext(pgm, p, m, addr/2) < 0)
+		return -1;
 	
 	/* word addressing! */
 	for(word = addr/2, index = 0; word < (addr + len)/2; word++)
@@ -1210,7 +1205,11 @@ avrftdi_setup(PROGRAMMER * pgm)
 {
 	avrftdi_t* pdata;
 
-	pgm->cookie = malloc(sizeof(avrftdi_t));
+	
+	if(!(pgm->cookie = calloc(sizeof(avrftdi_t), 1))) {
+		log_err("Error allocating memory.\n");
+		exit(1);
+	}
 	pdata = to_pdata(pgm);
 
 	pdata->ftdic = ftdi_new();
@@ -1224,6 +1223,7 @@ avrftdi_setup(PROGRAMMER * pgm)
 	pdata->pin_value = 0;
 	pdata->pin_direction = 0;
 	pdata->led_mask = 0;
+	pdata->lext_byte = 0xff;
 }
 
 static void
diff --git a/src/avrftdi_private.h b/src/avrftdi_private.h
index 3c965ed8..15b9caec 100644
--- a/src/avrftdi_private.h
+++ b/src/avrftdi_private.h
@@ -81,6 +81,8 @@ typedef struct avrftdi_s {
 	int tx_buffer_size;
 	/* use bitbanging instead of mpsse spi */
 	bool use_bitbanging;
+	/* cached bits 16-23 of the 24-bit flash word address, used to skip redundant load extended address commands on parts with flash > 128k */
+	uint8_t lext_byte;
 } avrftdi_t;
 
 void avrftdi_log(int level, const char * func, int line, const char * fmt, ...);