Weather Station Freezes after long period

Hi all, I have an Arduino sketch for use with a Davis weather station. It works perfectly until it doesn't: after approximately one month, it simply stops sending uplinks. I am using the RAK4631 with the RAK19007. I suspect it is probably due to my having to call Wire.end() followed by ads.begin() so that the temperature/humidity sensor reads correctly, but I am really not sure. Any suggestions on how to sort this out or, failing that, how I could diagnose the issue or put something in place to restart the device if it freezes? Here is the code:

#include <Arduino.h>
#include <LoRaWan-RAK4630.h>
#include <SPI.h>
#include <Wire.h>
#include <Adafruit_ADS1X15.h>
#include <nrf_nvic.h>
#include <Adafruit_LPS2X.h>
#include <Adafruit_Sensor.h>
#include <sensirion.h>

// ADC read on channel 1 in loop() to derive the wind direction.
Adafruit_ADS1015 ads;
// Barometric sensor; send_lora_frame() reads its pressure event for the payload.
Adafruit_LPS22 g_lps22hb;

// --- Configurable intervals ---
// Period (ms) the application timer is re-armed with on every expiry.
// send_lora_frame() replaces it with tx_interval_slow once more than
// change_tx_after uplinks have been sent successfully.
static uint32_t app_interval_ms = 30000; // start fast
const uint32_t tx_interval_slow = 300000;
const uint32_t change_tx_after = 10;
// Disabled periodic-reset threshold (its only use in send_lora_frame() is commented out too).
// const uint32_t tx_reset_count = 2500;

// --- Temp/Hum ---
// Pins handed to the sensirion driver below.
// NOTE(review): send_lora_frame() calls Wire.end() before using this sensor,
// so these pins presumably double as the I2C bus pins - confirm against the board.
#define dataPin 13
#define clockPin 14
sensirion sht(dataPin, clockPin);
// Latest temperature / humidity values, written by send_lora_frame() just
// before they are packed into the uplink.
uint16_t t_raw, h_raw;

// --- Rain ---
#define rainPin WB_IO2
// Debounced pulse count incremented by the rain interrupt attached in setup().
// It is never reset in the visible code; send_lora_frame() transmits its low 16 bits.
volatile uint32_t rain = 0;

// --- Wind ---
// NOTE(review): WIND_DIR_SENSOR is not referenced in the visible code; loop()
// reads the direction through the ADS1015 instead.
#define WIND_DIR_SENSOR WB_A1
#define WIND_SPEED_SENSOR WB_IO1
// Length (ms) of the window over which loop() counts pulses for a gust sample.
#define WIND_GUST_PERIOD 5000UL
// Both counters are incremented together by the wind-speed interrupt.
// windSpeedCount accumulates over a whole report interval, windGustCount over
// one WIND_GUST_PERIOD window.
volatile uint32_t windSpeedCount = 0;
volatile uint32_t windGustCount = 0;
// Largest windGustCount seen in any gust window since the last uplink.
unsigned int maxWindGustCount = 0;
// millis() timestamps of the last direction sample, gust window and report.
unsigned long lastWindDirTime = 0;
unsigned long lastWindGustTime = 0;
unsigned long lastReportTime = 0;
// Running sum and sample count of wind-direction readings; send_lora_frame()
// divides one by the other to report the mean and then clears both.
uint32_t windDirTot = 0;
uint16_t windDirCount = 0;
// Most recent wind-direction sample computed in loop().
int windDirNow = 0;

// --- Battery ---
// Analog pin sampled by readVBAT().
#define PIN_VBAT WB_A0
// Millivolts per ADC count; numerically 3000 / 4096.
// NOTE(review): that matches a 3.0 V reference at 12-bit resolution. setup()
// sets analogReadResolution(12) but no analogReference() call is visible -
// confirm the ADC reference actually in use.
#define VBAT_MV_PER_LSB (0.73242188F)
// Scale factor applied on top of the per-count value.
// NOTE(review): presumably compensates an on-board voltage divider - confirm.
#define VBAT_DIVIDER_COMP (1.73)
// Combined factor turning a raw ADC count into battery millivolts.
#define REAL_VBAT_MV_PER_LSB (VBAT_DIVIDER_COMP * VBAT_MV_PER_LSB)

// Samples the battery-sense pin once and scales the raw ADC count to
// millivolts using REAL_VBAT_MV_PER_LSB.
float readVBAT(void) {
  const auto rawCounts = analogRead(PIN_VBAT);
  return rawCounts * REAL_VBAT_MV_PER_LSB;
}

// Maps a battery voltage in millivolts onto a 0-100 % charge estimate.
//   <= 3300 mV        -> 0 %
//   3301 .. 3599 mV   -> 0-9 %   (one percent per 30 mV)
//   3600 .. 4199 mV   -> 10-99 % (0.15 % per mV above 3600)
//   >= 4200 mV        -> 100 %
uint8_t mvToPercent(uint16_t mv) {
  uint8_t percent;
  if (mv <= 3300) {
    percent = 0;
  } else if (mv >= 4200) {
    percent = 100;
  } else if (mv < 3600) {
    percent = (mv - 3300) / 30;
  } else {
    percent = 10 + static_cast<uint8_t>((mv - 3600) * 0.15f);
  }
  return percent;
}

// --- LoRaWAN ---
// Passed to lmh_init() as the OTAA flag; also gates loading the EUIs and key in setup().
bool doOTAA = true;
// Device class and region handed to lmh_init().
DeviceClass_t g_CurrentClass = CLASS_A;
LoRaMacRegion_t g_CurrentRegion = LORAMAC_REGION_AU915;
// Confirmation mode passed to lmh_send() for every uplink.
lmh_confirm g_CurrentConfirm = LMH_UNCONFIRMED_MSG;
// Port number written into each uplink by send_lora_frame().
uint8_t gAppPort = LORAWAN_APP_PORT;

// Stack parameters handed to lmh_init().
// NOTE(review): field order is presumably ADR, data rate, public network,
// join trials, TX power, duty cycle - confirm against the lmh_param_t definition.
static lmh_param_t g_lora_param_init = {
  LORAWAN_ADR_ON, DR_2, LORAWAN_PUBLIC_NETWORK, 3, TX_POWER_5, LORAWAN_DUTYCYCLE_OFF
};

// Forward declarations of the LoRaWAN callbacks and the uplink routine, all
// defined further down in this file.
static void lorawan_has_joined_handler(void);
static void lorawan_join_failed_handler(void);
static void lorawan_rx_handler(lmh_app_data_t *app_data);
static void lorawan_confirm_class_handler(DeviceClass_t Class);
static void send_lora_frame(void);


// Callback table registered with the stack through lmh_init() in setup().
// The first three entries are board helpers that are not defined in this file.
static lmh_callback_t g_lora_callbacks = {
  BoardGetBatteryLevel, BoardGetUniqueId, BoardGetRandomSeed,
  lorawan_rx_handler, lorawan_has_joined_handler, lorawan_confirm_class_handler, lorawan_join_failed_handler
};

// Replace with your actual keys before deployment
uint8_t nodeDeviceEUI[8] = {};
uint8_t nodeAppEUI[8] = {};
uint8_t nodeAppKey[16] = {};

// Uplink payload storage; send_lora_frame() fills the buffer and sets the
// size and port before every lmh_send().
#define LORAWAN_APP_DATA_BUFF_SIZE 64
static uint8_t m_lora_app_data_buffer[LORAWAN_APP_DATA_BUFF_SIZE];
static lmh_app_data_t m_lora_app_data = {m_lora_app_data_buffer, 0, 0, 0, 0};
// Join bookkeeping: number of failed joins since the last successful uplink
// attempt and the millis() timestamp of the most recent failure.
static uint32_t join_retry_count = 0;
static uint32_t last_join_attempt = 0;

// Application timer; its callback re-arms it and raises send_triggered.
TimerEvent_t appTimer;
// Totals of successful and failed lmh_send() calls.
uint32_t count = 0, count_fail = 0;
// Set by the timer callback and polled by loop(). Declared volatile because
// the callback may run outside the loop() context, so the compiler must not
// cache the value.
volatile bool send_triggered = false;

// --- Wind direction mapping table ---
// Each entry pairs an upper threshold with the compass bearing (degrees)
// assigned to it; entries are ordered by ascending threshold.
// NOTE(review): nothing in the visible code reads this table - loop() derives
// the direction from a polynomial instead.
struct DirMap {
  uint16_t threshold;
  uint16_t degrees;
};

const DirMap dirMap[] = {
  {220, 22},
  {450, 45},
  {680, 67},
  {895, 90},
  {1110, 112},
  {1320, 135},
  {1550, 157},
  {1770, 180},
  {2010, 202},
  {2260, 225},
  {2515, 247},
  {2800, 270},
  {3080, 292},
  {3400, 315},
  {3755, 337},
  {9999, 360},
};

void setup() {
  Serial.begin(115200);
  while (!Serial && millis() < 5000) delay(10);

  pinMode(rainPin, INPUT_PULLUP);
  attachInterrupt(rainPin, [](){
    static unsigned long last_time = 0;
    unsigned long now = millis();
    if (now - last_time > 200) rain++;
    last_time = now;
  }, FALLING);

  pinMode(WIND_SPEED_SENSOR, INPUT_PULLUP);
  attachInterrupt(WIND_SPEED_SENSOR, [](){
    static unsigned long last_time = 0;
    unsigned long now = millis();
    if (now - last_time > 20) {
      windSpeedCount++;
      windGustCount++;
    }
    last_time = now;
  }, FALLING);

  ads.begin();
  ads.setGain(GAIN_ONE);

  if (!g_lps22hb.begin_I2C(0x5c)) {
    Serial.println("No LPS22 sensor!");
    while (1) delay(10);
  }
  g_lps22hb.setDataRate(LPS22_RATE_10_HZ);

  analogReadResolution(12);
  readVBAT();

  lora_rak4630_init();
  if (doOTAA) {
    lmh_setDevEui(nodeDeviceEUI);
    lmh_setAppEui(nodeAppEUI);
    lmh_setAppKey(nodeAppKey);
  }
  if (lmh_init(&g_lora_callbacks, g_lora_param_init, doOTAA, g_CurrentClass, g_CurrentRegion) != 0) {
    Serial.println("lmh_init failed");
    return;
  }

  TimerInit(&appTimer, [](){
    TimerSetValue(&appTimer, app_interval_ms);
    TimerStart(&appTimer);
    send_triggered = true;
  });
  
  lmh_join();
}

// Main loop: samples the wind direction once per second, closes a gust window
// every WIND_GUST_PERIOD milliseconds, and sends an uplink whenever the
// application timer has raised send_triggered.
void loop() {
  const unsigned long now = millis();

  if (now - lastWindDirTime >= 1000) {
    lastWindDirTime = now;
    int16_t adc1 = ads.readADC_SingleEnded(1);

    float readingVolts = ads.computeVolts(adc1);
    // This formula was created by charting voltages in Excel and having it produce a polynomial trendline (order 3)
    windDirNow = (int)(-59.639 * pow(readingVolts, 3) + 74.669 * pow(readingVolts, 2) + 266.22 * readingVolts - 1.1853);
    if (windDirNow < 0) {
      windDirNow = 0;
    }

    // Accumulate for the per-report mean computed in send_lora_frame().
    windDirTot += windDirNow;
    windDirCount++;
  }

  if (now - lastWindGustTime >= WIND_GUST_PERIOD) {
    lastWindGustTime = now;
    // Read the interrupt-updated counter exactly once and clear it right away,
    // so the compared value is the stored value and the gap in which a pulse
    // could be dropped is as short as possible.
    const uint32_t gustPulses = windGustCount;
    windGustCount = 0;
    if (gustPulses > maxWindGustCount) {
      maxWindGustCount = gustPulses;
    }
  }

  if (send_triggered) {
    // Clear the request BEFORE sending: a trigger raised while
    // send_lora_frame() is still running must survive to the next pass
    // instead of being wiped afterwards.
    send_triggered = false;
    send_lora_frame();
  }
}

// Join-success callback: reports the join on the serial console and arms the
// application timer with the current uplink interval.
void lorawan_has_joined_handler(void) {
  Serial.println("Network Joined!");

  auto *const timer = &appTimer;
  TimerSetValue(timer, app_interval_ms);
  TimerStart(timer);
}

// Join-failure callback: records the failure and re-arms the application
// timer with a backoff of 30 s * 2^retries (exponent capped at 5), limited to
// 300 s, to prevent battery drain.
void lorawan_join_failed_handler(void) {
  Serial.println("Join failed; check keys/gateway.");
  join_retry_count++;
  last_join_attempt = millis();

  const auto exponent = min(join_retry_count, 5);
  const auto uncapped = 30000UL * (1 << exponent);
  uint32_t backoff_ms = min(300000UL, uncapped);
  Serial.printf("Will retry join in %lu seconds\n", backoff_ms / 1000);

  // The next timer expiry triggers the retry via send_lora_frame().
  auto *const timer = &appTimer;
  TimerSetValue(timer, backoff_ms);
  TimerStart(timer);
}

// Starts another join attempt, but only when the device is not joined, at
// least one join has already failed, and more than 30 s have passed since
// that failure.
void attemptJoinIfNeeded() {
  if (lmh_join_status_get() == LMH_SET) {
    return;  // already joined
  }
  if (join_retry_count == 0) {
    return;  // no failure recorded yet
  }
  if (millis() - last_join_attempt <= 30000UL) {
    return;  // too soon after the last failure
  }

  Serial.println("Retrying join...");
  lmh_join();
}

// Downlink callback: logs the port and payload size of the received frame;
// the payload itself is not processed.
void lorawan_rx_handler(lmh_app_data_t *app_data) {
  const auto rxPort = app_data->port;
  const auto rxSize = app_data->buffsize;
  Serial.printf("RX port %d, size %d\n", rxPort, rxSize);
}

// Class-change callback: logs the new device class as a letter, using the
// class value as an index into "ABC".
void lorawan_confirm_class_handler(DeviceClass_t Class) {
  const char classLetter = "ABC"[Class];
  Serial.printf("Class switched to %c\n", classLetter);
}

// Builds and sends one uplink.
//
// Retries the join if one is pending, and returns without touching any
// accumulator while the device is not joined. Otherwise it reads temperature,
// humidity, battery voltage and pressure, derives mean wind direction, wind
// speed and gust from the accumulators, packs a 13-byte big-endian payload and
// hands it to lmh_send(). The per-report accumulators are cleared whether or
// not the send succeeds.
//
// Payload layout (byte offsets):
//   0-1  t_raw        2    humidity     3-4  rain count (low 16 bits)
//   5    wind speed   6    wind gust    7-8  mean wind direction
//   9-10 battery mV   11-12 pressure, hPa x10
void send_lora_frame(void) {
  // First check if we need to retry joining
  attemptJoinIfNeeded();

  if (lmh_join_status_get() != LMH_SET) {
    Serial.println("Not joined, skipping send");
    return;
  }

  // Reset join retry count on successful join
  join_retry_count = 0;

  // The I2C peripheral is released while the temp/humidity sensor is read and
  // re-enabled afterwards with Wire.begin(). Do NOT use ads.begin() for this:
  // Adafruit_ADS1X15::begin() allocates a fresh Adafruit_I2CDevice on every
  // call without freeing the previous one (as of the 2.x library), so calling
  // it once per uplink leaks heap until allocation fails and the sketch
  // freezes. The ADS1015 and LPS22 driver objects created in setup() remain
  // valid; only the bus itself needs restarting.
  Wire.end();
  // NOTE(review): the payload comment used to say "temp x10", but no x10
  // scaling is visible here - only a +20 offset. Confirm against the decoder.
  t_raw = sht.readTemperatureC() + 20;
  h_raw = sht.readHumidity();
  Wire.begin();

  const uint16_t vbat_mv = (uint16_t)readVBAT();

  const uint16_t windDir = (windDirCount > 0) ? windDirTot / windDirCount : 0;

  // Snapshot AND clear the interrupt-driven pulse counter inside one critical
  // section so pulses arriving during the rest of this function are counted
  // in the next report instead of being discarded.
  noInterrupts();
  const uint32_t wsCount = windSpeedCount;
  windSpeedCount = 0;
  interrupts();
  const uint32_t wgCount = maxWindGustCount;

  const unsigned long nowMs = millis();
  const unsigned long rp = nowMs - lastReportTime;  // length of this report interval, ms
  lastReportTime = nowMs;

  // pulses -> speed: count * 2.25 / seconds, then * 1.609, clamped to one byte.
  const uint8_t spd = (rp > 0) ? min<uint32_t>(wsCount * (2.25 / (rp / 1000.0)) * 1.609, 255) : 0;
  const uint8_t gust = min<uint32_t>(wgCount * (2.25 / (WIND_GUST_PERIOD / 1000.0)) * 1.609, 255);

  sensors_event_t p, t;
  g_lps22hb.getEvent(&p, &t);

  // --- pack payload ---
  uint8_t i = 0;
  auto put16 = [&](uint16_t v){ m_lora_app_data.buffer[i++] = highByte(v); m_lora_app_data.buffer[i++] = lowByte(v); };

  put16(t_raw);
  m_lora_app_data.buffer[i++] = (uint8_t)h_raw;
  put16(rain);  // truncated to the low 16 bits
  m_lora_app_data.buffer[i++] = spd;
  m_lora_app_data.buffer[i++] = gust;
  put16(windDir);
  put16(vbat_mv);
  put16((uint16_t)(p.pressure * 10)); // hPa x10

  m_lora_app_data.buffsize = i;
  m_lora_app_data.port = gAppPort;

  auto err = lmh_send(&m_lora_app_data, g_CurrentConfirm);
  if (err == LMH_SUCCESS) {
    count++;
    Serial.printf("Sent ok (%lu)\n", count);
    // Drop to the slow reporting interval after the initial fast uplinks.
    if (count > change_tx_after) app_interval_ms = tx_interval_slow;
    // if (count > tx_reset_count) sd_nvic_SystemReset();
  } else {
    count_fail++;
    Serial.printf("Send fail %d (total fails=%lu)\n", err, count_fail);
  }

  // These accumulators are only touched from loop(), so they can be cleared
  // here; windSpeedCount was already cleared in the critical section above.
  windDirTot = 0;
  windDirCount = 0;
  maxWindGustCount = 0;
}

Hi @Andy ,

The only way to find out what causes the issue is to isolate it. I am not sure why you have to call Wire.end(), or why the problem only appears after a month. If there were a problem on the I2C bus (conflict, contention, timing), it should manifest immediately, unless other factors such as temperature/heat or voltage levels are involved. While isolating the issue, one lazy trick is to run a regular software reset (say once a month, or every 2-3 weeks) so that you avoid getting stuck. A more professional approach is to implement a watchdog timer (WDT), but I am not aware of any Arduino-compatible WDT implementation for the nRF52.