Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Due #82

Merged
merged 4 commits into from
Sep 12, 2021
Merged

Due #82

Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/FastAccelStepper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ FastAccelStepperEngine* fas_engine = NULL;
FastAccelStepper fas_stepper[MAX_STEPPER] = {FastAccelStepper(),
FastAccelStepper()};
#endif
#if defined(ARDUINO_ARCH_ESP32)
#if defined(ARDUINO_ARCH_ESP32) || defined (ARDUINO_ARCH_SAM)
FastAccelStepper fas_stepper[MAX_STEPPER] = {
FastAccelStepper(), FastAccelStepper(), FastAccelStepper(),
FastAccelStepper(), FastAccelStepper(), FastAccelStepper()};
#endif
#if defined (ARDUINO_ARCH_SAM)
//We also need access to the engine for the timer task, so we will mimic the AVR code here...
FastAccelStepperEngine* fas_engine=NULL;
#endif
#if defined(TEST)
FastAccelStepper fas_stepper[MAX_STEPPER] = {FastAccelStepper(),
FastAccelStepper()};
Expand All @@ -64,10 +68,10 @@ void StepperTask(void* parameter) {
#endif
//*************************************************************************************************
void FastAccelStepperEngine::init() {
#if (TICKS_PER_S != 16000000L)
#if (TICKS_PER_S != 16000000L) && (TICKS_PER_S != 21000000L)
upm_timer_freq = upm_from((uint32_t)TICKS_PER_S);
#endif
#if defined(ARDUINO_ARCH_AVR)
#if defined(ARDUINO_ARCH_AVR) || defined (ARDUINO_ARCH_SAM)
fas_engine = this;
#endif
#if defined(ARDUINO_ARCH_ESP32)
Expand Down Expand Up @@ -123,7 +127,7 @@ FastAccelStepper* FastAccelStepperEngine::stepperConnectToPin(
uint8_t stepper_num = _next_stepper_num;
_next_stepper_num++;

#if defined(ARDUINO_ARCH_AVR) || defined(ESP32) || defined(TEST)
#if defined(ARDUINO_ARCH_AVR) || defined(ESP32) || defined(TEST) || defined (ARDUINO_ARCH_SAM)
FastAccelStepper* s = &fas_stepper[fas_stepper_num];
_stepper[stepper_num] = s;
s->init(this, fas_stepper_num, step_pin);
Expand Down Expand Up @@ -528,6 +532,9 @@ void FastAccelStepper::setDelayToDisable(uint16_t delay_ms) {
#if defined(ARDUINO_ARCH_ESP32)
delay_count = delay_ms / TASK_DELAY_4MS;
#endif
#if defined (ARDUINO_ARCH_SAM)
delay_count = delay_ms / (1000 / TICKS_PER_S);
#endif
#if defined(ARDUINO_ARCH_AVR)
delay_count = delay_ms / (65536000 / TICKS_PER_S);
#endif
Expand Down
3 changes: 2 additions & 1 deletion src/FastAccelStepper.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef FASTACCELSTEPPER_H
#define FASTACCELSTEPPER_H
#if defined(ARDUINO_ARCH_ESP32) || defined(ARDUINO_ARCH_AVR)
#if defined(ARDUINO_ARCH_ESP32) || defined(ARDUINO_ARCH_AVR) || \
defined(ARDUINO_ARCH_SAM)
#include <Arduino.h>
#else
#include <math.h>
Expand Down
36 changes: 36 additions & 0 deletions src/PoorManFloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,42 @@
// treated separately, that's why this table has only 255 entries The table is
// generated with this one liner
// [round(512.0/math.sqrt(i/256))-256 for i in range(257,512)
//
// I want to expound on the original comment, and include an example to make
// this less tedious for someone else. You probably need to go lookup/refresh
// your memory on floating point formats to make this make sense. In this
// format we will use an 8 bit exponent, and 8 bits of the data type as
// the mantissa, but as the diagram above shows, the first bit will always be 1
// So lets walk through an example. For the Due, I needed the PWM clock in
// this format, so 21,000,000.
// First off, remember that floating point wants the entire number in the range
// of the mantissa, so we right shift the value until it is within the range of
// 256-511. So for 21,000,000, thats 21000000 >> 16 = 0x140. Remember that the
// leading one is hidden. so the low byte is 0x40. You'd be wrong to assume
// the exponent is simply 16. again, in floating point, we want the mantissa
// to be treated as less than 1, so we shift past the bits. This means we want
// it to look like 0.0x140, or 0.101000000. Thats what mantissa means. So, to
// get the correct value for the exponent, we need the exponent that makes this
// (as close to) the original value as we can get, that is 20,971,520 or
// 0001 0100 0000 0000 0000 0000 0000. So again, we need to shift past our
// number, all the way to the above, so 0.101000000 << 25 is what we want. Or
// 0x19, (remember we had 16 shifts, but 9 bits of mantissa including the
// hidden bit 16+9=25). Now, we get a bit tricky. We know the hidden bit is
// always there, so we don't include it in our exponent! Thus, our exponent
// becomes 0x18 or 24. So the almost final value is 0x1840, but we forgot
// about the sign bit. We need to or 0x80 to the exponent to get the sign bit
// making for a final value of 0x9840 as seen in PoorManFloat.h. To reverse
// this, we take our value of the mantissa enter it in calculator. We take
// the exponent and subtract 8 from it. Enter the mantissa with the added 1
// into a "programmer calculator", 0x140 and use the LSH operator to do 0x140
// << 16, and you should get 20,971,520. There, now there's no need to be
// afraid of making new constants for the system to use different clock rates!
// I had to do this for the Due because the PWM clock could be divisions of 2
// off the main system clock of 84MHz. I'd need a 5.25 divider to get 16 MHz.
// Thats not an even number, so even with the arbitrary clock divider, the best
// I could have managed was 16.8 MHz. Easier to just use the modulo 4 divider
// and add the constants.

const PROGMEM uint8_t rsqrt_exp_even[255] = {
255, 254, 253, 252, 251, 250, 249, 248, 247, 247, 246, 245, 244, 243, 242,
241, 240, 239, 238, 237, 236, 236, 235, 234, 233, 232, 231, 230, 230, 229,
Expand Down
5 changes: 4 additions & 1 deletion src/PoorManFloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ typedef uint16_t upm_float;
#define UPM_CONST_2000 ((upm_float)0x8af4)
#define UPM_CONST_32000 ((upm_float)0x8ef4)
#define UPM_CONST_16E6_DIV_SQRT_OF_2 ((upm_float)0x9759)

#define UPM_CONST_21E6 ((upm_float)0x9840)
#define UPM_CONST_42000 ((upm_float)0x8f48)
#define UPM_CONST_21E6_DIV_SQRT_OF_2 ((upm_float)0x97c5)
#define UPM_CONST_2205E11 ((upm_float)0xaf91)
upm_float upm_from(uint8_t x);
upm_float upm_from(uint16_t x);
upm_float upm_from(uint32_t x);
Expand Down
2 changes: 1 addition & 1 deletion src/RampGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void RampGenerator::init() {
_rw.accel_change_cnt = 0xff;
_rw.ramp_state = RAMP_STATE_IDLE;
_rw.curr_ticks = TICKS_FOR_STOPPED_MOTOR;
#if (TICKS_PER_S != 16000000L)
#if (TICKS_PER_S != 16000000L) && (TICKS_PER_S != 21000000L)
upm_timer_freq = upm_from((uint32_t)TICKS_PER_S);
upm_timer_freq_div_500 = upm_divide(upm_timer_freq, UPM_CONST_500);
#endif
Expand Down
11 changes: 9 additions & 2 deletions src/RampGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
#elif defined(ARDUINO_ARCH_ESP32)
#define MAX_STEPPER 6
#define TICKS_PER_S 16000000L
#else
#elif defined (ARDUINO_ARCH_SAM)
#define MAX_STEPPER 6
#define TICKS_PER_S 16000000L
#define TICKS_PER_S 21000000L
#endif

#include "common.h"
Expand All @@ -30,6 +30,13 @@ class FastAccelStepper;
#define UPM_ACCEL_FACTOR UPM_CONST_128E12
#define US_TO_TICKS(u32) (u32 * 16)
#define TICKS_TO_US(u32) (u32 / 16)
#elif (TICKS_PER_S == 21000000L)
#define UPM_TICKS_PER_S UPM_CONST_21E6
#define UPM_TICKS_PER_S_DIV_500 UPM_CONST_42000
#define UPM_TICKS_PER_S_DIV_SQRT_OF_2 UPM_CONST_21E6_DIV_SQRT_OF_2
#define UPM_ACCEL_FACTOR UPM_CONST_2205E11
#define US_TO_TICKS(u32) (u32 * 21)
#define TICKS_TO_US(u32) (u32 / 21)
#else
#define UPM_TICKS_PER_S upm_timer_freq
#define UPM_TICKS_PER_S_DIV_500 upm_timer_freq_div_500
Expand Down
68 changes: 63 additions & 5 deletions src/StepperISR.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#if defined(ARDUINO_ARCH_ESP32) || defined(ARDUINO_ARCH_AVR)
#if defined(ARDUINO_ARCH_ESP32) || defined(ARDUINO_ARCH_AVR) || \
defined(ARDUINO_ARCH_SAM)
#include <Arduino.h>
#else
#include <assert.h>
Expand Down Expand Up @@ -35,6 +36,9 @@ enum channels { channelA, channelB, channelC };
#elif defined(ARDUINO_ARCH_ESP32)
#define NUM_QUEUES 6
#define QUEUE_LEN 32
#elif defined(ARDUINO_ARCH_SAM)
#define NUM_QUEUES 6
#define QUEUE_LEN 32
#else
#define NUM_QUEUES 6
#define QUEUE_LEN 32
Expand Down Expand Up @@ -73,7 +77,14 @@ bool _esp32_attachToPulseCounter(uint8_t pcnt_unit, FastAccelStepper* stepper,
void _esp32_clearPulseCounter(uint8_t pcnt_unit);
int16_t _esp32_readPulseCounter(uint8_t pcnt_unit);
#endif

#if defined(ARDUINO_ARCH_SAM)
typedef struct _PWMCHANNELMAPPING {
uint8_t pin;
uint32_t channel;
Pio* port;
uint32_t channelMask;
} PWMCHANNELMAPPING;
#endif
struct queue_entry {
uint8_t steps; // if 0, then the command only adds a delay
uint8_t toggle_dir : 1;
Expand All @@ -90,8 +101,22 @@ struct queue_entry {
class StepperQueue {
public:
struct queue_entry entry[QUEUE_LEN];
#ifndef ARDUINO_ARCH_SAM
//gin66 thinks this is unnecessary, and I think I see the point and how it
//has been constrained to make it unnecessary. Instinct honed on tons of
//HPC clusters says do not trust constraints to make it unnecessary....
//
//I still believe that if unnecessary, all this should do is force the single
//read from the variable at the top of the ISR to be from RAM, not from any
//caching. It should not affect optimization in any way. I'll test sans
//volatile later, but I feel like that will require a lot of longevity
//testing! Its one of those sneaky errors that can pop up!
volatile uint8_t read_idx; // ISR stops if readptr == next_writeptr
volatile uint8_t next_write_idx;
Copy link
Owner

@gin66 gin66 Sep 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This volatile is problematic, because it impacts the other architectures. If this is necessary, it should be conditionally compiled for sam only. But I tend to assume, this is not needed.

Why is it important to avoid ?
In the ISR, those two variables are accessed more than once. The volatile prevents the compiler to apply any register optimization, and for avr the ISR service time being as short as possible is crucial. So this compiler optimization is needed.

Why is volatile not needed ?
For the ISR those two variables are not volatile at all. During ISR execution no one will change it. In the cyclic task/interrupt, the volatile nature is mitigated by 1.disabling the interrupts 2. not reading more than once.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm....My understanding of volatile is quite different, and perhaps its handled differently in the embedded world. volatile is a hint to the compiler to warn it that it may have been written to by another variable, and to not cache the value. I didn't test it without, but I did wonder how this ever worked anywhere. It may even be the gcc for AVR has no concept of variable caching. All it should ever do though is hint the compiler to always read the value from the source. Unfortunately in bigger systems, it is just a hint and the compiler does ignore it sometimes. It should not disable optimizations. Again, I've spent most of my time in much much much higher powered multiprocessor, kernel threaded systems. I'd be pretty annoyed though if something so basic as volatile was interpreted differently on smaller architectures.

Copy link
Owner

@gin66 gin66 Sep 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

volatile is IMHO not really a hint. It should be a must for the compiler to not cache accordingly declared variables. If a compiler is just ignoring it, then I would be surprised. How can someone then write a reliable device driver ? That’s why most peripheral registers should be defined as volatile. For example a port input register may yield on every read a different value due to changed external signals. Imagine you need to busy wait for a high on an input signal to trigger a time critical action. With a wrongly optimizing compiler this turns into an endless loop and it is impossible to solve this without resorting to assembly code.

In any “parallel” execution environment (just using interrupts), shared variables are critical and just mark those as volatile is just an easy and working way out for the developer, but sacrifices performance a bit. But avr is with one interrupt per step already close to the limits. That’s why I have explained, why I do not see the need for volatile in this case and the volatile needs to be removed for at least avr (esp perhaps not) and for sam it’s up to you/your code.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ifndef should be an ifdef, right?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started writing it one way, then implemented it the other....I was doing ifndef and moving the volatile, but reversed it...give me a second...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and it turns out, I'm running with that mistake, so I do not have volatile....I had intended to run an all day torture test. Its up to 12 million pulses without error, and I don't really want to stop it now, so I'll leave my extreme long term test running....guess I'll prove to myself whether it is necessary...

#else
uint8_t read_idx; // ISR stops if readptr == next_writeptr
uint8_t next_write_idx;
#endif
bool dirHighCountsUp;
uint8_t dirPin;
#if defined(ARDUINO_ARCH_ESP32)
Expand All @@ -108,6 +133,19 @@ class StepperQueue {
volatile bool _isRunning;
bool isRunning() { return _isRunning; }
enum channels channel;
#elif defined(ARDUINO_ARCH_SAM)
volatile uint32_t* _dirPinPort;
uint32_t _dirPinMask;
volatile bool _hasISRactive;
bool isRunning();
const PWMCHANNELMAPPING* mapping;
bool _connected;
volatile bool _skipNextPWMInterrupt;
volatile bool _skipNextPIOInterrupt;
volatile bool _pauseCommanded;
volatile bool _runOnce;
volatile uint32_t timePWMInterruptEnabled;

#else
volatile bool _isRunning;
bool isRunning() { return _isRunning; }
Expand Down Expand Up @@ -166,6 +204,10 @@ class StepperQueue {
if (isQueueEmpty()) {
// set the dirPin here. Necessary with shared direction pins
digitalWrite(dirPin, dir);
#ifdef ARDUINO_ARCH_SAM
delayMicroseconds(30); // Make sure the driver has enough time to see
// the dir pin change
#endif
queue_end.dir = dir;
} else {
toggle_dir = (dir != queue_end.dir);
Expand Down Expand Up @@ -269,13 +311,13 @@ class StepperQueue {

if (steps != 0) {
if (e->countUp) {
#if defined(ARDUINO_ARCH_AVR)
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_SAM)
adjust = -steps;
#elif defined(ARDUINO_ARCH_ESP32)
adjust = done_p;
#endif
} else {
#if defined(ARDUINO_ARCH_AVR)
#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_SAM)
adjust = steps;
#elif defined(ARDUINO_ARCH_ESP32)
adjust = -done_p;
Expand Down Expand Up @@ -373,6 +415,17 @@ class StepperQueue {
#elif defined(ARDUINO_ARCH_ESP32)
_hasISRactive = false;
_nextCommandIsPrepared = false;
#elif defined(ARDUINO_ARCH_SAM)
_hasISRactive = false;
// we cannot clear the PWM interrupt when switching to a pause, but we'll
// get a double interrupt if we do nothing. So this tells us that on a
// transition from a pulse to a pause to skip the next interrupt.
_skipNextPWMInterrupt = false;
_skipNextPIOInterrupt = false;
_pauseCommanded = false;
_runOnce = false;
timePWMInterruptEnabled = 0;

#else
_isRunning = false;
#endif
Expand All @@ -383,13 +436,18 @@ class StepperQueue {
#if defined(ARDUINO_ARCH_ESP32)
uint8_t _step_pin;
uint16_t _getPerformedPulses();
#endif
#if defined(ARDUINO_ARCH_SAM)
uint8_t _step_pin;
uint8_t _queue_num;
#endif
void connect();
void disconnect();
void setDirPin(uint8_t dir_pin, bool _dirHighCountsUp) {
dirPin = dir_pin;
dirHighCountsUp = _dirHighCountsUp;
#if defined(ARDUINO_ARCH_ESP32) || defined(ARDUINO_ARCH_AVR)
#if defined(ARDUINO_ARCH_ESP32) || defined(ARDUINO_ARCH_AVR) || \
defined(ARDUINO_ARCH_SAM)
if (dir_pin != PIN_UNDEFINED) {
_dirPinPort = portOutputRegister(digitalPinToPort(dir_pin));
_dirPinMask = digitalPinToBitMask(dir_pin);
Expand Down
Loading