This is a backport of 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d
This should have been backported when it was commited, but I
mistook the problem as requiring the ntp_lock changes
that landed in 3.4 in order for it to occur.
Unfortunately the same issue can happen (with only one cpu)
as follows:
do_adjtimex()
write_seqlock_irq(&xtime_lock);
process_adjtimex_modes()
process_adj_status()
ntp_start_leap_timer()
hrtimer_start()
hrtimer_reprogram()
tick_program_event()
clockevents_program_event()
ktime_get()
seq = req_seqbegin(xtime_lock); [DEADLOCK]
This deadlock will no always occur, as it requires the
leap_timer to force a hrtimer_reprogram which only happens
if its set and there's no sooner timer to expire.
NOTE: This patch, being faithful to the original commit,
introduces a bug (we don't update wall_to_monotonic),
which will be resovled by backporting a following fix.
Original commit message below:
Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.
Thomas diagnosed this as the following pattern:
CPU 0 CPU 1
do_adjtimex()
spin_lock_irq(&ntp_lock);
process_adjtimex_modes(); timer_interrupt()
process_adj_status(); do_timer()
ntp_start_leap_timer(); write_lock(&xtime_lock);
hrtimer_start(); update_wall_time();
hrtimer_reprogram(); ntp_tick_length()
tick_program_event() spin_lock(&ntp_lock);
clockevents_program_event()
ktime_get()
seq = req_seqbegin(xtime_lock);
This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.
The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(ie: ~1-10ms, depending on HZ) after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).
This patch applies on top of tip/timers/core.
CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnoised-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
---
include/linux/timex.h | 2 +-
kernel/time/ntp.c | 122 +++++++++++++++------------------------------
kernel/time/timekeeping.c | 12 ++---
3 files changed, 44 insertions(+), 92 deletions(-)
diff --git a/include/linux/timex.h b/include/linux/timex.h
index e6967d1..3b587b4 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -271,7 +271,7 @@ static inline int ntp_synced(void)
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 tick_length;
-extern void second_overflow(void);
+extern int second_overflow(unsigned long secs);
extern void update_ntp_one_tick(void);
extern int do_adjtimex(struct timex *);
/*
- * Leap second processing. If in leap-insert state at the end of the
- * day, the system clock is set back one second; if in leap-delete
- * state, the system clock is set ahead one second.
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ * Also handles leap second processing, and returns leap offset
*/
-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
+int second_overflow(unsigned long secs)
{
- enum hrtimer_restart res = HRTIMER_NORESTART;
-
- write_seqlock(&xtime_lock);
+ int leap = 0;
+ s64 delta;
+ /*
+ * Leap second processing. If in leap-insert state at the end of the
+ * day, the system clock is set back one second; if in leap-delete
+ * state, the system clock is set ahead one second.
+ */
switch (time_state) {
case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
break;
case TIME_INS:
- timekeeping_leap_insert(-1);
- time_state = TIME_OOP;
- printk(KERN_NOTICE
- "Clock: inserting leap second 23:59:60 UTC
");
- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
- res = HRTIMER_RESTART;
+ if (secs % 86400 == 0) {
+ leap = -1;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE
+ "Clock: inserting leap second 23:59:60 UTC
");
+ }
break;
case TIME_DEL:
- timekeeping_leap_insert(1);
- time_tai--;
- time_state = TIME_WAIT;
- printk(KERN_NOTICE
- "Clock: deleting leap second 23:59:59 UTC
");
+ if ((secs + 1) % 86400 == 0) {
+ leap = 1;
+ time_tai--;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE
+ "Clock: deleting leap second 23:59:59 UTC
");
+ }
break;
case TIME_OOP:
time_tai++;
time_state = TIME_WAIT;
- /* fall through */
+ break;
+
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
- write_sequnlock(&xtime_lock);
-
- return res;
-}
-
-/*
- * this routine handles the overflow of the microsecond field
- *
- * The tricky bits of code to handle the accurate clock support
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
- * They were originally developed for SUN and DEC kernels.
- * All the kudos should go to Dave for this stuff.
- */
-void second_overflow(void)
-{
- s64 delta;
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -253,23 +251,25 @@ void second_overflow(void)
tick_length += delta;