\documentclass[12pt,reqno]{article}
\usepackage[usenames]{color}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{amscd}
\usepackage[colorlinks=true,
linkcolor=webgreen,
filecolor=webbrown,
citecolor=webgreen]{hyperref}
\definecolor{webgreen}{rgb}{0,.5,0}
\definecolor{webbrown}{rgb}{.6,0,0}
\usepackage{color}
\usepackage{fullpage}
\usepackage{float}
\usepackage{psfig}
\usepackage{graphics,amsmath,amssymb}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{latexsym}
\usepackage{epsf}
\setlength{\textwidth}{6.5in}
\setlength{\oddsidemargin}{.1in}
\setlength{\evensidemargin}{.1in}
\setlength{\topmargin}{-.5in}
\setlength{\textheight}{8.9in}
\newcommand{\twolinesum}[2]{\sum_{\substack{{\scriptstyle #1}\\
{\scriptstyle #2}}}}
\def\cp {\mathcal{P}}
\def\cm {{\cal M}}
\def\ci {{\cal I}}
\newcommand{\gmod}[1]{\, ({\rm{mod}} \, #1)}
\newcommand{\seqnum}[1]{\href{http://oeis.org/#1}{\underline{#1}}}
\begin{document}
\begin{center}
\epsfxsize=4in
\leavevmode\epsffile{logo129.eps}
\end{center}
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{conjecture}[theorem]{Conjecture}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\begin{center}
\vskip 1cm{\LARGE\bf Counting Primes whose Sum of \\
\vskip .1in
Digits is Prime}
\vskip 1cm
\large
Glyn Harman\\
Department of Mathematics\\
Royal Holloway, University of London\\
Egham\\
Surrey TW20 0EX\\
United Kingdom\\
\href{mailto:g.harman@rhul.ac.uk}{\tt g.harman@rhul.ac.uk} \\
\end{center}
\vskip .2 in
\begin{abstract}
Motivated by recent work of Drmota, Mauduit and Rivat, we discuss the
possibility of counting the number of primes up to $x$ whose sum of
digits is also prime. We show that, although this is not possible
unless we assume a hypothesis on the distribution of primes stronger
than what is implied by the Riemann hypothesis, we can establish a
Mertens-type result. That is, we obtain a formula for the number of
such primes $p$ up to $x$ weighted with a factor $1/p$. Indeed, we can
iterate the method and count primes with the sum of digits a prime
whose sum of digits is a prime, and so on.
\end{abstract}
\section{Introduction}
\label{intro}
Thanks to the revolutionary work of Mauduit and Rivat \cite{Mau}, in particular as developed in \cite{Drm}, our knowledge of the distribution of
primes with constraints on their sum of digits has undergone a substantial expansion in recent years. The question of counting the number of primes
up to $x$, whose sum of digits in base $b$ is also prime, would have been considered far too difficult ten years ago
in view of the difficulty in restricting the sum of digits function when its variable is prime. In this paper we discuss this question, showing
that it is still impossibly difficult at present, but this is now a result of our lack of knowledge
concerning the distribution of primes in short intervals,
and no longer caused by the problems of restricting the sum of digits function. However, we are able to prove a Mertens-type
result on counting primes whose sum of digits is prime.
Throughout this paper the letters $p$ and $q$ are reserved for prime variables. In 1874 Mertens proved that
\[
\sum_{p 0$ is arbitrary but fixed.
\end{theorem}
Since $\sigma_b(p) \equiv p \gmod{b-1}$, for all but the finitely many primes $p$ satisfying $(p,b-1)>1$ we have $(\sigma_b(p),b-1)=1$,
and this leads to the
$(b-1)/\phi(b-1)$ factor appearing frequently. From the above result we can deduce some very interesting corollaries.
Clearly, \eqref{DMJ} shows that every sufficiently large integer is the sum of digits of a prime number. So, in particular, there are infinitely many
primes whose sum of digits is also prime. Also, every sufficiently large prime is the sum of digits of another prime which in turn is
the sum of digits of another prime, and so on. We use Theorem~\ref{DMR} to obtain a Mertens-type result as follows.
\begin{theorem}\label{T2}
Let $b \ge 2$ be an integer and $X \ge 2$. Then we have
\begin{equation}
\twolinesum{p < X}{p \in \cp_b} \frac{1}{p} = \frac{b-1}{\phi(b-1)} L_3(X) + O(1).
\end{equation}
\end{theorem}
In fact we shall use Theorem~\ref{T2} as the first step in an inductive argument to
establish the following more general result for the sets $\cp_b^{(j)}$.
\begin{theorem}\label{T3}
Let $b,j \ge 2$ be integers and $X \ge 2$. Then we have
\begin{equation}
\twolinesum{p < X}{p \in \cp_b^{(j)}} \frac{1}{p} = \left(\frac{b-1}{\phi(b-1)}\right)^{j} L_{j+2}(X) + O(1).
\end{equation}
\end{theorem}
As a final application of Theorem~\ref{DMR} we leave the following for the reader to prove.
\begin{theorem}\label{T4}
Write $\cp_b^*$ for the set of primes in $\cp_b$ which have a prime number of digits. Then
\[
\twolinesum{p < X}{p \in \cp_b^*} \frac{L_2(p)}{p} = \frac{b-1}{\phi(b-1)} L_3(X) + O(1).
\]
\end{theorem}
\section{The problem of the unweighted sum}
In this section we briefly consider the sum
\[
\pi_b^*(X) = \twolinesum{p < X}{p \in \cp_b} 1 \, .
\]
First we note the following result of K\'atai \cite{Kat}.
\begin{lemma}\label{Katlem}
For every positive integer $k$ we have
\[
\sum_{p < x} \left|\sigma_b(p) - \mu_b \log_b x\right|^k \ll_k x(\log x)^{k/2 - 1}.
\]
\end{lemma}
Now, even assuming the Riemann hypothesis and the pair correlation conjecture \cite{GHB}
we cannot prove that every interval $\ci_y = [y- y^{\frac12} (\log y)^{\frac13}, y+ y^{\frac12} (\log y)^{\frac13}]$ contains a
prime number. Suppose that $\ci_y$ contains no primes, and choose $X$ so that $y = \mu_b \log_b X$.
Then Theorem~\ref{DMR} cannot supply an asymptotic formula for $\pi_b^*(X)$, and by Lemma~\ref{Katlem} with $k=3A$
\[
\pi_b^*(X) \ll_A \frac{X}{X L_1(X) (L_2(X))^A}
\]
for every positive integer $A$. This shows that no asymptotic formula is possible unless a stronger result than can be
obtained on the Riemann hypothesis is assumed, for clearly $\pi_b^*(X)$ should be of size $X(L_1(X) L_2(X))^{-1}$. We
state here a hypothetical result to illustrate what we mean.
\begin{theorem}\label{hyp}
Suppose that there exists a positive function $f(x) \rightarrow 0$ such that for all large $x$ every interval of the form
$[x, x+x^{\frac12} f(x)]$ contains $f(x) x^{\frac12}(1+o(1))(\log x)^{-1}$ primes. Then
\begin{equation}\label{expect}
\pi_b^*(X) \sim \frac{b-1}{\phi(b-1)} \frac{X}{L_1(X) L_2(X)} .
\end{equation}
\end{theorem}
The proof of this result is straightforward. By Lemma~\ref{Katlem} we need only count primes whose sum of digits is $q$ with
\begin{equation}\label{con}
|q-\mu_b \log_b x| < (\log_b x)^{\frac23}.
\end{equation}
Applying Theorem~\ref{DMR} we need to estimate
\[
\sum_{\eqref{con}} \exp\left(- \frac{(q-\mu_b \log_b x)^2}{2 s_b \log_b x} \right).
\]
Breaking the summation range into shorter ranges of the form $[y,y+y^{\frac12}f(y)]$ and then comparing a sum with an integral
we obtain
\[
\frac{1+o(1)}{\log(\mu_b \log_b x)} \int_{-\infty}^{\infty} \exp\left(- \frac{t^2}{2 s_b \log_b x} \right)\, dt
=
\frac{1+o(1)}{\log(\mu_b \log_b x)}\sqrt{2s_b \pi \log_b x},
\]
which quickly establishes the result.
We remark that the hypothesis of Theorem~\ref{hyp}
is ``almost always'' true unconditionally in the following sense.
The method of Heath-Brown \cite{HB} shows that, given $\epsilon > 0$, there exists
$\eta>0$ such that for
all integers $y \in[Y,2Y)$, with at most $\ll Y^{\frac34 + \epsilon}$ exceptions, every subinterval
$[z,z+ z^{\frac12-\eta}]$ of $[y-Y^{\frac12 + \epsilon}, y+ Y^{\frac12+\epsilon}]$
contains $z^{\frac12-\eta}(1+O((\log z)^{-1}))(\log z)^{-1}$ primes. For these non-exceptional $y$ and with $X$
chosen to satisfy $y = \mu \log_b X$ we then have \eqref{expect}. Indeed, we can obtain the expected asymptotic formula
for $X$ on quite long ranges.
We note that we are unable to use the stronger results obtainable using a sieve method \cite{Kai}
since we require an asymptotic formula for the number of primes in an interval rather than upper or lower bounds.
\section{Proof of Theorem~\ref{T2}}\label{prooft2}
Lemma~\ref{Katlem}, together with \eqref{DMJ}, shows that $\sigma_b(p)$ is concentrated near its mean value $\mu_b \log_b p$,
and so enables us to restrict our attention to certain $p \in \cp_b$ with negligible error.
We write
\begin{eqnarray*}
&M(m) = \exp\left((\log b) m^{\frac78}\right), \qquad &\mu(m) = \mu_b m^{\frac78},\\
&\epsilon(m) = \displaystyle{\frac{M(m+1) - M(m)}{M(m)},} \qquad &\delta(m) = (m+1)^{\frac78} - m^{\frac78},
\end{eqnarray*}
and put $\cm(m) = [M(m),M(m+1))$.
We note for future reference that
\begin{equation}\label{eps}
\epsilon(m) =O\left(m^{-\frac18}\right) \quad \text{and} \ \ \epsilon(m) = \delta(m) \left(\log b + O\left(m^{-\frac18}\right)\right).
\end{equation}
Let $V$ be the largest integer with $M(V)