EMIS/ELibM Electronic Journals

Outdated Archival Version

These pages are not updated anymore. They reflect the state of 20 August 2005. For the current production of this journal, please refer to http://www.math.psu.edu/era/.


%_ **************************************************************************
%_ * The TeX source for AMS journal articles is the publishers TeX code     *
%_ * which may contain special commands defined for the AMS production      *
%_ * environment.  Therefore, it may not be possible to process these files *
%_ * through TeX without errors.  To display a typeset version of a journal *
%_ * article easily, we suggest that you retrieve the article in DVI,       *
%_ * PostScript, or PDF format.                                             *
%_ **************************************************************************
% Author Package file for use with AMS-LaTeX 1.2
\controldates{17-DEC-2003,17-DEC-2003,17-DEC-2003,17-DEC-2003}
 
\RequirePackage[warning,log]{snapshot}
\documentclass{era-l}
\issueinfo{9}{19}{}{2003}
\dateposted{December 18, 2003}
\pagespan{152}{161}
\PII{S 1079-6762(03)00123-9}

\copyrightinfo{2003}
  {American Mathematical Society}

\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}

\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{xca}[theorem]{Exercise}

\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

\numberwithin{equation}{section}

\newcommand{\DD}{\mathcal{D}}
\newcommand{\GG}{\mathcal{G}}
\newcommand{\sfrac}[2]{{\textstyle\frac{#1}{#2}}}
\newcommand{\Nin}{N_{\mbox{\tiny{in}}}}
\newcommand{\Tarr}{T_{\mbox{\tiny{arr}}}}
\newcommand{\Din}{D_{\mbox{{\small in}}}}
\newcommand{\Dout}{D_{\mbox{{\small out}}}}
\newcommand{\ccluster}{\kappa_{\mathrm{cluster}}}
\newcommand{\Exp}{\mbox{Exp}}
\begin{document}

\title{A stochastic complex network model}

\author{David J. Aldous}
\address{Department of Statistics, 
367 Evans Hall, 
U.C. Berkeley, CA 94720} 
\email{aldous@stat.berkeley.edu}
\thanks{The author was supported in part by NSF Grant DMS-0203062.}


\subjclass[2000]{Primary 60K35; Secondary 05C80, 90B15, 94C15}

\date{July 22, 2003}

\commby{Ronald L. Graham}



\keywords{Complex network,
Poisson process, PWIT,
random graph,
scale-free,
small worlds,
Yule process}

\begin{abstract}
We introduce a stochastic model for complex networks possessing three
qualitative features: power-law degree distributions, local clustering, and
slowly growing diameter.
The model is mathematically natural, permits a wide variety
of explicit calculations, has the desired three qualitative features,
and fits the complete range of degree scaling exponents 
and clustering parameters.
\end{abstract}

\maketitle

\section{Introduction}
Three popular science books
\cite{barabasi02},
\cite{buchanan02},
\cite{watts03},
a dozen articles in {\em Science} and {\em Nature},
and 154 preprints at 
{\tt xxx.arXiv.org/cond-mat}
deal with {\em complex networks},
which in this context means 
the empirical and theoretical study of large graphs,
focusing in particular on those possessing the following
three qualitative properties,
asserted to hold in many interesting real-world examples:
\begin{itemize}
\item the degree distribution has power-law tail;
\item local clustering of edges: graph is not locally tree-like;
\item small diameter --- $O(\log \mbox{(number of vertices)}). $
\end{itemize}
The nature of that subject---typically not 
presented as rigorous mathematics---is most easily seen
from the long survey papers \cite{AB02},
\cite{DM02},
\cite{newman-survey} 
and associated monograph \cite{MD03},
and we will not attempt an overview here.
A shorter survey \cite{boll-scale} emphasizes
rigorous mathematical results,
more of which are appearing in a new journal {\em Internet Mathematics}.
The probability models which have been studied are mostly
variations of two basic ideas.
In {\em proportional attachment} models, vertices arrive and
form edges to existing vertices with relative probabilities
increasing linearly with degree of the existing vertex.
In {\em lattice small worlds} models, vertices are points
of $\mathbb{Z}^d$ and in addition to lattice edges there are long edges
between random vertex-pairs chosen with probabilities decreasing
with distance.
But it has often been noted that neither class of models
gives all three desired properties without adding extra ingredients
to force a property to hold by fiat,
as illustrated by the following quotes.
\begin{quote}
Most models proposed to describe the topology of complex networks
have difficulty capturing simultaneously these two features
[in our list of three].
\cite{RBar03}
\end{quote}
\begin{quote}
There is a great need for models of real-life networks
that incorporate many of the important features of these systems,
but still can be analyzed rigorously.
\cite{boll-scale}
\end{quote}
Our purpose is to introduce a model, based on somewhat
more sophisticated mathematics, designed to possess all three
qualitative properties in a mathematically natural way,
and to permit a wide variety of explicit calculations
in the $n$ (= number of vertices) $\to \infty$ limit.
We do not claim the model is canonical---we could imagine
quite different models satisfying the same desiderata---but such
models have not appeared in the literature.

A longer account of this model, aimed at
non-mathematicians and emphasizing the conceptual aspects 
of the model and the explicit calculations, 
will appear elsewhere \cite{me104}.
In this account, aimed at mathematicians, 
we emphasize how 
some existing mathematical structures and methodology can be used to implement
the kind of graph-modeling ideas studied more naively
in the complex networks literature.
The bald construction (Section \ref{sec-finiten}) will at first sight seem
neither natural nor tractable.
Section \ref{sec-compare} compares it with standard models.
Section \ref{sec-method} reviews 
some existing mathematical structures and methodology 
(the Yule process;
steady-state Poisson universe;
local weak convergence and the PWIT).
Section \ref{sec-put} puts it all together
to reveal why one can do explicit calculations in the
$n \to \infty$ limit, and then Section \ref{sec-calc} 
outlines illustrative calculations.

Our model has of course some disadvantages, detailed in \cite{me104}.
In particular, the graph $\GG_n$ is typically not connected.
By analogy with classical 
Erd{\"o}s-R{\'e}nyi random graph theory, we expect that 
(on one side of a critical line in parameter space) 
there is a unique ``giant component" of $\GG_n$, whose diameter is 
$O(\log n)$.
One could make a connected graph by modifying the definition 
so that an arriving vertex always links to its closest neighbor,
though this would make explicit formulas more complicated. 

\section{The finite $n$ model}
\label{sec-finiten}
We define the model in steps (a)--(e).

\begin{itemize}
\item[(a)] 
Vertices $n = 1,2,3,\ldots$ arrive successively,
vertex $n$ arriving at time $t_n = \log n$.
The model can be indexed by number of vertices
($n = 1,2,3,\ldots$)
or equivalently by time
($0 \leq t < \infty$).

\item[(b)]
When vertex $n$ arrives at time $t_n$, there are created
links from $n$ to each vertex $1 \leq j < n$.
Each link-length $\bar{d}(n,j,t_n)$ is random with
exponential (mean $n$) law, independent of other link-lengths.

\item[(c)] The link-lengths increase with time at deterministic 
rate $1$; at time $t>t_n$ the link $(n,j)$ has length
$\bar{d}(n,j,t) = \bar{d}(n,j,t_n) e^{t-t_n}$.

\item[(d)] At time $t$ the 
{\em distance}
$d(i,j,t)$ between two vertices $i,j$ 
(where $i,j \leq e^t =$ number of vertices present)
is defined as the minimum, over all paths from $i$ to $j$, of
the length of the path (i.e. sum of link-lengths along the path).
\end{itemize}

What we have described so far is a ``geometry"
(we write $\DD_n$ or $\DD_t$)
of an evolving random discrete metric space.
The object of this paper is a directed graph process
(we write $\GG_n$ or $\GG_t$)
built on top of this geometry.  
The model has two parameters
$\alpha, \lambda > 0$,
which enter only via the function
\begin{equation}
 p(x) = \min(1,\alpha \lambda e^{-\lambda x}), \quad 0 \leq x < \infty . \label{pdef}
\end{equation}
We require
\begin{equation}
\int_0^\infty \ p(x) \ dx < 1,
\label{pxreq}
\end{equation}
which forces the parameter range to be
\[ \alpha < 1 \mbox{ and } \alpha \lambda \leq 1;
\mbox{ or } \alpha \lambda \geq 1
\mbox{ and } \lambda^{-1} + \log(\alpha \lambda) < 1 . \]
In fact the model makes sense for any function $p(x) \leq 1$
satisfying (\ref{pxreq}), but the slightly arbitrary parametrization 
(\ref{pdef})
is convenient for obtaining explicit formulas.

\begin{itemize}
\item[(e)] When vertex $n$ arrives at time $t_n$,
for each $1 \leq i < n$:
\begin{itemize}
\item[$\bullet$] a directed edge $n \to i$ is created with probability
$p(\bar{d}(n,i,t_n))$;
\item[$\bullet$] for each $j < n$ such that $i \to j$ is an existing
edge in $\GG_n$, 
a directed edge $n \to j$ is created with probability
$p(\bar{d}(n,i,t_n))$.
\end{itemize}
\end{itemize}

This provides an inductive description of $\GG_n$.
Of course as well as its graph structure,
$\GG_n$ inherits ``metric" structure such as real-valued
edge lengths (growing exponentially with time).

Let us make two remarks on definition (e).
Note that the probabilities depend on link-lengths
$\bar{d}(\cdot)$ instead of metric distance $d(\cdot)$.
And note that the probability that an existing edge
$i \to j$ is ``copied" to a new edge $n \to j$
depends on the link-length from $n$ to the ``head" $i$,
not to the ``tail" $j$.


\subsection{Comparisons with other models}
\label{sec-compare}
In the {\em small worlds} models,
vertices are points in $d$-dimensional space, which automatically 
provides a metric distance between vertices, and the model
uses some rule to
create a random graph with short-range and long-range edges.
In purely graph-theoretical models, such as the 
basic {\em proportional attachment} model,
the vertices have no ``intrinsic structure" other than that provided
by the graph; we visualize this as saying that each pair of vertices
is metric distance $1$ apart.
In related ``copying" models a new vertex picks existing vertices $i$
and, in some random way, creates edges to those vertices $i$ and copies 
edges to some $j$ for which $i \to j$ is an existing edge, 
in the spirit of (e) above.
One can regard (e) as defining a new class of
{\em metric copying} models, which could be combined with any model
of metric distance $d(\cdot)$.
In a metric copying model we visualize vertices
as points in some abstract metric space, representing
(in the case of web pages, say) the difference between the
content of the pages, or 
(for people) some notion of ``social distance" based on location, education, 
profession, interests, etc., of the individuals.
In detail our particular geometry model is chosen for mathematical tractability rather
than any claimed realism.
But it does have the property
(see Section \ref{sec-PWIT}) that the number of vertices within metric distance $r$ of a typical
vertex grows exponentially with $r$.
This property is intermediate between,
and surely in many contexts more plausible than,
the alternatives implicit in the two standard categories of models above.

Of course ours is not the first model seeking to combine
the ``metric" and ``copying" ingredients, but other models
\cite{JJ02},
\cite{menczer02},
\cite{fabr02}
do not permit the same range of explicit calculations.


\section{Three mathematical structures}
\label{sec-method}
Here we recall three structures which will be used in the construction
and analysis of our model.  A note at the end of each subsection says
where they are used.

\subsection{The Yule process}
Fix $0< \theta < \infty$ 
and set $N(0) = 1$.
The {\em Yule process} of rate $\theta$,
written
$(N(t), t \geq 0)$,
represents population size in  
a population of immortal individuals, each of whom
reproduces at stochastic rate $\theta$.
In other words, it is the continuous-time Markov chain 
which changes only by $+1$ steps and for which
\[ P(N(t+dt) = m+1|N(t) = m) = \theta m \ dt .\]
This is a well-understood elementary example of a stochastic
process, for which we quote two explicit results
(both going back to the original 1924 paper by Yule \cite{yule24}).
First
\begin{equation}
 N(t) \mbox{ has geometric($e^{-\theta t}$) law,} \label{geom}
\end{equation}
meaning
$P(N(t) = m) = (1-p)^{m-1} p, \ m \geq 1$
for $p = e^{-\theta t}$.
Second, if $T$ is an independent exponential (mean $1$) r.v., then
\begin{equation}
P(N(T) - 1 \geq d)
= \frac{
\Gamma(d+1)\Gamma(1/\theta)}{\theta \Gamma(d+1 + \sfrac{1}{\theta})},
\quad d \geq 0  
. \label{eqY}
\end{equation}
{\em Where used?}
The Yule process is used in analysing in-degree distribution (Section
\ref{411}) and density of induced subgraphs (Section \ref{413}).


\subsection{The steady-state Poisson universe}
\label{sec-poisson}
In the 1950s the astronomer Fred Hoyle noted that the fact
that more distant galaxies recede more rapidly 
was not logically inconsistent with the supposition that
the universe has existed, and looked statistically similar, for ever;
because it might be that matter is continually created from empty space.
This {\em steady state} cosmological theory was never widely accepted, but 
does correspond to a well-defined mathematical model
for random points.
The simplest ``static" model for random points in 
$\mathbb{R}^d$ is the 
{\em Poisson process} of spatial rate $1$, 
for which the mean number of points in a region equals
the $d$-dimensional volume of the region.
One now makes a ``dynamic" or
{\em space-time} model over time $-\infty < t < \infty$
by specifying that
\begin{itemize}
\item[(i)] points move away from the origin as deterministic
motion with exponential rate $1/d$; a point at position $x$ at time $t$
will be at position $xe^{(t^\prime -t)/d}$
at times $t^\prime > t$.
\item[(ii)]
New points appear as a rate-$1$ space-time Poisson process; 
that is, the chance of a point arriving in a cube of volume
$dx$ during a time interval $dt$ equals $dx \, dt$.
\end{itemize}

It is straightforward that such a process exists which is
time-stationary: at any time the spatial distribution of points is the
Poisson process of spatial rate $1$.
One could also consider this space-time process with the direction of time reversed.
Then in (i) the exponential motion is towards the origin; in (ii), 
a point existing at time $t_0$ has a ``disappearance"
time $T< t_0$ such that $t_0 - T$ has exponential (mean $1$) law.

{\em Where used?}
Our model is designed so that its $n \to \infty$ limit is an 
infinite-dimensional analog of the space-time
process above---see paragraph (i) of Section~\ref{sec-put}.

\subsection{Local weak convergence and the PWIT}
\label{sec-PWIT}
The ideas here are treated in detail in the survey paper \cite{me101}.
Consider the space of graphs whose edges have real-valued lengths, with
one vertex distinguished as the root, and which are 
{\em locally finite} in that there are only finitely many vertices within
any fixed distance from the root.
There is a natural notion of {\em local convergence} of such graphs.
This induces a notion of
{\em local weak convergence}
of finite random graphs to a limit infinite (locally finite) rooted graph,
based on choosing a uniform random vertex of the finite graph to be a root.
In our ``geometry" $\DD_n$, at time $t_n$ all the 
$\binom{n}{2}$
link-lengths between the $n$ vertices
are independent with exponential (mean $n$) distribution.
It is known (and easy) that $\DD_n$ has a $n \to \infty$ limit in
the sense of local weak convergence, called the {\em PWIT}.

The PWIT is defined by a construction, illustrated in
Figure \ref{firstfig}.
Start with a single root vertex $\emptyset$.  This root 
vertex is then given an \emph{infinite} number of links to its {\em children}
vertices,
these link-lengths having the law of a Poisson process
$(\xi_i^{\emptyset}:  1 \leq i < \infty)$ of rate $1$ on $(0,\infty)$.
Now, recursively,
each vertex $v$ 
arising as a child of a previous vertex is given an infinite number
of links to its children, and these link-lengths 
$(\xi^v_i:  1 \leq i < \infty)$ 
again have the law of a Poisson process
of rate $1$,
independent of previous lengths.
This procedure is then continued \emph{ad infinitum}.
The resulting rooted infinite tree is a well-defined random object,
called the
\emph{Poisson weighted
infinite tree} (PWIT).  
An obvious, but very useful for calculations, property of
the PWIT is its
{\em recursive self-similarity}: the subtrees at each child of the
root are independent copies of the PWIT itself.


\begin{figure}[h]  
\setlength{\unitlength}{0.4in}
\begin{picture}(12,12)
\thinlines
\put(0.5,0.5){\line(1,0){11}}
\put(0.5,0.5){\line(0,1){11}}
\put(11.5,11.5){\line(0,-1){11}}
\put(11.5,11.5){\line(-1,0){11}}
\put(6,6){\circle*{0.2}}
\put(6,5){\circle*{0.2}}
\put(6,5.8){\line(0,-1){0.6}}
\put(6.1,5.3){$\xi^{\emptyset}_1$}
\put(4,8){\circle*{0.2}}
\put(5.8,6.2){\line(-1,1){1.6}}
\put(4.7,6.6){$\xi^{\emptyset}_3$}
\put(8,7){\circle*{0.2}}
\put(6.2,6.1){\line(2,1){1.6}}
\put(7.0,6.3){$\xi^{\emptyset}_2$}
\put(11,5){\circle*{0.2}}
\put(6.2,5.96){\line(5,-1){4.6}}
\put(8.5,5.1){$\xi^{\emptyset}_4$}
\put(5,4){\circle*{0.2}}
\put(5.8,4.8){\line(-1,-1){0.6}}
\put(7,2){\circle*{0.2}}
\put(6.07,4.8){\line(1,-3){0.86}}
\put(10,1){\circle*{0.2}}
\put(6.2,4.8){\line(1,-1){3.6}}
\put(4,2){\circle*{0.2}}
\put(4.9,3.8){\line(-1,-2){0.8}}
\put(5,1){\circle*{0.2}}
\put(5,3.8){\line(0,-1){2.6}}
\put(3,8){\circle*{0.2}}
\put(3.8,8){\line(-1,0){0.6}}
\put(1,10){\circle*{0.2}}
\put(2.8,8.2){\line(-1,1){1.6}}
\put(4,10){\circle*{0.2}}
\put(4,8.2){\line(0,1){1.6}}
\put(7,11){\circle*{0.2}}
\put(4.2,8.2){\line(1,1){2.6}}
\put(2,4){\circle*{0.2}}
\put(3.9,7.8){\line(-1,-2){1.8}}
\put(2,2){\circle*{0.2}}
\put(2,3.8){\line(0,-1){1.6}}
\put(9,10){\circle*{0.2}}
\put(8.07,7.2){\line(1,3){0.86}}
\put(10,8){\circle*{0.2}}
\put(8.2,7.1){\line(2,1){1.6}}
\put(10,9){\circle*{0.2}}
\put(10,8.2){\line(0,1){0.6}}
\put(11,11){\circle*{0.2}}
\put(10.07,8.2){\line(1,3){0.86}}
\put(4.17,7.94){$3$}
\put(5.66,4.93){$1$}
\put(8.07,6.69){$2$}
\put(11,5.2){$4$}
\put(5.6,5.9){$\emptyset$}
\end{picture}
\caption{{\bf The PWIT.}
Illustration of the vertices of the PWIT within a window
of radius $3$ centered on the root $\emptyset$.
Lines indicate the link relationship, but are drawn only
when both end-vertices are within the window.
Thus the four links of $\emptyset$ shown have lengths
$0< \xi^{\emptyset}_1 < \xi^{\emptyset}_2 < \xi^{\emptyset}_3 <
\xi^{\emptyset}_4 < 3$, 
while there are infinitely many 
links at $\emptyset$ with lengths greater than $3$.
Orientation of lines in the figure is arbitrary.
}\label{firstfig}
\end{figure}

The {\em distance} $d(v,w)$ between two vertices of the PWIT
is just the sum of link-lengths along the path from $v$ to $w$.
Though we have drawn a tree in Figure \ref{firstfig},  
the lines merely indicate the link relationships;
it is better to think of the edges as absent
while retaining the distances $d(v,w)$.
In this way we may regard the vertices of the PWIT as an infinite-dimensional analog of
the $d$-dimensional Poisson point process in Section \ref{sec-poisson}.

The Yule process is encoded within the PWIT as the process, indexed 
by $r \geq 0$, which counts the number of vertices within distance $r$ of the
root.  This is not our main use of the Yule process, but does imply
(using (\ref{geom}))
that the mean number of such vertices equals $e^r$, thus providing 
one justification for regarding the PWIT as 
``infinite-dimensional".

{\em Where used?}
Our model adds time-dependence and graph structure to the geometry $\DD_t$;
we will study its limit behavior in the next section by adding corresponding
structure to the PWIT.


In contrast to its use in this paper, 
the primary use of the PWIT is as a platform to study properties
of solutions of combinatorial optimization problems such
as the traveling salesman problem on random instances;
such study is often intractable in the Poisson model on 
$\mathbb{R}^d$.
See \cite{me103} for recent work.


\section{The
$ n = \infty$ limit graph process}
\label{sec-put}
We can now explain, in stages, the motivation for our complex network model;
it is designed to have tractable structure in the $n \to \infty$ limit.

(i)  Just as the $t_0 \to \infty$ limit of the
``static" geometry $\DD_{t_0}$ is the PWIT, 
say $\DD^*_0$,
we can consider the $t_0 \to \infty$ limit of the
``dynamic" space-time geometry
$(\DD_{t_0 +t}, \ - t_0 \leq t < \infty)$,
and this limit is a space-time version
$(\DD^*_t, \ - \infty < t < \infty)$
of the PWIT.  Loosely, we regard
this as an infinite-dimensional analog of the steady-state Poisson universe
in Section \ref{sec-poisson}.
At $t$ increases, inter-point distances increase exponentially, 
but the arrival of new points maintains a constant density
and gives the time-stationary property.


(ii) Our finite random graph process $\GG_n$ was built
on top of $\DD_n$ by rule (e) of Section \ref{sec-finiten}.
We now use exactly the same rule to define a random graph
process
$(\GG^*_t, \ - \infty < t < \infty)$
built over the space-time PWIT
$(\DD^*_t, \ - \infty < t < \infty)$.
The time-stationary property of the space-time PWIT
extends to $(\GG^*_t)$.

(iii) By definition of local weak convergence,
the structure of $\GG^*_0$ around the root
is the $n \to \infty$ limit
of
the structure of $\GG_n$ around 
a uniform random vertex.
So for many quantitative questions about $\GG_n$,
the $n \to \infty$ limit is given exactly as the answer to
a corresponding question about $\GG^*_0$.
As illustrated later, 
many such questions can be answered explicitly by doing 
``calculations around the root" exploiting the structural properties
listed below.


While there are many details involved in making the
above assertions rigorous, there is nothing mathematically
hard. 
Showing that there exists a {\em time-stationary} version of $(\GG^*_t)$
ultimately comes down to the fact (\ref{ED}) that the mean vertex 
degree is finite.  The argument for that fact can be rephrased to show
that, starting from no edges and running the dynamics (e), the mean vertex
degree remains bounded, so that by taking subsequences of time-averages
and passing to a limit one gets a stationary distribution.


Of course, the object $\GG^*_0$ that we need to study is not defined
explicitly but rather as the stationary distribution of the process
$(\GG^*_t)$ built over the geometry $(\DD^*_t)$.
What makes calculations feasible is the interplay of three nice structural
properties.  We have already mentioned
\begin{itemize}
\item recursive self-similarity of the PWIT;
\item time-stationarity of the joint process $(\DD^*_t,\GG^*_t)$;
\end{itemize}
so let us now develop the third property
\begin{itemize}
\item embedded Yule processes.
\end{itemize}
The basic instance concerns vertex in-degrees.
In particular, consider the process
\[ \Nin (t) = 1 + \mbox{ in-degree of root at time } \Tarr + t, \]
where $\Tarr$ is the arrival time of the root.
This process is the Yule process of rate
\begin{equation}
\beta = \int_0^\infty p(r) \ dr < 1. \label{def-beta}
\end{equation}
To see why, note that
the dynamics of the geometry $(\DD^*_t)$ can be expressed intuitively as:
for a vertex $v$ present at time $t$,
\begin{eqnarray}
\mbox{there is chance $1 \cdot dr \ dt$ that during
$[t,t+dt]$ a new vertex $v^\prime$ }&&\nonumber\\ \mbox{will appear  
with a link to $v$ of length 
$\in [r,r+dr]$ 
} . \label{newvertex}
\end{eqnarray}
Apply this at time $t$ to the $N(t) = n$ vertices $v$
which are either the root or have an edge $v \to $ root;
an arriving $v^\prime$ has chance $p(r)$ to create a new edge
into the root, so the overall rate of such new edges equals
$n  \int_0^\infty \ p(r) dr$.





\subsection{Illustrative calculations}
\label{sec-calc}
Here we outline four calculations from \cite{me104},
emphasising how the structure is used and omitting calculus details.


\subsubsection{Distribution of in-degree}
\label{411}
The distribution of in-degrees in $\GG_n$ has a $n \to \infty$ limit, say
$\Din$, which
is the in-degree of the root of $\GG^*_0$.
The age $- \Tarr$ of the root of $\GG^*_0$ has exponential 
(mean $1$) law, as for the steady-state Poisson process.
So $\Din$ has the law of 
$\Nin(- \Tarr) - 1$, where $\Nin(\cdot)$ is the Yule process of rate $\beta$
at (\ref{def-beta}).
So from (\ref{eqY}) we can read off the formula
\[ P(\Din \geq d) = 
 \frac{
\Gamma(d+1)\Gamma(1/\beta)}{\beta \Gamma(d+1 + \sfrac{1}{\beta})},
\quad d \geq 0  
 . \]
This has mean
\begin{equation}
E \Din = \sfrac{\beta}{1 - \beta}
\label{ED}
\end{equation}
and power-law tail
\[
P(\Din = d) \sim \beta^{-2} \Gamma(1/\beta) \ d^{-1-\sfrac{1}{\beta}} .
  \]

\subsubsection{Distribution of out-degree}
The distribution of out-degrees in $\GG_n$ has a $n \to \infty$ limit, say
$\Dout$, which
is the out-degree of the root of $\GG^*_0$.
Since the out-edges are formed on arrival, we may suppose
the root of the PWIT has just arrived.
Consider a link (root, $v^\prime$) of length $r$.
For each out-edge of $v^\prime$,
and for $v^\prime$ itself, there is chance $p(r)$ that
a corresponding out-edge is created at the root,
so the number of such edges has
$\mbox{Binomial}(1+D^{(i)},p(r))$ law, 
where $D^{(i)}$ is the out-degree of $v^\prime$.
Summing over all links whose lengths
$(\xi_i, i \geq 1)$ 
form a Poisson process of rate $1$,
\begin{equation}
 \Dout = 
\sum_{i=1}^\infty \mbox{Binomial}(1+D^{(i)},p(\xi_i))  . \label{Douteq3}
\end{equation}
By recursive self-similarity, the $(D^{(i)})$ are independent
with law $\Dout$, so (\ref{Douteq3}) provides an equation
for the law of $\Dout$.
Of course $E\Dout = E\Din$ but the distributions are different.
While we do not have a useful explicit solution in general,
one can calculate moments and can identify the law in several
parameter-limit cases~\cite{me104}.


\subsubsection{Density of induced complete subgraphs}
\label{413}
Let $G$ be a finite directed acyclic graph. 
Define a random variable $\mathbf{X}_n(G)$ to be the
number of vertex-subsets $V$ of $\GG_n$ such that
$\GG_n$ restricted to $V$ is isomorphic to $G$.
Our methodology enables us to study asymptotics
in terms of $\GG^*_0$.
Essentially 
(omitting fussy conventions about rooting and automorphisms of $G$)
\[ \lim_n \sfrac{1}{n} E \mathbf{X}_n(G) = 
\chi(G) \]
where
\begin{quote}
$\chi(G)$ is the expected number of vertex-subsets
$V$ of $\GG_\infty^*$ including the root such that
$\GG_\infty^*$ restricted to
$V$ is isomorphic to $G$. 
\end{quote}
One can obtain formulas for $\chi(G)$ in many cases, in
particular for the complete directed acyclic graph $K_r$ on $ r \geq 2$ vertices.
The idea in this case is that a $K_{r+1}$ is formed by a 
newly arriving vertex creating edges to all vertices
of an existing $K_r$.
For a given $K_r$, the process which counts the number of 
such $K_{r+1}$'s, plus the $K_r$ itself, turns out to be
a Yule process of rate
$\beta_r = \int_0^\infty p^r(u) \ du$,
and we eventually obtain the formula
\[ \chi(K_r) = 
\prod_{u=1}^{r-1} \frac{\beta_u}{1 - \beta_u} 
 . \]
The case $r = 3$ is useful for the following reason.
One of the basic summary statistics in empirical study of
complex networks is the 
{\em cluster coefficient},
which measures relative density of triangles.
For our purposes we use
\begin{eqnarray*}
\ccluster &=& \lim_n
\frac{E \mbox{(number of triangles in $\GG_n$)}}
{E \mbox{(number of directed $2$-paths in $\GG_n$)}}
\\ &=&
\frac{\chi(K_3)}{(E\Din)(E\Dout)} .
\end{eqnarray*}
So we have formulas for two basic parameters
(mean degree and $\ccluster$)
in terms of $p(\cdot)$.
As shown in \cite{me104}, an advantage of our particular 
specification of $p(\cdot)$ in terms of two parameters $\alpha, \lambda$
is that the complete range 
$0< E\Din < \infty, \ 0 < \ccluster < 1$ is
attained, for unique parameter values.


\subsubsection{Distribution of edge-lengths}
In our graph model, edges have real-valued lengths, and so 
there is a law $L$ for the length of a ``typical" edge in the
limit $\GG^*_0$, representing the $n \to \infty$ limit law
of a uniform random edge of $\GG_n$.
We can calculate this law using the time-dynamics of the
limit graph process $(\GG^*_t)$.
We treat the simpler case $\alpha \lambda < 1$.
Consider the lengths of the in-edges at a particular 
vertex $v_0$.
Following a tradition in mathematical probability,
we visualize an in-edge of length $\ell$ as a ``particle"
at position $\ell$ on a line; we also put a particle at position
$0$ to represent the vertex $v_0$ itself.
If we start time $\tau$ with $\tau = 0$ at the arrival time
of $v_0$, 
then the evolution of the ``particle process" can be specified
as follows.

\begin{itemize}
\item[(i)] There is a particle at position $0$ at all times 
$\tau \geq 0$.

\item[(ii)] For each particle (at position $x$ at time $\tau$, say),
at stochastic rate $\alpha$ per unit time a new particle
appears at position $x + \Exp(\lambda)$,
where $\Exp(\lambda)$ denotes a r.v. with exponential 
(rate $\lambda$) law.

\item[(iii)] Particle positions increase deterministically at
exponential(1) rate: a particle at $x$ at time $\tau$
will be at $x e^{\tau_0 - \tau}$ at time $\tau_0 > \tau$.
\end{itemize}
Rule (ii) derives from (\ref{newvertex}):
for an existing edge $(v^\prime,v_0)$,
a new vertex arriving at link-distance $r$ from $v^\prime$
creates an edge to $v_0$ with probability $p(r)$,
so the rate at which each existing edge is copied equals
$\int_0^\infty p(x) \ dx = \alpha$;
moreover conditional on copying, the distance $r$ has
$\Exp(\lambda)$ distribution, and so the length of the new edge
equals the length of the old edge $+ \Exp(\lambda)$.

Such a particle process can be analyzed by classical methods---set
up and solve a differential equation for 
\[ g(\tau,x) = 
\mbox{ mean density (in $x$) of particles at time $\tau$} . \]
Ultimately one obtains a hypergeometric-type series expression for
the probability density function of $L$:
\begin{equation}
\frac{1-\alpha}{\alpha}   \sum_{i=0}^\infty \frac{(i+1)\Gamma(\alpha + 3) 
\ (-\lambda x)^i}{\Gamma(i + \alpha + 3)}, \ 
0 < x < \infty 
 .
\label{len-dens}
\end{equation}
\begin{thebibliography}{10}

\bibitem{AB02}
R.~Albert and A.-L. Barab{\'a}si, \emph{Statistical mechanics of complex
  networks}, Rev. Mod. Phys. \textbf{74} (2002), 47--97.
\MR{2003d:82055}

\bibitem{me103}
D.J. Aldous and A.G. Percus, \emph{Scaling and universality in continuous
  length combinatorial optimization}, 
Proc. Natl. Acad. Sci. USA \textbf{100} (2003), 11211--11215.

\bibitem{me104}
D.J. Aldous, \emph{A tractable complex network model based on the stochastic
  mean-field model of distance}, arXiv:cond-mat/0304701, 2003.

\bibitem{me101}
D.J. Aldous and J.M. Steele, \emph{The objective method: Probabilistic
  combinatorial optimization and local weak convergence}, 
in Probability on Discrete Structures, H. Kesten (ed.), 1--72, 
Springer, 2003.

\bibitem{barabasi02}
A.L. Barab{\'a}si, \emph{Linked: the new science of networks}, Perseus Press,
  Cambridge, MA, 2002.

\bibitem{boll-scale}
B.~Bollob{\'a}s and O.~Riordan, \emph{Mathematical results on scale-free random
  graphs}, Handbook of Graphs and Networks (S.~Bornholdt and H.G. Schuster,
  eds.), Wiley, 2002.

\bibitem{buchanan02}
M.~Buchanan, \emph{Nexus: Small worlds and the groundbreaking science of
  networks}, W.W. Norton, 2002.

\bibitem{DM02}
S.N. Dorogovtsev and J.F.F. Mendes, \emph{Evolution of networks}, Adv. Phys.
  \textbf{51} (2002), 1079--1187.

\bibitem{fabr02}
A.~Fabrikant, E.~Koutsoupias, and C.H. Papadimitriou, \emph{Heuristically
  optimized trade-offs: a new paradigm for power laws in the internet},
  International Colloq. Automata, Languages and Programming, 2002.

\bibitem{JJ02}
J.~Jost and M.P. Joy, \emph{Evolving networks with distance preferences},
  Physical Review {E} \textbf{66} (2002), 036126.
\MR{2002i:37133}

\bibitem{menczer02}
F.~Menczer, \emph{Growing and navigating the small world web by local content},
  Proc. Natl. Acad. Sci. USA \textbf{99} (2002), 14014--14019.

\bibitem{MD03}
J.F.F. Mendes and S.N. Dorogovtsev, \emph{Evolution of networks: From
  biological nets to the internet and {WWW}}, Oxford Univ. Press, 2003.

\bibitem{newman-survey}
M.E.J. Newman, \emph{The structure and function of complex networks}, {SIAM
  Review} \textbf{45} (2003), 167--256.

\bibitem{RBar03}
E.~Ravasz and A.L. Barab{\'a}si, \emph{Hierarchical organization in complex
  networks}, 
Physical Review E \textbf{67} (2003), 026112.

\bibitem{watts03}
D.J. Watts, \emph{Six degrees: the science of a connected age}, W.W. Norton,
  2003.

\bibitem{yule24}
G.U. Yule, \emph{A mathematical theory of evolution, based on the conclusions
  of {D}r. {J}. {C}. {W}illis}, Philos. Trans. Roy. Soc. London Ser. B
  \textbf{213} (1924), 21--87.

\end{thebibliography}
\end{document}