Count all Confluence pages in Python using the XML-RPC API

I was recently asked how to count the total number of pages on a Confluence 4.2 server, so I wrote this Python script that shows how to use the XML-RPC API to do it. The technique can be used for more than counting pages: the API provides all sorts of useful operations, such as adding users. For more information on the available methods, see this page: https://developer.atlassian.com/display/CONFDEV/Remote+Confluence+Methods.

#!/usr/bin/env python
r'''
This script shows how to use the XML-RPC API to access
information from a Confluence server.

Here is how you might use it:

$ confluence_pages.py
URL: http://confluence
Username: myself
Password: <secret>

Server Info
  baseUrl          : http://confluence:8090
  buildId          : 3284
  developmentBuild : false
  majorVersion     : 4
  minorVersion     : 2
  patchLevel       : 5

Misc Info
  Active Users :   580
  Spaces       :   120
  Pages        :  7260

'''
import datetime
import getpass
import sys
import xmlrpclib


def get_credentials():
    '''
    Collect the connection settings for the Confluence server.

    The URL, username and password are taken, in that order, from the
    first three command line arguments. Any value that was not given
    on the command line is requested from a prompt instead; the
    password is read with getpass.

    @returns a tuple of url, username and password
    '''
    # Pad the positional arguments with None so that exactly three
    # values can always be unpacked, however many were supplied.
    supplied = sys.argv[1:4]
    url, username, password = supplied + [None] * (3 - len(supplied))

    if url is None:
        url = raw_input('URL: ')  # ex. https://docs.tabula.com
    if username is None:
        username = raw_input('Username: ')  # ex. jlinoff
    if password is None:
        password = getpass.getpass('Password: ')

    return url, username, password


def access_confluence(url, username, password):
    '''
    Access confluence and report some information.

    Logs in through the XML-RPC API, prints the server information,
    the number of active users, the number of spaces and the number
    of pages summed over all spaces, and logs out again. The logout
    is performed even when one of the remote calls fails.

    @param url      The URL of the server.
    @param username Login username.
    @param password Login password.
    '''
    # Drop trailing slashes so that 'http://host/' does not turn into
    # 'http://host//rpc/xmlrpc'.
    server = xmlrpclib.ServerProxy(url.rstrip('/') + '/rpc/xmlrpc')
    token = server.confluence2.login(username, password)

    # NOTE: print is always given one parenthesised argument, which
    # produces the same output as the bare print statement.
    try:
        # Server
        info = server.confluence2.getServerInfo(token)
        now = datetime.datetime.now()
        print('')
        print('Confluence Pages Report ' + now.strftime('%Y-%m-%d %H:%M:%S'))
        print('')
        print('  Server Info')
        keys = sorted(info)
        # Width of the longest key, used to line up the ':' column.
        maxlen = max([len(key) for key in keys] or [0])
        for key in keys:
            print('    %-*s : %s' % (maxlen, key, info[key]))

        # Misc
        spaces = server.confluence2.getSpaces(token)
        users = server.confluence2.getActiveUsers(token, True)
        print('')
        print('  Misc Info')
        print('    %-12s : %5d' % ('Active Users', len(users)))
        print('    %-12s : %5d' % ('Spaces', len(spaces)))

        # The total is accumulated with one getPages call per space.
        num_pages = 0
        for space in spaces:
            num_pages += len(server.confluence2.getPages(token, space['key']))
        print('    %-12s : %5d' % ('Pages', num_pages))
    finally:
        # Release the session token even if a remote call above raised.
        server.confluence2.logout(token)
    print('')


def main():
    '''
    Program entry point: gather the credentials and run the report.
    '''
    # get_credentials returns (url, username, password), which is the
    # positional order access_confluence expects.
    access_confluence(*get_credentials())

# Run the report only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

100

101

102

103

104

105

106

107

108

109

110

111

#!/usr/bin/env python

r'''

This script shows how to use the XML-RPC API to access

information from a Confluence server.

Here is how you might use it:

$ confluence_pages.py

URL: http://confluence

Username: myself

Password: <secret>

Server Info

baseUrl : http://confluence:8090

buildId : 3284

developmentBuild : false

majorVersion : 4

minorVersion : 2

patchLevel : 5

Misc Info

Active Users : 580

Spaces : 120

Pages : 7260

'''

import datetime

import getpass

import sys

import xmlrpclib

def get_credentials():
    '''
    Get the access credentials.

    They are accessed positionally from the command line
    (url, username, password, in that order) or from a prompt
    for any value that was not given on the command line.

    @returns a tuple of url, username and password
    '''
    url = None
    username = None
    password = None

    # Each later argument is only considered when the earlier ones
    # are present, because the values are positional.
    if len(sys.argv) > 1:
        url = sys.argv[1]
        if len(sys.argv) > 2:
            username = sys.argv[2]
            if len(sys.argv) > 3:
                password = sys.argv[3]

    if url is None:
        url = raw_input('URL: ')  # ex. https://docs.tabula.com
    if username is None:
        username = raw_input('Username: ')  # ex. jlinoff
    if password is None:
        password = getpass.getpass('Password: ')
    return url, username, password

def access_confluence(url, username, password):
    '''
    Access confluence and report some information.

    Logs in through the XML-RPC API, prints the server information,
    the number of active users, the number of spaces and the number
    of pages summed over all spaces, and logs out again. The logout
    is performed even when one of the remote calls fails.

    @param url The URL of the server.
    @param username Login username.
    @param password Login password.
    '''
    # Drop trailing slashes so that 'http://host/' does not turn into
    # 'http://host//rpc/xmlrpc'.
    server = xmlrpclib.ServerProxy(url.rstrip('/') + '/rpc/xmlrpc')
    token = server.confluence2.login(username, password)

    # NOTE: print is always given one parenthesised argument, which
    # produces the same output as the bare print statement.
    try:
        # Server
        info = server.confluence2.getServerInfo(token)
        now = datetime.datetime.now()
        print('Confluence Pages Report ' + now.strftime('%Y-%m-%d %H:%M:%S'))
        print(' Server Info')
        # Width of the longest key, used to line up the ':' column.
        maxlen = 0
        for item in sorted(info):
            maxlen = max(len(item), maxlen)
        for item in sorted(info):
            val = info[item]
            print(' %-*s : %s' % (maxlen, item, val))

        # Misc
        spaces = server.confluence2.getSpaces(token)
        users = server.confluence2.getActiveUsers(token, True)
        print(' Misc Info')
        print(' %-12s : %5d' % ('Active Users', len(users)))
        print(' %-12s : %5d' % ('Spaces', len(spaces)))

        # The total is accumulated with one getPages call per space.
        num_pages = 0
        for space in spaces:
            space_key = space['key']
            num_pages += len(server.confluence2.getPages(token, space_key))
        print(' %-12s : %5d' % ('Pages', num_pages))
    finally:
        # Release the session token even if a remote call above raised.
        server.confluence2.logout(token)

def main():
    '''
    Run program.

    Gets the credentials and passes them to the Confluence report.
    '''
    url, username, password = get_credentials()
    access_confluence(url, username, password)

# Run the report only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

Note that this script has very weak option handling and, as such, is not suitable for production.

Leave a Reply Cancel reply