import requests
from lxml import html
from urllib.request import urlopen
from bs4 import BeautifulSoup as soup
import re
# --- Step 1: collect the addresses listed on the bitinfocharts rich list ---
client = requests.Session()
HOMEPAGE_URL = 'https://bitinfocharts.com/top-100-richest-bitcoin-addresses.html'

# The downloaded bytes get their own name so the `html` module imported from
# lxml at the top of the file is not overwritten.
rich_list_response = client.get(HOMEPAGE_URL, timeout=30)
# Fail loudly on an HTTP error instead of scraping an error page and silently
# ending up with an empty address list.
rich_list_response.raise_for_status()
page_html = rich_list_response.content

# `soup` is the name under which this file imports bs4.BeautifulSoup.
body = soup(page_html, 'html.parser')

# Serialise the parsed tree once; the pattern below is matched against this
# normalised markup rather than the raw download.
body_text = str(body)

# An address link is an <a> that opens a table cell directly after the
# previous cell closes. The capture group is the rest of the href, i.e. the
# text between the fixed URL prefix and the closing double quote.
ADDRESS_LINK_RE = re.compile(
    r'</td><td><a href="https://bitinfocharts\.com/bitcoin/address/([^"]+)"'
)

# One entry per matching link, in page order (duplicates are not removed).
addresses = ADDRESS_LINK_RE.findall(body_text)

# Not used by the scraping step; kept so the module still defines the name.
temp_addresses = []
import blockchain
import requests
# --- Step 2: query blockchain.info for every collected address ---
client = requests.Session()

# Per-address results. Exactly one entry is appended to each list per
# address (None when a value could not be fetched), so all lists stay
# index-aligned with `addresses`.
address_data = []    # decoded rawaddr payload ({} when the request failed)
homepage_url = []    # rawaddr URL queried for the address
balance = []         # 'final_balance' from the balance endpoint
n_tx = []            # 'n_tx' from the rawaddr payload
total_recieved = []  # 'total_received' from the rawaddr payload
total_sent = []      # 'total_sent' from the rawaddr payload

REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely without one


def _fetch_json(session, url):
    """Return the JSON object served at *url*, or an empty dict on failure.

    Connection errors, HTTP error statuses, bodies that are not valid JSON
    and JSON values that are not objects are all reported on stdout and
    mapped to ``{}`` so that one bad address does not abort the whole run.
    """
    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as err:
        print('request failed for', url, '-', err)
        return {}
    if not isinstance(payload, dict):
        print('unexpected response shape for', url)
        return {}
    return payload


# Notes on the rawaddr endpoint, carried over from the original script:
#   - the address can be base58 or hash160
#   - optional `limit` parameter to show n transactions, e.g. &limit=50
#     (default: 50, max: 50)
#   - optional `offset` parameter to skip the first n transactions,
#     e.g. &offset=100 (page 2 for limit 50)
for address in addresses:
    profile_url = 'https://blockchain.info/rawaddr/' + address
    homepage_url.append(profile_url)
    print(profile_url)

    # The endpoint answers with JSON, so decode it and read the fields by
    # name instead of slicing the response text at fixed offsets.
    summary = _fetch_json(client, profile_url)
    address_data.append(summary)

    tx_count = summary.get('n_tx')
    received = summary.get('total_received')
    sent = summary.get('total_sent')
    n_tx.append(tx_count)
    total_recieved.append(received)
    total_sent.append(sent)
    print(tx_count)
    print(received)
    print(sent)

    # Balance lookup: https://blockchain.info/balance?active=$address
    # NOTE(review): the response is presumably keyed by the queried address
    # with a nested 'final_balance' field - confirm against a live response.
    balance_url = 'https://blockchain.info/balance?active=' + address
    print(balance_url)
    balance_entry = _fetch_json(client, balance_url).get(address)
    if isinstance(balance_entry, dict):
        final_balance = balance_entry.get('final_balance')
    else:
        final_balance = None
    # NOTE(review): blockchain.info presumably reports amounts in satoshi,
    # while the spreadsheet column is titled 'Balance in BTC' - confirm the
    # intended unit before converting.
    balance.append(final_balance)
    print(final_balance)
import pandas as pd
from pandas import ExcelWriter
# --- Step 3: export the collected data to an Excel workbook ---
# Build one row per address. The column lists are filled in lockstep
# earlier in the script, so they must all have the same length.
df = pd.DataFrame({
    'Addresses': addresses,
    'Balance in BTC': balance,
    'URL Profile': homepage_url,
    'No of Transactions performed': n_tx,
    'Total Recieved': total_recieved,
    'Total Sent': total_sent,
})

# Use the writer as a context manager so the workbook is written and closed
# when the block exits; ExcelWriter.save() no longer exists in pandas 2.x.
# The sheet name is passed by keyword rather than by position.
with ExcelWriter('Defiance.xlsx') as writer:
    df.to_excel(writer, sheet_name='Sheet1', index=False)
See below for sample output
Interact with information on the 100 largest BTC accounts (Python skeleton code)
Interact with information on the 100 largest BTC accounts (Python skeleton code)

Interact with information on the 100 largest BTC accounts (Python skeleton code)

  • 1.
    import requests from lxmlimport html from urllib.request import urlopen from bs4 import BeautifulSoup as soup import re client = requests.Session() HOMEPAGE_URL = 'https://bitinfocharts.com/top-100-richest-bitcoin-addresses.html' html = client.get(HOMEPAGE_URL).content body = BeautifulSoup(html,'html.parser') addresses = [] temp_addresses=[] for m in re.finditer('</td><td><a href="https://bitinfocharts.com/bitcoin/address/', str(body)): #print(m.end()) a = str(body)[m.end():m.end()+80] a = a.split(">") a= a[0] [:-1] addresses.append(a) #for m in re.finditer('">wallet:', str(body)): #len(addresses) #print(addresses) # Fill in with url of page which is to be scraped #body = body.prettify() #text = body.get_text #print(body.prettify) import blockchain import requests client = requests.Session() #printing our URLS to the top 100 addresses address_data = []
  • 2.
    homepage_url=[] balance =[] n_tx =[] total_recieved=[] total_sent=[] ## #Addresscan be base58 or hash160 #Optional limit parameter to show n transactions e.g. &limit=50 (Default: 50, Max: 50) #Optional offset parameter to skip the first n transactions e.g. &offset=100 (Page 2 for limit 50) ## for i in range(0,len(addresses)): #HOMEPAGE_URL = 'https://bitinfocharts.com/bitcoin/address/'+addresses[i] HOMEPAGE_URL = 'https://blockchain.info/rawaddr/'+addresses[i] homepage_url.append(HOMEPAGE_URL) html = client.get(HOMEPAGE_URL).content body_address = BeautifulSoup(html,'html.parser') #body_address = str(body_address) print(HOMEPAGE_URL) #print(body_address.prettify) address_data.append(body_address.prettify) ########################################## number_transcations_performed= str(address_data[i]).split("n")[3][11:-1] n_tx.append(number_transcations_performed) print(number_transcations_performed) ####################|###################### recieved = str(address_data[i]).split("n")[4][21:-1-8] sent = str(address_data[i]).split("n")[5][17:-1-8] total_recieved.append(recieved) total_sent.append(sent) print(recieved) print(sent)
  • 3.
    ########################################## Balance_URL = 'https://blockchain.info/balance?active='+addresses[i] html= client.get(Balance_URL).content body_balance = BeautifulSoup(html,'html.parser') print(Balance_URL) #print(body_balance.prettify) a = str(body_balance) a = a.split("n") a = a[1][21:len(a[1])-1-8] balance.append(a) print(a) #print(len(address_data)) #print(len(homepage_url)) #print(len(balance)) #Balance information extraction #https://blockchain.info/balance?active=$address import pandas as pd from pandas import ExcelWriter # Creates a datframe with columns: |Name|Twitter Handle|Path To Tweets| #balance = [int(i) for i in balance] df = pd.DataFrame({'Addresses': addresses, 'Balance in BTC': balance, 'URL Profile': homepage_url, 'No of Transactions performed':n_tx, 'Total Recieved': total_recieved, 'Total Sent': total_sent}) ## # 'Activity Details': address_data, # # 'Balance in BTC': str(balance)})
  • 4.