Fight against new coronavirus - get and save online data

# default_exp getdata
# The line above declares this module's name for nbdev. It must be the first line of the first cell of the notebook.

getdata

#hide
from nbdev.showdoc import *
#export 
from bs4 import BeautifulSoup
from parser import * #regex_parser
import re
import json
import time
import logging
import datetime
import requests
import pprint 

Set the target URL and build the request headers.

# export 
# Data source: the DXY (ncov.dxy.cn) pneumonia page that getweb() downloads.
url = "https://ncov.dxy.cn/ncovh5/view/pneumonia?from=singlemessage&isappinstalled=0"
# Headers applied to the HTTP session in getweb(); only a browser-like
# user-agent string is set.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
}

# Date stamp embedded in the names of the saved files
# (data/prov_<dateof>.csv and data/city_<dateof>.csv).
dateof = '20200207'

Fetch the web page from the server and parse the JSON data embedded in it.

#export 

def getweb():
    """Download the DXY page and return the area statistics embedded in it.

    The page is fetched from the module-level ``url`` using the module-level
    ``headers``. The statistics are taken from the ``<script>`` element whose
    id is ``getAreaStat``: the first ``[`` through the last ``]`` on a line of
    that element is decoded as JSON.

    Returns:
        The decoded JSON array. The save functions in this file treat it as
        a list of per-province dicts.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If no bracketed JSON array is found in the script element
            or the text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    # The context manager releases the session's pooled connections on exit.
    with requests.session() as session:
        session.headers.update(headers)
        # Without a timeout, requests waits indefinitely on a stalled server.
        r = session.get(url, timeout=30)
        # Fail here on 4xx/5xx instead of trying to parse an error page.
        r.raise_for_status()

    soup = BeautifulSoup(r.content, 'lxml')

    # Get provincial and municipal data.
    script = soup.find('script', attrs={'id': 'getAreaStat'})
    area_information = re.search(r'\[(.*)\]', str(script))
    if area_information is None:
        # Covers both a missing script element and one without a JSON array.
        raise ValueError("no JSON array found in the 'getAreaStat' script element")
    return json.loads(area_information.group(0))

Save the provincial data to the file data/prov_<date>.csv.

# export 
# Write file and save data.
def saveprovice(area, datestamp=None):
    """Write one CSV row per province to ``data/prov_<datestamp>.csv``.

    Args:
        area: Iterable of province dicts providing the keys ``provinceName``,
            ``confirmedCount``, ``suspectedCount``, ``curedCount`` and
            ``deadCount``.
        datestamp: String used in the file name. When omitted, the
            module-level ``dateof`` is used.

    Raises:
        KeyError: If a province dict lacks one of the keys listed above.
        OSError: If the directory or file cannot be created or written.
    """
    import csv
    import os

    stamp = dateof if datestamp is None else datestamp
    fprovince = "data/" + "prov_" + stamp + ".csv"
    # Create the output directory on first use instead of failing in open().
    os.makedirs(os.path.dirname(fprovince), exist_ok=True)

    # Province names are non-ASCII, so the encoding is fixed rather than left
    # to the platform default. newline="" lets the csv writer control the row
    # terminator itself, and the with-block closes the file even if a row fails.
    with open(fprovince, "w", encoding="utf-8", newline="") as fp:
        # NOTE(review): rows end with a bare carriage return, exactly as the
        # previous implementation wrote them - confirm downstream readers
        # expect this before switching to "\n".
        writer = csv.writer(fp, lineterminator="\r")
        writer.writerow(["Province", "Diagnosis", "Suspected", "Cure", "death"])
        for a in area:
            # csv.writer quotes any field that contains a comma or quote.
            writer.writerow([
                a['provinceName'],
                a['confirmedCount'],
                a['suspectedCount'],
                a['curedCount'],
                a['deadCount'],
            ])
    print("writed to "+ fprovince + "\r\n")

Save the city data to the file data/city_<date>.csv.

# export
# Write file, city data.
def savecity(area, datestamp=None):
    """Write one CSV row per city to ``data/city_<datestamp>.csv``.

    Args:
        area: Iterable of province dicts. Each must provide ``provinceName``
            and ``cities``, where ``cities`` is an iterable of dicts with the
            keys ``cityName``, ``confirmedCount``, ``suspectedCount``,
            ``curedCount`` and ``deadCount``.
        datestamp: String used in the file name. When omitted, the
            module-level ``dateof`` is used.

    Raises:
        KeyError: If a province or city dict lacks one of the keys above.
        OSError: If the directory or file cannot be created or written.
    """
    import csv
    import os

    stamp = dateof if datestamp is None else datestamp
    fcity = "data/" + "city_" + stamp + ".csv"
    # Create the output directory on first use instead of failing in open().
    os.makedirs(os.path.dirname(fcity), exist_ok=True)

    # Province and city names are non-ASCII, so the encoding is fixed rather
    # than left to the platform default. newline="" lets the csv writer
    # control the row terminator itself, and the with-block closes the file
    # even if a row fails.
    with open(fcity, "w", encoding="utf-8", newline="") as fc:
        # NOTE(review): rows end with a bare carriage return, exactly as the
        # previous implementation wrote them - confirm downstream readers
        # expect this before switching to "\n".
        writer = csv.writer(fc, lineterminator="\r")
        writer.writerow(["Province", "City", "Diagnosis", "Suspected", "Cure", "death"])
        for p in area:
            for c in p['cities']:
                # csv.writer quotes any field that contains a comma or quote.
                writer.writerow([
                    p['provinceName'],
                    c['cityName'],
                    c['confirmedCount'],
                    c['suspectedCount'],
                    c['curedCount'],
                    c['deadCount'],
                ])
    print("writed to "+ fcity + "\r\n")

View the saved file.

The notebook's `!` shell escape (an IPython feature) is used here to run a shell command.

!ls -l data
total 176
-rw-r--r-- 1 SuperMap SuperMap 1445 Feb  9 22:49 china.csv
-rw-r--r-- 1 SuperMap
-rw-r--r-- 1 SuperMap
-rw-r--r-- 1 SuperMap
-rw-r--r-- 1 SuperMap SuperMap 126285 Feb  9 15:09 img_.jpg
-rw-r--r-- 1 SuperMap
-rw-r--r-- 1 SuperMap
-rw-r--r-- 1 SuperMap

nbdev utility

# Convert the notebooks to Python *.py modules and save them in the subdirectory named after the project.

from nbdev.export import *
notebook2script()
Converted 00_digdata.ipynb.
Converted 01_getdata.ipynb.
Converted 10_charts.ipynb.
Converted 10_china.ipynb.
Converted index.ipynb.

Tags: Big Data JSON Python Session jupyter

Posted on Mon, 10 Feb 2020 04:14:33 -0800 by paulb