Python crawler for the 12306 website -- train ticket information query

Python crawler for the 12306 website -- train ticket information query

Approach:

1. The train ticket query builds on the station information query. First complete the station lookup, then use the station codes it returns to build the URL that queries all train ticket information between the chosen departure station and destination station

2. The JSON response stores the details of all current trains from the departure station to the destination station

3. Analyze the json file

4. Query tickets by category (high-speed rail, conventional train)

1. The JSON file: on the 12306 page select "ticket" and "one way", open the browser's developer tools, and enter the departure and destination stations

Analysis shows that the JSON file is a nested dictionary. The details of every train are stored under "result"

Pick one record and analyze it to find the positions of the fields you want (train number, departure station, arrival station, seat types, etc.)

Notepad++ is used here to replace "|" with "\r", which puts every field on its own line so that the position of each field is easy to find

The code is as follows:

# Python: query train ticket information
import requests
# Download the station list script published by 12306.
url1 = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9090"
txt = requests.get(url1).text
# Drop the last two characters of the payload, split it on "@" and discard
# the piece in front of the first "@"; what remains is one "|"-separated
# record per station.
inf = txt[:-2].split("@")[1:]
# Treat the station code (field 2) as the key. Fields 1, 3 and 4 are kept
# under "cn", "qp" and "jp" (presumably the Chinese name, full pinyin and
# abbreviated pinyin -- confirm against the downloaded data).
stations = {
    rlist[2]: {"cn": rlist[1], "qp": rlist[3], "jp": rlist[4]}
    for rlist in (record.split("|") for record in inf)
}
def getcode(t):
    """Prompt for a station and return its station code.

    The user is asked repeatedly until the text entered equals one of the
    values ("cn", "qp" or "jp") of at least one entry in the module-level
    ``stations`` dict. If exactly one station matches, its code is returned
    directly. Otherwise the candidates are listed with 1-based numbers and
    the user is asked to pick one; invalid picks (non-numeric text or a
    number outside the list) are rejected and the question is repeated.

    Args:
        t: Label interpolated into the prompt, e.g. "Set out" or "Arrive".

    Returns:
        The key of the selected entry in ``stations``.
    """
    while True:
        s1 = input("%s station:" % t)
        r1 = [
            (code, station)
            for code, station in stations.items()
            if s1 in station.values()
        ]
        if r1:
            break
        print("There is no such station.")
        print("Please re-enter.")
    if len(r1) == 1:
        return r1[0][0]
    print("You need to choose from the following stations:")
    for number, (_, station) in enumerate(r1, start=1):
        print(number, station["cn"])
    while True:
        try:
            sel = int(input("Your choice is:")) - 1
        except ValueError:
            # Non-numeric input: force the range check below to fail.
            sel = -1
        # Reject 0 and negative numbers explicitly; as list indexes they
        # would silently select a station counted from the end.
        if 0 <= sel < len(r1):
            return r1[sel][0]
        print("Invalid choice, please enter a number from 1 to %d." % len(r1))
# Ask for both stations and the travel date, then build the query URL.
fromstation = getcode("Set out")
tostation = getcode("Arrive")
chufatime = input("Departure date(Format 2019-01-01):").strip()
qurl = "https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT".format(chufatime, fromstation, tostation)
print(qurl)
print("The query criteria you entered is:Departure station=%s,Destination=%s" % (stations[fromstation]["cn"], stations[tostation]["cn"]))
# The JSON response stores, under data -> result, one "|"-separated string
# per train running from the departure station to the destination station.
ainf = requests.get(qurl).json()["data"]["result"]
result = []
for record in ainf:
    # Named "fields" rather than "list" so the builtin ``list`` is not
    # shadowed for the rest of the script.
    fields = record.split("|")
    checi = fields[3]                    # train number
    chufa = stations[fields[6]]["cn"]    # departure station (code -> name)
    mudi = stations[fields[7]]["cn"]     # arrival station (code -> name)
    ftime = fields[8]                    # departure time
    dtime = fields[9]                    # arrival time
    sw = fields[32]                      # business class
    yd = fields[31]                      # first class
    rw = fields[23]                      # soft sleeper
    yw = fields[26]                      # hard sleeper
    wuzuo = fields[28]                   # no seat
    ed = fields[30]                      # second class
    yz = fields[29]                      # hard seat
    # Tuple order matches the column order of the table printed afterwards.
    result.append((checi, chufa, mudi, ftime, dtime, sw, yd, ed, yz, yw, rw, wuzuo))
# Header line for the plain-text listing.
print("Train number\t Departure station\t Arrival station departure time arrival time business block first class second class Hard Seat Hard Sleeper Soft Sleeper no seat ")
# One output line per train: every field is followed by a tab, then a newline.
for row in result:
    print(*row, sep="\t", end="\t\n")

 

The output looks like this:

The output above is hard to read. Use the prettytable library (it must be installed first) to print a properly aligned table

from  prettytable import PrettyTable
# ............ (the code above that builds `result` is unchanged)
# ............
# Column titles, in the same order as the fields of each entry in ``result``.
table = PrettyTable(["Train number","Departure station","Destination station","Departure time","Arrival time","Business block","First class seat","Second-class seat","Hard seat","Hard sleeper","Soft sleeper","No seat"])
# Each entry of ``result`` holds exactly these twelve fields, so the whole
# entry is copied into a fresh list and added as one table row.
for row in result:
    table.add_row([*row])
print(table)

The output looks like this:

4. Query tickets by category

import requests
from  prettytable import PrettyTable
# Download the station list script published by 12306.
url1 = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9090"
txt = requests.get(url1).text
# Drop the last two characters of the payload, split it on "@" and discard
# the piece in front of the first "@"; what remains is one "|"-separated
# record per station.
inf = txt[:-2].split("@")[1:]
# Treat the station code (field 2) as the key. Fields 1, 3 and 4 are kept
# under "cn", "qp" and "jp" (presumably the Chinese name, full pinyin and
# abbreviated pinyin -- confirm against the downloaded data).
stations = {
    rlist[2]: {"cn": rlist[1], "qp": rlist[3], "jp": rlist[4]}
    for rlist in (record.split("|") for record in inf)
}
def getcode(t):
    """Prompt for a station and return its station code.

    The user is asked repeatedly until the text entered equals one of the
    values ("cn", "qp" or "jp") of at least one entry in the module-level
    ``stations`` dict. If exactly one station matches, its code is returned
    directly. Otherwise the candidates are listed with 1-based numbers and
    the user is asked to pick one; invalid picks (non-numeric text or a
    number outside the list) are rejected and the question is repeated.

    Args:
        t: Label interpolated into the prompt, e.g. "Set out" or "Arrive".

    Returns:
        The key of the selected entry in ``stations``.
    """
    while True:
        s1 = input("%s station:" % t)
        r1 = [
            (code, station)
            for code, station in stations.items()
            if s1 in station.values()
        ]
        if r1:
            break
        print("There is no such station.")
        print("Please re-enter.")
    if len(r1) == 1:
        return r1[0][0]
    print("You need to choose from the following stations:")
    for number, (_, station) in enumerate(r1, start=1):
        print(number, station["cn"])
    while True:
        try:
            sel = int(input("Your choice is:")) - 1
        except ValueError:
            # Non-numeric input: force the range check below to fail.
            sel = -1
        # Reject 0 and negative numbers explicitly; as list indexes they
        # would silently select a station counted from the end.
        if 0 <= sel < len(r1):
            return r1[sel][0]
        print("Invalid choice, please enter a number from 1 to %d." % len(r1))
# Ask for both stations and the travel date, then build the query URL.
fromstation = getcode("Set out")
tostation = getcode("Arrive")
chufatime = input("Departure date(Format 2019-01-01):").strip()
qurl = "https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT".format(chufatime, fromstation, tostation)
print(qurl)
print("The query criteria you entered is:Departure station=%s,Destination=%s" % (stations[fromstation]["cn"], stations[tostation]["cn"]))
# The JSON response stores, under data -> result, one "|"-separated string
# per train running from the departure station to the destination station.
ainf = requests.get(qurl).json()["data"]["result"]
result = []   # every train, all twelve columns
gaotie = []   # trains whose number starts with "G" or "D"
huoche = []   # all other trains
for record in ainf:
    # Named "fields" rather than "list" so the builtin ``list`` is not
    # shadowed for the rest of the script.
    fields = record.split("|")
    checi = fields[3]                    # train number
    chufa = stations[fields[6]]["cn"]    # departure station (code -> name)
    mudi = stations[fields[7]]["cn"]     # arrival station (code -> name)
    ftime = fields[8]                    # departure time
    dtime = fields[9]                    # arrival time
    sw = fields[32]                      # business class
    yd = fields[31]                      # first class
    rw = fields[23]                      # soft sleeper
    yw = fields[26]                      # hard sleeper
    wuzuo = fields[28]                   # no seat
    ed = fields[30]                      # second class
    yz = fields[29]                      # hard seat
    result.append((checi, chufa, mudi, ftime, dtime, sw, yd, ed, yz, yw, rw, wuzuo))
    # startswith() instead of checi[0] so an empty train number is filed
    # under ordinary trains rather than raising IndexError.
    if checi.startswith(("G", "D")):
        gaotie.append([checi, chufa, mudi, ftime, dtime, sw, yd, ed])
    else:
        huoche.append([checi, chufa, mudi, ftime, dtime, yz, yw, rw, wuzuo])
# Interactive menu: show all trains, only G/D trains, only the other trains,
# or quit. The loop repeats until option 4 is chosen.
while True:
    print("Please enter view information:1,whole    2. High speed railway and motor car     3. Train 4. Exit")
    try:
        show = int(input("Please choose:"))
    except ValueError:
        # Non-numeric input used to abort the script with a traceback;
        # send it to the "Input error" branch below instead.
        show = None
    if show == 4:
        print("End of query!")
        break
    # Pick the column titles and the row source for the requested view.
    if show == 1:
        columns = ["Train number","Departure station","Destination station","Departure time","Arrival time","Business block","First class seat","Second-class seat","Hard seat","Hard sleeper","Soft sleeper","No seat"]
        rows = result
    elif show == 2:
        columns = ["Train number","Departure station","Destination station","Departure time","Arrival time","Business block","First class seat","Second-class seat"]
        rows = gaotie
    elif show == 3:
        columns = ["Train number","Departure station","Destination station","Departure time","Arrival time","Hard seat","Hard sleeper","Soft sleeper","No seat"]
        rows = huoche
    else:
        print("Input error, please input again!")
        continue
    table = PrettyTable(columns)
    for row in rows:
        # Every row already has exactly one value per column of its view.
        table.add_row([*row])
    print(table)

The output looks like this:

Tags: Python JSON

Posted on Sat, 30 Nov 2019 11:19:26 -0800 by keithschm