解析 HTML 未输出所需数据(FedEx 的跟踪信息)

问题描述 投票:0回答:2

我正在尝试制作一个脚本来从联邦快递网站获取跟踪信息。

我原以为,只要访问网址“https://www.fedex.com/fedextrack/?tracknumbers=”并把跟踪号码拼接到它的末尾,就能打开包含我所需信息的跟踪页面。

我尝试向 URL 提供跟踪号码并从响应中解析 html。

这是我尝试过的。

import urllib

# Python 2 script: download the raw HTML of the FedEx tracking page for one
# tracking number and print it.
url_prefix = 'https://www.fedex.com/fedextrack/?tracknumbers='
tracking_number = '570573906561'
url = url_prefix + tracking_number

sock = urllib.urlopen(url)
try:
    # Only the HTML served for this URL is read; nothing here runs the
    # page's JavaScript.
    htmlSource = sock.read()
finally:
    # Release the connection even if the read fails.
    sock.close()
print(htmlSource)

此代码输出: http://freetexthost.com/iy1ma2q1fm

我原以为只需在这段输出中搜索文本,就能找到交付状态/日期,但输出里并没有这些信息。

如果我在 Chrome 中打开跟踪页面并检查元素,会看到交货日期信息所在元素的 ID 为 destinationDateTime,所以如果我在 Chrome 控制台中运行这个:

// Browser console: look up the element whose id is 'destinationDateTime'.
document.getElementById('destinationDateTime')

它返回我想要的输出(交货日期)

为什么我的 python 脚本不打印实际的跟踪数据信息或 html 输出中的该类?

我搜索过这个问题,也尝试了几种不同的解析方式(Mechanize、Beautiful Soup、html2text),但它们给出的输出都一样,都不包含任何有关货件的实际数据。

python html-parsing urllib fedex
2个回答
7
投票

与许多其他网站一样,该网站离开 JavaScript 就无法正常工作。页面中的脚本会向某个 URL 发送 HTTP POST 请求,该 URL 再以 JSON 编码对象的形式返回跟踪数据。

您需要使用 Python 来模拟:

import requests
import json

tracking_number = '570573906561'

# Inner request document; it is sent JSON-encoded inside the 'data' form field.
track_request = {
    'TrackPackagesRequest': {
        'appType': 'wtrk',
        'uniqueKey': '',
        'processingParameters': {
            'anonymousTransaction': True,
            'clientId': 'WTRK',
            'returnDetailedErrors': True,
            'returnLocalizedDateTime': False
        },
        'trackingInfoList': [{
            'trackNumberInfo': {
                'trackingNumber': tracking_number,
                'trackingQualifier': '',
                'trackingCarrier': ''
            }
        }]
    }
}

# Form fields of the POST request.
form_fields = {
    'data': json.dumps(track_request),
    'action': 'trackpackages',
    'locale': 'en_US',
    'format': 'json',
    'version': 99
}

# Send the request and decode the JSON body of the reply into `data`.
response = requests.post('https://www.fedex.com/trackingCal/track', data=form_fields)
data = response.json()

然后处理生成的对象:

{
    "TrackPackagesResponse": {
        "successful": true,
        "passedLoggedInCheck": false,
        "errorList": [{
            "code": "0",
            "message": "Request was successfully processed.",
            "source": null
        }],
        "packageList": [{
            "trackingNbr": "570573906561",
            "trackingQualifier": "2456536000\u007e570573906561\u007eFX",
            "trackingCarrierCd": "FDXE",
            "trackingCarrierDesc": "FedEx Express",
            "displayTrackingNbr": "570573906561",
            "shipperCmpnyName": "",
            "shipperName": "",
            "shipperAddr1": "",
            "shipperAddr2": "",
            "shipperCity": "SEOUL",
            "shipperStateCD": "",
            "shipperZip": "",
            "shipperCntryCD": "KR",
            "shipperPhoneNbr": "",
            "shippedBy": "",
            "recipientCmpnyName": "",
            "recipientName": "",
            "recipientAddr1": "",
            "recipientAddr2": "",
            "recipientCity": "CHEK LAP KOK",
            "recipientStateCD": "",
            "recipientZip": "",
            "recipientCntryCD": "HK",
            "recipientPhoneNbr": "",
            "shippedTo": "",
            "keyStatus": "Delivered",
            "keyStatusCD": "DL",
            "lastScanStatus": "",
            "lastScanDateTime": "",
            "receivedByNm": ".CHOP",
            "subStatus": "Signed for by\u003a .CHOP",
            "mainStatus": "",
            "statusBarCD": "DL",
            "shortStatus": "",
            "shortStatusCD": "",
            "statusLocationAddr1": "",
            "statusLocationAddr2": "",
            "statusLocationCity": "CHEK LAP KOK",
            "statusLocationStateCD": "",
            "statusLocationZip": "",
            "statusLocationCntryCD": "HK",
            "statusWithDetails": "Delivered\u003a 9\u002f02\u002f2013 11\u003a58 am Signed for by\u003a.CHOP\u003b CHEK LAP KOK, HK",
            "shipDt": "2013\u002d08\u002d31T15\u003a00\u003a00\u002b09\u003a00",
            "displayShipDt": "8\u002f31\u002f2013",
            "displayShipTm": "3\u003a00 pm",
            "displayShipDateTime": "8\u002f31\u002f2013 3\u003a00 pm",
            "pickupDt": "2013\u002d08\u002d31T15\u003a00\u003a00\u002b09\u003a00",
            "displayPickupDt": "8\u002f31\u002f2013",
            "displayPickupTm": "3\u003a00 pm",
            "displayPickupDateTime": "8\u002f31\u002f2013 3\u003a00 pm",
            "estDeliveryDt": "",
            "estDeliveryTm": "",
            "displayEstDeliveryDt": "",
            "displayEstDeliveryTm": "",
            "displayEstDeliveryDateTime": "",
            "actDeliveryDt": "2013\u002d09\u002d02T11\u003a58\u003a00\u002b08\u003a00",
            "displayActDeliveryDt": "9\u002f02\u002f2013",
            "displayActDeliveryTm": "11\u003a58 am",
            "displayActDeliveryDateTime": "9\u002f02\u002f2013 11\u003a58 am",
            "nickName": "",
            "note": "",
            "matchedAccountList": [""],
            "fxfAdvanceETA": "",
            "fxfAdvanceReason": "",
            "fxfAdvanceStatusCode": "",
            "fxfAdvanceStatusDesc": "",
            "destLink": "",
            "originLink": "",
            "hasBillOfLadingImage": false,
            "hasBillPresentment": false,
            "signatureRequired": 0,
            "totalKgsWgt": "3.5",
            "displayTotalKgsWgt": "3.5 kgs",
            "totalLbsWgt": "7.8",
            "displayTotalLbsWgt": "7.8 lbs",
            "displayTotalWgt": "7.8 lbs \u002f 3.5 kgs",
            "pkgKgsWgt": "3.5",
            "displayPkgKgsWgt": "3.5 kgs",
            "pkgLbsWgt": "7.8",
            "displayPkgLbsWgt": "7.8 lbs",
            "displayPkgWgt": "7.8 lbs \u002f 3.5 kgs",
            "dimensions": "20x14x14 in.",
            "masterTrackingNbr": "",
            "masterQualifier": "",
            "masterCarrierCD": "",
            "originalOutboundTrackingNbr": null,
            "originalOutboundQualifier": "",
            "originalOutboundCarrierCD": "",
            "invoiceNbrList": [""],
            "referenceList": [""],
            "doorTagNbrList": [""],
            "referenceDescList": [""],
            "purchaseOrderNbrList": [""],
            "billofLadingNbrList": [""],
            "shipperRefList": ["PO\u00232612  Proton housing\u005fPlastics"],
            "rmaList": [""],
            "deptNbrList": [""],
            "shipmentIdList": [""],
            "tcnList": [""],
            "partnerCarrierNbrList": [""],
            "hasAssociatedShipments": false,
            "hasAssociatedReturnShipments": false,
            "assocShpGrp": 0,
            "drTgGrp": ["0"],
            "associationInfoList": [{
                "trackingNumberInfo": {
                    "trackingNumber": "",
                    "trackingQualifier": "",
                    "trackingCarrier": "",
                    "processingParameters": null
                },
                "associatedType": ""
            }],
            "returnReason": "",
            "returnRelationship": null,
            "skuItemUpcCdList": [""],
            "receiveQtyList": [""],
            "itemDescList": [""],
            "partNbrList": [""],
            "serviceCD": "INTERNATIONAL\u005fPRIORITY",
            "serviceDesc": "FedEx International Priority",
            "serviceShortDesc": "IP",
            "packageType": "YOUR\u005fPACKAGING",
            "packaging": "Your Packaging",
            "clearanceDetailLink": "",
            "showClearanceDetailLink": false,
            "manufactureCountryCDList": [""],
            "commodityCDList": [""],
            "commodityDescList": [""],
            "cerNbrList": [""],
            "cerComplaintCDList": [""],
            "cerComplaintDescList": [""],
            "cerEventDateList": [""],
            "displayCerEventDateList": [""],
            "totalPieces": "1",
            "specialHandlingServicesList": ["Deliver Weekday", "Weekend Pick\u002dUp"],
            "shipmentType": "",
            "pkgContentDesc1": "",
            "pkgContentDesc2": "",
            "docAWBNbr": "",
            "originalCharges": "",
            "transportationCD": "",
            "transportationDesc": "",
            "dutiesAndTaxesCD": "",
            "dutiesAndTaxesDesc": "",
            "origPieceCount": "",
            "destPieceCount": "",
            "goodsClassificationCD": "",
            "receipientAddrQty": "0",
            "deliveryAttempt": "0",
            "codReturnTrackNbr": "",
            "scanEventList": [{
                "date": "2013\u002d09\u002d02",
                "time": "11\u003a58\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "Delivered",
                "statusCD": "DL",
                "scanLocation": "CHEK LAP KOK HK",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": true
            }, {
                "date": "2013\u002d09\u002d02",
                "time": "09\u003a36\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "On FedEx vehicle for delivery",
                "statusCD": "OD",
                "scanLocation": "LANTAU ISLAND HK",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d02",
                "time": "08\u003a55\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "At local FedEx facility",
                "statusCD": "AR",
                "scanLocation": "LANTAU ISLAND HK",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d02",
                "time": "07\u003a12\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "International shipment release \u002d Import",
                "statusCD": "CC",
                "scanLocation": "LANTAU ISLAND HK",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d02",
                "time": "04\u003a40\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "Shipment exception",
                "statusCD": "SE",
                "scanLocation": "GUANGZHOU CN",
                "scanDetails": "Delay beyond our control",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d02",
                "time": "03\u003a45\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "Departed FedEx location",
                "statusCD": "DP",
                "scanLocation": "GUANGZHOU CN",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d02",
                "time": "01\u003a17\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "Arrived at FedEx location",
                "statusCD": "AR",
                "scanLocation": "GUANGZHOU CN",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d01",
                "time": "23\u003a10\u003a00",
                "gmtOffset": "\u002b08\u003a00",
                "status": "In transit",
                "statusCD": "IT",
                "scanLocation": "SHANGHAI CN",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d09\u002d01",
                "time": "17\u003a13\u003a00",
                "gmtOffset": "\u002b09\u003a00",
                "status": "In transit",
                "statusCD": "IT",
                "scanLocation": "INCHEON KR",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d08\u002d31",
                "time": "19\u003a44\u003a00",
                "gmtOffset": "\u002b09\u003a00",
                "status": "In transit",
                "statusCD": "IT",
                "scanLocation": "INCHEON KR",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d08\u002d31",
                "time": "16\u003a27\u003a00",
                "gmtOffset": "\u002b09\u003a00",
                "status": "Left FedEx origin facility",
                "statusCD": "DP",
                "scanLocation": "SEOUL KR",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d08\u002d31",
                "time": "15\u003a00\u003a00",
                "gmtOffset": "\u002b09\u003a00",
                "status": "Picked up",
                "statusCD": "PU",
                "scanLocation": "SEOUL KR",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }, {
                "date": "2013\u002d08\u002d30",
                "time": "23\u003a58\u003a11",
                "gmtOffset": "\u002d05\u003a00",
                "status": "Shipment information sent to FedEx",
                "statusCD": "OC",
                "scanLocation": "",
                "scanDetails": "",
                "scanDetailsHtml": "",
                "rtrnShprTrkNbr": "",
                "isDelException": false,
                "isClearanceDelay": false,
                "isException": false,
                "isDelivered": false
            }],
            "originAddr1": "",
            "originAddr2": "",
            "originCity": "SEOUL",
            "originStateCD": "",
            "originZip": "",
            "originCntryCD": "KR",
            "originLocationID": "",
            "originTermCity": "SEOUL",
            "originTermStateCD": "",
            "destLocationAddr1": "",
            "destLocationAddr2": "",
            "destLocationCity": "LANTAU ISLAND",
            "destLocationStateCD": "",
            "destLocationZip": "",
            "destLocationCntryCD": "HK",
            "destLocationID": "",
            "destLocationTermCity": "LANTAU ISLAND",
            "destLocationTermStateCD": "",
            "destAddr1": "",
            "destAddr2": "",
            "destCity": "CHEK LAP KOK",
            "destStateCD": "",
            "destZip": "",
            "destCntryCD": "HK",
            "halAddr1": "",
            "halAddr2": "",
            "halCity": "",
            "halStateCD": "",
            "halZipCD": "",
            "halCntryCD": "",
            "actualDelAddrCity": "CHEK LAP KOK",
            "actualDelAddrStateCD": "",
            "actualDelAddrZipCD": "",
            "actualDelAddrCntryCD": "HK",
            "totalTransitMiles": "",
            "excepReasonList": [""],
            "excepActionList": [""],
            "exceptionReason": "",
            "exceptionAction": "",
            "statusDetailsList": [""],
            "trackErrCD": "",
            "destTZ": "\u002b08\u003a00",
            "originTZ": "\u002b09\u003a00",
            "isMultiStat": "0",
            "multiStatList": [{
                "multiPiec": "",
                "multiTm": "",
                "multiDispTm": "",
                "multiSta": ""
            }],
            "maskMessage": "",
            "deliveryService": "",
            "milestoDestination": "",
            "terms": "",
            "originUbanizationCode": "",
            "originCountryName": "",
            "isOriginResidential": false,
            "halUrbanizationCD": "",
            "halCountryName": "",
            "actualDelAddrUrbanizationCD": "",
            "actualDelAddrCountryName": "",
            "destUrbanizationCD": "",
            "destCountryName": "",
            "delToDesc": "Shipping\u002fReceiving",
            "recpShareID": "",
            "shprShareID": "9mbo6hrq0tqxo1i4pr7kp2yp",
            "defaultCDOType": "CDO",
            "mpstype": "",
            "fxfAdvanceNotice": true,
            "rthavailableCD": "",
            "excepReasonListNoInit": [""],
            "excepActionListNoInit": [""],
            "statusDetailsListNoInit": [""],
            "matched": false,
            "isSuccessful": true,
            "errorList": [{
                "code": "",
                "message": "",
                "source": null
            }],
            "isCanceled": false,
            "isPrePickup": false,
            "isPickup": false,
            "isInTransit": false,
            "isInProgress": true,
            "isDelException": false,
            "isClearanceDelay": false,
            "isException": false,
            "isDelivered": true,
            "isHAL": false,
            "isOnSchedule": false,
            "isDeliveryToday": false,
            "isSave": false,
            "isWatch": false,
            "isHistorical": false,
            "isTenderedNotification": false,
            "isDeliveredNotification": true,
            "isExceptionNotification": false,
            "isCurrentStatusNotification": false,
            "isAnticipatedShipDtLabel": false,
            "isShipPickupDtLabel": true,
            "isActualPickupLabel": false,
            "isOrderReceivedLabel": false,
            "isEstimatedDeliveryDtLabel": true,
            "isDeliveryDtLabel": false,
            "isActualDeliveryDtLabel": true,
            "isOrderCompleteLabel": false,
            "isOutboundDirection": false,
            "isInboundDirection": false,
            "isThirdpartyDirection": false,
            "isUnknownDirection": false,
            "isFSM": false,
            "isReturn": false,
            "isOriginalOutBound": false,
            "isChildPackage": false,
            "isParentPackage": false,
            "isReclassifiedAsSingleShipment": false,
            "isDuplicate": false,
            "isMaskShipper": false,
            "isHalEligible": false,
            "isFedexOfficeOnlineOrders": false,
            "isFedexOfficeInStoreOrders": false,
            "isMultipleStop": false,
            "isCustomCritical": false,
            "isInvalid": false,
            "isNotFound": false,
            "isFreight": false,
            "isSpod": true,
            "isSignatureAvailable": false,
            "isMPS": false,
            "isGMPS": false,
            "isResidential": false,
            "isDestResidential": true,
            "isHALResidential": false,
            "isActualDelAddrResidential": false,
            "isReqEstDelDt": false,
            "isCDOEligible": false,
            "CDOInfoList": [{
                "spclInstructDesc": "",
                "delivOptn": "",
                "delivOptnStatus": "",
                "reqApptWdw": "",
                "reqApptDesc": "",
                "rerouteTRKNbr": "",
                "beginTm": "",
                "endTm": ""
            }],
            "CDOExists": false,
            "isMtchdByRecShrID": false,
            "isMtchdByShiprShrID": false
        }]
    }
}

0
投票

这就是我最终得到的结果,感谢@Blender

import requests
import json

# Days per calendar month (1 = January ... 12 = December).
# February is fixed at 28; leap years are not taken into account.
daysdict = {
    1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
    7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31,
}


def days_in_month(month):
    """Return the number of days in the given month.

    Args:
        month: Month number as an int, 1 through 12.

    Returns:
        The day count stored in ``daysdict`` for that month.

    Raises:
        ValueError: If ``month`` is not a key of ``daysdict``.
    """
    try:
        return daysdict[month]
    except KeyError:
        raise ValueError('month must be an integer from 1 to 12, got %r' % (month,))




def build_output(tracking_number):
    """POST a tracking query for ``tracking_number`` and return the decoded JSON reply.

    Args:
        tracking_number: Value placed in the request's ``trackingNumber`` field.

    Returns:
        The response body decoded from JSON by ``requests``.
    """
    # One entry of the request's tracking list, for the requested number.
    tracking_info = {
        'trackNumberInfo': {
            'trackingNumber': tracking_number,
            'trackingQualifier': '',
            'trackingCarrier': ''
        }
    }
    processing_parameters = {
        'anonymousTransaction': True,
        'clientId': 'WTRK',
        'returnDetailedErrors': True,
        'returnLocalizedDateTime': False
    }
    request_body = {
        'TrackPackagesRequest': {
            'appType': 'wtrk',
            'uniqueKey': '',
            'processingParameters': processing_parameters,
            'trackingInfoList': [tracking_info]
        }
    }

    # The request document travels JSON-encoded inside the 'data' form field.
    form = {
        'data': json.dumps(request_body),
        'action': 'trackpackages',
        'locale': 'en_US',
        'format': 'json',
        'version': 99
    }
    reply = requests.post('https://www.fedex.com/trackingCal/track', data=form)
    return reply.json()

# Keys read from a package entry of the tracking response.

# Ship date as displayed, e.g. "8/31/2013".
ship_time_key = "displayShipDt"
# Actual delivery date and time as displayed, e.g. "9/02/2013 11:58 am".
ship_arrival_key = "displayActDeliveryDateTime"



def track(tracking_number):
    """Fetch the ship date and actual delivery date/time for a tracking number.

    Args:
        tracking_number: Tracking number passed through to ``build_output``.

    Returns:
        A ``(ship_time_value, ship_arrival_value, exists)`` tuple. The first
        two items are the values stored under ``ship_time_key`` and
        ``ship_arrival_key`` in the first package of the response, or ``None``
        when the key (or the package list itself) is missing. ``exists`` is
        ``True`` only when both values were found, so callers can safely use
        both whenever it is set.
    """
    data = build_output(tracking_number)

    # Response layout: {'TrackPackagesResponse': {'packageList': [{...}, ...]}}
    response = data.get('TrackPackagesResponse') or {}
    # Fall back to one empty package so a missing/empty list yields "not found"
    # instead of an exception.
    packages = response.get('packageList') or [{}]
    package = packages[0]

    ship_time_value = package.get(ship_time_key)
    ship_arrival_value = package.get(ship_arrival_key)
    exists = ship_time_value is not None and ship_arrival_value is not None

    return ship_time_value, ship_arrival_value, exists


def _shipping_days(ship_text, arrival_text):
    """Return the whole calendar days from ship date to delivery date, or None.

    Both arguments are display strings whose first whitespace-separated token
    is a ``M/D/YYYY`` date (e.g. ``"8/31/2013"`` or ``"9/02/2013 11:58 am"``).
    ``None`` is returned when either string is missing, empty or malformed.
    """
    # Local import: the file's import header does not provide datetime.
    from datetime import datetime

    def parse_date(text):
        # Drop any trailing time-of-day part and keep only the date.
        return datetime.strptime(text.split()[0], '%m/%d/%Y').date()

    try:
        # Real date arithmetic handles two-digit months/days, month lengths,
        # multi-month gaps and year boundaries.
        return (parse_date(arrival_text) - parse_date(ship_text)).days
    except (AttributeError, IndexError, ValueError):
        return None


def print_results(tracking_number):
    """Print the ship date, delivery date and shipping duration in days.

    Args:
        tracking_number: Tracking number passed through to ``track``.

    Prints ``Invalid Tracking Number`` instead when the dates are missing or
    cannot be parsed (this includes a delivery date that is still empty).
    """
    ship_text, arrival_text, exists = track(tracking_number)
    ship_days = _shipping_days(ship_text, arrival_text) if exists else None
    if ship_days is None:
        print('Invalid Tracking Number')
        return

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('_____________________')
    print('Shipped: ' + ship_text)
    print('Arrived: ' + arrival_text)
    print('_____________________')
    print('\nShipping took:' + "     " + str(ship_days))


def raw_results(tracking_number):
    """Return the shipping duration in days for a tracking number.

    Args:
        tracking_number: Tracking number passed through to ``track``.

    Returns:
        The number of calendar days between ship date and delivery date as an
        ``int``, or ``None`` (after printing ``Invalid Tracking Number``) when
        the dates are missing or cannot be parsed.
    """
    # track() is called once; each call costs a network request.
    ship_text, arrival_text, exists = track(tracking_number)
    ship_days = _shipping_days(ship_text, arrival_text) if exists else None
    if ship_days is None:
        print('Invalid Tracking Number')
    return ship_days



#print_results(499552080632881)
© www.soinside.com 2019 - 2024. All rights reserved.