对 Python 的 TypeError 感到困惑

问题描述 投票:-2回答:2

我使用 Python 已经有一段时间了，也取得了一些进步，但这对我来说是一个新的错误。为了职业发展，我正在学习社交媒体分析，所以才尝试运行这里（here）的这段代码。

我已经解决了其中一个错误，但第 81 行出现的这个错误把我难住了：我不明白为什么函数“def get_user_objects(follower_ids):”会返回 None，也不知道按照此前其他问题中的建议，我需要修改什么。

为方便起见，下面是完整的脚本。非常感谢任何帮助。

再说明一下，错误信息是 TypeError: object of type 'NoneType' has no len()（“NoneType”类型的对象没有 len()）

from tweepy import OAuthHandler
from tweepy import API
from collections import Counter
from datetime import datetime, date, time, timedelta
import sys
import json
import os
import io
import re
import time

# Helper functions to load and save intermediate steps
def save_json(variable, filename):
    """Write ``variable`` to ``filename`` as pretty-printed JSON.

    The file is opened for writing as UTF-8; the JSON text uses a 4-space
    indent and non-ASCII characters are written unescaped.
    """
    with io.open(filename, mode="w", encoding="utf-8") as out_file:
        serialized = json.dumps(variable, indent=4, ensure_ascii=False)
        out_file.write(str(serialized))

def load_json(filename):
    """Return the JSON document stored in ``filename``, or ``None``.

    ``None`` is returned when the file does not exist and also when opening
    or parsing it raises: every exception is silently swallowed, so callers
    must be prepared for a ``None`` result.
    """
    loaded = None
    if not os.path.exists(filename):
        return loaded
    try:
        with io.open(filename, mode="r", encoding="utf-8") as in_file:
            loaded = json.load(in_file)
    except:
        # Best-effort load: a failure leaves the result as None.
        pass
    return loaded

def try_load_or_process(filename, processor_fn, function_arg):
    """Return cached data from ``filename`` or compute and cache it.

    Files whose name ends with "json" are handled by ``load_json`` /
    ``save_json``; any other name uses ``load_bin`` / ``save_bin``.  When the
    file already exists its loaded contents are returned as-is; otherwise
    ``processor_fn(function_arg)`` is called, its result is saved to
    ``filename`` and then returned.
    """
    if filename.endswith("json"):
        loader, saver = load_json, save_json
    else:
        loader, saver = load_bin, save_bin

    if not os.path.exists(filename):
        # Nothing cached yet: compute the value and store it for next time.
        result = processor_fn(function_arg)
        print("Saving " + filename)
        saver(result, filename)
        return result

    print("Loading " + filename)
    return loader(filename)

# Some helper functions to convert between different time formats and
# perform date calculations
def twitter_time_to_object(time_string):
    """Parse a Twitter ``created_at`` string into a naive ``datetime``.

    The expected layout is ``"Wed Aug 27 13:08:45 +0000 2008"``: a date/time
    part, a ``+HHMM`` offset and a four-digit year.  The offset is matched
    but not applied; the returned ``datetime`` carries no tzinfo.

    Returns ``None`` when ``time_string`` does not match that layout.
    """
    twitter_format = "%a %b %d %H:%M:%S %Y"
    # Raw string so that \s and \+ reach the regex engine untouched; the last
    # group must accept any four digits (the year).
    match = re.search(r"^(.+)\s(\+[0-9]{4})\s([0-9]{4})$", time_string)
    if match is None:
        return None
    # Re-join the date/time part and the year, leaving out the offset, so the
    # text fits twitter_format.
    new_string = match.group(1) + " " + match.group(3)
    return datetime.strptime(new_string, twitter_format)

def time_object_to_unix(time_object):
    """Convert a ``datetime`` to integer Unix seconds via ``time.mktime``.

    The datetime's fields are interpreted as local time, which is what the
    previous ``strftime("%s")`` call did on platforms that support it.
    ``%s`` is a C-library extension that Python does not document and that is
    missing on some platforms, so ``time.mktime`` is used instead.
    """
    return int(time.mktime(time_object.timetuple()))

def twitter_time_to_unix(time_string):
    """Convert a Twitter timestamp string to Unix seconds.

    Parses the string with ``twitter_time_to_object`` and passes the result
    to ``time_object_to_unix``.
    """
    parsed = twitter_time_to_object(time_string)
    return time_object_to_unix(parsed)

def seconds_since_twitter_time(time_string):
    """Return the whole seconds elapsed since the given Twitter timestamp.

    Both the parsed timestamp and the current time (from
    ``get_utc_unix_time``) are truncated to ``int`` before subtracting.
    """
    then = int(twitter_time_to_unix(time_string))
    now = int(get_utc_unix_time())
    elapsed = now - then
    return elapsed

def get_utc_unix_time():
    """Return the current UTC date/time fields converted by ``time.mktime``.

    The value is a float.  ``time.mktime`` interprets the fields as local
    time, so the result equals the true Unix time only when the local zone
    is UTC.
    """
    utc_fields = datetime.utcnow().timetuple()
    return time.mktime(utc_fields)

# Get a list of follower ids for the target account
def get_follower_ids(target):
    """Return whatever ``auth_api.followers_ids`` reports for ``target``.

    Relies on the module-level ``auth_api`` object created by the script's
    entry point.
    """
    follower_ids = auth_api.followers_ids(target)
    return follower_ids

# Twitter API allows us to batch query 100 accounts at a time
# So we'll create batches of 100 follower ids and gather Twitter User
# objects for each batch
def get_user_objects(follower_ids):
    """Look up user records for ``follower_ids`` in batches of 100.

    Each batch is passed to ``auth_api.lookup_users(user_ids=batch)`` (the
    module-level ``auth_api`` object) and the ``_json`` attribute of every
    returned user is collected.  A "Fetching batch: i/total" progress line is
    rewritten on stdout for each batch.

    Args:
        follower_ids: sliceable sequence of ids; must support ``len()``.

    Returns:
        A list with one ``_json`` value per user returned by the API.  An
        empty input yields an empty list without contacting the API.
    """
    batch_len = 100
    # Ceiling division: plain "/" produces a float on Python 3 (e.g. "2.5")
    # and would under-count a final partial batch.
    num_batches = (len(follower_ids) + batch_len - 1) // batch_len
    all_data = []
    starts = range(0, len(follower_ids), batch_len)
    # Count from 1 so the last progress line reads "total/total".
    for batch_number, start in enumerate(starts, 1):
        batch = follower_ids[start:start + batch_len]
        sys.stdout.write("\r")
        sys.stdout.write("Fetching batch: " + str(batch_number) + "/" +
                         str(num_batches))
        sys.stdout.flush()
        users_list = auth_api.lookup_users(user_ids=batch)
        all_data.extend(user._json for user in users_list)
    return all_data
# Creates one week length ranges and finds items that fit into those range
# boundaries
def make_ranges(user_data, num_ranges=20):
    """Group user records into one-week buckets by account age.

    Args:
        user_data: iterable of dict records.  Records without a
            "created_at" key are ignored; the others are aged with
            ``seconds_since_twitter_time(user["created_at"])``.
        num_ranges: number of consecutive one-week buckets, starting at an
            age of zero seconds.

    Returns:
        A dict mapping labels such as "00 - 01 weeks" to a list of entries
        of the form ``{id_str: {field: value, ...}}``.  Every label is
        present even when its list is empty.  Accounts older than the last
        bucket (or with a negative age) are not included anywhere.
    """
    range_step = 604800  # seconds in one week

    # We create ranges and labels first and then iterate these when going
    # through the whole list of user data, to speed things up
    ranges = {}
    labels = {}
    for x in range(num_ranges):
        label = "%02d - %02d weeks" % (x, x + 1)
        labels[label] = []
        ranges[label] = {"start": x * range_step,
                         "end": (x + 1) * range_step}

    fields = ["screen_name", "name", "created_at", "friends_count",
              "followers_count", "favourites_count", "statuses_count"]
    for user in user_data:
        if "created_at" not in user:
            continue
        account_age = seconds_since_twitter_time(user["created_at"])
        for label, timestamps in ranges.items():
            # Half-open interval so an age landing exactly on a week boundary
            # is still counted (in the later bucket) instead of being dropped.
            if timestamps["start"] <= account_age < timestamps["end"]:
                id_str = user["id_str"]
                entry = {id_str: {f: user[f] for f in fields if f in user}}
                labels[label].append(entry)
                break  # buckets do not overlap; no need to keep scanning
    return labels


if __name__ == "__main__":
    # Every command-line argument is a target account to analyse.
    account_list = sys.argv[1:]

    if not account_list:
        print("No parameters supplied. Exiting.")
        sys.exit(0)

    # Placeholder credentials; fill in real Twitter API keys before running.
    consumer_key = "XXXXXXX"
    consumer_secret = "XXXXXX"
    access_token = "XXXXXXX"
    access_token_secret = "XXXXXXXX"

    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # Module-level name: get_follower_ids and get_user_objects read it as a
    # global.
    auth_api = API(auth)

    for target in account_list:
        print("Processing target: " + target)

        # Get a list of Twitter ids for followers of target account and
        # save it
        filename = target + "_follower_ids.json"
        follower_ids = try_load_or_process(filename, get_follower_ids,
                                           target)

        # Fetch Twitter User objects from each Twitter id found and save
        # the data
        filename = target + "_followers.json"
        user_objects = try_load_or_process(filename, get_user_objects,
                                           follower_ids)
        total_objects = len(user_objects)

        # Record a few details about each account that falls between
        # specified age ranges
        ranges = make_ranges(user_objects)
        filename = target + "_ranges.json"
        save_json(ranges, filename)

        # Print a few summaries
        print()
        print("\t\tFollower age ranges")
        print("\t\t===================")
        total = 0
        following_counter = Counter()
        # dict.items() replaces the Python 2-only iteritems().
        for label, entries in sorted(ranges.items()):
            print("\t\t" + str(len(entries)) +
                  " accounts were created within " + label)
            total += len(entries)
            for entry in entries:
                for values in entry.values():
                    if "friends_count" in values:
                        following_counter[values["friends_count"]] += 1
        print("\t\tTotal: " + str(total) + "/" + str(total_objects))
        print()
        print("\t\tMost common friends counts")
        print("\t\t==========================")
        total = 0
        for num, count in following_counter.most_common(20):
            total += count
            print("\t\t" + str(count) + " accounts are following " +
                  str(num) + " accounts")
        print("\t\tTotal: " + str(total) + "/" + str(total_objects))
        print()
        print()
python python-3.x tweepy
2个回答
1
投票

当前的问题出在 load_json：你假设它的返回值是列表或字典，或者至少是可以传给 len 的对象。但实际上，它在很多情况下都可能返回 None：

  1. 找不到要读取的文件
  2. 从文件中读取有一些错误
  3. 解码文件内容时出现问题
  4. 该文件只包含JSON值null

在调用 load_json 之后，你没有检查它的返回值。

更糟糕的是,你捕获并忽略了load_json中可能发生的任何异常,导致它静默地返回None而没有任何迹象表明出现了问题。

这个函数更好的写法是：

def load_json(filename):
    """Load and return the JSON document stored in ``filename``.

    Nothing is caught here: a missing file, a read error or invalid JSON
    raises to the caller.
    """
    with io.open(filename, mode="r", encoding="utf-8") as in_file:
        parsed = json.load(in_file)
    return parsed

至少现在，任何错误都会引发一个未被捕获的异常，这样既能明显看出存在问题，也能提供问题出在哪里的线索。异常处理的黄金法则是：只捕获你能够处理的异常；如果对捕获到的异常无能为力，就把它重新抛出。


0
投票

您可以检查返回值，并据此进行相应的处理：

# Fetch Twitter User objects from each Twitter id found and save the data
        filename = target + "_followers.json"
        res_get_user_objects = get_user_objects()
        if res_get_user_objects is not None:
            user_objects = try_load_or_process(filename, get_user_objects,
    follower_ids)
            total_objects = len(user_objects)
        else:
            # handle it otherwise
最新问题
© www.soinside.com 2019 - 2024. All rights reserved.