如何对元组列表中相同的项目进行分组汇总

问题描述 投票:-3回答:1

我有一个这样的元组列表:

tup_list = [('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 21.325), ('UL00628', 6.675), ('UL00628', 22.5), ('UL00628', 5.5), ('UL00628', 15.525), ('UL00628', 12.475), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428-OGA', 28.0)]

我想按每个元组中相同的项目(如UL00628、UL00428)分别汇总数量。我应该用什么方式来迭代?

顺便说一下,tup_list来自excel文件。代码如下:

# Read the worksheet and collect a (grade, quantity) pair for every row whose
# load date lies between the start and end month/day entered by the user.
load_all = dict()
file_name = '***.xls'
# Raw string: '\*' is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning. The resulting path text is unchanged.
wb = xlrd.open_workbook(r'd:\**%s' % file_name)
table = wb.sheet_by_name('***')
date_start_month = int(input('Pls enter the date of start month:'))
date_start_day = int(input('Pls enter the date of start day:'))
date_end_month = int(input('Pls enter the date of end month:'))
date_end_day = int(input('Pls enter the date of end day:'))
count = 0
tup_list = list()
tup = tuple()
nrows = table.nrows
# Compare (month, day) pairs instead of testing month and day separately:
# independent tests reject valid days when the range crosses a month boundary
# (e.g. Feb 25 - Mar 5 would require 25 <= day <= 5). The year is ignored, so
# a range that wraps past December is still not supported.
range_start = (date_start_month, date_start_day)
range_end = (date_end_month, date_end_day)
if table.cell(1, 11).value == '****':
    for num in range(2, nrows):
        date_of_load = table.cell(num, 11).value  # load date as stored in the sheet
        # xldate_as_tuple gives (year, month, day, hour, minute, second).
        year, month, day, hous, minute, second = xlrd.xldate.xldate_as_tuple(date_of_load, 0)
        if range_start <= (month, day) <= range_end:
            grade_name = table.cell(num, 3).value  # grade
            grade_num = table.cell(num, 5).value  # quantity
            tup = (grade_name, grade_num)
            tup_list.append(tup)
            count += 1

^^^^^^^^^这是原始数据

NO  Grade   quantity    Loadday
9   UL00628 28.0000     2018/2/7
10  UL00628 28.0000     2018/2/7
11  UL00628 28.0000     2018/2/7
12  EVA-OGC 28.0000     2018/2/7
13  EVA-OGC 28.0000     2018/2/7
14  UL00628 28.0000     2018/2/8
15  UL00628 28.0000     2018/2/8
16  UL00628 28.0000     2018/2/19
17  UL00628 28.0000     2018/2/19
18  UL00628 28.0000     2018/2/19
19  UL00628 28.0000     2018/2/19
20  UL00628 28.0000     2018/2/19
21  UL00628 28.0000     2018/2/19
22  UL00628 28.0000     2018/2/19
23  UL00628 28.0000     2018/2/19
24  UL00628 28.0000     2018/2/20
25  UL00628 28.0000     2018/2/20
26  UL00628 28.0000     2018/2/20
27  UL00628 28.0000     2018/2/20
28  UL00628 28.0000     2018/2/20

我需要先筛选出装载日期符合条件的行,然后取出等级和数量,并按等级分别汇总数量。

python-3.x list tuples calculated-columns
1个回答
0
投票

对于数据处理,特别是涉及Excel或CSV文件时,我会使用pandas,而不是直接使用openpyxl(或像问题中那样直接使用xlrd)

除此之外,一旦你有元组列表,你可以使用defaultdict

from collections import defaultdict
# Group the quantities by grade: each grade maps to the list of its quantities,
# in the order they appear in tup_list.
results = defaultdict(list)
for name, amount in tup_list:
    bucket = results[name]
    bucket.append(amount)

Other problems

我会做的其他改变是

  • 将程序的不同部分放在不同的功能中
  • 确保收集输入的代码能够处理用户输入非int值的情况,
  • 将输入放在dict中,所以如果有一天你从另一个脚本或程序的一部分获得所需数据的输入,你可以轻松地重用这个
  • 尽可能使用生成器而不是返回列表的函数
  • 使用with语句打开资源
  • 使用pathlib.Path处理文件和文件名
  • 使用if __name__ == "__main__"

测试

from collections import defaultdict
from pathlib import Path 
import xlrd

def get_int_inputs(questions):
    """Prompt for one integer per question and yield ``(key, answer)`` pairs.

    Each prompt is repeated until ``int()`` accepts the reply, so exactly one
    pair is produced for every entry of *questions*. An answer of ``0`` is a
    valid integer and ends the retry loop like any other value.

    Args:
        questions: Mapping of result key to the prompt text passed to
            ``input``.

    Yields:
        ``(key, answer)`` tuples in the iteration order of *questions*.
    """
    for key, msg in questions.items():
        while True:
            try:
                answer = int(input(msg))
            except ValueError:
                # Reply was not an integer: ask the same question again.
                continue
            break
        # Yield outside the ``try`` so a ValueError raised by the consumer is
        # never mistaken for a bad reply.
        yield key, answer


def parse_file(filename, inputs):
    """Yield ``(grade_name, grade_num)`` for every row inside the date range.

    Opens the workbook with xlrd, reads sheet ``'***'`` and, provided cell
    (1, 11) holds the expected header ``'****'``, walks the rows from index 2
    onwards. A row is kept when the (month, day) of its load date in column 11
    lies between the start and end (month, day) in *inputs*, both ends
    included. The year is ignored, so a range that wraps past December is not
    supported.

    Args:
        filename: Path of the ``.xls`` workbook.
        inputs: Mapping with the integer keys ``date_start_month``,
            ``date_start_day``, ``date_end_month`` and ``date_end_day``.

    Yields:
        ``(grade_name, grade_num)`` read from columns 3 and 5 of each
        matching row.
    """
    # Comparing (month, day) pairs keeps ranges that cross a month boundary
    # working, e.g. Feb 25 - Mar 5.
    range_start = (inputs['date_start_month'], inputs['date_start_day'])
    range_end = (inputs['date_end_month'], inputs['date_end_day'])
    with xlrd.open_workbook(filename) as wb:
        table = wb.sheet_by_name('***')
        if table.cell(1, 11).value != '****':
            return
        for num in range(2, table.nrows):
            date_of_load = table.cell(num, 11).value
            year, month, day, *_ = xlrd.xldate.xldate_as_tuple(date_of_load, 0)
            if not range_start <= (month, day) <= range_end:
                continue
            grade_name = table.cell(num, 3).value
            grade_num = table.cell(num, 5).value
            yield grade_name, grade_num


def aggregate(quantities):
    """Return a dict mapping each grade name to the sum of its quantities.

    Args:
        quantities: Iterable of ``(grade_name, grade_num)`` pairs.

    Returns:
        A plain ``dict`` keyed by grade name, in order of first appearance.
    """
    grouped = {}
    for name, amount in quantities:
        grouped.setdefault(name, []).append(amount)

    totals = {}
    for name, amounts in grouped.items():
        totals[name] = sum(amounts)
    return totals


if __name__ == '__main__':
    # Prompt text for every integer the date filter needs, keyed by the name
    # under which the answer is stored.
    wanted_input = dict(
        date_start_month='Pls enter the date of start month:',
        date_start_day='Pls enter the date of start day:',
        date_end_month='Pls enter the date of end month:',
        date_end_day='Pls enter the date of end day:',
    )
    inputs = {key: answer for key, answer in get_int_inputs(wanted_input)}

    # Read the workbook lazily and total the quantities per grade.
    filename = Path('D:/') / '***.xls'
    quantities = parse_file(filename, inputs)
    result = aggregate(quantities)

由于没有样本数据,我无法测试这段代码,因此其中可能存在不少错误

Pandas

另一种方法是使用pandas进行数据处理

然后你会得到类似的东西

from pathlib import Path 
import pandas as pd

def parse_data(df, inputs, date_column=None, grade_column=None,
               quantity_column=None):
    """Return the total quantity per grade for rows inside the date range.

    A row is kept when the (month, day) of its date lies between the start and
    end (month, day) in *inputs*, both ends included. The year is ignored, so
    a range that wraps past December is not supported.

    Args:
        df: DataFrame holding the sheet data.
        inputs: Mapping with the integer keys ``date_start_month``,
            ``date_start_day``, ``date_end_month`` and ``date_end_day``.
        date_column: Label of the date column; defaults to ``df.columns[11]``.
        grade_column: Label of the grade column; defaults to
            ``df.columns[3]``.
        quantity_column: Label of the quantity column; defaults to
            ``df.columns[5]``.

    Returns:
        A Series of summed quantities indexed by grade, or ``None`` when
        column 11 does not carry the expected ``'****'`` header.
    """
    if df.columns[11] != '****':  # index might be different, depending on whether there is an index-col and 0- or 1-based indexing
        return None

    # The positional defaults mirror the columns the xlrd-based code reads
    # (3 = grade, 5 = quantity, 11 = date). Pass explicit labels if the
    # DataFrame is laid out differently.
    if date_column is None:
        date_column = df.columns[11]
    if grade_column is None:
        grade_column = df.columns[3]
    if quantity_column is None:
        quantity_column = df.columns[5]

    # The column must already be datetime-typed; if it is not, convert it
    # first with pd.to_datetime(df[date_column]).
    dates = df[date_column]

    # Encode (month, day) as month * 100 + day so one inclusive comparison
    # covers ranges that cross a month boundary. Relying on the default
    # (inclusive) behaviour of ``between`` avoids the ``inclusive`` argument,
    # whose accepted values differ between pandas versions.
    month_day = dates.dt.month * 100 + dates.dt.day
    date_correct = month_day.between(
        inputs['date_start_month'] * 100 + inputs['date_start_day'],
        inputs['date_end_month'] * 100 + inputs['date_end_day'],
    )
    return df[date_correct].groupby(grade_column)[quantity_column].sum()

if __name__ == '__main__':
    # Prompt text for every integer the date filter needs, keyed by the name
    # under which the answer is stored.
    wanted_input = {
        'date_start_month': 'Pls enter the date of start month:',
        'date_start_day': 'Pls enter the date of start day:',
        'date_end_month': 'Pls enter the date of end month:',
        'date_end_day': 'Pls enter the date of end day:',
    }
    inputs = dict(get_int_inputs(wanted_input))

    filename = Path('D:/' , '***.xls')
    # An empty sheet name cannot match any worksheet; use the same masked
    # sheet-name placeholder as the xlrd-based version.
    df = pd.read_excel(filename, sheet_name='***', header=0)
    # parse_data needs the date-range inputs as its second argument.
    result = parse_data(df, inputs)
© www.soinside.com 2019 - 2024. All rights reserved.