如何优化 MongoDB 中的聚合管道

问题描述 投票:0回答:1

我有一个复杂的聚合管道,但是当收集的数据较少(例如 300,400)时它工作得很好,但如果数据量很大(例如 20000,30000),则需要更长的时间来获取和生成 pdf,该 pdf 包含所有 30000 数据,那么如何快速这个过程。只能通过索引还是有其他方法可以实现这一点?请帮忙......

下面我提供了service.ts文件,

    async getQCResultSLcmExcel(
    filters: GetAllQcInspectionWithFilterSlcmDto,
    businessUnitId: string,
  ) {
    let startDateQuery = {};
    let endDateQuery = {};
    let commoditySearchQuery = {};
    let variantSearchQuery = {};
    const statusQuery = {};
    let cidNumberSearchQuery = {};
    const businessUnitFilterQuery = {};
    let generalSearchQuery = {};

    if (filters.startDate) {
      startDateQuery = {
        $expr: {
          $gte: [
            '$createdAt',
            {
              $dateFromString: {
                dateString: filters.startDate,
                timezone: '+05:30',
                format: '%m-%d-%Y',
              },
            },
          ],
        },
      };
    }

    if (filters.endDate) {
      endDateQuery = {
        $expr: {
          $lt: [
            '$createdAt',
            {
              $dateAdd: {
                startDate: {
                  $dateFromString: {
                    dateString: filters.endDate,
                    timezone: '+05:30',
                    format: '%m-%d-%Y',
                  },
                },
                unit: 'day',
                amount: 1,
              },
            },
          ],
        },
      };
    }

    if (filters.searchByCommodity) {
      commoditySearchQuery = {
        'commodityData.name': {
          $regex: `${filters.searchByCommodity}`,
          $options: 'i',
        },
      };
    }
    if (filters.searchByVariant) {
      variantSearchQuery = {
        'commodityVariantData.name': {
          $regex: `${filters.searchByVariant}`,
          $options: 'i',
        },
      };
    }

    if (filters.searchByStatus) {
      statusQuery['status'] = filters.searchByStatus;
    }

    if (filters.searchByCIDNumber) {
      cidNumberSearchQuery = {
        $or: [
          {
            'commodityDetail.CIDNumber': {
              $regex: `${filters.searchByCIDNumber}`,
              $options: 'i',
            },
          },
          {
            'businessUnitData.name': {
              $regex: `${filters.searchByCIDNumber}`,
              $options: 'i',
            },
          },
          {
            'commodityDetail.LOTNumber': {
              $regex: `${filters.searchByCIDNumber}`,
              $options: 'i',
            },
          },
        ],
      };
    }
    if (filters.searchByGeneralSearch) {
      generalSearchQuery = {
        $or: [
          {
            qcId: {
              $regex: `${filters.searchByGeneralSearch}`,
              $options: 'i',
            },
          },
          {
            'businessUnitData.name': {
              $regex: `${filters.searchByGeneralSearch}`,
              $options: 'i',
            },
          },
          {
            'userData.name': {
              $regex: `${filters.searchByGeneralSearch}`,
              $options: 'i',
            },
          },
        ],
      };
    }

    if (businessUnitId) {
      businessUnitFilterQuery['businessUnitId'] = new mongoose.Types.ObjectId(
        businessUnitId,
      );
    }

    const result = await this.qcInspectionModel.aggregate([
      {
        $match: {
          $and: [
            startDateQuery,
            endDateQuery,
            statusQuery,
            businessUnitFilterQuery,
            { isDeleted: false },
            { isSLCMQcInspection: true },
          ],
        },
      },
      {
        $lookup: {
          from: 'mastercommodities',
          localField: 'commodityId',
          pipeline: [
            {
              $project: {
                name: 1,
              },
            },
          ],
          foreignField: '_id',
          as: 'commodityData',
        },
      },
      {
        $unwind: '$commodityData',
      },
      {
        $lookup: {
          from: 'commodityvariants',
          localField: 'commodityVariantId',
          pipeline: [
            {
              $project: {
                name: 1,
              },
            },
          ],
          foreignField: '_id',
          as: 'commodityVariantData',
        },
      },
      {
        $unwind: '$commodityVariantData',
      },
      {
        $lookup: {
          from: 'businessunits',
          localField: 'businessUnitId',
          foreignField: '_id',
          as: 'businessUnitData',
          pipeline: [
            {
              $project: {
                name: 1,
                businessUnitCode: 1,
                _id: 0,
              },
            },
          ],
        },
      },
      {
        $unwind: {
          path: '$businessUnitData',
          preserveNullAndEmptyArrays: true,
        },
      },
      {
        $lookup: {
          from: 'users',
          localField: 'createdBy',
          foreignField: '_id',
          as: 'userData',
          pipeline: [
            {
              $project: {
                firstName: 1,
                lastName: 1,
                _id: 0,
                name: { $concat: ['$firstName', ' ', '$lastName'] },
              },
            },
          ],
        },
      },
      {
        $unwind: {
          path: '$userData',
          preserveNullAndEmptyArrays: true,
        },
      },
      {
        $match: {
          $and: [
            commoditySearchQuery,
            variantSearchQuery,
            generalSearchQuery,
            cidNumberSearchQuery,
          ],
        },
      },
      {
        $sort: {
          createdAt:
            filters.sortOrder && filters.sortOrder != SortOrder.Ascending
              ? SortOrder.Descending
              : SortOrder.Ascending,
        },
      },
      {
        $project: {
          _id: 0,
          'QC ID': {
            $cond: {
              if: '$qcId',
              then: '$qcId',
              else: '',
            },
          },
          'CID Number': {
            $cond: {
              if: '$commodityDetail',
              then: '$commodityDetail.CIDNumber',
              else: '',
            },
          },
          Commodity: {
            $cond: {
              if: '$commodityData',
              then: '$commodityData.name',
              else: '',
            },
          },
          Date: {
            $cond: {
              if: '$createdAt',
              then: {
                $dateToString: { format: '%d-%m-%Y', date: '$createdAt' },
              },
              else: '',
            },
          },
          'Analyst Name': {
            $cond: {
              if: '$userData',
              then: '$userData.name',
              else: '',
            },
          },
          'Business Unit': {
            $cond: {
              if: '$businessUnitData',
              then: '$businessUnitData.name',
              else: '',
            },
          },
          'LOT No': {
            $cond: {
              if: '$commodityDetail',
              then: '$commodityDetail.LOTNumber',
              else: '',
            },
          },
          Location: {
            $cond: {
              if: '$location',
              then: '$location.name',
              else: '',
            },
          },
          'Middleware Status': {
            $cond: {
              if: '$middlewareStatus',
              then: '$middlewareStatus',
              else: '',
            },
          },
          Status: {
            $cond: {
              if: '$status',
              then: '$status',
              else: '',
            },
          },
        },
      },
    ]);

    if (result && result.length > 0) {
      const payload = await generateExcel(result, 'slcm-qc-reports');
      const excelData = await this.uploadService.fileUpload(payload);
      return excelData.Location || '';
    } else {
      return ERROR_MESSAGE.NO_DATA_EXPORT;
    }
  }

提前致谢....

mongodb mongoose mongodb-query aggregation-framework query-optimization
1个回答
0
投票

正则表达式总是很慢,你真的需要它们吗?

您可以通过使用第三方日期库来提高日期过滤器的效率,例如Luxonmoment.jsDay.js

我会这样做:

const { DateTime } = require("luxon"); 

if (filters.startDate) {
   startDateQuery = { createdAt: { $gte: DateTime.fromFormat(filters.startDate, "MM-dd-yyyy").setZone("+05:30").toJSDate() } }
}    
if (filters.endDate) {
   endDateQuery = { createdAt: { $lt: DateTime.fromFormat(filters.startDate, "MM-dd-yyyy").plus({ days: 1 }).setZone("+05:30").toJSDate() } }
}

当您的输入数据根本没有时间信息时,使用时区看起来有点奇怪。请注意,像 Luxon 这样的库提供了类似

startOf('day')
的方法,它们也知道时区。

我假设大多数

$lookup
仅返回一份文档。在这种情况下,您不需要
$unwind
。投影像

{ $set: { commodityVariantData: { $first: "$commodityVariantData" } } }

可能会更快。

除此之外,四个

$lookup
已经很多了,你可以检查一下你的数据模型。 MongoDB 等 NoSQL 数据库并未针对连接进行优化。考虑嵌入文档。

© www.soinside.com 2019 - 2024. All rights reserved.