在hadoop中运行此程序时出错

问题描述 投票:-2回答:1

我正在尝试在 Hadoop 中编写一个 MapReduce 程序,用于统计 Twitter 转储数据中某个特定用户所发推文的单词数。下面是我的代码。我认为在解析 CSV 文件时,我的 mapper 中有一个错误,但我不确定该如何解决。谁能提供一些见解?

import java.io.IOException;
import java.util.StringTokenizer;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;    
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.commons.lang.StringEscapeUtils;
import java.util.HashMap;




public class Problem1{



public static class SOWordCountMapper extends
        Mapper<Object, Text, Text, IntWritable> {

private static final IntWritable ONE = new IntWritable(1);
private Text txt = new Text();
private Text user = new Text();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {

Configuration conf = context.getConfiguration();
String tweetHandle = conf.get("tweetHandle");


String line = value.toString();
String parsed[] = line.split("\" , ");

String skip[] = new String[1];
skip[0] = "Handle,Tweet,Favs,RTs,Latitude,Longitude";
if(parsed[0].equals(skip[0])){

    return;
}

String handle = parsed[0].replace("\"","").toLowerCase();
String text = parsed[1].replace("\"", "");

        if(user == null || txt == null || !user.equals(tweetHandle)){
            return;
        }
        }

}


    public static class IntSumReducer extends
        Reducer<Text, IntWritable, Text, IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }

        result.set(sum);
        context.write(key, result);

    }
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Problem1 <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Problem1");

    job.setJarByClass(Problem1.class);
    job.setMapperClass(SOWordCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);


}
}

我尝试运行程序时遇到这些错误:

java.lang.Exception: java.lang.ArrayIndexOutOfBoundsException: 1
        at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
        at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
        at Problem1$SOWordCountMapper.map(Problem1.java:49)
        at Problem1$SOWordCountMapper.map(Problem1.java:24)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
java hadoop mapreduce
1个回答
0
投票

该程序在 String text = parsed[1].replace("\"", ""); 这一行出错,因为 parsed[1] 不存在(数组中只有一个元素)。

问题出在代码 String parsed[] = line.split("\" , "); 上。我认为传给 split() 的参数不正确(括号中有 3 个双引号):它实际表示的分隔符是「双引号、空格、逗号、空格」,输入行中若不包含这一序列,split() 就只会返回一个元素。如果你想按 "," 拆分:

String parsed[] = line.split(Pattern.quote(","), -1);

如果你想按双引号 " 拆分:

String parsed[] = line.split(Pattern.quote("\""), -1);

对于Pattern类,请使用import java.util.regex.Pattern

© www.soinside.com 2019 - 2024. All rights reserved.