More Related Content
Similar to 20121215 DevLOVE2012 Mahout on AWS
Similar to 20121215 DevLOVE2012 Mahout on AWS (20)
More from 都元ダイスケ Miyamoto
More from 都元ダイスケ Miyamoto (20)
20121215 DevLOVE2012 Mahout on AWS
- 2. 自己紹介
• 都元ダイスケ (@daisuke_m)
• Java屋です
• java-jaから来ま(ry
Java
オブジェクト指向
Eclipse
恭ライセンス
薬
Mahout
Spring
XML Jiemamy
DDD
Hadoop OSGi
Haskell
Scala
Maven
Wicket
AWS
酒
- 7. アプリと機械学習
• CRUD (create, read, update, delete)
• FILTER (where)
• AGGREGATE (count, sum, ave, max, min...)
• SORT (order by)
• INTELLIGENCE (machine learning)
- 9. レコメンド
1,101,5.0
1,102,3.0
1,103,2.5
2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0
...
1128 [
1179:5.0,
3160:4.6582785, ...,
797:4.0637455
]
1136 [
33493:4.8670673,
6934:4.86497, ...,
230:4.335819
]
...
recommendation
【input】 【output】
- 11. 入力データ (intro.csv)
1,101,5.0
1,102,3.0
1,103,2.5
2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0
3,101,2.5
3,104,4.0
3,105,4.5
3,107,5.0
4,101,5.0
4,103,3.0
4,104,4.5
4,106,4.0
5,101,4.0
5,102,3.0
5,103,2.0
5,104,4.0
5,105,3.5
5,106,4.0
- 12. 簡単なレコメンド
import java.io.File;
import java.util.List;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
// Load the "user,item,rating" rows from the CSV file.
DataModel ratings = new FileDataModel(new File("intro.csv"));

// User-to-user similarity is measured with Pearson correlation.
UserSimilarity pearson = new PearsonCorrelationSimilarity(ratings);

// First constructor argument (2) is the neighborhood size.
UserNeighborhood nearestUsers = new NearestNUserNeighborhood(2, pearson, ratings);

Recommender userBased = new GenericUserBasedRecommender(ratings, nearestUsers, pearson);

// Per the Mahout Taste API, recommend(userID, howMany): user 1, at most 2 items.
List<RecommendedItem> topItems = userBased.recommend(1, 2);
for (RecommendedItem item : topItems) {
    System.out.println(item);
}
- 17. • 1 vs 5 = 0.94
• 1 vs 4 = 0.99
• 1 vs 2 = -0.76
• 1 vs 3 = NaN
• 1 vs 1 = 1.0
- 18. 相関係数
• 1 vs 1 = 1.0
• 1 vs 2 = -0.7642652566278799
• 1 vs 3 = NaN
• 1 vs 4 = 0.9999999999999998
• 1 vs 5 = 0.944911182523068
それぞれの人が1さんの予想評点に与える影響度
- 20. 加重平均
アイテム104: $4.25 = (0.94 \times 4.0 + 0.99 \times 4.5) / 1.93$
アイテム105: $3.50 = (0.94 \times 3.5) / 0.94$
アイテム106: $4.00 = (0.94 \times 4.0 + 0.99 \times 4.0) / 1.93$
この情報は相関係数が低い、またはNaNなので、もうアテにしない
- 23. 分散レコメンド
1,101,5.0
1,102,3.0
1,103,2.5
2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0
...
1128 [
1179:5.0,
3160:4.6582785, ...,
797:4.0637455
]
1136 [
33493:4.8670673,
6934:4.86497, ...,
230:4.335819
]
...
recommendation
【input】 【output】
S3 EMR S3
- 28. upload by code
import java.io.File;
import com.amazonaws.auth.*;
import com.amazonaws.services.s3.*;
import com.amazonaws.services.s3.model.Region;
// Placeholder key pair exactly as shown on the slide; supply real values when running.
AWSCredentials credentials = new BasicAWSCredentials("AccessKeyID", "SecretAccessKey");
AmazonS3 s3 = new AmazonS3Client(credentials);

// The same bucket receives both the job JAR and the ratings data.
String bucket = "mahoutinaction-jp";
s3.createBucket(bucket, Region.AP_Tokyo);

// Upload the Mahout job JAR under the "mahout/" prefix.
File jobJar = new File("mahout-core-0.7-job.jar");
s3.putObject(bucket, "mahout/mahout-core-0.7-job.jar", jobJar);

// Upload the ratings file under the "input10m/" prefix.
File ratings = new File("mahout-10m-ratings.dat");
s3.putObject(bucket, "input10m/mahout-10m-ratings.dat", ratings);
- 29. EMRの起動
• JAR Location
mahoutinaction-jp/mahout/
mahout-core-0.7-job.jar
• JAR Arguments
org.apache.mahout.cf.taste.hadoop.item.RecommenderJob
-Dmapred.map.tasks=40
-Dmapred.reduce.tasks=19
-Dmapred.input.dir=s3n://mahoutinaction-jp/input10m
-Dmapred.output.dir=s3n://mahoutinaction-jp/output10m
--numRecommendations 100
--similarityClassname SIMILARITY_PEARSON_CORRELATION
- 30. compute by code
import com.amazonaws.auth.*;
import com.amazonaws.services.elasticmapreduce.*;
import com.amazonaws.services.elasticmapreduce.model.*;
import com.amazonaws.services.elasticmapreduce.util.*;
// Placeholder key pair exactly as shown on the slide; supply real values when running.
AWSCredentials cred = new BasicAWSCredentials(
    "AccessKeyID", "SecretAccessKey");
AmazonElasticMapReduce emr = new AmazonElasticMapReduceClient(cred);
// Send requests to the ap-northeast-1 regional EMR endpoint.
emr.setEndpoint("elasticmapreduce.ap-northeast-1.amazonaws.com");

// Outline of the job-flow request; the elided arguments are spelled out on the next page.
RunJobFlowRequest runRequest = new RunJobFlowRequest()
    .withName("mahout-10m")
    .withSteps( ... ) // detailed on next page
    .withInstances( ... ) // detailed on next page
    .withAmiVersion("2.1.4")
    // Log prefix kept identical to the detailed request on the next page ("logs").
    .withLogUri("s3n://mahoutinaction-jp/logs");

// Submit the request; the result is what carries the job flow ID.
RunJobFlowResult runResult = emr.runJobFlow(runRequest);
- 31. RunJobFlowRequest runRequest = new RunJobFlowRequest()
    .withName("mahout-10m")
    .withSteps(
        // Step 1: enable Hadoop debugging; the step definition comes from StepFactory,
        // constructed here with the ap-northeast-1 bucket name.
        new StepConfig()
            .withName("Setup Hadoop Debugging")
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
            .withHadoopJarStep(
                new StepFactory("ap-northeast-1.elasticmapreduce")
                    .newEnableDebuggingStep()),
        // Step 2: run Mahout's RecommenderJob from the job JAR stored in S3.
        new StepConfig()
            .withName("Custom Jar")
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
            .withHadoopJarStep(new HadoopJarStepConfig()
                .withJar("s3n://mahoutinaction-jp/mahout/mahout-core-0.7-job.jar")
                .withMainClass("org.apache.mahout.cf.taste.hadoop.item.RecommenderJob")
                // Same arguments as the "JAR Arguments" list on the EMR launch slide.
                // Passed through the varargs overload, so java.util.Arrays (which the
                // import list for this code does not include) is not needed.
                .withArgs(
                    "-Dmapred.map.tasks=40",
                    "-Dmapred.reduce.tasks=19",
                    "-Dmapred.input.dir=s3n://mahoutinaction-jp/input10m",
                    "-Dmapred.output.dir=s3n://mahoutinaction-jp/output10m",
                    "--numRecommendations", "100",
                    "--similarityClassname", "SIMILARITY_PEARSON_CORRELATION")))
    // Cluster shape: instance count 20, m1.small for both master and slave types,
    // placed in ap-northeast-1a; keep-alive-when-no-steps is turned off.
    .withInstances(new JobFlowInstancesConfig()
        .withPlacement(new PlacementType("ap-northeast-1a"))
        .withInstanceCount(20)
        .withMasterInstanceType("m1.small")
        .withSlaveInstanceType("m1.small")
        .withKeepJobFlowAliveWhenNoSteps(false)
        .withHadoopVersion("0.20.205"))
    .withAmiVersion("2.1.4")
    .withLogUri("s3n://mahoutinaction-jp/logs");
後でごゆっくりどうぞ
- 32. watch by code
AmazonElasticMapReduce emr = ...;
RunJobFlowResult runResult = ...;

// Ask EMR to describe only the job flow that was just started.
DescribeJobFlowsRequest describeRequest = new DescribeJobFlowsRequest()
    .withJobFlowIds(runResult.getJobFlowId());

// Take the first job flow in the response.
JobFlowDetail detail = emr.describeJobFlows(describeRequest).getJobFlows().get(0);

// Convert the reported state string into the enum.
JobFlowExecutionState state = JobFlowExecutionState.fromValue(
    detail.getExecutionStatusDetail().getState());
// One of: COMPLETED, FAILED, TERMINATED, RUNNING, SHUTTING_DOWN,
// STARTING, WAITING, BOOTSTRAPPING
- 35. download by code
import java.io.InputStream;
import java.util.List;
import com.amazonaws.auth.*;
import com.amazonaws.services.s3.*;
import com.amazonaws.services.s3.model.*;
// Placeholder key pair exactly as shown on the slide; supply real values when running.
AWSCredentials cred = new BasicAWSCredentials(
    "AccessKeyID",
    "SecretAccessKey");
AmazonS3 s3 = new AmazonS3Client(cred);

// List everything under the job's output prefix. A single listing response may be
// truncated, so keep fetching follow-up batches until the listing is complete.
ObjectListing listing = s3.listObjects(
    "mahoutinaction-jp", "output10m");
while (true) {
    for (S3ObjectSummary summary : listing.getObjectSummaries()) {
        String key = summary.getKey();
        System.out.println(key);
        // Skip the "_SUCCESS" marker object; only the remaining keys are downloaded.
        if (key.endsWith("/_SUCCESS")) {
            continue;
        }
        S3Object obj = s3.getObject("mahoutinaction-jp", key);
        InputStream in = obj.getObjectContent();
        try {
            // ...
        } finally {
            // Always release the object's content stream, even if processing fails;
            // an unclosed stream keeps the underlying connection occupied.
            // close() may throw IOException, to be handled by the enclosing method.
            in.close();
        }
    }
    if (!listing.isTruncated()) {
        break;
    }
    listing = s3.listNextBatchOfObjects(listing);
}