SlideShare a Scribd company logo
TEACHING YOUR
MACHINE
TO FIND
FRAUDSTERS

Ian Barber
ianb@php.net
phpir.com
twitter.com/ianbarber
http://joind.in/3429




https://github.com/ianbarber/FindingFraudsters-Talk
5%
           3%
SOME      .1%
SMALL
NUMBERS    8%
99%
ACCURACY
REALLY     REALLY
             LEGITIMATE   FRAUD


EVALUATED
                989         0
LEGITIMATE


EVALUATED
                 10         1
  FRAUD
REALLY     REALLY
             LEGITIMATE   FRAUD



      90%
EVALUATED
LEGITIMATE
          WRONG 989         0



EVALUATED
                 10         1
  FRAUD
ANOMALY DETECTION
30




         22.5
Clicks




          15




          7.5




           0
                Date
SOFTWARE
ARCHITECTURE
                           Alarm

               Detector

                          No Alarm
                Buffer


User Clicks    Landing
    Ad          Page
DETECTOR
              statistics

 Expected
  Clicks
              Threshold    Data Buffer
Sensitivity



               Alarm
average.php
/**
 * Moving-average anomaly detector over the click series in fraudclicks.csv
 * (average.php).
 *
 * Each CSV row is read in order and its second column ($d[1]) is compared
 * with the mean of the previous seven values plus $sen standard deviations.
 * A value above that threshold raises an alarm.
 *
 * Rows after the 201st are treated specially: the first alarm there stops
 * the scan, and every such row that does NOT alarm adds one to the
 * days-to-detect counter.
 * NOTE(review): presumably row 201 is where the known fraud starts in the
 * sample data -- confirm against the data set.
 *
 * @param float $sen Sensitivity: number of standard deviations above the
 *                   window mean a value must exceed to raise an alarm.
 * @return array Two elements: the alarm count minus one (the alarm that
 *               ended the scan is excluded, so this is -1 when nothing
 *               alarmed at all), and the number of non-alarming rows seen
 *               after row 201.
 * @throws RuntimeException If fraudclicks.csv cannot be opened.
 */
function detect($sen) {
  $fraud = fopen("fraudclicks.csv", 'r');
  if ($fraud === false) {
    throw new RuntimeException("Unable to open fraudclicks.csv");
  }

  $window = array(); $i = 0;
  $alarmCount = 0; $dtd = 0;

  // Delimiter/enclosure/escape are spelled out (same values as the historic
  // defaults) so the call does not rely on the deprecated implicit escape.
  while (($d = fgetcsv($fraud, 0, ",", "\"", "\\")) !== false) {
    $i++;
    if (count($window) > 7) {
      // Drop the oldest value so the window holds the previous 7 rows.
      array_shift($window);
      $avg = array_sum($window) / 7;

      // Start from zero for every window; otherwise the previous window's
      // deviation leaks into this one.
      $stddev = 0;
      foreach ($window as $val) {
        $stddev += pow($val - $avg, 2);
      }
      $stddev = sqrt($stddev / 7);

      if ($d[1] > ($avg + ($sen * $stddev))) {
        $alarmCount++;
        if ($i > 201) {
          break;
        }
      } else {
        if ($i > 201) {
          $dtd++;
        }
      }
    }
    array_push($window, $d[1]);
  }
  fclose($fraud);

  return array($alarmCount - 1, $dtd);
}
1.6 SENSITIVITY
          30
                18 False Alarms          1 Day To Detect

         22.5
Clicks




          15




          7.5




           0
                                  Date
2.7 SENSITIVITY
          30
                1 False Alarm      18 Days To Detect

         22.5
Clicks




          15




          7.5




           0
                                Date
SICKNESS
AVAILABILITY
// "Sickness / availability" detector (slide excerpt from sickavail.php).
// Reads fraudclicks.csv row by row, estimates the expected count for the
// current row and raises an alarm when the observed count ($dat[1]) is both
// above the estimate and has a Poisson probability below $sens.
//
// NOTE(review): the excerpt stops at the end of the `if`; the end of the
// while loop, the code that fills $window and $avail, and the return
// statement are not visible here. In the visible part $i is never
// incremented, so the `$i > 201` checks cannot fire -- confirm against the
// full source.
function detect($sens) {          sickavail.php
  $i = 0; $alarms = 0; $dtd = 0;
  $window = array(); $avail = array();
  $fraud = fopen("fraudclicks.csv", 'r');
  while($dat = fgetcsv($fraud)) {
    // Day of week of the row's first column, "0" (Sunday) to "6".
    $dow = date("w", strtotime($dat[0]));
    // Only evaluate once there are at least 7 windowed values and some
    // history exists for this day of week.
    if( count($window) >= 7
        && isset($avail[$dow]) ) {

      // "Sickness": mean over the window of value / average for that key.
      // NOTE(review): $avail is indexed by $day (the window key) here but by
      // $dow elsewhere -- presumably $window is keyed by day of week; confirm.
      $sick = 0;
      foreach($window as $day => $value) {
        $dowavg = array_sum($avail[$day]) /
                  count($avail[$day]);
        $sick += $value / $dowavg;
      }
      $sick /= count($window);
// "Availability": average of the recorded values for today's day of week.
$avlblty = array_sum($avail[$dow]) /
           count($avail[$dow]);
  // Expected count for this row.
  $est = $sick * $avlblty;

  // fac() is not defined in this excerpt -- presumably factorial, which is
  // what the Poisson formula below requires.
  $fac = fac($dat[1]);
  $p = exp(-$est) * pow($est,$dat[1])
       / $fac; // poisson calc

  // Alarm only on unlikely values that are above the estimate.
  if($p < $sens && $dat[1] > $est) {
    $alarms++;
    if($i > 201) { break; }
  } else {
    if($i > 201) { $dtd++; }
  }

} // end if
0.2




0.15




 0.1




0.05




  0
       1   2   3   4   5   6   7   8   9   10
0.011 SENSITIVITY
          30
                1 False Alarm          1 Day To Detect

         22.5
Clicks




          15




          7.5




           0
                                Date
SUPERVISED CLASSIFIERS
classification model
SOFTWARE
ARCHITECTURE
                               Fraud

            Classifier

                             Not Fraud
  User     Transaction
Purchase    Processor


           Transaction
                              Learner
            Database
EVALUATING THE CLASSIFIER

Training Data   Learner      Model




 Test Data
                            Prediction
                Classifier   Accuracy
   Model
20




15




10




5




0
     0   5   10   15   20
20




15




10




5
             ?
0
     0   5       10   15   20
20




15




10




5
             ?
0
     0   5       10   15   20
// Labelled sample transactions: each entry carries the known fraud flag plus
// the price, description and shipping country that get indexed as text.
$docs = [
    ['fraud' => false, 'price' => 1699,  'desc' => 'toy ninja', 'ship' => 'US'],
    ['fraud' => false, 'price' => 20000, 'desc' => 'TV',        'ship' => 'US'],
    ['fraud' => false, 'price' => 2500,  'desc' => 'cds',       'ship' => 'US'],
    ['fraud' => true,  'price' => 20000, 'desc' => 'console',   'ship' => 'CN'],
    ['fraud' => true,  'price' => 5000,  'desc' => 'books',     'ship' => 'US'],
    ['fraud' => true,  'price' => 15000, 'desc' => 'ipod',      'ship' => 'CN'],
];

// Open (or create) the on-disk index and set up an English-stemming
// term generator.
$db = new XapianWritableDatabase("index", Xapian::DB_CREATE_OR_OPEN);
$stem = new XapianStem("english");
$idx = new XapianTermGenerator();
$idx->set_stemmer($stem);

foreach ($docs as $key => $doc) {
    // The class label is stored as the document data; the attributes are
    // indexed as a single space-separated text string.
    $label = $doc['fraud'] ? "fraud" : "clean";
    $text = implode(' ', [$doc['price'], $doc['desc'], $doc['ship']]);

    $xdoc = new XapianDocument();
    $xdoc->set_data($label);
    $idx->set_document($xdoc);
    $idx->index_text($text);
    $db->add_document($xdoc, $key);
}

// Drop the reference to the database handle.
// NOTE(review): presumably this is what flushes pending writes -- confirm.
$db = null;
                               fraudknn.php
// testknn.php
// Nearest-neighbour lookup for an unlabelled transaction: index it
// temporarily, build a query from its terms, print the class labels stored
// on the closest indexed documents, then remove the temporary document.
$test = array(
   'price' => 10000, 'desc' => 'TV',
   'ship' => 'CN'
);

$db   = new XapianWritableDatabase("index",
         Xapian::DB_CREATE_OR_OPEN);
$idx = new XapianTermGenerator();
$stem = new XapianStem("english");
$idx->set_stemmer($stem);

// Index the test transaction the same way as the training documents and
// keep its id so it can be excluded from the results and deleted afterwards.
$xdoc = new XapianDocument();
$idx->set_document($xdoc);
$idx->index_text($test['price'] . ' ' .
      $test['desc'] . ' ' . $test['ship']);
$id = $db->add_document($xdoc);

// Use the test document as the relevance set and collect up to 10
// expansion terms from it.
$enq = new XapianEnquire($db);
$rset = new XapianRSet();
$rset->add_document($id);
$eset = $enq->get_eset(10, $rset);
$terms = array();
$i = $eset->begin();
while ( !$i->equals($eset->end()) ) {
  $terms[] = $i->get_term(); $i->next();
}

// OR the terms together and fetch up to 4 matches; the test document itself
// can be among them, so it is skipped when printing.
$q = new XapianQuery(
         XapianQuery::OP_OR, $terms);
$enq->set_query($q);
$matches = $enq->get_mset(0, 4, $rset);
$i = $matches->begin();
while (!$i->equals($matches->end())) {
  if($i->get_document()->get_docid() != $id)
  {
    $class = $i->get_document()->get_data();
    var_dump($class);
  }
  $i->next();
}

// Remove the temporary document so it does not pollute the training index.
$db->delete_document($id);


$ php testknn.php
string(5) "clean"
string(5) "fraud"
string(5) "fraud"
TRANSACTION
PARAMETERS
/**
 * Score how closely an email address resembles the customer's name
 * (email.php).
 *
 * Both inputs are lower-cased. The address is split on "@" into a user part
 * ("." and "+" become spaces) and a domain part (everything from the first
 * dot onwards is removed). similar_text() percentages are then compared:
 *
 *  - whole name vs user part   > 80  => 1.0
 *  - whole name vs domain part > 80  => 0.8
 *  - otherwise the best score above 50 of any single name part against the
 *    user or domain part is scaled to (1.7 * best / 100) - 1, which gives
 *    -1.0 when nothing scored above 50.
 *
 * @param string $name  Customer name; split on spaces into parts.
 * @param string $email Email address.
 * @return float Score between -1.0 and 1.0.
 */
function compareEmailToName($name, $email) {
  $name = strtolower($name);
  $email = strtolower($email);
  $parts = explode(" ", $name);
  $pcnt = 0;

  // Pad so an address without "@" yields an empty domain instead of an
  // undefined-offset notice.
  list($user, $dom) = array_pad(explode("@", $email), 2, "");
  $user = str_replace(
              array(".", "+"), " ", $user);
  // Keep only the first label of the domain ("acme.co.uk" -> "acme").
  // The dot must be escaped: an unescaped "." matches any character and
  // would blank the whole domain.
  $dom = preg_replace('/\..*/', "", $dom);

  similar_text($name, $user, $pcnt);
  if($pcnt > 80) { return 1.0; }
  similar_text($name, $dom, $pcnt);
  if($pcnt > 80) { return 0.8; }

  // explode() always returns at least one element, so the loop always runs
  // and no separate "no parts" fallback is needed.
  $highest = 0;
  foreach($parts as $part) {
    similar_text($user, $part, $pcnt);
    if($pcnt > 50 && $pcnt > $highest) {
      $highest = $pcnt;
    }
    similar_text($dom, $part, $pcnt);
    if($pcnt > 50 && $pcnt > $highest) {
      $highest = $pcnt;
    }
  }

  return (1.7 * ($highest/100)) - 1;
}
// Example transaction record: one value per feature, ending with the
// fraud flag.
$data = [
  'purchase_value'             => 20993,
  'geo_country'                => 'DE',
  'previous_orders'            => 1,
  'time'                       => 6,
  'timegap'                    => 146632,
  'product_category'           => 'small_item',
  'delivery_matches_card'      => 0,
  'geo_ip_matches_card'        => 1,
  'difference_from_last_trans' => 8755,
  'free_shipping'              => 0,
  'email_like_name'            => 0,
  'free_email_provider'        => 0,
  'disposable_email_provider'  => 0,
  'quantity'                   => 2,
  'fraud'                      => 0,
];
SUPPORT
VECTOR MACHINES
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
20




15




10




5




0
     0   5   10   15   20
$ apt-get install libsvm-dev
$ apt-get install libsvm-tools

$ yum install libsvm-devel

$ pecl install svm-beta
$ echo extension=svm.so > /etc/php.d/svm.ini
$ php -r '$s = new svm(); $m = $s->train
(array(array(-1, -1), array(1, 1))); echo
$m->predict(array(0, -1));'
-1
// learn.php
// Build training rows from paydata.csv, train an SVM on them, save the
// model, then count true/false positives/negatives by predicting the same
// rows back.
$fh = fopen('paydata.csv', 'r');
if ($fh === false) {
  throw new RuntimeException("Unable to open paydata.csv");
}
$output = array();

// Delimiter/enclosure/escape are spelled out (same values as the historic
// defaults) so the call does not rely on the deprecated implicit escape.
while (($data = fgetcsv($fh, 0, ",", "\"", "\\")) !== false) {
  // Element 0 is the class label: -1 when column 14 is 1, otherwise 1.
  // NOTE(review): column 14 is presumably the fraud flag -- confirm against
  // the CSV layout. The remaining keys are feature numbers, with values
  // scaled or mapped into roughly the -1..1 range.
  $output[] = array(
     $data[14] == 1 ? -1 : 1,
     1 => ($data[0]/20000.00) - 1.0, // price
     2 => $data[1] == 'CN' ? 1.0:-1.0,
     3 => $data[1] == 'US' ? 1.0:-1.0,
     4 => $data[5] == 'digital' ? 1.0:-1.0,
     5 => $data[7] == 1 ? 1.0:-1.0, //geo
     6 => $data[6] == 1 ? 1.0:-1.0, // deliv
     12 => $data[9] == 1 ? 1.0:-1.0, // ship
     13 => ($data[13] / 1.5) - 1.0, // qty
  );
}
fclose($fh);

// Train with a weight per class label and persist the model to disk.
$svm = new svm();
$model = $svm->train($output,
               array(-1 => 0.65, 1 => 0.5));
$model->save('learn.model');

// Confusion-matrix counts: a label or prediction above 0 is "positive".
$fp = $tp = $fn = $tn = 0;
foreach($output as $test) {
  $res = $model->predict($test);
  if($test[0] > 0) {
    if($res > 0) { $tp++; }
    else { $fn++; }
  } else {
    if($res > 0) { $fp++; }
    else { $tn++; }
  }
}
// test.php
// ...snip.. loading test data from
// paytest.csv

// Load the model saved by the training script and score the held-out rows.
$model = new SVMModel('learn.model');

// Confusion-matrix counts: a label or prediction above 0 is "positive".
$fp = $tp = $fn = $tn = 0;
foreach($output as $test) {
  $res = $model->predict($test);
  if($test[0] > 0) {
    if($res > 0) { $tp++; }
    else { $fn++; }
  } else {
    if($res > 0) { $fp++; }
    else { $tn++; }
  }
}

var_dump("True Positive " . $tp);
var_dump("True Negative " . $tn);
var_dump("False Positive " . $fp);
var_dump("False Negative " . $fn);
// Guard against an empty data set: dividing by a zero total would raise a
// division-by-zero error instead of reporting a result.
$total = $tp + $tn + $fp + $fn;
var_dump("Accuracy " .
        ($total > 0 ? ($tp + $tn) / $total : 0));
$ php learn.php
string(18) "True Positive 8316"
string(18) "True Negative 1682"
string(16) "False Positive 2"
string(16) "False Negative 0"
string(15) "Accuracy 0.9998"

$ php test.php
string(17) "True Positive 844"
string(17) "True Negative 155"
string(16) "False Positive 0"
string(16) "False Negative 1"
string(14) "Accuracy 0.999"
training data


  Test         Verify       Update



Automated     Manual        Manual
Time Series           Class Based



   Sensitivity             Model



 False    Days To    False        False
Alarms    Detect    Positives   Negatives
(shogun)
TEACHING YOUR
MACHINE
TO FIND
FRAUDSTERS

http://joind.in/3429

Ian Barber
ianb@php.net
Title Slide - CSI
http://www.flickr.com/photos/39matt/5241862082
Sickness Availability - Chicago Fire Department
http://www.flickr.com/photos/mike_miley/3929146730/
Model Buildings - Ah Ain’t Long For This Whorl
http://www.flickr.com/photos/chadmiller/98014022/
Repeat Customer - McDonald’s Loyalty Card
http://www.flickr.com/photos/fsse-info/3658873057/
Shipping - FedEx Truck
http://www.flickr.com/photos/moto_club4ag/4852235145/
Velocity - Chevrolet Chevelle Dragster
http://www.flickr.com/photos/jns001/2958999006/
GeoIP - Earth Asia Terminator View
http://www.flickr.com/photos/flyingsinger/86898564/
Multiple Items - Boxes
http://www.flickr.com/photos/skrewtape/851672959/

More Related Content

What's hot

Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Mail.ru Group
 
Introdução ao Perl 6
Introdução ao Perl 6Introdução ao Perl 6
Introdução ao Perl 6
garux
 
News of the Symfony2 World
News of the Symfony2 WorldNews of the Symfony2 World
News of the Symfony2 WorldFabien Potencier
 
Advanced modulinos
Advanced modulinosAdvanced modulinos
Advanced modulinosbrian d foy
 
Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8
XSolve
 
The Magic Of Tie
The Magic Of TieThe Magic Of Tie
The Magic Of Tie
brian d foy
 
Créer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heureCréer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heure
Amaury Bouchard
 
Advanced modulinos trial
Advanced modulinos trialAdvanced modulinos trial
Advanced modulinos trial
brian d foy
 
20 modules i haven't yet talked about
20 modules i haven't yet talked about20 modules i haven't yet talked about
20 modules i haven't yet talked aboutTatsuhiko Miyagawa
 
Melhorando sua API com DSLs
Melhorando sua API com DSLsMelhorando sua API com DSLs
Melhorando sua API com DSLs
Augusto Pascutti
 
Perl 6 by example
Perl 6 by examplePerl 6 by example
Perl 6 by example
Andrew Shitov
 
Introduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 TokyoIntroduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 TokyoMasahiro Nagano
 
PHP Language Trivia
PHP Language TriviaPHP Language Trivia
PHP Language Trivia
Nikita Popov
 

What's hot (18)

zinno
zinnozinno
zinno
 
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
Security Meetup 22 октября. «Реверс-инжиниринг в Enterprise». Алексей Секрето...
 
Introdução ao Perl 6
Introdução ao Perl 6Introdução ao Perl 6
Introdução ao Perl 6
 
News of the Symfony2 World
News of the Symfony2 WorldNews of the Symfony2 World
News of the Symfony2 World
 
C99
C99C99
C99
 
Php 101: PDO
Php 101: PDOPhp 101: PDO
Php 101: PDO
 
Advanced modulinos
Advanced modulinosAdvanced modulinos
Advanced modulinos
 
Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8Xlab #1: Advantages of functional programming in Java 8
Xlab #1: Advantages of functional programming in Java 8
 
The Magic Of Tie
The Magic Of TieThe Magic Of Tie
The Magic Of Tie
 
C99[2]
C99[2]C99[2]
C99[2]
 
Créer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heureCréer une base NoSQL en 1 heure
Créer une base NoSQL en 1 heure
 
Advanced modulinos trial
Advanced modulinos trialAdvanced modulinos trial
Advanced modulinos trial
 
Cod
CodCod
Cod
 
20 modules i haven't yet talked about
20 modules i haven't yet talked about20 modules i haven't yet talked about
20 modules i haven't yet talked about
 
Melhorando sua API com DSLs
Melhorando sua API com DSLsMelhorando sua API com DSLs
Melhorando sua API com DSLs
 
Perl 6 by example
Perl 6 by examplePerl 6 by example
Perl 6 by example
 
Introduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 TokyoIntroduction to CloudForecast / YAPC::Asia 2010 Tokyo
Introduction to CloudForecast / YAPC::Asia 2010 Tokyo
 
PHP Language Trivia
PHP Language TriviaPHP Language Trivia
PHP Language Trivia
 

Viewers also liked

Deloittes 2009 Technology Fast 500™ Ranking
Deloittes 2009 Technology Fast 500™  RankingDeloittes 2009 Technology Fast 500™  Ranking
Deloittes 2009 Technology Fast 500™ Ranking
lisaswiftney
 
Canada Deber 2pdf
Canada Deber 2pdfCanada Deber 2pdf
Canada Deber 2pdf
Monica Guerra
 
Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500Seth Greenberg
 
dollar general annual reports 2002
dollar general annual reports 2002dollar general annual reports 2002
dollar general annual reports 2002finance41
 
Deployment Tactics
Deployment TacticsDeployment Tactics
Deployment Tactics
Ian Barber
 
Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009
mattdriscoll
 
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
power to the pixel
 
Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009mattdriscoll
 
Document Classification In PHP - Slight Return
Document Classification In PHP - Slight ReturnDocument Classification In PHP - Slight Return
Document Classification In PHP - Slight Return
Ian Barber
 
ZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 VersionZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 Version
Ian Barber
 
Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?
Linkfluence
 
4 de febrero de 1992 pdf
4 de febrero de 1992 pdf4 de febrero de 1992 pdf
4 de febrero de 1992 pdf
UPEL-IMPM; MPPE (UE. BRICEÑO MÉNDEZ)
 
Israel pide un rey
Israel pide un reyIsrael pide un rey
Israel pide un reyCoke Neto
 
Technology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDFTechnology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDFJustin Campbell
 

Viewers also liked (16)

Deloittes 2009 Technology Fast 500™ Ranking
Deloittes 2009 Technology Fast 500™  RankingDeloittes 2009 Technology Fast 500™  Ranking
Deloittes 2009 Technology Fast 500™ Ranking
 
Canada Deber 2pdf
Canada Deber 2pdfCanada Deber 2pdf
Canada Deber 2pdf
 
Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500Deloitte-2014-Technology-Fast500
Deloitte-2014-Technology-Fast500
 
dollar general annual reports 2002
dollar general annual reports 2002dollar general annual reports 2002
dollar general annual reports 2002
 
Deployment Tactics
Deployment TacticsDeployment Tactics
Deployment Tactics
 
20140528 valeant story draft deckv85
20140528 valeant story draft deckv8520140528 valeant story draft deckv85
20140528 valeant story draft deckv85
 
Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009Arc Sight Info Documents 10 21 2009
Arc Sight Info Documents 10 21 2009
 
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman The Pixel Lab 2015 | Don't lose heart - Sean Coleman
The Pixel Lab 2015 | Don't lose heart - Sean Coleman
 
Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009Arc Sight Info Documents 12 3 2009
Arc Sight Info Documents 12 3 2009
 
Document Classification In PHP - Slight Return
Document Classification In PHP - Slight ReturnDocument Classification In PHP - Slight Return
Document Classification In PHP - Slight Return
 
ZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 VersionZeroMQ Is The Answer: PHP Tek 11 Version
ZeroMQ Is The Answer: PHP Tek 11 Version
 
Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?Social media & dirigeants du Cac 40 : que disent les conversations ?
Social media & dirigeants du Cac 40 : que disent les conversations ?
 
Eca´s probabilidad y estadística Agosto 2012-Enero 2013
Eca´s probabilidad y estadística Agosto 2012-Enero 2013Eca´s probabilidad y estadística Agosto 2012-Enero 2013
Eca´s probabilidad y estadística Agosto 2012-Enero 2013
 
4 de febrero de 1992 pdf
4 de febrero de 1992 pdf4 de febrero de 1992 pdf
4 de febrero de 1992 pdf
 
Israel pide un rey
Israel pide un reyIsrael pide un rey
Israel pide un rey
 
Technology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDFTechnology-Fast-500-Winners-Brochure.PDF
Technology-Fast-500-Winners-Brochure.PDF
 

Similar to Teaching Your Machine To Find Fraudsters

Javascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introductionJavascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introduction
Iban Martinez
 
Crazy things done on PHP
Crazy things done on PHPCrazy things done on PHP
Crazy things done on PHPTaras Kalapun
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Raimonds Simanovskis
 
Your code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnConYour code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnCon
Rafael Dohms
 
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
Amazon Web Services
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitmfrost503
 
My Development Story
My Development StoryMy Development Story
My Development Story
Takahiro Fujiwara
 
Unit testing with zend framework tek11
Unit testing with zend framework tek11Unit testing with zend framework tek11
Unit testing with zend framework tek11
Michelangelo van Dam
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnit
mfrost503
 
Mocking Demystified
Mocking DemystifiedMocking Demystified
Mocking Demystified
Marcello Duarte
 
Unit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBeneluxUnit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBenelux
Michelangelo van Dam
 
Document Classification In PHP
Document Classification In PHPDocument Classification In PHP
Document Classification In PHP
Ian Barber
 
Gta v savegame
Gta v savegameGta v savegame
Gta v savegame
hozayfa999
 
WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015
Fernando Daciuk
 
ddd+scala
ddd+scaladdd+scala
ddd+scala
潤一 加藤
 
Game Development with SDL and Perl
Game Development with SDL and PerlGame Development with SDL and Perl
Game Development with SDL and Perl
garux
 
R57shell
R57shellR57shell
R57shell
ady36
 

Similar to Teaching Your Machine To Find Fraudsters (20)

Javascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introductionJavascript & jQuery: A pragmatic introduction
Javascript & jQuery: A pragmatic introduction
 
Crazy things done on PHP
Crazy things done on PHPCrazy things done on PHP
Crazy things done on PHP
 
Coding website
Coding websiteCoding website
Coding website
 
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and JasmineRails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
Rails-like JavaScript Using CoffeeScript, Backbone.js and Jasmine
 
Your code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnConYour code sucks, let's fix it - DPC UnCon
Your code sucks, let's fix it - DPC UnCon
 
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
TLS305 Using DynamoDB with the AWS SDK for PHP - AWS re: Invent 2012
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnit
 
My Development Story
My Development StoryMy Development Story
My Development Story
 
Unit testing with zend framework tek11
Unit testing with zend framework tek11Unit testing with zend framework tek11
Unit testing with zend framework tek11
 
Mocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnitMocking Dependencies in PHPUnit
Mocking Dependencies in PHPUnit
 
Mocking Demystified
Mocking DemystifiedMocking Demystified
Mocking Demystified
 
Ns2programs
Ns2programsNs2programs
Ns2programs
 
Unit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBeneluxUnit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBenelux
 
Document Classification In PHP
Document Classification In PHPDocument Classification In PHP
Document Classification In PHP
 
Gta v savegame
Gta v savegameGta v savegame
Gta v savegame
 
WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015WordPress Realtime - WordCamp São Paulo 2015
WordPress Realtime - WordCamp São Paulo 2015
 
Database api
Database apiDatabase api
Database api
 
ddd+scala
ddd+scaladdd+scala
ddd+scala
 
Game Development with SDL and Perl
Game Development with SDL and PerlGame Development with SDL and Perl
Game Development with SDL and Perl
 
R57shell
R57shellR57shell
R57shell
 

Recently uploaded

Securing your Kubernetes cluster_ a step-by-step guide to success !
Securing your Kubernetes cluster_ a step-by-step guide to success !Securing your Kubernetes cluster_ a step-by-step guide to success !
Securing your Kubernetes cluster_ a step-by-step guide to success !
KatiaHIMEUR1
 
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdfUnlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Malak Abu Hammad
 
20240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 202420240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 2024
Matthew Sinclair
 
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to ProductionGenerative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Aggregage
 
Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...
Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...
Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...
SOFTTECHHUB
 
Pushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 daysPushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 days
Adtran
 
Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?
Nexer Digital
 
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AIEnchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Vladimir Iglovikov, Ph.D.
 
How to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptxHow to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptx
danishmna97
 
Microsoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdfMicrosoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdf
Uni Systems S.M.S.A.
 
Climate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing DaysClimate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing Days
Kari Kakkonen
 
GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...
GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...
GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...
Neo4j
 
GridMate - End to end testing is a critical piece to ensure quality and avoid...
GridMate - End to end testing is a critical piece to ensure quality and avoid...GridMate - End to end testing is a critical piece to ensure quality and avoid...
GridMate - End to end testing is a critical piece to ensure quality and avoid...
ThomasParaiso2
 
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
Neo4j
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
Safe Software
 
UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5
DianaGray10
 
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdfObservability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Paige Cruz
 
Uni Systems Copilot event_05062024_C.Vlachos.pdf
Uni Systems Copilot event_05062024_C.Vlachos.pdfUni Systems Copilot event_05062024_C.Vlachos.pdf
Uni Systems Copilot event_05062024_C.Vlachos.pdf
Uni Systems S.M.S.A.
 
Mind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AIMind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AI
Kumud Singh
 
National Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practicesNational Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practices
Quotidiano Piemontese
 

Recently uploaded (20)

Securing your Kubernetes cluster_ a step-by-step guide to success !
Securing your Kubernetes cluster_ a step-by-step guide to success !Securing your Kubernetes cluster_ a step-by-step guide to success !
Securing your Kubernetes cluster_ a step-by-step guide to success !
 
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdfUnlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
 
20240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 202420240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 2024
 
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to ProductionGenerative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to Production
 
Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...
Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...
Why You Should Replace Windows 11 with Nitrux Linux 3.5.0 for enhanced perfor...
 
Pushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 daysPushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 days
 
Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?
 
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AIEnchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
 
How to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptxHow to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptx
 
Microsoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdfMicrosoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdf
 
Climate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing DaysClimate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing Days
 
GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...
GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...
GraphSummit Singapore | The Future of Agility: Supercharging Digital Transfor...
 
GridMate - End to end testing is a critical piece to ensure quality and avoid...
GridMate - End to end testing is a critical piece to ensure quality and avoid...GridMate - End to end testing is a critical piece to ensure quality and avoid...
GridMate - End to end testing is a critical piece to ensure quality and avoid...
 
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
 
UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5
 
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdfObservability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
 
Uni Systems Copilot event_05062024_C.Vlachos.pdf
Uni Systems Copilot event_05062024_C.Vlachos.pdfUni Systems Copilot event_05062024_C.Vlachos.pdf
Uni Systems Copilot event_05062024_C.Vlachos.pdf
 
Mind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AIMind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AI
 
National Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practicesNational Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practices
 

Teaching Your Machine To Find Fraudsters

  • 1. TEACHING YOUR MACHINE TO FIND FRAUDSTERS Ian Barber ianb@php.net phpir.com twitter.com/ianbarber
  • 3. 5% 3% SOME .1% SMALL NUMBERS 8%
  • 5. REALLY REALLY LEGITIMATE FRAUD EVALUATED 989 0 LEGITIMATE EVALUATED 10 1 FRAUD
  • 6. REALLY REALLY LEGITIMATE FRAUD 90% EVALUATED LEGITIMATE WRONG 989 0 EVALUATED 10 1 FRAUD
  • 8. 30 22.5 Clicks 15 7.5 0 Date
  • 9. SOFTWARE ARCHITECTURE Alarm Detector No Alarm Buffer User Clicks Landing Ad Page
  • 10. DETECTOR statistics Expected Clicks Threshold Data Buffer Sensitivity Alarm
  • 11. average.php function detect($sen) { $window = array(); $i = 0; $alarmCount = 0; $dtd = 0; $avg = $stddev = 0; $fraud = fopen("fraudclicks.csv", 'r'); while($d = fgetcsv($fraud)) { $i++; if(count($window) > 7) { array_shift($window); $avg = array_sum($window) / 7; foreach($window as $val) { $stddev += pow($val - $average, 2); } $stddev = sqrt($stddev/7);
  • 12. 0.2 0.15 0.1 0.05 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
  • 13. if($d[1] > ($avg + ($sen * $stddev))){ $alarmCount++; if($i > 201) { break; } } else { if($i > 201) { $dtd++; } } } array_push($window, $d[1]); } return array($alarmCount-1, $dtd); }
  • 14. 1.6 SENSITIVITY 30 18 False Alarms 1 Day To Detect 22.5 Clicks 15 7.5 0 Date
  • 15. 2.7 SENSITIVITY 30 1 False Alarm 18 Days To Detect 22.5 Clicks 15 7.5 0 Date
  • 17. function detect($sens) { sickavail.php $i = 0; $alarms = 0; $dtd = 0; $window = array(); $avail = array(); $fraud = fopen("fraudclicks.csv", 'r'); while($dat = fgetcsv($fraud)) { $dow = date("w", strtotime($dat[0])); if( count($window) >= 7 && isset($avail[$dow]) ) { $sick = 0; foreach($window as $day => $value) { $dowavg = array_sum($avail[$day]) / count($avail[$day]); $sick += $value / $dowavg; } $sick /= count($window);
  • 18. $avlblty = array_sum($avail[$dow]) / count($avail[$dow]); $est = $sick * $avlblty; $fac = fac($dat[1]); $p = exp(-$est) * pow($est,$dat[1]) / $fac; // poisson calc if($p < $sens && $dat[1] > $est) { $alarms++; if($i > 201) { break; } } else { if($i > 201) { $dtd++; } } } // end if
  • 19. 0.2 0.15 0.1 0.05 0 1 2 3 4 5 6 7 8 9 10
  • 20. 0.011 SENSITIVITY 30 1 False Alarm 1 Day To Detect 22.5 Clicks 15 7.5 0 Date
  • 22. classification model SOFTWARE ARCHITECTURE Fraud Classifier Not Fraud User Transaction Purchase Processor Transaction Learner Database
  • 23. EVALUATING THE CLASSIFIER Training Data Learner Model Test Data Prediction Classifier Accuracy Model
  • 24. 20 15 10 5 0 0 5 10 15 20
  • 25. 20 15 10 5 ? 0 0 5 10 15 20
  • 26. 20 15 10 5 ? 0 0 5 10 15 20
  • 27. $docs = array( array('fraud' => false, 'price' => 1699, 'desc'=>'toy ninja', 'ship' => 'US'), array('fraud' => false, 'price' => 20000, 'desc' => 'TV','ship' => 'US'), array('fraud' => false, 'price' => 2500, 'desc' => 'cds', 'ship' => 'US'), array('fraud' => true, 'price' => 20000, 'desc' => 'console', 'ship' => 'CN'), array('fraud' => true, 'price' => 5000, 'desc' => 'books', 'ship' => 'US'), array('fraud' => true, 'price' => 15000, 'desc' => 'ipod', 'ship' => 'CN'), );
  • 28. $db = new XapianWritableDatabase("index", Xapian::DB_CREATE_OR_OPEN); $idx = new XapianTermGenerator(); $stem = new XapianStem("english"); $idx->set_stemmer($stem); foreach($docs as $key => $doc) { $xdoc = new XapianDocument(); $xdoc->set_data($doc['fraud'] ? "fraud" : "clean"); $idx->set_document($xdoc); $idx->index_text($doc['price'] . ' ' . $doc['desc'] . ' ' . $doc['ship']); $db->add_document($xdoc, $key); } $db = null; fraudknn.php
  • 29. $test = array( testknn.php 'price' => 10000, 'desc' => 'TV', 'ship' => 'CN' ); $db = new XapianWritableDatabase("index", Xapian::DB_CREATE_OR_OPEN); $idx = new XapianTermGenerator(); $stem = new XapianStem("english"); $idx->set_stemmer($stem); $xdoc = new XapianDocument(); $idx->set_document($xdoc); $idx->index_text($test['price'] . ' ' . $test['desc'] . ' ' . $test['ship']); $id = $db->add_document($xdoc);
  • 30. $enq = new XapianEnquire($db); $rset = new XapianRSet(); $rset->add_document($id); $eset = $enq->get_eset(10, $rset); $terms = array(); $i = $eset->begin(); while ( !$i->equals($eset->end()) ) { $terms[] = $i->get_term(); $i->next(); } $q = new XapianQuery( XapianQuery::OP_OR, $terms); $enq->set_query($q); $matches = $enq->get_mset(0, 4, $rset);
  • 31. $i = $matches->begin(); while (!$i->equals($matches->end())) { if($i->get_document()->get_docid() != $id) { $class = $i->get_document()->get_data(); var_dump($class); } $i->next(); } $db->delete_document($id); $ php testknn.php string(5) "clean" string(5) "fraud" string(5) "fraud"
  • 33.
  • 34.
  • 35. function compareEmailToName($name, $email) { $name = strtolower($name); $email = strtolower($email); $parts = explode(" ", $name); $pcnt = 0; list($user, $dom) = explode("@", $email); $user = str_replace( array(".", "+"), " ", $user); $dom = preg_replace("/..*/", "", $dom); similar_text($name, $user, $pcnt); if($pcnt > 80) { return 1.0; } similar_text($name, $dom, $pcnt); if($pcnt > 80) { return 0.8; } email.php
  • 36. if(count($parts)) { $highest = 0; foreach($parts as $part) { similar_text($user, $part, $pcnt); if($pcnt > 50 && $pcnt > $highest) { $highest = $percent; } similar_text($dom, $part, $pcnt); if($pcnt > 50 && $pcnt > $highest) { $highest = $percent; } } return (1.7 * ($highest/100)) - 1; } return -1; }
  • 37.
  • 38.
  • 39.
  • 40. $data = array( 'purchase_value' => 20993, 'geo_country' => 'DE', 'previous_orders' => 1, 'time' => 6, 'timegap' => 146632, 'product_category' => 'small_item', 'delivery_matches_card' => 0, 'geo_ip_matches_card' => 1, 'difference_from_last_trans' => 8755, 'free_shipping' => 0, 'email_like_name' => 0, 'free_email_provider' => 0, 'disposable_email_provider' => 0, 'quantity' => 2, 'fraud' => 0);
  • 42. 20 15 10 5 0 0 5 10 15 20
  • 43. 20 15 10 5 0 0 5 10 15 20
  • 44. 20 15 10 5 0 0 5 10 15 20
  • 45. 20 15 10 5 0 0 5 10 15 20
  • 46. 20 15 10 5 0 0 5 10 15 20
  • 47. $ apt-get install libsvm-dev $ apt-get install libsvm-tools $ yum install libsvm-devel $ pecl install svm-beta $ echo extension=svm.so > /etc/php.d/svm.ini $ php -r '$s = new svm(); $m = $s->train (array(array(-1, -1), array(1, 1))); echo $m->predict(array(0, -1));' -1
  • 48. $fh = fopen('paydata.csv', 'r'); $output = array(); while($data = fgetcsv($fh)) { $output[] = array( $data[14] == 1 ? -1 : 1, 1 => ($data[0]/20000.00) - 1.0, // price 2 => $data[1] == 'CN' ? 1.0:-1.0, 3 => $data[1] == 'US' ? 1.0:-1.0, 4 => $data[5] == 'digital' ? 1.0:-1.0, 5 => $data[7] == 1 ? 1.0:-1.0, //geo 6 => $data[6] == 1 ? 1.0:-1.0, // deliv 12 => $data[9] == 1 ? 1.0:-1.0, // ship 13 => ($data[13] / 1.5) - 1.0, // qty ); } learn.php
  • 49. $svm = new svm(); $model = $svm->train($output, array(-1 => 0.65, 1 => 0.5)); $model->save('learn.model'); $fp = $tp = $fn = $tn = 0; foreach($output as $test) { $res = $model->predict($test); if($test[0] > 0) { if($res > 0) { $tp++; } else { $fn++; } } else { if($res > 0) { $fp++; } else { $tn++; } } }
  • 50. // ...snip.. loading test data from // paytest.csv $model = new SVMModel('learn.model'); $fp = $tp = $fn = $tn = 0; foreach($output as $test) { $res = $model->predict($test); if($test[0] > 0) { if($res > 0) { $tp++; } else { $fn++; } } else { if($res > 0) { $fp++; } else { $tn++; } } } test.php
  • 51. var_dump("True Positive " . $tp); var_dump("True Negative " . $tn); var_dump("False Positive " . $fp); var_dump("False Negative " . $fn); var_dump("Accuracy " . (($tp+$tn)/($tp+$tn+$fp+$fn)));
  • 52. $ php learn.php string(18) "True Positive 8316" string(18) "True Negative 1682" string(16) "False Positive 2" string(16) "False Negative 0" string(15) "Accuracy 0.9998" $ php test.php string(17) "True Positive 844" string(17) "True Negative 155" string(16) "False Positive 0" string(16) "False Negative 1" string(14) "Accuracy 0.999"
  • 53. training data Test Verify Update Automated Manual Manual
  • 54. Time Series Class Based Sensitivity Model False Days To False False Alarms Detect Positives Negatives
  • 57. Title Slide - CSI http://www.flickr.com/photos/39matt/5241862082 Sickness Availability - Chicago Fire Department http://www.flickr.com/photos/mike_miley/3929146730/ Model Buildings - Ah Ain’t Long For This Whorl http://www.flickr.com/photos/chadmiller/98014022/ Repeat Customer - McDonald’s Loyalty Card http://www.flickr.com/photos/fsse-info/3658873057/ Shipping - FedEx Truck http://www.flickr.com/photos/moto_club4ag/4852235145/ Velocity - Chevrolet Chevelle Dragster http://www.flickr.com/photos/jns001/2958999006/ GeoIP - Earth Asia Terminator View http://www.flickr.com/photos/flyingsinger/86898564/ Multiple Items - Boxes http://www.flickr.com/photos/skrewtape/851672959/