Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
DataScienceUWL
GitHub Repository: DataScienceUWL/DS775
Path: blob/main/Lessons/Lesson 14 - RecSys 2/resources/data/movielens/allbut.pl
871 views
1
#!/usr/local/bin/perl
#
# Split a ratings file into a test set and a base set.
#
# Usage: allbut.pl base_name start stop max_test [ratings ...]
#
# Rating lines are read from the named files, or from standard input when no
# files are given.  The first whitespace-separated field of each line is used
# as the user id.  Lines are numbered per user, starting at 1, in input order.
# A line whose per-user number lies in start..stop is written to
# "base_name.test" as long as fewer than max_test lines have been written
# there; every other line is written to "base_name.base".  A max_test of zero
# or less, or an omitted max_test, means "no limit".

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# get args
my $usage = "Usage: $0 base_name start stop max_test [ratings ...]\n";
if (@ARGV < 3) {
    print STDERR $usage;
    exit 1;
}
my $basename = shift;
my $start    = shift;
my $stop     = shift;
my $maxtest  = shift;

# Only three arguments are required above, so max_test may be missing.
# An undefined value used to compare as 0 (no limit); make that explicit.
$maxtest = 0 unless defined $maxtest;

# A non-numeric value here is almost always a misplaced file name.  It would
# silently act as 0, so reject it instead of producing a surprising split.
for my $number ($start, $stop, $maxtest) {
    next if looks_like_number($number);
    print STDERR "$0: '$number' is not a number\n", $usage;
    exit 1;
}

# open files
# Three-argument open keeps characters in base_name from being read as part
# of the open mode; $! reports why the open failed.
open(my $testfh, '>', "$basename.test")
    or die "Cannot open $basename.test for writing: $!\n";
open(my $basefh, '>', "$basename.base")
    or die "Cannot open $basename.base for writing: $!\n";

# init variables
my $testcnt = 0;    # lines written to the test file so far
my %ratingcnt;      # user id => number of lines seen for that user

while (my $line = <>) {
    my ($user) = split ' ', $line;

    # A blank line has no first field; count it under the empty-string key,
    # which is the key an undefined value would have produced anyway.
    $user = '' unless defined $user;

    my $seen = ++$ratingcnt{$user};

    if (($testcnt < $maxtest || $maxtest <= 0)
        && $seen >= $start && $seen <= $stop) {
        ++$testcnt;
        print {$testfh} $line;
    }
    else {
        print {$basefh} $line;
    }
}

# Buffered write errors (for example a full disk) only show up at close.
close($testfh) or die "Cannot close $basename.test: $!\n";
close($basefh) or die "Cannot close $basename.base: $!\n";
35
36