Send patches - preferably formatted by git format-patch - to patches at archlinux32 dot org.
summaryrefslogtreecommitdiff
path: root/vendor/ezyang/htmlpurifier/maintenance/config-scanner.php
blob: c614d1fbc2c3610759fd70ccf7e23d347225a85f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/php
<?php

chdir(dirname(__FILE__));
require_once 'common.php';
require_once '../library/HTMLPurifier.auto.php';
assertCli();

if (version_compare(PHP_VERSION, '5.2.2', '<')) {
    echo "This script requires PHP 5.2.2 or later, for tokenizer line numbers.";
    exit(1);
}

/**
 * @file
 * Scans HTML Purifier source code for $config tokens and records the
 * directive being used; configdoc can use this info later.
 *
 * Currently, this just dumps all the info onto the console. Eventually, it
 * will create an XML file that our XSLT transform can use.
 */

$FS = new FSTools();
chdir(dirname(__FILE__) . '/../library/');
$raw_files = $FS->globr('.', '*.php');
$files = array();
foreach ($raw_files as $file) {
    $file = substr($file, 2); // rm leading './'
    if (strncmp('standalone/', $file, 11) === 0) continue; // rm generated files
    if (substr_count($file, '.') > 1) continue; // rm meta files
    $files[] = $file;
}

/**
 * Moves the $i cursor to the next non-whitespace token
 */
function consumeWhitespace($tokens, &$i)
{
    do {$i++;} while (is_array($tokens[$i]) && $tokens[$i][0] === T_WHITESPACE);
}

/**
 * Tests whether or not a token is a particular type. There are three run-cases:
 *      - ($token, $expect_token): tests if the token is $expect_token type;
 *      - ($token, $expect_value): tests if the token is the string $expect_value;
 *      - ($token, $expect_token, $expect_value): tests if token is $expect_token type, and
 *        its string representation is $expect_value
 */
function testToken($token, $value_or_token, $value = null)
{
    if (is_null($value)) {
        if (is_int($value_or_token)) return is_array($token) && $token[0] === $value_or_token;
        else return $token === $value_or_token;
    } else {
        return is_array($token) && $token[0] === $value_or_token && $token[1] === $value;
    }
}

$counter = 0;
$full_counter = 0;
$tracker = array();

foreach ($files as $file) {
    $tokens = token_get_all(file_get_contents($file));
    $file = str_replace('\\', '/', $file);
    for ($i = 0, $c = count($tokens); $i < $c; $i++) {
        $ok = false;
        // Match $config
        if (!$ok && testToken($tokens[$i], T_VARIABLE, '$config')) $ok = true;
        // Match $this->config
        while (!$ok && testToken($tokens[$i], T_VARIABLE, '$this')) {
            consumeWhitespace($tokens, $i);
            if (!testToken($tokens[$i], T_OBJECT_OPERATOR)) break;
            consumeWhitespace($tokens, $i);
            if (testToken($tokens[$i], T_STRING, 'config')) $ok = true;
            break;
        }
        if (!$ok) continue;

        $ok = false;
        for($i++; $i < $c; $i++) {
            if ($tokens[$i] === ',' || $tokens[$i] === ')' || $tokens[$i] === ';') {
                break;
            }
            if (is_string($tokens[$i])) continue;
            if ($tokens[$i][0] === T_OBJECT_OPERATOR) {
                $ok = true;
                break;
            }
        }
        if (!$ok) continue;

        $line = $tokens[$i][2];

        consumeWhitespace($tokens, $i);
        if (!testToken($tokens[$i], T_STRING, 'get')) continue;

        consumeWhitespace($tokens, $i);
        if (!testToken($tokens[$i], '(')) continue;

        $full_counter++;

        $matched = false;
        do {

            // What we currently don't match are batch retrievals, and
            // wildcard retrievals. This data might be useful in the future,
            // which is why we have a do {} while loop that doesn't actually
            // do anything.

            consumeWhitespace($tokens, $i);
            if (!testToken($tokens[$i], T_CONSTANT_ENCAPSED_STRING)) continue;
            $id = substr($tokens[$i][1], 1, -1);

            $counter++;
            $matched = true;

            if (!isset($tracker[$id])) $tracker[$id] = array();
            if (!isset($tracker[$id][$file])) $tracker[$id][$file] = array();
            $tracker[$id][$file][] = $line;

        } while (0);

        //echo "$file:$line uses $namespace.$directive\n";
    }
}

echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n";

echo "Generating XML... ";

$xw = new XMLWriter();
$xw->openURI('../configdoc/usage.xml');
$xw->setIndent(true);
$xw->startDocument('1.0', 'UTF-8');
$xw->startElement('usage');
foreach ($tracker as $id => $files) {
    $xw->startElement('directive');
    $xw->writeAttribute('id', $id);
    foreach ($files as $file => $lines) {
        $xw->startElement('file');
        $xw->writeAttribute('name', $file);
        foreach ($lines as $line) {
            $xw->writeElement('line', $line);
        }
        $xw->endElement();
    }
    $xw->endElement();
}
$xw->endElement();
$xw->flush();

echo "done!\n";

// vim: et sw=4 sts=4