10 | 10 | import git |
11 | 11 |
12 | 12 | from elasticsearch import Elasticsearch |
| 13 | +from elasticsearch.exceptions import TransportError |
13 | 14 | from elasticsearch.helpers import bulk, streaming_bulk |
14 | 15 |
15 | 16 | def create_git_index(client, index): |
16 | 17 | # we will use user in several places |
17 | 18 | user_mapping = { |
18 | 19 | 'properties': { |
19 | 20 | 'name': { |
20 | | - 'type': 'multi_field', |
| 21 | + 'type': 'string', |
21 | 22 | 'fields': { |
22 | 23 | 'raw': {'type' : 'string', 'index' : 'not_analyzed'}, |
23 | | - 'name': {'type' : 'string'} |
24 | 24 | } |
25 | 25 | } |
26 | 26 | } |
27 | 27 | } |
28 | 28 |
29 | | - # create empty index |
30 | | - client.indices.create( |
31 | | - index=index, |
32 | | - body={ |
33 | | - 'settings': { |
34 | | - # just one shard, no replicas for testing |
35 | | - 'number_of_shards': 1, |
36 | | - 'number_of_replicas': 0, |
37 | | - |
38 | | - # custom analyzer for analyzing file paths |
39 | | - 'analysis': { |
40 | | - 'analyzer': { |
41 | | - 'file_path': { |
42 | | - 'type': 'custom', |
43 | | - 'tokenizer': 'path_hierarchy', |
44 | | - 'filter': ['lowercase'] |
45 | | - } |
46 | | - } |
| 29 | + create_index_body = { |
| 30 | + 'settings': { |
| 31 | + # just one shard, no replicas for testing |
| 32 | + 'number_of_shards': 1, |
| 33 | + 'number_of_replicas': 0, |
| 34 | + |
| 35 | + # custom analyzer for analyzing file paths |
| 36 | + 'analysis': { |
| 37 | + 'analyzer': { |
| 38 | + 'file_path': { |
| 39 | + 'type': 'custom', |
| 40 | + 'tokenizer': 'path_hierarchy', |
| 41 | + 'filter': ['lowercase'] |
47 | 42 | } |
| 43 | + } |
| 44 | + } |
| 45 | + }, |
| 46 | + 'mappings': { |
| 47 | + 'commits': { |
| 48 | + '_parent': { |
| 49 | + 'type': 'repos' |
48 | 50 | }, |
49 | | - 'mappings': { |
50 | | - 'commits': { |
51 | | - '_parent': { |
52 | | - 'type': 'repos' |
53 | | - }, |
54 | | - 'properties': { |
55 | | - 'author': user_mapping, |
56 | | - 'authored_date': {'type': 'date'}, |
57 | | - 'committer': user_mapping, |
58 | | - 'committed_date': {'type': 'date'}, |
59 | | - 'parent_shas': {'type': 'string', 'index' : 'not_analyzed'}, |
60 | | - 'description': {'type': 'string', 'analyzer': 'snowball'}, |
61 | | - 'files': {'type': 'string', 'analyzer': 'file_path'} |
62 | | - } |
| 51 | + 'properties': { |
| 52 | + 'author': user_mapping, |
| 53 | + 'authored_date': {'type': 'date'}, |
| 54 | + 'committer': user_mapping, |
| 55 | + 'committed_date': {'type': 'date'}, |
| 56 | + 'parent_shas': {'type': 'string', 'index' : 'not_analyzed'}, |
| 57 | + 'description': {'type': 'string', 'analyzer': 'snowball'}, |
| 58 | + 'files': {'type': 'string', 'analyzer': 'file_path'} |
| 59 | + } |
| 60 | + }, |
| 61 | + 'repos': { |
| 62 | + 'properties': { |
| 63 | + 'owner': user_mapping, |
| 64 | + 'created_at': {'type': 'date'}, |
| 65 | + 'description': { |
| 66 | + 'type': 'string', |
| 67 | + 'analyzer': 'snowball', |
63 | 68 | }, |
64 | | - 'repos': { |
65 | | - 'properties': { |
66 | | - 'owner': user_mapping, |
67 | | - 'created_at': {'type': 'date'}, |
68 | | - 'description': { |
69 | | - 'type': 'string', |
70 | | - 'analyzer': 'snowball', |
71 | | - }, |
72 | | - 'tags': { |
73 | | - 'type': 'string', |
74 | | - 'index': 'not_analyzed' |
75 | | - } |
76 | | - } |
| 69 | + 'tags': { |
| 70 | + 'type': 'string', |
| 71 | + 'index': 'not_analyzed' |
77 | 72 | } |
78 | 73 | } |
79 | | - }, |
| 74 | + } |
| 75 | + } |
| 76 | + } |
| 77 | + |
| 78 | + # create empty index |
| 79 | + try: |
| 80 | + client.indices.create( |
| 81 | + index=index, |
| 82 | + body=create_index_body, |
| 83 | + ) |
| 84 | +    except TransportError as e: |
80 | 85 | # ignore already existing index |
81 | | - ignore=400 |
82 | | - ) |
| 86 | + if e.error == 'index_already_exists_exception': |
| 87 | + pass |
| 88 | + else: |
| 89 | + raise |
83 | 90 |
84 | 91 | def parse_commits(head, name): |
85 | 92 | """ |
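For context, here is a minimal usage sketch of the reworked index creation. The local Elasticsearch node on localhost:9200 and the index name 'git-test' are illustrative assumptions, not taken from this diff; create_git_index is the function patched above and is assumed to be in scope.

    from elasticsearch import Elasticsearch

    client = Elasticsearch()  # defaults to localhost:9200

    # First call creates the index with the custom 'file_path' analyzer and mappings.
    create_git_index(client, 'git-test')

    # Calling it again raises TransportError with 'index_already_exists_exception',
    # which create_git_index now catches and ignores, so this is a harmless no-op.
    create_git_index(client, 'git-test')

Compared with the removed ignore=400 argument, the explicit TransportError handler only swallows the already-exists case; any other TransportError from index creation is re-raised instead of being silently ignored.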