Merge pull request #1 from elasticsearch/master
sync with es/es
mrsolo committed Jan 15, 2014
2 parents 7e5f51d + 4643f78 commit b358f13
Showing 142 changed files with 5,029 additions and 903 deletions.
80 changes: 62 additions & 18 deletions dev-tools/build_release.py
@@ -58,6 +58,12 @@
"""
env = os.environ

PLUGINS = [('bigdesk', 'lukas-vlcek/bigdesk'),
('paramedic', 'karmi/elasticsearch-paramedic'),
('segmentspy', 'polyfractal/elasticsearch-segmentspy'),
('inquisitor', 'polyfractal/elasticsearch-inquisitor'),
('head', 'mobz/elasticsearch-head')]

LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log')

def log(msg):
@@ -117,10 +123,11 @@ def verify_mvn_java_version(version, mvn):

# Returns the hash of the current git HEAD revision
def get_head_hash():
return get_hash('HEAD')
return os.popen('git rev-parse --verify HEAD 2>&1').read().strip()

def get_hash(version):
return os.popen('git rev-parse --verify %s 2>&1' % (version)).read().strip()
# Returns the hash of the given tag revision
def get_tag_hash(tag):
return os.popen('git show-ref --tags %s --hash 2>&1' % (tag)).read().strip()

# Returns the name of the current branch
def get_current_branch():
@@ -133,6 +140,10 @@ def get_current_branch():
def release_branch(version):
return 'release_branch_%s' % version

# runs git fetch on the given remote
def fetch(remote):
run('git fetch %s' % remote)

# Creates a new release branch from the given source branch
# and rebases the source branch from the remote before creating
# the release branch. Note: This fails if the source branch
@@ -309,7 +320,7 @@ def generate_checksums(files):
res = res + [os.path.join(directory, checksum_file), release_file]
return res

def download_and_verify(release, files, base_url='https://download.elasticsearch.org/elasticsearch/elasticsearch'):
def download_and_verify(release, files, plugins=None, base_url='https://download.elasticsearch.org/elasticsearch/elasticsearch'):
print('Downloading and verifying release %s from %s' % (release, base_url))
tmp_dir = tempfile.mkdtemp()
try:
@@ -326,11 +337,12 @@ def download_and_verify(release, files, base_url='https://download.elasticsearch
urllib.request.urlretrieve(url, checksum_file)
print(' Verifying checksum %s' % (checksum_file))
run('cd %s && sha1sum -c %s' % (tmp_dir, os.path.basename(checksum_file)))
smoke_test_release(release, downloaded_files, get_hash('v%s' % release))
smoke_test_release(release, downloaded_files, get_tag_hash('v%s' % release), plugins)
print(' SUCCESS')
finally:
shutil.rmtree(tmp_dir)

def smoke_test_release(release, files, expected_hash):
def smoke_test_release(release, files, expected_hash, plugins):
for release_file in files:
if not os.path.isfile(release_file):
raise RuntimeError('Smoke test failed: missing file %s' % (release_file))
@@ -344,9 +356,20 @@ def smoke_test_release(release, files, expected_hash):
continue # nothing to do here
es_run_path = os.path.join(tmp_dir, 'elasticsearch-%s' % (release), 'bin/elasticsearch')
print(' Smoke testing package [%s]' % release_file)
es_plugin_path = os.path.join(tmp_dir, 'elasticsearch-%s' % (release),'bin/plugin')
plugin_names = {}
for name, plugin in plugins:
print(' Install plugin [%s] from [%s]' % (name, plugin))
run('%s %s %s' % (es_plugin_path, '-install', plugin))
plugin_names[name] = True

if release.startswith("0.90."):
background = '' # 0.90.x starts in background automatically
else:
background = '-d'
print(' Starting elasticsearch daemon from [%s]' % os.path.join(tmp_dir, 'elasticsearch-%s' % release))
run('%s; %s -Des.node.name=smoke_tester -Des.cluster.name=prepare_release -Des.discovery.zen.ping.multicast.enabled=false -d'
% (java_exe(), es_run_path))
run('%s; %s -Des.node.name=smoke_tester -Des.cluster.name=prepare_release -Des.discovery.zen.ping.multicast.enabled=false %s'
% (java_exe(), es_run_path, background))
conn = HTTPConnection('127.0.0.1', 9200, 20)
wait_for_node_startup()
try:
@@ -360,9 +383,25 @@ def smoke_test_release(release, files, expected_hash):
if version['build_snapshot']:
raise RuntimeError('Expected non snapshot version')
if version['build_hash'].strip() != expected_hash:
raise RuntimeError('HEAD hash does not match expected [%s] but got [%s]' % (get_head_hash(), version['build_hash']))
raise RuntimeError('HEAD hash does not match expected [%s] but got [%s]' % (expected_hash, version['build_hash']))
print(' Running REST Spec tests against package [%s]' % release_file)
run_mvn('test -Dtests.rest=%s -Dtests.class=*.*RestTests' % ("127.0.0.1:9200"))
print(' Verifying that plugins are listed in _nodes')
conn.request('GET', '/_nodes?plugin=true&pretty=true')
res = conn.getresponse()
if res.status == 200:
nodes = json.loads(res.read().decode("utf-8"))['nodes']
for _, node in nodes.items():
node_plugins = node['plugins']
for node_plugin in node_plugins:
if not plugin_names.get(node_plugin['name'], False):
raise RuntimeError('Unexpected plugin %s' % node_plugin['name'])
del plugin_names[node_plugin['name']]
if plugin_names:
raise RuntimeError('Plugins not loaded %s' % list(plugin_names.keys()))

else:
raise RuntimeError('Expected HTTP 200 but got %s' % res.status)
else:
raise RuntimeError('Expected HTTP 200 but got %s' % res.status)
finally:
@@ -471,14 +510,11 @@ def check_s3_credentials():
print('Preparing Release from branch [%s] running tests: [%s] dryrun: [%s]' % (src_branch, run_tests, dry_run))
print(' JAVA_HOME is [%s]' % JAVA_HOME)
print(' Running with maven command: [%s] ' % (MVN))
release_version = find_release_version(src_branch)

if not smoke_test_version and not dry_run:
smoke_test_version = release_version
elif smoke_test_version:
print("Skipping build - smoketest only against version %s" % smoke_test_version)

if build:
release_version = find_release_version(src_branch)
if not dry_run:
smoke_test_version = release_version
head_hash = get_head_hash()
run_mvn('clean') # clean the env!
print(' Release version: [%s]' % release_version)
@@ -497,11 +533,14 @@ def check_s3_credentials():
print(''.join(['-' for _ in range(80)]))
print('Building Release candidate')
input('Press Enter to continue...')
print(' Running maven builds now and publish to Sonatype- run-tests [%s]' % run_tests)
if not dry_run:
print(' Running maven builds now and publish to Sonatype - run-tests [%s]' % run_tests)
else:
print(' Running maven builds now run-tests [%s]' % run_tests)
build_release(run_tests=run_tests, dry_run=dry_run, cpus=cpus)
artifacts = get_artifacts(release_version)
artifacts_and_checksum = generate_checksums(artifacts)
smoke_test_release(release_version, artifacts, get_head_hash())
smoke_test_release(release_version, artifacts, get_head_hash(), PLUGINS)
print(''.join(['-' for _ in range(80)]))
print('Finish Release -- dry_run: %s' % dry_run)
input('Press Enter to continue...')
@@ -530,5 +569,10 @@ def check_s3_credentials():
run('git tag -d v%s' % release_version)
# we delete this one anyways
run('git branch -D %s' % (release_branch(release_version)))
else:
print("Skipping build - smoketest only against version %s" % smoke_test_version)
run_mvn('clean') # clean the env!

if smoke_test_version:
download_and_verify(smoke_test_version, artifact_names(smoke_test_version))
fetch(remote)
download_and_verify(smoke_test_version, artifact_names(smoke_test_version), plugins=PLUGINS)
3 changes: 2 additions & 1 deletion docs/reference/analysis/analyzers/lang-analyzer.asciidoc
@@ -11,7 +11,8 @@ following types are supported: `arabic`, `armenian`, `basque`,

All analyzers support setting custom `stopwords` either internally in
the config, or by using an external stopwords file by setting
`stopwords_path`.
`stopwords_path`. Check <<analysis-stop-analyzer,Stop Analyzer>> for
more details.
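
For example, a minimal sketch of an `arabic` analyzer that loads stopwords
from an external file (the index name and file path below are illustrative):

[source,js]
--------------------------------------------------
curl -XPUT 'http://localhost:9200/test' -d '{
    "settings": {
        "analysis": {
            "analyzer": {
                "my_arabic": {
                    "type": "arabic",
                    "stopwords_path": "stopwords/arabic_custom.txt"
                }
            }
        }
    }
}'
--------------------------------------------------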

The following analyzers support setting custom `stem_exclusion` list:
`arabic`, `armenian`, `basque`, `brazilian`, `bulgarian`, `catalan`,
5 changes: 3 additions & 2 deletions docs/reference/analysis/analyzers/pattern-analyzer.asciidoc
@@ -14,8 +14,9 @@ type:
|`pattern` |The regular expression pattern, defaults to `\W+`.
|`flags` |The regular expression flags.
|`stopwords` |A list of stopwords to initialize the stop filter with.
Defaults to an 'empty' stopword list coming[1.0.0.RC1, Previously
defaulted to the English stopwords list]
Defaults to an 'empty' stopword list added[1.0.0.RC1, Previously
defaulted to the English stopwords list]. Check
<<analysis-stop-analyzer,Stop Analyzer>> for more details.
|===================================================================
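
As an illustrative sketch (the index and analyzer names are hypothetical), a
`pattern` analyzer that splits tokens on commas and uses a custom stopword
list could be defined as:

[source,js]
--------------------------------------------------
curl -XPUT 'http://localhost:9200/test' -d '{
    "settings": {
        "analysis": {
            "analyzer": {
                "comma_analyzer": {
                    "type": "pattern",
                    "pattern": ",",
                    "stopwords": ["and", "or"]
                }
            }
        }
    }
}'
--------------------------------------------------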

*IMPORTANT*: The regular expression should match the *token separators*,
5 changes: 3 additions & 2 deletions docs/reference/analysis/analyzers/snowball-analyzer.asciidoc
@@ -41,8 +41,9 @@ filter>> and defaults to `English`. Note that not all the language
analyzers have a default set of stopwords provided.

The `stopwords` parameter can be used to provide stopwords for the
languages that has no defaults, or to simply replace the default set
with your custom list. A default set of stopwords for many of these
languages that have no defaults, or to simply replace the default set
with your custom list. Check <<analysis-stop-analyzer,Stop Analyzer>>
for more details. A default set of stopwords for many of these
languages is available, for instance, from
https://github.com/apache/lucene-solr/tree/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/[here]
and
5 changes: 3 additions & 2 deletions docs/reference/analysis/analyzers/standard-analyzer.asciidoc
@@ -17,9 +17,10 @@ type:
[cols="<,<",options="header",]
|=======================================================================
|Setting |Description
|`stopwords` |A list of stopword to initialize the stop filter with.
|`stopwords` |A list of stopwords to initialize the stop filter with.
Defaults to an 'empty' stopword list added[1.0.0.Beta1, Previously
defaulted to the English stopwords list]
defaulted to the English stopwords list]. Check
<<analysis-stop-analyzer,Stop Analyzer>> for more details.
|`max_token_length` |The maximum token length. If a token is seen that
exceeds this length then it is discarded. Defaults to `255`.
|=======================================================================
5 changes: 3 additions & 2 deletions docs/reference/analysis/analyzers/stop-analyzer.asciidoc
@@ -12,10 +12,11 @@ The following are settings that can be set for a `stop` analyzer type:
[cols="<,<",options="header",]
|=======================================================================
|Setting |Description
|`stopwords` |A list of stopword to initialize the stop filter with.
|`stopwords` |A list of stopwords to initialize the stop filter with.
Defaults to the English stop words.

|`stopwords_path` |A path (either relative to `config` location, or
absolute) to a stopwords file configuration.
|=======================================================================

Use `stopwords: _none_` to explicitly specify an 'empty' stopword list.
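
For example, a minimal sketch of a `stop` analyzer backed by an external
stopwords file (the file path is illustrative):

[source,js]
--------------------------------------------------
curl -XPUT 'http://localhost:9200/test' -d '{
    "settings": {
        "analysis": {
            "analyzer": {
                "my_stop_analyzer": {
                    "type": "stop",
                    "stopwords_path": "stopwords/custom_stopwords.txt"
                }
            }
        }
    }
}'
--------------------------------------------------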

35 changes: 24 additions & 11 deletions docs/reference/api-conventions.asciidoc
@@ -26,17 +26,30 @@ support wildcards, for example: `test*`, and the ability to "add" (`+`)
and "remove" (`-`), for example: `+test*,-test3`.

All multi indices APIs support the following URL query string parameters:
* `ignore_unavailable` - Controls whether to ignore if any specified indices are unavailable, this includes indices
that don't exist or closed indices. Either `true` or `false` can be specified.
* `allow_no_indices` - Controls whether to fail if a wildcard indices expressions results into no concrete indices.
Either `true` or `false` can be specified. For example if the wildcard expression `foo*` is specified and no indices
are available that start with `foo` then depending on this setting the request will fail. This setting is also applicable
when `_all`, `*` or no index has been specified.
* `expand_wildcards` - Controls to what kind of concrete indices wildcard indices expression expand to. If `open` is
specified then the wildcard expression if expanded to only open indices and if `closed` is specified then the wildcard
expression if expanded only to closed indices. Also both values (`open,closed`) can be specified to expand to all indices.

The defaults settings for the above parameters dependent on the api being used.

`ignore_unavailable`::

Controls whether unavailable indices should be ignored; this includes indices
that don't exist as well as closed indices. Either `true` or `false` can be
specified.

`allow_no_indices`::

Controls whether to fail if a wildcard indices expression resolves to no
concrete indices. Either `true` or `false` can be specified. For example, if
the wildcard expression `foo*` is specified and no indices are available that
start with `foo`, then depending on this setting the request will fail. This
setting also applies when `_all`, `*`, or no index has been specified.

`expand_wildcards`::

Controls which kind of concrete indices wildcard expressions expand to. If
`open` is specified, the wildcard expression is expanded to open indices only;
if `closed` is specified, the wildcard expression is expanded to closed
indices only. Both values (`open,closed`) can be specified to expand to all
indices.

The default settings for the above parameters depend on the API being used.
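
For instance (the index pattern is illustrative), the following search expands
`test*` to open indices only, tolerates unavailable concrete indices, and
fails if the wildcard matches no indices:

[source,js]
--------------------------------------------------
curl -XGET 'http://localhost:9200/test*/_search?ignore_unavailable=true&allow_no_indices=false&expand_wildcards=open'
--------------------------------------------------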

NOTE: Single index APIs such as the <<docs>> and the
<<indices-aliases,single-index `alias` APIs>> do not support multiple indices.
17 changes: 13 additions & 4 deletions docs/reference/docs/index_.asciidoc
@@ -33,7 +33,7 @@ The result of the above index operation is:

The index operation automatically creates an index if it has not been
created before (check out the
<<indices-create-index,create index API>> for manually
<<indices-create-index,create index API>> for manually
creating an index), and also automatically creates a
dynamic type mapping for the specific type if one has not yet been
created (check out the <<indices-put-mapping,put mapping>>
Expand All @@ -44,12 +44,21 @@ objects will automatically be added to the mapping definition of the
type specified. Check out the <<mapping,mapping>>
section for more information on mapping definitions.

Though explained on the <<mapping,mapping>> section,
it's important to note that the format of the JSON document can also
include the type (very handy when using JSON mappers), for example:
Note that the format of the JSON document can also include the type (very handy
when using JSON mappers) if the `index.mapping.allow_type_wrapper` setting is
set to true, for example:

[source,js]
--------------------------------------------------
$ curl -XPOST 'http://localhost:9200/twitter' -d '{
"settings": {
"index": {
"mapping.allow_type_wrapper": true
}
}
}'
{"acknowledged":true}
$ curl -XPUT 'http://localhost:9200/twitter/tweet/1' -d '{
"tweet" : {
"user" : "kimchy",
65 changes: 33 additions & 32 deletions docs/reference/index-modules/fielddata.asciidoc
@@ -24,7 +24,39 @@ field data after a certain time of inactivity. Defaults to `-1`. For
example, can be set to `5m` for a 5 minute expiry.
|=======================================================================

=== Field data formats
[float]
[[fielddata-circuit-breaker]]
=== Field data circuit breaker
The field data circuit breaker allows Elasticsearch to estimate the amount of
memory a field will require when loaded into memory. It can then prevent the
field data from being loaded by raising an exception. By default the limit is
configured to 80% of the maximum JVM heap. It can be configured with the
following parameters:

[cols="<,<",options="header",]
|=======================================================================
|Setting |Description
|`indices.fielddata.breaker.limit` |Maximum size of estimated field data
to allow loading. Defaults to 80% of the maximum JVM heap.
|`indices.fielddata.breaker.overhead` |A constant that all field data
estimations are multiplied with to determine a final estimation. Defaults to
1.03
|=======================================================================

Both the `indices.fielddata.breaker.limit` and
`indices.fielddata.breaker.overhead` can be changed dynamically using the
cluster update settings API.
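
For example, a sketch of tightening the limit at runtime (the `70%` value is
illustrative):

[source,js]
--------------------------------------------------
curl -XPUT 'http://localhost:9200/_cluster/settings' -d '{
    "persistent": {
        "indices.fielddata.breaker.limit": "70%"
    }
}'
--------------------------------------------------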

[float]
[[fielddata-monitoring]]
=== Monitoring field data

You can monitor memory usage for field data as well as the field data circuit
breaker using
<<cluster-nodes-stats,Nodes Stats API>>
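
For example (the exact response fields vary by version), per-node field data
memory usage and breaker statistics can be inspected with:

[source,js]
--------------------------------------------------
curl -XGET 'http://localhost:9200/_nodes/stats?pretty'
--------------------------------------------------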

[[fielddata-formats]]
== Field data formats

The field data format controls how field data should be stored.

@@ -236,34 +268,3 @@ The `frequency` and `regex` filters can be combined:
}
}
--------------------------------------------------

[float]
[[field-data-circuit-breaker]]
=== Field data circuit breaker
The field data circuit breaker allows Elasticsearch to estimate the amount of
memory a field will require when loaded into memory. It can then prevent the
field data from being loaded by raising an exception. By default it is
configured with no limit (-1 bytes), but is automatically set to
`indices.fielddata.cache.size` if set. It can be configured with the following
parameters:

[cols="<,<",options="header",]
|=======================================================================
|Setting |Description
|`indices.fielddata.breaker.limit` |Maximum size of estimated field data
to allow loading. Defaults to 80% of the maximum JVM heap.
|`indices.fielddata.breaker.overhead` |A constant that all field data
estimations are multiplied with to determine a final estimation. Defaults to
1.03
|=======================================================================

Both the `indices.fielddata.breaker.limit` and
`indices.fielddata.breaker.overhead` can be changed dynamically using the
cluster update settings API.

[float]
[[field-data-monitoring]]
=== Monitoring field data

You can monitor memory usage for field data as well as the field data circuit
breaker using
<<cluster-nodes-stats,Nodes Stats API>>
1 change: 1 addition & 0 deletions docs/reference/index-modules/store.asciidoc
@@ -81,6 +81,7 @@ Lucene `NIOFSDirectory`) using NIO. It allows multiple threads to read
from the same file concurrently. It is not recommended on Windows
because of a bug in the SUN Java implementation.
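
For example (the index name is illustrative), a store type such as `niofs`
can be set when creating an index:

[source,js]
--------------------------------------------------
curl -XPUT 'http://localhost:9200/test' -d '{
    "settings": {
        "index.store.type": "niofs"
    }
}'
--------------------------------------------------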

[[mmapfs]]
[float]
==== MMap FS

4 changes: 4 additions & 0 deletions docs/reference/index.asciidoc
@@ -3,6 +3,8 @@

include::setup.asciidoc[]

include::migration/migrate_1_0.asciidoc[]

include::api-conventions.asciidoc[]

include::docs.asciidoc[]
@@ -29,3 +31,5 @@ include::testing.asciidoc[]

include::glossary.asciidoc[]


